Skip to main content

uni_query/query/
planner.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use crate::query::pushdown::{PredicateAnalyzer, try_label_or_to_union, try_type_or_to_union};
5use anyhow::{Result, anyhow};
6use arrow_array::RecordBatch;
7use arrow_schema::{DataType, SchemaRef};
8use parking_lot::RwLock;
9use std::collections::{HashMap, HashSet};
10use std::sync::Arc;
11use uni_common::Value;
12use uni_common::core::schema::{
13    DistanceMetric, EmbeddingConfig, FullTextIndexConfig, IndexDefinition, JsonFtsIndexConfig,
14    ScalarIndexConfig, ScalarIndexType, Schema, TokenizerConfig, VectorIndexConfig,
15    VectorIndexType,
16};
17use uni_cypher::ast::{
18    AlterEdgeType, AlterLabel, BinaryOp, CallKind, Clause, CreateConstraint, CreateEdgeType,
19    CreateLabel, CypherLiteral, Direction, DropConstraint, DropEdgeType, DropLabel, Expr,
20    MatchClause, MergeClause, NodePattern, PathPattern, Pattern, PatternElement, Query,
21    RelationshipPattern, RemoveItem, ReturnClause, ReturnItem, SchemaCommand, SetClause, SetItem,
22    ShortestPathMode, ShowConstraints, SortItem, Statement, WindowSpec, WithClause,
23    WithRecursiveClause,
24};
25
/// Sentinel column name inserted into a variable's property set to request
/// that the planner build the bare struct column (`add_structural_projection`)
/// WITHOUT pulling the full schema.
///
/// Emitted by `mark_set_item_variables` for `SetItem::Property` targets only.
/// Other SET variants (`Labels`, `Variable`, `VariablePlus`) and REMOVE still
/// emit `"*"` because they replace/merge the whole node.
///
/// **Union semantics:** When both `"*"` and the sentinel appear in the same
/// variable's `HashSet` (e.g. `SET n.x = 1 RETURN n` collects both), `"*"`
/// dominates, so schema expansion still happens. The sentinel only changes
/// behavior when it is the sole structural marker present.
///
/// **Reserved-name convention:** the double-underscore prefix marks this name
/// as internal. Schema validation should reject user-declared properties with
/// this name (deferred follow-up), so until that lands a user property called
/// `__set_struct__` would be indistinguishable from the sentinel.
pub(crate) const STRUCT_ONLY_SENTINEL: &str = "__set_struct__";
43
/// Semantic classification of a variable that is in scope during planning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VariableType {
    /// A node binding, e.g. from `MATCH (n)` or `CREATE (n)`.
    Node,
    /// An edge/relationship binding, e.g. from `MATCH ()-[r]->()`.
    Edge,
    /// A path binding, e.g. from `MATCH p = (a)-[*]->(b)`.
    Path,
    /// A scalar binding (`WITH expr AS x`, `UNWIND list AS item`, ...).
    /// The value may be a map or otherwise dynamic, so property access is allowed.
    Scalar,
    /// A scalar known to come from a non-graph literal (int, float, bool,
    /// string, list). Property access on these is rejected at compile time.
    ScalarLiteral,
    /// A variable imported from an outer scope whose type is unknown (from
    /// `plan_with_scope` string vars). It is compatible with any concrete
    /// type, which lets subqueries re-bind the variable.
    Imported,
}

impl VariableType {
    /// Reports whether a variable of this type may be used where `expected`
    /// is required.
    ///
    /// * `Imported` always passes, because its real type is unknown at plan time.
    /// * `ScalarLiteral` passes wherever `Scalar` is expected (not the reverse).
    /// * Every other combination must match exactly.
    fn is_compatible_with(self, expected: VariableType) -> bool {
        match (self, expected) {
            (VariableType::Imported, _) => true,
            (VariableType::ScalarLiteral, VariableType::Scalar) => true,
            (actual, wanted) => actual == wanted,
        }
    }
}
75
76/// Information about a variable in scope during planning.
77#[derive(Debug, Clone)]
78pub struct VariableInfo {
79    /// Variable name as written in the query.
80    pub name: String,
81    /// Semantic type of the variable.
82    pub var_type: VariableType,
83    /// True if this is a variable-length path (VLP) step variable.
84    ///
85    /// VLP step variables are typed as Edge but semantically hold edge lists.
86    pub is_vlp: bool,
87}
88
89impl VariableInfo {
90    pub fn new(name: String, var_type: VariableType) -> Self {
91        Self {
92            name,
93            var_type,
94            is_vlp: false,
95        }
96    }
97}
98
99/// Find a variable in scope by name.
100fn find_var_in_scope<'a>(vars: &'a [VariableInfo], name: &str) -> Option<&'a VariableInfo> {
101    vars.iter().find(|v| v.name == name)
102}
103
104/// Check if a variable is in scope.
105fn is_var_in_scope(vars: &[VariableInfo], name: &str) -> bool {
106    find_var_in_scope(vars, name).is_some()
107}
108
109/// Check if an expression contains a pattern predicate.
110fn contains_pattern_predicate(expr: &Expr) -> bool {
111    if matches!(
112        expr,
113        Expr::Exists {
114            from_pattern_predicate: true,
115            ..
116        }
117    ) {
118        return true;
119    }
120    let mut found = false;
121    expr.for_each_child(&mut |child| {
122        if !found {
123            found = contains_pattern_predicate(child);
124        }
125    });
126    found
127}
128
129/// Add a variable to scope with type conflict validation.
130/// Returns an error if the variable already exists with a different type.
131fn add_var_to_scope(
132    vars: &mut Vec<VariableInfo>,
133    name: &str,
134    var_type: VariableType,
135) -> Result<()> {
136    if name.is_empty() {
137        return Ok(());
138    }
139
140    if let Some(existing) = vars.iter_mut().find(|v| v.name == name) {
141        if existing.var_type == VariableType::Imported {
142            // Imported vars upgrade to the concrete type
143            existing.var_type = var_type;
144        } else if var_type == VariableType::Imported || existing.var_type == var_type {
145            // New type is Imported (keep existing) or same type — no conflict
146        } else if matches!(
147            existing.var_type,
148            VariableType::Scalar | VariableType::ScalarLiteral
149        ) && matches!(var_type, VariableType::Node | VariableType::Edge)
150        {
151            // Scalar can be used as Node/Edge in CREATE context — a scalar
152            // holding a node/edge reference is valid for pattern use
153            existing.var_type = var_type;
154        } else {
155            return Err(anyhow!(
156                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as {:?}",
157                name,
158                existing.var_type,
159                var_type
160            ));
161        }
162    } else {
163        vars.push(VariableInfo::new(name.to_string(), var_type));
164    }
165    Ok(())
166}
167
168/// Convert VariableInfo vec to String vec for backward compatibility
169fn vars_to_strings(vars: &[VariableInfo]) -> Vec<String> {
170    vars.iter().map(|v| v.name.clone()).collect()
171}
172
173fn infer_with_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
174    match expr {
175        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
176            .map(|info| info.var_type)
177            .unwrap_or(VariableType::Scalar),
178        Expr::Literal(CypherLiteral::Null) => VariableType::Imported,
179        // Known non-graph literals: property access is NOT valid on these.
180        Expr::Literal(CypherLiteral::Integer(_))
181        | Expr::Literal(CypherLiteral::Float(_))
182        | Expr::Literal(CypherLiteral::String(_))
183        | Expr::Literal(CypherLiteral::Bool(_))
184        | Expr::Literal(CypherLiteral::Bytes(_)) => VariableType::ScalarLiteral,
185        Expr::FunctionCall { name, args, .. } => {
186            let lower = name.to_lowercase();
187            if lower == "coalesce" {
188                infer_coalesce_type(args, vars_in_scope)
189            } else if lower == "collect" && !args.is_empty() {
190                let collected = infer_with_output_type(&args[0], vars_in_scope);
191                if matches!(
192                    collected,
193                    VariableType::Node
194                        | VariableType::Edge
195                        | VariableType::Path
196                        | VariableType::Imported
197                ) {
198                    collected
199                } else {
200                    VariableType::Scalar
201                }
202            } else {
203                VariableType::Scalar
204            }
205        }
206        // WITH list literals/expressions produce scalar list values. Preserving
207        // entity typing here causes invalid node/edge reuse in later MATCH clauses
208        // (e.g. WITH [n] AS users; MATCH (users)-->() should fail at compile time).
209        // Lists are ScalarLiteral since property access is not valid on them.
210        Expr::List(_) => VariableType::ScalarLiteral,
211        _ => VariableType::Scalar,
212    }
213}
214
215fn infer_coalesce_type(args: &[Expr], vars_in_scope: &[VariableInfo]) -> VariableType {
216    let mut resolved: Option<VariableType> = None;
217    let mut saw_imported = false;
218    for arg in args {
219        let t = infer_with_output_type(arg, vars_in_scope);
220        match t {
221            VariableType::Node | VariableType::Edge | VariableType::Path => {
222                if let Some(existing) = resolved {
223                    if existing != t {
224                        return VariableType::Scalar;
225                    }
226                } else {
227                    resolved = Some(t);
228                }
229            }
230            VariableType::Imported => saw_imported = true,
231            VariableType::Scalar | VariableType::ScalarLiteral => {}
232        }
233    }
234    if let Some(t) = resolved {
235        t
236    } else if saw_imported {
237        VariableType::Imported
238    } else {
239        VariableType::Scalar
240    }
241}
242
243fn infer_unwind_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
244    match expr {
245        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
246            .map(|info| info.var_type)
247            .unwrap_or(VariableType::Scalar),
248        Expr::FunctionCall { name, args, .. }
249            if name.eq_ignore_ascii_case("collect") && !args.is_empty() =>
250        {
251            infer_with_output_type(&args[0], vars_in_scope)
252        }
253        Expr::List(items) => {
254            let mut inferred: Option<VariableType> = None;
255            for item in items {
256                let t = infer_with_output_type(item, vars_in_scope);
257                if !matches!(
258                    t,
259                    VariableType::Node
260                        | VariableType::Edge
261                        | VariableType::Path
262                        | VariableType::Imported
263                ) {
264                    return VariableType::Scalar;
265                }
266                if let Some(existing) = inferred {
267                    if existing != t
268                        && t != VariableType::Imported
269                        && existing != VariableType::Imported
270                    {
271                        return VariableType::Scalar;
272                    }
273                    if existing == VariableType::Imported && t != VariableType::Imported {
274                        inferred = Some(t);
275                    }
276                } else {
277                    inferred = Some(t);
278                }
279            }
280            inferred.unwrap_or(VariableType::Scalar)
281        }
282        _ => VariableType::Scalar,
283    }
284}
285
286/// Collect all variable names referenced in an expression
287fn collect_expr_variables(expr: &Expr) -> Vec<String> {
288    let mut vars = Vec::new();
289    collect_expr_variables_inner(expr, &mut vars);
290    vars
291}
292
293/// Collect the names of `$param` references in a constant-foldable expression.
294///
295/// Walks the variants that `eval_const_numeric_expr` accepts (the only shapes a
296/// successfully-folded `LIMIT`/`SKIP` expression can take): parameters,
297/// literals, unary/binary arithmetic, and the whitelisted numeric functions.
298/// Used to tell the plan cache which parameter values were baked into the plan.
299fn collect_expr_parameters(expr: &Expr, names: &mut Vec<String>) {
300    match expr {
301        Expr::Parameter(name) => {
302            if !names.contains(name) {
303                names.push(name.clone());
304            }
305        }
306        Expr::UnaryOp { expr: e, .. } => collect_expr_parameters(e, names),
307        Expr::BinaryOp { left, right, .. } => {
308            collect_expr_parameters(left, names);
309            collect_expr_parameters(right, names);
310        }
311        Expr::FunctionCall { args, .. } => {
312            for a in args {
313                collect_expr_parameters(a, names);
314            }
315        }
316        _ => {}
317    }
318}
319
320fn collect_expr_variables_inner(expr: &Expr, vars: &mut Vec<String>) {
321    let mut add_var = |name: &String| {
322        if !vars.contains(name) {
323            vars.push(name.clone());
324        }
325    };
326
327    match expr {
328        Expr::Variable(name) => add_var(name),
329        Expr::Property(base, _) => collect_expr_variables_inner(base, vars),
330        Expr::BinaryOp { left, right, .. } => {
331            collect_expr_variables_inner(left, vars);
332            collect_expr_variables_inner(right, vars);
333        }
334        Expr::UnaryOp { expr: e, .. }
335        | Expr::IsNull(e)
336        | Expr::IsNotNull(e)
337        | Expr::IsUnique(e) => collect_expr_variables_inner(e, vars),
338        Expr::FunctionCall { args, .. } => {
339            for a in args {
340                collect_expr_variables_inner(a, vars);
341            }
342        }
343        Expr::List(items) => {
344            for item in items {
345                collect_expr_variables_inner(item, vars);
346            }
347        }
348        Expr::In { expr: e, list } => {
349            collect_expr_variables_inner(e, vars);
350            collect_expr_variables_inner(list, vars);
351        }
352        Expr::Case {
353            expr: case_expr,
354            when_then,
355            else_expr,
356        } => {
357            if let Some(e) = case_expr {
358                collect_expr_variables_inner(e, vars);
359            }
360            for (w, t) in when_then {
361                collect_expr_variables_inner(w, vars);
362                collect_expr_variables_inner(t, vars);
363            }
364            if let Some(e) = else_expr {
365                collect_expr_variables_inner(e, vars);
366            }
367        }
368        Expr::Map(entries) => {
369            for (_, v) in entries {
370                collect_expr_variables_inner(v, vars);
371            }
372        }
373        Expr::LabelCheck { expr, .. } => collect_expr_variables_inner(expr, vars),
374        Expr::ArrayIndex { array, index } => {
375            collect_expr_variables_inner(array, vars);
376            collect_expr_variables_inner(index, vars);
377        }
378        Expr::ArraySlice { array, start, end } => {
379            collect_expr_variables_inner(array, vars);
380            if let Some(s) = start {
381                collect_expr_variables_inner(s, vars);
382            }
383            if let Some(e) = end {
384                collect_expr_variables_inner(e, vars);
385            }
386        }
387        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
388        // they introduce local variable bindings not in outer scope.
389        _ => {}
390    }
391}
392
393/// Rewrite ORDER BY expressions to resolve projection aliases back to their source expressions.
394///
395/// Example: `RETURN r AS rel ORDER BY rel.id` becomes `ORDER BY r.id` so Sort can run
396/// before the final RETURN projection without losing alias semantics.
397fn rewrite_order_by_expr_with_aliases(expr: &Expr, aliases: &HashMap<String, Expr>) -> Expr {
398    let repr = expr.to_string_repr();
399    if let Some(rewritten) = aliases.get(&repr) {
400        return rewritten.clone();
401    }
402
403    match expr {
404        Expr::Variable(name) => aliases.get(name).cloned().unwrap_or_else(|| expr.clone()),
405        Expr::Property(base, prop) => Expr::Property(
406            Box::new(rewrite_order_by_expr_with_aliases(base, aliases)),
407            prop.clone(),
408        ),
409        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
410            left: Box::new(rewrite_order_by_expr_with_aliases(left, aliases)),
411            op: *op,
412            right: Box::new(rewrite_order_by_expr_with_aliases(right, aliases)),
413        },
414        Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
415            op: *op,
416            expr: Box::new(rewrite_order_by_expr_with_aliases(inner, aliases)),
417        },
418        Expr::FunctionCall {
419            name,
420            args,
421            distinct,
422            window_spec,
423        } => Expr::FunctionCall {
424            name: name.clone(),
425            args: args
426                .iter()
427                .map(|a| rewrite_order_by_expr_with_aliases(a, aliases))
428                .collect(),
429            distinct: *distinct,
430            window_spec: window_spec.clone(),
431        },
432        Expr::List(items) => Expr::List(
433            items
434                .iter()
435                .map(|item| rewrite_order_by_expr_with_aliases(item, aliases))
436                .collect(),
437        ),
438        Expr::Map(entries) => Expr::Map(
439            entries
440                .iter()
441                .map(|(k, v)| (k.clone(), rewrite_order_by_expr_with_aliases(v, aliases)))
442                .collect(),
443        ),
444        Expr::Case {
445            expr: case_expr,
446            when_then,
447            else_expr,
448        } => Expr::Case {
449            expr: case_expr
450                .as_ref()
451                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
452            when_then: when_then
453                .iter()
454                .map(|(w, t)| {
455                    (
456                        rewrite_order_by_expr_with_aliases(w, aliases),
457                        rewrite_order_by_expr_with_aliases(t, aliases),
458                    )
459                })
460                .collect(),
461            else_expr: else_expr
462                .as_ref()
463                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
464        },
465        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
466        // they introduce local variable bindings that could shadow aliases.
467        _ => expr.clone(),
468    }
469}
470
471/// Validate function call argument types.
472/// Returns error if type constraints are violated.
473fn validate_function_call(name: &str, args: &[Expr], vars_in_scope: &[VariableInfo]) -> Result<()> {
474    let name_lower = name.to_lowercase();
475
476    // labels() requires Node
477    if name_lower == "labels"
478        && let Some(Expr::Variable(var_name)) = args.first()
479        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
480        && !info.var_type.is_compatible_with(VariableType::Node)
481    {
482        return Err(anyhow!(
483            "SyntaxError: InvalidArgumentType - labels() requires a node argument"
484        ));
485    }
486
487    // type() requires Edge
488    if name_lower == "type"
489        && let Some(Expr::Variable(var_name)) = args.first()
490        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
491        && !info.var_type.is_compatible_with(VariableType::Edge)
492    {
493        return Err(anyhow!(
494            "SyntaxError: InvalidArgumentType - type() requires a relationship argument"
495        ));
496    }
497
498    // properties() requires Node/Edge/Map (not scalar literals)
499    if name_lower == "properties"
500        && let Some(arg) = args.first()
501    {
502        match arg {
503            Expr::Literal(CypherLiteral::Integer(_))
504            | Expr::Literal(CypherLiteral::Float(_))
505            | Expr::Literal(CypherLiteral::String(_))
506            | Expr::Literal(CypherLiteral::Bool(_))
507            | Expr::List(_) => {
508                return Err(anyhow!(
509                    "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
510                ));
511            }
512            Expr::Variable(var_name) => {
513                if let Some(info) = find_var_in_scope(vars_in_scope, var_name)
514                    && matches!(
515                        info.var_type,
516                        VariableType::Scalar | VariableType::ScalarLiteral
517                    )
518                {
519                    return Err(anyhow!(
520                        "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
521                    ));
522                }
523            }
524            _ => {}
525        }
526    }
527
528    // nodes()/relationships() require Path
529    if (name_lower == "nodes" || name_lower == "relationships")
530        && let Some(Expr::Variable(var_name)) = args.first()
531        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
532        && !info.var_type.is_compatible_with(VariableType::Path)
533    {
534        return Err(anyhow!(
535            "SyntaxError: InvalidArgumentType - {}() requires a path argument",
536            name_lower
537        ));
538    }
539
540    // size() does NOT accept Path arguments (length() on paths IS valid — returns relationship count)
541    if name_lower == "size"
542        && let Some(Expr::Variable(var_name)) = args.first()
543        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
544        && info.var_type == VariableType::Path
545    {
546        return Err(anyhow!(
547            "SyntaxError: InvalidArgumentType - size() requires a string, list, or map argument"
548        ));
549    }
550
551    // length()/size() do NOT accept Node or single-Edge arguments.
552    // VLP step variables (e.g. `r` in `-[r*1..2]->`) are typed as Edge
553    // but are actually edge lists — size()/length() is valid on those.
554    if (name_lower == "length" || name_lower == "size")
555        && let Some(Expr::Variable(var_name)) = args.first()
556        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
557        && (info.var_type == VariableType::Node
558            || (info.var_type == VariableType::Edge && !info.is_vlp))
559    {
560        return Err(anyhow!(
561            "SyntaxError: InvalidArgumentType - {}() requires a string, list, or path argument",
562            name_lower
563        ));
564    }
565
566    Ok(())
567}
568
569/// Check if an expression is a non-boolean literal.
570fn is_non_boolean_literal(expr: &Expr) -> bool {
571    matches!(
572        expr,
573        Expr::Literal(CypherLiteral::Integer(_))
574            | Expr::Literal(CypherLiteral::Float(_))
575            | Expr::Literal(CypherLiteral::String(_))
576            | Expr::List(_)
577            | Expr::Map(_)
578    )
579}
580
581/// Validate boolean expressions (AND/OR/NOT require boolean arguments).
582fn validate_boolean_expression(expr: &Expr) -> Result<()> {
583    // Check AND/OR/XOR operands and NOT operand for non-boolean literals
584    if let Expr::BinaryOp { left, op, right } = expr
585        && matches!(op, BinaryOp::And | BinaryOp::Or | BinaryOp::Xor)
586    {
587        let op_name = format!("{op:?}").to_uppercase();
588        for operand in [left.as_ref(), right.as_ref()] {
589            if is_non_boolean_literal(operand) {
590                return Err(anyhow!(
591                    "SyntaxError: InvalidArgumentType - {} requires boolean arguments",
592                    op_name
593                ));
594            }
595        }
596    }
597    if let Expr::UnaryOp {
598        op: uni_cypher::ast::UnaryOp::Not,
599        expr: inner,
600    } = expr
601        && is_non_boolean_literal(inner)
602    {
603        return Err(anyhow!(
604            "SyntaxError: InvalidArgumentType - NOT requires a boolean argument"
605        ));
606    }
607    let mut result = Ok(());
608    expr.for_each_child(&mut |child| {
609        if result.is_ok() {
610            result = validate_boolean_expression(child);
611        }
612    });
613    result
614}
615
616/// Validate that all variables used in an expression are in scope.
617fn validate_expression_variables(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
618    let used_vars = collect_expr_variables(expr);
619    for var in used_vars {
620        if !is_var_in_scope(vars_in_scope, &var) {
621            return Err(anyhow!(
622                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
623                var
624            ));
625        }
626    }
627    Ok(())
628}
629
630/// Check if a function name (lowercase) is an aggregate function.
631fn is_aggregate_function_name(name: &str) -> bool {
632    matches!(
633        name.to_lowercase().as_str(),
634        "count"
635            | "sum"
636            | "avg"
637            | "min"
638            | "max"
639            | "collect"
640            | "stdev"
641            | "stddev"
642            | "stdevp"
643            | "stddevp"
644            | "variance"
645            | "variancep"
646            | "percentiledisc"
647            | "percentilecont"
648            | "btic_min"
649            | "btic_max"
650            | "btic_span_agg"
651            | "btic_count_at"
652    ) || uni_cypher::is_known_plugin_aggregate(name)
653}
654
655/// Returns true if the expression is a window function (FunctionCall with window_spec).
656fn is_window_function(expr: &Expr) -> bool {
657    matches!(
658        expr,
659        Expr::FunctionCall {
660            window_spec: Some(_),
661            ..
662        }
663    )
664}
665
666/// Returns true when `expr` reports `is_aggregate()` but is NOT itself a bare
667/// aggregate FunctionCall (or CountSubquery/CollectSubquery). In other words,
668/// the aggregate lives *inside* a wrapper expression (e.g. a ListComprehension,
669/// size() call, BinaryOp, etc.).
670fn is_compound_aggregate(expr: &Expr) -> bool {
671    if !expr.is_aggregate() {
672        return false;
673    }
674    match expr {
675        Expr::FunctionCall {
676            name, window_spec, ..
677        } => {
678            // A bare aggregate FunctionCall is NOT compound
679            if window_spec.is_some() {
680                return true; // window wrapping an aggregate — treat as compound
681            }
682            !is_aggregate_function_name(name)
683        }
684        // Subquery aggregates are "bare" (not compound)
685        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => false,
686        // Everything else (ListComprehension, BinaryOp, etc.) is compound
687        _ => true,
688    }
689}
690
691/// Recursively collect all bare aggregate FunctionCall sub-expressions from
692/// `expr`. Stops recursing into the *arguments* of an aggregate (we only want
693/// the outermost aggregate boundaries).
694///
695/// For `ListComprehension`, `Quantifier`, and `Reduce`, only the `list` field
696/// is searched because the body (`map_expr`, `predicate`, `expr`) references
697/// the loop variable, not outer-scope aggregates.
698fn extract_inner_aggregates(expr: &Expr) -> Vec<Expr> {
699    let mut out = Vec::new();
700    extract_inner_aggregates_rec(expr, &mut out);
701    out
702}
703
704fn extract_inner_aggregates_rec(expr: &Expr, out: &mut Vec<Expr>) {
705    match expr {
706        Expr::FunctionCall {
707            name, window_spec, ..
708        } if window_spec.is_none() && is_aggregate_function_name(name) => {
709            // Found a bare aggregate — collect it and stop recursing
710            out.push(expr.clone());
711        }
712        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
713            out.push(expr.clone());
714        }
715        // For list comprehension, only search the `list` source for aggregates
716        Expr::ListComprehension { list, .. } => {
717            extract_inner_aggregates_rec(list, out);
718        }
719        // For quantifier, only search the `list` source
720        Expr::Quantifier { list, .. } => {
721            extract_inner_aggregates_rec(list, out);
722        }
723        // For reduce, search `init` and `list` (not the body `expr`)
724        Expr::Reduce { init, list, .. } => {
725            extract_inner_aggregates_rec(init, out);
726            extract_inner_aggregates_rec(list, out);
727        }
728        // Standard recursive cases
729        Expr::FunctionCall { args, .. } => {
730            for arg in args {
731                extract_inner_aggregates_rec(arg, out);
732            }
733        }
734        Expr::BinaryOp { left, right, .. } => {
735            extract_inner_aggregates_rec(left, out);
736            extract_inner_aggregates_rec(right, out);
737        }
738        Expr::UnaryOp { expr: e, .. }
739        | Expr::IsNull(e)
740        | Expr::IsNotNull(e)
741        | Expr::IsUnique(e) => extract_inner_aggregates_rec(e, out),
742        Expr::Property(base, _) => extract_inner_aggregates_rec(base, out),
743        Expr::List(items) => {
744            for item in items {
745                extract_inner_aggregates_rec(item, out);
746            }
747        }
748        Expr::Case {
749            expr: case_expr,
750            when_then,
751            else_expr,
752        } => {
753            if let Some(e) = case_expr {
754                extract_inner_aggregates_rec(e, out);
755            }
756            for (w, t) in when_then {
757                extract_inner_aggregates_rec(w, out);
758                extract_inner_aggregates_rec(t, out);
759            }
760            if let Some(e) = else_expr {
761                extract_inner_aggregates_rec(e, out);
762            }
763        }
764        Expr::In {
765            expr: in_expr,
766            list,
767        } => {
768            extract_inner_aggregates_rec(in_expr, out);
769            extract_inner_aggregates_rec(list, out);
770        }
771        Expr::ArrayIndex { array, index } => {
772            extract_inner_aggregates_rec(array, out);
773            extract_inner_aggregates_rec(index, out);
774        }
775        Expr::ArraySlice { array, start, end } => {
776            extract_inner_aggregates_rec(array, out);
777            if let Some(s) = start {
778                extract_inner_aggregates_rec(s, out);
779            }
780            if let Some(e) = end {
781                extract_inner_aggregates_rec(e, out);
782            }
783        }
784        Expr::Map(entries) => {
785            for (_, v) in entries {
786                extract_inner_aggregates_rec(v, out);
787            }
788        }
789        _ => {}
790    }
791}
792
793/// Return a copy of `expr` with every inner aggregate FunctionCall replaced by
794/// `Expr::Variable(aggregate_column_name(agg))`.
795///
796/// For `ListComprehension`/`Quantifier`/`Reduce`, only the `list` field is
797/// rewritten (the body references the loop variable, not outer-scope columns).
798fn replace_aggregates_with_columns(expr: &Expr) -> Expr {
799    match expr {
800        Expr::FunctionCall {
801            name, window_spec, ..
802        } if window_spec.is_none() && is_aggregate_function_name(name) => {
803            // Replace bare aggregate with column reference
804            Expr::Variable(aggregate_column_name(expr))
805        }
806        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
807            Expr::Variable(aggregate_column_name(expr))
808        }
809        Expr::ListComprehension {
810            variable,
811            list,
812            where_clause,
813            map_expr,
814        } => Expr::ListComprehension {
815            variable: variable.clone(),
816            list: Box::new(replace_aggregates_with_columns(list)),
817            where_clause: where_clause.clone(), // don't touch — references loop var
818            map_expr: map_expr.clone(),         // don't touch — references loop var
819        },
820        Expr::Quantifier {
821            quantifier,
822            variable,
823            list,
824            predicate,
825        } => Expr::Quantifier {
826            quantifier: *quantifier,
827            variable: variable.clone(),
828            list: Box::new(replace_aggregates_with_columns(list)),
829            predicate: predicate.clone(), // don't touch — references loop var
830        },
831        Expr::Reduce {
832            accumulator,
833            init,
834            variable,
835            list,
836            expr: body,
837        } => Expr::Reduce {
838            accumulator: accumulator.clone(),
839            init: Box::new(replace_aggregates_with_columns(init)),
840            variable: variable.clone(),
841            list: Box::new(replace_aggregates_with_columns(list)),
842            expr: body.clone(), // don't touch — references loop var
843        },
844        Expr::FunctionCall {
845            name,
846            args,
847            distinct,
848            window_spec,
849        } => Expr::FunctionCall {
850            name: name.clone(),
851            args: args.iter().map(replace_aggregates_with_columns).collect(),
852            distinct: *distinct,
853            window_spec: window_spec.clone(),
854        },
855        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
856            left: Box::new(replace_aggregates_with_columns(left)),
857            op: *op,
858            right: Box::new(replace_aggregates_with_columns(right)),
859        },
860        Expr::UnaryOp { op, expr: e } => Expr::UnaryOp {
861            op: *op,
862            expr: Box::new(replace_aggregates_with_columns(e)),
863        },
864        Expr::IsNull(e) => Expr::IsNull(Box::new(replace_aggregates_with_columns(e))),
865        Expr::IsNotNull(e) => Expr::IsNotNull(Box::new(replace_aggregates_with_columns(e))),
866        Expr::IsUnique(e) => Expr::IsUnique(Box::new(replace_aggregates_with_columns(e))),
867        Expr::Property(base, prop) => Expr::Property(
868            Box::new(replace_aggregates_with_columns(base)),
869            prop.clone(),
870        ),
871        Expr::List(items) => {
872            Expr::List(items.iter().map(replace_aggregates_with_columns).collect())
873        }
874        Expr::Case {
875            expr: case_expr,
876            when_then,
877            else_expr,
878        } => Expr::Case {
879            expr: case_expr
880                .as_ref()
881                .map(|e| Box::new(replace_aggregates_with_columns(e))),
882            when_then: when_then
883                .iter()
884                .map(|(w, t)| {
885                    (
886                        replace_aggregates_with_columns(w),
887                        replace_aggregates_with_columns(t),
888                    )
889                })
890                .collect(),
891            else_expr: else_expr
892                .as_ref()
893                .map(|e| Box::new(replace_aggregates_with_columns(e))),
894        },
895        Expr::In {
896            expr: in_expr,
897            list,
898        } => Expr::In {
899            expr: Box::new(replace_aggregates_with_columns(in_expr)),
900            list: Box::new(replace_aggregates_with_columns(list)),
901        },
902        Expr::ArrayIndex { array, index } => Expr::ArrayIndex {
903            array: Box::new(replace_aggregates_with_columns(array)),
904            index: Box::new(replace_aggregates_with_columns(index)),
905        },
906        Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
907            array: Box::new(replace_aggregates_with_columns(array)),
908            start: start
909                .as_ref()
910                .map(|e| Box::new(replace_aggregates_with_columns(e))),
911            end: end
912                .as_ref()
913                .map(|e| Box::new(replace_aggregates_with_columns(e))),
914        },
915        Expr::Map(entries) => Expr::Map(
916            entries
917                .iter()
918                .map(|(k, v)| (k.clone(), replace_aggregates_with_columns(v)))
919                .collect(),
920        ),
921        // Leaf expressions — return as-is
922        other => other.clone(),
923    }
924}
925
926/// Check if an expression contains any aggregate function (recursively).
927fn contains_aggregate_recursive(expr: &Expr) -> bool {
928    match expr {
929        Expr::FunctionCall { name, args, .. } => {
930            is_aggregate_function_name(name) || args.iter().any(contains_aggregate_recursive)
931        }
932        Expr::BinaryOp { left, right, .. } => {
933            contains_aggregate_recursive(left) || contains_aggregate_recursive(right)
934        }
935        Expr::UnaryOp { expr: e, .. }
936        | Expr::IsNull(e)
937        | Expr::IsNotNull(e)
938        | Expr::IsUnique(e) => contains_aggregate_recursive(e),
939        Expr::List(items) => items.iter().any(contains_aggregate_recursive),
940        Expr::Case {
941            expr,
942            when_then,
943            else_expr,
944        } => {
945            expr.as_deref().is_some_and(contains_aggregate_recursive)
946                || when_then.iter().any(|(w, t)| {
947                    contains_aggregate_recursive(w) || contains_aggregate_recursive(t)
948                })
949                || else_expr
950                    .as_deref()
951                    .is_some_and(contains_aggregate_recursive)
952        }
953        Expr::In { expr, list } => {
954            contains_aggregate_recursive(expr) || contains_aggregate_recursive(list)
955        }
956        Expr::Property(base, _) => contains_aggregate_recursive(base),
957        Expr::ListComprehension { list, .. } => {
958            // Only check the list source — where_clause/map_expr reference the loop variable
959            contains_aggregate_recursive(list)
960        }
961        Expr::Quantifier { list, .. } => contains_aggregate_recursive(list),
962        Expr::Reduce { init, list, .. } => {
963            contains_aggregate_recursive(init) || contains_aggregate_recursive(list)
964        }
965        Expr::ArrayIndex { array, index } => {
966            contains_aggregate_recursive(array) || contains_aggregate_recursive(index)
967        }
968        Expr::ArraySlice { array, start, end } => {
969            contains_aggregate_recursive(array)
970                || start.as_deref().is_some_and(contains_aggregate_recursive)
971                || end.as_deref().is_some_and(contains_aggregate_recursive)
972        }
973        Expr::Map(entries) => entries.iter().any(|(_, v)| contains_aggregate_recursive(v)),
974        _ => false,
975    }
976}
977
978/// Check if an expression contains a non-deterministic function (e.g. rand()).
979fn contains_non_deterministic(expr: &Expr) -> bool {
980    if matches!(expr, Expr::FunctionCall { name, .. } if name.eq_ignore_ascii_case("rand")) {
981        return true;
982    }
983    let mut found = false;
984    expr.for_each_child(&mut |child| {
985        if !found {
986            found = contains_non_deterministic(child);
987        }
988    });
989    found
990}
991
992fn collect_aggregate_reprs(expr: &Expr, out: &mut HashSet<String>) {
993    match expr {
994        Expr::FunctionCall { name, args, .. } => {
995            if is_aggregate_function_name(name) {
996                out.insert(expr.to_string_repr());
997                return;
998            }
999            for arg in args {
1000                collect_aggregate_reprs(arg, out);
1001            }
1002        }
1003        Expr::BinaryOp { left, right, .. } => {
1004            collect_aggregate_reprs(left, out);
1005            collect_aggregate_reprs(right, out);
1006        }
1007        Expr::UnaryOp { expr, .. }
1008        | Expr::IsNull(expr)
1009        | Expr::IsNotNull(expr)
1010        | Expr::IsUnique(expr) => collect_aggregate_reprs(expr, out),
1011        Expr::List(items) => {
1012            for item in items {
1013                collect_aggregate_reprs(item, out);
1014            }
1015        }
1016        Expr::Case {
1017            expr,
1018            when_then,
1019            else_expr,
1020        } => {
1021            if let Some(e) = expr {
1022                collect_aggregate_reprs(e, out);
1023            }
1024            for (w, t) in when_then {
1025                collect_aggregate_reprs(w, out);
1026                collect_aggregate_reprs(t, out);
1027            }
1028            if let Some(e) = else_expr {
1029                collect_aggregate_reprs(e, out);
1030            }
1031        }
1032        Expr::In { expr, list } => {
1033            collect_aggregate_reprs(expr, out);
1034            collect_aggregate_reprs(list, out);
1035        }
1036        Expr::Property(base, _) => collect_aggregate_reprs(base, out),
1037        Expr::ListComprehension { list, .. } => {
1038            collect_aggregate_reprs(list, out);
1039        }
1040        Expr::Quantifier { list, .. } => {
1041            collect_aggregate_reprs(list, out);
1042        }
1043        Expr::Reduce { init, list, .. } => {
1044            collect_aggregate_reprs(init, out);
1045            collect_aggregate_reprs(list, out);
1046        }
1047        Expr::ArrayIndex { array, index } => {
1048            collect_aggregate_reprs(array, out);
1049            collect_aggregate_reprs(index, out);
1050        }
1051        Expr::ArraySlice { array, start, end } => {
1052            collect_aggregate_reprs(array, out);
1053            if let Some(s) = start {
1054                collect_aggregate_reprs(s, out);
1055            }
1056            if let Some(e) = end {
1057                collect_aggregate_reprs(e, out);
1058            }
1059        }
1060        _ => {}
1061    }
1062}
1063
/// A variable or property reference gathered by `collect_non_aggregate_refs`
/// from the parts of an expression that lie outside aggregate function calls.
#[derive(Debug, Clone)]
enum NonAggregateRef {
    /// A bare variable reference; holds the variable name.
    Var(String),
    /// A property access expression.
    Property {
        /// String form of the whole property expression (`Expr::to_string_repr`).
        repr: String,
        /// Name of the base variable when the property base is a plain
        /// `Expr::Variable`; `None` for any other base expression.
        base_var: Option<String>,
    },
}
1072
1073fn collect_non_aggregate_refs(expr: &Expr, inside_agg: bool, out: &mut Vec<NonAggregateRef>) {
1074    match expr {
1075        Expr::FunctionCall { name, args, .. } => {
1076            if is_aggregate_function_name(name) {
1077                return;
1078            }
1079            for arg in args {
1080                collect_non_aggregate_refs(arg, inside_agg, out);
1081            }
1082        }
1083        Expr::Variable(v) if !inside_agg => out.push(NonAggregateRef::Var(v.clone())),
1084        Expr::Property(base, _) if !inside_agg => {
1085            let base_var = if let Expr::Variable(v) = base.as_ref() {
1086                Some(v.clone())
1087            } else {
1088                None
1089            };
1090            out.push(NonAggregateRef::Property {
1091                repr: expr.to_string_repr(),
1092                base_var,
1093            });
1094        }
1095        Expr::BinaryOp { left, right, .. } => {
1096            collect_non_aggregate_refs(left, inside_agg, out);
1097            collect_non_aggregate_refs(right, inside_agg, out);
1098        }
1099        Expr::UnaryOp { expr, .. }
1100        | Expr::IsNull(expr)
1101        | Expr::IsNotNull(expr)
1102        | Expr::IsUnique(expr) => collect_non_aggregate_refs(expr, inside_agg, out),
1103        Expr::List(items) => {
1104            for item in items {
1105                collect_non_aggregate_refs(item, inside_agg, out);
1106            }
1107        }
1108        Expr::Case {
1109            expr,
1110            when_then,
1111            else_expr,
1112        } => {
1113            if let Some(e) = expr {
1114                collect_non_aggregate_refs(e, inside_agg, out);
1115            }
1116            for (w, t) in when_then {
1117                collect_non_aggregate_refs(w, inside_agg, out);
1118                collect_non_aggregate_refs(t, inside_agg, out);
1119            }
1120            if let Some(e) = else_expr {
1121                collect_non_aggregate_refs(e, inside_agg, out);
1122            }
1123        }
1124        Expr::In { expr, list } => {
1125            collect_non_aggregate_refs(expr, inside_agg, out);
1126            collect_non_aggregate_refs(list, inside_agg, out);
1127        }
1128        // For ListComprehension/Quantifier/Reduce, only recurse into the `list`
1129        // source. The body references the loop variable, not outer-scope vars.
1130        Expr::ListComprehension { list, .. } => {
1131            collect_non_aggregate_refs(list, inside_agg, out);
1132        }
1133        Expr::Quantifier { list, .. } => {
1134            collect_non_aggregate_refs(list, inside_agg, out);
1135        }
1136        Expr::Reduce { init, list, .. } => {
1137            collect_non_aggregate_refs(init, inside_agg, out);
1138            collect_non_aggregate_refs(list, inside_agg, out);
1139        }
1140        _ => {}
1141    }
1142}
1143
1144fn validate_with_order_by_aggregate_item(
1145    expr: &Expr,
1146    projected_aggregate_reprs: &HashSet<String>,
1147    projected_simple_reprs: &HashSet<String>,
1148    projected_aliases: &HashSet<String>,
1149) -> Result<()> {
1150    let mut aggregate_reprs = HashSet::new();
1151    collect_aggregate_reprs(expr, &mut aggregate_reprs);
1152    for agg in aggregate_reprs {
1153        if !projected_aggregate_reprs.contains(&agg) {
1154            return Err(anyhow!(
1155                "SyntaxError: UndefinedVariable - Aggregation expression '{}' is not projected in WITH",
1156                agg
1157            ));
1158        }
1159    }
1160
1161    let mut refs = Vec::new();
1162    collect_non_aggregate_refs(expr, false, &mut refs);
1163    refs.retain(|r| match r {
1164        NonAggregateRef::Var(v) => !projected_aliases.contains(v),
1165        NonAggregateRef::Property { repr, .. } => !projected_simple_reprs.contains(repr),
1166    });
1167
1168    let mut dedup = HashSet::new();
1169    refs.retain(|r| {
1170        let key = match r {
1171            NonAggregateRef::Var(v) => format!("v:{v}"),
1172            NonAggregateRef::Property { repr, .. } => format!("p:{repr}"),
1173        };
1174        dedup.insert(key)
1175    });
1176
1177    if refs.len() > 1 {
1178        return Err(anyhow!(
1179            "SyntaxError: AmbiguousAggregationExpression - ORDER BY item mixes aggregation with multiple non-grouping references"
1180        ));
1181    }
1182
1183    if let Some(r) = refs.first() {
1184        return match r {
1185            NonAggregateRef::Var(v) => Err(anyhow!(
1186                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1187                v
1188            )),
1189            NonAggregateRef::Property { base_var, .. } => Err(anyhow!(
1190                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1191                base_var
1192                    .clone()
1193                    .unwrap_or_else(|| "<property-base>".to_string())
1194            )),
1195        };
1196    }
1197
1198    Ok(())
1199}
1200
1201/// Validate that no aggregation functions appear in WHERE clause.
1202fn validate_no_aggregation_in_where(predicate: &Expr) -> Result<()> {
1203    if contains_aggregate_recursive(predicate) {
1204        return Err(anyhow!(
1205            "SyntaxError: InvalidAggregation - Aggregation functions not allowed in WHERE"
1206        ));
1207    }
1208    Ok(())
1209}
1210
/// Numeric result of evaluating a constant expression: either an integer or
/// a float.
#[derive(Debug, Clone, Copy)]
enum ConstNumber {
    /// A 64-bit signed integer value.
    Int(i64),
    /// A 64-bit floating-point value.
    Float(f64),
}

impl ConstNumber {
    /// Return the value as `f64`; integers are converted with an `as` cast.
    fn to_f64(self) -> f64 {
        match self {
            Self::Float(float) => float,
            Self::Int(int) => int as f64,
        }
    }
}
1225
1226fn eval_const_numeric_expr(
1227    expr: &Expr,
1228    params: &HashMap<String, uni_common::Value>,
1229) -> Result<ConstNumber> {
1230    match expr {
1231        Expr::Literal(CypherLiteral::Integer(n)) => Ok(ConstNumber::Int(*n)),
1232        Expr::Literal(CypherLiteral::Float(f)) => Ok(ConstNumber::Float(*f)),
1233        Expr::Parameter(name) => match params.get(name) {
1234            Some(uni_common::Value::Int(n)) => Ok(ConstNumber::Int(*n)),
1235            Some(uni_common::Value::Float(f)) => Ok(ConstNumber::Float(*f)),
1236            Some(uni_common::Value::Null) => Err(anyhow!(
1237                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got null",
1238                name
1239            )),
1240            Some(other) => Err(anyhow!(
1241                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got {:?}",
1242                name,
1243                other
1244            )),
1245            None => Err(anyhow!(
1246                "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1247            )),
1248        },
1249        Expr::UnaryOp {
1250            op: uni_cypher::ast::UnaryOp::Neg,
1251            expr,
1252        } => match eval_const_numeric_expr(expr, params)? {
1253            ConstNumber::Int(v) => Ok(ConstNumber::Int(-v)),
1254            ConstNumber::Float(v) => Ok(ConstNumber::Float(-v)),
1255        },
1256        Expr::BinaryOp { left, op, right } => {
1257            let l = eval_const_numeric_expr(left, params)?;
1258            let r = eval_const_numeric_expr(right, params)?;
1259            match op {
1260                BinaryOp::Add => match (l, r) {
1261                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a + b)),
1262                    _ => Ok(ConstNumber::Float(l.to_f64() + r.to_f64())),
1263                },
1264                BinaryOp::Sub => match (l, r) {
1265                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a - b)),
1266                    _ => Ok(ConstNumber::Float(l.to_f64() - r.to_f64())),
1267                },
1268                BinaryOp::Mul => match (l, r) {
1269                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a * b)),
1270                    _ => Ok(ConstNumber::Float(l.to_f64() * r.to_f64())),
1271                },
1272                BinaryOp::Div => Ok(ConstNumber::Float(l.to_f64() / r.to_f64())),
1273                BinaryOp::Mod => match (l, r) {
1274                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a % b)),
1275                    _ => Ok(ConstNumber::Float(l.to_f64() % r.to_f64())),
1276                },
1277                BinaryOp::Pow => Ok(ConstNumber::Float(l.to_f64().powf(r.to_f64()))),
1278                _ => Err(anyhow!(
1279                    "SyntaxError: InvalidArgumentType - unsupported operator in constant expression"
1280                )),
1281            }
1282        }
1283        Expr::FunctionCall { name, args, .. } => {
1284            let lower = name.to_lowercase();
1285            match lower.as_str() {
1286                "rand" if args.is_empty() => {
1287                    use rand::RngExt;
1288                    let mut rng = rand::rng();
1289                    Ok(ConstNumber::Float(rng.random::<f64>()))
1290                }
1291                "tointeger" | "toint" if args.len() == 1 => {
1292                    match eval_const_numeric_expr(&args[0], params)? {
1293                        ConstNumber::Int(v) => Ok(ConstNumber::Int(v)),
1294                        ConstNumber::Float(v) => Ok(ConstNumber::Int(v.trunc() as i64)),
1295                    }
1296                }
1297                "ceil" if args.len() == 1 => Ok(ConstNumber::Float(
1298                    eval_const_numeric_expr(&args[0], params)?.to_f64().ceil(),
1299                )),
1300                "floor" if args.len() == 1 => Ok(ConstNumber::Float(
1301                    eval_const_numeric_expr(&args[0], params)?.to_f64().floor(),
1302                )),
1303                "abs" if args.len() == 1 => match eval_const_numeric_expr(&args[0], params)? {
1304                    ConstNumber::Int(v) => Ok(ConstNumber::Int(v.abs())),
1305                    ConstNumber::Float(v) => Ok(ConstNumber::Float(v.abs())),
1306                },
1307                _ => Err(anyhow!(
1308                    "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1309                )),
1310            }
1311        }
1312        _ => Err(anyhow!(
1313            "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1314        )),
1315    }
1316}
1317
1318/// Parse and validate a non-negative integer expression for SKIP or LIMIT.
1319/// Returns `Ok(Some(n))` for valid constants, or an error for negative/float/non-constant values.
1320fn parse_non_negative_integer(
1321    expr: &Expr,
1322    clause_name: &str,
1323    params: &HashMap<String, uni_common::Value>,
1324) -> Result<Option<usize>> {
1325    let referenced_vars = collect_expr_variables(expr);
1326    if !referenced_vars.is_empty() {
1327        return Err(anyhow!(
1328            "SyntaxError: NonConstantExpression - {} requires expression independent of row variables",
1329            clause_name
1330        ));
1331    }
1332
1333    let value = eval_const_numeric_expr(expr, params)?;
1334    let as_int = match value {
1335        ConstNumber::Int(v) => v,
1336        ConstNumber::Float(v) => {
1337            if !v.is_finite() || (v.fract().abs() > f64::EPSILON) {
1338                return Err(anyhow!(
1339                    "SyntaxError: InvalidArgumentType - {} requires integer, got float",
1340                    clause_name
1341                ));
1342            }
1343            v as i64
1344        }
1345    };
1346    if as_int < 0 {
1347        return Err(anyhow!(
1348            "SyntaxError: NegativeIntegerArgument - {} requires non-negative integer",
1349            clause_name
1350        ));
1351    }
1352    Ok(Some(as_int as usize))
1353}
1354
1355/// Validate that aggregation functions are not nested.
1356fn validate_no_nested_aggregation(expr: &Expr) -> Result<()> {
1357    if let Expr::FunctionCall { name, args, .. } = expr
1358        && is_aggregate_function_name(name)
1359    {
1360        for arg in args {
1361            if contains_aggregate_recursive(arg) {
1362                return Err(anyhow!(
1363                    "SyntaxError: NestedAggregation - Cannot nest aggregation functions"
1364                ));
1365            }
1366            if contains_non_deterministic(arg) {
1367                return Err(anyhow!(
1368                    "SyntaxError: NonConstantExpression - Non-deterministic function inside aggregation"
1369                ));
1370            }
1371        }
1372    }
1373    let mut result = Ok(());
1374    expr.for_each_child(&mut |child| {
1375        if result.is_ok() {
1376            result = validate_no_nested_aggregation(child);
1377        }
1378    });
1379    result
1380}
1381
1382/// Validate that an expression does not access properties or labels of
1383/// deleted entities. `type(r)` on a deleted relationship is allowed per
1384/// OpenCypher spec, but `n.prop` and `labels(n)` are not.
1385fn validate_no_deleted_entity_access(expr: &Expr, deleted_vars: &HashSet<String>) -> Result<()> {
1386    // Check n.prop on a deleted variable
1387    if let Expr::Property(inner, _) = expr
1388        && let Expr::Variable(name) = inner.as_ref()
1389        && deleted_vars.contains(name)
1390    {
1391        return Err(anyhow!(
1392            "EntityNotFound: DeletedEntityAccess - Cannot access properties of deleted entity '{}'",
1393            name
1394        ));
1395    }
1396    // Check labels(n) or keys(n) on a deleted variable
1397    if let Expr::FunctionCall { name, args, .. } = expr
1398        && matches!(name.to_lowercase().as_str(), "labels" | "keys")
1399        && args.len() == 1
1400        && let Expr::Variable(var) = &args[0]
1401        && deleted_vars.contains(var)
1402    {
1403        return Err(anyhow!(
1404            "EntityNotFound: DeletedEntityAccess - Cannot access {} of deleted entity '{}'",
1405            name.to_lowercase(),
1406            var
1407        ));
1408    }
1409    let mut result = Ok(());
1410    expr.for_each_child(&mut |child| {
1411        if result.is_ok() {
1412            result = validate_no_deleted_entity_access(child, deleted_vars);
1413        }
1414    });
1415    result
1416}
1417
1418/// Validate that all variables referenced in properties are defined,
1419/// either in scope or in the local CREATE variable list.
1420fn validate_property_variables(
1421    properties: &Option<Expr>,
1422    vars_in_scope: &[VariableInfo],
1423    create_vars: &[&str],
1424) -> Result<()> {
1425    if let Some(props) = properties {
1426        for var in collect_expr_variables(props) {
1427            if !is_var_in_scope(vars_in_scope, &var) && !create_vars.contains(&var.as_str()) {
1428                return Err(anyhow!(
1429                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1430                    var
1431                ));
1432            }
1433        }
1434    }
1435    Ok(())
1436}
1437
1438/// Check that a variable name is not already bound in scope or in the local CREATE list.
1439/// Used to prevent rebinding in CREATE clauses.
1440fn check_not_already_bound(
1441    name: &str,
1442    vars_in_scope: &[VariableInfo],
1443    create_vars: &[&str],
1444) -> Result<()> {
1445    if is_var_in_scope(vars_in_scope, name) {
1446        return Err(anyhow!(
1447            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1448            name
1449        ));
1450    }
1451    if create_vars.contains(&name) {
1452        return Err(anyhow!(
1453            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined in CREATE",
1454            name
1455        ));
1456    }
1457    Ok(())
1458}
1459
1460fn build_merge_scope(pattern: &Pattern, vars_in_scope: &[VariableInfo]) -> Vec<VariableInfo> {
1461    let mut scope = vars_in_scope.to_vec();
1462
1463    for path in &pattern.paths {
1464        if let Some(path_var) = &path.variable
1465            && !path_var.is_empty()
1466            && !is_var_in_scope(&scope, path_var)
1467        {
1468            scope.push(VariableInfo::new(path_var.clone(), VariableType::Path));
1469        }
1470        for element in &path.elements {
1471            match element {
1472                PatternElement::Node(n) => {
1473                    if let Some(v) = &n.variable
1474                        && !v.is_empty()
1475                        && !is_var_in_scope(&scope, v)
1476                    {
1477                        scope.push(VariableInfo::new(v.clone(), VariableType::Node));
1478                    }
1479                }
1480                PatternElement::Relationship(r) => {
1481                    if let Some(v) = &r.variable
1482                        && !v.is_empty()
1483                        && !is_var_in_scope(&scope, v)
1484                    {
1485                        scope.push(VariableInfo::new(v.clone(), VariableType::Edge));
1486                    }
1487                }
1488                PatternElement::Parenthesized { .. } => {}
1489            }
1490        }
1491    }
1492
1493    scope
1494}
1495
1496fn validate_merge_set_item(item: &SetItem, vars_in_scope: &[VariableInfo]) -> Result<()> {
1497    match item {
1498        SetItem::Property { expr, value } => {
1499            validate_expression_variables(expr, vars_in_scope)?;
1500            validate_expression(expr, vars_in_scope)?;
1501            validate_expression_variables(value, vars_in_scope)?;
1502            validate_expression(value, vars_in_scope)?;
1503            if contains_pattern_predicate(expr) || contains_pattern_predicate(value) {
1504                return Err(anyhow!(
1505                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1506                ));
1507            }
1508        }
1509        SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
1510            if !is_var_in_scope(vars_in_scope, variable) {
1511                return Err(anyhow!(
1512                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1513                    variable
1514                ));
1515            }
1516            validate_expression_variables(value, vars_in_scope)?;
1517            validate_expression(value, vars_in_scope)?;
1518            if contains_pattern_predicate(value) {
1519                return Err(anyhow!(
1520                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1521                ));
1522            }
1523        }
1524        SetItem::Labels { variable, .. } => {
1525            if !is_var_in_scope(vars_in_scope, variable) {
1526                return Err(anyhow!(
1527                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1528                    variable
1529                ));
1530            }
1531        }
1532    }
1533
1534    Ok(())
1535}
1536
1537/// Reject MERGE patterns containing null property values (e.g. `MERGE ({k: null})`).
1538/// The OpenCypher spec requires all property values in MERGE to be non-null.
1539fn reject_null_merge_properties(properties: &Option<Expr>) -> Result<()> {
1540    if let Some(Expr::Map(entries)) = properties {
1541        for (key, value) in entries {
1542            if matches!(value, Expr::Literal(CypherLiteral::Null)) {
1543                return Err(anyhow!(
1544                    "SemanticError: MergeReadOwnWrites - MERGE cannot use null property value for '{}'",
1545                    key
1546                ));
1547            }
1548        }
1549    }
1550    Ok(())
1551}
1552
1553/// Flatten every label name appearing in a `Pattern` (across all paths
1554/// and node elements). Used by the M5 follow-up #6 write-rejection
1555/// guard to refuse CREATE/MERGE that names a virtual catalog-resolved
1556/// label.
1557fn collect_pattern_labels(pattern: &uni_cypher::ast::Pattern) -> Vec<String> {
1558    let mut out = Vec::new();
1559    for path in &pattern.paths {
1560        for element in &path.elements {
1561            if let PatternElement::Node(n) = element {
1562                for l in n.labels.names() {
1563                    out.push(l.clone());
1564                }
1565            }
1566        }
1567    }
1568    out
1569}
1570
1571fn validate_merge_clause(merge_clause: &MergeClause, vars_in_scope: &[VariableInfo]) -> Result<()> {
1572    for path in &merge_clause.pattern.paths {
1573        for element in &path.elements {
1574            match element {
1575                PatternElement::Node(n) => {
1576                    if let Some(Expr::Parameter(_)) = &n.properties {
1577                        return Err(anyhow!(
1578                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
1579                        ));
1580                    }
1581                    reject_null_merge_properties(&n.properties)?;
1582                    // VariableAlreadyBound: reject if a bound variable is used
1583                    // as a standalone MERGE node or introduces new labels/properties.
1584                    // Bare endpoint references like (a) in MERGE (a)-[:R]->(b) are valid.
1585                    if let Some(variable) = &n.variable
1586                        && !variable.is_empty()
1587                        && is_var_in_scope(vars_in_scope, variable)
1588                    {
1589                        let is_standalone = path.elements.len() == 1;
1590                        let has_new_labels = !n.labels.is_empty();
1591                        let has_new_properties = n.properties.is_some();
1592                        if is_standalone || has_new_labels || has_new_properties {
1593                            return Err(anyhow!(
1594                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1595                                variable
1596                            ));
1597                        }
1598                    }
1599                }
1600                PatternElement::Relationship(r) => {
1601                    if let Some(variable) = &r.variable
1602                        && !variable.is_empty()
1603                        && is_var_in_scope(vars_in_scope, variable)
1604                    {
1605                        return Err(anyhow!(
1606                            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1607                            variable
1608                        ));
1609                    }
1610                    if r.types.len() != 1 {
1611                        return Err(anyhow!(
1612                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for MERGE"
1613                        ));
1614                    }
1615                    if r.range.is_some() {
1616                        return Err(anyhow!(
1617                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
1618                        ));
1619                    }
1620                    if let Some(Expr::Parameter(_)) = &r.properties {
1621                        return Err(anyhow!(
1622                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
1623                        ));
1624                    }
1625                    reject_null_merge_properties(&r.properties)?;
1626                }
1627                PatternElement::Parenthesized { .. } => {}
1628            }
1629        }
1630    }
1631
1632    let merge_scope = build_merge_scope(&merge_clause.pattern, vars_in_scope);
1633    for item in &merge_clause.on_create {
1634        validate_merge_set_item(item, &merge_scope)?;
1635    }
1636    for item in &merge_clause.on_match {
1637        validate_merge_set_item(item, &merge_scope)?;
1638    }
1639
1640    Ok(())
1641}
1642
1643/// Recursively validate an expression for type errors, undefined variables, etc.
1644fn validate_expression(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
1645    // Validate boolean operators and nested aggregation first
1646    validate_boolean_expression(expr)?;
1647    validate_no_nested_aggregation(expr)?;
1648
1649    // Helper to validate multiple expressions
1650    fn validate_all(exprs: &[Expr], vars: &[VariableInfo]) -> Result<()> {
1651        for e in exprs {
1652            validate_expression(e, vars)?;
1653        }
1654        Ok(())
1655    }
1656
1657    match expr {
1658        Expr::FunctionCall { name, args, .. } => {
1659            validate_function_call(name, args, vars_in_scope)?;
1660            validate_all(args, vars_in_scope)
1661        }
1662        Expr::BinaryOp { left, right, .. } => {
1663            validate_expression(left, vars_in_scope)?;
1664            validate_expression(right, vars_in_scope)
1665        }
1666        Expr::UnaryOp { expr: e, .. }
1667        | Expr::IsNull(e)
1668        | Expr::IsNotNull(e)
1669        | Expr::IsUnique(e) => validate_expression(e, vars_in_scope),
1670        Expr::Property(base, prop) => {
1671            if let Expr::Variable(var_name) = base.as_ref()
1672                && let Some(var_info) = find_var_in_scope(vars_in_scope, var_name)
1673            {
1674                // Paths don't have properties
1675                if var_info.var_type == VariableType::Path {
1676                    return Err(anyhow!(
1677                        "SyntaxError: InvalidArgumentType - Type mismatch: expected Node or Relationship but was Path for property access '{}.{}'",
1678                        var_name,
1679                        prop
1680                    ));
1681                }
1682                // Known non-graph literals (int, float, bool, string, list) don't have properties
1683                if var_info.var_type == VariableType::ScalarLiteral {
1684                    return Err(anyhow!(
1685                        "TypeError: InvalidArgumentType - Property access on a non-graph element is not allowed"
1686                    ));
1687                }
1688            }
1689            validate_expression(base, vars_in_scope)
1690        }
1691        Expr::List(items) => validate_all(items, vars_in_scope),
1692        Expr::Case {
1693            expr: case_expr,
1694            when_then,
1695            else_expr,
1696        } => {
1697            if let Some(e) = case_expr {
1698                validate_expression(e, vars_in_scope)?;
1699            }
1700            for (w, t) in when_then {
1701                validate_expression(w, vars_in_scope)?;
1702                validate_expression(t, vars_in_scope)?;
1703            }
1704            if let Some(e) = else_expr {
1705                validate_expression(e, vars_in_scope)?;
1706            }
1707            Ok(())
1708        }
1709        Expr::In { expr: e, list } => {
1710            validate_expression(e, vars_in_scope)?;
1711            validate_expression(list, vars_in_scope)
1712        }
1713        Expr::Exists {
1714            query,
1715            from_pattern_predicate: true,
1716        } => {
1717            // Pattern predicates cannot introduce new named variables.
1718            // Extract named vars from inner MATCH pattern, check each is in scope.
1719            if let Query::Single(stmt) = query.as_ref() {
1720                for clause in &stmt.clauses {
1721                    if let Clause::Match(m) = clause {
1722                        for path in &m.pattern.paths {
1723                            for elem in &path.elements {
1724                                match elem {
1725                                    PatternElement::Node(n) => {
1726                                        if let Some(var) = &n.variable
1727                                            && !is_var_in_scope(vars_in_scope, var)
1728                                        {
1729                                            return Err(anyhow!(
1730                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1731                                                var
1732                                            ));
1733                                        }
1734                                    }
1735                                    PatternElement::Relationship(r) => {
1736                                        if let Some(var) = &r.variable
1737                                            && !is_var_in_scope(vars_in_scope, var)
1738                                        {
1739                                            return Err(anyhow!(
1740                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1741                                                var
1742                                            ));
1743                                        }
1744                                    }
1745                                    _ => {}
1746                                }
1747                            }
1748                        }
1749                    }
1750                }
1751            }
1752            Ok(())
1753        }
1754        _ => Ok(()),
1755    }
1756}
1757
/// One step (hop) in a Quantified Path Pattern sub-pattern.
///
/// Used by `LogicalPlan::Traverse` when `qpp_steps` is `Some`; a QPP is
/// described there as a `Vec<QppStepInfo>`, one entry per step.
#[derive(Debug, Clone)]
pub struct QppStepInfo {
    /// Edge type IDs that this step can traverse.
    pub edge_type_ids: Vec<u32>,
    /// Traversal direction for this step.
    pub direction: Direction,
    /// Optional label constraint on the target node
    /// (`None` means the step carries no label constraint).
    pub target_label: Option<String>,
}
1770
/// Phase 5a-impl: per-type fusion strategy carried by
/// `LogicalPlan::FusedIndexScan` and `LogicalPlan::FusedIndexScanWrapped`.
///
/// `#[non_exhaustive]` so further fusion kinds can be added without
/// breaking downstream pattern-match exhaustiveness. The Phase 5b kinds
/// (`AnnRerank`, `Bm25Rrf`) are already part of the enum.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum FusionKind {
    /// Union of parent + fork-local BTree hits, deduped by VID.
    BtreeUnion,
    /// k-way merge of pre-sorted parent + fork streams (ORDER BY).
    SortedKWayMerge,
    /// Fork-first UID lookup; falls back to parent on miss. Used
    /// when a fork rebinds an external UID and queries must see the
    /// fork's binding before the parent's.
    VidUidForkFirst,
    /// Phase 5b — vector ANN rerank: top-k from primary's index +
    /// top-k from fork-local index, merged and reranked by exact
    /// distance. Recall ≥ 95% per spec §8.2.
    AnnRerank,
    /// Phase 5b — BM25 reciprocal rank fusion: ranked lists from
    /// primary's and fork-local FTS indexes combined via standard
    /// RRF (`score = sum 1 / (k_rrf + rank_i)`, k_rrf = 60).
    Bm25Rrf,
}
1795
/// Logical query plan produced by [`QueryPlanner`].
///
/// Each variant represents one step in the Cypher execution pipeline.
/// Plans are tree-structured — leaf nodes produce rows, intermediate nodes
/// transform or join them, and the root node defines the final output.
///
/// Variants that consume rows hold their upstream plan in a boxed `input`
/// (or `left`/`right`) field; variants without such a field are leaves or
/// stand-alone commands (DDL, admin, COPY, ...).
#[derive(Debug, Clone)]
pub enum LogicalPlan {
    /// UNION / UNION ALL of two sub-plans.
    Union {
        left: Box<LogicalPlan>,
        right: Box<LogicalPlan>,
        /// When `true`, duplicate rows are preserved (UNION ALL semantics).
        all: bool,
    },
    /// Scan vertices of a single labeled dataset.
    Scan {
        label_id: u16,
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Phase 5a-impl: fused scan over both primary's index and the
    /// forked session's fork-local index. Emitted by the planner only
    /// when (a) the session is forked AND (b) `StorageManager::fork_index_exists`
    /// returns `Some(_)` for the target column. Otherwise the planner
    /// keeps emitting `Scan` and Lance's `base_paths` chain transparently
    /// covers parent-inherited indexes.
    ///
    /// `kind` selects the per-type fusion strategy:
    /// - `BtreeUnion` — union of parent + fork hits, dedup by VID.
    /// - `SortedKWayMerge` — k-way merge of two pre-sorted streams.
    /// - `VidUidForkFirst` — probe fork's branch first, fall back to
    ///   parent's UID index on miss.
    ///
    /// NOTE(review): `FusionKind` also declares `AnnRerank` and `Bm25Rrf`;
    /// those presumably travel on `FusedIndexScanWrapped` rather than on
    /// this variant — confirm against the planner.
    FusedIndexScan {
        label_id: u16,
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
        kind: FusionKind,
    },
    /// Phase 5b followup: planner-side observability marker for the
    /// lossy fusion types. Wraps the original `VectorKnn` or
    /// `InvertedIndexLookup` (or any future leaf operator whose
    /// shape differs from `Scan`) without changing its fields, so
    /// the physical planner can decay it to `inner` unchanged.
    ///
    /// Runtime behavior is identical to running `inner` directly;
    /// the wrap is purely for explain-plan and runtime-stats
    /// observability. The actual fusion happens at the
    /// `BranchedBackend` layer (per-branch Lance reads via
    /// `base_paths`), exactly as in Phase 5b's core ship.
    FusedIndexScanWrapped {
        inner: Box<LogicalPlan>,
        kind: FusionKind,
    },
    /// Lookup vertices by ext_id using the main vertices table.
    /// Used when a query references ext_id without specifying a label.
    ExtIdLookup {
        variable: String,
        ext_id: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Scan all vertices from main table (MATCH (n) without label).
    /// Used for schemaless queries that don't specify any label.
    ScanAll {
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Scan the main vertices table filtering by label name(s)
    /// (e.g. `MATCH (n:Unknown)`).
    /// Used for labels not defined in schema (schemaless support).
    /// When labels has multiple entries, uses intersection semantics (must have ALL labels).
    ScanMainByLabels {
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Produces exactly one empty row (used to bootstrap pipelines with no source).
    Empty,
    /// UNWIND: expand a list expression into one row per element.
    Unwind {
        input: Box<LogicalPlan>,
        expr: Expr,
        variable: String,
    },
    /// Edge traversal from `source_variable` to `target_variable` over the
    /// edge types listed in `edge_type_ids`.
    /// NOTE(review): presumably the schema-typed counterpart of
    /// `TraverseMainByType` (which addresses edge types by name) — confirm.
    Traverse {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        step_variable: Option<String>,
        min_hops: usize,
        max_hops: usize,
        optional: bool,
        target_filter: Option<Expr>,
        path_variable: Option<String>,
        edge_properties: HashSet<String>,
        /// Whether this is a variable-length pattern (has `*` range specifier).
        /// When true, step_variable holds a list of edges (even for *1..1).
        is_variable_length: bool,
        /// All variables from this OPTIONAL MATCH pattern.
        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
        /// This ensures proper multi-hop OPTIONAL MATCH semantics.
        optional_pattern_vars: HashSet<String>,
        /// Variable names (node + edge) from the current MATCH clause scope.
        /// Used for relationship uniqueness scoping: only edge ID columns whose
        /// associated variable is in this set participate in uniqueness filtering.
        /// Variables from previous disconnected MATCH clauses are excluded.
        scope_match_variables: HashSet<String>,
        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
        edge_filter_expr: Option<Expr>,
        /// Path traversal semantics (Trail by default for OpenCypher).
        path_mode: crate::query::df_graph::nfa::PathMode,
        /// QPP steps for multi-hop quantified path patterns.
        /// `None` for simple VLP patterns; `Some` for QPP with per-step edge types/constraints.
        /// When present, `min_hops`/`max_hops` are derived from iterations × steps.len().
        qpp_steps: Option<Vec<QppStepInfo>>,
    },
    /// Traverse main edges table filtering by type name(s) (`MATCH (a)-[:Unknown]->(b)`).
    /// Used for edge types not defined in schema (schemaless support).
    /// Supports OR relationship types like `[:KNOWS|HATES]` via multiple type_names.
    TraverseMainByType {
        type_names: Vec<String>,
        input: Box<LogicalPlan>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        step_variable: Option<String>,
        min_hops: usize,
        max_hops: usize,
        optional: bool,
        target_filter: Option<Expr>,
        path_variable: Option<String>,
        /// Whether this is a variable-length pattern (has `*` range specifier).
        /// When true, step_variable holds a list of edges (even for *1..1).
        is_variable_length: bool,
        /// All variables from this OPTIONAL MATCH pattern.
        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
        optional_pattern_vars: HashSet<String>,
        /// Variables belonging to the current MATCH clause scope.
        /// Used for relationship uniqueness scoping: only edge columns whose
        /// associated variable is in this set participate in uniqueness filtering.
        scope_match_variables: HashSet<String>,
        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
        edge_filter_expr: Option<Expr>,
        /// Path traversal semantics (Trail by default for OpenCypher).
        path_mode: crate::query::df_graph::nfa::PathMode,
    },
    /// Row filter: evaluates `predicate` against the rows of `input`.
    Filter {
        input: Box<LogicalPlan>,
        predicate: Expr,
        /// Variables from OPTIONAL MATCH that should preserve NULL rows.
        /// When evaluating the filter, if any of these variables are NULL,
        /// the row is preserved regardless of the predicate result.
        optional_variables: HashSet<String>,
    },
    /// CREATE of a single `pattern` (see `CreateBatch` for the batched form).
    Create {
        input: Box<LogicalPlan>,
        pattern: Pattern,
    },
    /// Batched CREATE operations for multiple consecutive CREATE clauses.
    ///
    /// This variant combines multiple CREATE patterns into a single plan node
    /// to avoid deep recursion when executing many CREATEs sequentially.
    CreateBatch {
        input: Box<LogicalPlan>,
        patterns: Vec<Pattern>,
    },
    /// MERGE of `pattern`, with the optional SET clauses attached to it.
    /// NOTE(review): `on_match` / `on_create` presumably mirror the
    /// `ON MATCH SET` / `ON CREATE SET` sub-clauses — confirm.
    Merge {
        input: Box<LogicalPlan>,
        pattern: Pattern,
        on_match: Option<SetClause>,
        on_create: Option<SetClause>,
    },
    /// SET: carries the `SetItem`s to apply on top of `input`.
    Set {
        input: Box<LogicalPlan>,
        items: Vec<SetItem>,
    },
    /// REMOVE: carries the `RemoveItem`s to apply on top of `input`.
    Remove {
        input: Box<LogicalPlan>,
        items: Vec<RemoveItem>,
    },
    /// DELETE of the entities the `items` expressions evaluate to.
    /// NOTE(review): `detach` presumably selects `DETACH DELETE` — confirm.
    Delete {
        input: Box<LogicalPlan>,
        items: Vec<Expr>,
        detach: bool,
    },
    /// FOREACH (variable IN list | clauses)
    Foreach {
        input: Box<LogicalPlan>,
        variable: String,
        list: Expr,
        body: Vec<LogicalPlan>,
    },
    /// Ordering of `input` by the given sort items.
    Sort {
        input: Box<LogicalPlan>,
        order_by: Vec<SortItem>,
    },
    /// Row-window over `input`.
    /// NOTE(review): `skip` / `fetch` presumably map to SKIP / LIMIT — confirm.
    Limit {
        input: Box<LogicalPlan>,
        skip: Option<usize>,
        fetch: Option<usize>,
    },
    /// Aggregation of `input`: grouping expressions plus aggregate expressions.
    Aggregate {
        input: Box<LogicalPlan>,
        group_by: Vec<Expr>,
        aggregates: Vec<Expr>,
    },
    /// DISTINCT over the rows of `input`.
    Distinct {
        input: Box<LogicalPlan>,
    },
    /// Window-function evaluation over `input`.
    Window {
        input: Box<LogicalPlan>,
        window_exprs: Vec<Expr>,
    },
    /// Projection: each entry is an expression with an optional alias.
    Project {
        input: Box<LogicalPlan>,
        projections: Vec<(Expr, Option<String>)>,
    },
    /// Cross product of two sub-plans.
    CrossJoin {
        left: Box<LogicalPlan>,
        right: Box<LogicalPlan>,
    },
    /// Pairs `input` with a `subquery` plan and an optional `input_filter`.
    /// NOTE(review): exact apply semantics are not visible in this chunk.
    Apply {
        input: Box<LogicalPlan>,
        subquery: Box<LogicalPlan>,
        input_filter: Option<Expr>,
    },
    /// Recursive CTE named `cte_name`: an `initial` plan and a `recursive` plan.
    RecursiveCTE {
        cte_name: String,
        initial: Box<LogicalPlan>,
        recursive: Box<LogicalPlan>,
    },
    /// Procedure call by name with argument expressions; each yield item is a
    /// name with an optional alias.
    ProcedureCall {
        procedure_name: String,
        arguments: Vec<Expr>,
        yield_items: Vec<(String, Option<String>)>,
    },
    /// Subquery call: a `subquery` plan attached to `input`.
    SubqueryCall {
        input: Box<LogicalPlan>,
        subquery: Box<LogicalPlan>,
    },
    /// Vector k-nearest-neighbour leaf: `k` results for `query` on
    /// `variable.property`, with an optional `threshold`.
    VectorKnn {
        label_id: u16,
        variable: String,
        property: String,
        query: Expr,
        k: usize,
        threshold: Option<f32>,
    },
    /// Inverted-index leaf: looks up `terms` on `variable.property`.
    InvertedIndexLookup {
        label_id: u16,
        variable: String,
        property: String,
        terms: Expr,
    },
    /// Shortest-path search between `source_variable` and `target_variable`,
    /// binding the result to `path_variable`.
    /// NOTE(review): presumably the single-path counterpart of
    /// `AllShortestPaths` — confirm.
    ShortestPath {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        path_variable: String,
        /// Minimum number of hops (edges) in the path. Default is 1.
        min_hops: u32,
        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
        max_hops: u32,
    },
    /// allShortestPaths() - Returns all paths with minimum length
    AllShortestPaths {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        path_variable: String,
        /// Minimum number of hops (edges) in the path. Default is 1.
        min_hops: u32,
        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
        max_hops: u32,
    },
    /// Quantified pattern: `pattern_plan` describes one iteration, repeated
    /// between `min_iterations` and `max_iterations` times.
    QuantifiedPattern {
        input: Box<LogicalPlan>,
        pattern_plan: Box<LogicalPlan>, // Plan for one iteration
        min_iterations: u32,
        max_iterations: u32,
        path_variable: Option<String>,
        start_variable: String, // Input variable for iteration (e.g. 'a' in (a)-[:R]->(b))
        binding_variable: String, // Output variable of iteration (e.g. 'b')
    },
    // DDL Plans
    /// Create a vector index described by `config`.
    CreateVectorIndex {
        config: VectorIndexConfig,
        if_not_exists: bool,
    },
    /// Create a full-text index described by `config`.
    CreateFullTextIndex {
        config: FullTextIndexConfig,
        if_not_exists: bool,
    },
    /// Create a scalar index described by `config`.
    CreateScalarIndex {
        config: ScalarIndexConfig,
        if_not_exists: bool,
    },
    /// Create a JSON full-text index described by `config`.
    CreateJsonFtsIndex {
        config: JsonFtsIndexConfig,
        if_not_exists: bool,
    },
    /// Drop the index called `name`.
    DropIndex {
        name: String,
        if_exists: bool,
    },
    /// List indexes, optionally restricted by `filter`.
    ShowIndexes {
        filter: Option<String>,
    },
    /// COPY command; `is_export` distinguishes export from import.
    Copy {
        target: String,
        source: String,
        is_export: bool,
        options: HashMap<String, Value>,
    },
    /// Backup to `destination` with the given options.
    Backup {
        destination: String,
        options: HashMap<String, Value>,
    },
    /// EXPLAIN wrapper around another plan.
    Explain {
        plan: Box<LogicalPlan>,
    },
    // Admin Plans
    ShowDatabase,
    ShowConfig,
    ShowStatistics,
    Vacuum,
    Checkpoint,
    /// COPY ... TO: `label`, destination `path`, `format` and options.
    CopyTo {
        label: String,
        path: String,
        format: String,
        options: HashMap<String, Value>,
    },
    /// COPY ... FROM: `label`, source `path`, `format` and options.
    CopyFrom {
        label: String,
        path: String,
        format: String,
        options: HashMap<String, Value>,
    },
    // Schema DDL
    CreateLabel(CreateLabel),
    CreateEdgeType(CreateEdgeType),
    AlterLabel(AlterLabel),
    AlterEdgeType(AlterEdgeType),
    DropLabel(DropLabel),
    DropEdgeType(DropEdgeType),
    // Constraints
    CreateConstraint(CreateConstraint),
    DropConstraint(DropConstraint),
    ShowConstraints(ShowConstraints),
    /// Bind a zero-length path (single node pattern with path variable).
    /// E.g., `p = (a)` creates a Path with one node and zero edges.
    BindZeroLengthPath {
        input: Box<LogicalPlan>,
        node_variable: String,
        path_variable: String,
    },
    /// Bind a fixed-length path from already-computed node and edge columns.
    /// E.g., `p = (a)-[r]->(b)` or `p = (a)-[r1]->(b)-[r2]->(c)`.
    BindPath {
        input: Box<LogicalPlan>,
        node_variables: Vec<String>,
        edge_variables: Vec<String>,
        path_variable: String,
    },

    // ── Locy variants ──────────────────────────────────────────
    /// Top-level Locy program: stratified rules + commands.
    LocyProgram {
        strata: Vec<super::planner_locy_types::LocyStratum>,
        commands: Vec<super::planner_locy_types::LocyCommand>,
        derived_scan_registry: Arc<super::df_graph::locy_fixpoint::DerivedScanRegistry>,
        max_iterations: usize,
        timeout: std::time::Duration,
        max_derived_bytes: usize,
        deterministic_best_by: bool,
        strict_probability_domain: bool,
        probability_epsilon: f64,
        exact_probability: bool,
        max_bdd_variables: usize,
        top_k_proofs: usize,
        /// Active probability semiring (rollout D-7). Defaults to
        /// `AddMultProb` (Phase 1/2 byte-identical behavior). `BddExact`
        /// is selected by `LocyConfig::resolve()` when `exact_probability`
        /// is true.
        semiring_kind: uni_locy::SemiringKind,
        /// Phase B Slice 3: per-evaluation registry of neural classifiers
        /// keyed by model name. Empty for programs without `CREATE MODEL`.
        classifier_registry: Arc<uni_locy::ClassifierRegistry>,
        /// Phase B follow-up: optional memoization cache. `None` →
        /// runtime creates a fresh per-query cache; `Some` → shared
        /// across queries (caller-managed).
        classifier_cache: Option<Arc<uni_locy::ModelInvocationCache>>,
        /// Phase C B1-B3 follow-up: per-query side-channel store
        /// for per-invocation (raw, calibrated, confidence_band)
        /// records. Flows alongside `classifier_cache` into
        /// `LocyProgramExec`.
        classifier_provenance_store: Option<Arc<uni_locy::NeuralProvenanceStore>>,
    },
    /// FOLD operator: lattice-join non-key columns per KEY group.
    LocyFold {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
        fold_bindings: Vec<(String, Expr)>,
        strict_probability_domain: bool,
        probability_epsilon: f64,
    },
    /// BEST BY operator: select best row per KEY group by ordered criteria.
    LocyBestBy {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
        /// (expression, ascending) pairs.
        criteria: Vec<(Expr, bool)>,
    },
    /// PRIORITY operator: keep only highest-priority clause's rows per KEY group.
    LocyPriority {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
    },
    /// Scan a derived relation's in-memory buffer during fixpoint iteration.
    LocyDerivedScan {
        scan_index: usize,
        data: Arc<RwLock<Vec<RecordBatch>>>,
        schema: SchemaRef,
    },
    /// Compact projection for Locy YIELD — emits ONLY the listed expressions,
    /// without carrying through helper/property columns like the regular Project.
    LocyProject {
        input: Box<LogicalPlan>,
        projections: Vec<(Expr, Option<String>)>,
        /// Expected output Arrow type per projection (for CAST support).
        target_types: Vec<DataType>,
    },
    /// Phase B A4: invoke registered neural classifiers against the
    /// input batches and overwrite the per-invocation placeholder
    /// column with each row's predicted probability. Wraps a Locy
    /// clause body plan when `CompiledClause.model_invocations` is
    /// non-empty; transparent (passes batches through unchanged) when
    /// the list is empty.
    ///
    /// Registry and cache are carried on the node so that
    /// `execute_subplan` — which spins up a fresh
    /// `HybridPhysicalPlanner` per call — can lower it to a physical
    /// `LocyModelInvokeExec` without depending on planner-side
    /// runtime state.
    LocyModelInvoke {
        input: Box<LogicalPlan>,
        invocations: Vec<uni_locy::ModelInvocation>,
        classifier_registry: Arc<uni_locy::ClassifierRegistry>,
        classifier_cache: Option<Arc<uni_locy::ModelInvocationCache>>,
        /// Phase C B1-B3 follow-up: per-query side-channel store
        /// for per-invocation (raw, calibrated, confidence_band)
        /// records. `LocyModelInvokeExec` writes here after each
        /// classifier call; EXPLAIN reads via collect_neural_calls
        /// to surface NeuralProvenance for ALONG/FOLD-position
        /// invocations and Mode B re-execution paths.
        classifier_provenance_store: Option<Arc<uni_locy::NeuralProvenanceStore>>,
        /// Phase D D3 runtime: one handle per `path_context.source_rule`
        /// referenced by any invocation on this node. The handle's
        /// `data: Arc<RwLock<Vec<RecordBatch>>>` is shared with the
        /// `DerivedScanRegistry`; the source rule's derived facts are
        /// already converged by the time this node executes (the
        /// dependency-graph builder ensures source rules sit in
        /// earlier strata).
        path_context_handles: std::collections::HashMap<
            String,
            super::df_graph::locy_model_invoke::PathContextHandle,
        >,
    },
}
2281
/// Extracted vector similarity predicate info for optimization.
///
/// Built by `extract_simple_vector_similarity` from a comparison such as
/// `vector_similarity(n.embedding, [1,2,3]) > 0.8`.
struct VectorSimilarityPredicate {
    /// First component returned by `extract_vector_similarity_call`
    /// (presumably the variable being searched, e.g. `n` — confirm).
    variable: String,
    /// Second component returned by `extract_vector_similarity_call`
    /// (presumably the vector property, e.g. `embedding` — confirm).
    property: String,
    /// Third component returned by `extract_vector_similarity_call`
    /// (presumably the query-vector expression — confirm).
    query: Expr,
    /// Numeric bound from the comparison, when the predicate has one.
    threshold: Option<f32>,
}
2289
/// Result of extracting vector_similarity from a predicate.
///
/// Returned by `extract_vector_similarity`, which splits an expression into
/// the similarity predicate and whatever else was AND-ed with it.
struct VectorSimilarityExtraction {
    /// The extracted vector similarity predicate
    predicate: VectorSimilarityPredicate,
    /// Remaining predicates that couldn't be optimized (if any).
    /// `None` when the whole expression was the similarity comparison.
    residual: Option<Expr>,
}
2297
2298/// Try to extract a vector_similarity predicate from an expression.
2299/// Matches patterns like:
2300/// - vector_similarity(n.embedding, [1,2,3]) > 0.8
2301/// - n.embedding ~= $query
2302///
2303/// Also handles AND predicates.
2304fn extract_vector_similarity(expr: &Expr) -> Option<VectorSimilarityExtraction> {
2305    match expr {
2306        Expr::BinaryOp { left, op, right } => {
2307            // Handle AND: check both sides for vector_similarity
2308            if matches!(op, BinaryOp::And) {
2309                // Try left side first
2310                if let Some(vs) = extract_simple_vector_similarity(left) {
2311                    return Some(VectorSimilarityExtraction {
2312                        predicate: vs,
2313                        residual: Some(right.as_ref().clone()),
2314                    });
2315                }
2316                // Try right side
2317                if let Some(vs) = extract_simple_vector_similarity(right) {
2318                    return Some(VectorSimilarityExtraction {
2319                        predicate: vs,
2320                        residual: Some(left.as_ref().clone()),
2321                    });
2322                }
2323                // Recursively check within left/right for nested ANDs
2324                if let Some(mut extraction) = extract_vector_similarity(left) {
2325                    extraction.residual = Some(combine_with_and(
2326                        extraction.residual,
2327                        right.as_ref().clone(),
2328                    ));
2329                    return Some(extraction);
2330                }
2331                if let Some(mut extraction) = extract_vector_similarity(right) {
2332                    extraction.residual =
2333                        Some(combine_with_and(extraction.residual, left.as_ref().clone()));
2334                    return Some(extraction);
2335                }
2336                return None;
2337            }
2338
2339            // Simple case: direct vector_similarity comparison
2340            if let Some(vs) = extract_simple_vector_similarity(expr) {
2341                return Some(VectorSimilarityExtraction {
2342                    predicate: vs,
2343                    residual: None,
2344                });
2345            }
2346            None
2347        }
2348        _ => None,
2349    }
2350}
2351
2352/// Helper to combine an optional expression with another using AND
2353fn combine_with_and(opt_expr: Option<Expr>, other: Expr) -> Expr {
2354    match opt_expr {
2355        Some(e) => Expr::BinaryOp {
2356            left: Box::new(e),
2357            op: BinaryOp::And,
2358            right: Box::new(other),
2359        },
2360        None => other,
2361    }
2362}
2363
2364/// Extract a simple vector_similarity comparison (no AND)
2365fn extract_simple_vector_similarity(expr: &Expr) -> Option<VectorSimilarityPredicate> {
2366    match expr {
2367        Expr::BinaryOp { left, op, right } => {
2368            // Pattern: vector_similarity(...) > threshold or vector_similarity(...) >= threshold
2369            if matches!(op, BinaryOp::Gt | BinaryOp::GtEq)
2370                && let (Some(vs), Some(thresh)) = (
2371                    extract_vector_similarity_call(left),
2372                    extract_float_literal(right),
2373                )
2374            {
2375                return Some(VectorSimilarityPredicate {
2376                    variable: vs.0,
2377                    property: vs.1,
2378                    query: vs.2,
2379                    threshold: Some(thresh),
2380                });
2381            }
2382            // Pattern: threshold < vector_similarity(...) or threshold <= vector_similarity(...)
2383            if matches!(op, BinaryOp::Lt | BinaryOp::LtEq)
2384                && let (Some(thresh), Some(vs)) = (
2385                    extract_float_literal(left),
2386                    extract_vector_similarity_call(right),
2387                )
2388            {
2389                return Some(VectorSimilarityPredicate {
2390                    variable: vs.0,
2391                    property: vs.1,
2392                    query: vs.2,
2393                    threshold: Some(thresh),
2394                });
2395            }
2396            // Pattern: n.embedding ~= query
2397            if matches!(op, BinaryOp::ApproxEq)
2398                && let Expr::Property(var_expr, prop) = left.as_ref()
2399                && let Expr::Variable(var) = var_expr.as_ref()
2400            {
2401                return Some(VectorSimilarityPredicate {
2402                    variable: var.clone(),
2403                    property: prop.clone(),
2404                    query: right.as_ref().clone(),
2405                    threshold: None,
2406                });
2407            }
2408            None
2409        }
2410        _ => None,
2411    }
2412}
2413
2414/// Extract (variable, property, query_expr) from vector_similarity(n.prop, query)
2415fn extract_vector_similarity_call(expr: &Expr) -> Option<(String, String, Expr)> {
2416    if let Expr::FunctionCall { name, args, .. } = expr
2417        && name.eq_ignore_ascii_case("vector_similarity")
2418        && args.len() == 2
2419    {
2420        // First arg should be Property(Identifier(var), prop)
2421        if let Expr::Property(var_expr, prop) = &args[0]
2422            && let Expr::Variable(var) = var_expr.as_ref()
2423        {
2424            // Second arg is query
2425            return Some((var.clone(), prop.clone(), args[1].clone()));
2426        }
2427    }
2428    None
2429}
2430
2431/// Extract a float value from a literal expression
2432fn extract_float_literal(expr: &Expr) -> Option<f32> {
2433    match expr {
2434        Expr::Literal(CypherLiteral::Integer(i)) => Some(*i as f32),
2435        Expr::Literal(CypherLiteral::Float(f)) => Some(*f as f32),
2436        _ => None,
2437    }
2438}
2439
/// Translates a parsed Cypher AST into a [`LogicalPlan`].
///
/// `QueryPlanner` applies semantic validation (variable scoping, label
/// resolution, type checking) and produces a plan tree that the executor
/// can run against storage.
///
/// Construct with [`QueryPlanner::new`] and configure with the `with_*`
/// builder methods.
#[derive(Debug)]
pub struct QueryPlanner {
    /// Schema the planner was constructed with.
    schema: Arc<Schema>,
    /// Cache of parsed generation expressions, keyed by (label_name, gen_col_name).
    /// Filled once in `new`; expressions that fail to parse are not cached.
    gen_expr_cache: HashMap<(String, String), Expr>,
    /// Counter for generating unique anonymous variable names.
    anon_counter: std::sync::atomic::AtomicUsize,
    /// Query parameters for resolving $param in SKIP/LIMIT. Empty unless
    /// supplied through `with_params`.
    params: HashMap<String, uni_common::Value>,
    /// Optional plugin registry consulted when label / edge-type / identifier
    /// resolution misses the local schema (M5b — Catalog / ReplacementScan).
    plugin_registry: Option<Arc<uni_plugin::PluginRegistry>>,
    /// Gate for replacement-scan dispatch on unknown identifiers (M5b).
    /// `false` by default.
    replacement_scans_enabled: bool,
    /// Names of parameters folded into a `LIMIT`/`SKIP` position during the
    /// plan. The resulting `LogicalPlan::Limit` bakes the concrete values in, so
    /// a plan cache keyed on query text must additionally key on these
    /// parameters' values (exposed through the `folded_limit_skip_params()`
    /// accessor). Interior-mutable because `plan` takes `&self`.
    folded_limit_skip_params: std::sync::Mutex<std::collections::BTreeSet<String>>,
}
2466
/// Bundle of inputs describing one relationship traversal to plan.
///
/// NOTE(review): the code consuming this struct is outside this section; the
/// per-field notes marked "presumably" are inferred from names and types and
/// should be confirmed against the traversal planner.
struct TraverseParams<'a> {
    /// Relationship pattern being traversed (borrowed from the AST).
    rel: &'a RelationshipPattern,
    /// Node pattern at the far end of `rel` (borrowed from the AST).
    target_node: &'a NodePattern,
    /// Presumably `true` when the pattern comes from an OPTIONAL MATCH — confirm.
    optional: bool,
    /// Presumably the name bound to the whole path, if the pattern names one — confirm.
    path_variable: Option<String>,
    /// All variables from this OPTIONAL MATCH pattern.
    /// Used to ensure multi-hop patterns correctly NULL all vars when any hop fails.
    optional_pattern_vars: HashSet<String>,
}
2476
2477impl QueryPlanner {
2478    /// Create a new planner for the given schema.
2479    ///
2480    /// Pre-parses all generation expressions defined in the schema so that
2481    /// repeated plan calls avoid redundant parsing.
2482    pub fn new(schema: Arc<Schema>) -> Self {
2483        // Pre-parse all generation expressions for caching
2484        let mut gen_expr_cache = HashMap::new();
2485        for (label, props) in &schema.properties {
2486            for (gen_col, meta) in props {
2487                if let Some(expr_str) = &meta.generation_expression
2488                    && let Ok(parsed_expr) = uni_cypher::parse_expression(expr_str)
2489                {
2490                    gen_expr_cache.insert((label.clone(), gen_col.clone()), parsed_expr);
2491                }
2492            }
2493        }
2494        Self {
2495            schema,
2496            gen_expr_cache,
2497            anon_counter: std::sync::atomic::AtomicUsize::new(0),
2498            params: HashMap::new(),
2499            plugin_registry: None,
2500            replacement_scans_enabled: false,
2501            folded_limit_skip_params: std::sync::Mutex::new(std::collections::BTreeSet::new()),
2502        }
2503    }
2504
2505    /// Record the parameters referenced by a successfully-folded `LIMIT`/`SKIP`
2506    /// expression so the caller's plan cache can key on their values.
2507    fn note_folded_limit_skip(&self, expr: &Expr) {
2508        let mut names = Vec::new();
2509        collect_expr_parameters(expr, &mut names);
2510        if !names.is_empty()
2511            && let Ok(mut acc) = self.folded_limit_skip_params.lock()
2512        {
2513            acc.extend(names);
2514        }
2515    }
2516
2517    /// Parameter names folded into `LIMIT`/`SKIP` positions during the last
2518    /// [`plan`](Self::plan).
2519    ///
2520    /// The cached plan bakes these values in, so a text-keyed plan cache must
2521    /// fold their current values into its key — otherwise two calls differing
2522    /// only in a LIMIT/SKIP parameter would wrongly share one cached plan.
2523    /// Returns an empty vector when no parameter was folded.
2524    #[must_use]
2525    pub fn folded_limit_skip_params(&self) -> Vec<String> {
2526        self.folded_limit_skip_params
2527            .lock()
2528            .map(|acc| acc.iter().cloned().collect())
2529            .unwrap_or_default()
2530    }
2531
2532    /// Set query parameters for resolving `$param` references in SKIP/LIMIT.
2533    pub fn with_params(mut self, params: HashMap<String, uni_common::Value>) -> Self {
2534        self.params = params;
2535        self
2536    }
2537
2538    /// Attach a plugin registry for catalog / replacement-scan fallbacks
2539    /// (M5b). When absent, label / edge-type resolution behaves exactly as
2540    /// before; when present, an unknown label is offered to each
2541    /// `CatalogProvider` before erroring.
2542    #[must_use]
2543    pub fn with_plugin_registry(mut self, registry: Arc<uni_plugin::PluginRegistry>) -> Self {
2544        self.plugin_registry = Some(registry);
2545        self
2546    }
2547
2548    /// Enable replacement-scan dispatch on unknown identifiers (M5b §4.23).
2549    /// Default off; opt-in only.
2550    #[must_use]
2551    pub fn with_replacement_scans(mut self, enabled: bool) -> Self {
2552        self.replacement_scans_enabled = enabled;
2553        self
2554    }
2555
2556    /// Allocate (or look up) a virtual label ID for `name` by consulting
2557    /// every registered `CatalogProvider` and then every registered
2558    /// `ReplacementScanProvider` (only the latter when the replacement-
2559    /// scan gate is on). On a first claim the catalog table is stashed
2560    /// on the host's [`uni_plugin::PluginRegistry`] under a freshly
2561    /// allocated virtual ID; subsequent calls with the same name return
2562    /// the cached ID and refresh the stashed table.
2563    ///
2564    /// Returns `None` if no provider claims the label or no plugin
2565    /// registry is attached. Returns `Some((id, table))` on a hit; the
2566    /// `id` lies in `[VIRTUAL_LABEL_ID_START, VIRTUAL_LABEL_ID_SENTINEL)`.
2567    /// Errors are surfaced as `Some(Err(_))`-equivalent via `Result`.
2568    fn allocate_virtual_label(
2569        &self,
2570        name: &str,
2571    ) -> Result<Option<(u16, Arc<dyn uni_plugin::traits::catalog::CatalogTable>)>> {
2572        let Some(registry) = self.plugin_registry.as_ref() else {
2573            return Ok(None);
2574        };
2575        // 1. CatalogProvider (always consulted, no gate — Batch 2 semantics).
2576        let mut claimed: Option<Arc<dyn uni_plugin::traits::catalog::CatalogTable>> = None;
2577        for cat in registry.catalogs() {
2578            if let Some(t) = cat.resolve_label(name) {
2579                claimed = Some(t);
2580                break;
2581            }
2582        }
2583        // 2. ReplacementScanProvider (gated). Only consult if no
2584        //    CatalogProvider already claimed.
2585        if claimed.is_none() {
2586            use uni_plugin::traits::catalog::{Replacement, ReplacementRequest};
2587            if let Some(Replacement::CatalogTable(t)) =
2588                self.consult_replacement_scan(ReplacementRequest::Label(name))
2589            {
2590                claimed = Some(t);
2591            }
2592        }
2593        let Some(table) = claimed else {
2594            return Ok(None);
2595        };
2596        let id = registry
2597            .register_virtual_label(name, Arc::clone(&table))
2598            .map_err(|e| anyhow!("virtual label registration failed for `{name}`: {e}"))?;
2599        Ok(Some((id, table)))
2600    }
2601
2602    /// Reject any write operation that names a label currently allocated
2603    /// as a virtual (catalog-backed) label. Catalog tables are read-only
2604    /// in this milestone — there is no write-back path through
2605    /// `CatalogTable::scan` to the originating provider, so silently
2606    /// allowing the write would produce ghosted state on the host side
2607    /// without affecting the external catalog. Errors with a clear,
2608    /// actionable message.
2609    fn reject_virtual_label_writes(&self, labels: &[String], op: &str) -> Result<()> {
2610        let Some(registry) = self.plugin_registry.as_ref() else {
2611            return Ok(());
2612        };
2613        for label in labels {
2614            if registry.virtual_label_by_name(label).is_some() {
2615                return Err(anyhow!(
2616                    "Cannot {op} on virtual (catalog-resolved) label `{label}` — virtual \
2617                     labels are read-only; write back via the originating catalog \
2618                     instead"
2619                ));
2620            }
2621        }
2622        Ok(())
2623    }
2624
2625    /// Edge-type analog of [`Self::allocate_virtual_label`].
2626    fn allocate_virtual_edge_type(
2627        &self,
2628        name: &str,
2629    ) -> Result<Option<(u32, Arc<dyn uni_plugin::traits::catalog::CatalogTable>)>> {
2630        let Some(registry) = self.plugin_registry.as_ref() else {
2631            return Ok(None);
2632        };
2633        let mut claimed: Option<Arc<dyn uni_plugin::traits::catalog::CatalogTable>> = None;
2634        for cat in registry.catalogs() {
2635            if let Some(t) = cat.resolve_edge_type(name) {
2636                claimed = Some(t);
2637                break;
2638            }
2639        }
2640        let Some(table) = claimed else {
2641            return Ok(None);
2642        };
2643        let id = registry
2644            .register_virtual_edge_type(name, Arc::clone(&table))
2645            .map_err(|e| anyhow!("virtual edge-type registration failed for `{name}`: {e}"))?;
2646        Ok(Some((id, table)))
2647    }
2648
2649    /// Try to resolve an unknown identifier through replacement-scan providers
2650    /// (gated by [`Self::with_replacement_scans`]). Returns the first
2651    /// [`Replacement`] any registered provider produces, or `None` if the
2652    /// gate is off, no registry is attached, or no provider claims the
2653    /// identifier. First-match wins (mirrors DuckDB).
2654    pub(crate) fn consult_replacement_scan(
2655        &self,
2656        request: uni_plugin::traits::catalog::ReplacementRequest<'_>,
2657    ) -> Option<uni_plugin::traits::catalog::Replacement> {
2658        if !self.replacement_scans_enabled {
2659            return None;
2660        }
2661        let registry = self.plugin_registry.as_ref()?;
2662        for r in registry.replacement_scans().iter() {
2663            if let Some(replacement) = r.replace(&request) {
2664                tracing::debug!(
2665                    target: "uni.plugin.registry",
2666                    ?request,
2667                    ?replacement,
2668                    "identifier resolved via ReplacementScanProvider"
2669                );
2670                return Some(replacement);
2671            }
2672        }
2673        None
2674    }
2675
2676    /// Resolve a user-typed procedure name against the attached plugin
2677    /// registry, applying the same namespace-prefix rules as
2678    /// `ProcedureRegistry::resolve_user_procedure` (host-coupled
2679    /// procedure dispatch). Returns `true` if any namespace claims the
2680    /// name. Used by the procedure-call replacement-scan gate to decide
2681    /// whether to consult before substituting.
2682    fn procedure_resolves(&self, user_name: &str) -> bool {
2683        let Some(registry) = self.plugin_registry.as_ref() else {
2684            return false;
2685        };
2686        // Try every namespace/local split (first-dot → last-dot) so dotted
2687        // plugin ids resolve alongside the first-dot M9/builtin convention.
2688        // Mirrors `ProcedureRegistry::resolve_user_procedure`.
2689        if uni_plugin::QName::candidate_splits(user_name).any(|q| registry.procedure(&q).is_some())
2690        {
2691            return true;
2692        }
2693        let stripped = user_name.strip_prefix("uni.").unwrap_or(user_name);
2694        for plugin_id in ["uni", "builtin", "apoc-core", "custom"] {
2695            if registry
2696                .procedure(&uni_plugin::QName::new(plugin_id, stripped))
2697                .is_some()
2698            {
2699                return true;
2700            }
2701        }
2702        false
2703    }
2704
2705    /// Construct a [`uni_plugin::QName`] from a user-typed identifier for
2706    /// passing to [`Replacement`]-scan providers. If the name is dotted,
2707    /// the last segment is the local and the rest is the namespace
2708    /// (mirroring `QName::parse`). Bare names — which Cypher allows for
2709    /// procedures (`CALL foo()`) and functions (`RETURN foo(x)`) — are
2710    /// encoded with the conventional `"user"` namespace; providers that
2711    /// want to match a bare-typed name should inspect `.local()`.
2712    fn qname_from_user(name: &str) -> uni_plugin::QName {
2713        uni_plugin::QName::parse(name).unwrap_or_else(|_| uni_plugin::QName::new("user", name))
2714    }
2715
2716    /// Apply `ReplacementScanProvider`-driven function rewrites to the
2717    /// query's AST. When the gate is off or no registry is attached, the
2718    /// walker is short-circuited and the query is returned unchanged.
2719    /// Otherwise, every [`uni_cypher::ast::Expr::FunctionCall`] is offered
2720    /// to registered providers (first-match wins); a returned
2721    /// `Replacement::Function(new_qname)` substitutes the name in place.
2722    /// Rewrite depth is capped at 1 — the rewritten name is NOT re-
2723    /// consulted (a chained `A→B→A` provider therefore stops after the
2724    /// first hop). Wrong-variant returns (`CatalogTable`, `Procedure`)
2725    /// error immediately.
2726    fn rewrite_function_calls_in_query(
2727        &self,
2728        query: uni_cypher::ast::Query,
2729    ) -> Result<uni_cypher::ast::Query> {
2730        if !self.replacement_scans_enabled || self.plugin_registry.is_none() {
2731            return Ok(query);
2732        }
2733        let mut rename = |name: &str| -> Result<Option<String>> {
2734            let qname = Self::qname_from_user(name);
2735            use uni_plugin::traits::catalog::{Replacement, ReplacementRequest};
2736            match self.consult_replacement_scan(ReplacementRequest::Function(&qname)) {
2737                Some(Replacement::Function(new_qname)) => {
2738                    // Cypher function-call dispatch is bare-name-keyed
2739                    // (the per-category translators in `df_expr` match on
2740                    // `name.to_uppercase()` against bare local strings —
2741                    // "UPPER", "ABS", etc.). When the provider returns a
2742                    // synthetic-namespace target (`builtin.*` or `user.*`),
2743                    // strip the namespace so the AST name is what those
2744                    // dispatchers expect; for plugin-namespaced targets,
2745                    // preserve the full dotted form (matches how users
2746                    // type them).
2747                    let rewritten = match new_qname.namespace() {
2748                        "builtin" | "user" => new_qname.local().to_string(),
2749                        _ => new_qname.to_string(),
2750                    };
2751                    tracing::debug!(
2752                        target: "uni.plugin.registry",
2753                        from = %name,
2754                        to = %rewritten,
2755                        "function call rerouted via ReplacementScanProvider"
2756                    );
2757                    Ok(Some(rewritten))
2758                }
2759                Some(other) => Err(anyhow!(
2760                    "ReplacementScanProvider returned wrong variant for Function \
2761                     request `{}`: expected `Function`, got {:?}",
2762                    name,
2763                    other
2764                )),
2765                None => Ok(None),
2766            }
2767        };
2768        crate::query::rewrite::function_rename::rewrite_function_calls_in_query(query, &mut rename)
2769    }
2770
2771    /// Plan a Cypher query with no pre-bound variables.
2772    pub fn plan(&self, query: Query) -> Result<LogicalPlan> {
2773        self.plan_with_scope(query, Vec::new())
2774    }
2775
2776    /// Plan a Cypher query with a set of externally pre-bound variable names.
2777    ///
2778    /// `vars` lists variable names already in scope before this query executes
2779    /// (e.g., from an enclosing Locy rule body).
2780    pub fn plan_with_scope(&self, query: Query, vars: Vec<String>) -> Result<LogicalPlan> {
2781        // Apply query rewrites before planning
2782        let rewritten_query = crate::query::rewrite::rewrite_query(query)?;
2783        // M5 follow-up #5: function-call rewrite via ReplacementScanProvider.
2784        // Done as an AST pass *before* planning so the rewritten name flows
2785        // through every downstream stage (translation, UDF resolution,
2786        // execution) as if the user had typed it. No-op when the gate is
2787        // off or no provider claims the call. First-match wins; hard-cap
2788        // at one rewrite per call site (the rewritten name is NOT re-
2789        // consulted) — see `rewrite_function_calls_in_query`.
2790        let rewritten_query = self.rewrite_function_calls_in_query(rewritten_query)?;
2791        if Self::has_mixed_union_modes(&rewritten_query) {
2792            return Err(anyhow!(
2793                "SyntaxError: InvalidClauseComposition - Cannot mix UNION and UNION ALL in the same query"
2794            ));
2795        }
2796
2797        match rewritten_query {
2798            Query::Single(stmt) => self.plan_single(stmt, vars),
2799            Query::Union { left, right, all } => {
2800                let l = self.plan_with_scope(*left, vars.clone())?;
2801                let r = self.plan_with_scope(*right, vars)?;
2802
2803                // Validate that both sides have the same column names
2804                let left_cols = Self::extract_projection_columns(&l);
2805                let right_cols = Self::extract_projection_columns(&r);
2806
2807                if left_cols != right_cols {
2808                    return Err(anyhow!(
2809                        "SyntaxError: DifferentColumnsInUnion - UNION queries must have same column names"
2810                    ));
2811                }
2812
2813                Ok(LogicalPlan::Union {
2814                    left: Box::new(l),
2815                    right: Box::new(r),
2816                    all,
2817                })
2818            }
2819            Query::Schema(cmd) => self.plan_schema_command(*cmd),
2820            Query::Explain(inner) => {
2821                let inner_plan = self.plan_with_scope(*inner, vars)?;
2822                Ok(LogicalPlan::Explain {
2823                    plan: Box::new(inner_plan),
2824                })
2825            }
2826            Query::TimeTravel { .. } => {
2827                unreachable!("TimeTravel should be resolved at API layer before planning")
2828            }
2829        }
2830    }
2831
2832    fn collect_union_modes(query: &Query, out: &mut HashSet<bool>) {
2833        match query {
2834            Query::Union { left, right, all } => {
2835                out.insert(*all);
2836                Self::collect_union_modes(left, out);
2837                Self::collect_union_modes(right, out);
2838            }
2839            Query::Explain(inner) => Self::collect_union_modes(inner, out),
2840            Query::TimeTravel { query, .. } => Self::collect_union_modes(query, out),
2841            Query::Single(_) | Query::Schema(_) => {}
2842        }
2843    }
2844
2845    fn has_mixed_union_modes(query: &Query) -> bool {
2846        let mut modes = HashSet::new();
2847        Self::collect_union_modes(query, &mut modes);
2848        modes.len() > 1
2849    }
2850
2851    fn next_anon_var(&self) -> String {
2852        let id = self
2853            .anon_counter
2854            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
2855        format!("_anon_{}", id)
2856    }
2857
2858    /// Extract projection column names from a logical plan.
2859    /// Used for UNION column validation.
2860    fn extract_projection_columns(plan: &LogicalPlan) -> Vec<String> {
2861        match plan {
2862            LogicalPlan::Project { projections, .. } => projections
2863                .iter()
2864                .map(|(expr, alias)| alias.clone().unwrap_or_else(|| expr.to_string_repr()))
2865                .collect(),
2866            LogicalPlan::Limit { input, .. }
2867            | LogicalPlan::Sort { input, .. }
2868            | LogicalPlan::Distinct { input, .. }
2869            | LogicalPlan::Filter { input, .. } => Self::extract_projection_columns(input),
2870            LogicalPlan::Union { left, right, .. } => {
2871                let left_cols = Self::extract_projection_columns(left);
2872                if left_cols.is_empty() {
2873                    Self::extract_projection_columns(right)
2874                } else {
2875                    left_cols
2876                }
2877            }
2878            LogicalPlan::Aggregate {
2879                group_by,
2880                aggregates,
2881                ..
2882            } => {
2883                let mut cols: Vec<String> = group_by.iter().map(|e| e.to_string_repr()).collect();
2884                cols.extend(aggregates.iter().map(|e| e.to_string_repr()));
2885                cols
2886            }
2887            _ => Vec::new(),
2888        }
2889    }
2890
2891    fn plan_return_clause(
2892        &self,
2893        return_clause: &ReturnClause,
2894        plan: LogicalPlan,
2895        vars_in_scope: &[VariableInfo],
2896    ) -> Result<LogicalPlan> {
2897        let mut plan = plan;
2898        let mut group_by = Vec::new();
2899        let mut aggregates = Vec::new();
2900        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
2901        let mut has_agg = false;
2902        let mut projections = Vec::new();
2903        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
2904        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
2905        let mut projected_aliases: HashSet<String> = HashSet::new();
2906
2907        for item in &return_clause.items {
2908            match item {
2909                ReturnItem::All => {
2910                    // RETURN * - add all user-named variables in scope
2911                    // (anonymous variables like _anon_0 are excluded)
2912                    let user_vars: Vec<_> = vars_in_scope
2913                        .iter()
2914                        .filter(|v| !v.name.starts_with("_anon_"))
2915                        .collect();
2916                    if user_vars.is_empty() {
2917                        return Err(anyhow!(
2918                            "SyntaxError: NoVariablesInScope - RETURN * is not allowed when there are no variables in scope"
2919                        ));
2920                    }
2921                    for v in user_vars {
2922                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2923                        if !group_by.contains(&Expr::Variable(v.name.clone())) {
2924                            group_by.push(Expr::Variable(v.name.clone()));
2925                        }
2926                        projected_aliases.insert(v.name.clone());
2927                        projected_simple_reprs.insert(v.name.clone());
2928                    }
2929                }
2930                ReturnItem::Expr {
2931                    expr,
2932                    alias,
2933                    source_text,
2934                } => {
2935                    if matches!(expr, Expr::Wildcard) {
2936                        for v in vars_in_scope {
2937                            projections
2938                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2939                            if !group_by.contains(&Expr::Variable(v.name.clone())) {
2940                                group_by.push(Expr::Variable(v.name.clone()));
2941                            }
2942                            projected_aliases.insert(v.name.clone());
2943                            projected_simple_reprs.insert(v.name.clone());
2944                        }
2945                    } else {
2946                        // Validate expression variables are defined
2947                        validate_expression_variables(expr, vars_in_scope)?;
2948                        // Validate function argument types and boolean operators
2949                        validate_expression(expr, vars_in_scope)?;
2950                        // Pattern predicates are not allowed in RETURN
2951                        if contains_pattern_predicate(expr) {
2952                            return Err(anyhow!(
2953                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in RETURN"
2954                            ));
2955                        }
2956
2957                        // Use source text as column name when no explicit alias
2958                        let effective_alias = alias.clone().or_else(|| source_text.clone());
2959                        projections.push((expr.clone(), effective_alias));
2960                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
2961                            // Bare aggregate — push directly
2962                            has_agg = true;
2963                            aggregates.push(expr.clone());
2964                            projected_aggregate_reprs.insert(expr.to_string_repr());
2965                        } else if !is_window_function(expr)
2966                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
2967                        {
2968                            // Compound aggregate or expression containing aggregates —
2969                            // extract the inner bare aggregates for the Aggregate node
2970                            has_agg = true;
2971                            compound_agg_exprs.push(expr.clone());
2972                            for inner in extract_inner_aggregates(expr) {
2973                                let repr = inner.to_string_repr();
2974                                if !projected_aggregate_reprs.contains(&repr) {
2975                                    aggregates.push(inner);
2976                                    projected_aggregate_reprs.insert(repr);
2977                                }
2978                            }
2979                        } else if !group_by.contains(expr) {
2980                            group_by.push(expr.clone());
2981                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
2982                                projected_simple_reprs.insert(expr.to_string_repr());
2983                            }
2984                        }
2985
2986                        if let Some(a) = alias {
2987                            if projected_aliases.contains(a) {
2988                                return Err(anyhow!(
2989                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2990                                    a
2991                                ));
2992                            }
2993                            projected_aliases.insert(a.clone());
2994                        } else if let Expr::Variable(v) = expr {
2995                            if projected_aliases.contains(v) {
2996                                return Err(anyhow!(
2997                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2998                                    v
2999                                ));
3000                            }
3001                            projected_aliases.insert(v.clone());
3002                        }
3003                    }
3004                }
3005            }
3006        }
3007
3008        // Validate compound aggregate expressions: non-aggregate refs must be
3009        // individually present in the group_by as simple variables or properties.
3010        if has_agg {
3011            let group_by_reprs: HashSet<String> =
3012                group_by.iter().map(|e| e.to_string_repr()).collect();
3013            for expr in &compound_agg_exprs {
3014                let mut refs = Vec::new();
3015                collect_non_aggregate_refs(expr, false, &mut refs);
3016                for r in &refs {
3017                    let is_covered = match r {
3018                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
3019                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
3020                    };
3021                    if !is_covered {
3022                        return Err(anyhow!(
3023                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
3024                        ));
3025                    }
3026                }
3027            }
3028        }
3029
3030        if has_agg {
3031            plan = LogicalPlan::Aggregate {
3032                input: Box::new(plan),
3033                group_by,
3034                aggregates,
3035            };
3036        }
3037
3038        let mut window_exprs = Vec::new();
3039        for (expr, _) in &projections {
3040            Self::collect_window_functions(expr, &mut window_exprs);
3041        }
3042
3043        if let Some(order_by) = &return_clause.order_by {
3044            for item in order_by {
3045                Self::collect_window_functions(&item.expr, &mut window_exprs);
3046            }
3047        }
3048
3049        let has_window_exprs = !window_exprs.is_empty();
3050
3051        if has_window_exprs {
3052            // Before creating the Window node, we need to ensure all properties
3053            // referenced by window functions are available. Create a Project node
3054            // that loads these properties.
3055            let mut props_needed_for_window: Vec<Expr> = Vec::new();
3056            for window_expr in &window_exprs {
3057                Self::collect_properties_from_expr(window_expr, &mut props_needed_for_window);
3058            }
3059
3060            // Also include non-window expressions from projections that might be needed
3061            // Preserve qualified names (e.g., "e.salary") as aliases for properties
3062            let non_window_projections: Vec<_> = projections
3063                .iter()
3064                .filter_map(|(expr, alias)| {
3065                    // Keep expressions that don't have window_spec
3066                    let keep = if let Expr::FunctionCall { window_spec, .. } = expr {
3067                        window_spec.is_none()
3068                    } else {
3069                        true
3070                    };
3071
3072                    if keep {
3073                        // For property references, use the qualified name as alias
3074                        let new_alias = if matches!(expr, Expr::Property(..)) {
3075                            Some(expr.to_string_repr())
3076                        } else {
3077                            alias.clone()
3078                        };
3079                        Some((expr.clone(), new_alias))
3080                    } else {
3081                        None
3082                    }
3083                })
3084                .collect();
3085
3086            if !non_window_projections.is_empty() || !props_needed_for_window.is_empty() {
3087                let mut intermediate_projections = non_window_projections;
3088                // Add any additional property references needed by window functions
3089                // IMPORTANT: Preserve qualified names (e.g., "e.salary") as aliases so window functions can reference them
3090                for prop in &props_needed_for_window {
3091                    if !intermediate_projections
3092                        .iter()
3093                        .any(|(e, _)| e.to_string_repr() == prop.to_string_repr())
3094                    {
3095                        let qualified_name = prop.to_string_repr();
3096                        intermediate_projections.push((prop.clone(), Some(qualified_name)));
3097                    }
3098                }
3099
3100                if !intermediate_projections.is_empty() {
3101                    plan = LogicalPlan::Project {
3102                        input: Box::new(plan),
3103                        projections: intermediate_projections,
3104                    };
3105                }
3106            }
3107
3108            // Transform property expressions in window functions to use qualified variable names
3109            // so that e.dept becomes "e.dept" variable that can be looked up from the row HashMap
3110            let transformed_window_exprs: Vec<Expr> = window_exprs
3111                .into_iter()
3112                .map(Self::transform_window_expr_properties)
3113                .collect();
3114
3115            plan = LogicalPlan::Window {
3116                input: Box::new(plan),
3117                window_exprs: transformed_window_exprs,
3118            };
3119        }
3120
3121        if let Some(order_by) = &return_clause.order_by {
3122            let alias_exprs: HashMap<String, Expr> = projections
3123                .iter()
3124                .filter_map(|(expr, alias)| {
3125                    alias.as_ref().map(|a| {
3126                        // ORDER BY is planned before the final RETURN projection.
3127                        // In aggregate contexts, aliases must resolve to the
3128                        // post-aggregate output columns, not raw aggregate calls.
3129                        let rewritten = if has_agg && !has_window_exprs {
3130                            if expr.is_aggregate() && !is_compound_aggregate(expr) {
3131                                Expr::Variable(aggregate_column_name(expr))
3132                            } else if is_compound_aggregate(expr)
3133                                || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
3134                            {
3135                                replace_aggregates_with_columns(expr)
3136                            } else {
3137                                Expr::Variable(expr.to_string_repr())
3138                            }
3139                        } else {
3140                            expr.clone()
3141                        };
3142                        (a.clone(), rewritten)
3143                    })
3144                })
3145                .collect();
3146
3147            // Build an extended scope that includes RETURN aliases so ORDER BY
3148            // can reference them (e.g. RETURN n.age AS age ORDER BY age).
3149            let order_by_scope: Vec<VariableInfo> = if return_clause.distinct {
3150                // DISTINCT in RETURN narrows ORDER BY visibility to returned columns.
3151                // Keep aliases and directly returned variables in scope.
3152                let mut scope = Vec::new();
3153                for (expr, alias) in &projections {
3154                    if let Some(a) = alias
3155                        && !is_var_in_scope(&scope, a)
3156                    {
3157                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
3158                    }
3159                    if let Expr::Variable(v) = expr
3160                        && !is_var_in_scope(&scope, v)
3161                    {
3162                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
3163                    }
3164                }
3165                scope
3166            } else {
3167                let mut scope = vars_in_scope.to_vec();
3168                for (expr, alias) in &projections {
3169                    if let Some(a) = alias
3170                        && !is_var_in_scope(&scope, a)
3171                    {
3172                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
3173                    } else if let Expr::Variable(v) = expr
3174                        && !is_var_in_scope(&scope, v)
3175                    {
3176                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
3177                    }
3178                }
3179                scope
3180            };
3181            // Validate ORDER BY expressions against the extended scope
3182            for item in order_by {
3183                // DISTINCT allows ORDER BY on the same projected expression
3184                // even when underlying variables are not otherwise visible.
3185                let matches_projected_expr = return_clause.distinct
3186                    && projections
3187                        .iter()
3188                        .any(|(expr, _)| expr.to_string_repr() == item.expr.to_string_repr());
3189                if !matches_projected_expr {
3190                    validate_expression_variables(&item.expr, &order_by_scope)?;
3191                    validate_expression(&item.expr, &order_by_scope)?;
3192                }
3193                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
3194                if has_aggregate_in_item && !has_agg {
3195                    return Err(anyhow!(
3196                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY after RETURN"
3197                    ));
3198                }
3199                if has_agg && has_aggregate_in_item {
3200                    validate_with_order_by_aggregate_item(
3201                        &item.expr,
3202                        &projected_aggregate_reprs,
3203                        &projected_simple_reprs,
3204                        &projected_aliases,
3205                    )?;
3206                }
3207            }
3208            let rewritten_order_by: Vec<SortItem> = order_by
3209                .iter()
3210                .map(|item| SortItem {
3211                    expr: {
3212                        let mut rewritten =
3213                            rewrite_order_by_expr_with_aliases(&item.expr, &alias_exprs);
3214                        if has_agg && !has_window_exprs {
3215                            rewritten = replace_aggregates_with_columns(&rewritten);
3216                        }
3217                        rewritten
3218                    },
3219                    ascending: item.ascending,
3220                })
3221                .collect();
3222            plan = LogicalPlan::Sort {
3223                input: Box::new(plan),
3224                order_by: rewritten_order_by,
3225            };
3226        }
3227
3228        if return_clause.skip.is_some() || return_clause.limit.is_some() {
3229            let skip = return_clause
3230                .skip
3231                .as_ref()
3232                .map(|e| {
3233                    self.note_folded_limit_skip(e);
3234                    parse_non_negative_integer(e, "SKIP", &self.params)
3235                })
3236                .transpose()?
3237                .flatten();
3238            let fetch = return_clause
3239                .limit
3240                .as_ref()
3241                .map(|e| {
3242                    self.note_folded_limit_skip(e);
3243                    parse_non_negative_integer(e, "LIMIT", &self.params)
3244                })
3245                .transpose()?
3246                .flatten();
3247
3248            plan = LogicalPlan::Limit {
3249                input: Box::new(plan),
3250                skip,
3251                fetch,
3252            };
3253        }
3254
3255        if !projections.is_empty() {
3256            // If we created an Aggregate or Window node, we need to adjust the final projections
3257            // to reference aggregate/window function results as columns instead of re-evaluating them
3258            let final_projections = if has_agg || has_window_exprs {
3259                projections
3260                    .into_iter()
3261                    .map(|(expr, alias)| {
3262                        // Check if this expression is an aggregate function
3263                        if expr.is_aggregate() && !is_compound_aggregate(&expr) && !has_window_exprs
3264                        {
3265                            // Bare aggregate — replace with column reference
3266                            let col_name = aggregate_column_name(&expr);
3267                            (Expr::Variable(col_name), alias)
3268                        } else if !has_window_exprs
3269                            && (is_compound_aggregate(&expr)
3270                                || (!expr.is_aggregate() && contains_aggregate_recursive(&expr)))
3271                        {
3272                            // Compound aggregate — replace inner aggregates with
3273                            // column references, keep outer expression for Project
3274                            (replace_aggregates_with_columns(&expr), alias)
3275                        }
3276                        // For grouped RETURN projections, reference the pre-computed
3277                        // group-by output column instead of re-evaluating the expression
3278                        // against the aggregate schema (which no longer has original vars).
3279                        else if has_agg
3280                            && !has_window_exprs
3281                            && !matches!(expr, Expr::Variable(_) | Expr::Property(_, _))
3282                        {
3283                            (Expr::Variable(expr.to_string_repr()), alias)
3284                        }
3285                        // Check if this expression is a window function
3286                        else if let Expr::FunctionCall {
3287                            window_spec: Some(_),
3288                            ..
3289                        } = &expr
3290                        {
3291                            // Replace window function with a column reference to its result
3292                            // The column name in the Window output is the full expression string
3293                            let window_col_name = expr.to_string_repr();
3294                            // Keep the original alias for the final output
3295                            (Expr::Variable(window_col_name), alias)
3296                        } else {
3297                            (expr, alias)
3298                        }
3299                    })
3300                    .collect()
3301            } else {
3302                projections
3303            };
3304
3305            plan = LogicalPlan::Project {
3306                input: Box::new(plan),
3307                projections: final_projections,
3308            };
3309        }
3310
3311        if return_clause.distinct {
3312            plan = LogicalPlan::Distinct {
3313                input: Box::new(plan),
3314            };
3315        }
3316
3317        Ok(plan)
3318    }
3319
3320    fn plan_single(&self, query: Statement, initial_vars: Vec<String>) -> Result<LogicalPlan> {
3321        let typed_vars: Vec<VariableInfo> = initial_vars
3322            .into_iter()
3323            .map(|name| VariableInfo::new(name, VariableType::Imported))
3324            .collect();
3325        self.plan_single_typed(query, typed_vars)
3326    }
3327
3328    /// Rewrite a query then plan it, preserving typed variable scope when possible.
3329    ///
3330    /// For `Query::Single` statements, uses `plan_single_typed` to carry typed
3331    /// variable info through and avoid false type-conflict errors in subqueries.
3332    /// For unions and other compound queries, falls back to `plan_with_scope`.
3333    fn rewrite_and_plan_typed(
3334        &self,
3335        query: Query,
3336        typed_vars: &[VariableInfo],
3337    ) -> Result<LogicalPlan> {
3338        let rewritten = crate::query::rewrite::rewrite_query(query)?;
3339        match rewritten {
3340            Query::Single(stmt) => self.plan_single_typed(stmt, typed_vars.to_vec()),
3341            other => self.plan_with_scope(other, vars_to_strings(typed_vars)),
3342        }
3343    }
3344
3345    fn plan_single_typed(
3346        &self,
3347        query: Statement,
3348        initial_vars: Vec<VariableInfo>,
3349    ) -> Result<LogicalPlan> {
3350        let mut plan = LogicalPlan::Empty;
3351
3352        if !initial_vars.is_empty() {
3353            // Project bound variables from outer scope as parameters.
3354            // These come from the enclosing query's row (passed as sub_params in EXISTS evaluation).
3355            // Use Parameter expressions to read from params, not Variable which would read from input row.
3356            let projections = initial_vars
3357                .iter()
3358                .map(|v| (Expr::Parameter(v.name.clone()), Some(v.name.clone())))
3359                .collect();
3360            plan = LogicalPlan::Project {
3361                input: Box::new(plan),
3362                projections,
3363            };
3364        }
3365
3366        let mut vars_in_scope: Vec<VariableInfo> = initial_vars;
3367        // Track variables introduced by CREATE clauses so we can distinguish
3368        // MATCH-introduced variables (which cannot be re-created as bare nodes)
3369        // from CREATE-introduced variables (which can be referenced as bare nodes).
3370        let mut create_introduced_vars: HashSet<String> = HashSet::new();
3371        // Track variables targeted by DELETE so we can reject property/label
3372        // access on deleted entities in subsequent RETURN clauses.
3373        let mut deleted_vars: HashSet<String> = HashSet::new();
3374
3375        let clause_count = query.clauses.len();
3376        for (clause_idx, clause) in query.clauses.into_iter().enumerate() {
3377            match clause {
3378                Clause::Match(match_clause) => {
3379                    plan = self.plan_match_clause(&match_clause, plan, &mut vars_in_scope)?;
3380                }
3381                Clause::Unwind(unwind) => {
3382                    plan = LogicalPlan::Unwind {
3383                        input: Box::new(plan),
3384                        expr: unwind.expr.clone(),
3385                        variable: unwind.variable.clone(),
3386                    };
3387                    let unwind_out_type = infer_unwind_output_type(&unwind.expr, &vars_in_scope);
3388                    add_var_to_scope(&mut vars_in_scope, &unwind.variable, unwind_out_type)?;
3389                }
3390                Clause::Call(call_clause) => {
3391                    match &call_clause.kind {
3392                        CallKind::Procedure {
3393                            procedure,
3394                            arguments,
3395                        } => {
3396                            // Validate that procedure arguments don't contain aggregation functions
3397                            for arg in arguments {
3398                                if contains_aggregate_recursive(arg) {
3399                                    return Err(anyhow!(
3400                                        "SyntaxError: InvalidAggregation - Aggregation expressions are not allowed as arguments to procedure calls"
3401                                    ));
3402                                }
3403                            }
3404
3405                            let has_yield_star = call_clause.yield_items.len() == 1
3406                                && call_clause.yield_items[0].name == "*"
3407                                && call_clause.yield_items[0].alias.is_none();
3408                            if has_yield_star && clause_idx + 1 < clause_count {
3409                                return Err(anyhow!(
3410                                    "SyntaxError: UnexpectedSyntax - YIELD * is only allowed in standalone procedure calls"
3411                                ));
3412                            }
3413
3414                            // Validate for duplicate yield names (VariableAlreadyBound)
3415                            let mut yield_names = Vec::new();
3416                            for item in &call_clause.yield_items {
3417                                if item.name == "*" {
3418                                    continue;
3419                                }
3420                                let output_name = item.alias.as_ref().unwrap_or(&item.name);
3421                                if yield_names.contains(output_name) {
3422                                    return Err(anyhow!(
3423                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already appears in YIELD clause",
3424                                        output_name
3425                                    ));
3426                                }
3427                                // Check against existing scope (in-query CALL must not shadow)
3428                                if clause_idx > 0
3429                                    && vars_in_scope.iter().any(|v| v.name == *output_name)
3430                                {
3431                                    return Err(anyhow!(
3432                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already declared in outer scope",
3433                                        output_name
3434                                    ));
3435                                }
3436                                yield_names.push(output_name.clone());
3437                            }
3438
3439                            let mut yields = Vec::new();
3440                            for item in &call_clause.yield_items {
3441                                if item.name == "*" {
3442                                    continue;
3443                                }
3444                                yields.push((item.name.clone(), item.alias.clone()));
3445                                let var_name = item.alias.as_ref().unwrap_or(&item.name);
3446                                // Use Imported because procedure return types are unknown
3447                                // at plan time (could be nodes, edges, or scalars)
3448                                add_var_to_scope(
3449                                    &mut vars_in_scope,
3450                                    var_name,
3451                                    VariableType::Imported,
3452                                )?;
3453                            }
3454                            // M5 follow-up #5: if replacement-scan dispatch is
3455                            // enabled and the procedure name does not resolve
3456                            // against the plugin registry, consult registered
3457                            // `ReplacementScanProvider`s. A `Replacement::Procedure`
3458                            // substitutes the call's target name in the logical
3459                            // plan; the rewritten name must itself resolve or
3460                            // we error immediately (no second-tier consult — caps
3461                            // rewrite depth at one).
3462                            let procedure_name = if self.replacement_scans_enabled
3463                                && !self.procedure_resolves(procedure)
3464                            {
3465                                use uni_plugin::traits::catalog::{
3466                                    Replacement, ReplacementRequest,
3467                                };
3468                                let qname = Self::qname_from_user(procedure);
3469                                match self
3470                                    .consult_replacement_scan(ReplacementRequest::Procedure(&qname))
3471                                {
3472                                    Some(Replacement::Procedure(new_qname)) => {
3473                                        let rewritten = new_qname.to_string();
3474                                        if !self.procedure_resolves(&rewritten) {
3475                                            return Err(anyhow!(
3476                                                "ReplacementScanProvider rerouted procedure \
3477                                                 `{}` to `{}`, which also did not resolve",
3478                                                procedure,
3479                                                rewritten
3480                                            ));
3481                                        }
3482                                        tracing::debug!(
3483                                            target: "uni.plugin.registry",
3484                                            from = %procedure,
3485                                            to = %rewritten,
3486                                            "procedure rerouted via ReplacementScanProvider"
3487                                        );
3488                                        rewritten
3489                                    }
3490                                    Some(other) => {
3491                                        return Err(anyhow!(
3492                                            "ReplacementScanProvider returned wrong variant \
3493                                             for Procedure request `{}`: expected \
3494                                             `Procedure`, got {:?}",
3495                                            procedure,
3496                                            other
3497                                        ));
3498                                    }
3499                                    None => procedure.clone(),
3500                                }
3501                            } else {
3502                                procedure.clone()
3503                            };
3504                            let proc_plan = LogicalPlan::ProcedureCall {
3505                                procedure_name,
3506                                arguments: arguments.clone(),
3507                                yield_items: yields.clone(),
3508                            };
3509
3510                            if matches!(plan, LogicalPlan::Empty) {
3511                                // Standalone CALL (first clause) — use directly
3512                                plan = proc_plan;
3513                            } else if yields.is_empty() {
3514                                // In-query CALL with no YIELD (void procedure):
3515                                // preserve the input rows unchanged
3516                            } else {
3517                                // In-query CALL with YIELD: cross-join input × procedure output
3518                                plan = LogicalPlan::Apply {
3519                                    input: Box::new(plan),
3520                                    subquery: Box::new(proc_plan),
3521                                    input_filter: None,
3522                                };
3523                            }
3524                        }
3525                        CallKind::Subquery(query) => {
3526                            let subquery_plan =
3527                                self.rewrite_and_plan_typed(*query.clone(), &vars_in_scope)?;
3528
3529                            // Extract variables from subquery RETURN clause
3530                            let subquery_vars = Self::collect_plan_variables(&subquery_plan);
3531
3532                            // Add new variables to scope (as Scalar since they come from subquery projection)
3533                            for var in subquery_vars {
3534                                if !is_var_in_scope(&vars_in_scope, &var) {
3535                                    add_var_to_scope(
3536                                        &mut vars_in_scope,
3537                                        &var,
3538                                        VariableType::Scalar,
3539                                    )?;
3540                                }
3541                            }
3542
3543                            plan = LogicalPlan::SubqueryCall {
3544                                input: Box::new(plan),
3545                                subquery: Box::new(subquery_plan),
3546                            };
3547                        }
3548                    }
3549                }
3550                Clause::Merge(merge_clause) => {
3551                    validate_merge_clause(&merge_clause, &vars_in_scope)?;
3552                    // M5 follow-up #6: virtual (catalog-resolved) labels are
3553                    // read-only — reject MERGE that names one.
3554                    let merge_labels = collect_pattern_labels(&merge_clause.pattern);
3555                    self.reject_virtual_label_writes(&merge_labels, "MERGE")?;
3556
3557                    plan = LogicalPlan::Merge {
3558                        input: Box::new(plan),
3559                        pattern: merge_clause.pattern.clone(),
3560                        on_match: Some(SetClause {
3561                            items: merge_clause.on_match.clone(),
3562                        }),
3563                        on_create: Some(SetClause {
3564                            items: merge_clause.on_create.clone(),
3565                        }),
3566                    };
3567
3568                    for path in &merge_clause.pattern.paths {
3569                        if let Some(path_var) = &path.variable
3570                            && !path_var.is_empty()
3571                            && !is_var_in_scope(&vars_in_scope, path_var)
3572                        {
3573                            add_var_to_scope(&mut vars_in_scope, path_var, VariableType::Path)?;
3574                        }
3575                        for element in &path.elements {
3576                            if let PatternElement::Node(n) = element {
3577                                if let Some(v) = &n.variable
3578                                    && !is_var_in_scope(&vars_in_scope, v)
3579                                {
3580                                    add_var_to_scope(&mut vars_in_scope, v, VariableType::Node)?;
3581                                }
3582                            } else if let PatternElement::Relationship(r) = element
3583                                && let Some(v) = &r.variable
3584                                && !is_var_in_scope(&vars_in_scope, v)
3585                            {
3586                                add_var_to_scope(&mut vars_in_scope, v, VariableType::Edge)?;
3587                            }
3588                        }
3589                    }
3590                }
3591                Clause::Create(create_clause) => {
3592                    // M5 follow-up #6: virtual (catalog-resolved) labels are
3593                    // read-only — reject CREATE that names one.
3594                    let create_labels = collect_pattern_labels(&create_clause.pattern);
3595                    self.reject_virtual_label_writes(&create_labels, "CREATE")?;
3596                    // Validate CREATE patterns:
3597                    // - Nodes with labels/properties are "creations" - can't rebind existing variables
3598                    // - Bare nodes (v) are "references" if bound, "creations" if not
3599                    // - Relationships are always creations - can't rebind
3600                    // - Within CREATE, each new variable can only be defined once
3601                    // - Variables used in properties must be defined
3602                    let mut create_vars: Vec<&str> = Vec::new();
3603                    for path in &create_clause.pattern.paths {
3604                        let is_standalone_node = path.elements.len() == 1;
3605                        for element in &path.elements {
3606                            match element {
3607                                PatternElement::Node(n) => {
3608                                    validate_property_variables(
3609                                        &n.properties,
3610                                        &vars_in_scope,
3611                                        &create_vars,
3612                                    )?;
3613
3614                                    if let Some(v) = n.variable.as_deref()
3615                                        && !v.is_empty()
3616                                    {
3617                                        // A node is a "creation" if it has labels or properties
3618                                        let is_creation =
3619                                            !n.labels.is_empty() || n.properties.is_some();
3620
3621                                        if is_creation {
3622                                            check_not_already_bound(
3623                                                v,
3624                                                &vars_in_scope,
3625                                                &create_vars,
3626                                            )?;
3627                                            create_vars.push(v);
3628                                        } else if is_standalone_node
3629                                            && is_var_in_scope(&vars_in_scope, v)
3630                                            && !create_introduced_vars.contains(v)
3631                                        {
3632                                            // Standalone bare node referencing a variable from a
3633                                            // non-CREATE clause (e.g. MATCH (a) CREATE (a)) — invalid.
3634                                            // Bare nodes used as relationship endpoints
3635                                            // (e.g. CREATE (a)-[:R]->(b)) are valid references.
3636                                            return Err(anyhow!(
3637                                                "SyntaxError: VariableAlreadyBound - '{}'",
3638                                                v
3639                                            ));
3640                                        } else if !create_vars.contains(&v) {
3641                                            // New bare variable — register it
3642                                            create_vars.push(v);
3643                                        }
3644                                        // else: bare reference to same-CREATE or previous-CREATE variable — OK
3645                                    }
3646                                }
3647                                PatternElement::Relationship(r) => {
3648                                    validate_property_variables(
3649                                        &r.properties,
3650                                        &vars_in_scope,
3651                                        &create_vars,
3652                                    )?;
3653
3654                                    if let Some(v) = r.variable.as_deref()
3655                                        && !v.is_empty()
3656                                    {
3657                                        check_not_already_bound(v, &vars_in_scope, &create_vars)?;
3658                                        create_vars.push(v);
3659                                    }
3660
3661                                    // Validate relationship constraints for CREATE
3662                                    if r.types.len() != 1 {
3663                                        return Err(anyhow!(
3664                                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for CREATE"
3665                                        ));
3666                                    }
3667                                    if r.direction == Direction::Both {
3668                                        return Err(anyhow!(
3669                                            "SyntaxError: RequiresDirectedRelationship - Only directed relationships are supported in CREATE"
3670                                        ));
3671                                    }
3672                                    if r.range.is_some() {
3673                                        return Err(anyhow!(
3674                                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
3675                                        ));
3676                                    }
3677                                }
3678                                PatternElement::Parenthesized { .. } => {}
3679                            }
3680                        }
3681                    }
3682
3683                    // Batch consecutive CREATEs to avoid deep recursion
3684                    match &mut plan {
3685                        LogicalPlan::CreateBatch { patterns, .. } => {
3686                            // Append to existing batch
3687                            patterns.push(create_clause.pattern.clone());
3688                        }
3689                        LogicalPlan::Create { input, pattern } => {
3690                            // Convert single Create to CreateBatch with both patterns
3691                            let first_pattern = pattern.clone();
3692                            plan = LogicalPlan::CreateBatch {
3693                                input: input.clone(),
3694                                patterns: vec![first_pattern, create_clause.pattern.clone()],
3695                            };
3696                        }
3697                        _ => {
3698                            // Start new Create (may become batch if more CREATEs follow)
3699                            plan = LogicalPlan::Create {
3700                                input: Box::new(plan),
3701                                pattern: create_clause.pattern.clone(),
3702                            };
3703                        }
3704                    }
3705                    // Add variables from created nodes and relationships to scope
3706                    for path in &create_clause.pattern.paths {
3707                        for element in &path.elements {
3708                            match element {
3709                                PatternElement::Node(n) => {
3710                                    if let Some(var) = &n.variable
3711                                        && !var.is_empty()
3712                                    {
3713                                        create_introduced_vars.insert(var.clone());
3714                                        add_var_to_scope(
3715                                            &mut vars_in_scope,
3716                                            var,
3717                                            VariableType::Node,
3718                                        )?;
3719                                    }
3720                                }
3721                                PatternElement::Relationship(r) => {
3722                                    if let Some(var) = &r.variable
3723                                        && !var.is_empty()
3724                                    {
3725                                        create_introduced_vars.insert(var.clone());
3726                                        add_var_to_scope(
3727                                            &mut vars_in_scope,
3728                                            var,
3729                                            VariableType::Edge,
3730                                        )?;
3731                                    }
3732                                }
3733                                PatternElement::Parenthesized { .. } => {
3734                                    // Skip for now - not commonly used in CREATE
3735                                }
3736                            }
3737                        }
3738                    }
3739                }
3740                Clause::Set(set_clause) => {
3741                    // Validate SET value expressions
3742                    for item in &set_clause.items {
3743                        match item {
3744                            SetItem::Property { value, .. }
3745                            | SetItem::Variable { value, .. }
3746                            | SetItem::VariablePlus { value, .. } => {
3747                                validate_expression_variables(value, &vars_in_scope)?;
3748                                validate_expression(value, &vars_in_scope)?;
3749                                if contains_pattern_predicate(value) {
3750                                    return Err(anyhow!(
3751                                        "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
3752                                    ));
3753                                }
3754                            }
3755                            SetItem::Labels { .. } => {}
3756                        }
3757                    }
3758                    plan = LogicalPlan::Set {
3759                        input: Box::new(plan),
3760                        items: set_clause.items.clone(),
3761                    };
3762                }
3763                Clause::Remove(remove_clause) => {
3764                    plan = LogicalPlan::Remove {
3765                        input: Box::new(plan),
3766                        items: remove_clause.items.clone(),
3767                    };
3768                }
3769                Clause::Delete(delete_clause) => {
3770                    // Validate DELETE targets
3771                    for item in &delete_clause.items {
3772                        // DELETE n:Label is invalid syntax (label expressions not allowed)
3773                        if matches!(item, Expr::LabelCheck { .. }) {
3774                            return Err(anyhow!(
3775                                "SyntaxError: InvalidDelete - DELETE requires a simple variable reference, not a label expression"
3776                            ));
3777                        }
3778                        let vars_used = collect_expr_variables(item);
3779                        // Reject expressions with no variable references (e.g. DELETE 1+1)
3780                        if vars_used.is_empty() {
3781                            return Err(anyhow!(
3782                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, not a literal expression"
3783                            ));
3784                        }
3785                        for var in &vars_used {
3786                            // Check if variable is defined
3787                            if find_var_in_scope(&vars_in_scope, var).is_none() {
3788                                return Err(anyhow!(
3789                                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
3790                                    var
3791                                ));
3792                            }
3793                        }
3794                        // Strict type check only for simple variable references —
3795                        // complex expressions (property access, array index, etc.)
3796                        // may resolve to a node/edge at runtime even if the base
3797                        // variable is typed as Scalar (e.g. nodes(p)[0]).
3798                        if let Expr::Variable(name) = item
3799                            && let Some(info) = find_var_in_scope(&vars_in_scope, name)
3800                            && matches!(
3801                                info.var_type,
3802                                VariableType::Scalar | VariableType::ScalarLiteral
3803                            )
3804                        {
3805                            return Err(anyhow!(
3806                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, '{}' is a scalar value",
3807                                name
3808                            ));
3809                        }
3810                    }
3811                    // Track deleted variables for later validation
3812                    for item in &delete_clause.items {
3813                        if let Expr::Variable(name) = item {
3814                            deleted_vars.insert(name.clone());
3815                        }
3816                    }
3817                    plan = LogicalPlan::Delete {
3818                        input: Box::new(plan),
3819                        items: delete_clause.items.clone(),
3820                        detach: delete_clause.detach,
3821                    };
3822                }
3823                Clause::With(with_clause) => {
3824                    let (new_plan, new_vars) =
3825                        self.plan_with_clause(&with_clause, plan, &vars_in_scope)?;
3826                    plan = new_plan;
3827                    vars_in_scope = new_vars;
3828                }
3829                Clause::WithRecursive(with_recursive) => {
3830                    // Plan the recursive CTE
3831                    plan = self.plan_with_recursive(&with_recursive, plan, &vars_in_scope)?;
3832                    // Add the CTE name to the scope (as Scalar since it's a table reference)
3833                    add_var_to_scope(
3834                        &mut vars_in_scope,
3835                        &with_recursive.name,
3836                        VariableType::Scalar,
3837                    )?;
3838                }
3839                Clause::Return(return_clause) => {
3840                    // Check for property/label access on deleted entities
3841                    if !deleted_vars.is_empty() {
3842                        for item in &return_clause.items {
3843                            if let ReturnItem::Expr { expr, .. } = item {
3844                                validate_no_deleted_entity_access(expr, &deleted_vars)?;
3845                            }
3846                        }
3847                    }
3848                    plan = self.plan_return_clause(&return_clause, plan, &vars_in_scope)?;
3849                } // All Clause variants are handled above - no catch-all needed
3850            }
3851        }
3852
3853        // Wrap write operations without RETURN in Limit(0) per OpenCypher spec.
3854        // CREATE (n) should return 0 rows, but CREATE (n) RETURN n should return 1 row.
3855        // If RETURN was used, the plan will have been wrapped in Project, so we only
3856        // wrap terminal Create/CreateBatch/Delete/Set/Remove nodes.
3857        let plan = match &plan {
3858            LogicalPlan::Create { .. }
3859            | LogicalPlan::CreateBatch { .. }
3860            | LogicalPlan::Delete { .. }
3861            | LogicalPlan::Set { .. }
3862            | LogicalPlan::Remove { .. }
3863            | LogicalPlan::Merge { .. } => LogicalPlan::Limit {
3864                input: Box::new(plan),
3865                skip: None,
3866                fetch: Some(0),
3867            },
3868            _ => plan,
3869        };
3870
3871        Ok(plan)
3872    }
3873
3874    fn collect_properties_from_expr(expr: &Expr, collected: &mut Vec<Expr>) {
3875        match expr {
3876            Expr::Property(_, _)
3877                if !collected
3878                    .iter()
3879                    .any(|e| e.to_string_repr() == expr.to_string_repr()) =>
3880            {
3881                collected.push(expr.clone());
3882            }
3883            Expr::Property(_, _) => {}
3884            Expr::Variable(_) => {
3885                // Variables are already available, don't need to project them
3886            }
3887            Expr::BinaryOp { left, right, .. } => {
3888                Self::collect_properties_from_expr(left, collected);
3889                Self::collect_properties_from_expr(right, collected);
3890            }
3891            Expr::FunctionCall {
3892                args, window_spec, ..
3893            } => {
3894                for arg in args {
3895                    Self::collect_properties_from_expr(arg, collected);
3896                }
3897                if let Some(spec) = window_spec {
3898                    for partition_expr in &spec.partition_by {
3899                        Self::collect_properties_from_expr(partition_expr, collected);
3900                    }
3901                    for sort_item in &spec.order_by {
3902                        Self::collect_properties_from_expr(&sort_item.expr, collected);
3903                    }
3904                }
3905            }
3906            Expr::List(items) => {
3907                for item in items {
3908                    Self::collect_properties_from_expr(item, collected);
3909                }
3910            }
3911            Expr::UnaryOp { expr: e, .. }
3912            | Expr::IsNull(e)
3913            | Expr::IsNotNull(e)
3914            | Expr::IsUnique(e) => {
3915                Self::collect_properties_from_expr(e, collected);
3916            }
3917            Expr::Case {
3918                expr,
3919                when_then,
3920                else_expr,
3921            } => {
3922                if let Some(e) = expr {
3923                    Self::collect_properties_from_expr(e, collected);
3924                }
3925                for (w, t) in when_then {
3926                    Self::collect_properties_from_expr(w, collected);
3927                    Self::collect_properties_from_expr(t, collected);
3928                }
3929                if let Some(e) = else_expr {
3930                    Self::collect_properties_from_expr(e, collected);
3931                }
3932            }
3933            Expr::In { expr, list } => {
3934                Self::collect_properties_from_expr(expr, collected);
3935                Self::collect_properties_from_expr(list, collected);
3936            }
3937            Expr::ArrayIndex { array, index } => {
3938                Self::collect_properties_from_expr(array, collected);
3939                Self::collect_properties_from_expr(index, collected);
3940            }
3941            Expr::ArraySlice { array, start, end } => {
3942                Self::collect_properties_from_expr(array, collected);
3943                if let Some(s) = start {
3944                    Self::collect_properties_from_expr(s, collected);
3945                }
3946                if let Some(e) = end {
3947                    Self::collect_properties_from_expr(e, collected);
3948                }
3949            }
3950            _ => {}
3951        }
3952    }
3953
3954    fn collect_window_functions(expr: &Expr, collected: &mut Vec<Expr>) {
3955        if let Expr::FunctionCall { window_spec, .. } = expr {
3956            // Collect any function with a window spec (OVER clause)
3957            if window_spec.is_some() {
3958                if !collected
3959                    .iter()
3960                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3961                {
3962                    collected.push(expr.clone());
3963                }
3964                return;
3965            }
3966        }
3967
3968        match expr {
3969            Expr::BinaryOp { left, right, .. } => {
3970                Self::collect_window_functions(left, collected);
3971                Self::collect_window_functions(right, collected);
3972            }
3973            Expr::FunctionCall { args, .. } => {
3974                for arg in args {
3975                    Self::collect_window_functions(arg, collected);
3976                }
3977            }
3978            Expr::List(items) => {
3979                for i in items {
3980                    Self::collect_window_functions(i, collected);
3981                }
3982            }
3983            Expr::Map(items) => {
3984                for (_, i) in items {
3985                    Self::collect_window_functions(i, collected);
3986                }
3987            }
3988            Expr::IsNull(e) | Expr::IsNotNull(e) | Expr::UnaryOp { expr: e, .. } => {
3989                Self::collect_window_functions(e, collected);
3990            }
3991            Expr::Case {
3992                expr,
3993                when_then,
3994                else_expr,
3995            } => {
3996                if let Some(e) = expr {
3997                    Self::collect_window_functions(e, collected);
3998                }
3999                for (w, t) in when_then {
4000                    Self::collect_window_functions(w, collected);
4001                    Self::collect_window_functions(t, collected);
4002                }
4003                if let Some(e) = else_expr {
4004                    Self::collect_window_functions(e, collected);
4005                }
4006            }
4007            Expr::Reduce {
4008                init, list, expr, ..
4009            } => {
4010                Self::collect_window_functions(init, collected);
4011                Self::collect_window_functions(list, collected);
4012                Self::collect_window_functions(expr, collected);
4013            }
4014            Expr::Quantifier {
4015                list, predicate, ..
4016            } => {
4017                Self::collect_window_functions(list, collected);
4018                Self::collect_window_functions(predicate, collected);
4019            }
4020            Expr::In { expr, list } => {
4021                Self::collect_window_functions(expr, collected);
4022                Self::collect_window_functions(list, collected);
4023            }
4024            Expr::ArrayIndex { array, index } => {
4025                Self::collect_window_functions(array, collected);
4026                Self::collect_window_functions(index, collected);
4027            }
4028            Expr::ArraySlice { array, start, end } => {
4029                Self::collect_window_functions(array, collected);
4030                if let Some(s) = start {
4031                    Self::collect_window_functions(s, collected);
4032                }
4033                if let Some(e) = end {
4034                    Self::collect_window_functions(e, collected);
4035                }
4036            }
4037            Expr::Property(e, _) => Self::collect_window_functions(e, collected),
4038            Expr::CountSubquery(_) | Expr::Exists { .. } => {}
4039            _ => {}
4040        }
4041    }
4042
4043    /// Transform property expressions in manual window functions to use qualified variable names.
4044    ///
4045    /// Converts `Expr::Property(Expr::Variable("e"), "dept")` to `Expr::Variable("e.dept")`
4046    /// so the executor can look up values directly from the row HashMap after the
4047    /// intermediate projection has materialized these properties with qualified names.
4048    ///
4049    /// Transforms ALL window functions (both manual and aggregate).
4050    /// Properties like `e.dept` become variables like `Expr::Variable("e.dept")`.
4051    fn transform_window_expr_properties(expr: Expr) -> Expr {
4052        let Expr::FunctionCall {
4053            name,
4054            args,
4055            window_spec: Some(spec),
4056            distinct,
4057        } = expr
4058        else {
4059            return expr;
4060        };
4061
4062        // Transform arguments for ALL window functions
4063        // Both manual (ROW_NUMBER, etc.) and aggregate (SUM, AVG, etc.) need this
4064        let transformed_args = args
4065            .into_iter()
4066            .map(Self::transform_property_to_variable)
4067            .collect();
4068
4069        // CRITICAL: ALL window functions (manual and aggregate) need partition_by/order_by transformed
4070        let transformed_partition_by = spec
4071            .partition_by
4072            .into_iter()
4073            .map(Self::transform_property_to_variable)
4074            .collect();
4075
4076        let transformed_order_by = spec
4077            .order_by
4078            .into_iter()
4079            .map(|item| SortItem {
4080                expr: Self::transform_property_to_variable(item.expr),
4081                ascending: item.ascending,
4082            })
4083            .collect();
4084
4085        Expr::FunctionCall {
4086            name,
4087            args: transformed_args,
4088            window_spec: Some(WindowSpec {
4089                partition_by: transformed_partition_by,
4090                order_by: transformed_order_by,
4091            }),
4092            distinct,
4093        }
4094    }
4095
4096    /// Transform a property expression to a variable expression with qualified name.
4097    ///
4098    /// `Expr::Property(Expr::Variable("e"), "dept")` becomes `Expr::Variable("e.dept")`
4099    fn transform_property_to_variable(expr: Expr) -> Expr {
4100        let Expr::Property(base, prop) = expr else {
4101            return expr;
4102        };
4103
4104        match *base {
4105            Expr::Variable(var) => Expr::Variable(format!("{}.{}", var, prop)),
4106            other => Expr::Property(Box::new(Self::transform_property_to_variable(other)), prop),
4107        }
4108    }
4109
4110    /// Transform VALID_AT macro into function call
4111    ///
4112    /// `e VALID_AT timestamp` becomes `uni.temporal.validAt(e, 'valid_from', 'valid_to', timestamp)`
4113    /// `e VALID_AT(timestamp, 'start', 'end')` becomes `uni.temporal.validAt(e, 'start', 'end', timestamp)`
4114    fn transform_valid_at_to_function(expr: Expr) -> Expr {
4115        match expr {
4116            Expr::ValidAt {
4117                entity,
4118                timestamp,
4119                start_prop,
4120                end_prop,
4121            } => {
4122                let start = start_prop.unwrap_or_else(|| "valid_from".to_string());
4123                let end = end_prop.unwrap_or_else(|| "valid_to".to_string());
4124
4125                Expr::FunctionCall {
4126                    name: "uni.temporal.validAt".to_string(),
4127                    args: vec![
4128                        Self::transform_valid_at_to_function(*entity),
4129                        Expr::Literal(CypherLiteral::String(start)),
4130                        Expr::Literal(CypherLiteral::String(end)),
4131                        Self::transform_valid_at_to_function(*timestamp),
4132                    ],
4133                    distinct: false,
4134                    window_spec: None,
4135                }
4136            }
4137            // Recursively transform nested expressions
4138            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
4139                left: Box::new(Self::transform_valid_at_to_function(*left)),
4140                op,
4141                right: Box::new(Self::transform_valid_at_to_function(*right)),
4142            },
4143            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
4144                op,
4145                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
4146            },
4147            Expr::FunctionCall {
4148                name,
4149                args,
4150                distinct,
4151                window_spec,
4152            } => Expr::FunctionCall {
4153                name,
4154                args: args
4155                    .into_iter()
4156                    .map(Self::transform_valid_at_to_function)
4157                    .collect(),
4158                distinct,
4159                window_spec,
4160            },
4161            Expr::Property(base, prop) => {
4162                Expr::Property(Box::new(Self::transform_valid_at_to_function(*base)), prop)
4163            }
4164            Expr::List(items) => Expr::List(
4165                items
4166                    .into_iter()
4167                    .map(Self::transform_valid_at_to_function)
4168                    .collect(),
4169            ),
4170            Expr::In { expr, list } => Expr::In {
4171                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
4172                list: Box::new(Self::transform_valid_at_to_function(*list)),
4173            },
4174            Expr::IsNull(e) => Expr::IsNull(Box::new(Self::transform_valid_at_to_function(*e))),
4175            Expr::IsNotNull(e) => {
4176                Expr::IsNotNull(Box::new(Self::transform_valid_at_to_function(*e)))
4177            }
4178            Expr::IsUnique(e) => Expr::IsUnique(Box::new(Self::transform_valid_at_to_function(*e))),
4179            // Other cases: return as-is
4180            other => other,
4181        }
4182    }
4183
4184    /// Rewrite system-metadata function calls (`id(v)`, `created_at(v)`,
4185    /// `updated_at(v)`) to direct property access on the corresponding
4186    /// internal column (`v._vid`, `v._created_at`, `v._updated_at`). This
4187    /// normalization enables predicate pushdown via the Property pattern
4188    /// recognized by `PredicateAnalyzer`.
4189    ///
4190    /// All three functions share the same shape: single-arg, argument
4191    /// must be a node/edge variable, returns the column value directly.
4192    fn rewrite_id_to_vid(expr: Expr, vars_in_scope: &[VariableInfo]) -> Expr {
4193        match expr {
4194            Expr::FunctionCall {
4195                name,
4196                args,
4197                distinct,
4198                window_spec,
4199            } if args.len() == 1 && Self::metadata_function_column(&name, None).is_some() => {
4200                if let Expr::Variable(ref var) = args[0] {
4201                    // `id()` resolves to `_eid` for an edge binding and `_vid`
4202                    // for a node — edge rows expose `_eid`, not `_vid`. Mirror
4203                    // the projection path (`df_expr.rs` translate of `id`).
4204                    let var_type = find_var_in_scope(vars_in_scope, var).map(|v| v.var_type);
4205                    let column = Self::metadata_function_column(&name, var_type)
4206                        .unwrap()
4207                        .to_string();
4208                    Expr::Property(Box::new(Expr::Variable(var.clone())), column)
4209                } else {
4210                    Expr::FunctionCall {
4211                        name,
4212                        args,
4213                        distinct,
4214                        window_spec,
4215                    }
4216                }
4217            }
4218            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
4219                left: Box::new(Self::rewrite_id_to_vid(*left, vars_in_scope)),
4220                op,
4221                right: Box::new(Self::rewrite_id_to_vid(*right, vars_in_scope)),
4222            },
4223            Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
4224                op,
4225                expr: Box::new(Self::rewrite_id_to_vid(*inner, vars_in_scope)),
4226            },
4227            other => other,
4228        }
4229    }
4230
4231    /// Return the internal column name for a system-metadata function, or
4232    /// `None` if the name is not one of the recognised metadata functions.
4233    ///
4234    /// `id()` maps to `_eid` when its argument is a relationship
4235    /// (`VariableType::Edge`) and `_vid` otherwise; `var_type` is `None` when the
4236    /// caller only needs the is-metadata-function test.
4237    fn metadata_function_column(
4238        name: &str,
4239        var_type: Option<VariableType>,
4240    ) -> Option<&'static str> {
4241        if name.eq_ignore_ascii_case("id") {
4242            if matches!(var_type, Some(VariableType::Edge)) {
4243                Some("_eid")
4244            } else {
4245                Some("_vid")
4246            }
4247        } else if name.eq_ignore_ascii_case("created_at") {
4248            Some("_created_at")
4249        } else if name.eq_ignore_ascii_case("updated_at") {
4250            Some("_updated_at")
4251        } else {
4252            None
4253        }
4254    }
4255
4256    /// Plan a MATCH clause, handling both shortestPath and regular patterns.
4257    fn plan_match_clause(
4258        &self,
4259        match_clause: &MatchClause,
4260        plan: LogicalPlan,
4261        vars_in_scope: &mut Vec<VariableInfo>,
4262    ) -> Result<LogicalPlan> {
4263        let mut plan = plan;
4264
4265        if match_clause.pattern.paths.is_empty() {
4266            return Err(anyhow!("Empty pattern"));
4267        }
4268
4269        // Track variables introduced by this OPTIONAL MATCH
4270        let vars_before_pattern = vars_in_scope.len();
4271
4272        for path in &match_clause.pattern.paths {
4273            if let Some(mode) = &path.shortest_path_mode {
4274                plan =
4275                    self.plan_shortest_path(path, plan, vars_in_scope, mode, vars_before_pattern)?;
4276            } else {
4277                plan = self.plan_path(
4278                    path,
4279                    plan,
4280                    vars_in_scope,
4281                    match_clause.optional,
4282                    vars_before_pattern,
4283                )?;
4284            }
4285        }
4286
4287        // Collect variables introduced by this OPTIONAL MATCH pattern
4288        let optional_vars: HashSet<String> = if match_clause.optional {
4289            vars_in_scope[vars_before_pattern..]
4290                .iter()
4291                .map(|v| v.name.clone())
4292                .collect()
4293        } else {
4294            HashSet::new()
4295        };
4296
4297        // Handle WHERE clause with vector_similarity and predicate pushdown
4298        if let Some(predicate) = &match_clause.where_clause {
4299            plan = self.plan_where_clause(predicate, plan, vars_in_scope, optional_vars)?;
4300        }
4301
4302        Ok(plan)
4303    }
4304
4305    /// Plan a shortestPath pattern.
4306    fn plan_shortest_path(
4307        &self,
4308        path: &PathPattern,
4309        plan: LogicalPlan,
4310        vars_in_scope: &mut Vec<VariableInfo>,
4311        mode: &ShortestPathMode,
4312        _vars_before_pattern: usize,
4313    ) -> Result<LogicalPlan> {
4314        let mut plan = plan;
4315        let elements = &path.elements;
4316
4317        // Pattern must be: node-rel-node-rel-...-node (odd number of elements >= 3)
4318        if elements.len() < 3 || elements.len().is_multiple_of(2) {
4319            return Err(anyhow!(
4320                "shortestPath requires at least one relationship: (a)-[*]->(b)"
4321            ));
4322        }
4323
4324        let source_node = match &elements[0] {
4325            PatternElement::Node(n) => n,
4326            _ => return Err(anyhow!("ShortestPath must start with a node")),
4327        };
4328        let rel = match &elements[1] {
4329            PatternElement::Relationship(r) => r,
4330            _ => {
4331                return Err(anyhow!(
4332                    "ShortestPath middle element must be a relationship"
4333                ));
4334            }
4335        };
4336        let target_node = match &elements[2] {
4337            PatternElement::Node(n) => n,
4338            _ => return Err(anyhow!("ShortestPath must end with a node")),
4339        };
4340
4341        let source_var = source_node
4342            .variable
4343            .clone()
4344            .ok_or_else(|| anyhow!("Source node must have variable in shortestPath"))?;
4345        let target_var = target_node
4346            .variable
4347            .clone()
4348            .ok_or_else(|| anyhow!("Target node must have variable in shortestPath"))?;
4349        let path_var = path
4350            .variable
4351            .clone()
4352            .ok_or_else(|| anyhow!("shortestPath must be assigned to a variable"))?;
4353
4354        let source_bound = is_var_in_scope(vars_in_scope, &source_var);
4355        let target_bound = is_var_in_scope(vars_in_scope, &target_var);
4356
4357        // Plan source node if not bound
4358        if !source_bound {
4359            plan = self.plan_unbound_node(source_node, &source_var, plan, false)?;
4360        } else if let Some(prop_filter) =
4361            self.properties_to_expr(&source_var, &source_node.properties)
4362        {
4363            plan = LogicalPlan::Filter {
4364                input: Box::new(plan),
4365                predicate: prop_filter,
4366                optional_variables: HashSet::new(),
4367            };
4368        }
4369
4370        // Plan target node if not bound
4371        let target_label_id = if !target_bound {
4372            // Use first label for target_label_id
4373            let target_label_name = target_node
4374                .labels
4375                .first()
4376                .ok_or_else(|| anyhow!("Target node must have label if not already bound"))?;
4377            // Native lookup first; then consult `CatalogProvider` /
4378            // `ReplacementScanProvider` and allocate a virtual label-id
4379            // (M5b follow-up #6). Virtual ids dispatch to
4380            // `CatalogVertexScanExec` at physical-plan time.
4381            let target_label_id =
4382                if let Some(meta) = self.schema.get_label_case_insensitive(target_label_name) {
4383                    meta.id
4384                } else if let Some((vid, _)) = self.allocate_virtual_label(target_label_name)? {
4385                    vid
4386                } else {
4387                    return Err(anyhow!("Label {} not found", target_label_name));
4388                };
4389
4390            let target_scan = LogicalPlan::Scan {
4391                label_id: target_label_id,
4392                labels: target_node.labels.names().to_vec(),
4393                variable: target_var.clone(),
4394                filter: self.properties_to_expr(&target_var, &target_node.properties),
4395                optional: false,
4396            };
4397
4398            plan = Self::join_with_plan(plan, target_scan);
4399            target_label_id
4400        } else {
4401            if let Some(prop_filter) = self.properties_to_expr(&target_var, &target_node.properties)
4402            {
4403                plan = LogicalPlan::Filter {
4404                    input: Box::new(plan),
4405                    predicate: prop_filter,
4406                    optional_variables: HashSet::new(),
4407                };
4408            }
4409            0 // Wildcard for already-bound target
4410        };
4411
4412        // Add ShortestPath operator
4413        let edge_type_ids = if rel.types.is_empty() {
4414            // If no type specified, fetch all edge types (both schema and schemaless)
4415            self.schema.all_edge_type_ids()
4416        } else {
4417            let mut ids = Vec::new();
4418            for type_name in &rel.types {
4419                let id = if let Some(meta) = self.schema.edge_types.get(type_name) {
4420                    meta.id
4421                } else if let Some((vid, _)) = self.allocate_virtual_edge_type(type_name)? {
4422                    vid
4423                } else {
4424                    return Err(anyhow!("Edge type {} not found", type_name));
4425                };
4426                ids.push(id);
4427            }
4428            ids
4429        };
4430
4431        // Extract hop constraints from relationship pattern
4432        let min_hops = rel.range.as_ref().and_then(|r| r.min).unwrap_or(1);
4433        let max_hops = rel.range.as_ref().and_then(|r| r.max).unwrap_or(u32::MAX);
4434
4435        let sp_plan = match mode {
4436            ShortestPathMode::Shortest => LogicalPlan::ShortestPath {
4437                input: Box::new(plan),
4438                edge_type_ids,
4439                direction: rel.direction.clone(),
4440                source_variable: source_var.clone(),
4441                target_variable: target_var.clone(),
4442                target_label_id,
4443                path_variable: path_var.clone(),
4444                min_hops,
4445                max_hops,
4446            },
4447            ShortestPathMode::AllShortest => LogicalPlan::AllShortestPaths {
4448                input: Box::new(plan),
4449                edge_type_ids,
4450                direction: rel.direction.clone(),
4451                source_variable: source_var.clone(),
4452                target_variable: target_var.clone(),
4453                target_label_id,
4454                path_variable: path_var.clone(),
4455                min_hops,
4456                max_hops,
4457            },
4458        };
4459
4460        if !source_bound {
4461            add_var_to_scope(vars_in_scope, &source_var, VariableType::Node)?;
4462        }
4463        if !target_bound {
4464            add_var_to_scope(vars_in_scope, &target_var, VariableType::Node)?;
4465        }
4466        add_var_to_scope(vars_in_scope, &path_var, VariableType::Path)?;
4467
4468        Ok(sp_plan)
4469    }
4470    /// Plan a MATCH pattern into a LogicalPlan (Scan → Traverse chains).
4471    ///
4472    /// This is a public entry point for the Locy plan builder to reuse the
4473    /// existing pattern-planning logic for clause bodies.
4474    pub fn plan_pattern(
4475        &self,
4476        pattern: &Pattern,
4477        initial_vars: &[VariableInfo],
4478    ) -> Result<LogicalPlan> {
4479        let mut vars_in_scope: Vec<VariableInfo> = initial_vars.to_vec();
4480        let vars_before_pattern = vars_in_scope.len();
4481        let mut plan = LogicalPlan::Empty;
4482        for path in &pattern.paths {
4483            plan = self.plan_path(path, plan, &mut vars_in_scope, false, vars_before_pattern)?;
4484        }
4485        Ok(plan)
4486    }
4487
4488    /// Plan a regular MATCH path (not shortestPath).
4489    fn plan_path(
4490        &self,
4491        path: &PathPattern,
4492        plan: LogicalPlan,
4493        vars_in_scope: &mut Vec<VariableInfo>,
4494        optional: bool,
4495        vars_before_pattern: usize,
4496    ) -> Result<LogicalPlan> {
4497        let mut plan = plan;
4498        let elements = &path.elements;
4499        let mut i = 0;
4500
4501        let path_variable = path.variable.clone();
4502
4503        // Check for VariableAlreadyBound: path variable already in scope
4504        if let Some(pv) = &path_variable
4505            && !pv.is_empty()
4506            && is_var_in_scope(vars_in_scope, pv)
4507        {
4508            return Err(anyhow!(
4509                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4510                pv
4511            ));
4512        }
4513
4514        // Check for VariableAlreadyBound: path variable conflicts with element variables
4515        if let Some(pv) = &path_variable
4516            && !pv.is_empty()
4517        {
4518            for element in elements {
4519                match element {
4520                    PatternElement::Node(n) => {
4521                        if let Some(v) = &n.variable
4522                            && v == pv
4523                        {
4524                            return Err(anyhow!(
4525                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4526                                pv
4527                            ));
4528                        }
4529                    }
4530                    PatternElement::Relationship(r) => {
4531                        if let Some(v) = &r.variable
4532                            && v == pv
4533                        {
4534                            return Err(anyhow!(
4535                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4536                                pv
4537                            ));
4538                        }
4539                    }
4540                    PatternElement::Parenthesized { .. } => {}
4541                }
4542            }
4543        }
4544
4545        // For OPTIONAL MATCH, extract all variables from this pattern upfront.
4546        // When any hop fails in a multi-hop pattern, ALL these variables should be NULL.
4547        let mut optional_pattern_vars: HashSet<String> = if optional {
4548            let mut vars = HashSet::new();
4549            for element in elements {
4550                match element {
4551                    PatternElement::Node(n) => {
4552                        if let Some(v) = &n.variable
4553                            && !v.is_empty()
4554                            && !is_var_in_scope(vars_in_scope, v)
4555                        {
4556                            vars.insert(v.clone());
4557                        }
4558                    }
4559                    PatternElement::Relationship(r) => {
4560                        if let Some(v) = &r.variable
4561                            && !v.is_empty()
4562                            && !is_var_in_scope(vars_in_scope, v)
4563                        {
4564                            vars.insert(v.clone());
4565                        }
4566                    }
4567                    PatternElement::Parenthesized { pattern, .. } => {
4568                        // Also check nested patterns
4569                        for nested_elem in &pattern.elements {
4570                            match nested_elem {
4571                                PatternElement::Node(n) => {
4572                                    if let Some(v) = &n.variable
4573                                        && !v.is_empty()
4574                                        && !is_var_in_scope(vars_in_scope, v)
4575                                    {
4576                                        vars.insert(v.clone());
4577                                    }
4578                                }
4579                                PatternElement::Relationship(r) => {
4580                                    if let Some(v) = &r.variable
4581                                        && !v.is_empty()
4582                                        && !is_var_in_scope(vars_in_scope, v)
4583                                    {
4584                                        vars.insert(v.clone());
4585                                    }
4586                                }
4587                                _ => {}
4588                            }
4589                        }
4590                    }
4591                }
4592            }
4593            // Include path variable if present
4594            if let Some(pv) = &path_variable
4595                && !pv.is_empty()
4596            {
4597                vars.insert(pv.clone());
4598            }
4599            vars
4600        } else {
4601            HashSet::new()
4602        };
4603
4604        // Pre-scan path elements for bound edge variables from previous MATCH clauses.
4605        // These must participate in Trail mode (relationship uniqueness) enforcement
4606        // across ALL segments in this path, so that VLP segments like [*0..1] don't
4607        // traverse through edges already claimed by a bound relationship [r].
4608        let path_bound_edge_vars: HashSet<String> = {
4609            let mut bound = HashSet::new();
4610            for element in elements {
4611                if let PatternElement::Relationship(rel) = element
4612                    && let Some(ref var_name) = rel.variable
4613                    && !var_name.is_empty()
4614                    && vars_in_scope[..vars_before_pattern]
4615                        .iter()
4616                        .any(|v| v.name == *var_name)
4617                {
4618                    bound.insert(var_name.clone());
4619                }
4620            }
4621            bound
4622        };
4623
4624        // Track if any traverses were added (for zero-length path detection)
4625        let mut had_traverses = false;
4626        // Track the node variable for zero-length path binding
4627        let mut single_node_variable: Option<String> = None;
4628        // Collect node/edge variables for BindPath (fixed-length path binding)
4629        let mut path_node_vars: Vec<String> = Vec::new();
4630        let mut path_edge_vars: Vec<String> = Vec::new();
4631        // Track the last processed outer node variable for QPP source binding.
4632        // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is `a`, not `x`.
4633        let mut last_outer_node_var: Option<String> = None;
4634
4635        // Multi-hop path variables are now supported - path is accumulated across hops
4636        while i < elements.len() {
4637            let element = &elements[i];
4638            match element {
4639                PatternElement::Node(n) => {
4640                    let mut variable = n.variable.clone().unwrap_or_default();
4641                    if variable.is_empty() {
4642                        variable = self.next_anon_var();
4643                    }
4644                    // Track first node variable for zero-length path
4645                    if single_node_variable.is_none() {
4646                        single_node_variable = Some(variable.clone());
4647                    }
4648                    let is_bound =
4649                        !variable.is_empty() && is_var_in_scope(vars_in_scope, &variable);
4650                    if optional && !is_bound {
4651                        optional_pattern_vars.insert(variable.clone());
4652                    }
4653
4654                    if is_bound {
4655                        // Check for type conflict - can't use an Edge/Path as a Node
4656                        if let Some(info) = find_var_in_scope(vars_in_scope, &variable)
4657                            && !info.var_type.is_compatible_with(VariableType::Node)
4658                        {
4659                            return Err(anyhow!(
4660                                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4661                                variable,
4662                                info.var_type
4663                            ));
4664                        }
4665                        if let Some(node_filter) =
4666                            self.node_filter_expr(&variable, &n.labels, &n.properties)
4667                        {
4668                            plan = LogicalPlan::Filter {
4669                                input: Box::new(plan),
4670                                predicate: node_filter,
4671                                optional_variables: HashSet::new(),
4672                            };
4673                        }
4674                    } else {
4675                        plan = self.plan_unbound_node(n, &variable, plan, optional)?;
4676                        if !variable.is_empty() {
4677                            add_var_to_scope(vars_in_scope, &variable, VariableType::Node)?;
4678                        }
4679                    }
4680
4681                    // Track source node for BindPath
4682                    if path_variable.is_some() && path_node_vars.is_empty() {
4683                        path_node_vars.push(variable.clone());
4684                    }
4685
4686                    // Look ahead for relationships
4687                    let mut current_source_var = variable;
4688                    last_outer_node_var = Some(current_source_var.clone());
4689                    i += 1;
4690                    while i < elements.len() {
4691                        if let PatternElement::Relationship(r) = &elements[i] {
4692                            if i + 1 < elements.len() {
4693                                let target_node_part = &elements[i + 1];
4694                                if let PatternElement::Node(n_target) = target_node_part {
4695                                    // For VLP traversals, pass path_variable through
4696                                    // For fixed-length, we use BindPath instead
4697                                    let is_vlp = r.range.is_some();
4698                                    let traverse_path_var =
4699                                        if is_vlp { path_variable.clone() } else { None };
4700
4701                                    // If we're about to start a VLP segment and there are
4702                                    // collected fixed-hop path vars, create an intermediate
4703                                    // BindPath for the fixed prefix first. The VLP will then
4704                                    // extend this existing path.
4705                                    if is_vlp
4706                                        && let Some(pv) = path_variable.as_ref()
4707                                        && !path_node_vars.is_empty()
4708                                    {
4709                                        plan = LogicalPlan::BindPath {
4710                                            input: Box::new(plan),
4711                                            node_variables: std::mem::take(&mut path_node_vars),
4712                                            edge_variables: std::mem::take(&mut path_edge_vars),
4713                                            path_variable: pv.clone(),
4714                                        };
4715                                        if !is_var_in_scope(vars_in_scope, pv) {
4716                                            add_var_to_scope(
4717                                                vars_in_scope,
4718                                                pv,
4719                                                VariableType::Path,
4720                                            )?;
4721                                        }
4722                                    }
4723
4724                                    // Plan the traverse from the current source node
4725                                    let target_was_bound =
4726                                        n_target.variable.as_ref().is_some_and(|v| {
4727                                            !v.is_empty() && is_var_in_scope(vars_in_scope, v)
4728                                        });
4729                                    let (new_plan, target_var, effective_target) = self
4730                                        .plan_traverse_with_source(
4731                                            plan,
4732                                            vars_in_scope,
4733                                            TraverseParams {
4734                                                rel: r,
4735                                                target_node: n_target,
4736                                                optional,
4737                                                path_variable: traverse_path_var,
4738                                                optional_pattern_vars: optional_pattern_vars
4739                                                    .clone(),
4740                                            },
4741                                            &current_source_var,
4742                                            vars_before_pattern,
4743                                            &path_bound_edge_vars,
4744                                        )?;
4745                                    plan = new_plan;
4746                                    if optional && !target_was_bound {
4747                                        optional_pattern_vars.insert(target_var.clone());
4748                                    }
4749
4750                                    // Track edge/target node for BindPath
4751                                    if path_variable.is_some() && !is_vlp {
4752                                        // Use the edge variable if given, otherwise use
4753                                        // the internal tracking column pattern.
4754                                        // Use effective_target (which may be __rebound_x
4755                                        // for bound-target traversals) to match the actual
4756                                        // column name produced by GraphTraverseExec.
4757                                        if let Some(ev) = &r.variable {
4758                                            path_edge_vars.push(ev.clone());
4759                                        } else {
4760                                            path_edge_vars
4761                                                .push(format!("__eid_to_{}", effective_target));
4762                                        }
4763                                        path_node_vars.push(target_var.clone());
4764                                    }
4765
4766                                    current_source_var = target_var;
4767                                    last_outer_node_var = Some(current_source_var.clone());
4768                                    had_traverses = true;
4769                                    i += 2;
4770                                } else {
4771                                    return Err(anyhow!("Relationship must be followed by a node"));
4772                                }
4773                            } else {
4774                                return Err(anyhow!("Relationship cannot be the last element"));
4775                            }
4776                        } else {
4777                            break;
4778                        }
4779                    }
4780                }
4781                PatternElement::Relationship(_) => {
4782                    return Err(anyhow!("Pattern must start with a node"));
4783                }
4784                PatternElement::Parenthesized { pattern, range } => {
4785                    // Quantified pattern: ((a)-[:REL]->(b)){n,m}
4786                    // Validate: odd number of elements (node-rel-node[-rel-node]*)
4787                    if pattern.elements.len() < 3 || pattern.elements.len() % 2 == 0 {
4788                        return Err(anyhow!(
4789                            "Quantified pattern must have node-relationship-node structure (odd number >= 3 elements)"
4790                        ));
4791                    }
4792
4793                    let source_node = match &pattern.elements[0] {
4794                        PatternElement::Node(n) => n,
4795                        _ => return Err(anyhow!("Quantified pattern must start with a node")),
4796                    };
4797
4798                    // Extract all relationship-node pairs (QPP steps)
4799                    let mut qpp_rels: Vec<(&RelationshipPattern, &NodePattern)> = Vec::new();
4800                    for pair_idx in (1..pattern.elements.len()).step_by(2) {
4801                        let rel = match &pattern.elements[pair_idx] {
4802                            PatternElement::Relationship(r) => r,
4803                            _ => {
4804                                return Err(anyhow!(
4805                                    "Quantified pattern element at position {} must be a relationship",
4806                                    pair_idx
4807                                ));
4808                            }
4809                        };
4810                        let node = match &pattern.elements[pair_idx + 1] {
4811                            PatternElement::Node(n) => n,
4812                            _ => {
4813                                return Err(anyhow!(
4814                                    "Quantified pattern element at position {} must be a node",
4815                                    pair_idx + 1
4816                                ));
4817                            }
4818                        };
4819                        // Reject nested quantifiers
4820                        if rel.range.is_some() {
4821                            return Err(anyhow!(
4822                                "Nested quantifiers not supported: ((a)-[:REL*n]->(b)){{m}}"
4823                            ));
4824                        }
4825                        qpp_rels.push((rel, node));
4826                    }
4827
4828                    // Check if there's an outer target node after the Parenthesized element.
4829                    // In syntax like `(a)((x)-[:LINK]->(y)){2,4}(b)`, the `(b)` is the outer
4830                    // target that should receive the traversal result.
4831                    let inner_target_node = qpp_rels.last().unwrap().1;
4832                    let outer_target_node = if i + 1 < elements.len() {
4833                        match &elements[i + 1] {
4834                            PatternElement::Node(n) => Some(n),
4835                            _ => None,
4836                        }
4837                    } else {
4838                        None
4839                    };
4840                    // Use the outer target for variable binding and filters; inner target
4841                    // labels are used for state constraints within the NFA.
4842                    let target_node = outer_target_node.unwrap_or(inner_target_node);
4843
4844                    // For simple 3-element single-hop QPP without intermediate label constraints,
4845                    // fall back to existing VLP behavior (copy range to relationship).
4846                    let use_simple_vlp = qpp_rels.len() == 1
4847                        && inner_target_node
4848                            .labels
4849                            .first()
4850                            .and_then(|l| self.schema.get_label_case_insensitive(l))
4851                            .is_none();
4852
4853                    // Plan source node.
4854                    // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is the preceding
4855                    // outer node `a`, NOT the inner `x`. If there's a preceding outer
4856                    // node variable, use it; otherwise fall back to the inner source.
4857                    let source_variable = if let Some(ref outer_src) = last_outer_node_var {
4858                        // The preceding outer node is already bound and in scope
4859                        // Apply any property filters from the inner source node
4860                        if let Some(prop_filter) =
4861                            self.properties_to_expr(outer_src, &source_node.properties)
4862                        {
4863                            plan = LogicalPlan::Filter {
4864                                input: Box::new(plan),
4865                                predicate: prop_filter,
4866                                optional_variables: HashSet::new(),
4867                            };
4868                        }
4869                        outer_src.clone()
4870                    } else {
4871                        let sv = source_node
4872                            .variable
4873                            .clone()
4874                            .filter(|v| !v.is_empty())
4875                            .unwrap_or_else(|| self.next_anon_var());
4876
4877                        if is_var_in_scope(vars_in_scope, &sv) {
4878                            // Source is already bound, apply property filter if needed
4879                            if let Some(prop_filter) =
4880                                self.properties_to_expr(&sv, &source_node.properties)
4881                            {
4882                                plan = LogicalPlan::Filter {
4883                                    input: Box::new(plan),
4884                                    predicate: prop_filter,
4885                                    optional_variables: HashSet::new(),
4886                                };
4887                            }
4888                        } else {
4889                            // Source is unbound, scan it
4890                            plan = self.plan_unbound_node(source_node, &sv, plan, optional)?;
4891                            add_var_to_scope(vars_in_scope, &sv, VariableType::Node)?;
4892                            if optional {
4893                                optional_pattern_vars.insert(sv.clone());
4894                            }
4895                        }
4896                        sv
4897                    };
4898
4899                    if use_simple_vlp {
4900                        // Simple single-hop QPP: apply range to relationship and use VLP path
4901                        let mut relationship = qpp_rels[0].0.clone();
4902                        relationship.range = range.clone();
4903
4904                        let target_was_bound = target_node
4905                            .variable
4906                            .as_ref()
4907                            .is_some_and(|v| !v.is_empty() && is_var_in_scope(vars_in_scope, v));
4908                        let (new_plan, target_var, _effective_target) = self
4909                            .plan_traverse_with_source(
4910                                plan,
4911                                vars_in_scope,
4912                                TraverseParams {
4913                                    rel: &relationship,
4914                                    target_node,
4915                                    optional,
4916                                    path_variable: path_variable.clone(),
4917                                    optional_pattern_vars: optional_pattern_vars.clone(),
4918                                },
4919                                &source_variable,
4920                                vars_before_pattern,
4921                                &path_bound_edge_vars,
4922                            )?;
4923                        plan = new_plan;
4924                        if optional && !target_was_bound {
4925                            optional_pattern_vars.insert(target_var);
4926                        }
4927                    } else {
4928                        // Multi-hop or label-constrained single-hop QPP: build QppStepInfo list and create Traverse with qpp_steps
4929                        let mut qpp_step_infos = Vec::new();
4930                        let mut all_edge_type_ids = Vec::new();
4931
4932                        for (rel, node) in &qpp_rels {
4933                            let mut step_edge_type_ids = Vec::new();
4934                            if rel.types.is_empty() {
4935                                step_edge_type_ids = self.schema.all_edge_type_ids();
4936                            } else {
4937                                for type_name in &rel.types {
4938                                    if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4939                                        step_edge_type_ids.push(edge_meta.id);
4940                                    }
4941                                }
4942                            }
4943                            all_edge_type_ids.extend_from_slice(&step_edge_type_ids);
4944
4945                            let target_label = node.labels.first().and_then(|l| {
4946                                self.schema.get_label_case_insensitive(l).map(|_| l.clone())
4947                            });
4948
4949                            qpp_step_infos.push(QppStepInfo {
4950                                edge_type_ids: step_edge_type_ids,
4951                                direction: rel.direction.clone(),
4952                                target_label,
4953                            });
4954                        }
4955
4956                        // Deduplicate edge type IDs for adjacency warming
4957                        all_edge_type_ids.sort_unstable();
4958                        all_edge_type_ids.dedup();
4959
4960                        // Compute iteration bounds from range
4961                        let hops_per_iter = qpp_step_infos.len();
4962                        const QPP_DEFAULT_MAX_HOPS: usize = 100;
4963                        let (min_iter, max_iter) = if let Some(range) = range {
4964                            let min = range.min.unwrap_or(1) as usize;
4965                            let max = range
4966                                .max
4967                                .map(|m| m as usize)
4968                                .unwrap_or(QPP_DEFAULT_MAX_HOPS / hops_per_iter);
4969                            (min, max)
4970                        } else {
4971                            (1, 1)
4972                        };
4973                        let min_hops = min_iter * hops_per_iter;
4974                        let max_hops = max_iter * hops_per_iter;
4975
4976                        // Target variable from the last node in the QPP sub-pattern
4977                        let target_variable = target_node
4978                            .variable
4979                            .clone()
4980                            .filter(|v| !v.is_empty())
4981                            .unwrap_or_else(|| self.next_anon_var());
4982
4983                        let target_is_bound = is_var_in_scope(vars_in_scope, &target_variable);
4984
4985                        // Determine target label for the final node
4986                        let target_label_meta = target_node
4987                            .labels
4988                            .first()
4989                            .and_then(|l| self.schema.get_label_case_insensitive(l));
4990
4991                        // Collect scope match variables
4992                        let mut scope_match_variables: HashSet<String> = vars_in_scope
4993                            [vars_before_pattern..]
4994                            .iter()
4995                            .map(|v| v.name.clone())
4996                            .collect();
4997                        scope_match_variables.insert(target_variable.clone());
4998
4999                        // Handle bound target: use rebound variable for traverse
5000                        let rebound_target_var = if target_is_bound {
5001                            Some(target_variable.clone())
5002                        } else {
5003                            None
5004                        };
5005                        let effective_target_var = if let Some(ref bv) = rebound_target_var {
5006                            format!("__rebound_{}", bv)
5007                        } else {
5008                            target_variable.clone()
5009                        };
5010
5011                        plan = LogicalPlan::Traverse {
5012                            input: Box::new(plan),
5013                            edge_type_ids: all_edge_type_ids,
5014                            direction: qpp_rels[0].0.direction.clone(),
5015                            source_variable: source_variable.to_string(),
5016                            target_variable: effective_target_var.clone(),
5017                            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
5018                            step_variable: None, // QPP doesn't expose intermediate edges
5019                            min_hops,
5020                            max_hops,
5021                            optional,
5022                            target_filter: self.node_filter_expr(
5023                                &target_variable,
5024                                &target_node.labels,
5025                                &target_node.properties,
5026                            ),
5027                            path_variable: path_variable.clone(),
5028                            edge_properties: HashSet::new(),
5029                            is_variable_length: true,
5030                            optional_pattern_vars: optional_pattern_vars.clone(),
5031                            scope_match_variables,
5032                            edge_filter_expr: None,
5033                            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5034                            qpp_steps: Some(qpp_step_infos),
5035                        };
5036
5037                        // Handle bound target: filter rebound results against original variable
5038                        if let Some(ref btv) = rebound_target_var {
5039                            // Filter: __rebound_x._vid = x._vid
5040                            let filter_pred = Expr::BinaryOp {
5041                                left: Box::new(Expr::Property(
5042                                    Box::new(Expr::Variable(effective_target_var.clone())),
5043                                    "_vid".to_string(),
5044                                )),
5045                                op: BinaryOp::Eq,
5046                                right: Box::new(Expr::Property(
5047                                    Box::new(Expr::Variable(btv.clone())),
5048                                    "_vid".to_string(),
5049                                )),
5050                            };
5051                            plan = LogicalPlan::Filter {
5052                                input: Box::new(plan),
5053                                predicate: filter_pred,
5054                                optional_variables: if optional {
5055                                    optional_pattern_vars.clone()
5056                                } else {
5057                                    HashSet::new()
5058                                },
5059                            };
5060                        }
5061
5062                        // Add target variable to scope
5063                        if !target_is_bound {
5064                            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5065                        }
5066
5067                        // Add path variable to scope
5068                        if let Some(ref pv) = path_variable
5069                            && !pv.is_empty()
5070                            && !is_var_in_scope(vars_in_scope, pv)
5071                        {
5072                            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5073                        }
5074                    }
5075                    had_traverses = true;
5076
5077                    // Skip the outer target node if we consumed it
5078                    if outer_target_node.is_some() {
5079                        i += 2; // skip both Parenthesized and the following Node
5080                    } else {
5081                        i += 1;
5082                    }
5083                }
5084            }
5085        }
5086
5087        // If this is a single-node pattern with a path variable, bind the zero-length path
5088        // E.g., `p = (a)` should create a Path with one node and zero edges
5089        if let Some(ref path_var) = path_variable
5090            && !path_var.is_empty()
5091            && !had_traverses
5092            && let Some(node_var) = single_node_variable
5093        {
5094            plan = LogicalPlan::BindZeroLengthPath {
5095                input: Box::new(plan),
5096                node_variable: node_var,
5097                path_variable: path_var.clone(),
5098            };
5099            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
5100        }
5101
5102        // Bind fixed-length path from collected node/edge variables
5103        if let Some(ref path_var) = path_variable
5104            && !path_var.is_empty()
5105            && had_traverses
5106            && !path_node_vars.is_empty()
5107            && !is_var_in_scope(vars_in_scope, path_var)
5108        {
5109            plan = LogicalPlan::BindPath {
5110                input: Box::new(plan),
5111                node_variables: path_node_vars,
5112                edge_variables: path_edge_vars,
5113                path_variable: path_var.clone(),
5114            };
5115            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
5116        }
5117
5118        Ok(plan)
5119    }
5120
5121    /// Plan a traverse with an explicit source variable name.
5122    ///
5123    /// Returns `(plan, target_variable, effective_target_variable)` where:
5124    /// - `target_variable` is the semantic variable name for downstream scope
5125    /// - `effective_target_variable` is the actual column-name prefix used by
5126    ///   the traverse (may be `__rebound_x` for bound-target patterns)
5127    fn plan_traverse_with_source(
5128        &self,
5129        plan: LogicalPlan,
5130        vars_in_scope: &mut Vec<VariableInfo>,
5131        params: TraverseParams<'_>,
5132        source_variable: &str,
5133        vars_before_pattern: usize,
5134        path_bound_edge_vars: &HashSet<String>,
5135    ) -> Result<(LogicalPlan, String, String)> {
5136        // Check for parameter used as relationship predicate
5137        if let Some(Expr::Parameter(_)) = &params.rel.properties {
5138            return Err(anyhow!(
5139                "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
5140            ));
5141        }
5142
5143        let mut edge_type_ids = Vec::new();
5144        let mut dst_labels = Vec::new();
5145        let mut unknown_types = Vec::new();
5146
5147        if params.rel.types.is_empty() {
5148            // All types - include both schema and schemaless edge types
5149            // This ensures MATCH (a)-[r]->(b) finds edges even when no schema is defined
5150            edge_type_ids = self.schema.all_edge_type_ids();
5151            for meta in self.schema.edge_types.values() {
5152                dst_labels.extend(meta.dst_labels.iter().cloned());
5153            }
5154        } else {
5155            for type_name in &params.rel.types {
5156                if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
5157                    // Known type - use standard Traverse with type_id
5158                    edge_type_ids.push(edge_meta.id);
5159                    dst_labels.extend(edge_meta.dst_labels.iter().cloned());
5160                } else if let Some((vid, _)) = self.allocate_virtual_edge_type(type_name)? {
5161                    // M5b.3: virtual edge type (plugin-registered CatalogTable).
5162                    // Resolving it into `edge_type_ids` (not `unknown_types`)
5163                    // lets the regular `Traverse` planner build a structured
5164                    // plan that the physical planner can dispatch to a
5165                    // `CatalogEdgeScanExec` mid-pattern.
5166                    edge_type_ids.push(vid);
5167                } else {
5168                    // Unknown type - will use TraverseMainByType
5169                    unknown_types.push(type_name.clone());
5170                }
5171            }
5172        }
5173
5174        // Deduplicate edge type IDs and unknown types ([:T|:T] → [:T])
5175        edge_type_ids.sort_unstable();
5176        edge_type_ids.dedup();
5177        unknown_types.sort_unstable();
5178        unknown_types.dedup();
5179
5180        let mut target_variable = params.target_node.variable.clone().unwrap_or_default();
5181        if target_variable.is_empty() {
5182            target_variable = self.next_anon_var();
5183        }
5184        let target_is_bound =
5185            !target_variable.is_empty() && is_var_in_scope(vars_in_scope, &target_variable);
5186
5187        // Check for VariableTypeConflict: relationship variable used as node
5188        // e.g., ()-[r]-(r) where r is both the edge and a node endpoint
5189        if let Some(rel_var) = &params.rel.variable
5190            && !rel_var.is_empty()
5191            && rel_var == &target_variable
5192        {
5193            return Err(anyhow!(
5194                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as relationship, cannot use as node",
5195                rel_var
5196            ));
5197        }
5198
5199        // Check for VariableTypeConflict/RelationshipUniquenessViolation
5200        // e.g., (r)-[r]-() or r = ()-[]-(), ()-[r]-()
5201        // Also: (a)-[r]->()-[r]->(a) where r is reused as relationship in same pattern
5202        // BUT: MATCH (a)-[r]->() WITH r MATCH ()-[r]->() is ALLOWED (r is bound from previous clause)
5203        let mut bound_edge_var: Option<String> = None;
5204        let mut bound_edge_list_var: Option<String> = None;
5205        if let Some(rel_var) = &params.rel.variable
5206            && !rel_var.is_empty()
5207            && let Some(info) = find_var_in_scope(vars_in_scope, rel_var)
5208        {
5209            let is_from_previous_clause = vars_in_scope[..vars_before_pattern]
5210                .iter()
5211                .any(|v| v.name == *rel_var);
5212
5213            if info.var_type == VariableType::Edge {
5214                // Check if this edge variable comes from a previous clause (before this MATCH)
5215                if is_from_previous_clause {
5216                    // Edge variable bound from previous clause - this is allowed
5217                    // We'll filter the traversal to match this specific edge
5218                    bound_edge_var = Some(rel_var.clone());
5219                } else {
5220                    // Same relationship variable used twice in the same MATCH clause
5221                    return Err(anyhow!(
5222                        "SyntaxError: RelationshipUniquenessViolation - Relationship variable '{}' is already used in this pattern",
5223                        rel_var
5224                    ));
5225                }
5226            } else if params.rel.range.is_some()
5227                && is_from_previous_clause
5228                && matches!(
5229                    info.var_type,
5230                    VariableType::Scalar | VariableType::ScalarLiteral
5231                )
5232            {
5233                // Allow VLP rebound against a previously bound relationship list
5234                // (e.g. WITH [r1, r2] AS rs ... MATCH ()-[rs*]->()).
5235                bound_edge_list_var = Some(rel_var.clone());
5236            } else if !info.var_type.is_compatible_with(VariableType::Edge) {
5237                return Err(anyhow!(
5238                    "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as relationship",
5239                    rel_var,
5240                    info.var_type
5241                ));
5242            }
5243        }
5244
5245        // Check for VariableTypeConflict: target node variable already bound as non-Node
5246        // e.g., ()-[r]-()-[]-(r) where r was added as Edge, now used as target node
5247        if target_is_bound
5248            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5249            && !info.var_type.is_compatible_with(VariableType::Node)
5250        {
5251            return Err(anyhow!(
5252                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
5253                target_variable,
5254                info.var_type
5255            ));
5256        }
5257
5258        // If all requested types are unknown (schemaless), use TraverseMainByType
5259        // This allows queries like MATCH (a)-[:UnknownType]->(b) to work
5260        // Also supports OR relationship types like MATCH (a)-[:KNOWS|HATES]->(b)
5261        if !unknown_types.is_empty() && edge_type_ids.is_empty() {
5262            // All types are unknown - use schemaless traversal
5263
5264            let is_variable_length = params.rel.range.is_some();
5265
5266            const DEFAULT_MAX_HOPS: usize = 100;
5267            let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
5268                let min = range.min.unwrap_or(1) as usize;
5269                let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
5270                (min, max)
5271            } else {
5272                (1, 1)
5273            };
5274
5275            // For both single-hop and variable-length paths:
5276            // - step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
5277            //   Single-hop: step_var holds a single edge object
5278            //   VLP: step_var holds a list of edge objects
5279            // - path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
5280            let step_var = params.rel.variable.clone();
5281            let path_var = params.path_variable.clone();
5282
5283            // Compute scope_match_variables for relationship uniqueness scoping.
5284            let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
5285                .iter()
5286                .map(|v| v.name.clone())
5287                .collect();
5288            if let Some(ref sv) = step_var {
5289                // Only add the step variable to scope if it's NOT rebound from a previous clause.
5290                // Rebound edges (bound_edge_var is set) should not participate in uniqueness
5291                // filtering because the second MATCH intentionally reuses the same edge.
5292                if bound_edge_var.is_none() {
5293                    scope_match_variables.insert(sv.clone());
5294                }
5295            }
5296            scope_match_variables.insert(target_variable.clone());
5297            // Include bound edge variables from this path for cross-segment Trail mode
5298            // enforcement. This ensures VLP segments like [*0..1] don't traverse through
5299            // edges already claimed by a bound relationship [r] in the same path.
5300            // Exclude the CURRENT segment's bound edge: the schemaless path doesn't use
5301            // __rebound_ renaming, so the BFS must be free to match the bound edge itself.
5302            scope_match_variables.extend(
5303                path_bound_edge_vars
5304                    .iter()
5305                    .filter(|v| bound_edge_var.as_ref() != Some(*v))
5306                    .cloned(),
5307            );
5308
5309            let mut plan = LogicalPlan::TraverseMainByType {
5310                type_names: unknown_types,
5311                input: Box::new(plan),
5312                direction: params.rel.direction.clone(),
5313                source_variable: source_variable.to_string(),
5314                target_variable: target_variable.clone(),
5315                step_variable: step_var.clone(),
5316                min_hops,
5317                max_hops,
5318                optional: params.optional,
5319                target_filter: self.node_filter_expr(
5320                    &target_variable,
5321                    &params.target_node.labels,
5322                    &params.target_node.properties,
5323                ),
5324                path_variable: path_var.clone(),
5325                is_variable_length,
5326                optional_pattern_vars: params.optional_pattern_vars.clone(),
5327                scope_match_variables,
5328                edge_filter_expr: if is_variable_length {
5329                    let filter_var = step_var
5330                        .clone()
5331                        .unwrap_or_else(|| "__anon_edge".to_string());
5332                    self.properties_to_expr(&filter_var, &params.rel.properties)
5333                } else {
5334                    None
5335                },
5336                path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5337            };
5338
5339            // Only apply bound target filter for Imported variables (from outer scope/subquery).
5340            // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
5341            // uses Parameter which requires the value to be in params (subquery context).
5342            if target_is_bound
5343                && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5344                && info.var_type == VariableType::Imported
5345            {
5346                plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
5347            }
5348
5349            // Apply relationship property predicates for fixed-length schemaless
5350            // traversals (e.g., [r:KNOWS {name: 'monkey'}]).
5351            // For VLP, predicates are stored inline in edge_filter_expr (above).
5352            // For fixed-length, wrap as a Filter node for post-traverse evaluation.
5353            if !is_variable_length
5354                && let Some(edge_var_name) = step_var.as_ref()
5355                && let Some(edge_prop_filter) =
5356                    self.properties_to_expr(edge_var_name, &params.rel.properties)
5357            {
5358                let filter_optional_vars = if params.optional {
5359                    params.optional_pattern_vars.clone()
5360                } else {
5361                    HashSet::new()
5362                };
5363                plan = LogicalPlan::Filter {
5364                    input: Box::new(plan),
5365                    predicate: edge_prop_filter,
5366                    optional_variables: filter_optional_vars,
5367                };
5368            }
5369
5370            // Add the bound variables to scope
5371            if let Some(sv) = &step_var {
5372                add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5373                if is_variable_length
5374                    && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5375                {
5376                    info.is_vlp = true;
5377                }
5378            }
5379            if let Some(pv) = &path_var
5380                && !is_var_in_scope(vars_in_scope, pv)
5381            {
5382                add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5383            }
5384            if !is_var_in_scope(vars_in_scope, &target_variable) {
5385                add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5386            }
5387
5388            return Ok((plan, target_variable.clone(), target_variable));
5389        }
5390
5391        // If we have a mix of known and unknown types, error for now
5392        // (could be extended to Union of Traverse + TraverseMainByType)
5393        if !unknown_types.is_empty() {
5394            return Err(anyhow!(
5395                "Mixed known and unknown edge types not yet supported. Unknown: {:?}",
5396                unknown_types
5397            ));
5398        }
5399
5400        // Resolve target label to either a schema id or a virtual id from the
5401        // plugin registry. Mid-pattern virtual-label dispatch (M5b.3) requires
5402        // the virtual id to flow into `Traverse.target_label_id` so the
5403        // physical planner can layer a `CatalogVertexScanExec` join on the
5404        // traverse output. Mirrors the schema-then-virtual fallthrough used
5405        // by single-vertex `Scan` planning (~`plan_node_pattern` below).
5406        let mut virtual_target_label_id: Option<u16> = None;
5407        let target_label_meta = if let Some(label_name) = params.target_node.labels.first() {
5408            // Use first label for target_label_id
5409            // For schemaless support, allow unknown target labels
5410            match self.schema.get_label_case_insensitive(label_name) {
5411                Some(meta) => Some(meta),
5412                None => {
5413                    if let Some((vid, _)) = self.allocate_virtual_label(label_name)? {
5414                        virtual_target_label_id = Some(vid);
5415                    }
5416                    None
5417                }
5418            }
5419        } else if !target_is_bound {
5420            // Infer from edge type(s)
5421            let unique_dsts: Vec<_> = dst_labels
5422                .into_iter()
5423                .collect::<HashSet<_>>()
5424                .into_iter()
5425                .collect();
5426            if unique_dsts.len() == 1 {
5427                let label_name = &unique_dsts[0];
5428                self.schema.get_label_case_insensitive(label_name)
5429            } else {
5430                // Multiple or no destination labels inferred - allow any target
5431                // This supports patterns like MATCH (a)-[:EDGE_TYPE]-(b) WHERE b:Label
5432                // where the edge type can connect to multiple labels
5433                None
5434            }
5435        } else {
5436            None
5437        };
5438
5439        // Check if this is a variable-length pattern (has range specifier like *1..3)
5440        let is_variable_length = params.rel.range.is_some();
5441
5442        // For VLP patterns, default min to 1 and max to a reasonable limit.
5443        // For single-hop patterns (no range), both are 1.
5444        const DEFAULT_MAX_HOPS: usize = 100;
5445        let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
5446            let min = range.min.unwrap_or(1) as usize;
5447            let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
5448            (min, max)
5449        } else {
5450            (1, 1)
5451        };
5452
5453        // step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
5454        //   Single-hop: step_var holds a single edge object
5455        //   VLP: step_var holds a list of edge objects
5456        // path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
5457        let step_var = params.rel.variable.clone();
5458        let path_var = params.path_variable.clone();
5459
5460        // If we have a bound edge variable from a previous clause, use a temp variable
5461        // for the Traverse step, then filter to match the bound edge
5462        let rebound_var = bound_edge_var
5463            .as_ref()
5464            .or(bound_edge_list_var.as_ref())
5465            .cloned();
5466        let effective_step_var = if let Some(ref bv) = rebound_var {
5467            Some(format!("__rebound_{}", bv))
5468        } else {
5469            step_var.clone()
5470        };
5471
5472        // If the target variable is already in scope (e.g. carried over by WITH) and is
5473        // not Imported, use a temp variable for the Traverse step, then filter to match
5474        // the bound target — mirroring the bound edge pattern above.
5475        let rebound_target_var = if target_is_bound && !target_variable.is_empty() {
5476            let is_imported = find_var_in_scope(vars_in_scope, &target_variable)
5477                .map(|info| info.var_type == VariableType::Imported)
5478                .unwrap_or(false);
5479            if !is_imported {
5480                Some(target_variable.clone())
5481            } else {
5482                None
5483            }
5484        } else {
5485            None
5486        };
5487
5488        let effective_target_var = if let Some(ref bv) = rebound_target_var {
5489            format!("__rebound_{}", bv)
5490        } else {
5491            target_variable.clone()
5492        };
5493
5494        // Collect all variables (node + edge) from the current MATCH clause scope
5495        // for relationship uniqueness scoping. Edge ID columns (both named `r._eid`
5496        // and anonymous `__eid_to_target`) are only included in uniqueness filtering
5497        // if their associated variable is in this set. This prevents relationship
5498        // uniqueness from being enforced across disconnected MATCH clauses.
5499        let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
5500            .iter()
5501            .map(|v| v.name.clone())
5502            .collect();
5503        // Include the current traverse's edge variable (not yet added to vars_in_scope)
5504        if let Some(ref sv) = effective_step_var {
5505            scope_match_variables.insert(sv.clone());
5506        }
5507        // Include the target variable (not yet added to vars_in_scope)
5508        scope_match_variables.insert(effective_target_var.clone());
5509        // Include bound edge variables from this path for cross-segment Trail mode
5510        // enforcement (same as the schemaless path above).
5511        scope_match_variables.extend(path_bound_edge_vars.iter().cloned());
5512
5513        let mut plan = LogicalPlan::Traverse {
5514            input: Box::new(plan),
5515            edge_type_ids,
5516            direction: params.rel.direction.clone(),
5517            source_variable: source_variable.to_string(),
5518            target_variable: effective_target_var.clone(),
5519            target_label_id: target_label_meta
5520                .map(|m| m.id)
5521                .or(virtual_target_label_id)
5522                .unwrap_or(0),
5523            step_variable: effective_step_var.clone(),
5524            min_hops,
5525            max_hops,
5526            optional: params.optional,
5527            target_filter: self.node_filter_expr(
5528                &target_variable,
5529                &params.target_node.labels,
5530                &params.target_node.properties,
5531            ),
5532            path_variable: path_var.clone(),
5533            edge_properties: HashSet::new(),
5534            is_variable_length,
5535            optional_pattern_vars: params.optional_pattern_vars.clone(),
5536            scope_match_variables,
5537            edge_filter_expr: if is_variable_length {
5538                // Use the step variable name, or a fallback for anonymous edges.
5539                // The variable name is used by properties_to_expr to build
5540                // `var.prop = value` expressions. For BFS property checking,
5541                // only the property name and value matter (the variable name
5542                // is stripped during extraction).
5543                let filter_var = effective_step_var
5544                    .clone()
5545                    .unwrap_or_else(|| "__anon_edge".to_string());
5546                self.properties_to_expr(&filter_var, &params.rel.properties)
5547            } else {
5548                None
5549            },
5550            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5551            qpp_steps: None,
5552        };
5553
5554        // Pre-compute optional variables set for filter nodes in this traverse.
5555        // Used by relationship property filters and bound-edge filters below.
5556        let filter_optional_vars = if params.optional {
5557            params.optional_pattern_vars.clone()
5558        } else {
5559            HashSet::new()
5560        };
5561
5562        // Apply relationship property predicates (e.g. [r {k: v}]).
5563        // For VLP, predicates are stored inline in edge_filter_expr (above).
5564        // For fixed-length, wrap as a Filter node for post-traverse evaluation.
5565        if !is_variable_length
5566            && let Some(edge_var_name) = effective_step_var.as_ref()
5567            && let Some(edge_prop_filter) =
5568                self.properties_to_expr(edge_var_name, &params.rel.properties)
5569        {
5570            plan = LogicalPlan::Filter {
5571                input: Box::new(plan),
5572                predicate: edge_prop_filter,
5573                optional_variables: filter_optional_vars.clone(),
5574            };
5575        }
5576
5577        // Only apply bound target filter for Imported variables (from outer scope/subquery).
5578        // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
5579        // uses Parameter which requires the value to be in params (subquery context).
5580        if target_is_bound
5581            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5582            && info.var_type == VariableType::Imported
5583        {
5584            plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
5585        }
5586
5587        // If we have a bound edge variable, add a filter to match it
5588        if let Some(ref bv) = bound_edge_var {
5589            let temp_var = format!("__rebound_{}", bv);
5590            let bound_check = Expr::BinaryOp {
5591                left: Box::new(Expr::Property(
5592                    Box::new(Expr::Variable(temp_var)),
5593                    "_eid".to_string(),
5594                )),
5595                op: BinaryOp::Eq,
5596                right: Box::new(Expr::Property(
5597                    Box::new(Expr::Variable(bv.clone())),
5598                    "_eid".to_string(),
5599                )),
5600            };
5601            plan = LogicalPlan::Filter {
5602                input: Box::new(plan),
5603                predicate: bound_check,
5604                optional_variables: filter_optional_vars.clone(),
5605            };
5606        }
5607
5608        // If we have a bound relationship list variable for a VLP pattern,
5609        // add a filter to match the traversed relationship list exactly.
5610        if let Some(ref bv) = bound_edge_list_var {
5611            let temp_var = format!("__rebound_{}", bv);
5612            let temp_eids = Expr::ListComprehension {
5613                variable: "__rebound_edge".to_string(),
5614                list: Box::new(Expr::Variable(temp_var)),
5615                where_clause: None,
5616                map_expr: Box::new(Expr::FunctionCall {
5617                    name: "toInteger".to_string(),
5618                    args: vec![Expr::Property(
5619                        Box::new(Expr::Variable("__rebound_edge".to_string())),
5620                        "_eid".to_string(),
5621                    )],
5622                    distinct: false,
5623                    window_spec: None,
5624                }),
5625            };
5626            let bound_eids = Expr::ListComprehension {
5627                variable: "__bound_edge".to_string(),
5628                list: Box::new(Expr::Variable(bv.clone())),
5629                where_clause: None,
5630                map_expr: Box::new(Expr::FunctionCall {
5631                    name: "toInteger".to_string(),
5632                    args: vec![Expr::Property(
5633                        Box::new(Expr::Variable("__bound_edge".to_string())),
5634                        "_eid".to_string(),
5635                    )],
5636                    distinct: false,
5637                    window_spec: None,
5638                }),
5639            };
5640            let bound_list_check = Expr::BinaryOp {
5641                left: Box::new(temp_eids),
5642                op: BinaryOp::Eq,
5643                right: Box::new(bound_eids),
5644            };
5645            plan = LogicalPlan::Filter {
5646                input: Box::new(plan),
5647                predicate: bound_list_check,
5648                optional_variables: filter_optional_vars.clone(),
5649            };
5650        }
5651
5652        // If we have a bound target variable (non-imported), add a filter to constrain
5653        // the traversal output to match the previously bound target node.
5654        if let Some(ref bv) = rebound_target_var {
5655            let temp_var = format!("__rebound_{}", bv);
5656            let bound_check = Expr::BinaryOp {
5657                left: Box::new(Expr::Property(
5658                    Box::new(Expr::Variable(temp_var.clone())),
5659                    "_vid".to_string(),
5660                )),
5661                op: BinaryOp::Eq,
5662                right: Box::new(Expr::Property(
5663                    Box::new(Expr::Variable(bv.clone())),
5664                    "_vid".to_string(),
5665                )),
5666            };
5667            // For OPTIONAL MATCH, include the rebound variable in optional_variables
5668            // so that OptionalFilterExec excludes it from the grouping key and
5669            // properly nullifies it in recovery rows when all matches are filtered out.
5670            // Without this, each traverse result creates its own group (keyed by
5671            // __rebound_c._vid), and null-row recovery emits a spurious null row
5672            // for every non-matching target instead of one per source group.
5673            let mut rebound_filter_vars = filter_optional_vars;
5674            if params.optional {
5675                rebound_filter_vars.insert(temp_var);
5676            }
5677            plan = LogicalPlan::Filter {
5678                input: Box::new(plan),
5679                predicate: bound_check,
5680                optional_variables: rebound_filter_vars,
5681            };
5682        }
5683
5684        // Add the bound variables to scope
5685        // Skip adding the edge variable if it's already bound from a previous clause
5686        if let Some(sv) = &step_var
5687            && bound_edge_var.is_none()
5688            && bound_edge_list_var.is_none()
5689        {
5690            add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5691            if is_variable_length
5692                && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5693            {
5694                info.is_vlp = true;
5695            }
5696        }
5697        if let Some(pv) = &path_var
5698            && !is_var_in_scope(vars_in_scope, pv)
5699        {
5700            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5701        }
5702        if !is_var_in_scope(vars_in_scope, &target_variable) {
5703            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5704        }
5705
5706        Ok((plan, target_variable, effective_target_var))
5707    }
5708
5709    /// Combine a new scan plan with an existing plan.
5710    ///
5711    /// If the existing plan is `Empty`, returns the new plan directly.
5712    /// Otherwise, wraps them in a `CrossJoin`.
5713    fn join_with_plan(existing: LogicalPlan, new: LogicalPlan) -> LogicalPlan {
5714        if matches!(existing, LogicalPlan::Empty) {
5715            new
5716        } else {
5717            LogicalPlan::CrossJoin {
5718                left: Box::new(existing),
5719                right: Box::new(new),
5720            }
5721        }
5722    }
5723
5724    /// Split node map predicates into scan-pushable and residual filters.
5725    ///
5726    /// A predicate is scan-pushable when its value expression references only
5727    /// the node variable itself (or no variables). Predicates referencing other
5728    /// in-scope variables (correlated predicates) are returned as residual so
5729    /// they can be applied after joining with the existing plan.
5730    fn split_node_property_filters_for_scan(
5731        &self,
5732        variable: &str,
5733        properties: &Option<Expr>,
5734    ) -> (Option<Expr>, Option<Expr>) {
5735        let entries = match properties {
5736            Some(Expr::Map(entries)) => entries,
5737            _ => return (None, None),
5738        };
5739
5740        if entries.is_empty() {
5741            return (None, None);
5742        }
5743
5744        let mut pushdown_entries = Vec::new();
5745        let mut residual_entries = Vec::new();
5746
5747        for (prop, val_expr) in entries {
5748            let vars = collect_expr_variables(val_expr);
5749            if vars.iter().all(|v| v == variable) {
5750                pushdown_entries.push((prop.clone(), val_expr.clone()));
5751            } else {
5752                residual_entries.push((prop.clone(), val_expr.clone()));
5753            }
5754        }
5755
5756        let pushdown_map = if pushdown_entries.is_empty() {
5757            None
5758        } else {
5759            Some(Expr::Map(pushdown_entries))
5760        };
5761        let residual_map = if residual_entries.is_empty() {
5762            None
5763        } else {
5764            Some(Expr::Map(residual_entries))
5765        };
5766
5767        (
5768            self.properties_to_expr(variable, &pushdown_map),
5769            self.properties_to_expr(variable, &residual_map),
5770        )
5771    }
5772
5773    /// Decide whether per-label `Scan` branches for a label disjunction can
5774    /// safely be combined under `LogicalPlan::Union`. Returns `true` iff every
5775    /// label in `labels` is registered in the schema AND every pair shares an
5776    /// identical property name+type set.
5777    ///
5778    /// When this returns `false`, the disjunction must fall back to a single
5779    /// `ScanMainByLabels` over all labels — otherwise DataFusion's
5780    /// `UnionExec::try_new` panics in `union_schema` because the per-label
5781    /// `GraphScanExec` outputs (`_vid` + `_labels` + per-label projected
5782    /// properties) have different field counts. Issue rustic-ai/uni-db#62.
5783    ///
5784    /// We deliberately compare full schema property sets rather than only the
5785    /// properties referenced by the current query: at this logical-planning
5786    /// stage we have not yet collected `all_properties`, and `*` wildcards
5787    /// (e.g. from unknown function calls) would expand per-label downstream
5788    /// in `df_planner::resolve_properties` even when the query text only
5789    /// touches common columns.
5790    fn label_branches_share_property_schema(&self, labels: &[String]) -> bool {
5791        if labels.len() < 2 {
5792            return true;
5793        }
5794        let mut iter = labels.iter();
5795        let first = iter.next().expect("len >= 2");
5796        let Some(first_props) = self.schema.properties.get(first) else {
5797            return false;
5798        };
5799        for label in iter {
5800            let Some(props) = self.schema.properties.get(label) else {
5801                return false;
5802            };
5803            if props.len() != first_props.len() {
5804                return false;
5805            }
5806            for (name, meta) in first_props {
5807                let Some(other_meta) = props.get(name) else {
5808                    return false;
5809                };
5810                if meta.r#type != other_meta.r#type {
5811                    return false;
5812                }
5813            }
5814        }
5815        true
5816    }
5817
5818    /// Plan an unbound node (creates a Scan, ScanAll, ScanMainByLabel, ExtIdLookup, or CrossJoin).
5819    fn plan_unbound_node(
5820        &self,
5821        node: &NodePattern,
5822        variable: &str,
5823        plan: LogicalPlan,
5824        optional: bool,
5825    ) -> Result<LogicalPlan> {
5826        // Properties handling
5827        let properties = match &node.properties {
5828            Some(Expr::Map(entries)) => entries.as_slice(),
5829            Some(Expr::Parameter(_)) => {
5830                return Err(anyhow!(
5831                    "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
5832                ));
5833            }
5834            Some(_) => return Err(anyhow!("Node properties must be a Map")),
5835            None => &[],
5836        };
5837
5838        let has_existing_scope = !matches!(plan, LogicalPlan::Empty);
5839
5840        let apply_residual_filter = |input: LogicalPlan, residual: Option<Expr>| -> LogicalPlan {
5841            if let Some(predicate) = residual {
5842                LogicalPlan::Filter {
5843                    input: Box::new(input),
5844                    predicate,
5845                    optional_variables: HashSet::new(),
5846                }
5847            } else {
5848                input
5849            }
5850        };
5851
5852        let (node_scan_filter, node_residual_filter) = if has_existing_scope {
5853            self.split_node_property_filters_for_scan(variable, &node.properties)
5854        } else {
5855            (self.properties_to_expr(variable, &node.properties), None)
5856        };
5857
5858        // Check for ext_id in properties when no label is specified
5859        if node.labels.is_empty() {
5860            // Try to find ext_id property for main table lookup
5861            if let Some((_, ext_id_value)) = properties.iter().find(|(k, _)| k == "ext_id") {
5862                // Extract the ext_id value as a string
5863                let ext_id = match ext_id_value {
5864                    Expr::Literal(CypherLiteral::String(s)) => s.clone(),
5865                    _ => {
5866                        return Err(anyhow!("ext_id must be a string literal for direct lookup"));
5867                    }
5868                };
5869
5870                // Build filter for remaining properties (excluding ext_id)
5871                let remaining_props: Vec<_> = properties
5872                    .iter()
5873                    .filter(|(k, _)| k != "ext_id")
5874                    .cloned()
5875                    .collect();
5876
5877                let remaining_expr = if remaining_props.is_empty() {
5878                    None
5879                } else {
5880                    Some(Expr::Map(remaining_props))
5881                };
5882
5883                let (prop_filter, residual_filter) = if has_existing_scope {
5884                    self.split_node_property_filters_for_scan(variable, &remaining_expr)
5885                } else {
5886                    (self.properties_to_expr(variable, &remaining_expr), None)
5887                };
5888
5889                let ext_id_lookup = LogicalPlan::ExtIdLookup {
5890                    variable: variable.to_string(),
5891                    ext_id,
5892                    filter: prop_filter,
5893                    optional,
5894                };
5895
5896                let joined = Self::join_with_plan(plan, ext_id_lookup);
5897                return Ok(apply_residual_filter(joined, residual_filter));
5898            }
5899
5900            // No ext_id: create ScanAll for unlabeled node pattern
5901            let scan_all = LogicalPlan::ScanAll {
5902                variable: variable.to_string(),
5903                filter: node_scan_filter,
5904                optional,
5905            };
5906
5907            let joined = Self::join_with_plan(plan, scan_all);
5908            return Ok(apply_residual_filter(joined, node_residual_filter));
5909        }
5910
5911        // Label disjunction `(n:A|B|C)` — emit Union of label-scoped Scans.
5912        //
5913        // Storage fact: a multi-labeled vertex is fanned out into every
5914        // per-label table it carries (uni-store/src/runtime/writer.rs's
5915        // `push_vertex_to_labels`), so the same vid can appear in both the
5916        // `A` scan and the `B` scan of a disjunctive query. Use
5917        // `Union { all: false }` so the combined result deduplicates by row
5918        // contents (which include the vid) rather than emitting the same
5919        // vertex twice. The single-label-disjunction case (`Disjunction(["A"])`)
5920        // is encoded the same way the parser already encodes single edge
5921        // types, and reduces to one Scan with no Union wrapping.
5922        if node.labels.is_proper_disjunction() {
5923            let label_names: Vec<String> = node.labels.names().to_vec();
5924
5925            // Per-label branches under a `Union` only line up when every
5926            // branch produces the same Arrow schema. The narrow-scan
5927            // `Scan` path resolves columns *per label*, so heterogeneous
5928            // property sets (or any schemaless label in the mix) yield
5929            // mismatched widths and DataFusion's `UnionExec::try_new`
5930            // panics inside `union_schema` (issue rustic-ai/uni-db#62).
5931            //
5932            // For those cases, lower every branch to a *single-label*
5933            // `ScanMainByLabels` instead. The schemaless main-table scan
5934            // resolves columns from `all_properties` directly (no per-label
5935            // expansion), so all branches emit a uniform schema and the
5936            // outer `Union { all: false }` deduplicates correctly. We
5937            // keep the per-branch Union shape (rather than collapsing to
5938            // a single multi-label scan) because multi-label
5939            // `ScanMainByLabels` has AND/intersection semantics — wrong
5940            // for a disjunction.
5941            let use_main_table_branches = !self.label_branches_share_property_schema(&label_names);
5942
5943            let mut branches: Vec<LogicalPlan> = Vec::with_capacity(label_names.len());
5944            for label_name in &label_names {
5945                let branch = if use_main_table_branches {
5946                    LogicalPlan::ScanMainByLabels {
5947                        labels: vec![label_name.clone()],
5948                        variable: variable.to_string(),
5949                        filter: node_scan_filter.clone(),
5950                        optional,
5951                    }
5952                } else {
5953                    let meta = self
5954                        .schema
5955                        .get_label_case_insensitive(label_name)
5956                        .expect("share_property_schema true implies all labels in schema");
5957                    LogicalPlan::Scan {
5958                        label_id: meta.id,
5959                        labels: vec![label_name.clone()],
5960                        variable: variable.to_string(),
5961                        filter: node_scan_filter.clone(),
5962                        optional,
5963                    }
5964                };
5965                branches.push(branch);
5966            }
5967            // Left-leaning Union: Union(Union(A, B), C). All inner
5968            // unions dedupe by row, so the outer one does too.
5969            let mut iter = branches.into_iter();
5970            let mut union_plan = iter
5971                .next()
5972                .expect("is_proper_disjunction implies at least 2 labels");
5973            for next in iter {
5974                union_plan = LogicalPlan::Union {
5975                    left: Box::new(union_plan),
5976                    right: Box::new(next),
5977                    all: false,
5978                };
5979            }
5980            let joined = Self::join_with_plan(plan, union_plan);
5981            return Ok(apply_residual_filter(joined, node_residual_filter));
5982        }
5983
5984        // Use first label for label_id (primary label for dataset selection)
5985        let label_name = &node.labels[0];
5986
5987        // Check if label exists in schema
5988        if let Some(label_meta) = self.schema.get_label_case_insensitive(label_name) {
5989            // Known label: use standard Scan
5990            let scan = LogicalPlan::Scan {
5991                label_id: label_meta.id,
5992                labels: node.labels.names().to_vec(),
5993                variable: variable.to_string(),
5994                filter: node_scan_filter,
5995                optional,
5996            };
5997
5998            let joined = Self::join_with_plan(plan, scan);
5999            Ok(apply_residual_filter(joined, node_residual_filter))
6000        } else {
6001            // Unknown label. Try a CatalogProvider / ReplacementScanProvider
6002            // claim first: on success allocate a virtual label-ID and emit a
6003            // regular `Scan` against the virtual id (`df_planner` dispatches
6004            // to `CatalogVertexScanExec`). When no provider claims and the
6005            // replacement-scan gate is on, strict-mode errors. When the gate
6006            // is off and no provider claims, preserve today's silent-empty
6007            // schemaless `ScanMainByLabels` behavior bit-for-bit.
6008            if let Some((virtual_id, _)) = self.allocate_virtual_label(label_name)? {
6009                let scan = LogicalPlan::Scan {
6010                    label_id: virtual_id,
6011                    labels: node.labels.names().to_vec(),
6012                    variable: variable.to_string(),
6013                    filter: node_scan_filter,
6014                    optional,
6015                };
6016                let joined = Self::join_with_plan(plan, scan);
6017                return Ok(apply_residual_filter(joined, node_residual_filter));
6018            }
6019            if self.replacement_scans_enabled {
6020                return Err(anyhow!(
6021                    "Label `{}` is not defined in schema and no \
6022                     CatalogProvider or ReplacementScanProvider claimed it; \
6023                     strict-mode (replacement_scans=true) requires the label \
6024                     to resolve",
6025                    label_name
6026                ));
6027            }
6028
6029            let scan_main = LogicalPlan::ScanMainByLabels {
6030                labels: node.labels.names().to_vec(),
6031                variable: variable.to_string(),
6032                filter: node_scan_filter,
6033                optional,
6034            };
6035
6036            let joined = Self::join_with_plan(plan, scan_main);
6037            Ok(apply_residual_filter(joined, node_residual_filter))
6038        }
6039    }
6040
6041    /// Plan a WHERE clause with vector_similarity extraction and predicate pushdown.
6042    ///
6043    /// When `optional_vars` is non-empty, the Filter will preserve rows where
6044    /// any of those variables are NULL (for OPTIONAL MATCH semantics).
6045    fn plan_where_clause(
6046        &self,
6047        predicate: &Expr,
6048        plan: LogicalPlan,
6049        vars_in_scope: &[VariableInfo],
6050        optional_vars: HashSet<String>,
6051    ) -> Result<LogicalPlan> {
6052        // Validate no aggregation functions in WHERE clause
6053        validate_no_aggregation_in_where(predicate)?;
6054
6055        // Validate all variables used are in scope
6056        validate_expression_variables(predicate, vars_in_scope)?;
6057
6058        // Validate expression types (function args, boolean operators)
6059        validate_expression(predicate, vars_in_scope)?;
6060
6061        // Check that WHERE predicate isn't a bare node/edge/path variable
6062        if let Expr::Variable(var_name) = predicate
6063            && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
6064            && matches!(
6065                info.var_type,
6066                VariableType::Node | VariableType::Edge | VariableType::Path
6067            )
6068        {
6069            return Err(anyhow!(
6070                "SyntaxError: InvalidArgumentType - Type mismatch: expected Boolean but was {:?}",
6071                info.var_type
6072            ));
6073        }
6074
6075        let mut plan = plan;
6076
6077        // Transform VALID_AT macro to function call
6078        let transformed_predicate = Self::transform_valid_at_to_function(predicate.clone());
6079
6080        // Rewrite id(var) to var._vid (or var._eid for an edge) so
6081        // PredicateAnalyzer can push it down.
6082        let transformed_predicate = Self::rewrite_id_to_vid(transformed_predicate, vars_in_scope);
6083
6084        let mut current_predicate =
6085            self.rewrite_predicates_using_indexes(&transformed_predicate, &plan, vars_in_scope)?;
6086
6087        // 1. Try to extract vector_similarity predicate for optimization
6088        if let Some(extraction) = extract_vector_similarity(&current_predicate) {
6089            let vs = &extraction.predicate;
6090            if Self::find_scan_label_id(&plan, &vs.variable).is_some() {
6091                plan = Self::replace_scan_with_knn(
6092                    plan,
6093                    &vs.variable,
6094                    &vs.property,
6095                    vs.query.clone(),
6096                    vs.threshold,
6097                );
6098                if let Some(residual) = extraction.residual {
6099                    current_predicate = residual;
6100                } else {
6101                    current_predicate = Expr::TRUE;
6102                }
6103            }
6104        }
6105
6106        // 2. Label/type disjunction → narrow-scan rewrite.
6107        //
6108        // `WHERE n:A OR n:B` and `WHERE type(r) = 'A' OR type(r) = 'B'`
6109        // are functionally identical to the inline forms `(n:A|B)` and
6110        // `[r:A|B]`, but a literal pattern lowering would route them
6111        // through `Filter(LabelCheck OR LabelCheck)` over `ScanAll` —
6112        // a full vertex/edge scan plus residual filter, missing the
6113        // narrow-scan fast-path that the inline forms get for free.
6114        // Detect those OR-chains here and rewrite the upstream
6115        // `ScanAll` / `Traverse` accordingly.
6116        let conjuncts = Self::split_and_conjuncts(&current_predicate);
6117        let mut keep: Vec<Expr> = Vec::with_capacity(conjuncts.len());
6118        for conj in conjuncts {
6119            let mut consumed = false;
6120            for var in vars_in_scope {
6121                if optional_vars.contains(&var.name) {
6122                    continue;
6123                }
6124                // Node label disjunction → Union of label-scoped Scans.
6125                if Self::is_scan_all_for(&plan, &var.name)
6126                    && let Some(labels) = try_label_or_to_union(&conj, &var.name)
6127                {
6128                    plan = self.replace_scan_all_with_label_union(plan, &var.name, &labels, false);
6129                    consumed = true;
6130                    break;
6131                }
6132                // Edge type disjunction → merge into Traverse.edge_type_ids.
6133                if let Some(types) = try_type_or_to_union(&conj, &var.name)
6134                    && Self::merge_traverse_types_for(&plan, &var.name, &types).is_some()
6135                {
6136                    let mut ids: Vec<u32> = Vec::with_capacity(types.len());
6137                    let mut all_known = true;
6138                    for t in &types {
6139                        match self.schema.edge_types.get(t) {
6140                            Some(meta) => ids.push(meta.id),
6141                            None => {
6142                                all_known = false;
6143                                break;
6144                            }
6145                        }
6146                    }
6147                    if all_known {
6148                        plan = Self::set_traverse_edge_type_ids(plan, &var.name, ids);
6149                        consumed = true;
6150                        break;
6151                    }
6152                }
6153            }
6154            if !consumed {
6155                keep.push(conj);
6156            }
6157        }
6158        current_predicate = Self::combine_predicates(keep).unwrap_or(Expr::TRUE);
6159
6160        // 3. Push eligible predicates to Scan OR Traverse filters
6161        // Note: Do NOT push predicates on optional variables (from OPTIONAL MATCH) to
6162        // Traverse's target_filter, because target_filter filtering doesn't preserve NULL
6163        // rows. Let them stay in the Filter operator which handles NULL preservation.
6164        for var in vars_in_scope {
6165            // Skip pushdown for optional variables - they need NULL preservation in Filter
6166            if optional_vars.contains(&var.name) {
6167                continue;
6168            }
6169
6170            // Check if var is produced by a Scan
6171            if Self::find_scan_label_id(&plan, &var.name).is_some() {
6172                let (pushable, residual) =
6173                    Self::extract_variable_predicates(&current_predicate, &var.name);
6174
6175                for pred in pushable {
6176                    plan = Self::push_predicate_to_scan(plan, &var.name, pred);
6177                }
6178
6179                if let Some(r) = residual {
6180                    current_predicate = r;
6181                } else {
6182                    current_predicate = Expr::TRUE;
6183                }
6184            } else if Self::is_traverse_target(&plan, &var.name) {
6185                // Push to Traverse
6186                let (pushable, residual) =
6187                    Self::extract_variable_predicates(&current_predicate, &var.name);
6188
6189                for pred in pushable {
6190                    plan = Self::push_predicate_to_traverse(plan, &var.name, pred);
6191                }
6192
6193                if let Some(r) = residual {
6194                    current_predicate = r;
6195                } else {
6196                    current_predicate = Expr::TRUE;
6197                }
6198            }
6199        }
6200
6201        // 4. Push predicates to Apply.input_filter
6202        // This filters input rows BEFORE executing correlated subqueries.
6203        plan = Self::push_predicates_to_apply(plan, &mut current_predicate);
6204
6205        // 5. Add Filter node for any remaining predicates
6206        if !current_predicate.is_true_literal() {
6207            plan = LogicalPlan::Filter {
6208                input: Box::new(plan),
6209                predicate: current_predicate,
6210                optional_variables: optional_vars,
6211            };
6212        }
6213
6214        Ok(plan)
6215    }
6216
6217    fn rewrite_predicates_using_indexes(
6218        &self,
6219        predicate: &Expr,
6220        plan: &LogicalPlan,
6221        vars_in_scope: &[VariableInfo],
6222    ) -> Result<Expr> {
6223        let mut rewritten = predicate.clone();
6224
6225        for var in vars_in_scope {
6226            if let Some(label_id) = Self::find_scan_label_id(plan, &var.name) {
6227                // Find label name
6228                let label_name = self.schema.label_name_by_id(label_id).map(str::to_owned);
6229
6230                if let Some(label) = label_name
6231                    && let Some(props) = self.schema.properties.get(&label)
6232                {
6233                    for (gen_col, meta) in props {
6234                        if meta.generation_expression.is_some() {
6235                            // Use cached parsed expression
6236                            if let Some(schema_expr) =
6237                                self.gen_expr_cache.get(&(label.clone(), gen_col.clone()))
6238                            {
6239                                // Rewrite 'rewritten' replacing occurrences of schema_expr with gen_col
6240                                rewritten = Self::replace_expression(
6241                                    rewritten,
6242                                    schema_expr,
6243                                    &var.name,
6244                                    gen_col,
6245                                );
6246                            }
6247                        }
6248                    }
6249                }
6250            }
6251        }
6252        Ok(rewritten)
6253    }
6254
6255    fn replace_expression(expr: Expr, schema_expr: &Expr, query_var: &str, gen_col: &str) -> Expr {
6256        // First, normalize schema_expr to use query_var
6257        let schema_var = schema_expr.extract_variable();
6258
6259        if let Some(s_var) = schema_var {
6260            let target_expr = schema_expr.substitute_variable(&s_var, query_var);
6261
6262            if expr == target_expr {
6263                return Expr::Property(
6264                    Box::new(Expr::Variable(query_var.to_string())),
6265                    gen_col.to_string(),
6266                );
6267            }
6268        }
6269
6270        // Recurse
6271        match expr {
6272            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
6273                left: Box::new(Self::replace_expression(
6274                    *left,
6275                    schema_expr,
6276                    query_var,
6277                    gen_col,
6278                )),
6279                op,
6280                right: Box::new(Self::replace_expression(
6281                    *right,
6282                    schema_expr,
6283                    query_var,
6284                    gen_col,
6285                )),
6286            },
6287            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
6288                op,
6289                expr: Box::new(Self::replace_expression(
6290                    *expr,
6291                    schema_expr,
6292                    query_var,
6293                    gen_col,
6294                )),
6295            },
6296            Expr::FunctionCall {
6297                name,
6298                args,
6299                distinct,
6300                window_spec,
6301            } => Expr::FunctionCall {
6302                name,
6303                args: args
6304                    .into_iter()
6305                    .map(|a| Self::replace_expression(a, schema_expr, query_var, gen_col))
6306                    .collect(),
6307                distinct,
6308                window_spec,
6309            },
6310            Expr::IsNull(expr) => Expr::IsNull(Box::new(Self::replace_expression(
6311                *expr,
6312                schema_expr,
6313                query_var,
6314                gen_col,
6315            ))),
6316            Expr::IsNotNull(expr) => Expr::IsNotNull(Box::new(Self::replace_expression(
6317                *expr,
6318                schema_expr,
6319                query_var,
6320                gen_col,
6321            ))),
6322            Expr::IsUnique(expr) => Expr::IsUnique(Box::new(Self::replace_expression(
6323                *expr,
6324                schema_expr,
6325                query_var,
6326                gen_col,
6327            ))),
6328            Expr::ArrayIndex {
6329                array: e,
6330                index: idx,
6331            } => Expr::ArrayIndex {
6332                array: Box::new(Self::replace_expression(
6333                    *e,
6334                    schema_expr,
6335                    query_var,
6336                    gen_col,
6337                )),
6338                index: Box::new(Self::replace_expression(
6339                    *idx,
6340                    schema_expr,
6341                    query_var,
6342                    gen_col,
6343                )),
6344            },
6345            Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
6346                array: Box::new(Self::replace_expression(
6347                    *array,
6348                    schema_expr,
6349                    query_var,
6350                    gen_col,
6351                )),
6352                start: start.map(|s| {
6353                    Box::new(Self::replace_expression(
6354                        *s,
6355                        schema_expr,
6356                        query_var,
6357                        gen_col,
6358                    ))
6359                }),
6360                end: end.map(|e| {
6361                    Box::new(Self::replace_expression(
6362                        *e,
6363                        schema_expr,
6364                        query_var,
6365                        gen_col,
6366                    ))
6367                }),
6368            },
6369            Expr::List(exprs) => Expr::List(
6370                exprs
6371                    .into_iter()
6372                    .map(|e| Self::replace_expression(e, schema_expr, query_var, gen_col))
6373                    .collect(),
6374            ),
6375            Expr::Map(entries) => Expr::Map(
6376                entries
6377                    .into_iter()
6378                    .map(|(k, v)| {
6379                        (
6380                            k,
6381                            Self::replace_expression(v, schema_expr, query_var, gen_col),
6382                        )
6383                    })
6384                    .collect(),
6385            ),
6386            Expr::Property(e, prop) => Expr::Property(
6387                Box::new(Self::replace_expression(
6388                    *e,
6389                    schema_expr,
6390                    query_var,
6391                    gen_col,
6392                )),
6393                prop,
6394            ),
6395            Expr::Case {
6396                expr: case_expr,
6397                when_then,
6398                else_expr,
6399            } => Expr::Case {
6400                expr: case_expr.map(|e| {
6401                    Box::new(Self::replace_expression(
6402                        *e,
6403                        schema_expr,
6404                        query_var,
6405                        gen_col,
6406                    ))
6407                }),
6408                when_then: when_then
6409                    .into_iter()
6410                    .map(|(w, t)| {
6411                        (
6412                            Self::replace_expression(w, schema_expr, query_var, gen_col),
6413                            Self::replace_expression(t, schema_expr, query_var, gen_col),
6414                        )
6415                    })
6416                    .collect(),
6417                else_expr: else_expr.map(|e| {
6418                    Box::new(Self::replace_expression(
6419                        *e,
6420                        schema_expr,
6421                        query_var,
6422                        gen_col,
6423                    ))
6424                }),
6425            },
6426            Expr::Reduce {
6427                accumulator,
6428                init,
6429                variable: reduce_var,
6430                list,
6431                expr: reduce_expr,
6432            } => Expr::Reduce {
6433                accumulator,
6434                init: Box::new(Self::replace_expression(
6435                    *init,
6436                    schema_expr,
6437                    query_var,
6438                    gen_col,
6439                )),
6440                variable: reduce_var,
6441                list: Box::new(Self::replace_expression(
6442                    *list,
6443                    schema_expr,
6444                    query_var,
6445                    gen_col,
6446                )),
6447                expr: Box::new(Self::replace_expression(
6448                    *reduce_expr,
6449                    schema_expr,
6450                    query_var,
6451                    gen_col,
6452                )),
6453            },
6454
6455            // Leaf nodes (Identifier, Literal, Parameter, etc.) need no recursion
6456            _ => expr,
6457        }
6458    }
6459
6460    /// Returns `true` iff `variable` is bound to a `ScanAll` operator
6461    /// (somewhere under `plan`). Used to gate the
6462    /// `WHERE n:A OR n:B` → `Union(Scan{A}, Scan{B})` rewrite — we only
6463    /// fire it when the variable is currently doing a full vertex scan,
6464    /// not when it's already bound to a labeled `Scan`.
6465    fn is_scan_all_for(plan: &LogicalPlan, variable: &str) -> bool {
6466        match plan {
6467            LogicalPlan::ScanAll { variable: var, .. } => var == variable,
6468            LogicalPlan::Filter { input, .. }
6469            | LogicalPlan::Project { input, .. }
6470            | LogicalPlan::Sort { input, .. }
6471            | LogicalPlan::Limit { input, .. }
6472            | LogicalPlan::Aggregate { input, .. }
6473            | LogicalPlan::Apply { input, .. }
6474            | LogicalPlan::Traverse { input, .. } => Self::is_scan_all_for(input, variable),
6475            LogicalPlan::CrossJoin { left, right } => {
6476                Self::is_scan_all_for(left, variable) || Self::is_scan_all_for(right, variable)
6477            }
6478            LogicalPlan::Union { left, right, .. } => {
6479                Self::is_scan_all_for(left, variable) || Self::is_scan_all_for(right, variable)
6480            }
6481            _ => false,
6482        }
6483    }
6484
6485    /// Replace the `ScanAll` for `variable` in `plan` with a left-leaning
6486    /// `Union` of label-scoped `Scan` (or `ScanMainByLabels` for unknown
6487    /// labels) operators built from `labels`. Used by the
6488    /// `WHERE n:A OR n:B` rewrite.
6489    fn replace_scan_all_with_label_union(
6490        &self,
6491        plan: LogicalPlan,
6492        variable: &str,
6493        labels: &[String],
6494        optional: bool,
6495    ) -> LogicalPlan {
6496        match plan {
6497            LogicalPlan::ScanAll {
6498                variable: var,
6499                filter,
6500                optional: scan_optional,
6501            } if var == variable => {
6502                // Heterogeneous (or any-schemaless) disjunction: route every
6503                // branch through a single-label `ScanMainByLabels` so all
6504                // branches emit a uniform schemaless schema. Avoids the
6505                // DataFusion `union_schema` panic. See `plan_unbound_node`
6506                // and issue rustic-ai/uni-db#62.
6507                let use_main_table_branches = !self.label_branches_share_property_schema(labels);
6508
6509                let mut branches: Vec<LogicalPlan> = Vec::with_capacity(labels.len());
6510                for label in labels {
6511                    let branch = if use_main_table_branches {
6512                        LogicalPlan::ScanMainByLabels {
6513                            labels: vec![label.clone()],
6514                            variable: variable.to_string(),
6515                            filter: filter.clone(),
6516                            optional: scan_optional || optional,
6517                        }
6518                    } else {
6519                        let meta = self
6520                            .schema
6521                            .get_label_case_insensitive(label)
6522                            .expect("share_property_schema true implies all labels in schema");
6523                        LogicalPlan::Scan {
6524                            label_id: meta.id,
6525                            labels: vec![label.clone()],
6526                            variable: variable.to_string(),
6527                            filter: filter.clone(),
6528                            optional: scan_optional || optional,
6529                        }
6530                    };
6531                    branches.push(branch);
6532                }
6533                let mut iter = branches.into_iter();
6534                let mut union_plan = iter.next().expect("at least one label");
6535                for next in iter {
6536                    union_plan = LogicalPlan::Union {
6537                        left: Box::new(union_plan),
6538                        right: Box::new(next),
6539                        all: false,
6540                    };
6541                }
6542                union_plan
6543            }
6544            LogicalPlan::Filter {
6545                input,
6546                predicate,
6547                optional_variables,
6548            } => LogicalPlan::Filter {
6549                input: Box::new(
6550                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6551                ),
6552                predicate,
6553                optional_variables,
6554            },
6555            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6556                input: Box::new(
6557                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6558                ),
6559                projections,
6560            },
6561            LogicalPlan::CrossJoin { left, right } => {
6562                if Self::is_scan_all_for(&left, variable) {
6563                    LogicalPlan::CrossJoin {
6564                        left: Box::new(
6565                            self.replace_scan_all_with_label_union(
6566                                *left, variable, labels, optional,
6567                            ),
6568                        ),
6569                        right,
6570                    }
6571                } else {
6572                    LogicalPlan::CrossJoin {
6573                        left,
6574                        right: Box::new(
6575                            self.replace_scan_all_with_label_union(
6576                                *right, variable, labels, optional,
6577                            ),
6578                        ),
6579                    }
6580                }
6581            }
6582            LogicalPlan::Traverse {
6583                input,
6584                edge_type_ids,
6585                direction,
6586                source_variable,
6587                target_variable,
6588                target_label_id,
6589                step_variable,
6590                min_hops,
6591                max_hops,
6592                optional: trav_optional,
6593                target_filter,
6594                path_variable,
6595                edge_properties,
6596                is_variable_length,
6597                optional_pattern_vars,
6598                scope_match_variables,
6599                edge_filter_expr,
6600                path_mode,
6601                qpp_steps,
6602            } => LogicalPlan::Traverse {
6603                input: Box::new(
6604                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6605                ),
6606                edge_type_ids,
6607                direction,
6608                source_variable,
6609                target_variable,
6610                target_label_id,
6611                step_variable,
6612                min_hops,
6613                max_hops,
6614                optional: trav_optional,
6615                target_filter,
6616                path_variable,
6617                edge_properties,
6618                is_variable_length,
6619                optional_pattern_vars,
6620                scope_match_variables,
6621                edge_filter_expr,
6622                path_mode,
6623                qpp_steps,
6624            },
6625            other => other,
6626        }
6627    }
6628
6629    /// Returns `Some(())` iff `variable` is the `step_variable` (i.e. the
6630    /// edge variable) of some `Traverse` operator in `plan`. Used to gate
6631    /// the `WHERE type(r) = 'A' OR type(r) = 'B'` rewrite — we need a
6632    /// Traverse whose types we can merge into.
6633    fn merge_traverse_types_for(
6634        plan: &LogicalPlan,
6635        edge_var: &str,
6636        _types: &[String],
6637    ) -> Option<()> {
6638        match plan {
6639            LogicalPlan::Traverse {
6640                step_variable,
6641                input,
6642                ..
6643            } => {
6644                if step_variable.as_deref() == Some(edge_var) {
6645                    Some(())
6646                } else {
6647                    Self::merge_traverse_types_for(input, edge_var, _types)
6648                }
6649            }
6650            LogicalPlan::Filter { input, .. }
6651            | LogicalPlan::Project { input, .. }
6652            | LogicalPlan::Sort { input, .. }
6653            | LogicalPlan::Limit { input, .. }
6654            | LogicalPlan::Aggregate { input, .. }
6655            | LogicalPlan::Apply { input, .. } => {
6656                Self::merge_traverse_types_for(input, edge_var, _types)
6657            }
6658            LogicalPlan::CrossJoin { left, right } | LogicalPlan::Union { left, right, .. } => {
6659                Self::merge_traverse_types_for(left, edge_var, _types)
6660                    .or_else(|| Self::merge_traverse_types_for(right, edge_var, _types))
6661            }
6662            _ => None,
6663        }
6664    }
6665
6666    /// Replace `edge_type_ids` on the Traverse whose `step_variable`
6667    /// equals `edge_var`. Used by the type-OR rewrite.
6668    fn set_traverse_edge_type_ids(
6669        plan: LogicalPlan,
6670        edge_var: &str,
6671        new_ids: Vec<u32>,
6672    ) -> LogicalPlan {
6673        match plan {
6674            LogicalPlan::Traverse {
6675                input,
6676                edge_type_ids,
6677                direction,
6678                source_variable,
6679                target_variable,
6680                target_label_id,
6681                step_variable,
6682                min_hops,
6683                max_hops,
6684                optional,
6685                target_filter,
6686                path_variable,
6687                edge_properties,
6688                is_variable_length,
6689                optional_pattern_vars,
6690                scope_match_variables,
6691                edge_filter_expr,
6692                path_mode,
6693                qpp_steps,
6694            } => {
6695                let matches_var = step_variable.as_deref() == Some(edge_var);
6696                let recursed_input = if matches_var {
6697                    input
6698                } else {
6699                    Box::new(Self::set_traverse_edge_type_ids(
6700                        *input,
6701                        edge_var,
6702                        new_ids.clone(),
6703                    ))
6704                };
6705                LogicalPlan::Traverse {
6706                    input: recursed_input,
6707                    edge_type_ids: if matches_var { new_ids } else { edge_type_ids },
6708                    direction,
6709                    source_variable,
6710                    target_variable,
6711                    target_label_id,
6712                    step_variable,
6713                    min_hops,
6714                    max_hops,
6715                    optional,
6716                    target_filter,
6717                    path_variable,
6718                    edge_properties,
6719                    is_variable_length,
6720                    optional_pattern_vars,
6721                    scope_match_variables,
6722                    edge_filter_expr,
6723                    path_mode,
6724                    qpp_steps,
6725                }
6726            }
6727            LogicalPlan::Filter {
6728                input,
6729                predicate,
6730                optional_variables,
6731            } => LogicalPlan::Filter {
6732                input: Box::new(Self::set_traverse_edge_type_ids(*input, edge_var, new_ids)),
6733                predicate,
6734                optional_variables,
6735            },
6736            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6737                input: Box::new(Self::set_traverse_edge_type_ids(*input, edge_var, new_ids)),
6738                projections,
6739            },
6740            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6741                left: Box::new(Self::set_traverse_edge_type_ids(
6742                    *left,
6743                    edge_var,
6744                    new_ids.clone(),
6745                )),
6746                right: Box::new(Self::set_traverse_edge_type_ids(*right, edge_var, new_ids)),
6747            },
6748            other => other,
6749        }
6750    }
6751
6752    /// Check if the variable is the target of a Traverse node
6753    fn is_traverse_target(plan: &LogicalPlan, variable: &str) -> bool {
6754        match plan {
6755            LogicalPlan::Traverse {
6756                target_variable,
6757                input,
6758                ..
6759            } => target_variable == variable || Self::is_traverse_target(input, variable),
6760            LogicalPlan::Filter { input, .. }
6761            | LogicalPlan::Project { input, .. }
6762            | LogicalPlan::Sort { input, .. }
6763            | LogicalPlan::Limit { input, .. }
6764            | LogicalPlan::Aggregate { input, .. }
6765            | LogicalPlan::Apply { input, .. } => Self::is_traverse_target(input, variable),
6766            LogicalPlan::CrossJoin { left, right } => {
6767                Self::is_traverse_target(left, variable)
6768                    || Self::is_traverse_target(right, variable)
6769            }
6770            _ => false,
6771        }
6772    }
6773
6774    /// Push a predicate into a Traverse's target_filter for the specified variable
6775    fn push_predicate_to_traverse(
6776        plan: LogicalPlan,
6777        variable: &str,
6778        predicate: Expr,
6779    ) -> LogicalPlan {
6780        match plan {
6781            LogicalPlan::Traverse {
6782                input,
6783                edge_type_ids,
6784                direction,
6785                source_variable,
6786                target_variable,
6787                target_label_id,
6788                step_variable,
6789                min_hops,
6790                max_hops,
6791                optional,
6792                target_filter,
6793                path_variable,
6794                edge_properties,
6795                is_variable_length,
6796                optional_pattern_vars,
6797                scope_match_variables,
6798                edge_filter_expr,
6799                path_mode,
6800                qpp_steps,
6801            } => {
6802                if target_variable == variable {
6803                    // Found the traverse producing this variable
6804                    let new_filter = match target_filter {
6805                        Some(existing) => Some(Expr::BinaryOp {
6806                            left: Box::new(existing),
6807                            op: BinaryOp::And,
6808                            right: Box::new(predicate),
6809                        }),
6810                        None => Some(predicate),
6811                    };
6812                    LogicalPlan::Traverse {
6813                        input,
6814                        edge_type_ids,
6815                        direction,
6816                        source_variable,
6817                        target_variable,
6818                        target_label_id,
6819                        step_variable,
6820                        min_hops,
6821                        max_hops,
6822                        optional,
6823                        target_filter: new_filter,
6824                        path_variable,
6825                        edge_properties,
6826                        is_variable_length,
6827                        optional_pattern_vars,
6828                        scope_match_variables,
6829                        edge_filter_expr,
6830                        path_mode,
6831                        qpp_steps,
6832                    }
6833                } else {
6834                    // Recurse into input
6835                    LogicalPlan::Traverse {
6836                        input: Box::new(Self::push_predicate_to_traverse(
6837                            *input, variable, predicate,
6838                        )),
6839                        edge_type_ids,
6840                        direction,
6841                        source_variable,
6842                        target_variable,
6843                        target_label_id,
6844                        step_variable,
6845                        min_hops,
6846                        max_hops,
6847                        optional,
6848                        target_filter,
6849                        path_variable,
6850                        edge_properties,
6851                        is_variable_length,
6852                        optional_pattern_vars,
6853                        scope_match_variables,
6854                        edge_filter_expr,
6855                        path_mode,
6856                        qpp_steps,
6857                    }
6858                }
6859            }
6860            LogicalPlan::Filter {
6861                input,
6862                predicate: p,
6863                optional_variables: opt_vars,
6864            } => LogicalPlan::Filter {
6865                input: Box::new(Self::push_predicate_to_traverse(
6866                    *input, variable, predicate,
6867                )),
6868                predicate: p,
6869                optional_variables: opt_vars,
6870            },
6871            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6872                input: Box::new(Self::push_predicate_to_traverse(
6873                    *input, variable, predicate,
6874                )),
6875                projections,
6876            },
6877            LogicalPlan::CrossJoin { left, right } => {
6878                // Check which side has the variable
6879                if Self::is_traverse_target(&left, variable) {
6880                    LogicalPlan::CrossJoin {
6881                        left: Box::new(Self::push_predicate_to_traverse(
6882                            *left, variable, predicate,
6883                        )),
6884                        right,
6885                    }
6886                } else {
6887                    LogicalPlan::CrossJoin {
6888                        left,
6889                        right: Box::new(Self::push_predicate_to_traverse(
6890                            *right, variable, predicate,
6891                        )),
6892                    }
6893                }
6894            }
6895            other => other,
6896        }
6897    }
6898
6899    /// Plan a WITH clause, handling aggregations and projections.
        ///
        /// Stages, in the order they are applied on top of `plan`:
        /// 1. Each WITH item is validated and classified as a bare aggregate, an
        ///    expression containing aggregates, or a grouping/projection expression.
        /// 2. In-scope variables referenced by WHERE (and by ORDER BY when the WITH
        ///    has no aggregation) but not projected are appended to the projection
        ///    list as temporary passthrough columns.
        /// 3. With aggregation: an `Aggregate` node followed by a renaming `Project`;
        ///    otherwise a plain `Project` (when there is anything to project).
        /// 4. `Filter` for WHERE, `Sort` for ORDER BY, `Limit` for SKIP/LIMIT.
        /// 5. A cleanup `Project` that keeps only `new_vars` when passthrough columns
        ///    were added, then `Distinct` when `with_clause.distinct` is set.
        ///
        /// Returns the new plan together with the variables visible after the WITH
        /// (aliases, passed-through variables, or every in-scope variable for `*`).
        ///
        /// # Errors
        ///
        /// Propagates failures from the expression validators and from
        /// `parse_non_negative_integer`, and returns an error whose message starts
        /// with `SyntaxError:` for: pattern predicates in a WITH item, duplicate
        /// output column names, a compound aggregate expression that references a
        /// non-grouped variable or property, aggregate functions in ORDER BY when
        /// the WITH itself does not aggregate, and non-variable expressions without
        /// an alias.
6900    fn plan_with_clause(
6901        &self,
6902        with_clause: &WithClause,
6903        plan: LogicalPlan,
6904        vars_in_scope: &[VariableInfo],
6905    ) -> Result<(LogicalPlan, Vec<VariableInfo>)> {
6906        let mut plan = plan;
6907        let mut group_by: Vec<Expr> = Vec::new();
6908        let mut aggregates: Vec<Expr> = Vec::new();
6909        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
6910        let mut has_agg = false;
6911        let mut projections = Vec::new();
6912        let mut new_vars: Vec<VariableInfo> = Vec::new();
6913        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
6914        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
6915        let mut projected_aliases: HashSet<String> = HashSet::new();
6916        let mut has_unaliased_non_variable_expr = false;
6917
6918        for item in &with_clause.items {
6919            match item {
6920                ReturnItem::All => {
6921                    // WITH * - add all variables in scope
                        // NOTE(review): `*` entries are added to `projections` but not to
                        // `group_by`; confirm how `WITH *, <aggregate>` is expected to group.
6922                    for v in vars_in_scope {
6923                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
6924                        projected_aliases.insert(v.name.clone());
6925                        projected_simple_reprs.insert(v.name.clone());
6926                    }
6927                    new_vars.extend(vars_in_scope.iter().cloned());
6928                }
6929                ReturnItem::Expr { expr, alias, .. } => {
                        // A wildcard expression is handled exactly like `ReturnItem::All`.
6930                    if matches!(expr, Expr::Wildcard) {
6931                        for v in vars_in_scope {
6932                            projections
6933                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
6934                            projected_aliases.insert(v.name.clone());
6935                            projected_simple_reprs.insert(v.name.clone());
6936                        }
6937                        new_vars.extend(vars_in_scope.iter().cloned());
6938                    } else {
6939                        // Validate expression variables and syntax
6940                        validate_expression_variables(expr, vars_in_scope)?;
6941                        validate_expression(expr, vars_in_scope)?;
6942                        // Pattern predicates are not allowed in WITH
6943                        if contains_pattern_predicate(expr) {
6944                            return Err(anyhow!(
6945                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in WITH"
6946                            ));
6947                        }
6948
6949                        projections.push((expr.clone(), alias.clone()));
6950                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
6951                            // Bare aggregate — push directly
6952                            has_agg = true;
6953                            aggregates.push(expr.clone());
6954                            projected_aggregate_reprs.insert(expr.to_string_repr());
6955                        } else if !is_window_function(expr)
6956                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
6957                        {
6958                            // Compound aggregate or expression containing aggregates
6959                            has_agg = true;
6960                            compound_agg_exprs.push(expr.clone());
                                // Register each inner aggregate once, keyed by its string repr.
6961                            for inner in extract_inner_aggregates(expr) {
6962                                let repr = inner.to_string_repr();
6963                                if !projected_aggregate_reprs.contains(&repr) {
6964                                    aggregates.push(inner);
6965                                    projected_aggregate_reprs.insert(repr);
6966                                }
6967                            }
6968                        } else if !group_by.contains(expr) {
                                // Everything else (including window-function expressions that
                                // the branch above skips) becomes a de-duplicated grouping key.
6969                            group_by.push(expr.clone());
6970                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
6971                                projected_simple_reprs.insert(expr.to_string_repr());
6972                            }
6973                        }
6974
6975                        // Preserve non-scalar type information when WITH aliases
6976                        // entity/path-capable expressions.
6977                        if let Some(a) = alias {
6978                            if projected_aliases.contains(a) {
6979                                return Err(anyhow!(
6980                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
6981                                    a
6982                                ));
6983                            }
6984                            let inferred = infer_with_output_type(expr, vars_in_scope);
6985                            new_vars.push(VariableInfo::new(a.clone(), inferred));
6986                            projected_aliases.insert(a.clone());
6987                        } else if let Expr::Variable(v) = expr {
6988                            if projected_aliases.contains(v) {
6989                                return Err(anyhow!(
6990                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
6991                                    v
6992                                ));
6993                            }
6994                            // Preserve the original type if the variable is just passed through
6995                            if let Some(existing) = find_var_in_scope(vars_in_scope, v) {
6996                                new_vars.push(existing.clone());
6997                            } else {
6998                                new_vars.push(VariableInfo::new(v.clone(), VariableType::Scalar));
6999                            }
7000                            projected_aliases.insert(v.clone());
7001                        } else {
                                // Only recorded here; the error is raised after ORDER BY
                                // validation further down.
7002                            has_unaliased_non_variable_expr = true;
7003                        }
7004                    }
7005                }
7006            }
7007        }
7008
7009        // Collect extra variables that need to survive the projection stage
7010        // for later WHERE / ORDER BY evaluation, then strip them afterwards.
7011        let projected_names: HashSet<&str> = new_vars.iter().map(|v| v.name.as_str()).collect();
7012        let mut passthrough_extras: Vec<String> = Vec::new();
            // Guards against adding the same passthrough variable twice.
7013        let mut seen_passthrough: HashSet<String> = HashSet::new();
7014
7015        if let Some(predicate) = &with_clause.where_clause {
7016            for name in collect_expr_variables(predicate) {
7017                if !projected_names.contains(name.as_str())
7018                    && find_var_in_scope(vars_in_scope, &name).is_some()
7019                    && seen_passthrough.insert(name.clone())
7020                {
7021                    passthrough_extras.push(name);
7022                }
7023            }
7024        }
7025
7026        // Non-aggregating WITH allows ORDER BY to reference incoming variables.
7027        // Carry those variables through the projection so Sort can resolve them.
7028        if !has_agg && let Some(order_by) = &with_clause.order_by {
7029            for item in order_by {
7030                for name in collect_expr_variables(&item.expr) {
7031                    if !projected_names.contains(name.as_str())
7032                        && find_var_in_scope(vars_in_scope, &name).is_some()
7033                        && seen_passthrough.insert(name.clone())
7034                    {
7035                        passthrough_extras.push(name);
7036                    }
7037                }
7038            }
7039        }
7040
7041        let needs_cleanup = !passthrough_extras.is_empty();
7042        for extra in &passthrough_extras {
7043            projections.push((Expr::Variable(extra.clone()), Some(extra.clone())));
7044        }
7045
7046        // Validate compound aggregate expressions: non-aggregate refs must be
7047        // individually present in the group_by as simple variables or properties.
7048        if has_agg {
7049            let group_by_reprs: HashSet<String> =
7050                group_by.iter().map(|e| e.to_string_repr()).collect();
7051            for expr in &compound_agg_exprs {
7052                let mut refs = Vec::new();
7053                collect_non_aggregate_refs(expr, false, &mut refs);
7054                for r in &refs {
7055                    let is_covered = match r {
7056                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
7057                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
7058                    };
7059                    if !is_covered {
7060                        return Err(anyhow!(
7061                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
7062                        ));
7063                    }
7064                }
7065            }
7066        }
7067
7068        if has_agg {
7069            plan = LogicalPlan::Aggregate {
7070                input: Box::new(plan),
7071                group_by,
7072                aggregates,
7073            };
7074
7075            // Insert a renaming Project so downstream clauses (WHERE, RETURN)
7076            // can reference the WITH aliases instead of raw column names.
7077            let rename_projections: Vec<(Expr, Option<String>)> = projections
7078                .iter()
7079                .map(|(expr, alias)| {
7080                    if expr.is_aggregate() && !is_compound_aggregate(expr) {
7081                        // Bare aggregate — reference by column name
7082                        (Expr::Variable(aggregate_column_name(expr)), alias.clone())
7083                    } else if is_compound_aggregate(expr)
7084                        || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
7085                    {
7086                        // Compound aggregate — replace inner aggregates with
7087                        // column references, keep outer expression
7088                        (replace_aggregates_with_columns(expr), alias.clone())
7089                    } else {
                            // Non-aggregate item: referenced through its string repr —
                            // presumably the column name the Aggregate emits for a grouping
                            // key (TODO confirm against the Aggregate implementation).
7090                        (Expr::Variable(expr.to_string_repr()), alias.clone())
7091                    }
7092                })
7093                .collect();
7094            plan = LogicalPlan::Project {
7095                input: Box::new(plan),
7096                projections: rename_projections,
7097            };
7098        } else if !projections.is_empty() {
7099            plan = LogicalPlan::Project {
7100                input: Box::new(plan),
7101                projections: projections.clone(),
7102            };
7103        }
7104
7105        // Apply the WHERE filter (post-projection, with extras still visible).
7106        if let Some(predicate) = &with_clause.where_clause {
7107            plan = LogicalPlan::Filter {
7108                input: Box::new(plan),
7109                predicate: predicate.clone(),
7110                optional_variables: HashSet::new(),
7111            };
7112        }
7113
7114        // Validate and apply ORDER BY for WITH clause.
7115        // Keep pre-WITH vars in scope for parser compatibility, then apply
7116        // stricter checks for aggregate-containing ORDER BY items.
7117        if let Some(order_by) = &with_clause.order_by {
7118            // Build a mapping from aliases and projected expression reprs to
7119            // output columns of the preceding Project/Aggregate pipeline.
7120            let with_order_aliases: HashMap<String, Expr> = projections
7121                .iter()
7122                .flat_map(|(expr, alias)| {
7123                    let output_col = if let Some(a) = alias {
7124                        a.clone()
7125                    } else if expr.is_aggregate() && !is_compound_aggregate(expr) {
7126                        aggregate_column_name(expr)
7127                    } else {
7128                        expr.to_string_repr()
7129                    };
7130
7131                    let mut entries = Vec::new();
7132                    // ORDER BY alias
7133                    if let Some(a) = alias {
7134                        entries.push((a.clone(), Expr::Variable(output_col.clone())));
7135                    }
7136                    // ORDER BY projected expression (e.g. me.age)
7137                    entries.push((expr.to_string_repr(), Expr::Variable(output_col)));
7138                    entries
7139                })
7140                .collect();
7141
                // Validation scope: the WITH outputs first, then any pre-WITH variable
                // whose name is not already among them.
7142            let order_by_scope: Vec<VariableInfo> = {
7143                let mut scope = new_vars.clone();
7144                for v in vars_in_scope {
7145                    if !is_var_in_scope(&scope, &v.name) {
7146                        scope.push(v.clone());
7147                    }
7148                }
7149                scope
7150            };
7151            for item in order_by {
7152                validate_expression_variables(&item.expr, &order_by_scope)?;
7153                validate_expression(&item.expr, &order_by_scope)?;
7154                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
7155                if has_aggregate_in_item && !has_agg {
7156                    return Err(anyhow!(
7157                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY of WITH"
7158                    ));
7159                }
7160                if has_agg && has_aggregate_in_item {
7161                    validate_with_order_by_aggregate_item(
7162                        &item.expr,
7163                        &projected_aggregate_reprs,
7164                        &projected_simple_reprs,
7165                        &projected_aliases,
7166                    )?;
7167                }
7168            }
7169            let rewritten_order_by: Vec<SortItem> = order_by
7170                .iter()
7171                .map(|item| {
7172                    let mut expr =
7173                        rewrite_order_by_expr_with_aliases(&item.expr, &with_order_aliases);
7174                    if has_agg {
7175                        // Rewrite any aggregate calls to the aggregate output
7176                        // columns produced by Aggregate.
7177                        expr = replace_aggregates_with_columns(&expr);
7178                        // Then re-map projected property expressions to aliases
7179                        // from the WITH projection.
7180                        expr = rewrite_order_by_expr_with_aliases(&expr, &with_order_aliases);
7181                    }
7182                    SortItem {
7183                        expr,
7184                        ascending: item.ascending,
7185                    }
7186                })
7187                .collect();
7188            plan = LogicalPlan::Sort {
7189                input: Box::new(plan),
7190                order_by: rewritten_order_by,
7191            };
7192        }
7193
7194        // Non-variable expressions in WITH must be aliased.
7195        // This check is intentionally placed after ORDER BY validation so
7196        // higher-priority semantic errors (e.g., ambiguous aggregation in
7197        // ORDER BY) can surface first.
7198        if has_unaliased_non_variable_expr {
7199            return Err(anyhow!(
7200                "SyntaxError: NoExpressionAlias - All non-variable expressions in WITH must be aliased"
7201            ));
7202        }
7203
7204        // Validate and apply SKIP/LIMIT for WITH clause
7205        let skip = with_clause
7206            .skip
7207            .as_ref()
7208            .map(|e| {
7209                self.note_folded_limit_skip(e);
7210                parse_non_negative_integer(e, "SKIP", &self.params)
7211            })
7212            .transpose()?
7213            .flatten();
7214        let fetch = with_clause
7215            .limit
7216            .as_ref()
7217            .map(|e| {
7218                self.note_folded_limit_skip(e);
7219                parse_non_negative_integer(e, "LIMIT", &self.params)
7220            })
7221            .transpose()?
7222            .flatten();
7223
            // A Limit node is only emitted when at least one bound was resolved.
7224        if skip.is_some() || fetch.is_some() {
7225            plan = LogicalPlan::Limit {
7226                input: Box::new(plan),
7227                skip,
7228                fetch,
7229            };
7230        }
7231
7232        // Strip passthrough columns that were only needed by WHERE / ORDER BY.
7233        if needs_cleanup {
7234            let cleanup_projections: Vec<(Expr, Option<String>)> = new_vars
7235                .iter()
7236                .map(|v| (Expr::Variable(v.name.clone()), Some(v.name.clone())))
7237                .collect();
7238            plan = LogicalPlan::Project {
7239                input: Box::new(plan),
7240                projections: cleanup_projections,
7241            };
7242        }
7243
            // NOTE(review): Distinct wraps the plan after Sort and Limit, so
            // de-duplication happens after SKIP/LIMIT have been applied — confirm this
            // ordering is intended.
7244        if with_clause.distinct {
7245            plan = LogicalPlan::Distinct {
7246                input: Box::new(plan),
7247            };
7248        }
7249
7250        Ok((plan, new_vars))
7251    }
7252
7253    fn plan_with_recursive(
7254        &self,
7255        with_recursive: &WithRecursiveClause,
7256        _prev_plan: LogicalPlan,
7257        vars_in_scope: &[VariableInfo],
7258    ) -> Result<LogicalPlan> {
7259        // WITH RECURSIVE requires a UNION query with anchor and recursive parts
7260        match &*with_recursive.query {
7261            Query::Union { left, right, .. } => {
7262                // Plan the anchor (initial) query with current scope
7263                let initial_plan = self.rewrite_and_plan_typed(*left.clone(), vars_in_scope)?;
7264
7265                // Plan the recursive query with the CTE name added to scope
7266                // so it can reference itself
7267                let mut recursive_scope = vars_in_scope.to_vec();
7268                recursive_scope.push(VariableInfo::new(
7269                    with_recursive.name.clone(),
7270                    VariableType::Scalar,
7271                ));
7272                let recursive_plan =
7273                    self.rewrite_and_plan_typed(*right.clone(), &recursive_scope)?;
7274
7275                Ok(LogicalPlan::RecursiveCTE {
7276                    cte_name: with_recursive.name.clone(),
7277                    initial: Box::new(initial_plan),
7278                    recursive: Box::new(recursive_plan),
7279                })
7280            }
7281            _ => Err(anyhow::anyhow!(
7282                "WITH RECURSIVE requires a UNION query with anchor and recursive parts"
7283            )),
7284        }
7285    }
7286
7287    pub fn properties_to_expr(&self, variable: &str, properties: &Option<Expr>) -> Option<Expr> {
7288        let entries = match properties {
7289            Some(Expr::Map(entries)) => entries,
7290            _ => return None,
7291        };
7292
7293        if entries.is_empty() {
7294            return None;
7295        }
7296        let mut final_expr = None;
7297        for (prop, val_expr) in entries {
7298            let eq_expr = Expr::BinaryOp {
7299                left: Box::new(Expr::Property(
7300                    Box::new(Expr::Variable(variable.to_string())),
7301                    prop.clone(),
7302                )),
7303                op: BinaryOp::Eq,
7304                right: Box::new(val_expr.clone()),
7305            };
7306
7307            if let Some(e) = final_expr {
7308                final_expr = Some(Expr::BinaryOp {
7309                    left: Box::new(e),
7310                    op: BinaryOp::And,
7311                    right: Box::new(eq_expr),
7312                });
7313            } else {
7314                final_expr = Some(eq_expr);
7315            }
7316        }
7317        final_expr
7318    }
7319
7320    /// Build a filter expression from node properties and labels.
7321    ///
7322    /// This is used for TraverseMainByType where we need to filter target nodes
7323    /// by both labels and properties. Label checks use hasLabel(variable, 'label').
7324    pub fn node_filter_expr(
7325        &self,
7326        variable: &str,
7327        labels: &[String],
7328        properties: &Option<Expr>,
7329    ) -> Option<Expr> {
7330        let mut final_expr = None;
7331
7332        // Add label checks using hasLabel(variable, 'label')
7333        for label in labels {
7334            let label_check = Expr::FunctionCall {
7335                name: "hasLabel".to_string(),
7336                args: vec![
7337                    Expr::Variable(variable.to_string()),
7338                    Expr::Literal(CypherLiteral::String(label.clone())),
7339                ],
7340                distinct: false,
7341                window_spec: None,
7342            };
7343
7344            final_expr = match final_expr {
7345                Some(e) => Some(Expr::BinaryOp {
7346                    left: Box::new(e),
7347                    op: BinaryOp::And,
7348                    right: Box::new(label_check),
7349                }),
7350                None => Some(label_check),
7351            };
7352        }
7353
7354        // Add property checks
7355        if let Some(prop_expr) = self.properties_to_expr(variable, properties) {
7356            final_expr = match final_expr {
7357                Some(e) => Some(Expr::BinaryOp {
7358                    left: Box::new(e),
7359                    op: BinaryOp::And,
7360                    right: Box::new(prop_expr),
7361                }),
7362                None => Some(prop_expr),
7363            };
7364        }
7365
7366        final_expr
7367    }
7368
7369    /// Create a filter plan that ensures traversed target matches a bound variable.
7370    ///
7371    /// Used in EXISTS subquery patterns where the target is already bound.
7372    /// Compares the target's VID against the bound variable's VID.
7373    fn wrap_with_bound_target_filter(plan: LogicalPlan, target_variable: &str) -> LogicalPlan {
7374        // Compare the traverse-discovered target's VID against the bound variable's VID.
7375        // Left side: Property access on the variable from current scope.
7376        // Right side: Variable column "{var}._vid" from traverse output (outer scope).
7377        // We use Variable("{var}._vid") to access the VID column from the traverse output,
7378        // not Property(Variable("{var}"), "_vid") because the column is already flattened.
7379        let bound_check = Expr::BinaryOp {
7380            left: Box::new(Expr::Property(
7381                Box::new(Expr::Variable(target_variable.to_string())),
7382                "_vid".to_string(),
7383            )),
7384            op: BinaryOp::Eq,
7385            right: Box::new(Expr::Variable(format!("{}._vid", target_variable))),
7386        };
7387        LogicalPlan::Filter {
7388            input: Box::new(plan),
7389            predicate: bound_check,
7390            optional_variables: HashSet::new(),
7391        }
7392    }
7393
7394    /// Replace a Scan node matching the variable with a VectorKnn node
7395    fn replace_scan_with_knn(
7396        plan: LogicalPlan,
7397        variable: &str,
7398        property: &str,
7399        query: Expr,
7400        threshold: Option<f32>,
7401    ) -> LogicalPlan {
7402        match plan {
7403            LogicalPlan::Scan {
7404                label_id,
7405                labels,
7406                variable: scan_var,
7407                filter,
7408                optional,
7409            } => {
7410                if scan_var == variable {
7411                    // Inject any existing scan filter into VectorKnn?
7412                    // VectorKnn doesn't support pre-filtering natively in logical plan yet (except threshold).
7413                    // Typically filter is applied post-Knn or during Knn if supported.
7414                    // For now, we assume filter is residual or handled by `extract_vector_similarity` which separates residual.
7415                    // If `filter` is present on Scan, it must be preserved.
7416                    // We can wrap VectorKnn in Filter if Scan had filter.
7417
7418                    let knn = LogicalPlan::VectorKnn {
7419                        label_id,
7420                        variable: variable.to_string(),
7421                        property: property.to_string(),
7422                        query,
7423                        k: 100, // Default K, should push down LIMIT
7424                        threshold,
7425                    };
7426
7427                    if let Some(f) = filter {
7428                        LogicalPlan::Filter {
7429                            input: Box::new(knn),
7430                            predicate: f,
7431                            optional_variables: HashSet::new(),
7432                        }
7433                    } else {
7434                        knn
7435                    }
7436                } else {
7437                    LogicalPlan::Scan {
7438                        label_id,
7439                        labels,
7440                        variable: scan_var,
7441                        filter,
7442                        optional,
7443                    }
7444                }
7445            }
7446            LogicalPlan::Filter {
7447                input,
7448                predicate,
7449                optional_variables,
7450            } => LogicalPlan::Filter {
7451                input: Box::new(Self::replace_scan_with_knn(
7452                    *input, variable, property, query, threshold,
7453                )),
7454                predicate,
7455                optional_variables,
7456            },
7457            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7458                input: Box::new(Self::replace_scan_with_knn(
7459                    *input, variable, property, query, threshold,
7460                )),
7461                projections,
7462            },
7463            LogicalPlan::Limit { input, skip, fetch } => {
7464                // If we encounter Limit, we should ideally push K down to VectorKnn
7465                // But replace_scan_with_knn is called from plan_where_clause which is inside plan_match.
7466                // Limit comes later.
7467                // To support Limit pushdown, we need a separate optimizer pass or do it in plan_single.
7468                LogicalPlan::Limit {
7469                    input: Box::new(Self::replace_scan_with_knn(
7470                        *input, variable, property, query, threshold,
7471                    )),
7472                    skip,
7473                    fetch,
7474                }
7475            }
7476            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
7477                left: Box::new(Self::replace_scan_with_knn(
7478                    *left,
7479                    variable,
7480                    property,
7481                    query.clone(),
7482                    threshold,
7483                )),
7484                right: Box::new(Self::replace_scan_with_knn(
7485                    *right, variable, property, query, threshold,
7486                )),
7487            },
7488            other => other,
7489        }
7490    }
7491
7492    /// Find the label_id for a Scan node matching the given variable
7493    fn find_scan_label_id(plan: &LogicalPlan, variable: &str) -> Option<u16> {
7494        match plan {
7495            LogicalPlan::Scan {
7496                label_id,
7497                variable: var,
7498                ..
7499            } if var == variable => Some(*label_id),
7500            LogicalPlan::ScanAll { variable: var, .. } if var == variable => Some(0),
7501            LogicalPlan::Filter { input, .. }
7502            | LogicalPlan::Project { input, .. }
7503            | LogicalPlan::Sort { input, .. }
7504            | LogicalPlan::Limit { input, .. }
7505            | LogicalPlan::Aggregate { input, .. }
7506            | LogicalPlan::Apply { input, .. } => Self::find_scan_label_id(input, variable),
7507            LogicalPlan::CrossJoin { left, right } => Self::find_scan_label_id(left, variable)
7508                .or_else(|| Self::find_scan_label_id(right, variable)),
7509            LogicalPlan::Traverse { input, .. } => Self::find_scan_label_id(input, variable),
7510            _ => None,
7511        }
7512    }
7513
7514    /// Push a predicate into a Scan's filter for the specified variable
7515    fn push_predicate_to_scan(plan: LogicalPlan, variable: &str, predicate: Expr) -> LogicalPlan {
7516        match plan {
7517            LogicalPlan::Scan {
7518                label_id,
7519                labels,
7520                variable: var,
7521                filter,
7522                optional,
7523            } if var == variable => {
7524                // Merge the predicate with existing filter
7525                let new_filter = match filter {
7526                    Some(existing) => Some(Expr::BinaryOp {
7527                        left: Box::new(existing),
7528                        op: BinaryOp::And,
7529                        right: Box::new(predicate),
7530                    }),
7531                    None => Some(predicate),
7532                };
7533                LogicalPlan::Scan {
7534                    label_id,
7535                    labels,
7536                    variable: var,
7537                    filter: new_filter,
7538                    optional,
7539                }
7540            }
7541            LogicalPlan::ScanAll {
7542                variable: var,
7543                filter,
7544                optional,
7545            } if var == variable => {
7546                let new_filter = match filter {
7547                    Some(existing) => Some(Expr::BinaryOp {
7548                        left: Box::new(existing),
7549                        op: BinaryOp::And,
7550                        right: Box::new(predicate),
7551                    }),
7552                    None => Some(predicate),
7553                };
7554                LogicalPlan::ScanAll {
7555                    variable: var,
7556                    filter: new_filter,
7557                    optional,
7558                }
7559            }
7560            LogicalPlan::Filter {
7561                input,
7562                predicate: p,
7563                optional_variables: opt_vars,
7564            } => LogicalPlan::Filter {
7565                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7566                predicate: p,
7567                optional_variables: opt_vars,
7568            },
7569            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7570                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7571                projections,
7572            },
7573            LogicalPlan::CrossJoin { left, right } => {
7574                // Check which side has the variable
7575                if Self::find_scan_label_id(&left, variable).is_some() {
7576                    LogicalPlan::CrossJoin {
7577                        left: Box::new(Self::push_predicate_to_scan(*left, variable, predicate)),
7578                        right,
7579                    }
7580                } else {
7581                    LogicalPlan::CrossJoin {
7582                        left,
7583                        right: Box::new(Self::push_predicate_to_scan(*right, variable, predicate)),
7584                    }
7585                }
7586            }
7587            LogicalPlan::Traverse {
7588                input,
7589                edge_type_ids,
7590                direction,
7591                source_variable,
7592                target_variable,
7593                target_label_id,
7594                step_variable,
7595                min_hops,
7596                max_hops,
7597                optional,
7598                target_filter,
7599                path_variable,
7600                edge_properties,
7601                is_variable_length,
7602                optional_pattern_vars,
7603                scope_match_variables,
7604                edge_filter_expr,
7605                path_mode,
7606                qpp_steps,
7607            } => LogicalPlan::Traverse {
7608                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7609                edge_type_ids,
7610                direction,
7611                source_variable,
7612                target_variable,
7613                target_label_id,
7614                step_variable,
7615                min_hops,
7616                max_hops,
7617                optional,
7618                target_filter,
7619                path_variable,
7620                edge_properties,
7621                is_variable_length,
7622                optional_pattern_vars,
7623                scope_match_variables,
7624                edge_filter_expr,
7625                path_mode,
7626                qpp_steps,
7627            },
7628            other => other,
7629        }
7630    }
7631
7632    /// Extract predicates that reference only the specified variable
7633    fn extract_variable_predicates(predicate: &Expr, variable: &str) -> (Vec<Expr>, Option<Expr>) {
7634        let analyzer = PredicateAnalyzer::new();
7635        let analysis = analyzer.analyze(predicate, variable);
7636
7637        // Return pushable predicates and combined residual
7638        let residual = if analysis.residual.is_empty() {
7639            None
7640        } else {
7641            let mut iter = analysis.residual.into_iter();
7642            let first = iter.next().unwrap();
7643            Some(iter.fold(first, |acc, e| Expr::BinaryOp {
7644                left: Box::new(acc),
7645                op: BinaryOp::And,
7646                right: Box::new(e),
7647            }))
7648        };
7649
7650        (analysis.pushable, residual)
7651    }
7652
7653    // =====================================================================
7654    // Apply Predicate Pushdown - Helper Functions
7655    // =====================================================================
7656
7657    /// Split AND-connected predicates into a list.
7658    fn split_and_conjuncts(expr: &Expr) -> Vec<Expr> {
7659        match expr {
7660            Expr::BinaryOp {
7661                left,
7662                op: BinaryOp::And,
7663                right,
7664            } => {
7665                let mut result = Self::split_and_conjuncts(left);
7666                result.extend(Self::split_and_conjuncts(right));
7667                result
7668            }
7669            _ => vec![expr.clone()],
7670        }
7671    }
7672
7673    /// Combine predicates with AND.
7674    fn combine_predicates(predicates: Vec<Expr>) -> Option<Expr> {
7675        if predicates.is_empty() {
7676            return None;
7677        }
7678        let mut result = predicates[0].clone();
7679        for pred in predicates.iter().skip(1) {
7680            result = Expr::BinaryOp {
7681                left: Box::new(result),
7682                op: BinaryOp::And,
7683                right: Box::new(pred.clone()),
7684            };
7685        }
7686        Some(result)
7687    }
7688
7689    /// Collect all variable names referenced in an expression.
7690    fn collect_expr_variables(expr: &Expr) -> HashSet<String> {
7691        let mut vars = HashSet::new();
7692        Self::collect_expr_variables_impl(expr, &mut vars);
7693        vars
7694    }
7695
7696    fn collect_expr_variables_impl(expr: &Expr, vars: &mut HashSet<String>) {
7697        match expr {
7698            Expr::Variable(name) => {
7699                vars.insert(name.clone());
7700            }
7701            Expr::Property(inner, _) => {
7702                if let Expr::Variable(name) = inner.as_ref() {
7703                    vars.insert(name.clone());
7704                } else {
7705                    Self::collect_expr_variables_impl(inner, vars);
7706                }
7707            }
7708            Expr::BinaryOp { left, right, .. } => {
7709                Self::collect_expr_variables_impl(left, vars);
7710                Self::collect_expr_variables_impl(right, vars);
7711            }
7712            Expr::UnaryOp { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
7713            Expr::IsNull(e) | Expr::IsNotNull(e) => Self::collect_expr_variables_impl(e, vars),
7714            Expr::FunctionCall { args, .. } => {
7715                for arg in args {
7716                    Self::collect_expr_variables_impl(arg, vars);
7717                }
7718            }
7719            Expr::List(items) => {
7720                for item in items {
7721                    Self::collect_expr_variables_impl(item, vars);
7722                }
7723            }
7724            Expr::Case {
7725                expr,
7726                when_then,
7727                else_expr,
7728            } => {
7729                if let Some(e) = expr {
7730                    Self::collect_expr_variables_impl(e, vars);
7731                }
7732                for (w, t) in when_then {
7733                    Self::collect_expr_variables_impl(w, vars);
7734                    Self::collect_expr_variables_impl(t, vars);
7735                }
7736                if let Some(e) = else_expr {
7737                    Self::collect_expr_variables_impl(e, vars);
7738                }
7739            }
7740            Expr::LabelCheck { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
7741            // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
7742            // they introduce local variable bindings not in outer scope.
7743            _ => {}
7744        }
7745    }
7746
7747    /// Collect all variables produced by a logical plan.
7748    fn collect_plan_variables(plan: &LogicalPlan) -> HashSet<String> {
7749        let mut vars = HashSet::new();
7750        Self::collect_plan_variables_impl(plan, &mut vars);
7751        vars
7752    }
7753
7754    fn collect_plan_variables_impl(plan: &LogicalPlan, vars: &mut HashSet<String>) {
7755        match plan {
7756            LogicalPlan::Scan { variable, .. } => {
7757                vars.insert(variable.clone());
7758            }
7759            LogicalPlan::Traverse {
7760                target_variable,
7761                step_variable,
7762                input,
7763                path_variable,
7764                ..
7765            } => {
7766                vars.insert(target_variable.clone());
7767                if let Some(sv) = step_variable {
7768                    vars.insert(sv.clone());
7769                }
7770                if let Some(pv) = path_variable {
7771                    vars.insert(pv.clone());
7772                }
7773                Self::collect_plan_variables_impl(input, vars);
7774            }
7775            LogicalPlan::Filter { input, .. } => Self::collect_plan_variables_impl(input, vars),
7776            LogicalPlan::Project { input, projections } => {
7777                for (expr, alias) in projections {
7778                    if let Some(a) = alias {
7779                        vars.insert(a.clone());
7780                    } else if let Expr::Variable(v) = expr {
7781                        vars.insert(v.clone());
7782                    }
7783                }
7784                Self::collect_plan_variables_impl(input, vars);
7785            }
7786            LogicalPlan::Apply {
7787                input, subquery, ..
7788            } => {
7789                Self::collect_plan_variables_impl(input, vars);
7790                Self::collect_plan_variables_impl(subquery, vars);
7791            }
7792            LogicalPlan::CrossJoin { left, right } => {
7793                Self::collect_plan_variables_impl(left, vars);
7794                Self::collect_plan_variables_impl(right, vars);
7795            }
7796            LogicalPlan::Unwind {
7797                input, variable, ..
7798            } => {
7799                vars.insert(variable.clone());
7800                Self::collect_plan_variables_impl(input, vars);
7801            }
7802            LogicalPlan::Aggregate { input, .. } => {
7803                Self::collect_plan_variables_impl(input, vars);
7804            }
7805            LogicalPlan::Distinct { input } => {
7806                Self::collect_plan_variables_impl(input, vars);
7807            }
7808            LogicalPlan::Sort { input, .. } => {
7809                Self::collect_plan_variables_impl(input, vars);
7810            }
7811            LogicalPlan::Limit { input, .. } => {
7812                Self::collect_plan_variables_impl(input, vars);
7813            }
7814            LogicalPlan::VectorKnn { variable, .. } => {
7815                vars.insert(variable.clone());
7816            }
7817            LogicalPlan::ProcedureCall { yield_items, .. } => {
7818                for (name, alias) in yield_items {
7819                    vars.insert(alias.clone().unwrap_or_else(|| name.clone()));
7820                }
7821            }
7822            LogicalPlan::ShortestPath {
7823                input,
7824                path_variable,
7825                ..
7826            } => {
7827                vars.insert(path_variable.clone());
7828                Self::collect_plan_variables_impl(input, vars);
7829            }
7830            LogicalPlan::AllShortestPaths {
7831                input,
7832                path_variable,
7833                ..
7834            } => {
7835                vars.insert(path_variable.clone());
7836                Self::collect_plan_variables_impl(input, vars);
7837            }
7838            LogicalPlan::RecursiveCTE {
7839                initial, recursive, ..
7840            } => {
7841                Self::collect_plan_variables_impl(initial, vars);
7842                Self::collect_plan_variables_impl(recursive, vars);
7843            }
7844            LogicalPlan::SubqueryCall {
7845                input, subquery, ..
7846            } => {
7847                Self::collect_plan_variables_impl(input, vars);
7848                Self::collect_plan_variables_impl(subquery, vars);
7849            }
7850            _ => {}
7851        }
7852    }
7853
7854    /// Extract predicates that only reference variables from Apply's input.
7855    /// Returns (input_only_predicates, remaining_predicates).
7856    fn extract_apply_input_predicates(
7857        predicate: &Expr,
7858        input_variables: &HashSet<String>,
7859        subquery_new_variables: &HashSet<String>,
7860    ) -> (Vec<Expr>, Vec<Expr>) {
7861        let conjuncts = Self::split_and_conjuncts(predicate);
7862        let mut input_preds = Vec::new();
7863        let mut remaining = Vec::new();
7864
7865        for conj in conjuncts {
7866            let vars = Self::collect_expr_variables(&conj);
7867
7868            // Predicate only references input variables (none from subquery)
7869            let refs_input_only = vars.iter().all(|v| input_variables.contains(v));
7870            let refs_any_subquery = vars.iter().any(|v| subquery_new_variables.contains(v));
7871
7872            if refs_input_only && !refs_any_subquery && !vars.is_empty() {
7873                input_preds.push(conj);
7874            } else {
7875                remaining.push(conj);
7876            }
7877        }
7878
7879        (input_preds, remaining)
7880    }
7881
7882    /// Push eligible predicates into Apply.input_filter.
7883    /// This filters input rows BEFORE executing the correlated subquery.
7884    fn push_predicates_to_apply(plan: LogicalPlan, current_predicate: &mut Expr) -> LogicalPlan {
7885        match plan {
7886            LogicalPlan::Apply {
7887                input,
7888                subquery,
7889                input_filter,
7890            } => {
7891                // Collect variables from input plan
7892                let input_vars = Self::collect_plan_variables(&input);
7893
7894                // Collect NEW variables introduced by subquery (not in input)
7895                let subquery_vars = Self::collect_plan_variables(&subquery);
7896                let new_subquery_vars: HashSet<String> =
7897                    subquery_vars.difference(&input_vars).cloned().collect();
7898
7899                // Extract predicates that only reference input variables
7900                let (input_preds, remaining) = Self::extract_apply_input_predicates(
7901                    current_predicate,
7902                    &input_vars,
7903                    &new_subquery_vars,
7904                );
7905
7906                // Update current_predicate to only remaining predicates
7907                *current_predicate = if remaining.is_empty() {
7908                    Expr::TRUE
7909                } else {
7910                    Self::combine_predicates(remaining).unwrap()
7911                };
7912
7913                // Combine extracted predicates with existing input_filter
7914                let new_input_filter = if input_preds.is_empty() {
7915                    input_filter
7916                } else {
7917                    let extracted = Self::combine_predicates(input_preds).unwrap();
7918                    match input_filter {
7919                        Some(existing) => Some(Expr::BinaryOp {
7920                            left: Box::new(existing),
7921                            op: BinaryOp::And,
7922                            right: Box::new(extracted),
7923                        }),
7924                        None => Some(extracted),
7925                    }
7926                };
7927
7928                // Recurse into input plan
7929                let new_input = Self::push_predicates_to_apply(*input, current_predicate);
7930
7931                LogicalPlan::Apply {
7932                    input: Box::new(new_input),
7933                    subquery,
7934                    input_filter: new_input_filter,
7935                }
7936            }
7937            // Recurse into other plan nodes
7938            LogicalPlan::Filter {
7939                input,
7940                predicate,
7941                optional_variables,
7942            } => LogicalPlan::Filter {
7943                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7944                predicate,
7945                optional_variables,
7946            },
7947            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7948                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7949                projections,
7950            },
7951            LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
7952                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7953                order_by,
7954            },
7955            LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
7956                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7957                skip,
7958                fetch,
7959            },
7960            LogicalPlan::Aggregate {
7961                input,
7962                group_by,
7963                aggregates,
7964            } => LogicalPlan::Aggregate {
7965                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7966                group_by,
7967                aggregates,
7968            },
7969            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
7970                left: Box::new(Self::push_predicates_to_apply(*left, current_predicate)),
7971                right: Box::new(Self::push_predicates_to_apply(*right, current_predicate)),
7972            },
7973            LogicalPlan::Traverse {
7974                input,
7975                edge_type_ids,
7976                direction,
7977                source_variable,
7978                target_variable,
7979                target_label_id,
7980                step_variable,
7981                min_hops,
7982                max_hops,
7983                optional,
7984                target_filter,
7985                path_variable,
7986                edge_properties,
7987                is_variable_length,
7988                optional_pattern_vars,
7989                scope_match_variables,
7990                edge_filter_expr,
7991                path_mode,
7992                qpp_steps,
7993            } => LogicalPlan::Traverse {
7994                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7995                edge_type_ids,
7996                direction,
7997                source_variable,
7998                target_variable,
7999                target_label_id,
8000                step_variable,
8001                min_hops,
8002                max_hops,
8003                optional,
8004                target_filter,
8005                path_variable,
8006                edge_properties,
8007                is_variable_length,
8008                optional_pattern_vars,
8009                scope_match_variables,
8010                edge_filter_expr,
8011                path_mode,
8012                qpp_steps,
8013            },
8014            other => other,
8015        }
8016    }
8017}
8018
8019/// Get the expected column name for an aggregate expression.
8020///
8021/// This is the single source of truth for aggregate column naming, used by:
8022/// - Logical planner (to create column references)
8023/// - Physical planner (to rename DataFusion's auto-generated column names)
8024/// - Fallback executor (to name result columns)
8025pub fn aggregate_column_name(expr: &Expr) -> String {
8026    expr.to_string_repr()
8027}
8028
/// Output produced by `EXPLAIN` — a human-readable plan with index and cost info.
///
/// Built by `QueryPlanner::explain_logical_plan`; serializable via serde.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExplainOutput {
    /// Pretty-printed (`{:#?}`) debug rendering of the logical plan tree.
    pub plan_text: String,
    /// Index availability report for each scan in the plan.
    pub index_usage: Vec<IndexUsage>,
    /// Rough row and cost estimates for the full plan.
    pub cost_estimates: CostEstimates,
    /// Planner warnings (e.g., missing index, forced full scan).
    /// NOTE: `explain_logical_plan` currently always leaves this empty.
    pub warnings: Vec<String>,
    /// Suggested indexes that would improve this query.
    pub suggestions: Vec<IndexSuggestion>,
}
8043
/// Suggestion for creating an index to improve query performance.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexSuggestion {
    /// Label or edge type that would benefit from the index.
    /// Temporal suggestions do not resolve a label and store the literal
    /// text `"(detected from temporal query)"` here instead.
    pub label_or_type: String,
    /// Property to index.
    pub property: String,
    /// Recommended index type as display text (temporal suggestions use
    /// `"SCALAR (BTree)"`).
    pub index_type: String,
    /// Human-readable explanation of the performance benefit.
    pub reason: String,
    /// Cypher `CREATE INDEX` statement for the suggestion. Temporal
    /// suggestions emit a template containing the placeholder label
    /// `YourLabel`, which must be replaced before the statement is run.
    pub create_statement: String,
}
8058
/// Index availability report for a single scan operator.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexUsage {
    /// Name of the label the scan targets (`"?"` when a vector scan's label id
    /// cannot be resolved in the schema).
    pub label_or_type: String,
    /// Property served by the index.
    pub property: String,
    /// Index kind as display text; the planner currently reports `"HASH"` and
    /// `"VECTOR"`.
    pub index_type: String,
    /// Whether the index was actually used for this scan.
    pub used: bool,
    /// Human-readable explanation of why the index was or was not used.
    pub reason: Option<String>,
}
8070
/// Rough cost and row count estimates for a complete logical plan.
///
/// NOTE: `QueryPlanner::estimate_costs` currently fills both fields with fixed
/// placeholder values regardless of the plan.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CostEstimates {
    /// Estimated number of rows the plan will produce.
    pub estimated_rows: f64,
    /// Abstract cost units (lower is cheaper).
    pub estimated_cost: f64,
}
8079
8080impl QueryPlanner {
8081    /// Plan a query and produce an EXPLAIN report (plan text, index usage, costs).
8082    pub fn explain_plan(&self, ast: Query) -> Result<ExplainOutput> {
8083        let plan = self.plan(ast)?;
8084        self.explain_logical_plan(&plan)
8085    }
8086
8087    /// Produce an EXPLAIN report for an already-planned logical plan.
8088    pub fn explain_logical_plan(&self, plan: &LogicalPlan) -> Result<ExplainOutput> {
8089        let index_usage = self.analyze_index_usage(plan)?;
8090        let cost_estimates = self.estimate_costs(plan)?;
8091        let suggestions = self.collect_index_suggestions(plan);
8092        let warnings = Vec::new();
8093        let plan_text = format!("{:#?}", plan);
8094
8095        Ok(ExplainOutput {
8096            plan_text,
8097            index_usage,
8098            cost_estimates,
8099            warnings,
8100            suggestions,
8101        })
8102    }
8103
8104    fn analyze_index_usage(&self, plan: &LogicalPlan) -> Result<Vec<IndexUsage>> {
8105        let mut usage = Vec::new();
8106        self.collect_index_usage(plan, &mut usage);
8107        Ok(usage)
8108    }
8109
8110    fn collect_index_usage(&self, plan: &LogicalPlan, usage: &mut Vec<IndexUsage>) {
8111        match plan {
8112            LogicalPlan::Scan {
8113                label_id,
8114                filter: Some(filter),
8115                ..
8116            } => {
8117                // Detect indexed-property pushdown — issue #57. Run the same
8118                // analyzer the physical planner uses; if it reports a
8119                // hash-index hit, surface it in EXPLAIN.
8120                if let Some(label_name) = self.schema.label_name_by_id(*label_id) {
8121                    let analyzer = crate::query::pushdown::IndexAwareAnalyzer::new(&self.schema);
8122                    // The variable name is the scan's binding variable; we
8123                    // reach for it via the Scan node directly.
8124                    if let LogicalPlan::Scan { variable, .. } = plan {
8125                        let strategy = analyzer.analyze(filter, variable, *label_id);
8126                        for prop in strategy.hash_index_columns {
8127                            usage.push(IndexUsage {
8128                                label_or_type: label_name.to_string(),
8129                                property: prop,
8130                                index_type: "HASH".to_string(),
8131                                used: true,
8132                                reason: Some(
8133                                    "Hash index point lookup pushed into Lance scan".to_string(),
8134                                ),
8135                            });
8136                        }
8137                    }
8138                }
8139            }
8140            LogicalPlan::Scan { .. } => {}
8141            LogicalPlan::VectorKnn {
8142                label_id, property, ..
8143            } => {
8144                let label_name = self.schema.label_name_by_id(*label_id).unwrap_or("?");
8145                usage.push(IndexUsage {
8146                    label_or_type: label_name.to_string(),
8147                    property: property.clone(),
8148                    index_type: "VECTOR".to_string(),
8149                    used: true,
8150                    reason: None,
8151                });
8152            }
8153            LogicalPlan::Explain { plan } => self.collect_index_usage(plan, usage),
8154            LogicalPlan::Filter { input, .. } => self.collect_index_usage(input, usage),
8155            LogicalPlan::Project { input, .. } => self.collect_index_usage(input, usage),
8156            LogicalPlan::Limit { input, .. } => self.collect_index_usage(input, usage),
8157            LogicalPlan::Sort { input, .. } => self.collect_index_usage(input, usage),
8158            LogicalPlan::Aggregate { input, .. } => self.collect_index_usage(input, usage),
8159            LogicalPlan::Traverse { input, .. } => self.collect_index_usage(input, usage),
8160            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
8161                self.collect_index_usage(left, usage);
8162                self.collect_index_usage(right, usage);
8163            }
8164            _ => {}
8165        }
8166    }
8167
8168    fn estimate_costs(&self, _plan: &LogicalPlan) -> Result<CostEstimates> {
8169        Ok(CostEstimates {
8170            estimated_rows: 100.0,
8171            estimated_cost: 10.0,
8172        })
8173    }
8174
8175    /// Collect index suggestions based on query patterns.
8176    ///
8177    /// Currently detects:
8178    /// - Temporal predicates from `uni.validAt()` function calls
8179    /// - Temporal predicates from `VALID_AT` macro expansion
8180    fn collect_index_suggestions(&self, plan: &LogicalPlan) -> Vec<IndexSuggestion> {
8181        let mut suggestions = Vec::new();
8182        self.collect_temporal_suggestions(plan, &mut suggestions);
8183        suggestions
8184    }
8185
8186    /// Recursively collect temporal index suggestions from the plan.
8187    fn collect_temporal_suggestions(
8188        &self,
8189        plan: &LogicalPlan,
8190        suggestions: &mut Vec<IndexSuggestion>,
8191    ) {
8192        match plan {
8193            LogicalPlan::Filter {
8194                input, predicate, ..
8195            } => {
8196                // Check for temporal patterns in the predicate
8197                self.detect_temporal_pattern(predicate, suggestions);
8198                // Recurse into input
8199                self.collect_temporal_suggestions(input, suggestions);
8200            }
8201            LogicalPlan::Explain { plan } => self.collect_temporal_suggestions(plan, suggestions),
8202            LogicalPlan::Project { input, .. } => {
8203                self.collect_temporal_suggestions(input, suggestions)
8204            }
8205            LogicalPlan::Limit { input, .. } => {
8206                self.collect_temporal_suggestions(input, suggestions)
8207            }
8208            LogicalPlan::Sort { input, .. } => {
8209                self.collect_temporal_suggestions(input, suggestions)
8210            }
8211            LogicalPlan::Aggregate { input, .. } => {
8212                self.collect_temporal_suggestions(input, suggestions)
8213            }
8214            LogicalPlan::Traverse { input, .. } => {
8215                self.collect_temporal_suggestions(input, suggestions)
8216            }
8217            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
8218                self.collect_temporal_suggestions(left, suggestions);
8219                self.collect_temporal_suggestions(right, suggestions);
8220            }
8221            _ => {}
8222        }
8223    }
8224
8225    /// Detect temporal predicate patterns and suggest indexes.
8226    ///
8227    /// Detects two patterns:
8228    /// 1. `uni.validAt(node, 'start_prop', 'end_prop', time)` function call
8229    /// 2. `node.valid_from <= time AND (node.valid_to IS NULL OR node.valid_to > time)` from VALID_AT macro
8230    fn detect_temporal_pattern(&self, expr: &Expr, suggestions: &mut Vec<IndexSuggestion>) {
8231        match expr {
8232            // Pattern 1: uni.temporal.validAt() function call
8233            Expr::FunctionCall { name, args, .. }
8234                if (name.eq_ignore_ascii_case("uni.temporal.validAt")
8235                    || name.eq_ignore_ascii_case("validAt"))
8236                    && args.len() >= 2 =>
8237            {
8238                // args[0] = node, args[1] = start_prop, args[2] = end_prop, args[3] = time
8239                let start_prop = if let Some(Expr::Literal(CypherLiteral::String(s))) = args.get(1)
8240                {
8241                    s.clone()
8242                } else {
8243                    "valid_from".to_string()
8244                };
8245
8246                // Try to extract label from the node expression
8247                if let Some(var) = args.first().and_then(|e| e.extract_variable()) {
8248                    self.suggest_temporal_index(&var, &start_prop, suggestions);
8249                }
8250            }
8251
8252            // Pattern 2: VALID_AT macro expansion - look for property <= time pattern
8253            Expr::BinaryOp {
8254                left,
8255                op: BinaryOp::And,
8256                right,
8257            } => {
8258                // Check left side for `prop <= time` pattern (temporal start condition)
8259                if let Expr::BinaryOp {
8260                    left: prop_expr,
8261                    op: BinaryOp::LtEq,
8262                    ..
8263                } = left.as_ref()
8264                    && let Expr::Property(base, prop_name) = prop_expr.as_ref()
8265                    && (prop_name == "valid_from"
8266                        || prop_name.contains("start")
8267                        || prop_name.contains("from")
8268                        || prop_name.contains("begin"))
8269                    && let Some(var) = base.extract_variable()
8270                {
8271                    self.suggest_temporal_index(&var, prop_name, suggestions);
8272                }
8273
8274                // Recurse into both sides of AND
8275                self.detect_temporal_pattern(left.as_ref(), suggestions);
8276                self.detect_temporal_pattern(right.as_ref(), suggestions);
8277            }
8278
8279            // Recurse into other binary ops
8280            Expr::BinaryOp { left, right, .. } => {
8281                self.detect_temporal_pattern(left.as_ref(), suggestions);
8282                self.detect_temporal_pattern(right.as_ref(), suggestions);
8283            }
8284
8285            _ => {}
8286        }
8287    }
8288
8289    /// Suggest a scalar index for a temporal property if one doesn't already exist.
8290    fn suggest_temporal_index(
8291        &self,
8292        _variable: &str,
8293        property: &str,
8294        suggestions: &mut Vec<IndexSuggestion>,
8295    ) {
8296        // Check if a scalar index already exists for this property
8297        // We need to check all labels since we may not know the exact label from the variable
8298        let mut has_index = false;
8299
8300        for index in &self.schema.indexes {
8301            if let IndexDefinition::Scalar(config) = index
8302                && config.properties.contains(&property.to_string())
8303            {
8304                has_index = true;
8305                break;
8306            }
8307        }
8308
8309        if !has_index {
8310            // Avoid duplicate suggestions
8311            let already_suggested = suggestions.iter().any(|s| s.property == property);
8312            if !already_suggested {
8313                suggestions.push(IndexSuggestion {
8314                    label_or_type: "(detected from temporal query)".to_string(),
8315                    property: property.to_string(),
8316                    index_type: "SCALAR (BTree)".to_string(),
8317                    reason: format!(
8318                        "Temporal queries using '{}' can benefit from a scalar index for range scans",
8319                        property
8320                    ),
8321                    create_statement: format!(
8322                        "CREATE INDEX idx_{} FOR (n:YourLabel) ON (n.{})",
8323                        property, property
8324                    ),
8325                });
8326            }
8327        }
8328    }
8329
8330    /// Helper functions for expression normalization
8331    /// Normalize an expression for storage: strip variable prefixes
8332    /// For simple property: u.email -> "email"
8333    /// For expressions: lower(u.email) -> "lower(email)"
8334    fn normalize_expression_for_storage(expr: &Expr) -> String {
8335        match expr {
8336            Expr::Property(base, prop) if matches!(**base, Expr::Variable(_)) => prop.clone(),
8337            _ => {
8338                // Serialize expression and strip variable prefix
8339                let expr_str = expr.to_string_repr();
8340                Self::strip_variable_prefix(&expr_str)
8341            }
8342        }
8343    }
8344
8345    /// Strip variable references like "u.prop" from expression strings
8346    /// Converts "lower(u.email)" to "lower(email)"
8347    fn strip_variable_prefix(expr_str: &str) -> String {
8348        use regex::Regex;
8349        // Match patterns like "word.property" and replace with just "property"
8350        let re = Regex::new(r"\b\w+\.(\w+)").unwrap();
8351        re.replace_all(expr_str, "$1").to_string()
8352    }
8353
8354    /// Plan a schema command from the new AST
8355    fn plan_schema_command(&self, cmd: SchemaCommand) -> Result<LogicalPlan> {
8356        match cmd {
8357            SchemaCommand::CreateVectorIndex(c) => {
8358                // Parse index type from options (default: IvfPq)
8359                let opt = |key: &str| {
8360                    c.options
8361                        .get(key)
8362                        .and_then(|v| v.as_str())
8363                        .and_then(|s| s.parse::<u32>().ok())
8364                };
8365                let opt_u8 = |key: &str| -> Option<u8> {
8366                    c.options
8367                        .get(key)
8368                        .and_then(|v| v.as_str())
8369                        .and_then(|s| s.parse::<u8>().ok())
8370                };
8371                let index_type = match c.options.get("type").and_then(|v| v.as_str()) {
8372                    Some("flat") => VectorIndexType::Flat,
8373                    Some("ivf_flat") => VectorIndexType::IvfFlat {
8374                        num_partitions: opt("partitions").unwrap_or(256),
8375                    },
8376                    Some("ivf_sq") => VectorIndexType::IvfSq {
8377                        num_partitions: opt("partitions").unwrap_or(256),
8378                    },
8379                    Some("ivf_rq") => VectorIndexType::IvfRq {
8380                        num_partitions: opt("partitions").unwrap_or(256),
8381                        num_bits: opt_u8("num_bits"),
8382                    },
8383                    Some("hnsw_flat") => VectorIndexType::HnswFlat {
8384                        m: opt("m").unwrap_or(16),
8385                        ef_construction: opt("ef_construction").unwrap_or(200),
8386                        num_partitions: opt("partitions"),
8387                    },
8388                    Some("hnsw") | Some("hnsw_sq") => VectorIndexType::HnswSq {
8389                        m: opt("m").unwrap_or(16),
8390                        ef_construction: opt("ef_construction").unwrap_or(200),
8391                        num_partitions: opt("partitions"),
8392                    },
8393                    Some("hnsw_pq") => VectorIndexType::HnswPq {
8394                        m: opt("m").unwrap_or(16),
8395                        ef_construction: opt("ef_construction").unwrap_or(200),
8396                        num_sub_vectors: opt("sub_vectors").unwrap_or(16),
8397                        num_partitions: opt("partitions"),
8398                    },
8399                    _ => VectorIndexType::IvfPq {
8400                        num_partitions: opt("partitions").unwrap_or(256),
8401                        num_sub_vectors: opt("sub_vectors").unwrap_or(16),
8402                        bits_per_subvector: opt_u8("num_bits").unwrap_or(8),
8403                    },
8404                };
8405
8406                // Parse embedding config from options
8407                let embedding_config = if let Some(emb_val) = c.options.get("embedding") {
8408                    Self::parse_embedding_config(emb_val)?
8409                } else {
8410                    None
8411                };
8412
8413                let config = VectorIndexConfig {
8414                    name: c.name,
8415                    label: c.label,
8416                    property: c.property,
8417                    metric: DistanceMetric::Cosine,
8418                    index_type,
8419                    embedding_config,
8420                    metadata: Default::default(),
8421                };
8422                Ok(LogicalPlan::CreateVectorIndex {
8423                    config,
8424                    if_not_exists: c.if_not_exists,
8425                })
8426            }
8427            SchemaCommand::CreateFullTextIndex(cfg) => Ok(LogicalPlan::CreateFullTextIndex {
8428                config: FullTextIndexConfig {
8429                    name: cfg.name,
8430                    label: cfg.label,
8431                    properties: cfg.properties,
8432                    tokenizer: TokenizerConfig::Standard,
8433                    with_positions: true,
8434                    metadata: Default::default(),
8435                },
8436                if_not_exists: cfg.if_not_exists,
8437            }),
8438            SchemaCommand::CreateScalarIndex(cfg) => {
8439                // Convert expressions to storage strings (strip variable prefix)
8440                let properties: Vec<String> = cfg
8441                    .expressions
8442                    .iter()
8443                    .map(Self::normalize_expression_for_storage)
8444                    .collect();
8445
8446                Ok(LogicalPlan::CreateScalarIndex {
8447                    config: ScalarIndexConfig {
8448                        name: cfg.name,
8449                        label: cfg.label,
8450                        properties,
8451                        index_type: ScalarIndexType::BTree,
8452                        where_clause: cfg.where_clause.map(|e| e.to_string_repr()),
8453                        metadata: Default::default(),
8454                    },
8455                    if_not_exists: cfg.if_not_exists,
8456                })
8457            }
8458            SchemaCommand::CreateJsonFtsIndex(cfg) => {
8459                let with_positions = cfg
8460                    .options
8461                    .get("with_positions")
8462                    .and_then(|v| v.as_bool())
8463                    .unwrap_or(false);
8464                Ok(LogicalPlan::CreateJsonFtsIndex {
8465                    config: JsonFtsIndexConfig {
8466                        name: cfg.name,
8467                        label: cfg.label,
8468                        column: cfg.column,
8469                        paths: Vec::new(),
8470                        with_positions,
8471                        metadata: Default::default(),
8472                    },
8473                    if_not_exists: cfg.if_not_exists,
8474                })
8475            }
8476            SchemaCommand::DropIndex(drop) => Ok(LogicalPlan::DropIndex {
8477                name: drop.name,
8478                if_exists: false, // new AST doesn't have if_exists for DROP INDEX yet
8479            }),
8480            SchemaCommand::CreateConstraint(c) => Ok(LogicalPlan::CreateConstraint(c)),
8481            SchemaCommand::DropConstraint(c) => Ok(LogicalPlan::DropConstraint(c)),
8482            SchemaCommand::CreateLabel(c) => Ok(LogicalPlan::CreateLabel(c)),
8483            SchemaCommand::CreateEdgeType(c) => Ok(LogicalPlan::CreateEdgeType(c)),
8484            SchemaCommand::AlterLabel(c) => Ok(LogicalPlan::AlterLabel(c)),
8485            SchemaCommand::AlterEdgeType(c) => Ok(LogicalPlan::AlterEdgeType(c)),
8486            SchemaCommand::DropLabel(c) => Ok(LogicalPlan::DropLabel(c)),
8487            SchemaCommand::DropEdgeType(c) => Ok(LogicalPlan::DropEdgeType(c)),
8488            SchemaCommand::ShowConstraints(c) => Ok(LogicalPlan::ShowConstraints(c)),
8489            SchemaCommand::ShowIndexes(c) => Ok(LogicalPlan::ShowIndexes { filter: c.filter }),
8490            SchemaCommand::ShowDatabase => Ok(LogicalPlan::ShowDatabase),
8491            SchemaCommand::ShowConfig => Ok(LogicalPlan::ShowConfig),
8492            SchemaCommand::ShowStatistics => Ok(LogicalPlan::ShowStatistics),
8493            SchemaCommand::Vacuum => Ok(LogicalPlan::Vacuum),
8494            SchemaCommand::Checkpoint => Ok(LogicalPlan::Checkpoint),
8495            SchemaCommand::Backup { path } => Ok(LogicalPlan::Backup {
8496                destination: path,
8497                options: HashMap::new(),
8498            }),
8499            SchemaCommand::CopyTo(cmd) => Ok(LogicalPlan::CopyTo {
8500                label: cmd.label,
8501                path: cmd.path,
8502                format: cmd.format,
8503                options: cmd.options,
8504            }),
8505            SchemaCommand::CopyFrom(cmd) => Ok(LogicalPlan::CopyFrom {
8506                label: cmd.label,
8507                path: cmd.path,
8508                format: cmd.format,
8509                options: cmd.options,
8510            }),
8511        }
8512    }
8513
8514    fn parse_embedding_config(emb_val: &Value) -> Result<Option<EmbeddingConfig>> {
8515        let obj = emb_val
8516            .as_object()
8517            .ok_or_else(|| anyhow!("embedding option must be an object"))?;
8518
8519        // Parse alias (required)
8520        let alias = obj
8521            .get("alias")
8522            .and_then(|v| v.as_str())
8523            .ok_or_else(|| anyhow!("embedding.alias is required"))?;
8524
8525        // Parse source properties (required)
8526        let source_properties = obj
8527            .get("source")
8528            .and_then(|v| v.as_array())
8529            .ok_or_else(|| anyhow!("embedding.source is required and must be an array"))?
8530            .iter()
8531            .filter_map(|v| v.as_str().map(|s| s.to_string()))
8532            .collect::<Vec<_>>();
8533
8534        if source_properties.is_empty() {
8535            return Err(anyhow!(
8536                "embedding.source must contain at least one property"
8537            ));
8538        }
8539
8540        let batch_size = obj
8541            .get("batch_size")
8542            .and_then(|v| v.as_u64())
8543            .map(|v| v as usize)
8544            .unwrap_or(32);
8545
8546        let document_prefix = obj
8547            .get("document_prefix")
8548            .and_then(|v| v.as_str())
8549            .map(|s| s.to_string());
8550
8551        let query_prefix = obj
8552            .get("query_prefix")
8553            .and_then(|v| v.as_str())
8554            .map(|s| s.to_string());
8555
8556        Ok(Some(EmbeddingConfig {
8557            alias: alias.to_string(),
8558            source_properties,
8559            batch_size,
8560            document_prefix,
8561            query_prefix,
8562        }))
8563    }
8564}
8565
8566/// Collect all properties referenced anywhere in the LogicalPlan tree.
8567///
8568/// This is critical for window functions: properties must be materialized
8569/// at the Scan node so they're available for window operations later.
8570///
8571/// Returns a mapping of variable name → property names (e.g., "e" → {"dept", "salary"}).
8572pub fn collect_properties_from_plan(plan: &LogicalPlan) -> HashMap<String, HashSet<String>> {
8573    let mut properties: HashMap<String, HashSet<String>> = HashMap::new();
8574    collect_properties_recursive(plan, &mut properties);
8575    properties
8576}
8577
8578/// Recursively walk the LogicalPlan tree and collect all property references.
8579fn collect_properties_recursive(
8580    plan: &LogicalPlan,
8581    properties: &mut HashMap<String, HashSet<String>>,
8582) {
8583    match plan {
8584        LogicalPlan::Window {
8585            input,
8586            window_exprs,
8587        } => {
8588            // Collect from window expressions
8589            for expr in window_exprs {
8590                collect_properties_from_expr_into(expr, properties);
8591            }
8592            collect_properties_recursive(input, properties);
8593        }
8594        LogicalPlan::Project { input, projections } => {
8595            for (expr, _alias) in projections {
8596                collect_properties_from_expr_into(expr, properties);
8597            }
8598            collect_properties_recursive(input, properties);
8599        }
8600        LogicalPlan::Sort { input, order_by } => {
8601            for sort_item in order_by {
8602                collect_properties_from_expr_into(&sort_item.expr, properties);
8603            }
8604            collect_properties_recursive(input, properties);
8605        }
8606        LogicalPlan::Filter {
8607            input, predicate, ..
8608        } => {
8609            collect_properties_from_expr_into(predicate, properties);
8610            collect_properties_recursive(input, properties);
8611        }
8612        LogicalPlan::Aggregate {
8613            input,
8614            group_by,
8615            aggregates,
8616        } => {
8617            for expr in group_by {
8618                collect_properties_from_expr_into(expr, properties);
8619            }
8620            for expr in aggregates {
8621                collect_properties_from_expr_into(expr, properties);
8622            }
8623            collect_properties_recursive(input, properties);
8624        }
8625        LogicalPlan::Scan {
8626            filter: Some(expr), ..
8627        } => {
8628            collect_properties_from_expr_into(expr, properties);
8629        }
8630        LogicalPlan::Scan { filter: None, .. } => {}
8631        LogicalPlan::ExtIdLookup {
8632            filter: Some(expr), ..
8633        } => {
8634            collect_properties_from_expr_into(expr, properties);
8635        }
8636        LogicalPlan::ExtIdLookup { filter: None, .. } => {}
8637        LogicalPlan::ScanAll {
8638            filter: Some(expr), ..
8639        } => {
8640            collect_properties_from_expr_into(expr, properties);
8641        }
8642        LogicalPlan::ScanAll { filter: None, .. } => {}
8643        LogicalPlan::ScanMainByLabels {
8644            filter: Some(expr), ..
8645        } => {
8646            collect_properties_from_expr_into(expr, properties);
8647        }
8648        LogicalPlan::ScanMainByLabels { filter: None, .. } => {}
8649        LogicalPlan::TraverseMainByType {
8650            input,
8651            target_filter,
8652            ..
8653        } => {
8654            if let Some(expr) = target_filter {
8655                collect_properties_from_expr_into(expr, properties);
8656            }
8657            collect_properties_recursive(input, properties);
8658        }
8659        LogicalPlan::Traverse {
8660            input,
8661            target_filter,
8662            step_variable: _,
8663            ..
8664        } => {
8665            if let Some(expr) = target_filter {
8666                collect_properties_from_expr_into(expr, properties);
8667            }
8668            // Note: Edge properties (step_variable) will be collected from expressions
8669            // that reference them. The edge_properties field in LogicalPlan is populated
8670            // later during physical planning based on this collected map.
8671            collect_properties_recursive(input, properties);
8672        }
8673        LogicalPlan::Unwind { input, expr, .. } => {
8674            collect_properties_from_expr_into(expr, properties);
8675            collect_properties_recursive(input, properties);
8676        }
8677        LogicalPlan::Create { input, pattern } => {
8678            // Mark variables referenced in CREATE patterns with "*" so plan_scan
8679            // adds structural projections (bare entity columns). Without this,
8680            // execute_create_pattern() can't find bound variables and creates
8681            // spurious new nodes instead of using existing MATCH'd ones.
8682            mark_pattern_variables(pattern, properties);
8683            collect_properties_recursive(input, properties);
8684        }
8685        LogicalPlan::CreateBatch { input, patterns } => {
8686            for pattern in patterns {
8687                mark_pattern_variables(pattern, properties);
8688            }
8689            collect_properties_recursive(input, properties);
8690        }
8691        LogicalPlan::Merge {
8692            input,
8693            pattern,
8694            on_match,
8695            on_create,
8696        } => {
8697            mark_pattern_variables(pattern, properties);
8698            if let Some(set_clause) = on_match {
8699                mark_set_item_variables(&set_clause.items, properties);
8700            }
8701            if let Some(set_clause) = on_create {
8702                mark_set_item_variables(&set_clause.items, properties);
8703            }
8704            collect_properties_recursive(input, properties);
8705        }
8706        LogicalPlan::Set { input, items } => {
8707            mark_set_item_variables(items, properties);
8708            collect_properties_recursive(input, properties);
8709        }
8710        LogicalPlan::Remove { input, items } => {
8711            for item in items {
8712                match item {
8713                    RemoveItem::Property(expr) => {
8714                        // REMOVE n.prop — collect the property and mark the variable
8715                        // with "*" so full structural projection is applied.
8716                        collect_properties_from_expr_into(expr, properties);
8717                        if let Expr::Property(base, _) = expr
8718                            && let Expr::Variable(var) = base.as_ref()
8719                        {
8720                            properties
8721                                .entry(var.clone())
8722                                .or_default()
8723                                .insert("*".to_string());
8724                        }
8725                    }
8726                    RemoveItem::Labels { variable, .. } => {
8727                        // REMOVE n:Label — mark n with "*"
8728                        properties
8729                            .entry(variable.clone())
8730                            .or_default()
8731                            .insert("*".to_string());
8732                    }
8733                }
8734            }
8735            collect_properties_recursive(input, properties);
8736        }
8737        LogicalPlan::Delete { input, items, .. } => {
8738            for expr in items {
8739                collect_properties_from_expr_into(expr, properties);
8740            }
8741            collect_properties_recursive(input, properties);
8742        }
8743        LogicalPlan::Foreach {
8744            input, list, body, ..
8745        } => {
8746            collect_properties_from_expr_into(list, properties);
8747            for plan in body {
8748                collect_properties_recursive(plan, properties);
8749            }
8750            collect_properties_recursive(input, properties);
8751        }
8752        LogicalPlan::Limit { input, .. } => {
8753            collect_properties_recursive(input, properties);
8754        }
8755        LogicalPlan::CrossJoin { left, right } => {
8756            collect_properties_recursive(left, properties);
8757            collect_properties_recursive(right, properties);
8758        }
8759        LogicalPlan::Apply {
8760            input,
8761            subquery,
8762            input_filter,
8763        } => {
8764            if let Some(expr) = input_filter {
8765                collect_properties_from_expr_into(expr, properties);
8766            }
8767            collect_properties_recursive(input, properties);
8768            collect_properties_recursive(subquery, properties);
8769        }
8770        LogicalPlan::Union { left, right, .. } => {
8771            collect_properties_recursive(left, properties);
8772            collect_properties_recursive(right, properties);
8773        }
8774        LogicalPlan::RecursiveCTE {
8775            initial, recursive, ..
8776        } => {
8777            collect_properties_recursive(initial, properties);
8778            collect_properties_recursive(recursive, properties);
8779        }
8780        LogicalPlan::ProcedureCall { arguments, .. } => {
8781            for arg in arguments {
8782                collect_properties_from_expr_into(arg, properties);
8783            }
8784        }
8785        LogicalPlan::VectorKnn { query, .. } => {
8786            collect_properties_from_expr_into(query, properties);
8787        }
8788        LogicalPlan::InvertedIndexLookup { terms, .. } => {
8789            collect_properties_from_expr_into(terms, properties);
8790        }
8791        LogicalPlan::ShortestPath { input, .. } => {
8792            collect_properties_recursive(input, properties);
8793        }
8794        LogicalPlan::AllShortestPaths { input, .. } => {
8795            collect_properties_recursive(input, properties);
8796        }
8797        LogicalPlan::Distinct { input } => {
8798            collect_properties_recursive(input, properties);
8799        }
8800        LogicalPlan::QuantifiedPattern {
8801            input,
8802            pattern_plan,
8803            ..
8804        } => {
8805            collect_properties_recursive(input, properties);
8806            collect_properties_recursive(pattern_plan, properties);
8807        }
8808        LogicalPlan::BindZeroLengthPath { input, .. } => {
8809            collect_properties_recursive(input, properties);
8810        }
8811        LogicalPlan::BindPath { input, .. } => {
8812            collect_properties_recursive(input, properties);
8813        }
8814        LogicalPlan::SubqueryCall { input, subquery } => {
8815            collect_properties_recursive(input, properties);
8816            collect_properties_recursive(subquery, properties);
8817        }
8818        LogicalPlan::LocyProject {
8819            input, projections, ..
8820        } => {
8821            for (expr, _alias) in projections {
8822                match expr {
8823                    // Bare variable in LocyProject: only need _vid for node variables
8824                    // (plan_locy_project extracts VID directly). Adding "*" would create
8825                    // a structural Struct column that conflicts with derived scan columns.
8826                    Expr::Variable(name) if !name.contains('.') => {
8827                        properties
8828                            .entry(name.clone())
8829                            .or_default()
8830                            .insert("_vid".to_string());
8831                    }
8832                    _ => collect_properties_from_expr_into(expr, properties),
8833                }
8834            }
8835            collect_properties_recursive(input, properties);
8836        }
8837        LogicalPlan::LocyFold {
8838            input,
8839            fold_bindings,
8840            ..
8841        } => {
8842            for (_name, expr) in fold_bindings {
8843                collect_properties_from_expr_into(expr, properties);
8844            }
8845            collect_properties_recursive(input, properties);
8846        }
8847        LogicalPlan::LocyBestBy {
8848            input, criteria, ..
8849        } => {
8850            for (expr, _asc) in criteria {
8851                collect_properties_from_expr_into(expr, properties);
8852            }
8853            collect_properties_recursive(input, properties);
8854        }
8855        LogicalPlan::LocyPriority { input, .. } => {
8856            collect_properties_recursive(input, properties);
8857        }
8858        LogicalPlan::LocyModelInvoke { input, .. } => {
8859            // Model invocations don't introduce new property accesses
8860            // — feature expressions are lifted to hidden YIELD items
8861            // by `extract_model_invocations` (uni-locy typecheck) and
8862            // their property refs are already collected via the
8863            // wrapped LocyProject's projection walk.
8864            collect_properties_recursive(input, properties);
8865        }
8866        // DDL and other plans don't reference properties
8867        _ => {}
8868    }
8869}
8870
8871/// Mark target variables from SET items with "*" and collect value expressions.
8872fn mark_set_item_variables(items: &[SetItem], properties: &mut HashMap<String, HashSet<String>>) {
8873    for item in items {
8874        match item {
8875            SetItem::Property { expr, value } => {
8876                // SET n.prop = val — mark n with STRUCT_ONLY_SENTINEL so the
8877                // scan builds the bare `n` struct column (needed for executor
8878                // `row.get(var_name)`) WITHOUT pulling the full schema. The
8879                // explicit `prop` is collected via `collect_properties_from_expr_into`
8880                // below and joins the variable's HashSet alongside the sentinel.
8881                //
8882                // If the same variable is also referenced bare elsewhere
8883                // (e.g. `SET n.x = 1 RETURN n`), `collect_properties_from_expr_into`
8884                // inserts "*" through the bare-Variable path; "*" dominates
8885                // the sentinel in `resolve_properties`, so the full schema
8886                // is still pulled when actually required.
8887                collect_properties_from_expr_into(expr, properties);
8888                collect_properties_from_expr_into(value, properties);
8889                if let Expr::Property(base, _) = expr
8890                    && let Expr::Variable(var) = base.as_ref()
8891                {
8892                    properties
8893                        .entry(var.clone())
8894                        .or_default()
8895                        .insert(STRUCT_ONLY_SENTINEL.to_string());
8896                }
8897            }
8898            SetItem::Labels { variable, .. } => {
8899                // SET n:Label — need full access to n
8900                properties
8901                    .entry(variable.clone())
8902                    .or_default()
8903                    .insert("*".to_string());
8904            }
8905            SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
8906                // SET n = {props} or SET n += {props}
8907                properties
8908                    .entry(variable.clone())
8909                    .or_default()
8910                    .insert("*".to_string());
8911                collect_properties_from_expr_into(value, properties);
8912            }
8913        }
8914    }
8915}
8916
8917/// Mark all variables in a CREATE/MERGE pattern with "*" so that plan_scan
8918/// adds structural projections (bare entity Struct columns) for them.
8919/// This is needed so that execute_create_pattern() can find bound variables
8920/// in the row HashMap and reuse existing nodes instead of creating new ones.
8921fn mark_pattern_variables(pattern: &Pattern, properties: &mut HashMap<String, HashSet<String>>) {
8922    for path in &pattern.paths {
8923        if let Some(ref v) = path.variable {
8924            properties
8925                .entry(v.clone())
8926                .or_default()
8927                .insert("*".to_string());
8928        }
8929        for element in &path.elements {
8930            match element {
8931                PatternElement::Node(n) => {
8932                    if let Some(ref v) = n.variable {
8933                        properties
8934                            .entry(v.clone())
8935                            .or_default()
8936                            .insert("*".to_string());
8937                    }
8938                    // Also collect properties from inline property expressions
8939                    if let Some(ref props) = n.properties {
8940                        collect_properties_from_expr_into(props, properties);
8941                    }
8942                }
8943                PatternElement::Relationship(r) => {
8944                    if let Some(ref v) = r.variable {
8945                        properties
8946                            .entry(v.clone())
8947                            .or_default()
8948                            .insert("*".to_string());
8949                    }
8950                    if let Some(ref props) = r.properties {
8951                        collect_properties_from_expr_into(props, properties);
8952                    }
8953                }
8954                PatternElement::Parenthesized { pattern, .. } => {
8955                    let sub = Pattern {
8956                        paths: vec![pattern.as_ref().clone()],
8957                    };
8958                    mark_pattern_variables(&sub, properties);
8959                }
8960            }
8961        }
8962    }
8963}
8964
8965/// Collect properties from an expression into a HashMap.
8966fn collect_properties_from_expr_into(
8967    expr: &Expr,
8968    properties: &mut HashMap<String, HashSet<String>>,
8969) {
8970    match expr {
8971        Expr::PatternComprehension {
8972            where_clause,
8973            map_expr,
8974            ..
8975        } => {
8976            // Collect properties from the WHERE clause and map expression.
8977            // The pattern itself creates local bindings that don't need
8978            // property collection from the outer scope.
8979            if let Some(where_expr) = where_clause {
8980                collect_properties_from_expr_into(where_expr, properties);
8981            }
8982            collect_properties_from_expr_into(map_expr, properties);
8983        }
8984        Expr::Variable(name) => {
8985            // Handle transformed property expressions like "e.dept" (after transform_window_expr_properties)
8986            if let Some((var, prop)) = name.split_once('.') {
8987                properties
8988                    .entry(var.to_string())
8989                    .or_default()
8990                    .insert(prop.to_string());
8991            } else {
8992                // Bare variable (e.g., RETURN n) — needs all properties materialized
8993                properties
8994                    .entry(name.clone())
8995                    .or_default()
8996                    .insert("*".to_string());
8997            }
8998        }
8999        Expr::Property(base, name) => {
9000            // Extract variable name from the base expression
9001            if let Expr::Variable(var) = base.as_ref() {
9002                properties
9003                    .entry(var.clone())
9004                    .or_default()
9005                    .insert(name.clone());
9006                // Don't recurse into Variable — that would mark it as a bare
9007                // variable reference (adding "*") when it's just a property base.
9008            } else {
9009                // Recurse for complex base expressions (nested property, function call, etc.)
9010                collect_properties_from_expr_into(base, properties);
9011            }
9012        }
9013        Expr::BinaryOp { left, right, .. } => {
9014            collect_properties_from_expr_into(left, properties);
9015            collect_properties_from_expr_into(right, properties);
9016        }
9017        Expr::FunctionCall {
9018            name,
9019            args,
9020            window_spec,
9021            ..
9022        } => {
9023            // Analyze function for property requirements (pushdown hydration)
9024            analyze_function_property_requirements(name, args, properties);
9025
9026            // Collect from arguments
9027            for arg in args {
9028                collect_properties_from_expr_into(arg, properties);
9029            }
9030
9031            // Collect from window spec (PARTITION BY, ORDER BY)
9032            if let Some(spec) = window_spec {
9033                for part_expr in &spec.partition_by {
9034                    collect_properties_from_expr_into(part_expr, properties);
9035                }
9036                for sort_item in &spec.order_by {
9037                    collect_properties_from_expr_into(&sort_item.expr, properties);
9038                }
9039            }
9040        }
9041        Expr::UnaryOp { expr, .. } => {
9042            collect_properties_from_expr_into(expr, properties);
9043        }
9044        Expr::List(items) => {
9045            for item in items {
9046                collect_properties_from_expr_into(item, properties);
9047            }
9048        }
9049        Expr::Map(entries) => {
9050            for (_key, value) in entries {
9051                collect_properties_from_expr_into(value, properties);
9052            }
9053        }
9054        Expr::ListComprehension {
9055            list,
9056            where_clause,
9057            map_expr,
9058            ..
9059        } => {
9060            collect_properties_from_expr_into(list, properties);
9061            if let Some(where_expr) = where_clause {
9062                collect_properties_from_expr_into(where_expr, properties);
9063            }
9064            collect_properties_from_expr_into(map_expr, properties);
9065        }
9066        Expr::Case {
9067            expr,
9068            when_then,
9069            else_expr,
9070        } => {
9071            if let Some(scrutinee_expr) = expr {
9072                collect_properties_from_expr_into(scrutinee_expr, properties);
9073            }
9074            for (when, then) in when_then {
9075                collect_properties_from_expr_into(when, properties);
9076                collect_properties_from_expr_into(then, properties);
9077            }
9078            if let Some(default_expr) = else_expr {
9079                collect_properties_from_expr_into(default_expr, properties);
9080            }
9081        }
9082        Expr::Quantifier {
9083            list, predicate, ..
9084        } => {
9085            collect_properties_from_expr_into(list, properties);
9086            collect_properties_from_expr_into(predicate, properties);
9087        }
9088        Expr::Reduce {
9089            init, list, expr, ..
9090        } => {
9091            collect_properties_from_expr_into(init, properties);
9092            collect_properties_from_expr_into(list, properties);
9093            collect_properties_from_expr_into(expr, properties);
9094        }
9095        Expr::Exists { query, .. } => {
9096            // Walk into EXISTS body to collect property references for outer-scope variables.
9097            // This ensures correlated properties (e.g., a.city inside EXISTS where a is outer)
9098            // are included in the outer scan's property list. Extra properties collected for
9099            // inner-only variables are harmless — the outer scan ignores unknown variable names.
9100            collect_properties_from_subquery(query, properties);
9101        }
9102        Expr::CountSubquery(query) | Expr::CollectSubquery(query) => {
9103            collect_properties_from_subquery(query, properties);
9104        }
9105        Expr::IsNull(expr) | Expr::IsNotNull(expr) | Expr::IsUnique(expr) => {
9106            collect_properties_from_expr_into(expr, properties);
9107        }
9108        Expr::In { expr, list } => {
9109            collect_properties_from_expr_into(expr, properties);
9110            collect_properties_from_expr_into(list, properties);
9111        }
9112        Expr::ArrayIndex { array, index } => {
9113            if let Expr::Variable(var) = array.as_ref() {
9114                if let Expr::Literal(CypherLiteral::String(prop_name)) = index.as_ref() {
9115                    // Static string key: e['name'] → only need that specific property
9116                    properties
9117                        .entry(var.clone())
9118                        .or_default()
9119                        .insert(prop_name.clone());
9120                } else {
9121                    // Dynamic property access: e[prop] → need all properties
9122                    properties
9123                        .entry(var.clone())
9124                        .or_default()
9125                        .insert("*".to_string());
9126                }
9127            }
9128            collect_properties_from_expr_into(array, properties);
9129            collect_properties_from_expr_into(index, properties);
9130        }
9131        Expr::ArraySlice { array, start, end } => {
9132            collect_properties_from_expr_into(array, properties);
9133            if let Some(start_expr) = start {
9134                collect_properties_from_expr_into(start_expr, properties);
9135            }
9136            if let Some(end_expr) = end {
9137                collect_properties_from_expr_into(end_expr, properties);
9138            }
9139        }
9140        Expr::ValidAt {
9141            entity,
9142            timestamp,
9143            start_prop,
9144            end_prop,
9145        } => {
9146            // Extract property requirements from ValidAt expression
9147            if let Expr::Variable(var) = entity.as_ref() {
9148                if let Some(prop) = start_prop {
9149                    properties
9150                        .entry(var.clone())
9151                        .or_default()
9152                        .insert(prop.clone());
9153                }
9154                if let Some(prop) = end_prop {
9155                    properties
9156                        .entry(var.clone())
9157                        .or_default()
9158                        .insert(prop.clone());
9159                }
9160            }
9161            collect_properties_from_expr_into(entity, properties);
9162            collect_properties_from_expr_into(timestamp, properties);
9163        }
9164        Expr::MapProjection { base, items } => {
9165            collect_properties_from_expr_into(base, properties);
9166            for item in items {
9167                match item {
9168                    uni_cypher::ast::MapProjectionItem::Property(prop) => {
9169                        if let Expr::Variable(var) = base.as_ref() {
9170                            properties
9171                                .entry(var.clone())
9172                                .or_default()
9173                                .insert(prop.clone());
9174                        }
9175                    }
9176                    uni_cypher::ast::MapProjectionItem::AllProperties => {
9177                        if let Expr::Variable(var) = base.as_ref() {
9178                            properties
9179                                .entry(var.clone())
9180                                .or_default()
9181                                .insert("*".to_string());
9182                        }
9183                    }
9184                    uni_cypher::ast::MapProjectionItem::LiteralEntry(_, expr) => {
9185                        collect_properties_from_expr_into(expr, properties);
9186                    }
9187                    uni_cypher::ast::MapProjectionItem::Variable(_) => {}
9188                }
9189            }
9190        }
9191        Expr::LabelCheck { expr, .. } => {
9192            collect_properties_from_expr_into(expr, properties);
9193        }
9194        // Parameters reference outer-scope variables (e.g., $p in correlated subqueries).
9195        // Mark them with "*" so the outer scan produces structural projections that
9196        // extract_row_params can resolve.
9197        Expr::Parameter(name) => {
9198            properties
9199                .entry(name.clone())
9200                .or_default()
9201                .insert("*".to_string());
9202        }
9203        // Literals and wildcard don't reference properties
9204        Expr::Literal(_) | Expr::Wildcard => {}
9205    }
9206}
9207
9208/// Walk a subquery (EXISTS/COUNT/COLLECT body) and collect property references.
9209///
9210/// This is needed so that correlated property accesses like `a.city` inside
9211/// `WHERE EXISTS { (a)-[:KNOWS]->(b) WHERE b.city = a.city }` cause the outer
9212/// scan to include `a.city` in its projected columns.
9213fn collect_properties_from_subquery(
9214    query: &Query,
9215    properties: &mut HashMap<String, HashSet<String>>,
9216) {
9217    match query {
9218        Query::Single(stmt) => {
9219            for clause in &stmt.clauses {
9220                match clause {
9221                    Clause::Match(m) => {
9222                        if let Some(ref wc) = m.where_clause {
9223                            collect_properties_from_expr_into(wc, properties);
9224                        }
9225                    }
9226                    Clause::With(w) => {
9227                        for item in &w.items {
9228                            if let ReturnItem::Expr { expr, .. } = item {
9229                                collect_properties_from_expr_into(expr, properties);
9230                            }
9231                        }
9232                        if let Some(ref wc) = w.where_clause {
9233                            collect_properties_from_expr_into(wc, properties);
9234                        }
9235                    }
9236                    Clause::Return(r) => {
9237                        for item in &r.items {
9238                            if let ReturnItem::Expr { expr, .. } = item {
9239                                collect_properties_from_expr_into(expr, properties);
9240                            }
9241                        }
9242                    }
9243                    _ => {}
9244                }
9245            }
9246        }
9247        Query::Union { left, right, .. } => {
9248            collect_properties_from_subquery(left, properties);
9249            collect_properties_from_subquery(right, properties);
9250        }
9251        _ => {}
9252    }
9253}
9254
9255/// Analyze function calls to extract property requirements for pushdown hydration
9256///
9257/// This function examines function calls and their arguments to determine which properties
9258/// need to be loaded for entity arguments. For example:
9259/// - validAt(e, 'start', 'end', ts) -> e needs {start, end}
9260/// - keys(n) -> n needs all properties (*)
9261///
9262/// The extracted requirements are added to the properties map for later use during
9263/// scan planning.
9264fn analyze_function_property_requirements(
9265    name: &str,
9266    args: &[Expr],
9267    properties: &mut HashMap<String, HashSet<String>>,
9268) {
9269    use crate::query::function_props::get_function_spec;
9270
9271    /// Helper to mark a variable as needing all properties.
9272    fn mark_wildcard(var: &str, properties: &mut HashMap<String, HashSet<String>>) {
9273        properties
9274            .entry(var.to_string())
9275            .or_default()
9276            .insert("*".to_string());
9277    }
9278
9279    // System-managed timestamp functions: require only the corresponding
9280    // `_created_at` / `_updated_at` column, not full entity materialization.
9281    if name.eq_ignore_ascii_case("created_at") || name.eq_ignore_ascii_case("updated_at") {
9282        if let Some(Expr::Variable(var)) = args.first() {
9283            let col = if name.eq_ignore_ascii_case("created_at") {
9284                "_created_at"
9285            } else {
9286                "_updated_at"
9287            };
9288            properties
9289                .entry(var.clone())
9290                .or_default()
9291                .insert(col.to_string());
9292        }
9293        return;
9294    }
9295
9296    let Some(spec) = get_function_spec(name) else {
9297        // Unknown function: conservatively require all properties for variable args
9298        for arg in args {
9299            if let Expr::Variable(var) = arg {
9300                mark_wildcard(var, properties);
9301            }
9302        }
9303        return;
9304    };
9305
9306    // Extract property names from string literal arguments
9307    for &(prop_arg_idx, entity_arg_idx) in spec.property_name_args {
9308        let entity_arg = args.get(entity_arg_idx);
9309        let prop_arg = args.get(prop_arg_idx);
9310
9311        match (entity_arg, prop_arg) {
9312            (Some(Expr::Variable(var)), Some(Expr::Literal(CypherLiteral::String(prop)))) => {
9313                properties
9314                    .entry(var.clone())
9315                    .or_default()
9316                    .insert(prop.clone());
9317            }
9318            (Some(Expr::Variable(var)), Some(Expr::Parameter(_))) => {
9319                // Parameter property name: need all properties
9320                mark_wildcard(var, properties);
9321            }
9322            _ => {}
9323        }
9324    }
9325
9326    // Handle full entity requirement (keys(), properties())
9327    if spec.needs_full_entity {
9328        for &idx in spec.entity_args {
9329            if let Some(Expr::Variable(var)) = args.get(idx) {
9330                mark_wildcard(var, properties);
9331            }
9332        }
9333    }
9334}
9335
9336// ============================================================================
9337// Phase 5a-impl — fork-aware fusion rewrite
9338// ============================================================================
9339
9340/// Trait that exposes the per-fork "is there a fork-local index for
9341/// `(label, column)`?" lookup. Implemented for `StorageManager` so
9342/// callers don't need to depend on the fork module directly; tests
9343/// can mock by implementing it on a `HashMap`.
9344pub trait ForkIndexLookup {
9345    fn fork_index_for(
9346        &self,
9347        label: &str,
9348        column: &str,
9349    ) -> Option<uni_store::fork::ForkLocalIndexKind>;
9350
9351    /// Phase 5b followup: resolve a label id, then dispatch to
9352    /// `fork_index_for`. Used by the rewrite when wrapping
9353    /// `VectorKnn` and `InvertedIndexLookup` nodes which carry
9354    /// `label_id: u16` rather than the label name. Default returns
9355    /// `None`; the `StorageManager` impl resolves via its
9356    /// `schema_manager`.
9357    fn fork_index_for_label_id(
9358        &self,
9359        _label_id: u16,
9360        _column: &str,
9361    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9362        None
9363    }
9364}
9365
9366impl ForkIndexLookup for uni_store::storage::StorageManager {
9367    fn fork_index_for(
9368        &self,
9369        label: &str,
9370        column: &str,
9371    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9372        self.fork_index_exists(label, column)
9373    }
9374
9375    fn fork_index_for_label_id(
9376        &self,
9377        label_id: u16,
9378        column: &str,
9379    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9380        let schema = self.schema_manager().schema();
9381        let label_name = schema.label_name_by_id(label_id)?;
9382        self.fork_index_exists(label_name, column)
9383    }
9384}
9385
9386/// Fold a trailing `SET var.prop = value` into the freshly-created entity's
9387/// inline property map, eliminating the separate `Set` write pass.
9388///
9389/// Rewrites `CREATE (a)-[r:T]->(b) SET r.x = e.v` into the equivalent of
9390/// `CREATE (a)-[r:T {x: e.v}]->(b)`, so the plan collapses from `Set → Create`
9391/// to a single `Create`. This removes an entire read-modify-write operator
9392/// (`MutationSetExec`) — measured at ~38% of per-edge `UNWIND … CREATE … SET`
9393/// execution — that the bulk write path never pays.
9394///
9395/// # Examples
9396///
9397/// ```ignore
9398/// // CREATE (a)-[r:LINK]->(b) SET r.role = e.role   ==>
9399/// // CREATE (a)-[r:LINK {role: e.role}]->(b)
9400/// let fused = fuse_create_set(plan);
9401/// ```
9402///
9403/// The fold is **all-or-nothing per `SET` clause** and only fires when every
9404/// item is safe:
9405/// - the item is the simple `Variable.property = value` form (not `+=`, label
9406///   set `SET n:L`, or whole-entity map assignment `SET n = {...}`),
9407/// - the target variable is introduced by the immediately-preceding
9408///   `Create`/`CreateBatch` (a MATCHed variable is left untouched),
9409/// - the target element's inline properties are absent or a map literal (a
9410///   parameter-map form such as `CREATE (n $props)` cannot be merged),
9411/// - the value references no variable created in the same statement, so
9412///   evaluating it at create time is observably identical to SET time.
9413///
9414/// When any item fails these checks the whole `Set` node is preserved, keeping
9415/// semantics unchanged. The pass is idempotent: a plan with no fusable
9416/// `Set`/`Create` adjacency passes through untouched.
9417#[must_use]
9418pub fn fuse_create_set(plan: LogicalPlan) -> LogicalPlan {
9419    match plan {
9420        LogicalPlan::Set { input, items } => {
9421            // Fuse any deeper adjacency first so chained
9422            // `CREATE … SET … CREATE … SET` collapses bottom-up.
9423            let input = fuse_create_set(*input);
9424            match input {
9425                LogicalPlan::Create {
9426                    input: child,
9427                    pattern,
9428                } => {
9429                    let bound_vars = crate::query::df_planner::collect_plan_variables(&child);
9430                    match try_fuse_set_items(std::slice::from_ref(&pattern), &items, &bound_vars) {
9431                        Some(mut patterns) => LogicalPlan::Create {
9432                            input: child,
9433                            // try_fuse_set_items returns exactly as many patterns
9434                            // as it was given (one here).
9435                            pattern: patterns
9436                                .pop()
9437                                .expect("one pattern in yields one pattern out"),
9438                        },
9439                        None => LogicalPlan::Set {
9440                            input: Box::new(LogicalPlan::Create {
9441                                input: child,
9442                                pattern,
9443                            }),
9444                            items,
9445                        },
9446                    }
9447                }
9448                LogicalPlan::CreateBatch {
9449                    input: child,
9450                    patterns,
9451                } => {
9452                    let bound_vars = crate::query::df_planner::collect_plan_variables(&child);
9453                    match try_fuse_set_items(&patterns, &items, &bound_vars) {
9454                        Some(fused) => LogicalPlan::CreateBatch {
9455                            input: child,
9456                            patterns: fused,
9457                        },
9458                        None => LogicalPlan::Set {
9459                            input: Box::new(LogicalPlan::CreateBatch {
9460                                input: child,
9461                                patterns,
9462                            }),
9463                            items,
9464                        },
9465                    }
9466                }
9467                other => LogicalPlan::Set {
9468                    input: Box::new(other),
9469                    items,
9470                },
9471            }
9472        }
9473        // Recurse through the operators that can sit above a write clause so a
9474        // `Set` under RETURN/ORDER BY/LIMIT is still reached. This mirrors the
9475        // pragmatic recursion of `rewrite_for_fork_fusion`: variants that never
9476        // sit above a write clause fall through `other => other` unchanged.
9477        LogicalPlan::Project { input, projections } => LogicalPlan::Project {
9478            input: Box::new(fuse_create_set(*input)),
9479            projections,
9480        },
9481        LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
9482            input: Box::new(fuse_create_set(*input)),
9483            skip,
9484            fetch,
9485        },
9486        LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
9487            input: Box::new(fuse_create_set(*input)),
9488            order_by,
9489        },
9490        LogicalPlan::Filter {
9491            input,
9492            predicate,
9493            optional_variables,
9494        } => LogicalPlan::Filter {
9495            input: Box::new(fuse_create_set(*input)),
9496            predicate,
9497            optional_variables,
9498        },
9499        LogicalPlan::Create { input, pattern } => LogicalPlan::Create {
9500            input: Box::new(fuse_create_set(*input)),
9501            pattern,
9502        },
9503        LogicalPlan::CreateBatch { input, patterns } => LogicalPlan::CreateBatch {
9504            input: Box::new(fuse_create_set(*input)),
9505            patterns,
9506        },
9507        other => other,
9508    }
9509}
9510
9511/// Try to fold every `SET` item into the given CREATE patterns.
9512///
9513/// Returns the rewritten patterns when *all* items fuse safely (see
9514/// [`fuse_create_set`] for the conditions); returns `None` the moment any item
9515/// is unfusable, so the caller can keep the original `Set` node untouched.
9516///
9517/// `bound_vars` are the variables produced by the CREATE's input plan (e.g. an
9518/// upstream MATCH). A CREATE pattern may *reuse* such a variable as an endpoint
9519/// (`MATCH (a) CREATE (a)-[r:T]->(b)`), so `pattern_variable_names` alone cannot
9520/// tell a freshly-created variable from a reused one. Reused variables are
9521/// excluded from `owner`: a `SET` on them must not fuse, because the executor
9522/// skips inline properties on already-bound elements (which would silently drop
9523/// the write).
9524fn try_fuse_set_items(
9525    patterns: &[Pattern],
9526    items: &[SetItem],
9527    bound_vars: &HashSet<String>,
9528) -> Option<Vec<Pattern>> {
9529    // Map each freshly-created variable to the index of the pattern that
9530    // introduces it, skipping any variable already bound upstream.
9531    let mut owner: HashMap<String, usize> = HashMap::new();
9532    for (idx, pattern) in patterns.iter().enumerate() {
9533        for var in crate::query::df_graph::mutation_common::pattern_variable_names(pattern) {
9534            if bound_vars.contains(&var) {
9535                continue;
9536            }
9537            owner.entry(var).or_insert(idx);
9538        }
9539    }
9540
9541    let mut out = patterns.to_vec();
9542    for item in items {
9543        let SetItem::Property { expr, value } = item else {
9544            return None; // `+=`, label set, or whole-entity map assignment
9545        };
9546        let Expr::Property(base, prop) = expr else {
9547            return None; // not a property target
9548        };
9549        let Expr::Variable(var) = base.as_ref() else {
9550            return None; // e.g. `n[expr].x` or a deeper path
9551        };
9552        let Some(&idx) = owner.get(var) else {
9553            return None; // target is a MATCHed (not created) variable
9554        };
9555        // Evaluating the value at create time must equal evaluating it at SET
9556        // time: reject any reference to a variable created in this statement
9557        // (its value may not yet exist when the element is constructed).
9558        if collect_expr_variables(value)
9559            .iter()
9560            .any(|referenced| owner.contains_key(referenced))
9561        {
9562            return None;
9563        }
9564        if !merge_pattern_property(&mut out[idx], var, prop, value) {
9565            return None; // element absent or has a non-map property form
9566        }
9567    }
9568    Some(out)
9569}
9570
9571/// Merge `var.prop = value` into the matching element's inline property map.
9572///
9573/// Returns `false` (leaving the pattern unchanged) when the variable's element
9574/// is not found or its existing properties are a non-map expression that cannot
9575/// be merged. Any pre-existing entry for `prop` is replaced so the SET's
9576/// last-write-wins precedence is preserved.
9577fn merge_pattern_property(pattern: &mut Pattern, var: &str, prop: &str, value: &Expr) -> bool {
9578    for path in &mut pattern.paths {
9579        if merge_into_elements(&mut path.elements, var, prop, value) {
9580            return true;
9581        }
9582    }
9583    false
9584}
9585
9586/// Recursive worker for [`merge_pattern_property`] over a list of elements.
9587fn merge_into_elements(
9588    elements: &mut [PatternElement],
9589    var: &str,
9590    prop: &str,
9591    value: &Expr,
9592) -> bool {
9593    for element in elements {
9594        match element {
9595            PatternElement::Node(n) if n.variable.as_deref() == Some(var) => {
9596                return set_map_property(&mut n.properties, prop, value.clone());
9597            }
9598            PatternElement::Relationship(r) if r.variable.as_deref() == Some(var) => {
9599                return set_map_property(&mut r.properties, prop, value.clone());
9600            }
9601            PatternElement::Parenthesized { pattern, .. } => {
9602                if merge_into_elements(&mut pattern.elements, var, prop, value) {
9603                    return true;
9604                }
9605            }
9606            _ => {}
9607        }
9608    }
9609    false
9610}
9611
9612/// Set `prop = value` on an optional inline property map, last-write-wins.
9613///
9614/// Returns `false` without mutating when the properties are present but are not
9615/// a map literal (e.g. `CREATE (n $params)`), which cannot accept a single key.
9616fn set_map_property(props: &mut Option<Expr>, prop: &str, value: Expr) -> bool {
9617    match props {
9618        None => {
9619            *props = Some(Expr::Map(vec![(prop.to_string(), value)]));
9620            true
9621        }
9622        Some(Expr::Map(entries)) => {
9623            entries.retain(|(k, _)| k != prop);
9624            entries.push((prop.to_string(), value));
9625            true
9626        }
9627        Some(_) => false,
9628    }
9629}
9630
9631/// Walk a [`LogicalPlan`] tree and rewrite each `Scan` whose target
9632/// `(label, column)` has a registered fork-local index into the
9633/// matching `FusedIndexScan` variant.
9634///
9635/// Phase 5a-impl Step 4 covers `VidUidForkFirst`; Steps 5 and 6 add
9636/// `BtreeUnion` and `SortedKWayMerge` by extending `kind_for_filter`.
9637///
9638/// Idempotent: a tree that already contains `FusedIndexScan` nodes
9639/// passes through unchanged.
9640#[must_use]
9641pub fn rewrite_for_fork_fusion<L: ForkIndexLookup>(plan: LogicalPlan, lookup: &L) -> LogicalPlan {
9642    rewrite_node(plan, lookup)
9643}
9644
9645fn rewrite_node<L: ForkIndexLookup>(plan: LogicalPlan, lookup: &L) -> LogicalPlan {
9646    match plan {
9647        LogicalPlan::Scan {
9648            label_id,
9649            labels,
9650            variable,
9651            filter,
9652            optional,
9653        } => {
9654            // VidUid fusion only fires on a single-label scan with an
9655            // equality filter on a registered UID column. BTree and
9656            // Sorted will extend this match in Steps 5 and 6.
9657            let kind = if labels.len() == 1
9658                && let Some(col) = filter
9659                    .as_ref()
9660                    .and_then(|f| equality_target_column(f, &variable))
9661                && let Some(idx_kind) = lookup.fork_index_for(&labels[0], &col)
9662            {
9663                into_fusion_kind(idx_kind)
9664            } else {
9665                None
9666            };
9667            match kind {
9668                Some(kind) => LogicalPlan::FusedIndexScan {
9669                    label_id,
9670                    labels,
9671                    variable,
9672                    filter,
9673                    optional,
9674                    kind,
9675                },
9676                None => LogicalPlan::Scan {
9677                    label_id,
9678                    labels,
9679                    variable,
9680                    filter,
9681                    optional,
9682                },
9683            }
9684        }
9685        // Phase 5b followup: wrap lossy leaf operators when a
9686        // matching fork-local index has been registered. The wrap
9687        // preserves the original node's fields (the physical
9688        // planner unwraps and recurses); only the explain-plan
9689        // surface and runtime-stats operator name change. The
9690        // actual fusion still happens at the `BranchedBackend`
9691        // layer via Lance's per-branch reads.
9692        //
9693        // The CALL-style vector/FTS queries land as `ProcedureCall`
9694        // (not the dedicated `VectorKnn`/`InvertedIndexLookup`
9695        // operators); recognize those by procedure name and the
9696        // shape of their first two arguments (`label, column, ...`).
9697        LogicalPlan::ProcedureCall {
9698            procedure_name,
9699            arguments,
9700            yield_items,
9701        } => {
9702            let kind = procedure_call_fusion_kind(&procedure_name, &arguments, lookup);
9703            let inner = LogicalPlan::ProcedureCall {
9704                procedure_name,
9705                arguments,
9706                yield_items,
9707            };
9708            match kind {
9709                Some(kind) => LogicalPlan::FusedIndexScanWrapped {
9710                    inner: Box::new(inner),
9711                    kind,
9712                },
9713                None => inner,
9714            }
9715        }
9716        LogicalPlan::VectorKnn {
9717            label_id,
9718            variable,
9719            property,
9720            query,
9721            k,
9722            threshold,
9723        } => {
9724            if let Some(idx_kind) = lookup.fork_index_for_label_id(label_id, &property)
9725                && let Some(kind) = into_fusion_kind(idx_kind)
9726            {
9727                LogicalPlan::FusedIndexScanWrapped {
9728                    inner: Box::new(LogicalPlan::VectorKnn {
9729                        label_id,
9730                        variable,
9731                        property,
9732                        query,
9733                        k,
9734                        threshold,
9735                    }),
9736                    kind,
9737                }
9738            } else {
9739                LogicalPlan::VectorKnn {
9740                    label_id,
9741                    variable,
9742                    property,
9743                    query,
9744                    k,
9745                    threshold,
9746                }
9747            }
9748        }
9749        LogicalPlan::InvertedIndexLookup {
9750            label_id,
9751            variable,
9752            property,
9753            terms,
9754        } => {
9755            if let Some(idx_kind) = lookup.fork_index_for_label_id(label_id, &property)
9756                && let Some(kind) = into_fusion_kind(idx_kind)
9757            {
9758                LogicalPlan::FusedIndexScanWrapped {
9759                    inner: Box::new(LogicalPlan::InvertedIndexLookup {
9760                        label_id,
9761                        variable,
9762                        property,
9763                        terms,
9764                    }),
9765                    kind,
9766                }
9767            } else {
9768                LogicalPlan::InvertedIndexLookup {
9769                    label_id,
9770                    variable,
9771                    property,
9772                    terms,
9773                }
9774            }
9775        }
9776        // Tree-recursive variants — only the ones that can carry a
9777        // Scan in their subtree need to recurse here. Adding more is
9778        // safe (a missing recursion just means fusion doesn't fire
9779        // for that nested context, not incorrect results).
9780        LogicalPlan::Filter {
9781            input,
9782            predicate,
9783            optional_variables,
9784        } => LogicalPlan::Filter {
9785            input: Box::new(rewrite_node(*input, lookup)),
9786            predicate,
9787            optional_variables,
9788        },
9789        LogicalPlan::Project { input, projections } => LogicalPlan::Project {
9790            input: Box::new(rewrite_node(*input, lookup)),
9791            projections,
9792        },
9793        LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
9794            input: Box::new(rewrite_node(*input, lookup)),
9795            skip,
9796            fetch,
9797        },
9798        LogicalPlan::Sort { input, order_by } => {
9799            // Phase 5a-impl Sorted fusion: when the immediate child
9800            // is a single-label Scan AND the sole sort key is a
9801            // single-column property reference on that scan's
9802            // variable AND the column has a fork-local Sorted index
9803            // registered, rewrite to FusedIndexScan { SortedKWayMerge }.
9804            // Otherwise recurse normally.
9805            let new_input = match (*input, &order_by[..]) {
9806                (
9807                    LogicalPlan::Scan {
9808                        label_id,
9809                        labels,
9810                        variable,
9811                        filter,
9812                        optional,
9813                    },
9814                    [single_sort],
9815                ) if labels.len() == 1
9816                    && let Some(col) = column_of_scan_variable(&single_sort.expr, &variable)
9817                    && let Some(uni_store::fork::ForkLocalIndexKind::Sorted) =
9818                        lookup.fork_index_for(&labels[0], &col) =>
9819                {
9820                    LogicalPlan::FusedIndexScan {
9821                        label_id,
9822                        labels,
9823                        variable,
9824                        filter,
9825                        optional,
9826                        kind: FusionKind::SortedKWayMerge,
9827                    }
9828                }
9829                (other_input, _) => rewrite_node(other_input, lookup),
9830            };
9831            LogicalPlan::Sort {
9832                input: Box::new(new_input),
9833                order_by,
9834            }
9835        }
9836        LogicalPlan::Union { left, right, all } => LogicalPlan::Union {
9837            left: Box::new(rewrite_node(*left, lookup)),
9838            right: Box::new(rewrite_node(*right, lookup)),
9839            all,
9840        },
9841        // Everything else passes through unchanged. Adding more
9842        // arms is purely additive — fusion just doesn't fire inside
9843        // un-recursed-into subtrees.
9844        other => other,
9845    }
9846}
9847
9848/// Phase 5b followup: inspect a CALL-style procedure invocation
9849/// for a `(label, column)` pair and check whether a fork-local
9850/// index has been registered for it.
9851///
9852/// Recognizes:
9853/// - `uni.vector.query(label, column, query_vec, k)` → `AnnRerank`
9854///   when a `Vector` fork-local index exists.
9855/// - `uni.fts.query(label, column, query, k)` → `Bm25Rrf` when a
9856///   `FullText` fork-local index exists.
9857///
9858/// Returns `None` for any other procedure (no rewrite) or when the
9859/// registry has no matching entry.
9860fn procedure_call_fusion_kind<L: ForkIndexLookup>(
9861    procedure_name: &str,
9862    arguments: &[Expr],
9863    lookup: &L,
9864) -> Option<FusionKind> {
9865    if arguments.len() < 2 {
9866        return None;
9867    }
9868    let label = match &arguments[0] {
9869        Expr::Literal(uni_cypher::ast::CypherLiteral::String(s)) => s.as_str(),
9870        _ => return None,
9871    };
9872    let column = match &arguments[1] {
9873        Expr::Literal(uni_cypher::ast::CypherLiteral::String(s)) => s.as_str(),
9874        _ => return None,
9875    };
9876    let expected = match procedure_name {
9877        "uni.vector.query" => uni_store::fork::ForkLocalIndexKind::Vector,
9878        "uni.fts.query" => uni_store::fork::ForkLocalIndexKind::FullText,
9879        _ => return None,
9880    };
9881    let registered = lookup.fork_index_for(label, column)?;
9882    if registered != expected {
9883        return None;
9884    }
9885    into_fusion_kind(registered)
9886}
9887
9888/// Map a fork-local index kind to its planner-side fusion variant.
9889/// Returns `None` for any future `ForkLocalIndexKind` we don't yet
9890/// know how to fuse — the caller falls back to a regular Scan.
9891fn into_fusion_kind(kind: uni_store::fork::ForkLocalIndexKind) -> Option<FusionKind> {
9892    use uni_store::fork::ForkLocalIndexKind as K;
9893    match kind {
9894        K::VidUid => Some(FusionKind::VidUidForkFirst),
9895        K::ScalarBtree => Some(FusionKind::BtreeUnion),
9896        K::Sorted => Some(FusionKind::SortedKWayMerge),
9897        K::Vector => Some(FusionKind::AnnRerank),
9898        K::FullText => Some(FusionKind::Bm25Rrf),
9899        // `ForkLocalIndexKind` is `#[non_exhaustive]`; future kinds
9900        // we don't yet handle are silently passed through as a
9901        // regular Scan so a forward-incompatible binary doesn't
9902        // panic — just misses the fusion opportunity.
9903        _ => None,
9904    }
9905}
9906
9907/// Inspect a Scan filter `Expr` for a single-column equality predicate
9908/// against the scan's variable. Returns the column name if the
9909/// predicate matches the shape `variable.column = <literal_or_param>`
9910/// (or its commuted form). Returns `None` for any other shape — fusion
9911/// only fires on the simple case in Phase 5a-impl.
9912fn equality_target_column(filter: &Expr, scan_variable: &str) -> Option<String> {
9913    let (lhs, rhs) = match filter {
9914        Expr::BinaryOp {
9915            left,
9916            op: uni_cypher::ast::BinaryOp::Eq,
9917            right,
9918        } => (left.as_ref(), right.as_ref()),
9919        _ => return None,
9920    };
9921    // Try lhs = column-of-scan-var, rhs = literal/param; or commuted.
9922    if let Some(col) = column_of_scan_variable(lhs, scan_variable)
9923        && is_constant_or_param(rhs)
9924    {
9925        return Some(col);
9926    }
9927    if let Some(col) = column_of_scan_variable(rhs, scan_variable)
9928        && is_constant_or_param(lhs)
9929    {
9930        return Some(col);
9931    }
9932    None
9933}
9934
9935fn column_of_scan_variable(expr: &Expr, scan_variable: &str) -> Option<String> {
9936    if let Expr::Property(base, prop) = expr
9937        && let Expr::Variable(v) = base.as_ref()
9938        && v == scan_variable
9939    {
9940        return Some(prop.clone());
9941    }
9942    None
9943}
9944
9945fn is_constant_or_param(expr: &Expr) -> bool {
9946    matches!(expr, Expr::Literal(_) | Expr::Parameter(_))
9947}
9948
/// Unit tests for property-requirement collection: which properties of
/// which variable get recorded for function calls and expressions.
#[cfg(test)]
mod pushdown_tests {
    use super::*;

    /// Builds an owned property-name set from string slices.
    fn prop_set(names: &[&str]) -> HashSet<String> {
        names.iter().map(|&name| name.to_owned()).collect()
    }

    /// Shorthand for an `Expr::Variable` with the given name.
    fn var(name: &str) -> Expr {
        Expr::Variable(name.to_string())
    }

    /// Shorthand for a string-literal expression.
    fn str_lit(text: &str) -> Expr {
        Expr::Literal(CypherLiteral::String(text.to_string()))
    }

    #[test]
    fn test_validat_extracts_property_names() {
        // validAt(e, 'start', 'end', ts) → e: {start, end}
        let args = vec![var("e"), str_lit("start"), str_lit("end"), var("ts")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("uni.temporal.validAt", &args, &mut properties);

        assert_eq!(properties.get("e"), Some(&prop_set(&["start", "end"])));
    }

    #[test]
    fn test_keys_requires_wildcard() {
        // keys(n) → n: {*}
        let args = vec![var("n")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("keys", &args, &mut properties);

        assert_eq!(properties.get("n"), Some(&prop_set(&["*"])));
    }

    #[test]
    fn test_properties_requires_wildcard() {
        // properties(n) → n: {*}
        let args = vec![var("n")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("properties", &args, &mut properties);

        assert_eq!(properties.get("n"), Some(&prop_set(&["*"])));
    }

    #[test]
    fn test_unknown_function_conservative() {
        // customUdf(e) → e: {*}
        let args = vec![var("e")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("customUdf", &args, &mut properties);

        assert_eq!(properties.get("e"), Some(&prop_set(&["*"])));
    }

    #[test]
    fn test_parameter_property_name() {
        // validAt(e, $start, $end, ts) → e: {*}
        let args = vec![
            var("e"),
            Expr::Parameter("start".to_string()),
            Expr::Parameter("end".to_string()),
            var("ts"),
        ];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("uni.temporal.validAt", &args, &mut properties);

        // Property names given as parameters are unknown at plan time,
        // so the wildcard must be among the requested properties.
        assert!(
            properties
                .get("e")
                .is_some_and(|props| props.contains("*"))
        );
    }

    #[test]
    fn test_validat_expr_extracts_properties() {
        // Expr::ValidAt variant: both named bounds are collected for `e`.
        let validat_expr = Expr::ValidAt {
            entity: Box::new(var("e")),
            timestamp: Box::new(var("ts")),
            start_prop: Some("valid_from".to_string()),
            end_prop: Some("valid_to".to_string()),
        };
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&validat_expr, &mut properties);

        let e_props = properties.get("e");
        assert!(e_props.is_some());
        for expected in ["valid_from", "valid_to"] {
            assert!(e_props.is_some_and(|props| props.contains(expected)));
        }
    }

    #[test]
    fn test_array_index_requires_wildcard() {
        // e[prop] → e: {*}
        let array_index_expr = Expr::ArrayIndex {
            array: Box::new(var("e")),
            index: Box::new(var("prop")),
        };
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&array_index_expr, &mut properties);

        assert!(
            properties
                .get("e")
                .is_some_and(|props| props.contains("*"))
        );
    }

    #[test]
    fn test_property_access_extraction() {
        // e.name → e: {name}
        let prop_access = Expr::Property(Box::new(var("e")), "name".to_string());
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&prop_access, &mut properties);

        assert!(
            properties
                .get("e")
                .is_some_and(|props| props.contains("name"))
        );
    }
}