Skip to main content

uni_query/query/
planner.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use crate::query::pushdown::{PredicateAnalyzer, try_label_or_to_union, try_type_or_to_union};
5use anyhow::{Result, anyhow};
6use arrow_array::RecordBatch;
7use arrow_schema::{DataType, SchemaRef};
8use parking_lot::RwLock;
9use std::collections::{HashMap, HashSet};
10use std::sync::Arc;
11use uni_common::Value;
12use uni_common::core::schema::{
13    DistanceMetric, EmbeddingConfig, FullTextIndexConfig, IndexDefinition, JsonFtsIndexConfig,
14    ScalarIndexConfig, ScalarIndexType, Schema, TokenizerConfig, VectorIndexConfig,
15    VectorIndexType,
16};
17use uni_cypher::ast::{
18    AlterEdgeType, AlterLabel, BinaryOp, CallKind, Clause, CreateConstraint, CreateEdgeType,
19    CreateLabel, CypherLiteral, Direction, DropConstraint, DropEdgeType, DropLabel, Expr,
20    MatchClause, MergeClause, NodePattern, PathPattern, Pattern, PatternElement, Query,
21    RelationshipPattern, RemoveItem, ReturnClause, ReturnItem, SchemaCommand, SetClause, SetItem,
22    ShortestPathMode, ShowConstraints, SortItem, Statement, WindowSpec, WithClause,
23    WithRecursiveClause,
24};
25
/// Sentinel column name inserted into a variable's property set to request
/// that the planner build the bare struct column (`add_structural_projection`)
/// WITHOUT pulling the full schema.
///
/// Emitted by `mark_set_item_variables` for `SetItem::Property` targets only.
/// Other SET variants (`Labels`, `Variable`, `VariablePlus`) and REMOVE still
/// emit `"*"` because they replace/merge the whole node.
///
/// **Union semantics:** When both `"*"` and the sentinel appear in the same
/// variable's `HashSet` (e.g. `SET n.x = 1 RETURN n` collects both), `"*"`
/// dominates, so schema expansion still happens. The sentinel only changes
/// behavior when it is the sole structural marker present.
///
/// **Reserved-name convention:** the double-underscore prefix marks this name
/// as internal. Schema validation should reject user-declared properties with
/// this name (deferred follow-up).
pub(crate) const STRUCT_ONLY_SENTINEL: &str = "__set_struct__";
43
/// Semantic category of a variable that is in scope during planning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VariableType {
    /// A node, e.g. bound by `MATCH (n)` or `CREATE (n)`.
    Node,
    /// An edge/relationship, e.g. bound by `MATCH ()-[r]->()`.
    Edge,
    /// A path, e.g. bound by `MATCH p = (a)-[*]->(b)`.
    Path,
    /// A scalar, e.g. from `WITH expr AS x` or `UNWIND list AS item`.
    /// It may hold a map or dynamic value, so property access is allowed.
    Scalar,
    /// A scalar known to come from a non-graph literal (int, float, bool,
    /// string, list). Property access is NOT allowed at compile time.
    ScalarLiteral,
    /// Imported from an outer scope with unknown type (from `plan_with_scope`
    /// string vars). Compatible with any concrete type, which lets subqueries
    /// re-bind the variable.
    Imported,
}

impl VariableType {
    /// Reports whether a variable of type `self` may be used where `expected`
    /// is required.
    ///
    /// * `Imported` is accepted everywhere: its real type is unknown at plan time.
    /// * `ScalarLiteral` is accepted where `Scalar` (or itself) is expected.
    /// * Every other type must match `expected` exactly.
    fn is_compatible_with(self, expected: VariableType) -> bool {
        match self {
            VariableType::Imported => true,
            VariableType::ScalarLiteral => matches!(
                expected,
                VariableType::Scalar | VariableType::ScalarLiteral
            ),
            concrete => concrete == expected,
        }
    }
}
75
76/// Information about a variable in scope during planning.
77#[derive(Debug, Clone)]
78pub struct VariableInfo {
79    /// Variable name as written in the query.
80    pub name: String,
81    /// Semantic type of the variable.
82    pub var_type: VariableType,
83    /// True if this is a variable-length path (VLP) step variable.
84    ///
85    /// VLP step variables are typed as Edge but semantically hold edge lists.
86    pub is_vlp: bool,
87}
88
89impl VariableInfo {
90    pub fn new(name: String, var_type: VariableType) -> Self {
91        Self {
92            name,
93            var_type,
94            is_vlp: false,
95        }
96    }
97}
98
99/// Find a variable in scope by name.
100fn find_var_in_scope<'a>(vars: &'a [VariableInfo], name: &str) -> Option<&'a VariableInfo> {
101    vars.iter().find(|v| v.name == name)
102}
103
104/// Check if a variable is in scope.
105fn is_var_in_scope(vars: &[VariableInfo], name: &str) -> bool {
106    find_var_in_scope(vars, name).is_some()
107}
108
109/// Check if an expression contains a pattern predicate.
110fn contains_pattern_predicate(expr: &Expr) -> bool {
111    if matches!(
112        expr,
113        Expr::Exists {
114            from_pattern_predicate: true,
115            ..
116        }
117    ) {
118        return true;
119    }
120    let mut found = false;
121    expr.for_each_child(&mut |child| {
122        if !found {
123            found = contains_pattern_predicate(child);
124        }
125    });
126    found
127}
128
129/// Add a variable to scope with type conflict validation.
130/// Returns an error if the variable already exists with a different type.
131fn add_var_to_scope(
132    vars: &mut Vec<VariableInfo>,
133    name: &str,
134    var_type: VariableType,
135) -> Result<()> {
136    if name.is_empty() {
137        return Ok(());
138    }
139
140    if let Some(existing) = vars.iter_mut().find(|v| v.name == name) {
141        if existing.var_type == VariableType::Imported {
142            // Imported vars upgrade to the concrete type
143            existing.var_type = var_type;
144        } else if var_type == VariableType::Imported || existing.var_type == var_type {
145            // New type is Imported (keep existing) or same type — no conflict
146        } else if matches!(
147            existing.var_type,
148            VariableType::Scalar | VariableType::ScalarLiteral
149        ) && matches!(var_type, VariableType::Node | VariableType::Edge)
150        {
151            // Scalar can be used as Node/Edge in CREATE context — a scalar
152            // holding a node/edge reference is valid for pattern use
153            existing.var_type = var_type;
154        } else {
155            return Err(anyhow!(
156                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as {:?}",
157                name,
158                existing.var_type,
159                var_type
160            ));
161        }
162    } else {
163        vars.push(VariableInfo::new(name.to_string(), var_type));
164    }
165    Ok(())
166}
167
168/// Convert VariableInfo vec to String vec for backward compatibility
169fn vars_to_strings(vars: &[VariableInfo]) -> Vec<String> {
170    vars.iter().map(|v| v.name.clone()).collect()
171}
172
173fn infer_with_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
174    match expr {
175        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
176            .map(|info| info.var_type)
177            .unwrap_or(VariableType::Scalar),
178        Expr::Literal(CypherLiteral::Null) => VariableType::Imported,
179        // Known non-graph literals: property access is NOT valid on these.
180        Expr::Literal(CypherLiteral::Integer(_))
181        | Expr::Literal(CypherLiteral::Float(_))
182        | Expr::Literal(CypherLiteral::String(_))
183        | Expr::Literal(CypherLiteral::Bool(_))
184        | Expr::Literal(CypherLiteral::Bytes(_)) => VariableType::ScalarLiteral,
185        Expr::FunctionCall { name, args, .. } => {
186            let lower = name.to_lowercase();
187            if lower == "coalesce" {
188                infer_coalesce_type(args, vars_in_scope)
189            } else if lower == "collect" && !args.is_empty() {
190                let collected = infer_with_output_type(&args[0], vars_in_scope);
191                if matches!(
192                    collected,
193                    VariableType::Node
194                        | VariableType::Edge
195                        | VariableType::Path
196                        | VariableType::Imported
197                ) {
198                    collected
199                } else {
200                    VariableType::Scalar
201                }
202            } else {
203                VariableType::Scalar
204            }
205        }
206        // WITH list literals/expressions produce scalar list values. Preserving
207        // entity typing here causes invalid node/edge reuse in later MATCH clauses
208        // (e.g. WITH [n] AS users; MATCH (users)-->() should fail at compile time).
209        // Lists are ScalarLiteral since property access is not valid on them.
210        Expr::List(_) => VariableType::ScalarLiteral,
211        _ => VariableType::Scalar,
212    }
213}
214
215fn infer_coalesce_type(args: &[Expr], vars_in_scope: &[VariableInfo]) -> VariableType {
216    let mut resolved: Option<VariableType> = None;
217    let mut saw_imported = false;
218    for arg in args {
219        let t = infer_with_output_type(arg, vars_in_scope);
220        match t {
221            VariableType::Node | VariableType::Edge | VariableType::Path => {
222                if let Some(existing) = resolved {
223                    if existing != t {
224                        return VariableType::Scalar;
225                    }
226                } else {
227                    resolved = Some(t);
228                }
229            }
230            VariableType::Imported => saw_imported = true,
231            VariableType::Scalar | VariableType::ScalarLiteral => {}
232        }
233    }
234    if let Some(t) = resolved {
235        t
236    } else if saw_imported {
237        VariableType::Imported
238    } else {
239        VariableType::Scalar
240    }
241}
242
243fn infer_unwind_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
244    match expr {
245        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
246            .map(|info| info.var_type)
247            .unwrap_or(VariableType::Scalar),
248        Expr::FunctionCall { name, args, .. }
249            if name.eq_ignore_ascii_case("collect") && !args.is_empty() =>
250        {
251            infer_with_output_type(&args[0], vars_in_scope)
252        }
253        Expr::List(items) => {
254            let mut inferred: Option<VariableType> = None;
255            for item in items {
256                let t = infer_with_output_type(item, vars_in_scope);
257                if !matches!(
258                    t,
259                    VariableType::Node
260                        | VariableType::Edge
261                        | VariableType::Path
262                        | VariableType::Imported
263                ) {
264                    return VariableType::Scalar;
265                }
266                if let Some(existing) = inferred {
267                    if existing != t
268                        && t != VariableType::Imported
269                        && existing != VariableType::Imported
270                    {
271                        return VariableType::Scalar;
272                    }
273                    if existing == VariableType::Imported && t != VariableType::Imported {
274                        inferred = Some(t);
275                    }
276                } else {
277                    inferred = Some(t);
278                }
279            }
280            inferred.unwrap_or(VariableType::Scalar)
281        }
282        _ => VariableType::Scalar,
283    }
284}
285
286/// Collect all variable names referenced in an expression
287fn collect_expr_variables(expr: &Expr) -> Vec<String> {
288    let mut vars = Vec::new();
289    collect_expr_variables_inner(expr, &mut vars);
290    vars
291}
292
293/// Collect the names of `$param` references in a constant-foldable expression.
294///
295/// Walks the variants that `eval_const_numeric_expr` accepts (the only shapes a
296/// successfully-folded `LIMIT`/`SKIP` expression can take): parameters,
297/// literals, unary/binary arithmetic, and the whitelisted numeric functions.
298/// Used to tell the plan cache which parameter values were baked into the plan.
299fn collect_expr_parameters(expr: &Expr, names: &mut Vec<String>) {
300    match expr {
301        Expr::Parameter(name) => {
302            if !names.contains(name) {
303                names.push(name.clone());
304            }
305        }
306        Expr::UnaryOp { expr: e, .. } => collect_expr_parameters(e, names),
307        Expr::BinaryOp { left, right, .. } => {
308            collect_expr_parameters(left, names);
309            collect_expr_parameters(right, names);
310        }
311        Expr::FunctionCall { args, .. } => {
312            for a in args {
313                collect_expr_parameters(a, names);
314            }
315        }
316        _ => {}
317    }
318}
319
320fn collect_expr_variables_inner(expr: &Expr, vars: &mut Vec<String>) {
321    let mut add_var = |name: &String| {
322        if !vars.contains(name) {
323            vars.push(name.clone());
324        }
325    };
326
327    match expr {
328        Expr::Variable(name) => add_var(name),
329        Expr::Property(base, _) => collect_expr_variables_inner(base, vars),
330        Expr::BinaryOp { left, right, .. } => {
331            collect_expr_variables_inner(left, vars);
332            collect_expr_variables_inner(right, vars);
333        }
334        Expr::UnaryOp { expr: e, .. }
335        | Expr::IsNull(e)
336        | Expr::IsNotNull(e)
337        | Expr::IsUnique(e) => collect_expr_variables_inner(e, vars),
338        Expr::FunctionCall { args, .. } => {
339            for a in args {
340                collect_expr_variables_inner(a, vars);
341            }
342        }
343        Expr::List(items) => {
344            for item in items {
345                collect_expr_variables_inner(item, vars);
346            }
347        }
348        Expr::In { expr: e, list } => {
349            collect_expr_variables_inner(e, vars);
350            collect_expr_variables_inner(list, vars);
351        }
352        Expr::Case {
353            expr: case_expr,
354            when_then,
355            else_expr,
356        } => {
357            if let Some(e) = case_expr {
358                collect_expr_variables_inner(e, vars);
359            }
360            for (w, t) in when_then {
361                collect_expr_variables_inner(w, vars);
362                collect_expr_variables_inner(t, vars);
363            }
364            if let Some(e) = else_expr {
365                collect_expr_variables_inner(e, vars);
366            }
367        }
368        Expr::Map(entries) => {
369            for (_, v) in entries {
370                collect_expr_variables_inner(v, vars);
371            }
372        }
373        Expr::LabelCheck { expr, .. } => collect_expr_variables_inner(expr, vars),
374        Expr::ArrayIndex { array, index } => {
375            collect_expr_variables_inner(array, vars);
376            collect_expr_variables_inner(index, vars);
377        }
378        Expr::ArraySlice { array, start, end } => {
379            collect_expr_variables_inner(array, vars);
380            if let Some(s) = start {
381                collect_expr_variables_inner(s, vars);
382            }
383            if let Some(e) = end {
384                collect_expr_variables_inner(e, vars);
385            }
386        }
387        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
388        // they introduce local variable bindings not in outer scope.
389        _ => {}
390    }
391}
392
393/// Rewrite ORDER BY expressions to resolve projection aliases back to their source expressions.
394///
395/// Example: `RETURN r AS rel ORDER BY rel.id` becomes `ORDER BY r.id` so Sort can run
396/// before the final RETURN projection without losing alias semantics.
397fn rewrite_order_by_expr_with_aliases(expr: &Expr, aliases: &HashMap<String, Expr>) -> Expr {
398    let repr = expr.to_string_repr();
399    if let Some(rewritten) = aliases.get(&repr) {
400        return rewritten.clone();
401    }
402
403    match expr {
404        Expr::Variable(name) => aliases.get(name).cloned().unwrap_or_else(|| expr.clone()),
405        Expr::Property(base, prop) => Expr::Property(
406            Box::new(rewrite_order_by_expr_with_aliases(base, aliases)),
407            prop.clone(),
408        ),
409        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
410            left: Box::new(rewrite_order_by_expr_with_aliases(left, aliases)),
411            op: *op,
412            right: Box::new(rewrite_order_by_expr_with_aliases(right, aliases)),
413        },
414        Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
415            op: *op,
416            expr: Box::new(rewrite_order_by_expr_with_aliases(inner, aliases)),
417        },
418        Expr::FunctionCall {
419            name,
420            args,
421            distinct,
422            window_spec,
423        } => Expr::FunctionCall {
424            name: name.clone(),
425            args: args
426                .iter()
427                .map(|a| rewrite_order_by_expr_with_aliases(a, aliases))
428                .collect(),
429            distinct: *distinct,
430            window_spec: window_spec.clone(),
431        },
432        Expr::List(items) => Expr::List(
433            items
434                .iter()
435                .map(|item| rewrite_order_by_expr_with_aliases(item, aliases))
436                .collect(),
437        ),
438        Expr::Map(entries) => Expr::Map(
439            entries
440                .iter()
441                .map(|(k, v)| (k.clone(), rewrite_order_by_expr_with_aliases(v, aliases)))
442                .collect(),
443        ),
444        Expr::Case {
445            expr: case_expr,
446            when_then,
447            else_expr,
448        } => Expr::Case {
449            expr: case_expr
450                .as_ref()
451                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
452            when_then: when_then
453                .iter()
454                .map(|(w, t)| {
455                    (
456                        rewrite_order_by_expr_with_aliases(w, aliases),
457                        rewrite_order_by_expr_with_aliases(t, aliases),
458                    )
459                })
460                .collect(),
461            else_expr: else_expr
462                .as_ref()
463                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
464        },
465        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
466        // they introduce local variable bindings that could shadow aliases.
467        _ => expr.clone(),
468    }
469}
470
471/// Validate function call argument types.
472/// Returns error if type constraints are violated.
473fn validate_function_call(name: &str, args: &[Expr], vars_in_scope: &[VariableInfo]) -> Result<()> {
474    let name_lower = name.to_lowercase();
475
476    // labels() requires Node
477    if name_lower == "labels"
478        && let Some(Expr::Variable(var_name)) = args.first()
479        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
480        && !info.var_type.is_compatible_with(VariableType::Node)
481    {
482        return Err(anyhow!(
483            "SyntaxError: InvalidArgumentType - labels() requires a node argument"
484        ));
485    }
486
487    // type() requires Edge
488    if name_lower == "type"
489        && let Some(Expr::Variable(var_name)) = args.first()
490        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
491        && !info.var_type.is_compatible_with(VariableType::Edge)
492    {
493        return Err(anyhow!(
494            "SyntaxError: InvalidArgumentType - type() requires a relationship argument"
495        ));
496    }
497
498    // properties() requires Node/Edge/Map (not scalar literals)
499    if name_lower == "properties"
500        && let Some(arg) = args.first()
501    {
502        match arg {
503            Expr::Literal(CypherLiteral::Integer(_))
504            | Expr::Literal(CypherLiteral::Float(_))
505            | Expr::Literal(CypherLiteral::String(_))
506            | Expr::Literal(CypherLiteral::Bool(_))
507            | Expr::List(_) => {
508                return Err(anyhow!(
509                    "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
510                ));
511            }
512            Expr::Variable(var_name) => {
513                if let Some(info) = find_var_in_scope(vars_in_scope, var_name)
514                    && matches!(
515                        info.var_type,
516                        VariableType::Scalar | VariableType::ScalarLiteral
517                    )
518                {
519                    return Err(anyhow!(
520                        "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
521                    ));
522                }
523            }
524            _ => {}
525        }
526    }
527
528    // nodes()/relationships() require Path
529    if (name_lower == "nodes" || name_lower == "relationships")
530        && let Some(Expr::Variable(var_name)) = args.first()
531        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
532        && !info.var_type.is_compatible_with(VariableType::Path)
533    {
534        return Err(anyhow!(
535            "SyntaxError: InvalidArgumentType - {}() requires a path argument",
536            name_lower
537        ));
538    }
539
540    // size() does NOT accept Path arguments (length() on paths IS valid — returns relationship count)
541    if name_lower == "size"
542        && let Some(Expr::Variable(var_name)) = args.first()
543        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
544        && info.var_type == VariableType::Path
545    {
546        return Err(anyhow!(
547            "SyntaxError: InvalidArgumentType - size() requires a string, list, or map argument"
548        ));
549    }
550
551    // length()/size() do NOT accept Node or single-Edge arguments.
552    // VLP step variables (e.g. `r` in `-[r*1..2]->`) are typed as Edge
553    // but are actually edge lists — size()/length() is valid on those.
554    if (name_lower == "length" || name_lower == "size")
555        && let Some(Expr::Variable(var_name)) = args.first()
556        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
557        && (info.var_type == VariableType::Node
558            || (info.var_type == VariableType::Edge && !info.is_vlp))
559    {
560        return Err(anyhow!(
561            "SyntaxError: InvalidArgumentType - {}() requires a string, list, or path argument",
562            name_lower
563        ));
564    }
565
566    Ok(())
567}
568
569/// Check if an expression is a non-boolean literal.
570fn is_non_boolean_literal(expr: &Expr) -> bool {
571    matches!(
572        expr,
573        Expr::Literal(CypherLiteral::Integer(_))
574            | Expr::Literal(CypherLiteral::Float(_))
575            | Expr::Literal(CypherLiteral::String(_))
576            | Expr::List(_)
577            | Expr::Map(_)
578    )
579}
580
581/// Validate boolean expressions (AND/OR/NOT require boolean arguments).
582fn validate_boolean_expression(expr: &Expr) -> Result<()> {
583    // Check AND/OR/XOR operands and NOT operand for non-boolean literals
584    if let Expr::BinaryOp { left, op, right } = expr
585        && matches!(op, BinaryOp::And | BinaryOp::Or | BinaryOp::Xor)
586    {
587        let op_name = format!("{op:?}").to_uppercase();
588        for operand in [left.as_ref(), right.as_ref()] {
589            if is_non_boolean_literal(operand) {
590                return Err(anyhow!(
591                    "SyntaxError: InvalidArgumentType - {} requires boolean arguments",
592                    op_name
593                ));
594            }
595        }
596    }
597    if let Expr::UnaryOp {
598        op: uni_cypher::ast::UnaryOp::Not,
599        expr: inner,
600    } = expr
601        && is_non_boolean_literal(inner)
602    {
603        return Err(anyhow!(
604            "SyntaxError: InvalidArgumentType - NOT requires a boolean argument"
605        ));
606    }
607    let mut result = Ok(());
608    expr.for_each_child(&mut |child| {
609        if result.is_ok() {
610            result = validate_boolean_expression(child);
611        }
612    });
613    result
614}
615
616/// Validate that all variables used in an expression are in scope.
617fn validate_expression_variables(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
618    let used_vars = collect_expr_variables(expr);
619    for var in used_vars {
620        if !is_var_in_scope(vars_in_scope, &var) {
621            return Err(anyhow!(
622                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
623                var
624            ));
625        }
626    }
627    Ok(())
628}
629
630/// Check if a function name (lowercase) is an aggregate function.
631fn is_aggregate_function_name(name: &str) -> bool {
632    matches!(
633        name.to_lowercase().as_str(),
634        "count"
635            | "sum"
636            | "avg"
637            | "min"
638            | "max"
639            | "collect"
640            | "stdev"
641            | "stdevp"
642            | "percentiledisc"
643            | "percentilecont"
644            | "btic_min"
645            | "btic_max"
646            | "btic_span_agg"
647            | "btic_count_at"
648    ) || uni_cypher::is_known_plugin_aggregate(name)
649}
650
651/// Returns true if the expression is a window function (FunctionCall with window_spec).
652fn is_window_function(expr: &Expr) -> bool {
653    matches!(
654        expr,
655        Expr::FunctionCall {
656            window_spec: Some(_),
657            ..
658        }
659    )
660}
661
662/// Returns true when `expr` reports `is_aggregate()` but is NOT itself a bare
663/// aggregate FunctionCall (or CountSubquery/CollectSubquery). In other words,
664/// the aggregate lives *inside* a wrapper expression (e.g. a ListComprehension,
665/// size() call, BinaryOp, etc.).
666fn is_compound_aggregate(expr: &Expr) -> bool {
667    if !expr.is_aggregate() {
668        return false;
669    }
670    match expr {
671        Expr::FunctionCall {
672            name, window_spec, ..
673        } => {
674            // A bare aggregate FunctionCall is NOT compound
675            if window_spec.is_some() {
676                return true; // window wrapping an aggregate — treat as compound
677            }
678            !is_aggregate_function_name(name)
679        }
680        // Subquery aggregates are "bare" (not compound)
681        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => false,
682        // Everything else (ListComprehension, BinaryOp, etc.) is compound
683        _ => true,
684    }
685}
686
687/// Recursively collect all bare aggregate FunctionCall sub-expressions from
688/// `expr`. Stops recursing into the *arguments* of an aggregate (we only want
689/// the outermost aggregate boundaries).
690///
691/// For `ListComprehension`, `Quantifier`, and `Reduce`, only the `list` field
692/// is searched because the body (`map_expr`, `predicate`, `expr`) references
693/// the loop variable, not outer-scope aggregates.
694fn extract_inner_aggregates(expr: &Expr) -> Vec<Expr> {
695    let mut out = Vec::new();
696    extract_inner_aggregates_rec(expr, &mut out);
697    out
698}
699
700fn extract_inner_aggregates_rec(expr: &Expr, out: &mut Vec<Expr>) {
701    match expr {
702        Expr::FunctionCall {
703            name, window_spec, ..
704        } if window_spec.is_none() && is_aggregate_function_name(name) => {
705            // Found a bare aggregate — collect it and stop recursing
706            out.push(expr.clone());
707        }
708        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
709            out.push(expr.clone());
710        }
711        // For list comprehension, only search the `list` source for aggregates
712        Expr::ListComprehension { list, .. } => {
713            extract_inner_aggregates_rec(list, out);
714        }
715        // For quantifier, only search the `list` source
716        Expr::Quantifier { list, .. } => {
717            extract_inner_aggregates_rec(list, out);
718        }
719        // For reduce, search `init` and `list` (not the body `expr`)
720        Expr::Reduce { init, list, .. } => {
721            extract_inner_aggregates_rec(init, out);
722            extract_inner_aggregates_rec(list, out);
723        }
724        // Standard recursive cases
725        Expr::FunctionCall { args, .. } => {
726            for arg in args {
727                extract_inner_aggregates_rec(arg, out);
728            }
729        }
730        Expr::BinaryOp { left, right, .. } => {
731            extract_inner_aggregates_rec(left, out);
732            extract_inner_aggregates_rec(right, out);
733        }
734        Expr::UnaryOp { expr: e, .. }
735        | Expr::IsNull(e)
736        | Expr::IsNotNull(e)
737        | Expr::IsUnique(e) => extract_inner_aggregates_rec(e, out),
738        Expr::Property(base, _) => extract_inner_aggregates_rec(base, out),
739        Expr::List(items) => {
740            for item in items {
741                extract_inner_aggregates_rec(item, out);
742            }
743        }
744        Expr::Case {
745            expr: case_expr,
746            when_then,
747            else_expr,
748        } => {
749            if let Some(e) = case_expr {
750                extract_inner_aggregates_rec(e, out);
751            }
752            for (w, t) in when_then {
753                extract_inner_aggregates_rec(w, out);
754                extract_inner_aggregates_rec(t, out);
755            }
756            if let Some(e) = else_expr {
757                extract_inner_aggregates_rec(e, out);
758            }
759        }
760        Expr::In {
761            expr: in_expr,
762            list,
763        } => {
764            extract_inner_aggregates_rec(in_expr, out);
765            extract_inner_aggregates_rec(list, out);
766        }
767        Expr::ArrayIndex { array, index } => {
768            extract_inner_aggregates_rec(array, out);
769            extract_inner_aggregates_rec(index, out);
770        }
771        Expr::ArraySlice { array, start, end } => {
772            extract_inner_aggregates_rec(array, out);
773            if let Some(s) = start {
774                extract_inner_aggregates_rec(s, out);
775            }
776            if let Some(e) = end {
777                extract_inner_aggregates_rec(e, out);
778            }
779        }
780        Expr::Map(entries) => {
781            for (_, v) in entries {
782                extract_inner_aggregates_rec(v, out);
783            }
784        }
785        _ => {}
786    }
787}
788
789/// Return a copy of `expr` with every inner aggregate FunctionCall replaced by
790/// `Expr::Variable(aggregate_column_name(agg))`.
791///
792/// For `ListComprehension`/`Quantifier`/`Reduce`, only the `list` field is
793/// rewritten (the body references the loop variable, not outer-scope columns).
794fn replace_aggregates_with_columns(expr: &Expr) -> Expr {
795    match expr {
796        Expr::FunctionCall {
797            name, window_spec, ..
798        } if window_spec.is_none() && is_aggregate_function_name(name) => {
799            // Replace bare aggregate with column reference
800            Expr::Variable(aggregate_column_name(expr))
801        }
802        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
803            Expr::Variable(aggregate_column_name(expr))
804        }
805        Expr::ListComprehension {
806            variable,
807            list,
808            where_clause,
809            map_expr,
810        } => Expr::ListComprehension {
811            variable: variable.clone(),
812            list: Box::new(replace_aggregates_with_columns(list)),
813            where_clause: where_clause.clone(), // don't touch — references loop var
814            map_expr: map_expr.clone(),         // don't touch — references loop var
815        },
816        Expr::Quantifier {
817            quantifier,
818            variable,
819            list,
820            predicate,
821        } => Expr::Quantifier {
822            quantifier: *quantifier,
823            variable: variable.clone(),
824            list: Box::new(replace_aggregates_with_columns(list)),
825            predicate: predicate.clone(), // don't touch — references loop var
826        },
827        Expr::Reduce {
828            accumulator,
829            init,
830            variable,
831            list,
832            expr: body,
833        } => Expr::Reduce {
834            accumulator: accumulator.clone(),
835            init: Box::new(replace_aggregates_with_columns(init)),
836            variable: variable.clone(),
837            list: Box::new(replace_aggregates_with_columns(list)),
838            expr: body.clone(), // don't touch — references loop var
839        },
840        Expr::FunctionCall {
841            name,
842            args,
843            distinct,
844            window_spec,
845        } => Expr::FunctionCall {
846            name: name.clone(),
847            args: args.iter().map(replace_aggregates_with_columns).collect(),
848            distinct: *distinct,
849            window_spec: window_spec.clone(),
850        },
851        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
852            left: Box::new(replace_aggregates_with_columns(left)),
853            op: *op,
854            right: Box::new(replace_aggregates_with_columns(right)),
855        },
856        Expr::UnaryOp { op, expr: e } => Expr::UnaryOp {
857            op: *op,
858            expr: Box::new(replace_aggregates_with_columns(e)),
859        },
860        Expr::IsNull(e) => Expr::IsNull(Box::new(replace_aggregates_with_columns(e))),
861        Expr::IsNotNull(e) => Expr::IsNotNull(Box::new(replace_aggregates_with_columns(e))),
862        Expr::IsUnique(e) => Expr::IsUnique(Box::new(replace_aggregates_with_columns(e))),
863        Expr::Property(base, prop) => Expr::Property(
864            Box::new(replace_aggregates_with_columns(base)),
865            prop.clone(),
866        ),
867        Expr::List(items) => {
868            Expr::List(items.iter().map(replace_aggregates_with_columns).collect())
869        }
870        Expr::Case {
871            expr: case_expr,
872            when_then,
873            else_expr,
874        } => Expr::Case {
875            expr: case_expr
876                .as_ref()
877                .map(|e| Box::new(replace_aggregates_with_columns(e))),
878            when_then: when_then
879                .iter()
880                .map(|(w, t)| {
881                    (
882                        replace_aggregates_with_columns(w),
883                        replace_aggregates_with_columns(t),
884                    )
885                })
886                .collect(),
887            else_expr: else_expr
888                .as_ref()
889                .map(|e| Box::new(replace_aggregates_with_columns(e))),
890        },
891        Expr::In {
892            expr: in_expr,
893            list,
894        } => Expr::In {
895            expr: Box::new(replace_aggregates_with_columns(in_expr)),
896            list: Box::new(replace_aggregates_with_columns(list)),
897        },
898        Expr::ArrayIndex { array, index } => Expr::ArrayIndex {
899            array: Box::new(replace_aggregates_with_columns(array)),
900            index: Box::new(replace_aggregates_with_columns(index)),
901        },
902        Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
903            array: Box::new(replace_aggregates_with_columns(array)),
904            start: start
905                .as_ref()
906                .map(|e| Box::new(replace_aggregates_with_columns(e))),
907            end: end
908                .as_ref()
909                .map(|e| Box::new(replace_aggregates_with_columns(e))),
910        },
911        Expr::Map(entries) => Expr::Map(
912            entries
913                .iter()
914                .map(|(k, v)| (k.clone(), replace_aggregates_with_columns(v)))
915                .collect(),
916        ),
917        // Leaf expressions — return as-is
918        other => other.clone(),
919    }
920}
921
922/// Check if an expression contains any aggregate function (recursively).
923fn contains_aggregate_recursive(expr: &Expr) -> bool {
924    match expr {
925        Expr::FunctionCall { name, args, .. } => {
926            is_aggregate_function_name(name) || args.iter().any(contains_aggregate_recursive)
927        }
928        Expr::BinaryOp { left, right, .. } => {
929            contains_aggregate_recursive(left) || contains_aggregate_recursive(right)
930        }
931        Expr::UnaryOp { expr: e, .. }
932        | Expr::IsNull(e)
933        | Expr::IsNotNull(e)
934        | Expr::IsUnique(e) => contains_aggregate_recursive(e),
935        Expr::List(items) => items.iter().any(contains_aggregate_recursive),
936        Expr::Case {
937            expr,
938            when_then,
939            else_expr,
940        } => {
941            expr.as_deref().is_some_and(contains_aggregate_recursive)
942                || when_then.iter().any(|(w, t)| {
943                    contains_aggregate_recursive(w) || contains_aggregate_recursive(t)
944                })
945                || else_expr
946                    .as_deref()
947                    .is_some_and(contains_aggregate_recursive)
948        }
949        Expr::In { expr, list } => {
950            contains_aggregate_recursive(expr) || contains_aggregate_recursive(list)
951        }
952        Expr::Property(base, _) => contains_aggregate_recursive(base),
953        Expr::ListComprehension { list, .. } => {
954            // Only check the list source — where_clause/map_expr reference the loop variable
955            contains_aggregate_recursive(list)
956        }
957        Expr::Quantifier { list, .. } => contains_aggregate_recursive(list),
958        Expr::Reduce { init, list, .. } => {
959            contains_aggregate_recursive(init) || contains_aggregate_recursive(list)
960        }
961        Expr::ArrayIndex { array, index } => {
962            contains_aggregate_recursive(array) || contains_aggregate_recursive(index)
963        }
964        Expr::ArraySlice { array, start, end } => {
965            contains_aggregate_recursive(array)
966                || start.as_deref().is_some_and(contains_aggregate_recursive)
967                || end.as_deref().is_some_and(contains_aggregate_recursive)
968        }
969        Expr::Map(entries) => entries.iter().any(|(_, v)| contains_aggregate_recursive(v)),
970        _ => false,
971    }
972}
973
974/// Check if an expression contains a non-deterministic function (e.g. rand()).
975fn contains_non_deterministic(expr: &Expr) -> bool {
976    if matches!(expr, Expr::FunctionCall { name, .. } if name.eq_ignore_ascii_case("rand")) {
977        return true;
978    }
979    let mut found = false;
980    expr.for_each_child(&mut |child| {
981        if !found {
982            found = contains_non_deterministic(child);
983        }
984    });
985    found
986}
987
988fn collect_aggregate_reprs(expr: &Expr, out: &mut HashSet<String>) {
989    match expr {
990        Expr::FunctionCall { name, args, .. } => {
991            if is_aggregate_function_name(name) {
992                out.insert(expr.to_string_repr());
993                return;
994            }
995            for arg in args {
996                collect_aggregate_reprs(arg, out);
997            }
998        }
999        Expr::BinaryOp { left, right, .. } => {
1000            collect_aggregate_reprs(left, out);
1001            collect_aggregate_reprs(right, out);
1002        }
1003        Expr::UnaryOp { expr, .. }
1004        | Expr::IsNull(expr)
1005        | Expr::IsNotNull(expr)
1006        | Expr::IsUnique(expr) => collect_aggregate_reprs(expr, out),
1007        Expr::List(items) => {
1008            for item in items {
1009                collect_aggregate_reprs(item, out);
1010            }
1011        }
1012        Expr::Case {
1013            expr,
1014            when_then,
1015            else_expr,
1016        } => {
1017            if let Some(e) = expr {
1018                collect_aggregate_reprs(e, out);
1019            }
1020            for (w, t) in when_then {
1021                collect_aggregate_reprs(w, out);
1022                collect_aggregate_reprs(t, out);
1023            }
1024            if let Some(e) = else_expr {
1025                collect_aggregate_reprs(e, out);
1026            }
1027        }
1028        Expr::In { expr, list } => {
1029            collect_aggregate_reprs(expr, out);
1030            collect_aggregate_reprs(list, out);
1031        }
1032        Expr::Property(base, _) => collect_aggregate_reprs(base, out),
1033        Expr::ListComprehension { list, .. } => {
1034            collect_aggregate_reprs(list, out);
1035        }
1036        Expr::Quantifier { list, .. } => {
1037            collect_aggregate_reprs(list, out);
1038        }
1039        Expr::Reduce { init, list, .. } => {
1040            collect_aggregate_reprs(init, out);
1041            collect_aggregate_reprs(list, out);
1042        }
1043        Expr::ArrayIndex { array, index } => {
1044            collect_aggregate_reprs(array, out);
1045            collect_aggregate_reprs(index, out);
1046        }
1047        Expr::ArraySlice { array, start, end } => {
1048            collect_aggregate_reprs(array, out);
1049            if let Some(s) = start {
1050                collect_aggregate_reprs(s, out);
1051            }
1052            if let Some(e) = end {
1053                collect_aggregate_reprs(e, out);
1054            }
1055        }
1056        _ => {}
1057    }
1058}
1059
/// A reference found outside any aggregate call, as gathered by
/// `collect_non_aggregate_refs`.
#[derive(Clone, Debug)]
enum NonAggregateRef {
    /// A bare variable, identified by its name.
    Var(String),
    /// A property access.
    Property {
        /// String representation of the whole property expression.
        repr: String,
        /// Name of the base when the base is a plain variable, else `None`.
        base_var: Option<String>,
    },
}
1068
1069fn collect_non_aggregate_refs(expr: &Expr, inside_agg: bool, out: &mut Vec<NonAggregateRef>) {
1070    match expr {
1071        Expr::FunctionCall { name, args, .. } => {
1072            if is_aggregate_function_name(name) {
1073                return;
1074            }
1075            for arg in args {
1076                collect_non_aggregate_refs(arg, inside_agg, out);
1077            }
1078        }
1079        Expr::Variable(v) if !inside_agg => out.push(NonAggregateRef::Var(v.clone())),
1080        Expr::Property(base, _) if !inside_agg => {
1081            let base_var = if let Expr::Variable(v) = base.as_ref() {
1082                Some(v.clone())
1083            } else {
1084                None
1085            };
1086            out.push(NonAggregateRef::Property {
1087                repr: expr.to_string_repr(),
1088                base_var,
1089            });
1090        }
1091        Expr::BinaryOp { left, right, .. } => {
1092            collect_non_aggregate_refs(left, inside_agg, out);
1093            collect_non_aggregate_refs(right, inside_agg, out);
1094        }
1095        Expr::UnaryOp { expr, .. }
1096        | Expr::IsNull(expr)
1097        | Expr::IsNotNull(expr)
1098        | Expr::IsUnique(expr) => collect_non_aggregate_refs(expr, inside_agg, out),
1099        Expr::List(items) => {
1100            for item in items {
1101                collect_non_aggregate_refs(item, inside_agg, out);
1102            }
1103        }
1104        Expr::Case {
1105            expr,
1106            when_then,
1107            else_expr,
1108        } => {
1109            if let Some(e) = expr {
1110                collect_non_aggregate_refs(e, inside_agg, out);
1111            }
1112            for (w, t) in when_then {
1113                collect_non_aggregate_refs(w, inside_agg, out);
1114                collect_non_aggregate_refs(t, inside_agg, out);
1115            }
1116            if let Some(e) = else_expr {
1117                collect_non_aggregate_refs(e, inside_agg, out);
1118            }
1119        }
1120        Expr::In { expr, list } => {
1121            collect_non_aggregate_refs(expr, inside_agg, out);
1122            collect_non_aggregate_refs(list, inside_agg, out);
1123        }
1124        // For ListComprehension/Quantifier/Reduce, only recurse into the `list`
1125        // source. The body references the loop variable, not outer-scope vars.
1126        Expr::ListComprehension { list, .. } => {
1127            collect_non_aggregate_refs(list, inside_agg, out);
1128        }
1129        Expr::Quantifier { list, .. } => {
1130            collect_non_aggregate_refs(list, inside_agg, out);
1131        }
1132        Expr::Reduce { init, list, .. } => {
1133            collect_non_aggregate_refs(init, inside_agg, out);
1134            collect_non_aggregate_refs(list, inside_agg, out);
1135        }
1136        _ => {}
1137    }
1138}
1139
1140fn validate_with_order_by_aggregate_item(
1141    expr: &Expr,
1142    projected_aggregate_reprs: &HashSet<String>,
1143    projected_simple_reprs: &HashSet<String>,
1144    projected_aliases: &HashSet<String>,
1145) -> Result<()> {
1146    let mut aggregate_reprs = HashSet::new();
1147    collect_aggregate_reprs(expr, &mut aggregate_reprs);
1148    for agg in aggregate_reprs {
1149        if !projected_aggregate_reprs.contains(&agg) {
1150            return Err(anyhow!(
1151                "SyntaxError: UndefinedVariable - Aggregation expression '{}' is not projected in WITH",
1152                agg
1153            ));
1154        }
1155    }
1156
1157    let mut refs = Vec::new();
1158    collect_non_aggregate_refs(expr, false, &mut refs);
1159    refs.retain(|r| match r {
1160        NonAggregateRef::Var(v) => !projected_aliases.contains(v),
1161        NonAggregateRef::Property { repr, .. } => !projected_simple_reprs.contains(repr),
1162    });
1163
1164    let mut dedup = HashSet::new();
1165    refs.retain(|r| {
1166        let key = match r {
1167            NonAggregateRef::Var(v) => format!("v:{v}"),
1168            NonAggregateRef::Property { repr, .. } => format!("p:{repr}"),
1169        };
1170        dedup.insert(key)
1171    });
1172
1173    if refs.len() > 1 {
1174        return Err(anyhow!(
1175            "SyntaxError: AmbiguousAggregationExpression - ORDER BY item mixes aggregation with multiple non-grouping references"
1176        ));
1177    }
1178
1179    if let Some(r) = refs.first() {
1180        return match r {
1181            NonAggregateRef::Var(v) => Err(anyhow!(
1182                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1183                v
1184            )),
1185            NonAggregateRef::Property { base_var, .. } => Err(anyhow!(
1186                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1187                base_var
1188                    .clone()
1189                    .unwrap_or_else(|| "<property-base>".to_string())
1190            )),
1191        };
1192    }
1193
1194    Ok(())
1195}
1196
1197/// Validate that no aggregation functions appear in WHERE clause.
1198fn validate_no_aggregation_in_where(predicate: &Expr) -> Result<()> {
1199    if contains_aggregate_recursive(predicate) {
1200        return Err(anyhow!(
1201            "SyntaxError: InvalidAggregation - Aggregation functions not allowed in WHERE"
1202        ));
1203    }
1204    Ok(())
1205}
1206
/// Result of evaluating a constant numeric expression: an integer or a float.
#[derive(Clone, Copy, Debug)]
enum ConstNumber {
    /// 64-bit signed integer value.
    Int(i64),
    /// 64-bit floating-point value.
    Float(f64),
}

impl ConstNumber {
    /// Returns the value as `f64`; integers are converted with an `as` cast.
    fn to_f64(self) -> f64 {
        match self {
            Self::Float(float) => float,
            Self::Int(int) => int as f64,
        }
    }
}
1221
1222fn eval_const_numeric_expr(
1223    expr: &Expr,
1224    params: &HashMap<String, uni_common::Value>,
1225) -> Result<ConstNumber> {
1226    match expr {
1227        Expr::Literal(CypherLiteral::Integer(n)) => Ok(ConstNumber::Int(*n)),
1228        Expr::Literal(CypherLiteral::Float(f)) => Ok(ConstNumber::Float(*f)),
1229        Expr::Parameter(name) => match params.get(name) {
1230            Some(uni_common::Value::Int(n)) => Ok(ConstNumber::Int(*n)),
1231            Some(uni_common::Value::Float(f)) => Ok(ConstNumber::Float(*f)),
1232            Some(uni_common::Value::Null) => Err(anyhow!(
1233                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got null",
1234                name
1235            )),
1236            Some(other) => Err(anyhow!(
1237                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got {:?}",
1238                name,
1239                other
1240            )),
1241            None => Err(anyhow!(
1242                "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1243            )),
1244        },
1245        Expr::UnaryOp {
1246            op: uni_cypher::ast::UnaryOp::Neg,
1247            expr,
1248        } => match eval_const_numeric_expr(expr, params)? {
1249            ConstNumber::Int(v) => Ok(ConstNumber::Int(-v)),
1250            ConstNumber::Float(v) => Ok(ConstNumber::Float(-v)),
1251        },
1252        Expr::BinaryOp { left, op, right } => {
1253            let l = eval_const_numeric_expr(left, params)?;
1254            let r = eval_const_numeric_expr(right, params)?;
1255            match op {
1256                BinaryOp::Add => match (l, r) {
1257                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a + b)),
1258                    _ => Ok(ConstNumber::Float(l.to_f64() + r.to_f64())),
1259                },
1260                BinaryOp::Sub => match (l, r) {
1261                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a - b)),
1262                    _ => Ok(ConstNumber::Float(l.to_f64() - r.to_f64())),
1263                },
1264                BinaryOp::Mul => match (l, r) {
1265                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a * b)),
1266                    _ => Ok(ConstNumber::Float(l.to_f64() * r.to_f64())),
1267                },
1268                BinaryOp::Div => Ok(ConstNumber::Float(l.to_f64() / r.to_f64())),
1269                BinaryOp::Mod => match (l, r) {
1270                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a % b)),
1271                    _ => Ok(ConstNumber::Float(l.to_f64() % r.to_f64())),
1272                },
1273                BinaryOp::Pow => Ok(ConstNumber::Float(l.to_f64().powf(r.to_f64()))),
1274                _ => Err(anyhow!(
1275                    "SyntaxError: InvalidArgumentType - unsupported operator in constant expression"
1276                )),
1277            }
1278        }
1279        Expr::FunctionCall { name, args, .. } => {
1280            let lower = name.to_lowercase();
1281            match lower.as_str() {
1282                "rand" if args.is_empty() => {
1283                    use rand::RngExt;
1284                    let mut rng = rand::rng();
1285                    Ok(ConstNumber::Float(rng.random::<f64>()))
1286                }
1287                "tointeger" | "toint" if args.len() == 1 => {
1288                    match eval_const_numeric_expr(&args[0], params)? {
1289                        ConstNumber::Int(v) => Ok(ConstNumber::Int(v)),
1290                        ConstNumber::Float(v) => Ok(ConstNumber::Int(v.trunc() as i64)),
1291                    }
1292                }
1293                "ceil" if args.len() == 1 => Ok(ConstNumber::Float(
1294                    eval_const_numeric_expr(&args[0], params)?.to_f64().ceil(),
1295                )),
1296                "floor" if args.len() == 1 => Ok(ConstNumber::Float(
1297                    eval_const_numeric_expr(&args[0], params)?.to_f64().floor(),
1298                )),
1299                "abs" if args.len() == 1 => match eval_const_numeric_expr(&args[0], params)? {
1300                    ConstNumber::Int(v) => Ok(ConstNumber::Int(v.abs())),
1301                    ConstNumber::Float(v) => Ok(ConstNumber::Float(v.abs())),
1302                },
1303                _ => Err(anyhow!(
1304                    "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1305                )),
1306            }
1307        }
1308        _ => Err(anyhow!(
1309            "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1310        )),
1311    }
1312}
1313
1314/// Parse and validate a non-negative integer expression for SKIP or LIMIT.
1315/// Returns `Ok(Some(n))` for valid constants, or an error for negative/float/non-constant values.
1316fn parse_non_negative_integer(
1317    expr: &Expr,
1318    clause_name: &str,
1319    params: &HashMap<String, uni_common::Value>,
1320) -> Result<Option<usize>> {
1321    let referenced_vars = collect_expr_variables(expr);
1322    if !referenced_vars.is_empty() {
1323        return Err(anyhow!(
1324            "SyntaxError: NonConstantExpression - {} requires expression independent of row variables",
1325            clause_name
1326        ));
1327    }
1328
1329    let value = eval_const_numeric_expr(expr, params)?;
1330    let as_int = match value {
1331        ConstNumber::Int(v) => v,
1332        ConstNumber::Float(v) => {
1333            if !v.is_finite() || (v.fract().abs() > f64::EPSILON) {
1334                return Err(anyhow!(
1335                    "SyntaxError: InvalidArgumentType - {} requires integer, got float",
1336                    clause_name
1337                ));
1338            }
1339            v as i64
1340        }
1341    };
1342    if as_int < 0 {
1343        return Err(anyhow!(
1344            "SyntaxError: NegativeIntegerArgument - {} requires non-negative integer",
1345            clause_name
1346        ));
1347    }
1348    Ok(Some(as_int as usize))
1349}
1350
1351/// Validate that aggregation functions are not nested.
1352fn validate_no_nested_aggregation(expr: &Expr) -> Result<()> {
1353    if let Expr::FunctionCall { name, args, .. } = expr
1354        && is_aggregate_function_name(name)
1355    {
1356        for arg in args {
1357            if contains_aggregate_recursive(arg) {
1358                return Err(anyhow!(
1359                    "SyntaxError: NestedAggregation - Cannot nest aggregation functions"
1360                ));
1361            }
1362            if contains_non_deterministic(arg) {
1363                return Err(anyhow!(
1364                    "SyntaxError: NonConstantExpression - Non-deterministic function inside aggregation"
1365                ));
1366            }
1367        }
1368    }
1369    let mut result = Ok(());
1370    expr.for_each_child(&mut |child| {
1371        if result.is_ok() {
1372            result = validate_no_nested_aggregation(child);
1373        }
1374    });
1375    result
1376}
1377
1378/// Validate that an expression does not access properties or labels of
1379/// deleted entities. `type(r)` on a deleted relationship is allowed per
1380/// OpenCypher spec, but `n.prop` and `labels(n)` are not.
1381fn validate_no_deleted_entity_access(expr: &Expr, deleted_vars: &HashSet<String>) -> Result<()> {
1382    // Check n.prop on a deleted variable
1383    if let Expr::Property(inner, _) = expr
1384        && let Expr::Variable(name) = inner.as_ref()
1385        && deleted_vars.contains(name)
1386    {
1387        return Err(anyhow!(
1388            "EntityNotFound: DeletedEntityAccess - Cannot access properties of deleted entity '{}'",
1389            name
1390        ));
1391    }
1392    // Check labels(n) or keys(n) on a deleted variable
1393    if let Expr::FunctionCall { name, args, .. } = expr
1394        && matches!(name.to_lowercase().as_str(), "labels" | "keys")
1395        && args.len() == 1
1396        && let Expr::Variable(var) = &args[0]
1397        && deleted_vars.contains(var)
1398    {
1399        return Err(anyhow!(
1400            "EntityNotFound: DeletedEntityAccess - Cannot access {} of deleted entity '{}'",
1401            name.to_lowercase(),
1402            var
1403        ));
1404    }
1405    let mut result = Ok(());
1406    expr.for_each_child(&mut |child| {
1407        if result.is_ok() {
1408            result = validate_no_deleted_entity_access(child, deleted_vars);
1409        }
1410    });
1411    result
1412}
1413
1414/// Validate that all variables referenced in properties are defined,
1415/// either in scope or in the local CREATE variable list.
1416fn validate_property_variables(
1417    properties: &Option<Expr>,
1418    vars_in_scope: &[VariableInfo],
1419    create_vars: &[&str],
1420) -> Result<()> {
1421    if let Some(props) = properties {
1422        for var in collect_expr_variables(props) {
1423            if !is_var_in_scope(vars_in_scope, &var) && !create_vars.contains(&var.as_str()) {
1424                return Err(anyhow!(
1425                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1426                    var
1427                ));
1428            }
1429        }
1430    }
1431    Ok(())
1432}
1433
1434/// Check that a variable name is not already bound in scope or in the local CREATE list.
1435/// Used to prevent rebinding in CREATE clauses.
1436fn check_not_already_bound(
1437    name: &str,
1438    vars_in_scope: &[VariableInfo],
1439    create_vars: &[&str],
1440) -> Result<()> {
1441    if is_var_in_scope(vars_in_scope, name) {
1442        return Err(anyhow!(
1443            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1444            name
1445        ));
1446    }
1447    if create_vars.contains(&name) {
1448        return Err(anyhow!(
1449            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined in CREATE",
1450            name
1451        ));
1452    }
1453    Ok(())
1454}
1455
1456fn build_merge_scope(pattern: &Pattern, vars_in_scope: &[VariableInfo]) -> Vec<VariableInfo> {
1457    let mut scope = vars_in_scope.to_vec();
1458
1459    for path in &pattern.paths {
1460        if let Some(path_var) = &path.variable
1461            && !path_var.is_empty()
1462            && !is_var_in_scope(&scope, path_var)
1463        {
1464            scope.push(VariableInfo::new(path_var.clone(), VariableType::Path));
1465        }
1466        for element in &path.elements {
1467            match element {
1468                PatternElement::Node(n) => {
1469                    if let Some(v) = &n.variable
1470                        && !v.is_empty()
1471                        && !is_var_in_scope(&scope, v)
1472                    {
1473                        scope.push(VariableInfo::new(v.clone(), VariableType::Node));
1474                    }
1475                }
1476                PatternElement::Relationship(r) => {
1477                    if let Some(v) = &r.variable
1478                        && !v.is_empty()
1479                        && !is_var_in_scope(&scope, v)
1480                    {
1481                        scope.push(VariableInfo::new(v.clone(), VariableType::Edge));
1482                    }
1483                }
1484                PatternElement::Parenthesized { .. } => {}
1485            }
1486        }
1487    }
1488
1489    scope
1490}
1491
1492fn validate_merge_set_item(item: &SetItem, vars_in_scope: &[VariableInfo]) -> Result<()> {
1493    match item {
1494        SetItem::Property { expr, value } => {
1495            validate_expression_variables(expr, vars_in_scope)?;
1496            validate_expression(expr, vars_in_scope)?;
1497            validate_expression_variables(value, vars_in_scope)?;
1498            validate_expression(value, vars_in_scope)?;
1499            if contains_pattern_predicate(expr) || contains_pattern_predicate(value) {
1500                return Err(anyhow!(
1501                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1502                ));
1503            }
1504        }
1505        SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
1506            if !is_var_in_scope(vars_in_scope, variable) {
1507                return Err(anyhow!(
1508                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1509                    variable
1510                ));
1511            }
1512            validate_expression_variables(value, vars_in_scope)?;
1513            validate_expression(value, vars_in_scope)?;
1514            if contains_pattern_predicate(value) {
1515                return Err(anyhow!(
1516                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1517                ));
1518            }
1519        }
1520        SetItem::Labels { variable, .. } => {
1521            if !is_var_in_scope(vars_in_scope, variable) {
1522                return Err(anyhow!(
1523                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1524                    variable
1525                ));
1526            }
1527        }
1528    }
1529
1530    Ok(())
1531}
1532
1533/// Reject MERGE patterns containing null property values (e.g. `MERGE ({k: null})`).
1534/// The OpenCypher spec requires all property values in MERGE to be non-null.
1535fn reject_null_merge_properties(properties: &Option<Expr>) -> Result<()> {
1536    if let Some(Expr::Map(entries)) = properties {
1537        for (key, value) in entries {
1538            if matches!(value, Expr::Literal(CypherLiteral::Null)) {
1539                return Err(anyhow!(
1540                    "SemanticError: MergeReadOwnWrites - MERGE cannot use null property value for '{}'",
1541                    key
1542                ));
1543            }
1544        }
1545    }
1546    Ok(())
1547}
1548
1549/// Flatten every label name appearing in a `Pattern` (across all paths
1550/// and node elements). Used by the M5 follow-up #6 write-rejection
1551/// guard to refuse CREATE/MERGE that names a virtual catalog-resolved
1552/// label.
1553fn collect_pattern_labels(pattern: &uni_cypher::ast::Pattern) -> Vec<String> {
1554    let mut out = Vec::new();
1555    for path in &pattern.paths {
1556        for element in &path.elements {
1557            if let PatternElement::Node(n) = element {
1558                for l in n.labels.names() {
1559                    out.push(l.clone());
1560                }
1561            }
1562        }
1563    }
1564    out
1565}
1566
1567fn validate_merge_clause(merge_clause: &MergeClause, vars_in_scope: &[VariableInfo]) -> Result<()> {
1568    for path in &merge_clause.pattern.paths {
1569        for element in &path.elements {
1570            match element {
1571                PatternElement::Node(n) => {
1572                    if let Some(Expr::Parameter(_)) = &n.properties {
1573                        return Err(anyhow!(
1574                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
1575                        ));
1576                    }
1577                    reject_null_merge_properties(&n.properties)?;
1578                    // VariableAlreadyBound: reject if a bound variable is used
1579                    // as a standalone MERGE node or introduces new labels/properties.
1580                    // Bare endpoint references like (a) in MERGE (a)-[:R]->(b) are valid.
1581                    if let Some(variable) = &n.variable
1582                        && !variable.is_empty()
1583                        && is_var_in_scope(vars_in_scope, variable)
1584                    {
1585                        let is_standalone = path.elements.len() == 1;
1586                        let has_new_labels = !n.labels.is_empty();
1587                        let has_new_properties = n.properties.is_some();
1588                        if is_standalone || has_new_labels || has_new_properties {
1589                            return Err(anyhow!(
1590                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1591                                variable
1592                            ));
1593                        }
1594                    }
1595                }
1596                PatternElement::Relationship(r) => {
1597                    if let Some(variable) = &r.variable
1598                        && !variable.is_empty()
1599                        && is_var_in_scope(vars_in_scope, variable)
1600                    {
1601                        return Err(anyhow!(
1602                            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1603                            variable
1604                        ));
1605                    }
1606                    if r.types.len() != 1 {
1607                        return Err(anyhow!(
1608                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for MERGE"
1609                        ));
1610                    }
1611                    if r.range.is_some() {
1612                        return Err(anyhow!(
1613                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
1614                        ));
1615                    }
1616                    if let Some(Expr::Parameter(_)) = &r.properties {
1617                        return Err(anyhow!(
1618                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
1619                        ));
1620                    }
1621                    reject_null_merge_properties(&r.properties)?;
1622                }
1623                PatternElement::Parenthesized { .. } => {}
1624            }
1625        }
1626    }
1627
1628    let merge_scope = build_merge_scope(&merge_clause.pattern, vars_in_scope);
1629    for item in &merge_clause.on_create {
1630        validate_merge_set_item(item, &merge_scope)?;
1631    }
1632    for item in &merge_clause.on_match {
1633        validate_merge_set_item(item, &merge_scope)?;
1634    }
1635
1636    Ok(())
1637}
1638
1639/// Recursively validate an expression for type errors, undefined variables, etc.
1640fn validate_expression(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
1641    // Validate boolean operators and nested aggregation first
1642    validate_boolean_expression(expr)?;
1643    validate_no_nested_aggregation(expr)?;
1644
1645    // Helper to validate multiple expressions
1646    fn validate_all(exprs: &[Expr], vars: &[VariableInfo]) -> Result<()> {
1647        for e in exprs {
1648            validate_expression(e, vars)?;
1649        }
1650        Ok(())
1651    }
1652
1653    match expr {
1654        Expr::FunctionCall { name, args, .. } => {
1655            validate_function_call(name, args, vars_in_scope)?;
1656            validate_all(args, vars_in_scope)
1657        }
1658        Expr::BinaryOp { left, right, .. } => {
1659            validate_expression(left, vars_in_scope)?;
1660            validate_expression(right, vars_in_scope)
1661        }
1662        Expr::UnaryOp { expr: e, .. }
1663        | Expr::IsNull(e)
1664        | Expr::IsNotNull(e)
1665        | Expr::IsUnique(e) => validate_expression(e, vars_in_scope),
1666        Expr::Property(base, prop) => {
1667            if let Expr::Variable(var_name) = base.as_ref()
1668                && let Some(var_info) = find_var_in_scope(vars_in_scope, var_name)
1669            {
1670                // Paths don't have properties
1671                if var_info.var_type == VariableType::Path {
1672                    return Err(anyhow!(
1673                        "SyntaxError: InvalidArgumentType - Type mismatch: expected Node or Relationship but was Path for property access '{}.{}'",
1674                        var_name,
1675                        prop
1676                    ));
1677                }
1678                // Known non-graph literals (int, float, bool, string, list) don't have properties
1679                if var_info.var_type == VariableType::ScalarLiteral {
1680                    return Err(anyhow!(
1681                        "TypeError: InvalidArgumentType - Property access on a non-graph element is not allowed"
1682                    ));
1683                }
1684            }
1685            validate_expression(base, vars_in_scope)
1686        }
1687        Expr::List(items) => validate_all(items, vars_in_scope),
1688        Expr::Case {
1689            expr: case_expr,
1690            when_then,
1691            else_expr,
1692        } => {
1693            if let Some(e) = case_expr {
1694                validate_expression(e, vars_in_scope)?;
1695            }
1696            for (w, t) in when_then {
1697                validate_expression(w, vars_in_scope)?;
1698                validate_expression(t, vars_in_scope)?;
1699            }
1700            if let Some(e) = else_expr {
1701                validate_expression(e, vars_in_scope)?;
1702            }
1703            Ok(())
1704        }
1705        Expr::In { expr: e, list } => {
1706            validate_expression(e, vars_in_scope)?;
1707            validate_expression(list, vars_in_scope)
1708        }
1709        Expr::Exists {
1710            query,
1711            from_pattern_predicate: true,
1712        } => {
1713            // Pattern predicates cannot introduce new named variables.
1714            // Extract named vars from inner MATCH pattern, check each is in scope.
1715            if let Query::Single(stmt) = query.as_ref() {
1716                for clause in &stmt.clauses {
1717                    if let Clause::Match(m) = clause {
1718                        for path in &m.pattern.paths {
1719                            for elem in &path.elements {
1720                                match elem {
1721                                    PatternElement::Node(n) => {
1722                                        if let Some(var) = &n.variable
1723                                            && !is_var_in_scope(vars_in_scope, var)
1724                                        {
1725                                            return Err(anyhow!(
1726                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1727                                                var
1728                                            ));
1729                                        }
1730                                    }
1731                                    PatternElement::Relationship(r) => {
1732                                        if let Some(var) = &r.variable
1733                                            && !is_var_in_scope(vars_in_scope, var)
1734                                        {
1735                                            return Err(anyhow!(
1736                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1737                                                var
1738                                            ));
1739                                        }
1740                                    }
1741                                    _ => {}
1742                                }
1743                            }
1744                        }
1745                    }
1746                }
1747            }
1748            Ok(())
1749        }
1750        _ => Ok(()),
1751    }
1752}
1753
/// One step (hop) in a Quantified Path Pattern (QPP) sub-pattern.
///
/// Used by `LogicalPlan::Traverse` when `qpp_steps` is `Some`; that field
/// holds one `QppStepInfo` per step of the sub-pattern.
#[derive(Debug, Clone)]
pub struct QppStepInfo {
    /// Edge type IDs that this step can traverse.
    pub edge_type_ids: Vec<u32>,
    /// Traversal direction for this step.
    pub direction: Direction,
    /// Optional label constraint on the target node; `None` when the step
    /// carries no label constraint.
    pub target_label: Option<String>,
}
1766
/// Phase 5a-impl: per-type fusion strategy for `LogicalPlan::FusedIndexScan`
/// (also carried by `LogicalPlan::FusedIndexScanWrapped`).
///
/// `#[non_exhaustive]` so further strategies can be added without breaking
/// downstream pattern-match exhaustiveness. The Phase 5b strategies
/// `AnnRerank` and `Bm25Rrf` are already part of the enum.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum FusionKind {
    /// Union of parent + fork-local BTree hits, deduped by VID.
    BtreeUnion,
    /// k-way merge of pre-sorted parent + fork streams (ORDER BY).
    SortedKWayMerge,
    /// Fork-first UID lookup; falls back to parent on miss. Used
    /// when a fork rebinds an external UID and queries must see the
    /// fork's binding before the parent's.
    VidUidForkFirst,
    /// Phase 5b — vector ANN rerank: top-k from primary's index +
    /// top-k from fork-local index, merged and reranked by exact
    /// distance. Recall ≥ 95% per spec §8.2.
    AnnRerank,
    /// Phase 5b — BM25 reciprocal rank fusion: ranked lists from
    /// primary's and fork-local FTS indexes combined via standard
    /// RRF (`score = sum 1 / (k_rrf + rank_i)`, k_rrf = 60).
    Bm25Rrf,
}
1791
/// Logical query plan produced by [`QueryPlanner`].
///
/// Each variant represents one step in the Cypher execution pipeline.
/// Plans are tree-structured — leaf nodes produce rows, intermediate nodes
/// transform or join them, and the root node defines the final output.
///
/// Variants that carry an `input: Box<LogicalPlan>` are intermediate nodes
/// layered on top of that input plan; variants without one are leaves or
/// standalone commands (DDL, admin, Locy).
#[derive(Debug, Clone)]
pub enum LogicalPlan {
    /// UNION / UNION ALL of two sub-plans.
    Union {
        left: Box<LogicalPlan>,
        right: Box<LogicalPlan>,
        /// When `true`, duplicate rows are preserved (UNION ALL semantics).
        all: bool,
    },
    /// Scan vertices of a single labeled dataset.
    Scan {
        label_id: u16,
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Phase 5a-impl: fused scan over both primary's index and the
    /// forked session's fork-local index. Emitted by the planner only
    /// when (a) the session is forked AND (b) `StorageManager::fork_index_exists`
    /// returns `Some(_)` for the target column. Otherwise the planner
    /// keeps emitting `Scan` and Lance's `base_paths` chain transparently
    /// covers parent-inherited indexes.
    ///
    /// `kind` selects the per-type fusion strategy:
    /// - `BtreeUnion` — union of parent + fork hits, dedup by VID.
    /// - `SortedKWayMerge` — k-way merge of two pre-sorted streams.
    /// - `VidUidForkFirst` — probe fork's branch first, fall back to
    ///   parent's UID index on miss.
    ///
    /// The remaining fields mirror those of `Scan`.
    FusedIndexScan {
        label_id: u16,
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
        kind: FusionKind,
    },
    /// Phase 5b followup: planner-side observability marker for the
    /// lossy fusion types. Wraps the original `VectorKnn` or
    /// `InvertedIndexLookup` (or any future leaf operator whose
    /// shape differs from `Scan`) without changing its fields, so
    /// the physical planner can decay it to `inner` unchanged.
    ///
    /// Runtime behavior is identical to running `inner` directly;
    /// the wrap is purely for explain-plan and runtime-stats
    /// observability. The actual fusion happens at the
    /// `BranchedBackend` layer (per-branch Lance reads via
    /// `base_paths`), exactly as in Phase 5b's core ship.
    FusedIndexScanWrapped {
        inner: Box<LogicalPlan>,
        kind: FusionKind,
    },
    /// Lookup vertices by ext_id using the main vertices table.
    /// Used when a query references ext_id without specifying a label.
    ExtIdLookup {
        variable: String,
        ext_id: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Scan all vertices from main table (MATCH (n) without label).
    /// Used for schemaless queries that don't specify any label.
    ScanAll {
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Scan the main vertices table filtering by label name(s), e.g.
    /// `MATCH (n:Unknown)`. Used for labels not defined in schema
    /// (schemaless support).
    /// When labels has multiple entries, uses intersection semantics (must have ALL labels).
    ScanMainByLabels {
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Produces exactly one empty row (used to bootstrap pipelines with no source).
    Empty,
    /// UNWIND: expand a list expression into one row per element.
    Unwind {
        input: Box<LogicalPlan>,
        expr: Expr,
        variable: String,
    },
    /// Edge traversal from `source_variable` to `target_variable` over the
    /// edge types in `edge_type_ids`. Covers single-hop patterns,
    /// variable-length patterns (`is_variable_length`) and quantified path
    /// patterns (`qpp_steps`).
    Traverse {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        step_variable: Option<String>,
        min_hops: usize,
        max_hops: usize,
        optional: bool,
        target_filter: Option<Expr>,
        path_variable: Option<String>,
        edge_properties: HashSet<String>,
        /// Whether this is a variable-length pattern (has `*` range specifier).
        /// When true, step_variable holds a list of edges (even for *1..1).
        is_variable_length: bool,
        /// All variables from this OPTIONAL MATCH pattern.
        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
        /// This ensures proper multi-hop OPTIONAL MATCH semantics.
        optional_pattern_vars: HashSet<String>,
        /// Variable names (node + edge) from the current MATCH clause scope.
        /// Used for relationship uniqueness scoping: only edge ID columns whose
        /// associated variable is in this set participate in uniqueness filtering.
        /// Variables from previous disconnected MATCH clauses are excluded.
        scope_match_variables: HashSet<String>,
        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
        edge_filter_expr: Option<Expr>,
        /// Path traversal semantics (Trail by default for OpenCypher).
        path_mode: crate::query::df_graph::nfa::PathMode,
        /// QPP steps for multi-hop quantified path patterns.
        /// `None` for simple VLP patterns; `Some` for QPP with per-step edge types/constraints.
        /// When present, `min_hops`/`max_hops` are derived from iterations × steps.len().
        qpp_steps: Option<Vec<QppStepInfo>>,
    },
    /// Traverse main edges table filtering by type name(s) (`MATCH (a)-[:Unknown]->(b)`).
    /// Used for edge types not defined in schema (schemaless support).
    /// Supports OR relationship types like `[:KNOWS|HATES]` via multiple type_names.
    TraverseMainByType {
        type_names: Vec<String>,
        input: Box<LogicalPlan>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        step_variable: Option<String>,
        min_hops: usize,
        max_hops: usize,
        optional: bool,
        target_filter: Option<Expr>,
        path_variable: Option<String>,
        /// Whether this is a variable-length pattern (has `*` range specifier).
        /// When true, step_variable holds a list of edges (even for *1..1).
        is_variable_length: bool,
        /// All variables from this OPTIONAL MATCH pattern.
        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
        optional_pattern_vars: HashSet<String>,
        /// Variables belonging to the current MATCH clause scope.
        /// Used for relationship uniqueness scoping: only edge columns whose
        /// associated variable is in this set participate in uniqueness filtering.
        scope_match_variables: HashSet<String>,
        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
        edge_filter_expr: Option<Expr>,
        /// Path traversal semantics (Trail by default for OpenCypher).
        path_mode: crate::query::df_graph::nfa::PathMode,
    },
    /// Filter the rows of `input` by `predicate`.
    Filter {
        input: Box<LogicalPlan>,
        predicate: Expr,
        /// Variables from OPTIONAL MATCH that should preserve NULL rows.
        /// When evaluating the filter, if any of these variables are NULL,
        /// the row is preserved regardless of the predicate result.
        optional_variables: HashSet<String>,
    },
    /// CREATE of a single `pattern` on top of `input`.
    Create {
        input: Box<LogicalPlan>,
        pattern: Pattern,
    },
    /// Batched CREATE operations for multiple consecutive CREATE clauses.
    ///
    /// This variant combines multiple CREATE patterns into a single plan node
    /// to avoid deep recursion when executing many CREATEs sequentially.
    CreateBatch {
        input: Box<LogicalPlan>,
        patterns: Vec<Pattern>,
    },
    /// MERGE of `pattern`, with the optional ON MATCH / ON CREATE SET clauses.
    Merge {
        input: Box<LogicalPlan>,
        pattern: Pattern,
        on_match: Option<SetClause>,
        on_create: Option<SetClause>,
    },
    /// SET clause carrying its individual set items.
    Set {
        input: Box<LogicalPlan>,
        items: Vec<SetItem>,
    },
    /// REMOVE clause carrying its individual remove items.
    Remove {
        input: Box<LogicalPlan>,
        items: Vec<RemoveItem>,
    },
    /// DELETE of the entities named by `items`.
    /// NOTE(review): `detach` presumably corresponds to `DETACH DELETE` — confirm.
    Delete {
        input: Box<LogicalPlan>,
        items: Vec<Expr>,
        detach: bool,
    },
    /// FOREACH (variable IN list | clauses)
    Foreach {
        input: Box<LogicalPlan>,
        variable: String,
        list: Expr,
        body: Vec<LogicalPlan>,
    },
    /// ORDER BY over the rows of `input`.
    Sort {
        input: Box<LogicalPlan>,
        order_by: Vec<SortItem>,
    },
    /// SKIP / LIMIT over the rows of `input`; either bound may be absent.
    Limit {
        input: Box<LogicalPlan>,
        skip: Option<usize>,
        fetch: Option<usize>,
    },
    /// Aggregation with grouping expressions `group_by` and aggregate
    /// expressions `aggregates`.
    Aggregate {
        input: Box<LogicalPlan>,
        group_by: Vec<Expr>,
        aggregates: Vec<Expr>,
    },
    /// DISTINCT over the rows of `input`.
    Distinct {
        input: Box<LogicalPlan>,
    },
    /// Evaluation of the window expressions in `window_exprs`.
    Window {
        input: Box<LogicalPlan>,
        window_exprs: Vec<Expr>,
    },
    /// Projection of expressions, each paired with an optional name
    /// (presumably the output alias — confirm against the planner).
    Project {
        input: Box<LogicalPlan>,
        projections: Vec<(Expr, Option<String>)>,
    },
    /// Cross join of two sub-plans.
    CrossJoin {
        left: Box<LogicalPlan>,
        right: Box<LogicalPlan>,
    },
    /// Apply `subquery` on top of `input`.
    /// NOTE(review): the exact role of `input_filter` is not visible in this
    /// definition — confirm against the physical planner.
    Apply {
        input: Box<LogicalPlan>,
        subquery: Box<LogicalPlan>,
        input_filter: Option<Expr>,
    },
    /// Recursive CTE named `cte_name`, made of an `initial` plan and a
    /// `recursive` plan.
    RecursiveCTE {
        cte_name: String,
        initial: Box<LogicalPlan>,
        recursive: Box<LogicalPlan>,
    },
    /// Procedure call with its arguments and yielded items; each yield item
    /// is a name paired with an optional second name (presumably an alias).
    ProcedureCall {
        procedure_name: String,
        arguments: Vec<Expr>,
        yield_items: Vec<(String, Option<String>)>,
    },
    /// Subquery call layered on top of `input`.
    SubqueryCall {
        input: Box<LogicalPlan>,
        subquery: Box<LogicalPlan>,
    },
    /// Vector KNN leaf operator on `property` of label `label_id`, with a
    /// result count `k` and an optional `threshold`.
    VectorKnn {
        label_id: u16,
        variable: String,
        property: String,
        query: Expr,
        k: usize,
        threshold: Option<f32>,
    },
    /// Inverted-index lookup leaf operator on `property` of label `label_id`
    /// for the given `terms`.
    InvertedIndexLookup {
        label_id: u16,
        variable: String,
        property: String,
        terms: Expr,
    },
    /// Shortest-path search from `source_variable` to `target_variable`,
    /// bound to `path_variable`.
    ShortestPath {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        path_variable: String,
        /// Minimum number of hops (edges) in the path. Default is 1.
        min_hops: u32,
        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
        max_hops: u32,
    },
    /// allShortestPaths() - Returns all paths with minimum length
    AllShortestPaths {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        path_variable: String,
        /// Minimum number of hops (edges) in the path. Default is 1.
        min_hops: u32,
        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
        max_hops: u32,
    },
    /// Quantified pattern: `pattern_plan` describes one iteration, bounded
    /// by `min_iterations` / `max_iterations`.
    QuantifiedPattern {
        input: Box<LogicalPlan>,
        /// Plan for one iteration.
        pattern_plan: Box<LogicalPlan>,
        min_iterations: u32,
        max_iterations: u32,
        path_variable: Option<String>,
        /// Input variable for iteration (e.g. 'a' in (a)-[:R]->(b)).
        start_variable: String,
        /// Output variable of iteration (e.g. 'b').
        binding_variable: String,
    },
    // DDL Plans
    /// Create a vector index from `config`.
    CreateVectorIndex {
        config: VectorIndexConfig,
        if_not_exists: bool,
    },
    /// Create a full-text index from `config`.
    CreateFullTextIndex {
        config: FullTextIndexConfig,
        if_not_exists: bool,
    },
    /// Create a scalar index from `config`.
    CreateScalarIndex {
        config: ScalarIndexConfig,
        if_not_exists: bool,
    },
    /// Create a JSON full-text-search index from `config`.
    CreateJsonFtsIndex {
        config: JsonFtsIndexConfig,
        if_not_exists: bool,
    },
    /// Drop the index called `name`.
    DropIndex {
        name: String,
        if_exists: bool,
    },
    /// Show indexes, optionally restricted by `filter`.
    ShowIndexes {
        filter: Option<String>,
    },
    /// COPY between `source` and `target`; `is_export` distinguishes the
    /// export direction from the import direction.
    Copy {
        target: String,
        source: String,
        is_export: bool,
        options: HashMap<String, Value>,
    },
    /// Backup to `destination` with the given options.
    Backup {
        destination: String,
        options: HashMap<String, Value>,
    },
    /// EXPLAIN wrapper around another plan.
    Explain {
        plan: Box<LogicalPlan>,
    },
    // Admin Plans
    ShowDatabase,
    ShowConfig,
    ShowStatistics,
    Vacuum,
    Checkpoint,
    /// Copy the data of `label` to `path` in the given `format`.
    CopyTo {
        label: String,
        path: String,
        format: String,
        options: HashMap<String, Value>,
    },
    /// Copy data from `path` (in the given `format`) into `label`.
    CopyFrom {
        label: String,
        path: String,
        format: String,
        options: HashMap<String, Value>,
    },
    // Schema DDL
    CreateLabel(CreateLabel),
    CreateEdgeType(CreateEdgeType),
    AlterLabel(AlterLabel),
    AlterEdgeType(AlterEdgeType),
    DropLabel(DropLabel),
    DropEdgeType(DropEdgeType),
    // Constraints
    CreateConstraint(CreateConstraint),
    DropConstraint(DropConstraint),
    ShowConstraints(ShowConstraints),
    /// Bind a zero-length path (single node pattern with path variable).
    /// E.g., `p = (a)` creates a Path with one node and zero edges.
    BindZeroLengthPath {
        input: Box<LogicalPlan>,
        node_variable: String,
        path_variable: String,
    },
    /// Bind a fixed-length path from already-computed node and edge columns.
    /// E.g., `p = (a)-[r]->(b)` or `p = (a)-[r1]->(b)-[r2]->(c)`.
    BindPath {
        input: Box<LogicalPlan>,
        node_variables: Vec<String>,
        edge_variables: Vec<String>,
        path_variable: String,
    },

    // ── Locy variants ──────────────────────────────────────────
    /// Top-level Locy program: stratified rules + commands.
    LocyProgram {
        strata: Vec<super::planner_locy_types::LocyStratum>,
        commands: Vec<super::planner_locy_types::LocyCommand>,
        derived_scan_registry: Arc<super::df_graph::locy_fixpoint::DerivedScanRegistry>,
        max_iterations: usize,
        timeout: std::time::Duration,
        max_derived_bytes: usize,
        deterministic_best_by: bool,
        strict_probability_domain: bool,
        probability_epsilon: f64,
        exact_probability: bool,
        max_bdd_variables: usize,
        top_k_proofs: usize,
        /// Active probability semiring (rollout D-7). Defaults to
        /// `AddMultProb` (Phase 1/2 byte-identical behavior). `BddExact`
        /// is selected by `LocyConfig::resolve()` when `exact_probability`
        /// is true.
        semiring_kind: uni_locy::SemiringKind,
        /// Phase B Slice 3: per-evaluation registry of neural classifiers
        /// keyed by model name. Empty for programs without `CREATE MODEL`.
        classifier_registry: Arc<uni_locy::ClassifierRegistry>,
        /// Phase B follow-up: optional memoization cache. `None` →
        /// runtime creates a fresh per-query cache; `Some` → shared
        /// across queries (caller-managed).
        classifier_cache: Option<Arc<uni_locy::ModelInvocationCache>>,
        /// Phase C B1-B3 follow-up: per-query side-channel store
        /// for per-invocation (raw, calibrated, confidence_band)
        /// records. Flows alongside `classifier_cache` into
        /// `LocyProgramExec`.
        classifier_provenance_store: Option<Arc<uni_locy::NeuralProvenanceStore>>,
    },
    /// FOLD operator: lattice-join non-key columns per KEY group.
    LocyFold {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
        fold_bindings: Vec<(String, Expr)>,
        strict_probability_domain: bool,
        probability_epsilon: f64,
    },
    /// BEST BY operator: select best row per KEY group by ordered criteria.
    LocyBestBy {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
        /// (expression, ascending) pairs.
        criteria: Vec<(Expr, bool)>,
    },
    /// PRIORITY operator: keep only highest-priority clause's rows per KEY group.
    LocyPriority {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
    },
    /// Scan a derived relation's in-memory buffer during fixpoint iteration.
    LocyDerivedScan {
        scan_index: usize,
        data: Arc<RwLock<Vec<RecordBatch>>>,
        schema: SchemaRef,
    },
    /// Compact projection for Locy YIELD — emits ONLY the listed expressions,
    /// without carrying through helper/property columns like the regular Project.
    LocyProject {
        input: Box<LogicalPlan>,
        projections: Vec<(Expr, Option<String>)>,
        /// Expected output Arrow type per projection (for CAST support).
        target_types: Vec<DataType>,
    },
    /// Phase B A4: invoke registered neural classifiers against the
    /// input batches and overwrite the per-invocation placeholder
    /// column with each row's predicted probability. Wraps a Locy
    /// clause body plan when `CompiledClause.model_invocations` is
    /// non-empty; transparent (passes batches through unchanged) when
    /// the list is empty.
    ///
    /// Registry and cache are carried on the node so that
    /// `execute_subplan` — which spins up a fresh
    /// `HybridPhysicalPlanner` per call — can lower it to a physical
    /// `LocyModelInvokeExec` without depending on planner-side
    /// runtime state.
    LocyModelInvoke {
        input: Box<LogicalPlan>,
        invocations: Vec<uni_locy::ModelInvocation>,
        classifier_registry: Arc<uni_locy::ClassifierRegistry>,
        classifier_cache: Option<Arc<uni_locy::ModelInvocationCache>>,
        /// Phase C B1-B3 follow-up: per-query side-channel store
        /// for per-invocation (raw, calibrated, confidence_band)
        /// records. `LocyModelInvokeExec` writes here after each
        /// classifier call; EXPLAIN reads via collect_neural_calls
        /// to surface NeuralProvenance for ALONG/FOLD-position
        /// invocations and Mode B re-execution paths.
        classifier_provenance_store: Option<Arc<uni_locy::NeuralProvenanceStore>>,
        /// Phase D D3 runtime: one handle per `path_context.source_rule`
        /// referenced by any invocation on this node. The handle's
        /// `data: Arc<RwLock<Vec<RecordBatch>>>` is shared with the
        /// `DerivedScanRegistry`; the source rule's derived facts are
        /// already converged by the time this node executes (the
        /// dependency-graph builder ensures source rules sit in
        /// earlier strata).
        path_context_handles: std::collections::HashMap<
            String,
            super::df_graph::locy_model_invoke::PathContextHandle,
        >,
    },
}
2277
/// Extracted vector similarity predicate info for optimization.
///
/// Produced by `extract_simple_vector_similarity` from a comparison such as
/// `vector_similarity(n.embedding, [1,2,3]) > 0.8`.
struct VectorSimilarityPredicate {
    /// Variable part of the matched call (presumably `n` in the example
    /// above — confirm against `extract_vector_similarity_call`).
    variable: String,
    /// Property part of the matched call (presumably `embedding` in the
    /// example above — confirm against `extract_vector_similarity_call`).
    property: String,
    /// Query expression the property is compared against.
    query: Expr,
    /// Similarity threshold taken from the comparison literal, when present.
    threshold: Option<f32>,
}
2285
/// Result of extracting vector_similarity from a predicate.
///
/// Returned by `extract_vector_similarity`: the similarity predicate that
/// was pulled out, plus whatever was left of the original expression.
struct VectorSimilarityExtraction {
    /// The extracted vector similarity predicate
    predicate: VectorSimilarityPredicate,
    /// Remaining predicates that couldn't be optimized (if any).
    /// `None` when the whole expression was the similarity comparison.
    residual: Option<Expr>,
}
2293
2294/// Try to extract a vector_similarity predicate from an expression.
2295/// Matches patterns like:
2296/// - vector_similarity(n.embedding, [1,2,3]) > 0.8
2297/// - n.embedding ~= $query
2298///
2299/// Also handles AND predicates.
2300fn extract_vector_similarity(expr: &Expr) -> Option<VectorSimilarityExtraction> {
2301    match expr {
2302        Expr::BinaryOp { left, op, right } => {
2303            // Handle AND: check both sides for vector_similarity
2304            if matches!(op, BinaryOp::And) {
2305                // Try left side first
2306                if let Some(vs) = extract_simple_vector_similarity(left) {
2307                    return Some(VectorSimilarityExtraction {
2308                        predicate: vs,
2309                        residual: Some(right.as_ref().clone()),
2310                    });
2311                }
2312                // Try right side
2313                if let Some(vs) = extract_simple_vector_similarity(right) {
2314                    return Some(VectorSimilarityExtraction {
2315                        predicate: vs,
2316                        residual: Some(left.as_ref().clone()),
2317                    });
2318                }
2319                // Recursively check within left/right for nested ANDs
2320                if let Some(mut extraction) = extract_vector_similarity(left) {
2321                    extraction.residual = Some(combine_with_and(
2322                        extraction.residual,
2323                        right.as_ref().clone(),
2324                    ));
2325                    return Some(extraction);
2326                }
2327                if let Some(mut extraction) = extract_vector_similarity(right) {
2328                    extraction.residual =
2329                        Some(combine_with_and(extraction.residual, left.as_ref().clone()));
2330                    return Some(extraction);
2331                }
2332                return None;
2333            }
2334
2335            // Simple case: direct vector_similarity comparison
2336            if let Some(vs) = extract_simple_vector_similarity(expr) {
2337                return Some(VectorSimilarityExtraction {
2338                    predicate: vs,
2339                    residual: None,
2340                });
2341            }
2342            None
2343        }
2344        _ => None,
2345    }
2346}
2347
2348/// Helper to combine an optional expression with another using AND
2349fn combine_with_and(opt_expr: Option<Expr>, other: Expr) -> Expr {
2350    match opt_expr {
2351        Some(e) => Expr::BinaryOp {
2352            left: Box::new(e),
2353            op: BinaryOp::And,
2354            right: Box::new(other),
2355        },
2356        None => other,
2357    }
2358}
2359
2360/// Extract a simple vector_similarity comparison (no AND)
2361fn extract_simple_vector_similarity(expr: &Expr) -> Option<VectorSimilarityPredicate> {
2362    match expr {
2363        Expr::BinaryOp { left, op, right } => {
2364            // Pattern: vector_similarity(...) > threshold or vector_similarity(...) >= threshold
2365            if matches!(op, BinaryOp::Gt | BinaryOp::GtEq)
2366                && let (Some(vs), Some(thresh)) = (
2367                    extract_vector_similarity_call(left),
2368                    extract_float_literal(right),
2369                )
2370            {
2371                return Some(VectorSimilarityPredicate {
2372                    variable: vs.0,
2373                    property: vs.1,
2374                    query: vs.2,
2375                    threshold: Some(thresh),
2376                });
2377            }
2378            // Pattern: threshold < vector_similarity(...) or threshold <= vector_similarity(...)
2379            if matches!(op, BinaryOp::Lt | BinaryOp::LtEq)
2380                && let (Some(thresh), Some(vs)) = (
2381                    extract_float_literal(left),
2382                    extract_vector_similarity_call(right),
2383                )
2384            {
2385                return Some(VectorSimilarityPredicate {
2386                    variable: vs.0,
2387                    property: vs.1,
2388                    query: vs.2,
2389                    threshold: Some(thresh),
2390                });
2391            }
2392            // Pattern: n.embedding ~= query
2393            if matches!(op, BinaryOp::ApproxEq)
2394                && let Expr::Property(var_expr, prop) = left.as_ref()
2395                && let Expr::Variable(var) = var_expr.as_ref()
2396            {
2397                return Some(VectorSimilarityPredicate {
2398                    variable: var.clone(),
2399                    property: prop.clone(),
2400                    query: right.as_ref().clone(),
2401                    threshold: None,
2402                });
2403            }
2404            None
2405        }
2406        _ => None,
2407    }
2408}
2409
2410/// Extract (variable, property, query_expr) from vector_similarity(n.prop, query)
2411fn extract_vector_similarity_call(expr: &Expr) -> Option<(String, String, Expr)> {
2412    if let Expr::FunctionCall { name, args, .. } = expr
2413        && name.eq_ignore_ascii_case("vector_similarity")
2414        && args.len() == 2
2415    {
2416        // First arg should be Property(Identifier(var), prop)
2417        if let Expr::Property(var_expr, prop) = &args[0]
2418            && let Expr::Variable(var) = var_expr.as_ref()
2419        {
2420            // Second arg is query
2421            return Some((var.clone(), prop.clone(), args[1].clone()));
2422        }
2423    }
2424    None
2425}
2426
2427/// Extract a float value from a literal expression
2428fn extract_float_literal(expr: &Expr) -> Option<f32> {
2429    match expr {
2430        Expr::Literal(CypherLiteral::Integer(i)) => Some(*i as f32),
2431        Expr::Literal(CypherLiteral::Float(f)) => Some(*f as f32),
2432        _ => None,
2433    }
2434}
2435
2436/// Translates a parsed Cypher AST into a [`LogicalPlan`].
2437///
2438/// `QueryPlanner` applies semantic validation (variable scoping, label
2439/// resolution, type checking) and produces a plan tree that the executor
2440/// can run against storage.
2441#[derive(Debug)]
2442pub struct QueryPlanner {
2443    schema: Arc<Schema>,
2444    /// Cache of parsed generation expressions, keyed by (label_name, gen_col_name).
2445    gen_expr_cache: HashMap<(String, String), Expr>,
2446    /// Counter for generating unique anonymous variable names.
2447    anon_counter: std::sync::atomic::AtomicUsize,
2448    /// Optional query parameters for resolving $param in SKIP/LIMIT.
2449    params: HashMap<String, uni_common::Value>,
2450    /// Optional plugin registry consulted when label / edge-type / identifier
2451    /// resolution misses the local schema (M5b — Catalog / ReplacementScan).
2452    plugin_registry: Option<Arc<uni_plugin::PluginRegistry>>,
2453    /// Gate for replacement-scan dispatch on unknown identifiers (M5b).
2454    replacement_scans_enabled: bool,
2455    /// Names of parameters folded into a `LIMIT`/`SKIP` position during the
2456    /// plan. The resulting `LogicalPlan::Limit` bakes the concrete values in, so
2457    /// a plan cache keyed on query text must additionally key on these
2458    /// parameters' values (see `folded_limit_skip_params`). Interior-mutable
2459    /// because `plan` takes `&self`.
2460    folded_limit_skip_params: std::sync::Mutex<std::collections::BTreeSet<String>>,
2461}
2462
2463struct TraverseParams<'a> {
2464    rel: &'a RelationshipPattern,
2465    target_node: &'a NodePattern,
2466    optional: bool,
2467    path_variable: Option<String>,
2468    /// All variables from this OPTIONAL MATCH pattern.
2469    /// Used to ensure multi-hop patterns correctly NULL all vars when any hop fails.
2470    optional_pattern_vars: HashSet<String>,
2471}
2472
2473impl QueryPlanner {
2474    /// Create a new planner for the given schema.
2475    ///
2476    /// Pre-parses all generation expressions defined in the schema so that
2477    /// repeated plan calls avoid redundant parsing.
2478    pub fn new(schema: Arc<Schema>) -> Self {
2479        // Pre-parse all generation expressions for caching
2480        let mut gen_expr_cache = HashMap::new();
2481        for (label, props) in &schema.properties {
2482            for (gen_col, meta) in props {
2483                if let Some(expr_str) = &meta.generation_expression
2484                    && let Ok(parsed_expr) = uni_cypher::parse_expression(expr_str)
2485                {
2486                    gen_expr_cache.insert((label.clone(), gen_col.clone()), parsed_expr);
2487                }
2488            }
2489        }
2490        Self {
2491            schema,
2492            gen_expr_cache,
2493            anon_counter: std::sync::atomic::AtomicUsize::new(0),
2494            params: HashMap::new(),
2495            plugin_registry: None,
2496            replacement_scans_enabled: false,
2497            folded_limit_skip_params: std::sync::Mutex::new(std::collections::BTreeSet::new()),
2498        }
2499    }
2500
2501    /// Record the parameters referenced by a successfully-folded `LIMIT`/`SKIP`
2502    /// expression so the caller's plan cache can key on their values.
2503    fn note_folded_limit_skip(&self, expr: &Expr) {
2504        let mut names = Vec::new();
2505        collect_expr_parameters(expr, &mut names);
2506        if !names.is_empty()
2507            && let Ok(mut acc) = self.folded_limit_skip_params.lock()
2508        {
2509            acc.extend(names);
2510        }
2511    }
2512
2513    /// Parameter names folded into `LIMIT`/`SKIP` positions during the last
2514    /// [`plan`](Self::plan).
2515    ///
2516    /// The cached plan bakes these values in, so a text-keyed plan cache must
2517    /// fold their current values into its key — otherwise two calls differing
2518    /// only in a LIMIT/SKIP parameter would wrongly share one cached plan.
2519    /// Returns an empty vector when no parameter was folded.
2520    #[must_use]
2521    pub fn folded_limit_skip_params(&self) -> Vec<String> {
2522        self.folded_limit_skip_params
2523            .lock()
2524            .map(|acc| acc.iter().cloned().collect())
2525            .unwrap_or_default()
2526    }
2527
2528    /// Set query parameters for resolving `$param` references in SKIP/LIMIT.
2529    pub fn with_params(mut self, params: HashMap<String, uni_common::Value>) -> Self {
2530        self.params = params;
2531        self
2532    }
2533
2534    /// Attach a plugin registry for catalog / replacement-scan fallbacks
2535    /// (M5b). When absent, label / edge-type resolution behaves exactly as
2536    /// before; when present, an unknown label is offered to each
2537    /// `CatalogProvider` before erroring.
2538    #[must_use]
2539    pub fn with_plugin_registry(mut self, registry: Arc<uni_plugin::PluginRegistry>) -> Self {
2540        self.plugin_registry = Some(registry);
2541        self
2542    }
2543
2544    /// Enable replacement-scan dispatch on unknown identifiers (M5b §4.23).
2545    /// Default off; opt-in only.
2546    #[must_use]
2547    pub fn with_replacement_scans(mut self, enabled: bool) -> Self {
2548        self.replacement_scans_enabled = enabled;
2549        self
2550    }
2551
2552    /// Allocate (or look up) a virtual label ID for `name` by consulting
2553    /// every registered `CatalogProvider` and then every registered
2554    /// `ReplacementScanProvider` (only the latter when the replacement-
2555    /// scan gate is on). On a first claim the catalog table is stashed
2556    /// on the host's [`uni_plugin::PluginRegistry`] under a freshly
2557    /// allocated virtual ID; subsequent calls with the same name return
2558    /// the cached ID and refresh the stashed table.
2559    ///
2560    /// Returns `None` if no provider claims the label or no plugin
2561    /// registry is attached. Returns `Some((id, table))` on a hit; the
2562    /// `id` lies in `[VIRTUAL_LABEL_ID_START, VIRTUAL_LABEL_ID_SENTINEL)`.
2563    /// Errors are surfaced as `Some(Err(_))`-equivalent via `Result`.
2564    fn allocate_virtual_label(
2565        &self,
2566        name: &str,
2567    ) -> Result<Option<(u16, Arc<dyn uni_plugin::traits::catalog::CatalogTable>)>> {
2568        let Some(registry) = self.plugin_registry.as_ref() else {
2569            return Ok(None);
2570        };
2571        // 1. CatalogProvider (always consulted, no gate — Batch 2 semantics).
2572        let mut claimed: Option<Arc<dyn uni_plugin::traits::catalog::CatalogTable>> = None;
2573        for cat in registry.catalogs() {
2574            if let Some(t) = cat.resolve_label(name) {
2575                claimed = Some(t);
2576                break;
2577            }
2578        }
2579        // 2. ReplacementScanProvider (gated). Only consult if no
2580        //    CatalogProvider already claimed.
2581        if claimed.is_none() {
2582            use uni_plugin::traits::catalog::{Replacement, ReplacementRequest};
2583            if let Some(Replacement::CatalogTable(t)) =
2584                self.consult_replacement_scan(ReplacementRequest::Label(name))
2585            {
2586                claimed = Some(t);
2587            }
2588        }
2589        let Some(table) = claimed else {
2590            return Ok(None);
2591        };
2592        let id = registry
2593            .register_virtual_label(name, Arc::clone(&table))
2594            .map_err(|e| anyhow!("virtual label registration failed for `{name}`: {e}"))?;
2595        Ok(Some((id, table)))
2596    }
2597
2598    /// Reject any write operation that names a label currently allocated
2599    /// as a virtual (catalog-backed) label. Catalog tables are read-only
2600    /// in this milestone — there is no write-back path through
2601    /// `CatalogTable::scan` to the originating provider, so silently
2602    /// allowing the write would produce ghosted state on the host side
2603    /// without affecting the external catalog. Errors with a clear,
2604    /// actionable message.
2605    fn reject_virtual_label_writes(&self, labels: &[String], op: &str) -> Result<()> {
2606        let Some(registry) = self.plugin_registry.as_ref() else {
2607            return Ok(());
2608        };
2609        for label in labels {
2610            if registry.virtual_label_by_name(label).is_some() {
2611                return Err(anyhow!(
2612                    "Cannot {op} on virtual (catalog-resolved) label `{label}` — virtual \
2613                     labels are read-only; write back via the originating catalog \
2614                     instead"
2615                ));
2616            }
2617        }
2618        Ok(())
2619    }
2620
2621    /// Edge-type analog of [`Self::allocate_virtual_label`].
2622    fn allocate_virtual_edge_type(
2623        &self,
2624        name: &str,
2625    ) -> Result<Option<(u32, Arc<dyn uni_plugin::traits::catalog::CatalogTable>)>> {
2626        let Some(registry) = self.plugin_registry.as_ref() else {
2627            return Ok(None);
2628        };
2629        let mut claimed: Option<Arc<dyn uni_plugin::traits::catalog::CatalogTable>> = None;
2630        for cat in registry.catalogs() {
2631            if let Some(t) = cat.resolve_edge_type(name) {
2632                claimed = Some(t);
2633                break;
2634            }
2635        }
2636        let Some(table) = claimed else {
2637            return Ok(None);
2638        };
2639        let id = registry
2640            .register_virtual_edge_type(name, Arc::clone(&table))
2641            .map_err(|e| anyhow!("virtual edge-type registration failed for `{name}`: {e}"))?;
2642        Ok(Some((id, table)))
2643    }
2644
2645    /// Try to resolve an unknown identifier through replacement-scan providers
2646    /// (gated by [`Self::with_replacement_scans`]). Returns the first
2647    /// [`Replacement`] any registered provider produces, or `None` if the
2648    /// gate is off, no registry is attached, or no provider claims the
2649    /// identifier. First-match wins (mirrors DuckDB).
2650    pub(crate) fn consult_replacement_scan(
2651        &self,
2652        request: uni_plugin::traits::catalog::ReplacementRequest<'_>,
2653    ) -> Option<uni_plugin::traits::catalog::Replacement> {
2654        if !self.replacement_scans_enabled {
2655            return None;
2656        }
2657        let registry = self.plugin_registry.as_ref()?;
2658        for r in registry.replacement_scans().iter() {
2659            if let Some(replacement) = r.replace(&request) {
2660                tracing::debug!(
2661                    target: "uni.plugin.registry",
2662                    ?request,
2663                    ?replacement,
2664                    "identifier resolved via ReplacementScanProvider"
2665                );
2666                return Some(replacement);
2667            }
2668        }
2669        None
2670    }
2671
2672    /// Resolve a user-typed procedure name against the attached plugin
2673    /// registry, applying the same namespace-prefix rules as
2674    /// `ProcedureRegistry::resolve_user_procedure` (host-coupled
2675    /// procedure dispatch). Returns `true` if any namespace claims the
2676    /// name. Used by the procedure-call replacement-scan gate to decide
2677    /// whether to consult before substituting.
2678    fn procedure_resolves(&self, user_name: &str) -> bool {
2679        let Some(registry) = self.plugin_registry.as_ref() else {
2680            return false;
2681        };
2682        if let Some((ns, local)) = user_name.split_once('.')
2683            && registry
2684                .procedure(&uni_plugin::QName::new(ns, local))
2685                .is_some()
2686        {
2687            return true;
2688        }
2689        let stripped = user_name.strip_prefix("uni.").unwrap_or(user_name);
2690        for plugin_id in ["uni", "builtin", "apoc-core", "custom"] {
2691            if registry
2692                .procedure(&uni_plugin::QName::new(plugin_id, stripped))
2693                .is_some()
2694            {
2695                return true;
2696            }
2697        }
2698        false
2699    }
2700
2701    /// Construct a [`uni_plugin::QName`] from a user-typed identifier for
2702    /// passing to [`Replacement`]-scan providers. If the name is dotted,
2703    /// the last segment is the local and the rest is the namespace
2704    /// (mirroring `QName::parse`). Bare names — which Cypher allows for
2705    /// procedures (`CALL foo()`) and functions (`RETURN foo(x)`) — are
2706    /// encoded with the conventional `"user"` namespace; providers that
2707    /// want to match a bare-typed name should inspect `.local()`.
2708    fn qname_from_user(name: &str) -> uni_plugin::QName {
2709        uni_plugin::QName::parse(name).unwrap_or_else(|_| uni_plugin::QName::new("user", name))
2710    }
2711
2712    /// Apply `ReplacementScanProvider`-driven function rewrites to the
2713    /// query's AST. When the gate is off or no registry is attached, the
2714    /// walker is short-circuited and the query is returned unchanged.
2715    /// Otherwise, every [`uni_cypher::ast::Expr::FunctionCall`] is offered
2716    /// to registered providers (first-match wins); a returned
2717    /// `Replacement::Function(new_qname)` substitutes the name in place.
2718    /// Rewrite depth is capped at 1 — the rewritten name is NOT re-
2719    /// consulted (a chained `A→B→A` provider therefore stops after the
2720    /// first hop). Wrong-variant returns (`CatalogTable`, `Procedure`)
2721    /// error immediately.
2722    fn rewrite_function_calls_in_query(
2723        &self,
2724        query: uni_cypher::ast::Query,
2725    ) -> Result<uni_cypher::ast::Query> {
2726        if !self.replacement_scans_enabled || self.plugin_registry.is_none() {
2727            return Ok(query);
2728        }
2729        let mut rename = |name: &str| -> Result<Option<String>> {
2730            let qname = Self::qname_from_user(name);
2731            use uni_plugin::traits::catalog::{Replacement, ReplacementRequest};
2732            match self.consult_replacement_scan(ReplacementRequest::Function(&qname)) {
2733                Some(Replacement::Function(new_qname)) => {
2734                    // Cypher function-call dispatch is bare-name-keyed
2735                    // (the per-category translators in `df_expr` match on
2736                    // `name.to_uppercase()` against bare local strings —
2737                    // "UPPER", "ABS", etc.). When the provider returns a
2738                    // synthetic-namespace target (`builtin.*` or `user.*`),
2739                    // strip the namespace so the AST name is what those
2740                    // dispatchers expect; for plugin-namespaced targets,
2741                    // preserve the full dotted form (matches how users
2742                    // type them).
2743                    let rewritten = match new_qname.namespace() {
2744                        "builtin" | "user" => new_qname.local().to_string(),
2745                        _ => new_qname.to_string(),
2746                    };
2747                    tracing::debug!(
2748                        target: "uni.plugin.registry",
2749                        from = %name,
2750                        to = %rewritten,
2751                        "function call rerouted via ReplacementScanProvider"
2752                    );
2753                    Ok(Some(rewritten))
2754                }
2755                Some(other) => Err(anyhow!(
2756                    "ReplacementScanProvider returned wrong variant for Function \
2757                     request `{}`: expected `Function`, got {:?}",
2758                    name,
2759                    other
2760                )),
2761                None => Ok(None),
2762            }
2763        };
2764        crate::query::rewrite::function_rename::rewrite_function_calls_in_query(query, &mut rename)
2765    }
2766
2767    /// Plan a Cypher query with no pre-bound variables.
2768    pub fn plan(&self, query: Query) -> Result<LogicalPlan> {
2769        self.plan_with_scope(query, Vec::new())
2770    }
2771
2772    /// Plan a Cypher query with a set of externally pre-bound variable names.
2773    ///
2774    /// `vars` lists variable names already in scope before this query executes
2775    /// (e.g., from an enclosing Locy rule body).
2776    pub fn plan_with_scope(&self, query: Query, vars: Vec<String>) -> Result<LogicalPlan> {
2777        // Apply query rewrites before planning
2778        let rewritten_query = crate::query::rewrite::rewrite_query(query)?;
2779        // M5 follow-up #5: function-call rewrite via ReplacementScanProvider.
2780        // Done as an AST pass *before* planning so the rewritten name flows
2781        // through every downstream stage (translation, UDF resolution,
2782        // execution) as if the user had typed it. No-op when the gate is
2783        // off or no provider claims the call. First-match wins; hard-cap
2784        // at one rewrite per call site (the rewritten name is NOT re-
2785        // consulted) — see `rewrite_function_calls_in_query`.
2786        let rewritten_query = self.rewrite_function_calls_in_query(rewritten_query)?;
2787        if Self::has_mixed_union_modes(&rewritten_query) {
2788            return Err(anyhow!(
2789                "SyntaxError: InvalidClauseComposition - Cannot mix UNION and UNION ALL in the same query"
2790            ));
2791        }
2792
2793        match rewritten_query {
2794            Query::Single(stmt) => self.plan_single(stmt, vars),
2795            Query::Union { left, right, all } => {
2796                let l = self.plan_with_scope(*left, vars.clone())?;
2797                let r = self.plan_with_scope(*right, vars)?;
2798
2799                // Validate that both sides have the same column names
2800                let left_cols = Self::extract_projection_columns(&l);
2801                let right_cols = Self::extract_projection_columns(&r);
2802
2803                if left_cols != right_cols {
2804                    return Err(anyhow!(
2805                        "SyntaxError: DifferentColumnsInUnion - UNION queries must have same column names"
2806                    ));
2807                }
2808
2809                Ok(LogicalPlan::Union {
2810                    left: Box::new(l),
2811                    right: Box::new(r),
2812                    all,
2813                })
2814            }
2815            Query::Schema(cmd) => self.plan_schema_command(*cmd),
2816            Query::Explain(inner) => {
2817                let inner_plan = self.plan_with_scope(*inner, vars)?;
2818                Ok(LogicalPlan::Explain {
2819                    plan: Box::new(inner_plan),
2820                })
2821            }
2822            Query::TimeTravel { .. } => {
2823                unreachable!("TimeTravel should be resolved at API layer before planning")
2824            }
2825        }
2826    }
2827
2828    fn collect_union_modes(query: &Query, out: &mut HashSet<bool>) {
2829        match query {
2830            Query::Union { left, right, all } => {
2831                out.insert(*all);
2832                Self::collect_union_modes(left, out);
2833                Self::collect_union_modes(right, out);
2834            }
2835            Query::Explain(inner) => Self::collect_union_modes(inner, out),
2836            Query::TimeTravel { query, .. } => Self::collect_union_modes(query, out),
2837            Query::Single(_) | Query::Schema(_) => {}
2838        }
2839    }
2840
2841    fn has_mixed_union_modes(query: &Query) -> bool {
2842        let mut modes = HashSet::new();
2843        Self::collect_union_modes(query, &mut modes);
2844        modes.len() > 1
2845    }
2846
2847    fn next_anon_var(&self) -> String {
2848        let id = self
2849            .anon_counter
2850            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
2851        format!("_anon_{}", id)
2852    }
2853
2854    /// Extract projection column names from a logical plan.
2855    /// Used for UNION column validation.
2856    fn extract_projection_columns(plan: &LogicalPlan) -> Vec<String> {
2857        match plan {
2858            LogicalPlan::Project { projections, .. } => projections
2859                .iter()
2860                .map(|(expr, alias)| alias.clone().unwrap_or_else(|| expr.to_string_repr()))
2861                .collect(),
2862            LogicalPlan::Limit { input, .. }
2863            | LogicalPlan::Sort { input, .. }
2864            | LogicalPlan::Distinct { input, .. }
2865            | LogicalPlan::Filter { input, .. } => Self::extract_projection_columns(input),
2866            LogicalPlan::Union { left, right, .. } => {
2867                let left_cols = Self::extract_projection_columns(left);
2868                if left_cols.is_empty() {
2869                    Self::extract_projection_columns(right)
2870                } else {
2871                    left_cols
2872                }
2873            }
2874            LogicalPlan::Aggregate {
2875                group_by,
2876                aggregates,
2877                ..
2878            } => {
2879                let mut cols: Vec<String> = group_by.iter().map(|e| e.to_string_repr()).collect();
2880                cols.extend(aggregates.iter().map(|e| e.to_string_repr()));
2881                cols
2882            }
2883            _ => Vec::new(),
2884        }
2885    }
2886
2887    fn plan_return_clause(
2888        &self,
2889        return_clause: &ReturnClause,
2890        plan: LogicalPlan,
2891        vars_in_scope: &[VariableInfo],
2892    ) -> Result<LogicalPlan> {
2893        let mut plan = plan;
2894        let mut group_by = Vec::new();
2895        let mut aggregates = Vec::new();
2896        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
2897        let mut has_agg = false;
2898        let mut projections = Vec::new();
2899        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
2900        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
2901        let mut projected_aliases: HashSet<String> = HashSet::new();
2902
2903        for item in &return_clause.items {
2904            match item {
2905                ReturnItem::All => {
2906                    // RETURN * - add all user-named variables in scope
2907                    // (anonymous variables like _anon_0 are excluded)
2908                    let user_vars: Vec<_> = vars_in_scope
2909                        .iter()
2910                        .filter(|v| !v.name.starts_with("_anon_"))
2911                        .collect();
2912                    if user_vars.is_empty() {
2913                        return Err(anyhow!(
2914                            "SyntaxError: NoVariablesInScope - RETURN * is not allowed when there are no variables in scope"
2915                        ));
2916                    }
2917                    for v in user_vars {
2918                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2919                        if !group_by.contains(&Expr::Variable(v.name.clone())) {
2920                            group_by.push(Expr::Variable(v.name.clone()));
2921                        }
2922                        projected_aliases.insert(v.name.clone());
2923                        projected_simple_reprs.insert(v.name.clone());
2924                    }
2925                }
2926                ReturnItem::Expr {
2927                    expr,
2928                    alias,
2929                    source_text,
2930                } => {
2931                    if matches!(expr, Expr::Wildcard) {
2932                        for v in vars_in_scope {
2933                            projections
2934                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2935                            if !group_by.contains(&Expr::Variable(v.name.clone())) {
2936                                group_by.push(Expr::Variable(v.name.clone()));
2937                            }
2938                            projected_aliases.insert(v.name.clone());
2939                            projected_simple_reprs.insert(v.name.clone());
2940                        }
2941                    } else {
2942                        // Validate expression variables are defined
2943                        validate_expression_variables(expr, vars_in_scope)?;
2944                        // Validate function argument types and boolean operators
2945                        validate_expression(expr, vars_in_scope)?;
2946                        // Pattern predicates are not allowed in RETURN
2947                        if contains_pattern_predicate(expr) {
2948                            return Err(anyhow!(
2949                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in RETURN"
2950                            ));
2951                        }
2952
2953                        // Use source text as column name when no explicit alias
2954                        let effective_alias = alias.clone().or_else(|| source_text.clone());
2955                        projections.push((expr.clone(), effective_alias));
2956                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
2957                            // Bare aggregate — push directly
2958                            has_agg = true;
2959                            aggregates.push(expr.clone());
2960                            projected_aggregate_reprs.insert(expr.to_string_repr());
2961                        } else if !is_window_function(expr)
2962                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
2963                        {
2964                            // Compound aggregate or expression containing aggregates —
2965                            // extract the inner bare aggregates for the Aggregate node
2966                            has_agg = true;
2967                            compound_agg_exprs.push(expr.clone());
2968                            for inner in extract_inner_aggregates(expr) {
2969                                let repr = inner.to_string_repr();
2970                                if !projected_aggregate_reprs.contains(&repr) {
2971                                    aggregates.push(inner);
2972                                    projected_aggregate_reprs.insert(repr);
2973                                }
2974                            }
2975                        } else if !group_by.contains(expr) {
2976                            group_by.push(expr.clone());
2977                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
2978                                projected_simple_reprs.insert(expr.to_string_repr());
2979                            }
2980                        }
2981
2982                        if let Some(a) = alias {
2983                            if projected_aliases.contains(a) {
2984                                return Err(anyhow!(
2985                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2986                                    a
2987                                ));
2988                            }
2989                            projected_aliases.insert(a.clone());
2990                        } else if let Expr::Variable(v) = expr {
2991                            if projected_aliases.contains(v) {
2992                                return Err(anyhow!(
2993                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2994                                    v
2995                                ));
2996                            }
2997                            projected_aliases.insert(v.clone());
2998                        }
2999                    }
3000                }
3001            }
3002        }
3003
3004        // Validate compound aggregate expressions: non-aggregate refs must be
3005        // individually present in the group_by as simple variables or properties.
3006        if has_agg {
3007            let group_by_reprs: HashSet<String> =
3008                group_by.iter().map(|e| e.to_string_repr()).collect();
3009            for expr in &compound_agg_exprs {
3010                let mut refs = Vec::new();
3011                collect_non_aggregate_refs(expr, false, &mut refs);
3012                for r in &refs {
3013                    let is_covered = match r {
3014                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
3015                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
3016                    };
3017                    if !is_covered {
3018                        return Err(anyhow!(
3019                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
3020                        ));
3021                    }
3022                }
3023            }
3024        }
3025
3026        if has_agg {
3027            plan = LogicalPlan::Aggregate {
3028                input: Box::new(plan),
3029                group_by,
3030                aggregates,
3031            };
3032        }
3033
3034        let mut window_exprs = Vec::new();
3035        for (expr, _) in &projections {
3036            Self::collect_window_functions(expr, &mut window_exprs);
3037        }
3038
3039        if let Some(order_by) = &return_clause.order_by {
3040            for item in order_by {
3041                Self::collect_window_functions(&item.expr, &mut window_exprs);
3042            }
3043        }
3044
3045        let has_window_exprs = !window_exprs.is_empty();
3046
3047        if has_window_exprs {
3048            // Before creating the Window node, we need to ensure all properties
3049            // referenced by window functions are available. Create a Project node
3050            // that loads these properties.
3051            let mut props_needed_for_window: Vec<Expr> = Vec::new();
3052            for window_expr in &window_exprs {
3053                Self::collect_properties_from_expr(window_expr, &mut props_needed_for_window);
3054            }
3055
3056            // Also include non-window expressions from projections that might be needed
3057            // Preserve qualified names (e.g., "e.salary") as aliases for properties
3058            let non_window_projections: Vec<_> = projections
3059                .iter()
3060                .filter_map(|(expr, alias)| {
3061                    // Keep expressions that don't have window_spec
3062                    let keep = if let Expr::FunctionCall { window_spec, .. } = expr {
3063                        window_spec.is_none()
3064                    } else {
3065                        true
3066                    };
3067
3068                    if keep {
3069                        // For property references, use the qualified name as alias
3070                        let new_alias = if matches!(expr, Expr::Property(..)) {
3071                            Some(expr.to_string_repr())
3072                        } else {
3073                            alias.clone()
3074                        };
3075                        Some((expr.clone(), new_alias))
3076                    } else {
3077                        None
3078                    }
3079                })
3080                .collect();
3081
3082            if !non_window_projections.is_empty() || !props_needed_for_window.is_empty() {
3083                let mut intermediate_projections = non_window_projections;
3084                // Add any additional property references needed by window functions
3085                // IMPORTANT: Preserve qualified names (e.g., "e.salary") as aliases so window functions can reference them
3086                for prop in &props_needed_for_window {
3087                    if !intermediate_projections
3088                        .iter()
3089                        .any(|(e, _)| e.to_string_repr() == prop.to_string_repr())
3090                    {
3091                        let qualified_name = prop.to_string_repr();
3092                        intermediate_projections.push((prop.clone(), Some(qualified_name)));
3093                    }
3094                }
3095
3096                if !intermediate_projections.is_empty() {
3097                    plan = LogicalPlan::Project {
3098                        input: Box::new(plan),
3099                        projections: intermediate_projections,
3100                    };
3101                }
3102            }
3103
3104            // Transform property expressions in window functions to use qualified variable names
3105            // so that e.dept becomes "e.dept" variable that can be looked up from the row HashMap
3106            let transformed_window_exprs: Vec<Expr> = window_exprs
3107                .into_iter()
3108                .map(Self::transform_window_expr_properties)
3109                .collect();
3110
3111            plan = LogicalPlan::Window {
3112                input: Box::new(plan),
3113                window_exprs: transformed_window_exprs,
3114            };
3115        }
3116
3117        if let Some(order_by) = &return_clause.order_by {
3118            let alias_exprs: HashMap<String, Expr> = projections
3119                .iter()
3120                .filter_map(|(expr, alias)| {
3121                    alias.as_ref().map(|a| {
3122                        // ORDER BY is planned before the final RETURN projection.
3123                        // In aggregate contexts, aliases must resolve to the
3124                        // post-aggregate output columns, not raw aggregate calls.
3125                        let rewritten = if has_agg && !has_window_exprs {
3126                            if expr.is_aggregate() && !is_compound_aggregate(expr) {
3127                                Expr::Variable(aggregate_column_name(expr))
3128                            } else if is_compound_aggregate(expr)
3129                                || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
3130                            {
3131                                replace_aggregates_with_columns(expr)
3132                            } else {
3133                                Expr::Variable(expr.to_string_repr())
3134                            }
3135                        } else {
3136                            expr.clone()
3137                        };
3138                        (a.clone(), rewritten)
3139                    })
3140                })
3141                .collect();
3142
3143            // Build an extended scope that includes RETURN aliases so ORDER BY
3144            // can reference them (e.g. RETURN n.age AS age ORDER BY age).
3145            let order_by_scope: Vec<VariableInfo> = if return_clause.distinct {
3146                // DISTINCT in RETURN narrows ORDER BY visibility to returned columns.
3147                // Keep aliases and directly returned variables in scope.
3148                let mut scope = Vec::new();
3149                for (expr, alias) in &projections {
3150                    if let Some(a) = alias
3151                        && !is_var_in_scope(&scope, a)
3152                    {
3153                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
3154                    }
3155                    if let Expr::Variable(v) = expr
3156                        && !is_var_in_scope(&scope, v)
3157                    {
3158                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
3159                    }
3160                }
3161                scope
3162            } else {
3163                let mut scope = vars_in_scope.to_vec();
3164                for (expr, alias) in &projections {
3165                    if let Some(a) = alias
3166                        && !is_var_in_scope(&scope, a)
3167                    {
3168                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
3169                    } else if let Expr::Variable(v) = expr
3170                        && !is_var_in_scope(&scope, v)
3171                    {
3172                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
3173                    }
3174                }
3175                scope
3176            };
3177            // Validate ORDER BY expressions against the extended scope
3178            for item in order_by {
3179                // DISTINCT allows ORDER BY on the same projected expression
3180                // even when underlying variables are not otherwise visible.
3181                let matches_projected_expr = return_clause.distinct
3182                    && projections
3183                        .iter()
3184                        .any(|(expr, _)| expr.to_string_repr() == item.expr.to_string_repr());
3185                if !matches_projected_expr {
3186                    validate_expression_variables(&item.expr, &order_by_scope)?;
3187                    validate_expression(&item.expr, &order_by_scope)?;
3188                }
3189                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
3190                if has_aggregate_in_item && !has_agg {
3191                    return Err(anyhow!(
3192                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY after RETURN"
3193                    ));
3194                }
3195                if has_agg && has_aggregate_in_item {
3196                    validate_with_order_by_aggregate_item(
3197                        &item.expr,
3198                        &projected_aggregate_reprs,
3199                        &projected_simple_reprs,
3200                        &projected_aliases,
3201                    )?;
3202                }
3203            }
3204            let rewritten_order_by: Vec<SortItem> = order_by
3205                .iter()
3206                .map(|item| SortItem {
3207                    expr: {
3208                        let mut rewritten =
3209                            rewrite_order_by_expr_with_aliases(&item.expr, &alias_exprs);
3210                        if has_agg && !has_window_exprs {
3211                            rewritten = replace_aggregates_with_columns(&rewritten);
3212                        }
3213                        rewritten
3214                    },
3215                    ascending: item.ascending,
3216                })
3217                .collect();
3218            plan = LogicalPlan::Sort {
3219                input: Box::new(plan),
3220                order_by: rewritten_order_by,
3221            };
3222        }
3223
3224        if return_clause.skip.is_some() || return_clause.limit.is_some() {
3225            let skip = return_clause
3226                .skip
3227                .as_ref()
3228                .map(|e| {
3229                    self.note_folded_limit_skip(e);
3230                    parse_non_negative_integer(e, "SKIP", &self.params)
3231                })
3232                .transpose()?
3233                .flatten();
3234            let fetch = return_clause
3235                .limit
3236                .as_ref()
3237                .map(|e| {
3238                    self.note_folded_limit_skip(e);
3239                    parse_non_negative_integer(e, "LIMIT", &self.params)
3240                })
3241                .transpose()?
3242                .flatten();
3243
3244            plan = LogicalPlan::Limit {
3245                input: Box::new(plan),
3246                skip,
3247                fetch,
3248            };
3249        }
3250
3251        if !projections.is_empty() {
3252            // If we created an Aggregate or Window node, we need to adjust the final projections
3253            // to reference aggregate/window function results as columns instead of re-evaluating them
3254            let final_projections = if has_agg || has_window_exprs {
3255                projections
3256                    .into_iter()
3257                    .map(|(expr, alias)| {
3258                        // Check if this expression is an aggregate function
3259                        if expr.is_aggregate() && !is_compound_aggregate(&expr) && !has_window_exprs
3260                        {
3261                            // Bare aggregate — replace with column reference
3262                            let col_name = aggregate_column_name(&expr);
3263                            (Expr::Variable(col_name), alias)
3264                        } else if !has_window_exprs
3265                            && (is_compound_aggregate(&expr)
3266                                || (!expr.is_aggregate() && contains_aggregate_recursive(&expr)))
3267                        {
3268                            // Compound aggregate — replace inner aggregates with
3269                            // column references, keep outer expression for Project
3270                            (replace_aggregates_with_columns(&expr), alias)
3271                        }
3272                        // For grouped RETURN projections, reference the pre-computed
3273                        // group-by output column instead of re-evaluating the expression
3274                        // against the aggregate schema (which no longer has original vars).
3275                        else if has_agg
3276                            && !has_window_exprs
3277                            && !matches!(expr, Expr::Variable(_) | Expr::Property(_, _))
3278                        {
3279                            (Expr::Variable(expr.to_string_repr()), alias)
3280                        }
3281                        // Check if this expression is a window function
3282                        else if let Expr::FunctionCall {
3283                            window_spec: Some(_),
3284                            ..
3285                        } = &expr
3286                        {
3287                            // Replace window function with a column reference to its result
3288                            // The column name in the Window output is the full expression string
3289                            let window_col_name = expr.to_string_repr();
3290                            // Keep the original alias for the final output
3291                            (Expr::Variable(window_col_name), alias)
3292                        } else {
3293                            (expr, alias)
3294                        }
3295                    })
3296                    .collect()
3297            } else {
3298                projections
3299            };
3300
3301            plan = LogicalPlan::Project {
3302                input: Box::new(plan),
3303                projections: final_projections,
3304            };
3305        }
3306
3307        if return_clause.distinct {
3308            plan = LogicalPlan::Distinct {
3309                input: Box::new(plan),
3310            };
3311        }
3312
3313        Ok(plan)
3314    }
3315
3316    fn plan_single(&self, query: Statement, initial_vars: Vec<String>) -> Result<LogicalPlan> {
3317        let typed_vars: Vec<VariableInfo> = initial_vars
3318            .into_iter()
3319            .map(|name| VariableInfo::new(name, VariableType::Imported))
3320            .collect();
3321        self.plan_single_typed(query, typed_vars)
3322    }
3323
3324    /// Rewrite a query then plan it, preserving typed variable scope when possible.
3325    ///
3326    /// For `Query::Single` statements, uses `plan_single_typed` to carry typed
3327    /// variable info through and avoid false type-conflict errors in subqueries.
3328    /// For unions and other compound queries, falls back to `plan_with_scope`.
3329    fn rewrite_and_plan_typed(
3330        &self,
3331        query: Query,
3332        typed_vars: &[VariableInfo],
3333    ) -> Result<LogicalPlan> {
3334        let rewritten = crate::query::rewrite::rewrite_query(query)?;
3335        match rewritten {
3336            Query::Single(stmt) => self.plan_single_typed(stmt, typed_vars.to_vec()),
3337            other => self.plan_with_scope(other, vars_to_strings(typed_vars)),
3338        }
3339    }
3340
3341    fn plan_single_typed(
3342        &self,
3343        query: Statement,
3344        initial_vars: Vec<VariableInfo>,
3345    ) -> Result<LogicalPlan> {
3346        let mut plan = LogicalPlan::Empty;
3347
3348        if !initial_vars.is_empty() {
3349            // Project bound variables from outer scope as parameters.
3350            // These come from the enclosing query's row (passed as sub_params in EXISTS evaluation).
3351            // Use Parameter expressions to read from params, not Variable which would read from input row.
3352            let projections = initial_vars
3353                .iter()
3354                .map(|v| (Expr::Parameter(v.name.clone()), Some(v.name.clone())))
3355                .collect();
3356            plan = LogicalPlan::Project {
3357                input: Box::new(plan),
3358                projections,
3359            };
3360        }
3361
3362        let mut vars_in_scope: Vec<VariableInfo> = initial_vars;
3363        // Track variables introduced by CREATE clauses so we can distinguish
3364        // MATCH-introduced variables (which cannot be re-created as bare nodes)
3365        // from CREATE-introduced variables (which can be referenced as bare nodes).
3366        let mut create_introduced_vars: HashSet<String> = HashSet::new();
3367        // Track variables targeted by DELETE so we can reject property/label
3368        // access on deleted entities in subsequent RETURN clauses.
3369        let mut deleted_vars: HashSet<String> = HashSet::new();
3370
3371        let clause_count = query.clauses.len();
3372        for (clause_idx, clause) in query.clauses.into_iter().enumerate() {
3373            match clause {
3374                Clause::Match(match_clause) => {
3375                    plan = self.plan_match_clause(&match_clause, plan, &mut vars_in_scope)?;
3376                }
3377                Clause::Unwind(unwind) => {
3378                    plan = LogicalPlan::Unwind {
3379                        input: Box::new(plan),
3380                        expr: unwind.expr.clone(),
3381                        variable: unwind.variable.clone(),
3382                    };
3383                    let unwind_out_type = infer_unwind_output_type(&unwind.expr, &vars_in_scope);
3384                    add_var_to_scope(&mut vars_in_scope, &unwind.variable, unwind_out_type)?;
3385                }
3386                Clause::Call(call_clause) => {
3387                    match &call_clause.kind {
3388                        CallKind::Procedure {
3389                            procedure,
3390                            arguments,
3391                        } => {
3392                            // Validate that procedure arguments don't contain aggregation functions
3393                            for arg in arguments {
3394                                if contains_aggregate_recursive(arg) {
3395                                    return Err(anyhow!(
3396                                        "SyntaxError: InvalidAggregation - Aggregation expressions are not allowed as arguments to procedure calls"
3397                                    ));
3398                                }
3399                            }
3400
3401                            let has_yield_star = call_clause.yield_items.len() == 1
3402                                && call_clause.yield_items[0].name == "*"
3403                                && call_clause.yield_items[0].alias.is_none();
3404                            if has_yield_star && clause_idx + 1 < clause_count {
3405                                return Err(anyhow!(
3406                                    "SyntaxError: UnexpectedSyntax - YIELD * is only allowed in standalone procedure calls"
3407                                ));
3408                            }
3409
3410                            // Validate for duplicate yield names (VariableAlreadyBound)
3411                            let mut yield_names = Vec::new();
3412                            for item in &call_clause.yield_items {
3413                                if item.name == "*" {
3414                                    continue;
3415                                }
3416                                let output_name = item.alias.as_ref().unwrap_or(&item.name);
3417                                if yield_names.contains(output_name) {
3418                                    return Err(anyhow!(
3419                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already appears in YIELD clause",
3420                                        output_name
3421                                    ));
3422                                }
3423                                // Check against existing scope (in-query CALL must not shadow)
3424                                if clause_idx > 0
3425                                    && vars_in_scope.iter().any(|v| v.name == *output_name)
3426                                {
3427                                    return Err(anyhow!(
3428                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already declared in outer scope",
3429                                        output_name
3430                                    ));
3431                                }
3432                                yield_names.push(output_name.clone());
3433                            }
3434
3435                            let mut yields = Vec::new();
3436                            for item in &call_clause.yield_items {
3437                                if item.name == "*" {
3438                                    continue;
3439                                }
3440                                yields.push((item.name.clone(), item.alias.clone()));
3441                                let var_name = item.alias.as_ref().unwrap_or(&item.name);
3442                                // Use Imported because procedure return types are unknown
3443                                // at plan time (could be nodes, edges, or scalars)
3444                                add_var_to_scope(
3445                                    &mut vars_in_scope,
3446                                    var_name,
3447                                    VariableType::Imported,
3448                                )?;
3449                            }
3450                            // M5 follow-up #5: if replacement-scan dispatch is
3451                            // enabled and the procedure name does not resolve
3452                            // against the plugin registry, consult registered
3453                            // `ReplacementScanProvider`s. A `Replacement::Procedure`
3454                            // substitutes the call's target name in the logical
3455                            // plan; the rewritten name must itself resolve or
3456                            // we error immediately (no second-tier consult — caps
3457                            // rewrite depth at one).
3458                            let procedure_name = if self.replacement_scans_enabled
3459                                && !self.procedure_resolves(procedure)
3460                            {
3461                                use uni_plugin::traits::catalog::{
3462                                    Replacement, ReplacementRequest,
3463                                };
3464                                let qname = Self::qname_from_user(procedure);
3465                                match self
3466                                    .consult_replacement_scan(ReplacementRequest::Procedure(&qname))
3467                                {
3468                                    Some(Replacement::Procedure(new_qname)) => {
3469                                        let rewritten = new_qname.to_string();
3470                                        if !self.procedure_resolves(&rewritten) {
3471                                            return Err(anyhow!(
3472                                                "ReplacementScanProvider rerouted procedure \
3473                                                 `{}` to `{}`, which also did not resolve",
3474                                                procedure,
3475                                                rewritten
3476                                            ));
3477                                        }
3478                                        tracing::debug!(
3479                                            target: "uni.plugin.registry",
3480                                            from = %procedure,
3481                                            to = %rewritten,
3482                                            "procedure rerouted via ReplacementScanProvider"
3483                                        );
3484                                        rewritten
3485                                    }
3486                                    Some(other) => {
3487                                        return Err(anyhow!(
3488                                            "ReplacementScanProvider returned wrong variant \
3489                                             for Procedure request `{}`: expected \
3490                                             `Procedure`, got {:?}",
3491                                            procedure,
3492                                            other
3493                                        ));
3494                                    }
3495                                    None => procedure.clone(),
3496                                }
3497                            } else {
3498                                procedure.clone()
3499                            };
3500                            let proc_plan = LogicalPlan::ProcedureCall {
3501                                procedure_name,
3502                                arguments: arguments.clone(),
3503                                yield_items: yields.clone(),
3504                            };
3505
3506                            if matches!(plan, LogicalPlan::Empty) {
3507                                // Standalone CALL (first clause) — use directly
3508                                plan = proc_plan;
3509                            } else if yields.is_empty() {
3510                                // In-query CALL with no YIELD (void procedure):
3511                                // preserve the input rows unchanged
3512                            } else {
3513                                // In-query CALL with YIELD: cross-join input × procedure output
3514                                plan = LogicalPlan::Apply {
3515                                    input: Box::new(plan),
3516                                    subquery: Box::new(proc_plan),
3517                                    input_filter: None,
3518                                };
3519                            }
3520                        }
3521                        CallKind::Subquery(query) => {
3522                            let subquery_plan =
3523                                self.rewrite_and_plan_typed(*query.clone(), &vars_in_scope)?;
3524
3525                            // Extract variables from subquery RETURN clause
3526                            let subquery_vars = Self::collect_plan_variables(&subquery_plan);
3527
3528                            // Add new variables to scope (as Scalar since they come from subquery projection)
3529                            for var in subquery_vars {
3530                                if !is_var_in_scope(&vars_in_scope, &var) {
3531                                    add_var_to_scope(
3532                                        &mut vars_in_scope,
3533                                        &var,
3534                                        VariableType::Scalar,
3535                                    )?;
3536                                }
3537                            }
3538
3539                            plan = LogicalPlan::SubqueryCall {
3540                                input: Box::new(plan),
3541                                subquery: Box::new(subquery_plan),
3542                            };
3543                        }
3544                    }
3545                }
3546                Clause::Merge(merge_clause) => {
3547                    validate_merge_clause(&merge_clause, &vars_in_scope)?;
3548                    // M5 follow-up #6: virtual (catalog-resolved) labels are
3549                    // read-only — reject MERGE that names one.
3550                    let merge_labels = collect_pattern_labels(&merge_clause.pattern);
3551                    self.reject_virtual_label_writes(&merge_labels, "MERGE")?;
3552
3553                    plan = LogicalPlan::Merge {
3554                        input: Box::new(plan),
3555                        pattern: merge_clause.pattern.clone(),
3556                        on_match: Some(SetClause {
3557                            items: merge_clause.on_match.clone(),
3558                        }),
3559                        on_create: Some(SetClause {
3560                            items: merge_clause.on_create.clone(),
3561                        }),
3562                    };
3563
3564                    for path in &merge_clause.pattern.paths {
3565                        if let Some(path_var) = &path.variable
3566                            && !path_var.is_empty()
3567                            && !is_var_in_scope(&vars_in_scope, path_var)
3568                        {
3569                            add_var_to_scope(&mut vars_in_scope, path_var, VariableType::Path)?;
3570                        }
3571                        for element in &path.elements {
3572                            if let PatternElement::Node(n) = element {
3573                                if let Some(v) = &n.variable
3574                                    && !is_var_in_scope(&vars_in_scope, v)
3575                                {
3576                                    add_var_to_scope(&mut vars_in_scope, v, VariableType::Node)?;
3577                                }
3578                            } else if let PatternElement::Relationship(r) = element
3579                                && let Some(v) = &r.variable
3580                                && !is_var_in_scope(&vars_in_scope, v)
3581                            {
3582                                add_var_to_scope(&mut vars_in_scope, v, VariableType::Edge)?;
3583                            }
3584                        }
3585                    }
3586                }
3587                Clause::Create(create_clause) => {
3588                    // M5 follow-up #6: virtual (catalog-resolved) labels are
3589                    // read-only — reject CREATE that names one.
3590                    let create_labels = collect_pattern_labels(&create_clause.pattern);
3591                    self.reject_virtual_label_writes(&create_labels, "CREATE")?;
3592                    // Validate CREATE patterns:
3593                    // - Nodes with labels/properties are "creations" - can't rebind existing variables
3594                    // - Bare nodes (v) are "references" if bound, "creations" if not
3595                    // - Relationships are always creations - can't rebind
3596                    // - Within CREATE, each new variable can only be defined once
3597                    // - Variables used in properties must be defined
3598                    let mut create_vars: Vec<&str> = Vec::new();
3599                    for path in &create_clause.pattern.paths {
3600                        let is_standalone_node = path.elements.len() == 1;
3601                        for element in &path.elements {
3602                            match element {
3603                                PatternElement::Node(n) => {
3604                                    validate_property_variables(
3605                                        &n.properties,
3606                                        &vars_in_scope,
3607                                        &create_vars,
3608                                    )?;
3609
3610                                    if let Some(v) = n.variable.as_deref()
3611                                        && !v.is_empty()
3612                                    {
3613                                        // A node is a "creation" if it has labels or properties
3614                                        let is_creation =
3615                                            !n.labels.is_empty() || n.properties.is_some();
3616
3617                                        if is_creation {
3618                                            check_not_already_bound(
3619                                                v,
3620                                                &vars_in_scope,
3621                                                &create_vars,
3622                                            )?;
3623                                            create_vars.push(v);
3624                                        } else if is_standalone_node
3625                                            && is_var_in_scope(&vars_in_scope, v)
3626                                            && !create_introduced_vars.contains(v)
3627                                        {
3628                                            // Standalone bare node referencing a variable from a
3629                                            // non-CREATE clause (e.g. MATCH (a) CREATE (a)) — invalid.
3630                                            // Bare nodes used as relationship endpoints
3631                                            // (e.g. CREATE (a)-[:R]->(b)) are valid references.
3632                                            return Err(anyhow!(
3633                                                "SyntaxError: VariableAlreadyBound - '{}'",
3634                                                v
3635                                            ));
3636                                        } else if !create_vars.contains(&v) {
3637                                            // New bare variable — register it
3638                                            create_vars.push(v);
3639                                        }
3640                                        // else: bare reference to same-CREATE or previous-CREATE variable — OK
3641                                    }
3642                                }
3643                                PatternElement::Relationship(r) => {
3644                                    validate_property_variables(
3645                                        &r.properties,
3646                                        &vars_in_scope,
3647                                        &create_vars,
3648                                    )?;
3649
3650                                    if let Some(v) = r.variable.as_deref()
3651                                        && !v.is_empty()
3652                                    {
3653                                        check_not_already_bound(v, &vars_in_scope, &create_vars)?;
3654                                        create_vars.push(v);
3655                                    }
3656
3657                                    // Validate relationship constraints for CREATE
3658                                    if r.types.len() != 1 {
3659                                        return Err(anyhow!(
3660                                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for CREATE"
3661                                        ));
3662                                    }
3663                                    if r.direction == Direction::Both {
3664                                        return Err(anyhow!(
3665                                            "SyntaxError: RequiresDirectedRelationship - Only directed relationships are supported in CREATE"
3666                                        ));
3667                                    }
3668                                    if r.range.is_some() {
3669                                        return Err(anyhow!(
3670                                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
3671                                        ));
3672                                    }
3673                                }
3674                                PatternElement::Parenthesized { .. } => {}
3675                            }
3676                        }
3677                    }
3678
3679                    // Batch consecutive CREATEs to avoid deep recursion
3680                    match &mut plan {
3681                        LogicalPlan::CreateBatch { patterns, .. } => {
3682                            // Append to existing batch
3683                            patterns.push(create_clause.pattern.clone());
3684                        }
3685                        LogicalPlan::Create { input, pattern } => {
3686                            // Convert single Create to CreateBatch with both patterns
3687                            let first_pattern = pattern.clone();
3688                            plan = LogicalPlan::CreateBatch {
3689                                input: input.clone(),
3690                                patterns: vec![first_pattern, create_clause.pattern.clone()],
3691                            };
3692                        }
3693                        _ => {
3694                            // Start new Create (may become batch if more CREATEs follow)
3695                            plan = LogicalPlan::Create {
3696                                input: Box::new(plan),
3697                                pattern: create_clause.pattern.clone(),
3698                            };
3699                        }
3700                    }
3701                    // Add variables from created nodes and relationships to scope
3702                    for path in &create_clause.pattern.paths {
3703                        for element in &path.elements {
3704                            match element {
3705                                PatternElement::Node(n) => {
3706                                    if let Some(var) = &n.variable
3707                                        && !var.is_empty()
3708                                    {
3709                                        create_introduced_vars.insert(var.clone());
3710                                        add_var_to_scope(
3711                                            &mut vars_in_scope,
3712                                            var,
3713                                            VariableType::Node,
3714                                        )?;
3715                                    }
3716                                }
3717                                PatternElement::Relationship(r) => {
3718                                    if let Some(var) = &r.variable
3719                                        && !var.is_empty()
3720                                    {
3721                                        create_introduced_vars.insert(var.clone());
3722                                        add_var_to_scope(
3723                                            &mut vars_in_scope,
3724                                            var,
3725                                            VariableType::Edge,
3726                                        )?;
3727                                    }
3728                                }
3729                                PatternElement::Parenthesized { .. } => {
3730                                    // Skip for now - not commonly used in CREATE
3731                                }
3732                            }
3733                        }
3734                    }
3735                }
3736                Clause::Set(set_clause) => {
3737                    // Validate SET value expressions
3738                    for item in &set_clause.items {
3739                        match item {
3740                            SetItem::Property { value, .. }
3741                            | SetItem::Variable { value, .. }
3742                            | SetItem::VariablePlus { value, .. } => {
3743                                validate_expression_variables(value, &vars_in_scope)?;
3744                                validate_expression(value, &vars_in_scope)?;
3745                                if contains_pattern_predicate(value) {
3746                                    return Err(anyhow!(
3747                                        "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
3748                                    ));
3749                                }
3750                            }
3751                            SetItem::Labels { .. } => {}
3752                        }
3753                    }
3754                    plan = LogicalPlan::Set {
3755                        input: Box::new(plan),
3756                        items: set_clause.items.clone(),
3757                    };
3758                }
3759                Clause::Remove(remove_clause) => {
3760                    plan = LogicalPlan::Remove {
3761                        input: Box::new(plan),
3762                        items: remove_clause.items.clone(),
3763                    };
3764                }
3765                Clause::Delete(delete_clause) => {
3766                    // Validate DELETE targets
3767                    for item in &delete_clause.items {
3768                        // DELETE n:Label is invalid syntax (label expressions not allowed)
3769                        if matches!(item, Expr::LabelCheck { .. }) {
3770                            return Err(anyhow!(
3771                                "SyntaxError: InvalidDelete - DELETE requires a simple variable reference, not a label expression"
3772                            ));
3773                        }
3774                        let vars_used = collect_expr_variables(item);
3775                        // Reject expressions with no variable references (e.g. DELETE 1+1)
3776                        if vars_used.is_empty() {
3777                            return Err(anyhow!(
3778                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, not a literal expression"
3779                            ));
3780                        }
3781                        for var in &vars_used {
3782                            // Check if variable is defined
3783                            if find_var_in_scope(&vars_in_scope, var).is_none() {
3784                                return Err(anyhow!(
3785                                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
3786                                    var
3787                                ));
3788                            }
3789                        }
3790                        // Strict type check only for simple variable references —
3791                        // complex expressions (property access, array index, etc.)
3792                        // may resolve to a node/edge at runtime even if the base
3793                        // variable is typed as Scalar (e.g. nodes(p)[0]).
3794                        if let Expr::Variable(name) = item
3795                            && let Some(info) = find_var_in_scope(&vars_in_scope, name)
3796                            && matches!(
3797                                info.var_type,
3798                                VariableType::Scalar | VariableType::ScalarLiteral
3799                            )
3800                        {
3801                            return Err(anyhow!(
3802                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, '{}' is a scalar value",
3803                                name
3804                            ));
3805                        }
3806                    }
3807                    // Track deleted variables for later validation
3808                    for item in &delete_clause.items {
3809                        if let Expr::Variable(name) = item {
3810                            deleted_vars.insert(name.clone());
3811                        }
3812                    }
3813                    plan = LogicalPlan::Delete {
3814                        input: Box::new(plan),
3815                        items: delete_clause.items.clone(),
3816                        detach: delete_clause.detach,
3817                    };
3818                }
3819                Clause::With(with_clause) => {
3820                    let (new_plan, new_vars) =
3821                        self.plan_with_clause(&with_clause, plan, &vars_in_scope)?;
3822                    plan = new_plan;
3823                    vars_in_scope = new_vars;
3824                }
3825                Clause::WithRecursive(with_recursive) => {
3826                    // Plan the recursive CTE
3827                    plan = self.plan_with_recursive(&with_recursive, plan, &vars_in_scope)?;
3828                    // Add the CTE name to the scope (as Scalar since it's a table reference)
3829                    add_var_to_scope(
3830                        &mut vars_in_scope,
3831                        &with_recursive.name,
3832                        VariableType::Scalar,
3833                    )?;
3834                }
3835                Clause::Return(return_clause) => {
3836                    // Check for property/label access on deleted entities
3837                    if !deleted_vars.is_empty() {
3838                        for item in &return_clause.items {
3839                            if let ReturnItem::Expr { expr, .. } = item {
3840                                validate_no_deleted_entity_access(expr, &deleted_vars)?;
3841                            }
3842                        }
3843                    }
3844                    plan = self.plan_return_clause(&return_clause, plan, &vars_in_scope)?;
3845                } // All Clause variants are handled above - no catch-all needed
3846            }
3847        }
3848
3849        // Wrap write operations without RETURN in Limit(0) per OpenCypher spec.
3850        // CREATE (n) should return 0 rows, but CREATE (n) RETURN n should return 1 row.
3851        // If RETURN was used, the plan will have been wrapped in Project, so we only
3852        // wrap terminal Create/CreateBatch/Delete/Set/Remove nodes.
3853        let plan = match &plan {
3854            LogicalPlan::Create { .. }
3855            | LogicalPlan::CreateBatch { .. }
3856            | LogicalPlan::Delete { .. }
3857            | LogicalPlan::Set { .. }
3858            | LogicalPlan::Remove { .. }
3859            | LogicalPlan::Merge { .. } => LogicalPlan::Limit {
3860                input: Box::new(plan),
3861                skip: None,
3862                fetch: Some(0),
3863            },
3864            _ => plan,
3865        };
3866
3867        Ok(plan)
3868    }
3869
3870    fn collect_properties_from_expr(expr: &Expr, collected: &mut Vec<Expr>) {
3871        match expr {
3872            Expr::Property(_, _)
3873                if !collected
3874                    .iter()
3875                    .any(|e| e.to_string_repr() == expr.to_string_repr()) =>
3876            {
3877                collected.push(expr.clone());
3878            }
3879            Expr::Property(_, _) => {}
3880            Expr::Variable(_) => {
3881                // Variables are already available, don't need to project them
3882            }
3883            Expr::BinaryOp { left, right, .. } => {
3884                Self::collect_properties_from_expr(left, collected);
3885                Self::collect_properties_from_expr(right, collected);
3886            }
3887            Expr::FunctionCall {
3888                args, window_spec, ..
3889            } => {
3890                for arg in args {
3891                    Self::collect_properties_from_expr(arg, collected);
3892                }
3893                if let Some(spec) = window_spec {
3894                    for partition_expr in &spec.partition_by {
3895                        Self::collect_properties_from_expr(partition_expr, collected);
3896                    }
3897                    for sort_item in &spec.order_by {
3898                        Self::collect_properties_from_expr(&sort_item.expr, collected);
3899                    }
3900                }
3901            }
3902            Expr::List(items) => {
3903                for item in items {
3904                    Self::collect_properties_from_expr(item, collected);
3905                }
3906            }
3907            Expr::UnaryOp { expr: e, .. }
3908            | Expr::IsNull(e)
3909            | Expr::IsNotNull(e)
3910            | Expr::IsUnique(e) => {
3911                Self::collect_properties_from_expr(e, collected);
3912            }
3913            Expr::Case {
3914                expr,
3915                when_then,
3916                else_expr,
3917            } => {
3918                if let Some(e) = expr {
3919                    Self::collect_properties_from_expr(e, collected);
3920                }
3921                for (w, t) in when_then {
3922                    Self::collect_properties_from_expr(w, collected);
3923                    Self::collect_properties_from_expr(t, collected);
3924                }
3925                if let Some(e) = else_expr {
3926                    Self::collect_properties_from_expr(e, collected);
3927                }
3928            }
3929            Expr::In { expr, list } => {
3930                Self::collect_properties_from_expr(expr, collected);
3931                Self::collect_properties_from_expr(list, collected);
3932            }
3933            Expr::ArrayIndex { array, index } => {
3934                Self::collect_properties_from_expr(array, collected);
3935                Self::collect_properties_from_expr(index, collected);
3936            }
3937            Expr::ArraySlice { array, start, end } => {
3938                Self::collect_properties_from_expr(array, collected);
3939                if let Some(s) = start {
3940                    Self::collect_properties_from_expr(s, collected);
3941                }
3942                if let Some(e) = end {
3943                    Self::collect_properties_from_expr(e, collected);
3944                }
3945            }
3946            _ => {}
3947        }
3948    }
3949
3950    fn collect_window_functions(expr: &Expr, collected: &mut Vec<Expr>) {
3951        if let Expr::FunctionCall { window_spec, .. } = expr {
3952            // Collect any function with a window spec (OVER clause)
3953            if window_spec.is_some() {
3954                if !collected
3955                    .iter()
3956                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3957                {
3958                    collected.push(expr.clone());
3959                }
3960                return;
3961            }
3962        }
3963
3964        match expr {
3965            Expr::BinaryOp { left, right, .. } => {
3966                Self::collect_window_functions(left, collected);
3967                Self::collect_window_functions(right, collected);
3968            }
3969            Expr::FunctionCall { args, .. } => {
3970                for arg in args {
3971                    Self::collect_window_functions(arg, collected);
3972                }
3973            }
3974            Expr::List(items) => {
3975                for i in items {
3976                    Self::collect_window_functions(i, collected);
3977                }
3978            }
3979            Expr::Map(items) => {
3980                for (_, i) in items {
3981                    Self::collect_window_functions(i, collected);
3982                }
3983            }
3984            Expr::IsNull(e) | Expr::IsNotNull(e) | Expr::UnaryOp { expr: e, .. } => {
3985                Self::collect_window_functions(e, collected);
3986            }
3987            Expr::Case {
3988                expr,
3989                when_then,
3990                else_expr,
3991            } => {
3992                if let Some(e) = expr {
3993                    Self::collect_window_functions(e, collected);
3994                }
3995                for (w, t) in when_then {
3996                    Self::collect_window_functions(w, collected);
3997                    Self::collect_window_functions(t, collected);
3998                }
3999                if let Some(e) = else_expr {
4000                    Self::collect_window_functions(e, collected);
4001                }
4002            }
4003            Expr::Reduce {
4004                init, list, expr, ..
4005            } => {
4006                Self::collect_window_functions(init, collected);
4007                Self::collect_window_functions(list, collected);
4008                Self::collect_window_functions(expr, collected);
4009            }
4010            Expr::Quantifier {
4011                list, predicate, ..
4012            } => {
4013                Self::collect_window_functions(list, collected);
4014                Self::collect_window_functions(predicate, collected);
4015            }
4016            Expr::In { expr, list } => {
4017                Self::collect_window_functions(expr, collected);
4018                Self::collect_window_functions(list, collected);
4019            }
4020            Expr::ArrayIndex { array, index } => {
4021                Self::collect_window_functions(array, collected);
4022                Self::collect_window_functions(index, collected);
4023            }
4024            Expr::ArraySlice { array, start, end } => {
4025                Self::collect_window_functions(array, collected);
4026                if let Some(s) = start {
4027                    Self::collect_window_functions(s, collected);
4028                }
4029                if let Some(e) = end {
4030                    Self::collect_window_functions(e, collected);
4031                }
4032            }
4033            Expr::Property(e, _) => Self::collect_window_functions(e, collected),
4034            Expr::CountSubquery(_) | Expr::Exists { .. } => {}
4035            _ => {}
4036        }
4037    }
4038
4039    /// Transform property expressions in manual window functions to use qualified variable names.
4040    ///
4041    /// Converts `Expr::Property(Expr::Variable("e"), "dept")` to `Expr::Variable("e.dept")`
4042    /// so the executor can look up values directly from the row HashMap after the
4043    /// intermediate projection has materialized these properties with qualified names.
4044    ///
4045    /// Transforms ALL window functions (both manual and aggregate).
4046    /// Properties like `e.dept` become variables like `Expr::Variable("e.dept")`.
4047    fn transform_window_expr_properties(expr: Expr) -> Expr {
4048        let Expr::FunctionCall {
4049            name,
4050            args,
4051            window_spec: Some(spec),
4052            distinct,
4053        } = expr
4054        else {
4055            return expr;
4056        };
4057
4058        // Transform arguments for ALL window functions
4059        // Both manual (ROW_NUMBER, etc.) and aggregate (SUM, AVG, etc.) need this
4060        let transformed_args = args
4061            .into_iter()
4062            .map(Self::transform_property_to_variable)
4063            .collect();
4064
4065        // CRITICAL: ALL window functions (manual and aggregate) need partition_by/order_by transformed
4066        let transformed_partition_by = spec
4067            .partition_by
4068            .into_iter()
4069            .map(Self::transform_property_to_variable)
4070            .collect();
4071
4072        let transformed_order_by = spec
4073            .order_by
4074            .into_iter()
4075            .map(|item| SortItem {
4076                expr: Self::transform_property_to_variable(item.expr),
4077                ascending: item.ascending,
4078            })
4079            .collect();
4080
4081        Expr::FunctionCall {
4082            name,
4083            args: transformed_args,
4084            window_spec: Some(WindowSpec {
4085                partition_by: transformed_partition_by,
4086                order_by: transformed_order_by,
4087            }),
4088            distinct,
4089        }
4090    }
4091
4092    /// Transform a property expression to a variable expression with qualified name.
4093    ///
4094    /// `Expr::Property(Expr::Variable("e"), "dept")` becomes `Expr::Variable("e.dept")`
4095    fn transform_property_to_variable(expr: Expr) -> Expr {
4096        let Expr::Property(base, prop) = expr else {
4097            return expr;
4098        };
4099
4100        match *base {
4101            Expr::Variable(var) => Expr::Variable(format!("{}.{}", var, prop)),
4102            other => Expr::Property(Box::new(Self::transform_property_to_variable(other)), prop),
4103        }
4104    }
4105
4106    /// Transform VALID_AT macro into function call
4107    ///
4108    /// `e VALID_AT timestamp` becomes `uni.temporal.validAt(e, 'valid_from', 'valid_to', timestamp)`
4109    /// `e VALID_AT(timestamp, 'start', 'end')` becomes `uni.temporal.validAt(e, 'start', 'end', timestamp)`
4110    fn transform_valid_at_to_function(expr: Expr) -> Expr {
4111        match expr {
4112            Expr::ValidAt {
4113                entity,
4114                timestamp,
4115                start_prop,
4116                end_prop,
4117            } => {
4118                let start = start_prop.unwrap_or_else(|| "valid_from".to_string());
4119                let end = end_prop.unwrap_or_else(|| "valid_to".to_string());
4120
4121                Expr::FunctionCall {
4122                    name: "uni.temporal.validAt".to_string(),
4123                    args: vec![
4124                        Self::transform_valid_at_to_function(*entity),
4125                        Expr::Literal(CypherLiteral::String(start)),
4126                        Expr::Literal(CypherLiteral::String(end)),
4127                        Self::transform_valid_at_to_function(*timestamp),
4128                    ],
4129                    distinct: false,
4130                    window_spec: None,
4131                }
4132            }
4133            // Recursively transform nested expressions
4134            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
4135                left: Box::new(Self::transform_valid_at_to_function(*left)),
4136                op,
4137                right: Box::new(Self::transform_valid_at_to_function(*right)),
4138            },
4139            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
4140                op,
4141                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
4142            },
4143            Expr::FunctionCall {
4144                name,
4145                args,
4146                distinct,
4147                window_spec,
4148            } => Expr::FunctionCall {
4149                name,
4150                args: args
4151                    .into_iter()
4152                    .map(Self::transform_valid_at_to_function)
4153                    .collect(),
4154                distinct,
4155                window_spec,
4156            },
4157            Expr::Property(base, prop) => {
4158                Expr::Property(Box::new(Self::transform_valid_at_to_function(*base)), prop)
4159            }
4160            Expr::List(items) => Expr::List(
4161                items
4162                    .into_iter()
4163                    .map(Self::transform_valid_at_to_function)
4164                    .collect(),
4165            ),
4166            Expr::In { expr, list } => Expr::In {
4167                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
4168                list: Box::new(Self::transform_valid_at_to_function(*list)),
4169            },
4170            Expr::IsNull(e) => Expr::IsNull(Box::new(Self::transform_valid_at_to_function(*e))),
4171            Expr::IsNotNull(e) => {
4172                Expr::IsNotNull(Box::new(Self::transform_valid_at_to_function(*e)))
4173            }
4174            Expr::IsUnique(e) => Expr::IsUnique(Box::new(Self::transform_valid_at_to_function(*e))),
4175            // Other cases: return as-is
4176            other => other,
4177        }
4178    }
4179
4180    /// Rewrite system-metadata function calls (`id(v)`, `created_at(v)`,
4181    /// `updated_at(v)`) to direct property access on the corresponding
4182    /// internal column (`v._vid`, `v._created_at`, `v._updated_at`). This
4183    /// normalization enables predicate pushdown via the Property pattern
4184    /// recognized by `PredicateAnalyzer`.
4185    ///
4186    /// All three functions share the same shape: single-arg, argument
4187    /// must be a node/edge variable, returns the column value directly.
4188    fn rewrite_id_to_vid(expr: Expr, vars_in_scope: &[VariableInfo]) -> Expr {
4189        match expr {
4190            Expr::FunctionCall {
4191                name,
4192                args,
4193                distinct,
4194                window_spec,
4195            } if args.len() == 1 && Self::metadata_function_column(&name, None).is_some() => {
4196                if let Expr::Variable(ref var) = args[0] {
4197                    // `id()` resolves to `_eid` for an edge binding and `_vid`
4198                    // for a node — edge rows expose `_eid`, not `_vid`. Mirror
4199                    // the projection path (`df_expr.rs` translate of `id`).
4200                    let var_type = find_var_in_scope(vars_in_scope, var).map(|v| v.var_type);
4201                    let column = Self::metadata_function_column(&name, var_type)
4202                        .unwrap()
4203                        .to_string();
4204                    Expr::Property(Box::new(Expr::Variable(var.clone())), column)
4205                } else {
4206                    Expr::FunctionCall {
4207                        name,
4208                        args,
4209                        distinct,
4210                        window_spec,
4211                    }
4212                }
4213            }
4214            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
4215                left: Box::new(Self::rewrite_id_to_vid(*left, vars_in_scope)),
4216                op,
4217                right: Box::new(Self::rewrite_id_to_vid(*right, vars_in_scope)),
4218            },
4219            Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
4220                op,
4221                expr: Box::new(Self::rewrite_id_to_vid(*inner, vars_in_scope)),
4222            },
4223            other => other,
4224        }
4225    }
4226
4227    /// Return the internal column name for a system-metadata function, or
4228    /// `None` if the name is not one of the recognised metadata functions.
4229    ///
4230    /// `id()` maps to `_eid` when its argument is a relationship
4231    /// (`VariableType::Edge`) and `_vid` otherwise; `var_type` is `None` when the
4232    /// caller only needs the is-metadata-function test.
4233    fn metadata_function_column(
4234        name: &str,
4235        var_type: Option<VariableType>,
4236    ) -> Option<&'static str> {
4237        if name.eq_ignore_ascii_case("id") {
4238            if matches!(var_type, Some(VariableType::Edge)) {
4239                Some("_eid")
4240            } else {
4241                Some("_vid")
4242            }
4243        } else if name.eq_ignore_ascii_case("created_at") {
4244            Some("_created_at")
4245        } else if name.eq_ignore_ascii_case("updated_at") {
4246            Some("_updated_at")
4247        } else {
4248            None
4249        }
4250    }
4251
4252    /// Plan a MATCH clause, handling both shortestPath and regular patterns.
4253    fn plan_match_clause(
4254        &self,
4255        match_clause: &MatchClause,
4256        plan: LogicalPlan,
4257        vars_in_scope: &mut Vec<VariableInfo>,
4258    ) -> Result<LogicalPlan> {
4259        let mut plan = plan;
4260
4261        if match_clause.pattern.paths.is_empty() {
4262            return Err(anyhow!("Empty pattern"));
4263        }
4264
4265        // Track variables introduced by this OPTIONAL MATCH
4266        let vars_before_pattern = vars_in_scope.len();
4267
4268        for path in &match_clause.pattern.paths {
4269            if let Some(mode) = &path.shortest_path_mode {
4270                plan =
4271                    self.plan_shortest_path(path, plan, vars_in_scope, mode, vars_before_pattern)?;
4272            } else {
4273                plan = self.plan_path(
4274                    path,
4275                    plan,
4276                    vars_in_scope,
4277                    match_clause.optional,
4278                    vars_before_pattern,
4279                )?;
4280            }
4281        }
4282
4283        // Collect variables introduced by this OPTIONAL MATCH pattern
4284        let optional_vars: HashSet<String> = if match_clause.optional {
4285            vars_in_scope[vars_before_pattern..]
4286                .iter()
4287                .map(|v| v.name.clone())
4288                .collect()
4289        } else {
4290            HashSet::new()
4291        };
4292
4293        // Handle WHERE clause with vector_similarity and predicate pushdown
4294        if let Some(predicate) = &match_clause.where_clause {
4295            plan = self.plan_where_clause(predicate, plan, vars_in_scope, optional_vars)?;
4296        }
4297
4298        Ok(plan)
4299    }
4300
4301    /// Plan a shortestPath pattern.
4302    fn plan_shortest_path(
4303        &self,
4304        path: &PathPattern,
4305        plan: LogicalPlan,
4306        vars_in_scope: &mut Vec<VariableInfo>,
4307        mode: &ShortestPathMode,
4308        _vars_before_pattern: usize,
4309    ) -> Result<LogicalPlan> {
4310        let mut plan = plan;
4311        let elements = &path.elements;
4312
4313        // Pattern must be: node-rel-node-rel-...-node (odd number of elements >= 3)
4314        if elements.len() < 3 || elements.len().is_multiple_of(2) {
4315            return Err(anyhow!(
4316                "shortestPath requires at least one relationship: (a)-[*]->(b)"
4317            ));
4318        }
4319
4320        let source_node = match &elements[0] {
4321            PatternElement::Node(n) => n,
4322            _ => return Err(anyhow!("ShortestPath must start with a node")),
4323        };
4324        let rel = match &elements[1] {
4325            PatternElement::Relationship(r) => r,
4326            _ => {
4327                return Err(anyhow!(
4328                    "ShortestPath middle element must be a relationship"
4329                ));
4330            }
4331        };
4332        let target_node = match &elements[2] {
4333            PatternElement::Node(n) => n,
4334            _ => return Err(anyhow!("ShortestPath must end with a node")),
4335        };
4336
4337        let source_var = source_node
4338            .variable
4339            .clone()
4340            .ok_or_else(|| anyhow!("Source node must have variable in shortestPath"))?;
4341        let target_var = target_node
4342            .variable
4343            .clone()
4344            .ok_or_else(|| anyhow!("Target node must have variable in shortestPath"))?;
4345        let path_var = path
4346            .variable
4347            .clone()
4348            .ok_or_else(|| anyhow!("shortestPath must be assigned to a variable"))?;
4349
4350        let source_bound = is_var_in_scope(vars_in_scope, &source_var);
4351        let target_bound = is_var_in_scope(vars_in_scope, &target_var);
4352
4353        // Plan source node if not bound
4354        if !source_bound {
4355            plan = self.plan_unbound_node(source_node, &source_var, plan, false)?;
4356        } else if let Some(prop_filter) =
4357            self.properties_to_expr(&source_var, &source_node.properties)
4358        {
4359            plan = LogicalPlan::Filter {
4360                input: Box::new(plan),
4361                predicate: prop_filter,
4362                optional_variables: HashSet::new(),
4363            };
4364        }
4365
4366        // Plan target node if not bound
4367        let target_label_id = if !target_bound {
4368            // Use first label for target_label_id
4369            let target_label_name = target_node
4370                .labels
4371                .first()
4372                .ok_or_else(|| anyhow!("Target node must have label if not already bound"))?;
4373            // Native lookup first; then consult `CatalogProvider` /
4374            // `ReplacementScanProvider` and allocate a virtual label-id
4375            // (M5b follow-up #6). Virtual ids dispatch to
4376            // `CatalogVertexScanExec` at physical-plan time.
4377            let target_label_id =
4378                if let Some(meta) = self.schema.get_label_case_insensitive(target_label_name) {
4379                    meta.id
4380                } else if let Some((vid, _)) = self.allocate_virtual_label(target_label_name)? {
4381                    vid
4382                } else {
4383                    return Err(anyhow!("Label {} not found", target_label_name));
4384                };
4385
4386            let target_scan = LogicalPlan::Scan {
4387                label_id: target_label_id,
4388                labels: target_node.labels.names().to_vec(),
4389                variable: target_var.clone(),
4390                filter: self.properties_to_expr(&target_var, &target_node.properties),
4391                optional: false,
4392            };
4393
4394            plan = Self::join_with_plan(plan, target_scan);
4395            target_label_id
4396        } else {
4397            if let Some(prop_filter) = self.properties_to_expr(&target_var, &target_node.properties)
4398            {
4399                plan = LogicalPlan::Filter {
4400                    input: Box::new(plan),
4401                    predicate: prop_filter,
4402                    optional_variables: HashSet::new(),
4403                };
4404            }
4405            0 // Wildcard for already-bound target
4406        };
4407
4408        // Add ShortestPath operator
4409        let edge_type_ids = if rel.types.is_empty() {
4410            // If no type specified, fetch all edge types (both schema and schemaless)
4411            self.schema.all_edge_type_ids()
4412        } else {
4413            let mut ids = Vec::new();
4414            for type_name in &rel.types {
4415                let id = if let Some(meta) = self.schema.edge_types.get(type_name) {
4416                    meta.id
4417                } else if let Some((vid, _)) = self.allocate_virtual_edge_type(type_name)? {
4418                    vid
4419                } else {
4420                    return Err(anyhow!("Edge type {} not found", type_name));
4421                };
4422                ids.push(id);
4423            }
4424            ids
4425        };
4426
4427        // Extract hop constraints from relationship pattern
4428        let min_hops = rel.range.as_ref().and_then(|r| r.min).unwrap_or(1);
4429        let max_hops = rel.range.as_ref().and_then(|r| r.max).unwrap_or(u32::MAX);
4430
4431        let sp_plan = match mode {
4432            ShortestPathMode::Shortest => LogicalPlan::ShortestPath {
4433                input: Box::new(plan),
4434                edge_type_ids,
4435                direction: rel.direction.clone(),
4436                source_variable: source_var.clone(),
4437                target_variable: target_var.clone(),
4438                target_label_id,
4439                path_variable: path_var.clone(),
4440                min_hops,
4441                max_hops,
4442            },
4443            ShortestPathMode::AllShortest => LogicalPlan::AllShortestPaths {
4444                input: Box::new(plan),
4445                edge_type_ids,
4446                direction: rel.direction.clone(),
4447                source_variable: source_var.clone(),
4448                target_variable: target_var.clone(),
4449                target_label_id,
4450                path_variable: path_var.clone(),
4451                min_hops,
4452                max_hops,
4453            },
4454        };
4455
4456        if !source_bound {
4457            add_var_to_scope(vars_in_scope, &source_var, VariableType::Node)?;
4458        }
4459        if !target_bound {
4460            add_var_to_scope(vars_in_scope, &target_var, VariableType::Node)?;
4461        }
4462        add_var_to_scope(vars_in_scope, &path_var, VariableType::Path)?;
4463
4464        Ok(sp_plan)
4465    }
4466    /// Plan a MATCH pattern into a LogicalPlan (Scan → Traverse chains).
4467    ///
4468    /// This is a public entry point for the Locy plan builder to reuse the
4469    /// existing pattern-planning logic for clause bodies.
4470    pub fn plan_pattern(
4471        &self,
4472        pattern: &Pattern,
4473        initial_vars: &[VariableInfo],
4474    ) -> Result<LogicalPlan> {
4475        let mut vars_in_scope: Vec<VariableInfo> = initial_vars.to_vec();
4476        let vars_before_pattern = vars_in_scope.len();
4477        let mut plan = LogicalPlan::Empty;
4478        for path in &pattern.paths {
4479            plan = self.plan_path(path, plan, &mut vars_in_scope, false, vars_before_pattern)?;
4480        }
4481        Ok(plan)
4482    }
4483
4484    /// Plan a regular MATCH path (not shortestPath).
4485    fn plan_path(
4486        &self,
4487        path: &PathPattern,
4488        plan: LogicalPlan,
4489        vars_in_scope: &mut Vec<VariableInfo>,
4490        optional: bool,
4491        vars_before_pattern: usize,
4492    ) -> Result<LogicalPlan> {
4493        let mut plan = plan;
4494        let elements = &path.elements;
4495        let mut i = 0;
4496
4497        let path_variable = path.variable.clone();
4498
4499        // Check for VariableAlreadyBound: path variable already in scope
4500        if let Some(pv) = &path_variable
4501            && !pv.is_empty()
4502            && is_var_in_scope(vars_in_scope, pv)
4503        {
4504            return Err(anyhow!(
4505                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4506                pv
4507            ));
4508        }
4509
4510        // Check for VariableAlreadyBound: path variable conflicts with element variables
4511        if let Some(pv) = &path_variable
4512            && !pv.is_empty()
4513        {
4514            for element in elements {
4515                match element {
4516                    PatternElement::Node(n) => {
4517                        if let Some(v) = &n.variable
4518                            && v == pv
4519                        {
4520                            return Err(anyhow!(
4521                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4522                                pv
4523                            ));
4524                        }
4525                    }
4526                    PatternElement::Relationship(r) => {
4527                        if let Some(v) = &r.variable
4528                            && v == pv
4529                        {
4530                            return Err(anyhow!(
4531                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4532                                pv
4533                            ));
4534                        }
4535                    }
4536                    PatternElement::Parenthesized { .. } => {}
4537                }
4538            }
4539        }
4540
4541        // For OPTIONAL MATCH, extract all variables from this pattern upfront.
4542        // When any hop fails in a multi-hop pattern, ALL these variables should be NULL.
4543        let mut optional_pattern_vars: HashSet<String> = if optional {
4544            let mut vars = HashSet::new();
4545            for element in elements {
4546                match element {
4547                    PatternElement::Node(n) => {
4548                        if let Some(v) = &n.variable
4549                            && !v.is_empty()
4550                            && !is_var_in_scope(vars_in_scope, v)
4551                        {
4552                            vars.insert(v.clone());
4553                        }
4554                    }
4555                    PatternElement::Relationship(r) => {
4556                        if let Some(v) = &r.variable
4557                            && !v.is_empty()
4558                            && !is_var_in_scope(vars_in_scope, v)
4559                        {
4560                            vars.insert(v.clone());
4561                        }
4562                    }
4563                    PatternElement::Parenthesized { pattern, .. } => {
4564                        // Also check nested patterns
4565                        for nested_elem in &pattern.elements {
4566                            match nested_elem {
4567                                PatternElement::Node(n) => {
4568                                    if let Some(v) = &n.variable
4569                                        && !v.is_empty()
4570                                        && !is_var_in_scope(vars_in_scope, v)
4571                                    {
4572                                        vars.insert(v.clone());
4573                                    }
4574                                }
4575                                PatternElement::Relationship(r) => {
4576                                    if let Some(v) = &r.variable
4577                                        && !v.is_empty()
4578                                        && !is_var_in_scope(vars_in_scope, v)
4579                                    {
4580                                        vars.insert(v.clone());
4581                                    }
4582                                }
4583                                _ => {}
4584                            }
4585                        }
4586                    }
4587                }
4588            }
4589            // Include path variable if present
4590            if let Some(pv) = &path_variable
4591                && !pv.is_empty()
4592            {
4593                vars.insert(pv.clone());
4594            }
4595            vars
4596        } else {
4597            HashSet::new()
4598        };
4599
4600        // Pre-scan path elements for bound edge variables from previous MATCH clauses.
4601        // These must participate in Trail mode (relationship uniqueness) enforcement
4602        // across ALL segments in this path, so that VLP segments like [*0..1] don't
4603        // traverse through edges already claimed by a bound relationship [r].
4604        let path_bound_edge_vars: HashSet<String> = {
4605            let mut bound = HashSet::new();
4606            for element in elements {
4607                if let PatternElement::Relationship(rel) = element
4608                    && let Some(ref var_name) = rel.variable
4609                    && !var_name.is_empty()
4610                    && vars_in_scope[..vars_before_pattern]
4611                        .iter()
4612                        .any(|v| v.name == *var_name)
4613                {
4614                    bound.insert(var_name.clone());
4615                }
4616            }
4617            bound
4618        };
4619
4620        // Track if any traverses were added (for zero-length path detection)
4621        let mut had_traverses = false;
4622        // Track the node variable for zero-length path binding
4623        let mut single_node_variable: Option<String> = None;
4624        // Collect node/edge variables for BindPath (fixed-length path binding)
4625        let mut path_node_vars: Vec<String> = Vec::new();
4626        let mut path_edge_vars: Vec<String> = Vec::new();
4627        // Track the last processed outer node variable for QPP source binding.
4628        // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is `a`, not `x`.
4629        let mut last_outer_node_var: Option<String> = None;
4630
4631        // Multi-hop path variables are now supported - path is accumulated across hops
4632        while i < elements.len() {
4633            let element = &elements[i];
4634            match element {
4635                PatternElement::Node(n) => {
4636                    let mut variable = n.variable.clone().unwrap_or_default();
4637                    if variable.is_empty() {
4638                        variable = self.next_anon_var();
4639                    }
4640                    // Track first node variable for zero-length path
4641                    if single_node_variable.is_none() {
4642                        single_node_variable = Some(variable.clone());
4643                    }
4644                    let is_bound =
4645                        !variable.is_empty() && is_var_in_scope(vars_in_scope, &variable);
4646                    if optional && !is_bound {
4647                        optional_pattern_vars.insert(variable.clone());
4648                    }
4649
4650                    if is_bound {
4651                        // Check for type conflict - can't use an Edge/Path as a Node
4652                        if let Some(info) = find_var_in_scope(vars_in_scope, &variable)
4653                            && !info.var_type.is_compatible_with(VariableType::Node)
4654                        {
4655                            return Err(anyhow!(
4656                                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4657                                variable,
4658                                info.var_type
4659                            ));
4660                        }
4661                        if let Some(node_filter) =
4662                            self.node_filter_expr(&variable, &n.labels, &n.properties)
4663                        {
4664                            plan = LogicalPlan::Filter {
4665                                input: Box::new(plan),
4666                                predicate: node_filter,
4667                                optional_variables: HashSet::new(),
4668                            };
4669                        }
4670                    } else {
4671                        plan = self.plan_unbound_node(n, &variable, plan, optional)?;
4672                        if !variable.is_empty() {
4673                            add_var_to_scope(vars_in_scope, &variable, VariableType::Node)?;
4674                        }
4675                    }
4676
4677                    // Track source node for BindPath
4678                    if path_variable.is_some() && path_node_vars.is_empty() {
4679                        path_node_vars.push(variable.clone());
4680                    }
4681
4682                    // Look ahead for relationships
4683                    let mut current_source_var = variable;
4684                    last_outer_node_var = Some(current_source_var.clone());
4685                    i += 1;
4686                    while i < elements.len() {
4687                        if let PatternElement::Relationship(r) = &elements[i] {
4688                            if i + 1 < elements.len() {
4689                                let target_node_part = &elements[i + 1];
4690                                if let PatternElement::Node(n_target) = target_node_part {
4691                                    // For VLP traversals, pass path_variable through
4692                                    // For fixed-length, we use BindPath instead
4693                                    let is_vlp = r.range.is_some();
4694                                    let traverse_path_var =
4695                                        if is_vlp { path_variable.clone() } else { None };
4696
4697                                    // If we're about to start a VLP segment and there are
4698                                    // collected fixed-hop path vars, create an intermediate
4699                                    // BindPath for the fixed prefix first. The VLP will then
4700                                    // extend this existing path.
4701                                    if is_vlp
4702                                        && let Some(pv) = path_variable.as_ref()
4703                                        && !path_node_vars.is_empty()
4704                                    {
4705                                        plan = LogicalPlan::BindPath {
4706                                            input: Box::new(plan),
4707                                            node_variables: std::mem::take(&mut path_node_vars),
4708                                            edge_variables: std::mem::take(&mut path_edge_vars),
4709                                            path_variable: pv.clone(),
4710                                        };
4711                                        if !is_var_in_scope(vars_in_scope, pv) {
4712                                            add_var_to_scope(
4713                                                vars_in_scope,
4714                                                pv,
4715                                                VariableType::Path,
4716                                            )?;
4717                                        }
4718                                    }
4719
4720                                    // Plan the traverse from the current source node
4721                                    let target_was_bound =
4722                                        n_target.variable.as_ref().is_some_and(|v| {
4723                                            !v.is_empty() && is_var_in_scope(vars_in_scope, v)
4724                                        });
4725                                    let (new_plan, target_var, effective_target) = self
4726                                        .plan_traverse_with_source(
4727                                            plan,
4728                                            vars_in_scope,
4729                                            TraverseParams {
4730                                                rel: r,
4731                                                target_node: n_target,
4732                                                optional,
4733                                                path_variable: traverse_path_var,
4734                                                optional_pattern_vars: optional_pattern_vars
4735                                                    .clone(),
4736                                            },
4737                                            &current_source_var,
4738                                            vars_before_pattern,
4739                                            &path_bound_edge_vars,
4740                                        )?;
4741                                    plan = new_plan;
4742                                    if optional && !target_was_bound {
4743                                        optional_pattern_vars.insert(target_var.clone());
4744                                    }
4745
4746                                    // Track edge/target node for BindPath
4747                                    if path_variable.is_some() && !is_vlp {
4748                                        // Use the edge variable if given, otherwise use
4749                                        // the internal tracking column pattern.
4750                                        // Use effective_target (which may be __rebound_x
4751                                        // for bound-target traversals) to match the actual
4752                                        // column name produced by GraphTraverseExec.
4753                                        if let Some(ev) = &r.variable {
4754                                            path_edge_vars.push(ev.clone());
4755                                        } else {
4756                                            path_edge_vars
4757                                                .push(format!("__eid_to_{}", effective_target));
4758                                        }
4759                                        path_node_vars.push(target_var.clone());
4760                                    }
4761
4762                                    current_source_var = target_var;
4763                                    last_outer_node_var = Some(current_source_var.clone());
4764                                    had_traverses = true;
4765                                    i += 2;
4766                                } else {
4767                                    return Err(anyhow!("Relationship must be followed by a node"));
4768                                }
4769                            } else {
4770                                return Err(anyhow!("Relationship cannot be the last element"));
4771                            }
4772                        } else {
4773                            break;
4774                        }
4775                    }
4776                }
4777                PatternElement::Relationship(_) => {
4778                    return Err(anyhow!("Pattern must start with a node"));
4779                }
4780                PatternElement::Parenthesized { pattern, range } => {
4781                    // Quantified pattern: ((a)-[:REL]->(b)){n,m}
4782                    // Validate: odd number of elements (node-rel-node[-rel-node]*)
4783                    if pattern.elements.len() < 3 || pattern.elements.len() % 2 == 0 {
4784                        return Err(anyhow!(
4785                            "Quantified pattern must have node-relationship-node structure (odd number >= 3 elements)"
4786                        ));
4787                    }
4788
4789                    let source_node = match &pattern.elements[0] {
4790                        PatternElement::Node(n) => n,
4791                        _ => return Err(anyhow!("Quantified pattern must start with a node")),
4792                    };
4793
4794                    // Extract all relationship-node pairs (QPP steps)
4795                    let mut qpp_rels: Vec<(&RelationshipPattern, &NodePattern)> = Vec::new();
4796                    for pair_idx in (1..pattern.elements.len()).step_by(2) {
4797                        let rel = match &pattern.elements[pair_idx] {
4798                            PatternElement::Relationship(r) => r,
4799                            _ => {
4800                                return Err(anyhow!(
4801                                    "Quantified pattern element at position {} must be a relationship",
4802                                    pair_idx
4803                                ));
4804                            }
4805                        };
4806                        let node = match &pattern.elements[pair_idx + 1] {
4807                            PatternElement::Node(n) => n,
4808                            _ => {
4809                                return Err(anyhow!(
4810                                    "Quantified pattern element at position {} must be a node",
4811                                    pair_idx + 1
4812                                ));
4813                            }
4814                        };
4815                        // Reject nested quantifiers
4816                        if rel.range.is_some() {
4817                            return Err(anyhow!(
4818                                "Nested quantifiers not supported: ((a)-[:REL*n]->(b)){{m}}"
4819                            ));
4820                        }
4821                        qpp_rels.push((rel, node));
4822                    }
4823
4824                    // Check if there's an outer target node after the Parenthesized element.
4825                    // In syntax like `(a)((x)-[:LINK]->(y)){2,4}(b)`, the `(b)` is the outer
4826                    // target that should receive the traversal result.
4827                    let inner_target_node = qpp_rels.last().unwrap().1;
4828                    let outer_target_node = if i + 1 < elements.len() {
4829                        match &elements[i + 1] {
4830                            PatternElement::Node(n) => Some(n),
4831                            _ => None,
4832                        }
4833                    } else {
4834                        None
4835                    };
4836                    // Use the outer target for variable binding and filters; inner target
4837                    // labels are used for state constraints within the NFA.
4838                    let target_node = outer_target_node.unwrap_or(inner_target_node);
4839
4840                    // For simple 3-element single-hop QPP without intermediate label constraints,
4841                    // fall back to existing VLP behavior (copy range to relationship).
4842                    let use_simple_vlp = qpp_rels.len() == 1
4843                        && inner_target_node
4844                            .labels
4845                            .first()
4846                            .and_then(|l| self.schema.get_label_case_insensitive(l))
4847                            .is_none();
4848
4849                    // Plan source node.
4850                    // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is the preceding
4851                    // outer node `a`, NOT the inner `x`. If there's a preceding outer
4852                    // node variable, use it; otherwise fall back to the inner source.
4853                    let source_variable = if let Some(ref outer_src) = last_outer_node_var {
4854                        // The preceding outer node is already bound and in scope
4855                        // Apply any property filters from the inner source node
4856                        if let Some(prop_filter) =
4857                            self.properties_to_expr(outer_src, &source_node.properties)
4858                        {
4859                            plan = LogicalPlan::Filter {
4860                                input: Box::new(plan),
4861                                predicate: prop_filter,
4862                                optional_variables: HashSet::new(),
4863                            };
4864                        }
4865                        outer_src.clone()
4866                    } else {
4867                        let sv = source_node
4868                            .variable
4869                            .clone()
4870                            .filter(|v| !v.is_empty())
4871                            .unwrap_or_else(|| self.next_anon_var());
4872
4873                        if is_var_in_scope(vars_in_scope, &sv) {
4874                            // Source is already bound, apply property filter if needed
4875                            if let Some(prop_filter) =
4876                                self.properties_to_expr(&sv, &source_node.properties)
4877                            {
4878                                plan = LogicalPlan::Filter {
4879                                    input: Box::new(plan),
4880                                    predicate: prop_filter,
4881                                    optional_variables: HashSet::new(),
4882                                };
4883                            }
4884                        } else {
4885                            // Source is unbound, scan it
4886                            plan = self.plan_unbound_node(source_node, &sv, plan, optional)?;
4887                            add_var_to_scope(vars_in_scope, &sv, VariableType::Node)?;
4888                            if optional {
4889                                optional_pattern_vars.insert(sv.clone());
4890                            }
4891                        }
4892                        sv
4893                    };
4894
4895                    if use_simple_vlp {
4896                        // Simple single-hop QPP: apply range to relationship and use VLP path
4897                        let mut relationship = qpp_rels[0].0.clone();
4898                        relationship.range = range.clone();
4899
4900                        let target_was_bound = target_node
4901                            .variable
4902                            .as_ref()
4903                            .is_some_and(|v| !v.is_empty() && is_var_in_scope(vars_in_scope, v));
4904                        let (new_plan, target_var, _effective_target) = self
4905                            .plan_traverse_with_source(
4906                                plan,
4907                                vars_in_scope,
4908                                TraverseParams {
4909                                    rel: &relationship,
4910                                    target_node,
4911                                    optional,
4912                                    path_variable: path_variable.clone(),
4913                                    optional_pattern_vars: optional_pattern_vars.clone(),
4914                                },
4915                                &source_variable,
4916                                vars_before_pattern,
4917                                &path_bound_edge_vars,
4918                            )?;
4919                        plan = new_plan;
4920                        if optional && !target_was_bound {
4921                            optional_pattern_vars.insert(target_var);
4922                        }
4923                    } else {
4924                        // Multi-hop QPP: build QppStepInfo list and create Traverse with qpp_steps
4925                        let mut qpp_step_infos = Vec::new();
4926                        let mut all_edge_type_ids = Vec::new();
4927
4928                        for (rel, node) in &qpp_rels {
4929                            let mut step_edge_type_ids = Vec::new();
4930                            if rel.types.is_empty() {
4931                                step_edge_type_ids = self.schema.all_edge_type_ids();
4932                            } else {
4933                                for type_name in &rel.types {
4934                                    if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4935                                        step_edge_type_ids.push(edge_meta.id);
4936                                    }
4937                                }
4938                            }
4939                            all_edge_type_ids.extend_from_slice(&step_edge_type_ids);
4940
4941                            let target_label = node.labels.first().and_then(|l| {
4942                                self.schema.get_label_case_insensitive(l).map(|_| l.clone())
4943                            });
4944
4945                            qpp_step_infos.push(QppStepInfo {
4946                                edge_type_ids: step_edge_type_ids,
4947                                direction: rel.direction.clone(),
4948                                target_label,
4949                            });
4950                        }
4951
4952                        // Deduplicate edge type IDs for adjacency warming
4953                        all_edge_type_ids.sort_unstable();
4954                        all_edge_type_ids.dedup();
4955
4956                        // Compute iteration bounds from range
4957                        let hops_per_iter = qpp_step_infos.len();
4958                        const QPP_DEFAULT_MAX_HOPS: usize = 100;
4959                        let (min_iter, max_iter) = if let Some(range) = range {
4960                            let min = range.min.unwrap_or(1) as usize;
4961                            let max = range
4962                                .max
4963                                .map(|m| m as usize)
4964                                .unwrap_or(QPP_DEFAULT_MAX_HOPS / hops_per_iter);
4965                            (min, max)
4966                        } else {
4967                            (1, 1)
4968                        };
4969                        let min_hops = min_iter * hops_per_iter;
4970                        let max_hops = max_iter * hops_per_iter;
4971
4972                        // Target variable from the last node in the QPP sub-pattern
4973                        let target_variable = target_node
4974                            .variable
4975                            .clone()
4976                            .filter(|v| !v.is_empty())
4977                            .unwrap_or_else(|| self.next_anon_var());
4978
4979                        let target_is_bound = is_var_in_scope(vars_in_scope, &target_variable);
4980
4981                        // Determine target label for the final node
4982                        let target_label_meta = target_node
4983                            .labels
4984                            .first()
4985                            .and_then(|l| self.schema.get_label_case_insensitive(l));
4986
4987                        // Collect scope match variables
4988                        let mut scope_match_variables: HashSet<String> = vars_in_scope
4989                            [vars_before_pattern..]
4990                            .iter()
4991                            .map(|v| v.name.clone())
4992                            .collect();
4993                        scope_match_variables.insert(target_variable.clone());
4994
4995                        // Handle bound target: use rebound variable for traverse
4996                        let rebound_target_var = if target_is_bound {
4997                            Some(target_variable.clone())
4998                        } else {
4999                            None
5000                        };
5001                        let effective_target_var = if let Some(ref bv) = rebound_target_var {
5002                            format!("__rebound_{}", bv)
5003                        } else {
5004                            target_variable.clone()
5005                        };
5006
5007                        plan = LogicalPlan::Traverse {
5008                            input: Box::new(plan),
5009                            edge_type_ids: all_edge_type_ids,
5010                            direction: qpp_rels[0].0.direction.clone(),
5011                            source_variable: source_variable.to_string(),
5012                            target_variable: effective_target_var.clone(),
5013                            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
5014                            step_variable: None, // QPP doesn't expose intermediate edges
5015                            min_hops,
5016                            max_hops,
5017                            optional,
5018                            target_filter: self.node_filter_expr(
5019                                &target_variable,
5020                                &target_node.labels,
5021                                &target_node.properties,
5022                            ),
5023                            path_variable: path_variable.clone(),
5024                            edge_properties: HashSet::new(),
5025                            is_variable_length: true,
5026                            optional_pattern_vars: optional_pattern_vars.clone(),
5027                            scope_match_variables,
5028                            edge_filter_expr: None,
5029                            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5030                            qpp_steps: Some(qpp_step_infos),
5031                        };
5032
5033                        // Handle bound target: filter rebound results against original variable
5034                        if let Some(ref btv) = rebound_target_var {
5035                            // Filter: __rebound_x._vid = x._vid
5036                            let filter_pred = Expr::BinaryOp {
5037                                left: Box::new(Expr::Property(
5038                                    Box::new(Expr::Variable(effective_target_var.clone())),
5039                                    "_vid".to_string(),
5040                                )),
5041                                op: BinaryOp::Eq,
5042                                right: Box::new(Expr::Property(
5043                                    Box::new(Expr::Variable(btv.clone())),
5044                                    "_vid".to_string(),
5045                                )),
5046                            };
5047                            plan = LogicalPlan::Filter {
5048                                input: Box::new(plan),
5049                                predicate: filter_pred,
5050                                optional_variables: if optional {
5051                                    optional_pattern_vars.clone()
5052                                } else {
5053                                    HashSet::new()
5054                                },
5055                            };
5056                        }
5057
5058                        // Add target variable to scope
5059                        if !target_is_bound {
5060                            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5061                        }
5062
5063                        // Add path variable to scope
5064                        if let Some(ref pv) = path_variable
5065                            && !pv.is_empty()
5066                            && !is_var_in_scope(vars_in_scope, pv)
5067                        {
5068                            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5069                        }
5070                    }
5071                    had_traverses = true;
5072
5073                    // Skip the outer target node if we consumed it
5074                    if outer_target_node.is_some() {
5075                        i += 2; // skip both Parenthesized and the following Node
5076                    } else {
5077                        i += 1;
5078                    }
5079                }
5080            }
5081        }
5082
5083        // If this is a single-node pattern with a path variable, bind the zero-length path
5084        // E.g., `p = (a)` should create a Path with one node and zero edges
5085        if let Some(ref path_var) = path_variable
5086            && !path_var.is_empty()
5087            && !had_traverses
5088            && let Some(node_var) = single_node_variable
5089        {
5090            plan = LogicalPlan::BindZeroLengthPath {
5091                input: Box::new(plan),
5092                node_variable: node_var,
5093                path_variable: path_var.clone(),
5094            };
5095            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
5096        }
5097
5098        // Bind fixed-length path from collected node/edge variables
5099        if let Some(ref path_var) = path_variable
5100            && !path_var.is_empty()
5101            && had_traverses
5102            && !path_node_vars.is_empty()
5103            && !is_var_in_scope(vars_in_scope, path_var)
5104        {
5105            plan = LogicalPlan::BindPath {
5106                input: Box::new(plan),
5107                node_variables: path_node_vars,
5108                edge_variables: path_edge_vars,
5109                path_variable: path_var.clone(),
5110            };
5111            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
5112        }
5113
5114        Ok(plan)
5115    }
5116
5117    /// Plan a traverse with an explicit source variable name.
5118    ///
5119    /// Returns `(plan, target_variable, effective_target_variable)` where:
5120    /// - `target_variable` is the semantic variable name for downstream scope
5121    /// - `effective_target_variable` is the actual column-name prefix used by
5122    ///   the traverse (may be `__rebound_x` for bound-target patterns)
5123    fn plan_traverse_with_source(
5124        &self,
5125        plan: LogicalPlan,
5126        vars_in_scope: &mut Vec<VariableInfo>,
5127        params: TraverseParams<'_>,
5128        source_variable: &str,
5129        vars_before_pattern: usize,
5130        path_bound_edge_vars: &HashSet<String>,
5131    ) -> Result<(LogicalPlan, String, String)> {
5132        // Check for parameter used as relationship predicate
5133        if let Some(Expr::Parameter(_)) = &params.rel.properties {
5134            return Err(anyhow!(
5135                "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
5136            ));
5137        }
5138
5139        let mut edge_type_ids = Vec::new();
5140        let mut dst_labels = Vec::new();
5141        let mut unknown_types = Vec::new();
5142
5143        if params.rel.types.is_empty() {
5144            // All types - include both schema and schemaless edge types
5145            // This ensures MATCH (a)-[r]->(b) finds edges even when no schema is defined
5146            edge_type_ids = self.schema.all_edge_type_ids();
5147            for meta in self.schema.edge_types.values() {
5148                dst_labels.extend(meta.dst_labels.iter().cloned());
5149            }
5150        } else {
5151            for type_name in &params.rel.types {
5152                if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
5153                    // Known type - use standard Traverse with type_id
5154                    edge_type_ids.push(edge_meta.id);
5155                    dst_labels.extend(edge_meta.dst_labels.iter().cloned());
5156                } else if let Some((vid, _)) = self.allocate_virtual_edge_type(type_name)? {
5157                    // M5b.3: virtual edge type (plugin-registered CatalogTable).
5158                    // Resolving it into `edge_type_ids` (not `unknown_types`)
5159                    // lets the regular `Traverse` planner build a structured
5160                    // plan that the physical planner can dispatch to a
5161                    // `CatalogEdgeScanExec` mid-pattern.
5162                    edge_type_ids.push(vid);
5163                } else {
5164                    // Unknown type - will use TraverseMainByType
5165                    unknown_types.push(type_name.clone());
5166                }
5167            }
5168        }
5169
5170        // Deduplicate edge type IDs and unknown types ([:T|:T] → [:T])
5171        edge_type_ids.sort_unstable();
5172        edge_type_ids.dedup();
5173        unknown_types.sort_unstable();
5174        unknown_types.dedup();
5175
5176        let mut target_variable = params.target_node.variable.clone().unwrap_or_default();
5177        if target_variable.is_empty() {
5178            target_variable = self.next_anon_var();
5179        }
5180        let target_is_bound =
5181            !target_variable.is_empty() && is_var_in_scope(vars_in_scope, &target_variable);
5182
5183        // Check for VariableTypeConflict: relationship variable used as node
5184        // e.g., ()-[r]-(r) where r is both the edge and a node endpoint
5185        if let Some(rel_var) = &params.rel.variable
5186            && !rel_var.is_empty()
5187            && rel_var == &target_variable
5188        {
5189            return Err(anyhow!(
5190                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as relationship, cannot use as node",
5191                rel_var
5192            ));
5193        }
5194
5195        // Check for VariableTypeConflict/RelationshipUniquenessViolation
5196        // e.g., (r)-[r]-() or r = ()-[]-(), ()-[r]-()
5197        // Also: (a)-[r]->()-[r]->(a) where r is reused as relationship in same pattern
5198        // BUT: MATCH (a)-[r]->() WITH r MATCH ()-[r]->() is ALLOWED (r is bound from previous clause)
5199        let mut bound_edge_var: Option<String> = None;
5200        let mut bound_edge_list_var: Option<String> = None;
5201        if let Some(rel_var) = &params.rel.variable
5202            && !rel_var.is_empty()
5203            && let Some(info) = find_var_in_scope(vars_in_scope, rel_var)
5204        {
5205            let is_from_previous_clause = vars_in_scope[..vars_before_pattern]
5206                .iter()
5207                .any(|v| v.name == *rel_var);
5208
5209            if info.var_type == VariableType::Edge {
5210                // Check if this edge variable comes from a previous clause (before this MATCH)
5211                if is_from_previous_clause {
5212                    // Edge variable bound from previous clause - this is allowed
5213                    // We'll filter the traversal to match this specific edge
5214                    bound_edge_var = Some(rel_var.clone());
5215                } else {
5216                    // Same relationship variable used twice in the same MATCH clause
5217                    return Err(anyhow!(
5218                        "SyntaxError: RelationshipUniquenessViolation - Relationship variable '{}' is already used in this pattern",
5219                        rel_var
5220                    ));
5221                }
5222            } else if params.rel.range.is_some()
5223                && is_from_previous_clause
5224                && matches!(
5225                    info.var_type,
5226                    VariableType::Scalar | VariableType::ScalarLiteral
5227                )
5228            {
5229                // Allow VLP rebound against a previously bound relationship list
5230                // (e.g. WITH [r1, r2] AS rs ... MATCH ()-[rs*]->()).
5231                bound_edge_list_var = Some(rel_var.clone());
5232            } else if !info.var_type.is_compatible_with(VariableType::Edge) {
5233                return Err(anyhow!(
5234                    "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as relationship",
5235                    rel_var,
5236                    info.var_type
5237                ));
5238            }
5239        }
5240
5241        // Check for VariableTypeConflict: target node variable already bound as non-Node
5242        // e.g., ()-[r]-()-[]-(r) where r was added as Edge, now used as target node
5243        if target_is_bound
5244            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5245            && !info.var_type.is_compatible_with(VariableType::Node)
5246        {
5247            return Err(anyhow!(
5248                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
5249                target_variable,
5250                info.var_type
5251            ));
5252        }
5253
5254        // If all requested types are unknown (schemaless), use TraverseMainByType
5255        // This allows queries like MATCH (a)-[:UnknownType]->(b) to work
5256        // Also supports OR relationship types like MATCH (a)-[:KNOWS|HATES]->(b)
5257        if !unknown_types.is_empty() && edge_type_ids.is_empty() {
5258            // All types are unknown - use schemaless traversal
5259
5260            let is_variable_length = params.rel.range.is_some();
5261
5262            const DEFAULT_MAX_HOPS: usize = 100;
5263            let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
5264                let min = range.min.unwrap_or(1) as usize;
5265                let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
5266                (min, max)
5267            } else {
5268                (1, 1)
5269            };
5270
5271            // For both single-hop and variable-length paths:
5272            // - step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
5273            //   Single-hop: step_var holds a single edge object
5274            //   VLP: step_var holds a list of edge objects
5275            // - path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
5276            let step_var = params.rel.variable.clone();
5277            let path_var = params.path_variable.clone();
5278
5279            // Compute scope_match_variables for relationship uniqueness scoping.
5280            let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
5281                .iter()
5282                .map(|v| v.name.clone())
5283                .collect();
5284            if let Some(ref sv) = step_var {
5285                // Only add the step variable to scope if it's NOT rebound from a previous clause.
5286                // Rebound edges (bound_edge_var is set) should not participate in uniqueness
5287                // filtering because the second MATCH intentionally reuses the same edge.
5288                if bound_edge_var.is_none() {
5289                    scope_match_variables.insert(sv.clone());
5290                }
5291            }
5292            scope_match_variables.insert(target_variable.clone());
5293            // Include bound edge variables from this path for cross-segment Trail mode
5294            // enforcement. This ensures VLP segments like [*0..1] don't traverse through
5295            // edges already claimed by a bound relationship [r] in the same path.
5296            // Exclude the CURRENT segment's bound edge: the schemaless path doesn't use
5297            // __rebound_ renaming, so the BFS must be free to match the bound edge itself.
5298            scope_match_variables.extend(
5299                path_bound_edge_vars
5300                    .iter()
5301                    .filter(|v| bound_edge_var.as_ref() != Some(*v))
5302                    .cloned(),
5303            );
5304
5305            let mut plan = LogicalPlan::TraverseMainByType {
5306                type_names: unknown_types,
5307                input: Box::new(plan),
5308                direction: params.rel.direction.clone(),
5309                source_variable: source_variable.to_string(),
5310                target_variable: target_variable.clone(),
5311                step_variable: step_var.clone(),
5312                min_hops,
5313                max_hops,
5314                optional: params.optional,
5315                target_filter: self.node_filter_expr(
5316                    &target_variable,
5317                    &params.target_node.labels,
5318                    &params.target_node.properties,
5319                ),
5320                path_variable: path_var.clone(),
5321                is_variable_length,
5322                optional_pattern_vars: params.optional_pattern_vars.clone(),
5323                scope_match_variables,
5324                edge_filter_expr: if is_variable_length {
5325                    let filter_var = step_var
5326                        .clone()
5327                        .unwrap_or_else(|| "__anon_edge".to_string());
5328                    self.properties_to_expr(&filter_var, &params.rel.properties)
5329                } else {
5330                    None
5331                },
5332                path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5333            };
5334
5335            // Only apply bound target filter for Imported variables (from outer scope/subquery).
5336            // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
5337            // uses Parameter which requires the value to be in params (subquery context).
5338            if target_is_bound
5339                && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5340                && info.var_type == VariableType::Imported
5341            {
5342                plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
5343            }
5344
5345            // Apply relationship property predicates for fixed-length schemaless
5346            // traversals (e.g., [r:KNOWS {name: 'monkey'}]).
5347            // For VLP, predicates are stored inline in edge_filter_expr (above).
5348            // For fixed-length, wrap as a Filter node for post-traverse evaluation.
5349            if !is_variable_length
5350                && let Some(edge_var_name) = step_var.as_ref()
5351                && let Some(edge_prop_filter) =
5352                    self.properties_to_expr(edge_var_name, &params.rel.properties)
5353            {
5354                let filter_optional_vars = if params.optional {
5355                    params.optional_pattern_vars.clone()
5356                } else {
5357                    HashSet::new()
5358                };
5359                plan = LogicalPlan::Filter {
5360                    input: Box::new(plan),
5361                    predicate: edge_prop_filter,
5362                    optional_variables: filter_optional_vars,
5363                };
5364            }
5365
5366            // Add the bound variables to scope
5367            if let Some(sv) = &step_var {
5368                add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5369                if is_variable_length
5370                    && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5371                {
5372                    info.is_vlp = true;
5373                }
5374            }
5375            if let Some(pv) = &path_var
5376                && !is_var_in_scope(vars_in_scope, pv)
5377            {
5378                add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5379            }
5380            if !is_var_in_scope(vars_in_scope, &target_variable) {
5381                add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5382            }
5383
5384            return Ok((plan, target_variable.clone(), target_variable));
5385        }
5386
5387        // If we have a mix of known and unknown types, error for now
5388        // (could be extended to Union of Traverse + TraverseMainByType)
5389        if !unknown_types.is_empty() {
5390            return Err(anyhow!(
5391                "Mixed known and unknown edge types not yet supported. Unknown: {:?}",
5392                unknown_types
5393            ));
5394        }
5395
5396        // Resolve target label to either a schema id or a virtual id from the
5397        // plugin registry. Mid-pattern virtual-label dispatch (M5b.3) requires
5398        // the virtual id to flow into `Traverse.target_label_id` so the
5399        // physical planner can layer a `CatalogVertexScanExec` join on the
5400        // traverse output. Mirrors the schema-then-virtual fallthrough used
5401        // by single-vertex `Scan` planning (~`plan_node_pattern` below).
5402        let mut virtual_target_label_id: Option<u16> = None;
5403        let target_label_meta = if let Some(label_name) = params.target_node.labels.first() {
5404            // Use first label for target_label_id
5405            // For schemaless support, allow unknown target labels
5406            match self.schema.get_label_case_insensitive(label_name) {
5407                Some(meta) => Some(meta),
5408                None => {
5409                    if let Some((vid, _)) = self.allocate_virtual_label(label_name)? {
5410                        virtual_target_label_id = Some(vid);
5411                    }
5412                    None
5413                }
5414            }
5415        } else if !target_is_bound {
5416            // Infer from edge type(s)
5417            let unique_dsts: Vec<_> = dst_labels
5418                .into_iter()
5419                .collect::<HashSet<_>>()
5420                .into_iter()
5421                .collect();
5422            if unique_dsts.len() == 1 {
5423                let label_name = &unique_dsts[0];
5424                self.schema.get_label_case_insensitive(label_name)
5425            } else {
5426                // Multiple or no destination labels inferred - allow any target
5427                // This supports patterns like MATCH (a)-[:EDGE_TYPE]-(b) WHERE b:Label
5428                // where the edge type can connect to multiple labels
5429                None
5430            }
5431        } else {
5432            None
5433        };
5434
5435        // Check if this is a variable-length pattern (has range specifier like *1..3)
5436        let is_variable_length = params.rel.range.is_some();
5437
5438        // For VLP patterns, default min to 1 and max to a reasonable limit.
5439        // For single-hop patterns (no range), both are 1.
5440        const DEFAULT_MAX_HOPS: usize = 100;
5441        let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
5442            let min = range.min.unwrap_or(1) as usize;
5443            let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
5444            (min, max)
5445        } else {
5446            (1, 1)
5447        };
5448
5449        // step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
5450        //   Single-hop: step_var holds a single edge object
5451        //   VLP: step_var holds a list of edge objects
5452        // path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
5453        let step_var = params.rel.variable.clone();
5454        let path_var = params.path_variable.clone();
5455
5456        // If we have a bound edge variable from a previous clause, use a temp variable
5457        // for the Traverse step, then filter to match the bound edge
5458        let rebound_var = bound_edge_var
5459            .as_ref()
5460            .or(bound_edge_list_var.as_ref())
5461            .cloned();
5462        let effective_step_var = if let Some(ref bv) = rebound_var {
5463            Some(format!("__rebound_{}", bv))
5464        } else {
5465            step_var.clone()
5466        };
5467
5468        // If we have a bound target variable from a previous clause (e.g. WITH),
5469        // use a temp variable for the Traverse step, then filter to match the bound
5470        // target — mirroring the bound edge pattern above.
5471        let rebound_target_var = if target_is_bound && !target_variable.is_empty() {
5472            let is_imported = find_var_in_scope(vars_in_scope, &target_variable)
5473                .map(|info| info.var_type == VariableType::Imported)
5474                .unwrap_or(false);
5475            if !is_imported {
5476                Some(target_variable.clone())
5477            } else {
5478                None
5479            }
5480        } else {
5481            None
5482        };
5483
5484        let effective_target_var = if let Some(ref bv) = rebound_target_var {
5485            format!("__rebound_{}", bv)
5486        } else {
5487            target_variable.clone()
5488        };
5489
5490        // Collect all variables (node + edge) from the current MATCH clause scope
5491        // for relationship uniqueness scoping. Edge ID columns (both named `r._eid`
5492        // and anonymous `__eid_to_target`) are only included in uniqueness filtering
5493        // if their associated variable is in this set. This prevents relationship
5494        // uniqueness from being enforced across disconnected MATCH clauses.
5495        let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
5496            .iter()
5497            .map(|v| v.name.clone())
5498            .collect();
5499        // Include the current traverse's edge variable (not yet added to vars_in_scope)
5500        if let Some(ref sv) = effective_step_var {
5501            scope_match_variables.insert(sv.clone());
5502        }
5503        // Include the target variable (not yet added to vars_in_scope)
5504        scope_match_variables.insert(effective_target_var.clone());
5505        // Include bound edge variables from this path for cross-segment Trail mode
5506        // enforcement (same as the schemaless path above).
5507        scope_match_variables.extend(path_bound_edge_vars.iter().cloned());
5508
5509        let mut plan = LogicalPlan::Traverse {
5510            input: Box::new(plan),
5511            edge_type_ids,
5512            direction: params.rel.direction.clone(),
5513            source_variable: source_variable.to_string(),
5514            target_variable: effective_target_var.clone(),
5515            target_label_id: target_label_meta
5516                .map(|m| m.id)
5517                .or(virtual_target_label_id)
5518                .unwrap_or(0),
5519            step_variable: effective_step_var.clone(),
5520            min_hops,
5521            max_hops,
5522            optional: params.optional,
5523            target_filter: self.node_filter_expr(
5524                &target_variable,
5525                &params.target_node.labels,
5526                &params.target_node.properties,
5527            ),
5528            path_variable: path_var.clone(),
5529            edge_properties: HashSet::new(),
5530            is_variable_length,
5531            optional_pattern_vars: params.optional_pattern_vars.clone(),
5532            scope_match_variables,
5533            edge_filter_expr: if is_variable_length {
5534                // Use the step variable name, or a fallback for anonymous edges.
5535                // The variable name is used by properties_to_expr to build
5536                // `var.prop = value` expressions. For BFS property checking,
5537                // only the property name and value matter (the variable name
5538                // is stripped during extraction).
5539                let filter_var = effective_step_var
5540                    .clone()
5541                    .unwrap_or_else(|| "__anon_edge".to_string());
5542                self.properties_to_expr(&filter_var, &params.rel.properties)
5543            } else {
5544                None
5545            },
5546            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5547            qpp_steps: None,
5548        };
5549
5550        // Pre-compute optional variables set for filter nodes in this traverse.
5551        // Used by relationship property filters and bound-edge filters below.
5552        let filter_optional_vars = if params.optional {
5553            params.optional_pattern_vars.clone()
5554        } else {
5555            HashSet::new()
5556        };
5557
5558        // Apply relationship property predicates (e.g. [r {k: v}]).
5559        // For VLP, predicates are stored inline in edge_filter_expr (above).
5560        // For fixed-length, wrap as a Filter node for post-traverse evaluation.
5561        if !is_variable_length
5562            && let Some(edge_var_name) = effective_step_var.as_ref()
5563            && let Some(edge_prop_filter) =
5564                self.properties_to_expr(edge_var_name, &params.rel.properties)
5565        {
5566            plan = LogicalPlan::Filter {
5567                input: Box::new(plan),
5568                predicate: edge_prop_filter,
5569                optional_variables: filter_optional_vars.clone(),
5570            };
5571        }
5572
5573        // Only apply bound target filter for Imported variables (from outer scope/subquery).
5574        // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
5575        // uses Parameter which requires the value to be in params (subquery context).
5576        if target_is_bound
5577            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5578            && info.var_type == VariableType::Imported
5579        {
5580            plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
5581        }
5582
5583        // If we have a bound edge variable, add a filter to match it
5584        if let Some(ref bv) = bound_edge_var {
5585            let temp_var = format!("__rebound_{}", bv);
5586            let bound_check = Expr::BinaryOp {
5587                left: Box::new(Expr::Property(
5588                    Box::new(Expr::Variable(temp_var)),
5589                    "_eid".to_string(),
5590                )),
5591                op: BinaryOp::Eq,
5592                right: Box::new(Expr::Property(
5593                    Box::new(Expr::Variable(bv.clone())),
5594                    "_eid".to_string(),
5595                )),
5596            };
5597            plan = LogicalPlan::Filter {
5598                input: Box::new(plan),
5599                predicate: bound_check,
5600                optional_variables: filter_optional_vars.clone(),
5601            };
5602        }
5603
5604        // If we have a bound relationship list variable for a VLP pattern,
5605        // add a filter to match the traversed relationship list exactly.
5606        if let Some(ref bv) = bound_edge_list_var {
5607            let temp_var = format!("__rebound_{}", bv);
5608            let temp_eids = Expr::ListComprehension {
5609                variable: "__rebound_edge".to_string(),
5610                list: Box::new(Expr::Variable(temp_var)),
5611                where_clause: None,
5612                map_expr: Box::new(Expr::FunctionCall {
5613                    name: "toInteger".to_string(),
5614                    args: vec![Expr::Property(
5615                        Box::new(Expr::Variable("__rebound_edge".to_string())),
5616                        "_eid".to_string(),
5617                    )],
5618                    distinct: false,
5619                    window_spec: None,
5620                }),
5621            };
5622            let bound_eids = Expr::ListComprehension {
5623                variable: "__bound_edge".to_string(),
5624                list: Box::new(Expr::Variable(bv.clone())),
5625                where_clause: None,
5626                map_expr: Box::new(Expr::FunctionCall {
5627                    name: "toInteger".to_string(),
5628                    args: vec![Expr::Property(
5629                        Box::new(Expr::Variable("__bound_edge".to_string())),
5630                        "_eid".to_string(),
5631                    )],
5632                    distinct: false,
5633                    window_spec: None,
5634                }),
5635            };
5636            let bound_list_check = Expr::BinaryOp {
5637                left: Box::new(temp_eids),
5638                op: BinaryOp::Eq,
5639                right: Box::new(bound_eids),
5640            };
5641            plan = LogicalPlan::Filter {
5642                input: Box::new(plan),
5643                predicate: bound_list_check,
5644                optional_variables: filter_optional_vars.clone(),
5645            };
5646        }
5647
5648        // If we have a bound target variable (non-imported), add a filter to constrain
5649        // the traversal output to match the previously bound target node.
5650        if let Some(ref bv) = rebound_target_var {
5651            let temp_var = format!("__rebound_{}", bv);
5652            let bound_check = Expr::BinaryOp {
5653                left: Box::new(Expr::Property(
5654                    Box::new(Expr::Variable(temp_var.clone())),
5655                    "_vid".to_string(),
5656                )),
5657                op: BinaryOp::Eq,
5658                right: Box::new(Expr::Property(
5659                    Box::new(Expr::Variable(bv.clone())),
5660                    "_vid".to_string(),
5661                )),
5662            };
5663            // For OPTIONAL MATCH, include the rebound variable in optional_variables
5664            // so that OptionalFilterExec excludes it from the grouping key and
5665            // properly nullifies it in recovery rows when all matches are filtered out.
5666            // Without this, each traverse result creates its own group (keyed by
5667            // __rebound_c._vid), and null-row recovery emits a spurious null row
5668            // for every non-matching target instead of one per source group.
5669            let mut rebound_filter_vars = filter_optional_vars;
5670            if params.optional {
5671                rebound_filter_vars.insert(temp_var);
5672            }
5673            plan = LogicalPlan::Filter {
5674                input: Box::new(plan),
5675                predicate: bound_check,
5676                optional_variables: rebound_filter_vars,
5677            };
5678        }
5679
5680        // Add the bound variables to scope
5681        // Skip adding the edge variable if it's already bound from a previous clause
5682        if let Some(sv) = &step_var
5683            && bound_edge_var.is_none()
5684            && bound_edge_list_var.is_none()
5685        {
5686            add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5687            if is_variable_length
5688                && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5689            {
5690                info.is_vlp = true;
5691            }
5692        }
5693        if let Some(pv) = &path_var
5694            && !is_var_in_scope(vars_in_scope, pv)
5695        {
5696            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5697        }
5698        if !is_var_in_scope(vars_in_scope, &target_variable) {
5699            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5700        }
5701
5702        Ok((plan, target_variable, effective_target_var))
5703    }
5704
5705    /// Combine a new scan plan with an existing plan.
5706    ///
5707    /// If the existing plan is `Empty`, returns the new plan directly.
5708    /// Otherwise, wraps them in a `CrossJoin`.
5709    fn join_with_plan(existing: LogicalPlan, new: LogicalPlan) -> LogicalPlan {
5710        if matches!(existing, LogicalPlan::Empty) {
5711            new
5712        } else {
5713            LogicalPlan::CrossJoin {
5714                left: Box::new(existing),
5715                right: Box::new(new),
5716            }
5717        }
5718    }
5719
5720    /// Split node map predicates into scan-pushable and residual filters.
5721    ///
5722    /// A predicate is scan-pushable when its value expression references only
5723    /// the node variable itself (or no variables). Predicates referencing other
5724    /// in-scope variables (correlated predicates) are returned as residual so
5725    /// they can be applied after joining with the existing plan.
5726    fn split_node_property_filters_for_scan(
5727        &self,
5728        variable: &str,
5729        properties: &Option<Expr>,
5730    ) -> (Option<Expr>, Option<Expr>) {
5731        let entries = match properties {
5732            Some(Expr::Map(entries)) => entries,
5733            _ => return (None, None),
5734        };
5735
5736        if entries.is_empty() {
5737            return (None, None);
5738        }
5739
5740        let mut pushdown_entries = Vec::new();
5741        let mut residual_entries = Vec::new();
5742
5743        for (prop, val_expr) in entries {
5744            let vars = collect_expr_variables(val_expr);
5745            if vars.iter().all(|v| v == variable) {
5746                pushdown_entries.push((prop.clone(), val_expr.clone()));
5747            } else {
5748                residual_entries.push((prop.clone(), val_expr.clone()));
5749            }
5750        }
5751
5752        let pushdown_map = if pushdown_entries.is_empty() {
5753            None
5754        } else {
5755            Some(Expr::Map(pushdown_entries))
5756        };
5757        let residual_map = if residual_entries.is_empty() {
5758            None
5759        } else {
5760            Some(Expr::Map(residual_entries))
5761        };
5762
5763        (
5764            self.properties_to_expr(variable, &pushdown_map),
5765            self.properties_to_expr(variable, &residual_map),
5766        )
5767    }
5768
5769    /// Decide whether per-label `Scan` branches for a label disjunction can
5770    /// safely be combined under `LogicalPlan::Union`. Returns `true` iff every
5771    /// label in `labels` is registered in the schema AND every pair shares an
5772    /// identical property name+type set.
5773    ///
5774    /// When this returns `false`, the disjunction must fall back to a single
5775    /// `ScanMainByLabels` over all labels — otherwise DataFusion's
5776    /// `UnionExec::try_new` panics in `union_schema` because the per-label
5777    /// `GraphScanExec` outputs (`_vid` + `_labels` + per-label projected
5778    /// properties) have different field counts. Issue rustic-ai/uni-db#62.
5779    ///
5780    /// We deliberately compare full schema property sets rather than only the
5781    /// properties referenced by the current query: at this logical-planning
5782    /// stage we have not yet collected `all_properties`, and `*` wildcards
5783    /// (e.g. from unknown function calls) would expand per-label downstream
5784    /// in `df_planner::resolve_properties` even when the query text only
5785    /// touches common columns.
5786    fn label_branches_share_property_schema(&self, labels: &[String]) -> bool {
5787        if labels.len() < 2 {
5788            return true;
5789        }
5790        let mut iter = labels.iter();
5791        let first = iter.next().expect("len >= 2");
5792        let Some(first_props) = self.schema.properties.get(first) else {
5793            return false;
5794        };
5795        for label in iter {
5796            let Some(props) = self.schema.properties.get(label) else {
5797                return false;
5798            };
5799            if props.len() != first_props.len() {
5800                return false;
5801            }
5802            for (name, meta) in first_props {
5803                let Some(other_meta) = props.get(name) else {
5804                    return false;
5805                };
5806                if meta.r#type != other_meta.r#type {
5807                    return false;
5808                }
5809            }
5810        }
5811        true
5812    }
5813
5814    /// Plan an unbound node (creates a Scan, ScanAll, ScanMainByLabel, ExtIdLookup, or CrossJoin).
5815    fn plan_unbound_node(
5816        &self,
5817        node: &NodePattern,
5818        variable: &str,
5819        plan: LogicalPlan,
5820        optional: bool,
5821    ) -> Result<LogicalPlan> {
5822        // Properties handling
5823        let properties = match &node.properties {
5824            Some(Expr::Map(entries)) => entries.as_slice(),
5825            Some(Expr::Parameter(_)) => {
5826                return Err(anyhow!(
5827                    "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
5828                ));
5829            }
5830            Some(_) => return Err(anyhow!("Node properties must be a Map")),
5831            None => &[],
5832        };
5833
5834        let has_existing_scope = !matches!(plan, LogicalPlan::Empty);
5835
5836        let apply_residual_filter = |input: LogicalPlan, residual: Option<Expr>| -> LogicalPlan {
5837            if let Some(predicate) = residual {
5838                LogicalPlan::Filter {
5839                    input: Box::new(input),
5840                    predicate,
5841                    optional_variables: HashSet::new(),
5842                }
5843            } else {
5844                input
5845            }
5846        };
5847
5848        let (node_scan_filter, node_residual_filter) = if has_existing_scope {
5849            self.split_node_property_filters_for_scan(variable, &node.properties)
5850        } else {
5851            (self.properties_to_expr(variable, &node.properties), None)
5852        };
5853
5854        // Check for ext_id in properties when no label is specified
5855        if node.labels.is_empty() {
5856            // Try to find ext_id property for main table lookup
5857            if let Some((_, ext_id_value)) = properties.iter().find(|(k, _)| k == "ext_id") {
5858                // Extract the ext_id value as a string
5859                let ext_id = match ext_id_value {
5860                    Expr::Literal(CypherLiteral::String(s)) => s.clone(),
5861                    _ => {
5862                        return Err(anyhow!("ext_id must be a string literal for direct lookup"));
5863                    }
5864                };
5865
5866                // Build filter for remaining properties (excluding ext_id)
5867                let remaining_props: Vec<_> = properties
5868                    .iter()
5869                    .filter(|(k, _)| k != "ext_id")
5870                    .cloned()
5871                    .collect();
5872
5873                let remaining_expr = if remaining_props.is_empty() {
5874                    None
5875                } else {
5876                    Some(Expr::Map(remaining_props))
5877                };
5878
5879                let (prop_filter, residual_filter) = if has_existing_scope {
5880                    self.split_node_property_filters_for_scan(variable, &remaining_expr)
5881                } else {
5882                    (self.properties_to_expr(variable, &remaining_expr), None)
5883                };
5884
5885                let ext_id_lookup = LogicalPlan::ExtIdLookup {
5886                    variable: variable.to_string(),
5887                    ext_id,
5888                    filter: prop_filter,
5889                    optional,
5890                };
5891
5892                let joined = Self::join_with_plan(plan, ext_id_lookup);
5893                return Ok(apply_residual_filter(joined, residual_filter));
5894            }
5895
5896            // No ext_id: create ScanAll for unlabeled node pattern
5897            let scan_all = LogicalPlan::ScanAll {
5898                variable: variable.to_string(),
5899                filter: node_scan_filter,
5900                optional,
5901            };
5902
5903            let joined = Self::join_with_plan(plan, scan_all);
5904            return Ok(apply_residual_filter(joined, node_residual_filter));
5905        }
5906
5907        // Label disjunction `(n:A|B|C)` — emit Union of label-scoped Scans.
5908        //
5909        // Storage fact: a multi-labeled vertex is fanned out into every
5910        // per-label table it carries (uni-store/src/runtime/writer.rs's
5911        // `push_vertex_to_labels`), so the same vid can appear in both the
5912        // `A` scan and the `B` scan of a disjunctive query. Use
5913        // `Union { all: false }` so the combined result deduplicates by row
5914        // contents (which include the vid) rather than emitting the same
5915        // vertex twice. The single-label-disjunction case (`Disjunction(["A"])`)
5916        // is encoded the same way the parser already encodes single edge
5917        // types, and reduces to one Scan with no Union wrapping.
5918        if node.labels.is_proper_disjunction() {
5919            let label_names: Vec<String> = node.labels.names().to_vec();
5920
5921            // Per-label branches under a `Union` only line up when every
5922            // branch produces the same Arrow schema. The narrow-scan
5923            // `Scan` path resolves columns *per label*, so heterogeneous
5924            // property sets (or any schemaless label in the mix) yield
5925            // mismatched widths and DataFusion's `UnionExec::try_new`
5926            // panics inside `union_schema` (issue rustic-ai/uni-db#62).
5927            //
5928            // For those cases, lower every branch to a *single-label*
5929            // `ScanMainByLabels` instead. The schemaless main-table scan
5930            // resolves columns from `all_properties` directly (no per-label
5931            // expansion), so all branches emit a uniform schema and the
5932            // outer `Union { all: false }` deduplicates correctly. We
5933            // keep the per-branch Union shape (rather than collapsing to
5934            // a single multi-label scan) because multi-label
5935            // `ScanMainByLabels` has AND/intersection semantics — wrong
5936            // for a disjunction.
5937            let use_main_table_branches = !self.label_branches_share_property_schema(&label_names);
5938
5939            let mut branches: Vec<LogicalPlan> = Vec::with_capacity(label_names.len());
5940            for label_name in &label_names {
5941                let branch = if use_main_table_branches {
5942                    LogicalPlan::ScanMainByLabels {
5943                        labels: vec![label_name.clone()],
5944                        variable: variable.to_string(),
5945                        filter: node_scan_filter.clone(),
5946                        optional,
5947                    }
5948                } else {
5949                    let meta = self
5950                        .schema
5951                        .get_label_case_insensitive(label_name)
5952                        .expect("share_property_schema true implies all labels in schema");
5953                    LogicalPlan::Scan {
5954                        label_id: meta.id,
5955                        labels: vec![label_name.clone()],
5956                        variable: variable.to_string(),
5957                        filter: node_scan_filter.clone(),
5958                        optional,
5959                    }
5960                };
5961                branches.push(branch);
5962            }
5963            // Left-leaning Union: Union(Union(A, B), C). All inner
5964            // unions dedupe by row, so the outer one does too.
5965            let mut iter = branches.into_iter();
5966            let mut union_plan = iter
5967                .next()
5968                .expect("is_proper_disjunction implies at least 2 labels");
5969            for next in iter {
5970                union_plan = LogicalPlan::Union {
5971                    left: Box::new(union_plan),
5972                    right: Box::new(next),
5973                    all: false,
5974                };
5975            }
5976            let joined = Self::join_with_plan(plan, union_plan);
5977            return Ok(apply_residual_filter(joined, node_residual_filter));
5978        }
5979
5980        // Use first label for label_id (primary label for dataset selection)
5981        let label_name = &node.labels[0];
5982
5983        // Check if label exists in schema
5984        if let Some(label_meta) = self.schema.get_label_case_insensitive(label_name) {
5985            // Known label: use standard Scan
5986            let scan = LogicalPlan::Scan {
5987                label_id: label_meta.id,
5988                labels: node.labels.names().to_vec(),
5989                variable: variable.to_string(),
5990                filter: node_scan_filter,
5991                optional,
5992            };
5993
5994            let joined = Self::join_with_plan(plan, scan);
5995            Ok(apply_residual_filter(joined, node_residual_filter))
5996        } else {
5997            // Unknown label. Try a CatalogProvider / ReplacementScanProvider
5998            // claim first: on success allocate a virtual label-ID and emit a
5999            // regular `Scan` against the virtual id (`df_planner` dispatches
6000            // to `CatalogVertexScanExec`). When no provider claims and the
6001            // replacement-scan gate is on, strict-mode errors. When the gate
6002            // is off and no provider claims, preserve today's silent-empty
6003            // schemaless `ScanMainByLabels` behavior bit-for-bit.
6004            if let Some((virtual_id, _)) = self.allocate_virtual_label(label_name)? {
6005                let scan = LogicalPlan::Scan {
6006                    label_id: virtual_id,
6007                    labels: node.labels.names().to_vec(),
6008                    variable: variable.to_string(),
6009                    filter: node_scan_filter,
6010                    optional,
6011                };
6012                let joined = Self::join_with_plan(plan, scan);
6013                return Ok(apply_residual_filter(joined, node_residual_filter));
6014            }
6015            if self.replacement_scans_enabled {
6016                return Err(anyhow!(
6017                    "Label `{}` is not defined in schema and no \
6018                     CatalogProvider or ReplacementScanProvider claimed it; \
6019                     strict-mode (replacement_scans=true) requires the label \
6020                     to resolve",
6021                    label_name
6022                ));
6023            }
6024
6025            let scan_main = LogicalPlan::ScanMainByLabels {
6026                labels: node.labels.names().to_vec(),
6027                variable: variable.to_string(),
6028                filter: node_scan_filter,
6029                optional,
6030            };
6031
6032            let joined = Self::join_with_plan(plan, scan_main);
6033            Ok(apply_residual_filter(joined, node_residual_filter))
6034        }
6035    }
6036
6037    /// Plan a WHERE clause with vector_similarity extraction and predicate pushdown.
6038    ///
6039    /// When `optional_vars` is non-empty, the Filter will preserve rows where
6040    /// any of those variables are NULL (for OPTIONAL MATCH semantics).
6041    fn plan_where_clause(
6042        &self,
6043        predicate: &Expr,
6044        plan: LogicalPlan,
6045        vars_in_scope: &[VariableInfo],
6046        optional_vars: HashSet<String>,
6047    ) -> Result<LogicalPlan> {
6048        // Validate no aggregation functions in WHERE clause
6049        validate_no_aggregation_in_where(predicate)?;
6050
6051        // Validate all variables used are in scope
6052        validate_expression_variables(predicate, vars_in_scope)?;
6053
6054        // Validate expression types (function args, boolean operators)
6055        validate_expression(predicate, vars_in_scope)?;
6056
6057        // Check that WHERE predicate isn't a bare node/edge/path variable
6058        if let Expr::Variable(var_name) = predicate
6059            && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
6060            && matches!(
6061                info.var_type,
6062                VariableType::Node | VariableType::Edge | VariableType::Path
6063            )
6064        {
6065            return Err(anyhow!(
6066                "SyntaxError: InvalidArgumentType - Type mismatch: expected Boolean but was {:?}",
6067                info.var_type
6068            ));
6069        }
6070
6071        let mut plan = plan;
6072
6073        // Transform VALID_AT macro to function call
6074        let transformed_predicate = Self::transform_valid_at_to_function(predicate.clone());
6075
6076        // Rewrite id(var) to var._vid (or var._eid for an edge) so
6077        // PredicateAnalyzer can push it down.
6078        let transformed_predicate = Self::rewrite_id_to_vid(transformed_predicate, vars_in_scope);
6079
6080        let mut current_predicate =
6081            self.rewrite_predicates_using_indexes(&transformed_predicate, &plan, vars_in_scope)?;
6082
6083        // 1. Try to extract vector_similarity predicate for optimization
6084        if let Some(extraction) = extract_vector_similarity(&current_predicate) {
6085            let vs = &extraction.predicate;
6086            if Self::find_scan_label_id(&plan, &vs.variable).is_some() {
6087                plan = Self::replace_scan_with_knn(
6088                    plan,
6089                    &vs.variable,
6090                    &vs.property,
6091                    vs.query.clone(),
6092                    vs.threshold,
6093                );
6094                if let Some(residual) = extraction.residual {
6095                    current_predicate = residual;
6096                } else {
6097                    current_predicate = Expr::TRUE;
6098                }
6099            }
6100        }
6101
6102        // 2. Label/type disjunction → narrow-scan rewrite.
6103        //
6104        // `WHERE n:A OR n:B` and `WHERE type(r) = 'A' OR type(r) = 'B'`
6105        // are functionally identical to the inline forms `(n:A|B)` and
6106        // `[r:A|B]`, but a literal pattern lowering would route them
6107        // through `Filter(LabelCheck OR LabelCheck)` over `ScanAll` —
6108        // a full vertex/edge scan plus residual filter, missing the
6109        // narrow-scan fast-path that the inline forms get for free.
6110        // Detect those OR-chains here and rewrite the upstream
6111        // `ScanAll` / `Traverse` accordingly.
6112        let conjuncts = Self::split_and_conjuncts(&current_predicate);
6113        let mut keep: Vec<Expr> = Vec::with_capacity(conjuncts.len());
6114        for conj in conjuncts {
6115            let mut consumed = false;
6116            for var in vars_in_scope {
6117                if optional_vars.contains(&var.name) {
6118                    continue;
6119                }
6120                // Node label disjunction → Union of label-scoped Scans.
6121                if Self::is_scan_all_for(&plan, &var.name)
6122                    && let Some(labels) = try_label_or_to_union(&conj, &var.name)
6123                {
6124                    plan = self.replace_scan_all_with_label_union(plan, &var.name, &labels, false);
6125                    consumed = true;
6126                    break;
6127                }
6128                // Edge type disjunction → merge into Traverse.edge_type_ids.
6129                if let Some(types) = try_type_or_to_union(&conj, &var.name)
6130                    && Self::merge_traverse_types_for(&plan, &var.name, &types).is_some()
6131                {
6132                    let mut ids: Vec<u32> = Vec::with_capacity(types.len());
6133                    let mut all_known = true;
6134                    for t in &types {
6135                        match self.schema.edge_types.get(t) {
6136                            Some(meta) => ids.push(meta.id),
6137                            None => {
6138                                all_known = false;
6139                                break;
6140                            }
6141                        }
6142                    }
6143                    if all_known {
6144                        plan = Self::set_traverse_edge_type_ids(plan, &var.name, ids);
6145                        consumed = true;
6146                        break;
6147                    }
6148                }
6149            }
6150            if !consumed {
6151                keep.push(conj);
6152            }
6153        }
6154        current_predicate = Self::combine_predicates(keep).unwrap_or(Expr::TRUE);
6155
6156        // 3. Push eligible predicates to Scan OR Traverse filters
6157        // Note: Do NOT push predicates on optional variables (from OPTIONAL MATCH) to
6158        // Traverse's target_filter, because target_filter filtering doesn't preserve NULL
6159        // rows. Let them stay in the Filter operator which handles NULL preservation.
6160        for var in vars_in_scope {
6161            // Skip pushdown for optional variables - they need NULL preservation in Filter
6162            if optional_vars.contains(&var.name) {
6163                continue;
6164            }
6165
6166            // Check if var is produced by a Scan
6167            if Self::find_scan_label_id(&plan, &var.name).is_some() {
6168                let (pushable, residual) =
6169                    Self::extract_variable_predicates(&current_predicate, &var.name);
6170
6171                for pred in pushable {
6172                    plan = Self::push_predicate_to_scan(plan, &var.name, pred);
6173                }
6174
6175                if let Some(r) = residual {
6176                    current_predicate = r;
6177                } else {
6178                    current_predicate = Expr::TRUE;
6179                }
6180            } else if Self::is_traverse_target(&plan, &var.name) {
6181                // Push to Traverse
6182                let (pushable, residual) =
6183                    Self::extract_variable_predicates(&current_predicate, &var.name);
6184
6185                for pred in pushable {
6186                    plan = Self::push_predicate_to_traverse(plan, &var.name, pred);
6187                }
6188
6189                if let Some(r) = residual {
6190                    current_predicate = r;
6191                } else {
6192                    current_predicate = Expr::TRUE;
6193                }
6194            }
6195        }
6196
6197        // 4. Push predicates to Apply.input_filter
6198        // This filters input rows BEFORE executing correlated subqueries.
6199        plan = Self::push_predicates_to_apply(plan, &mut current_predicate);
6200
6201        // 5. Add Filter node for any remaining predicates
6202        if !current_predicate.is_true_literal() {
6203            plan = LogicalPlan::Filter {
6204                input: Box::new(plan),
6205                predicate: current_predicate,
6206                optional_variables: optional_vars,
6207            };
6208        }
6209
6210        Ok(plan)
6211    }
6212
6213    fn rewrite_predicates_using_indexes(
6214        &self,
6215        predicate: &Expr,
6216        plan: &LogicalPlan,
6217        vars_in_scope: &[VariableInfo],
6218    ) -> Result<Expr> {
6219        let mut rewritten = predicate.clone();
6220
6221        for var in vars_in_scope {
6222            if let Some(label_id) = Self::find_scan_label_id(plan, &var.name) {
6223                // Find label name
6224                let label_name = self.schema.label_name_by_id(label_id).map(str::to_owned);
6225
6226                if let Some(label) = label_name
6227                    && let Some(props) = self.schema.properties.get(&label)
6228                {
6229                    for (gen_col, meta) in props {
6230                        if meta.generation_expression.is_some() {
6231                            // Use cached parsed expression
6232                            if let Some(schema_expr) =
6233                                self.gen_expr_cache.get(&(label.clone(), gen_col.clone()))
6234                            {
6235                                // Rewrite 'rewritten' replacing occurrences of schema_expr with gen_col
6236                                rewritten = Self::replace_expression(
6237                                    rewritten,
6238                                    schema_expr,
6239                                    &var.name,
6240                                    gen_col,
6241                                );
6242                            }
6243                        }
6244                    }
6245                }
6246            }
6247        }
6248        Ok(rewritten)
6249    }
6250
6251    fn replace_expression(expr: Expr, schema_expr: &Expr, query_var: &str, gen_col: &str) -> Expr {
6252        // First, normalize schema_expr to use query_var
6253        let schema_var = schema_expr.extract_variable();
6254
6255        if let Some(s_var) = schema_var {
6256            let target_expr = schema_expr.substitute_variable(&s_var, query_var);
6257
6258            if expr == target_expr {
6259                return Expr::Property(
6260                    Box::new(Expr::Variable(query_var.to_string())),
6261                    gen_col.to_string(),
6262                );
6263            }
6264        }
6265
6266        // Recurse
6267        match expr {
6268            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
6269                left: Box::new(Self::replace_expression(
6270                    *left,
6271                    schema_expr,
6272                    query_var,
6273                    gen_col,
6274                )),
6275                op,
6276                right: Box::new(Self::replace_expression(
6277                    *right,
6278                    schema_expr,
6279                    query_var,
6280                    gen_col,
6281                )),
6282            },
6283            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
6284                op,
6285                expr: Box::new(Self::replace_expression(
6286                    *expr,
6287                    schema_expr,
6288                    query_var,
6289                    gen_col,
6290                )),
6291            },
6292            Expr::FunctionCall {
6293                name,
6294                args,
6295                distinct,
6296                window_spec,
6297            } => Expr::FunctionCall {
6298                name,
6299                args: args
6300                    .into_iter()
6301                    .map(|a| Self::replace_expression(a, schema_expr, query_var, gen_col))
6302                    .collect(),
6303                distinct,
6304                window_spec,
6305            },
6306            Expr::IsNull(expr) => Expr::IsNull(Box::new(Self::replace_expression(
6307                *expr,
6308                schema_expr,
6309                query_var,
6310                gen_col,
6311            ))),
6312            Expr::IsNotNull(expr) => Expr::IsNotNull(Box::new(Self::replace_expression(
6313                *expr,
6314                schema_expr,
6315                query_var,
6316                gen_col,
6317            ))),
6318            Expr::IsUnique(expr) => Expr::IsUnique(Box::new(Self::replace_expression(
6319                *expr,
6320                schema_expr,
6321                query_var,
6322                gen_col,
6323            ))),
6324            Expr::ArrayIndex {
6325                array: e,
6326                index: idx,
6327            } => Expr::ArrayIndex {
6328                array: Box::new(Self::replace_expression(
6329                    *e,
6330                    schema_expr,
6331                    query_var,
6332                    gen_col,
6333                )),
6334                index: Box::new(Self::replace_expression(
6335                    *idx,
6336                    schema_expr,
6337                    query_var,
6338                    gen_col,
6339                )),
6340            },
6341            Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
6342                array: Box::new(Self::replace_expression(
6343                    *array,
6344                    schema_expr,
6345                    query_var,
6346                    gen_col,
6347                )),
6348                start: start.map(|s| {
6349                    Box::new(Self::replace_expression(
6350                        *s,
6351                        schema_expr,
6352                        query_var,
6353                        gen_col,
6354                    ))
6355                }),
6356                end: end.map(|e| {
6357                    Box::new(Self::replace_expression(
6358                        *e,
6359                        schema_expr,
6360                        query_var,
6361                        gen_col,
6362                    ))
6363                }),
6364            },
6365            Expr::List(exprs) => Expr::List(
6366                exprs
6367                    .into_iter()
6368                    .map(|e| Self::replace_expression(e, schema_expr, query_var, gen_col))
6369                    .collect(),
6370            ),
6371            Expr::Map(entries) => Expr::Map(
6372                entries
6373                    .into_iter()
6374                    .map(|(k, v)| {
6375                        (
6376                            k,
6377                            Self::replace_expression(v, schema_expr, query_var, gen_col),
6378                        )
6379                    })
6380                    .collect(),
6381            ),
6382            Expr::Property(e, prop) => Expr::Property(
6383                Box::new(Self::replace_expression(
6384                    *e,
6385                    schema_expr,
6386                    query_var,
6387                    gen_col,
6388                )),
6389                prop,
6390            ),
6391            Expr::Case {
6392                expr: case_expr,
6393                when_then,
6394                else_expr,
6395            } => Expr::Case {
6396                expr: case_expr.map(|e| {
6397                    Box::new(Self::replace_expression(
6398                        *e,
6399                        schema_expr,
6400                        query_var,
6401                        gen_col,
6402                    ))
6403                }),
6404                when_then: when_then
6405                    .into_iter()
6406                    .map(|(w, t)| {
6407                        (
6408                            Self::replace_expression(w, schema_expr, query_var, gen_col),
6409                            Self::replace_expression(t, schema_expr, query_var, gen_col),
6410                        )
6411                    })
6412                    .collect(),
6413                else_expr: else_expr.map(|e| {
6414                    Box::new(Self::replace_expression(
6415                        *e,
6416                        schema_expr,
6417                        query_var,
6418                        gen_col,
6419                    ))
6420                }),
6421            },
6422            Expr::Reduce {
6423                accumulator,
6424                init,
6425                variable: reduce_var,
6426                list,
6427                expr: reduce_expr,
6428            } => Expr::Reduce {
6429                accumulator,
6430                init: Box::new(Self::replace_expression(
6431                    *init,
6432                    schema_expr,
6433                    query_var,
6434                    gen_col,
6435                )),
6436                variable: reduce_var,
6437                list: Box::new(Self::replace_expression(
6438                    *list,
6439                    schema_expr,
6440                    query_var,
6441                    gen_col,
6442                )),
6443                expr: Box::new(Self::replace_expression(
6444                    *reduce_expr,
6445                    schema_expr,
6446                    query_var,
6447                    gen_col,
6448                )),
6449            },
6450
6451            // Leaf nodes (Identifier, Literal, Parameter, etc.) need no recursion
6452            _ => expr,
6453        }
6454    }
6455
6456    /// Returns `true` iff `variable` is bound to a `ScanAll` operator
6457    /// (somewhere under `plan`). Used to gate the
6458    /// `WHERE n:A OR n:B` → `Union(Scan{A}, Scan{B})` rewrite — we only
6459    /// fire it when the variable is currently doing a full vertex scan,
6460    /// not when it's already bound to a labeled `Scan`.
6461    fn is_scan_all_for(plan: &LogicalPlan, variable: &str) -> bool {
6462        match plan {
6463            LogicalPlan::ScanAll { variable: var, .. } => var == variable,
6464            LogicalPlan::Filter { input, .. }
6465            | LogicalPlan::Project { input, .. }
6466            | LogicalPlan::Sort { input, .. }
6467            | LogicalPlan::Limit { input, .. }
6468            | LogicalPlan::Aggregate { input, .. }
6469            | LogicalPlan::Apply { input, .. }
6470            | LogicalPlan::Traverse { input, .. } => Self::is_scan_all_for(input, variable),
6471            LogicalPlan::CrossJoin { left, right } => {
6472                Self::is_scan_all_for(left, variable) || Self::is_scan_all_for(right, variable)
6473            }
6474            LogicalPlan::Union { left, right, .. } => {
6475                Self::is_scan_all_for(left, variable) || Self::is_scan_all_for(right, variable)
6476            }
6477            _ => false,
6478        }
6479    }
6480
6481    /// Replace the `ScanAll` for `variable` in `plan` with a left-leaning
6482    /// `Union` of label-scoped `Scan` (or `ScanMainByLabels` for unknown
6483    /// labels) operators built from `labels`. Used by the
6484    /// `WHERE n:A OR n:B` rewrite.
6485    fn replace_scan_all_with_label_union(
6486        &self,
6487        plan: LogicalPlan,
6488        variable: &str,
6489        labels: &[String],
6490        optional: bool,
6491    ) -> LogicalPlan {
6492        match plan {
6493            LogicalPlan::ScanAll {
6494                variable: var,
6495                filter,
6496                optional: scan_optional,
6497            } if var == variable => {
6498                // Heterogeneous (or any-schemaless) disjunction: route every
6499                // branch through a single-label `ScanMainByLabels` so all
6500                // branches emit a uniform schemaless schema. Avoids the
6501                // DataFusion `union_schema` panic. See `plan_unbound_node`
6502                // and issue rustic-ai/uni-db#62.
6503                let use_main_table_branches = !self.label_branches_share_property_schema(labels);
6504
6505                let mut branches: Vec<LogicalPlan> = Vec::with_capacity(labels.len());
6506                for label in labels {
6507                    let branch = if use_main_table_branches {
6508                        LogicalPlan::ScanMainByLabels {
6509                            labels: vec![label.clone()],
6510                            variable: variable.to_string(),
6511                            filter: filter.clone(),
6512                            optional: scan_optional || optional,
6513                        }
6514                    } else {
6515                        let meta = self
6516                            .schema
6517                            .get_label_case_insensitive(label)
6518                            .expect("share_property_schema true implies all labels in schema");
6519                        LogicalPlan::Scan {
6520                            label_id: meta.id,
6521                            labels: vec![label.clone()],
6522                            variable: variable.to_string(),
6523                            filter: filter.clone(),
6524                            optional: scan_optional || optional,
6525                        }
6526                    };
6527                    branches.push(branch);
6528                }
6529                let mut iter = branches.into_iter();
6530                let mut union_plan = iter.next().expect("at least one label");
6531                for next in iter {
6532                    union_plan = LogicalPlan::Union {
6533                        left: Box::new(union_plan),
6534                        right: Box::new(next),
6535                        all: false,
6536                    };
6537                }
6538                union_plan
6539            }
6540            LogicalPlan::Filter {
6541                input,
6542                predicate,
6543                optional_variables,
6544            } => LogicalPlan::Filter {
6545                input: Box::new(
6546                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6547                ),
6548                predicate,
6549                optional_variables,
6550            },
6551            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6552                input: Box::new(
6553                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6554                ),
6555                projections,
6556            },
6557            LogicalPlan::CrossJoin { left, right } => {
6558                if Self::is_scan_all_for(&left, variable) {
6559                    LogicalPlan::CrossJoin {
6560                        left: Box::new(
6561                            self.replace_scan_all_with_label_union(
6562                                *left, variable, labels, optional,
6563                            ),
6564                        ),
6565                        right,
6566                    }
6567                } else {
6568                    LogicalPlan::CrossJoin {
6569                        left,
6570                        right: Box::new(
6571                            self.replace_scan_all_with_label_union(
6572                                *right, variable, labels, optional,
6573                            ),
6574                        ),
6575                    }
6576                }
6577            }
6578            LogicalPlan::Traverse {
6579                input,
6580                edge_type_ids,
6581                direction,
6582                source_variable,
6583                target_variable,
6584                target_label_id,
6585                step_variable,
6586                min_hops,
6587                max_hops,
6588                optional: trav_optional,
6589                target_filter,
6590                path_variable,
6591                edge_properties,
6592                is_variable_length,
6593                optional_pattern_vars,
6594                scope_match_variables,
6595                edge_filter_expr,
6596                path_mode,
6597                qpp_steps,
6598            } => LogicalPlan::Traverse {
6599                input: Box::new(
6600                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6601                ),
6602                edge_type_ids,
6603                direction,
6604                source_variable,
6605                target_variable,
6606                target_label_id,
6607                step_variable,
6608                min_hops,
6609                max_hops,
6610                optional: trav_optional,
6611                target_filter,
6612                path_variable,
6613                edge_properties,
6614                is_variable_length,
6615                optional_pattern_vars,
6616                scope_match_variables,
6617                edge_filter_expr,
6618                path_mode,
6619                qpp_steps,
6620            },
6621            other => other,
6622        }
6623    }
6624
6625    /// Returns `Some(())` iff `variable` is the `step_variable` (i.e. the
6626    /// edge variable) of some `Traverse` operator in `plan`. Used to gate
6627    /// the `WHERE type(r) = 'A' OR type(r) = 'B'` rewrite — we need a
6628    /// Traverse whose types we can merge into.
6629    fn merge_traverse_types_for(
6630        plan: &LogicalPlan,
6631        edge_var: &str,
6632        _types: &[String],
6633    ) -> Option<()> {
6634        match plan {
6635            LogicalPlan::Traverse {
6636                step_variable,
6637                input,
6638                ..
6639            } => {
6640                if step_variable.as_deref() == Some(edge_var) {
6641                    Some(())
6642                } else {
6643                    Self::merge_traverse_types_for(input, edge_var, _types)
6644                }
6645            }
6646            LogicalPlan::Filter { input, .. }
6647            | LogicalPlan::Project { input, .. }
6648            | LogicalPlan::Sort { input, .. }
6649            | LogicalPlan::Limit { input, .. }
6650            | LogicalPlan::Aggregate { input, .. }
6651            | LogicalPlan::Apply { input, .. } => {
6652                Self::merge_traverse_types_for(input, edge_var, _types)
6653            }
6654            LogicalPlan::CrossJoin { left, right } | LogicalPlan::Union { left, right, .. } => {
6655                Self::merge_traverse_types_for(left, edge_var, _types)
6656                    .or_else(|| Self::merge_traverse_types_for(right, edge_var, _types))
6657            }
6658            _ => None,
6659        }
6660    }
6661
6662    /// Replace `edge_type_ids` on the Traverse whose `step_variable`
6663    /// equals `edge_var`. Used by the type-OR rewrite.
6664    fn set_traverse_edge_type_ids(
6665        plan: LogicalPlan,
6666        edge_var: &str,
6667        new_ids: Vec<u32>,
6668    ) -> LogicalPlan {
6669        match plan {
6670            LogicalPlan::Traverse {
6671                input,
6672                edge_type_ids,
6673                direction,
6674                source_variable,
6675                target_variable,
6676                target_label_id,
6677                step_variable,
6678                min_hops,
6679                max_hops,
6680                optional,
6681                target_filter,
6682                path_variable,
6683                edge_properties,
6684                is_variable_length,
6685                optional_pattern_vars,
6686                scope_match_variables,
6687                edge_filter_expr,
6688                path_mode,
6689                qpp_steps,
6690            } => {
6691                let matches_var = step_variable.as_deref() == Some(edge_var);
6692                let recursed_input = if matches_var {
6693                    input
6694                } else {
6695                    Box::new(Self::set_traverse_edge_type_ids(
6696                        *input,
6697                        edge_var,
6698                        new_ids.clone(),
6699                    ))
6700                };
6701                LogicalPlan::Traverse {
6702                    input: recursed_input,
6703                    edge_type_ids: if matches_var { new_ids } else { edge_type_ids },
6704                    direction,
6705                    source_variable,
6706                    target_variable,
6707                    target_label_id,
6708                    step_variable,
6709                    min_hops,
6710                    max_hops,
6711                    optional,
6712                    target_filter,
6713                    path_variable,
6714                    edge_properties,
6715                    is_variable_length,
6716                    optional_pattern_vars,
6717                    scope_match_variables,
6718                    edge_filter_expr,
6719                    path_mode,
6720                    qpp_steps,
6721                }
6722            }
6723            LogicalPlan::Filter {
6724                input,
6725                predicate,
6726                optional_variables,
6727            } => LogicalPlan::Filter {
6728                input: Box::new(Self::set_traverse_edge_type_ids(*input, edge_var, new_ids)),
6729                predicate,
6730                optional_variables,
6731            },
6732            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6733                input: Box::new(Self::set_traverse_edge_type_ids(*input, edge_var, new_ids)),
6734                projections,
6735            },
6736            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6737                left: Box::new(Self::set_traverse_edge_type_ids(
6738                    *left,
6739                    edge_var,
6740                    new_ids.clone(),
6741                )),
6742                right: Box::new(Self::set_traverse_edge_type_ids(*right, edge_var, new_ids)),
6743            },
6744            other => other,
6745        }
6746    }
6747
6748    /// Check if the variable is the target of a Traverse node
6749    fn is_traverse_target(plan: &LogicalPlan, variable: &str) -> bool {
6750        match plan {
6751            LogicalPlan::Traverse {
6752                target_variable,
6753                input,
6754                ..
6755            } => target_variable == variable || Self::is_traverse_target(input, variable),
6756            LogicalPlan::Filter { input, .. }
6757            | LogicalPlan::Project { input, .. }
6758            | LogicalPlan::Sort { input, .. }
6759            | LogicalPlan::Limit { input, .. }
6760            | LogicalPlan::Aggregate { input, .. }
6761            | LogicalPlan::Apply { input, .. } => Self::is_traverse_target(input, variable),
6762            LogicalPlan::CrossJoin { left, right } => {
6763                Self::is_traverse_target(left, variable)
6764                    || Self::is_traverse_target(right, variable)
6765            }
6766            _ => false,
6767        }
6768    }
6769
6770    /// Push a predicate into a Traverse's target_filter for the specified variable
6771    fn push_predicate_to_traverse(
6772        plan: LogicalPlan,
6773        variable: &str,
6774        predicate: Expr,
6775    ) -> LogicalPlan {
6776        match plan {
6777            LogicalPlan::Traverse {
6778                input,
6779                edge_type_ids,
6780                direction,
6781                source_variable,
6782                target_variable,
6783                target_label_id,
6784                step_variable,
6785                min_hops,
6786                max_hops,
6787                optional,
6788                target_filter,
6789                path_variable,
6790                edge_properties,
6791                is_variable_length,
6792                optional_pattern_vars,
6793                scope_match_variables,
6794                edge_filter_expr,
6795                path_mode,
6796                qpp_steps,
6797            } => {
6798                if target_variable == variable {
6799                    // Found the traverse producing this variable
6800                    let new_filter = match target_filter {
6801                        Some(existing) => Some(Expr::BinaryOp {
6802                            left: Box::new(existing),
6803                            op: BinaryOp::And,
6804                            right: Box::new(predicate),
6805                        }),
6806                        None => Some(predicate),
6807                    };
6808                    LogicalPlan::Traverse {
6809                        input,
6810                        edge_type_ids,
6811                        direction,
6812                        source_variable,
6813                        target_variable,
6814                        target_label_id,
6815                        step_variable,
6816                        min_hops,
6817                        max_hops,
6818                        optional,
6819                        target_filter: new_filter,
6820                        path_variable,
6821                        edge_properties,
6822                        is_variable_length,
6823                        optional_pattern_vars,
6824                        scope_match_variables,
6825                        edge_filter_expr,
6826                        path_mode,
6827                        qpp_steps,
6828                    }
6829                } else {
6830                    // Recurse into input
6831                    LogicalPlan::Traverse {
6832                        input: Box::new(Self::push_predicate_to_traverse(
6833                            *input, variable, predicate,
6834                        )),
6835                        edge_type_ids,
6836                        direction,
6837                        source_variable,
6838                        target_variable,
6839                        target_label_id,
6840                        step_variable,
6841                        min_hops,
6842                        max_hops,
6843                        optional,
6844                        target_filter,
6845                        path_variable,
6846                        edge_properties,
6847                        is_variable_length,
6848                        optional_pattern_vars,
6849                        scope_match_variables,
6850                        edge_filter_expr,
6851                        path_mode,
6852                        qpp_steps,
6853                    }
6854                }
6855            }
6856            LogicalPlan::Filter {
6857                input,
6858                predicate: p,
6859                optional_variables: opt_vars,
6860            } => LogicalPlan::Filter {
6861                input: Box::new(Self::push_predicate_to_traverse(
6862                    *input, variable, predicate,
6863                )),
6864                predicate: p,
6865                optional_variables: opt_vars,
6866            },
6867            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6868                input: Box::new(Self::push_predicate_to_traverse(
6869                    *input, variable, predicate,
6870                )),
6871                projections,
6872            },
6873            LogicalPlan::CrossJoin { left, right } => {
6874                // Check which side has the variable
6875                if Self::is_traverse_target(&left, variable) {
6876                    LogicalPlan::CrossJoin {
6877                        left: Box::new(Self::push_predicate_to_traverse(
6878                            *left, variable, predicate,
6879                        )),
6880                        right,
6881                    }
6882                } else {
6883                    LogicalPlan::CrossJoin {
6884                        left,
6885                        right: Box::new(Self::push_predicate_to_traverse(
6886                            *right, variable, predicate,
6887                        )),
6888                    }
6889                }
6890            }
6891            other => other,
6892        }
6893    }
6894
6895    /// Plan a WITH clause, handling aggregations and projections.
6896    fn plan_with_clause(
6897        &self,
6898        with_clause: &WithClause,
6899        plan: LogicalPlan,
6900        vars_in_scope: &[VariableInfo],
6901    ) -> Result<(LogicalPlan, Vec<VariableInfo>)> {
6902        let mut plan = plan;
6903        let mut group_by: Vec<Expr> = Vec::new();
6904        let mut aggregates: Vec<Expr> = Vec::new();
6905        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
6906        let mut has_agg = false;
6907        let mut projections = Vec::new();
6908        let mut new_vars: Vec<VariableInfo> = Vec::new();
6909        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
6910        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
6911        let mut projected_aliases: HashSet<String> = HashSet::new();
6912        let mut has_unaliased_non_variable_expr = false;
6913
6914        for item in &with_clause.items {
6915            match item {
6916                ReturnItem::All => {
6917                    // WITH * - add all variables in scope
6918                    for v in vars_in_scope {
6919                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
6920                        projected_aliases.insert(v.name.clone());
6921                        projected_simple_reprs.insert(v.name.clone());
6922                    }
6923                    new_vars.extend(vars_in_scope.iter().cloned());
6924                }
6925                ReturnItem::Expr { expr, alias, .. } => {
6926                    if matches!(expr, Expr::Wildcard) {
6927                        for v in vars_in_scope {
6928                            projections
6929                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
6930                            projected_aliases.insert(v.name.clone());
6931                            projected_simple_reprs.insert(v.name.clone());
6932                        }
6933                        new_vars.extend(vars_in_scope.iter().cloned());
6934                    } else {
6935                        // Validate expression variables and syntax
6936                        validate_expression_variables(expr, vars_in_scope)?;
6937                        validate_expression(expr, vars_in_scope)?;
6938                        // Pattern predicates are not allowed in WITH
6939                        if contains_pattern_predicate(expr) {
6940                            return Err(anyhow!(
6941                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in WITH"
6942                            ));
6943                        }
6944
6945                        projections.push((expr.clone(), alias.clone()));
6946                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
6947                            // Bare aggregate — push directly
6948                            has_agg = true;
6949                            aggregates.push(expr.clone());
6950                            projected_aggregate_reprs.insert(expr.to_string_repr());
6951                        } else if !is_window_function(expr)
6952                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
6953                        {
6954                            // Compound aggregate or expression containing aggregates
6955                            has_agg = true;
6956                            compound_agg_exprs.push(expr.clone());
6957                            for inner in extract_inner_aggregates(expr) {
6958                                let repr = inner.to_string_repr();
6959                                if !projected_aggregate_reprs.contains(&repr) {
6960                                    aggregates.push(inner);
6961                                    projected_aggregate_reprs.insert(repr);
6962                                }
6963                            }
6964                        } else if !group_by.contains(expr) {
6965                            group_by.push(expr.clone());
6966                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
6967                                projected_simple_reprs.insert(expr.to_string_repr());
6968                            }
6969                        }
6970
6971                        // Preserve non-scalar type information when WITH aliases
6972                        // entity/path-capable expressions.
6973                        if let Some(a) = alias {
6974                            if projected_aliases.contains(a) {
6975                                return Err(anyhow!(
6976                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
6977                                    a
6978                                ));
6979                            }
6980                            let inferred = infer_with_output_type(expr, vars_in_scope);
6981                            new_vars.push(VariableInfo::new(a.clone(), inferred));
6982                            projected_aliases.insert(a.clone());
6983                        } else if let Expr::Variable(v) = expr {
6984                            if projected_aliases.contains(v) {
6985                                return Err(anyhow!(
6986                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
6987                                    v
6988                                ));
6989                            }
6990                            // Preserve the original type if the variable is just passed through
6991                            if let Some(existing) = find_var_in_scope(vars_in_scope, v) {
6992                                new_vars.push(existing.clone());
6993                            } else {
6994                                new_vars.push(VariableInfo::new(v.clone(), VariableType::Scalar));
6995                            }
6996                            projected_aliases.insert(v.clone());
6997                        } else {
6998                            has_unaliased_non_variable_expr = true;
6999                        }
7000                    }
7001                }
7002            }
7003        }
7004
7005        // Collect extra variables that need to survive the projection stage
7006        // for later WHERE / ORDER BY evaluation, then strip them afterwards.
7007        let projected_names: HashSet<&str> = new_vars.iter().map(|v| v.name.as_str()).collect();
7008        let mut passthrough_extras: Vec<String> = Vec::new();
7009        let mut seen_passthrough: HashSet<String> = HashSet::new();
7010
7011        if let Some(predicate) = &with_clause.where_clause {
7012            for name in collect_expr_variables(predicate) {
7013                if !projected_names.contains(name.as_str())
7014                    && find_var_in_scope(vars_in_scope, &name).is_some()
7015                    && seen_passthrough.insert(name.clone())
7016                {
7017                    passthrough_extras.push(name);
7018                }
7019            }
7020        }
7021
7022        // Non-aggregating WITH allows ORDER BY to reference incoming variables.
7023        // Carry those variables through the projection so Sort can resolve them.
7024        if !has_agg && let Some(order_by) = &with_clause.order_by {
7025            for item in order_by {
7026                for name in collect_expr_variables(&item.expr) {
7027                    if !projected_names.contains(name.as_str())
7028                        && find_var_in_scope(vars_in_scope, &name).is_some()
7029                        && seen_passthrough.insert(name.clone())
7030                    {
7031                        passthrough_extras.push(name);
7032                    }
7033                }
7034            }
7035        }
7036
7037        let needs_cleanup = !passthrough_extras.is_empty();
7038        for extra in &passthrough_extras {
7039            projections.push((Expr::Variable(extra.clone()), Some(extra.clone())));
7040        }
7041
7042        // Validate compound aggregate expressions: non-aggregate refs must be
7043        // individually present in the group_by as simple variables or properties.
7044        if has_agg {
7045            let group_by_reprs: HashSet<String> =
7046                group_by.iter().map(|e| e.to_string_repr()).collect();
7047            for expr in &compound_agg_exprs {
7048                let mut refs = Vec::new();
7049                collect_non_aggregate_refs(expr, false, &mut refs);
7050                for r in &refs {
7051                    let is_covered = match r {
7052                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
7053                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
7054                    };
7055                    if !is_covered {
7056                        return Err(anyhow!(
7057                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
7058                        ));
7059                    }
7060                }
7061            }
7062        }
7063
7064        if has_agg {
7065            plan = LogicalPlan::Aggregate {
7066                input: Box::new(plan),
7067                group_by,
7068                aggregates,
7069            };
7070
7071            // Insert a renaming Project so downstream clauses (WHERE, RETURN)
7072            // can reference the WITH aliases instead of raw column names.
7073            let rename_projections: Vec<(Expr, Option<String>)> = projections
7074                .iter()
7075                .map(|(expr, alias)| {
7076                    if expr.is_aggregate() && !is_compound_aggregate(expr) {
7077                        // Bare aggregate — reference by column name
7078                        (Expr::Variable(aggregate_column_name(expr)), alias.clone())
7079                    } else if is_compound_aggregate(expr)
7080                        || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
7081                    {
7082                        // Compound aggregate — replace inner aggregates with
7083                        // column references, keep outer expression
7084                        (replace_aggregates_with_columns(expr), alias.clone())
7085                    } else {
7086                        (Expr::Variable(expr.to_string_repr()), alias.clone())
7087                    }
7088                })
7089                .collect();
7090            plan = LogicalPlan::Project {
7091                input: Box::new(plan),
7092                projections: rename_projections,
7093            };
7094        } else if !projections.is_empty() {
7095            plan = LogicalPlan::Project {
7096                input: Box::new(plan),
7097                projections: projections.clone(),
7098            };
7099        }
7100
7101        // Apply the WHERE filter (post-projection, with extras still visible).
7102        if let Some(predicate) = &with_clause.where_clause {
7103            plan = LogicalPlan::Filter {
7104                input: Box::new(plan),
7105                predicate: predicate.clone(),
7106                optional_variables: HashSet::new(),
7107            };
7108        }
7109
7110        // Validate and apply ORDER BY for WITH clause.
7111        // Keep pre-WITH vars in scope for parser compatibility, then apply
7112        // stricter checks for aggregate-containing ORDER BY items.
7113        if let Some(order_by) = &with_clause.order_by {
7114            // Build a mapping from aliases and projected expression reprs to
7115            // output columns of the preceding Project/Aggregate pipeline.
7116            let with_order_aliases: HashMap<String, Expr> = projections
7117                .iter()
7118                .flat_map(|(expr, alias)| {
7119                    let output_col = if let Some(a) = alias {
7120                        a.clone()
7121                    } else if expr.is_aggregate() && !is_compound_aggregate(expr) {
7122                        aggregate_column_name(expr)
7123                    } else {
7124                        expr.to_string_repr()
7125                    };
7126
7127                    let mut entries = Vec::new();
7128                    // ORDER BY alias
7129                    if let Some(a) = alias {
7130                        entries.push((a.clone(), Expr::Variable(output_col.clone())));
7131                    }
7132                    // ORDER BY projected expression (e.g. me.age)
7133                    entries.push((expr.to_string_repr(), Expr::Variable(output_col)));
7134                    entries
7135                })
7136                .collect();
7137
7138            let order_by_scope: Vec<VariableInfo> = {
7139                let mut scope = new_vars.clone();
7140                for v in vars_in_scope {
7141                    if !is_var_in_scope(&scope, &v.name) {
7142                        scope.push(v.clone());
7143                    }
7144                }
7145                scope
7146            };
7147            for item in order_by {
7148                validate_expression_variables(&item.expr, &order_by_scope)?;
7149                validate_expression(&item.expr, &order_by_scope)?;
7150                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
7151                if has_aggregate_in_item && !has_agg {
7152                    return Err(anyhow!(
7153                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY of WITH"
7154                    ));
7155                }
7156                if has_agg && has_aggregate_in_item {
7157                    validate_with_order_by_aggregate_item(
7158                        &item.expr,
7159                        &projected_aggregate_reprs,
7160                        &projected_simple_reprs,
7161                        &projected_aliases,
7162                    )?;
7163                }
7164            }
7165            let rewritten_order_by: Vec<SortItem> = order_by
7166                .iter()
7167                .map(|item| {
7168                    let mut expr =
7169                        rewrite_order_by_expr_with_aliases(&item.expr, &with_order_aliases);
7170                    if has_agg {
7171                        // Rewrite any aggregate calls to the aggregate output
7172                        // columns produced by Aggregate.
7173                        expr = replace_aggregates_with_columns(&expr);
7174                        // Then re-map projected property expressions to aliases
7175                        // from the WITH projection.
7176                        expr = rewrite_order_by_expr_with_aliases(&expr, &with_order_aliases);
7177                    }
7178                    SortItem {
7179                        expr,
7180                        ascending: item.ascending,
7181                    }
7182                })
7183                .collect();
7184            plan = LogicalPlan::Sort {
7185                input: Box::new(plan),
7186                order_by: rewritten_order_by,
7187            };
7188        }
7189
7190        // Non-variable expressions in WITH must be aliased.
7191        // This check is intentionally placed after ORDER BY validation so
7192        // higher-priority semantic errors (e.g., ambiguous aggregation in
7193        // ORDER BY) can surface first.
7194        if has_unaliased_non_variable_expr {
7195            return Err(anyhow!(
7196                "SyntaxError: NoExpressionAlias - All non-variable expressions in WITH must be aliased"
7197            ));
7198        }
7199
7200        // Validate and apply SKIP/LIMIT for WITH clause
7201        let skip = with_clause
7202            .skip
7203            .as_ref()
7204            .map(|e| {
7205                self.note_folded_limit_skip(e);
7206                parse_non_negative_integer(e, "SKIP", &self.params)
7207            })
7208            .transpose()?
7209            .flatten();
7210        let fetch = with_clause
7211            .limit
7212            .as_ref()
7213            .map(|e| {
7214                self.note_folded_limit_skip(e);
7215                parse_non_negative_integer(e, "LIMIT", &self.params)
7216            })
7217            .transpose()?
7218            .flatten();
7219
7220        if skip.is_some() || fetch.is_some() {
7221            plan = LogicalPlan::Limit {
7222                input: Box::new(plan),
7223                skip,
7224                fetch,
7225            };
7226        }
7227
7228        // Strip passthrough columns that were only needed by WHERE / ORDER BY.
7229        if needs_cleanup {
7230            let cleanup_projections: Vec<(Expr, Option<String>)> = new_vars
7231                .iter()
7232                .map(|v| (Expr::Variable(v.name.clone()), Some(v.name.clone())))
7233                .collect();
7234            plan = LogicalPlan::Project {
7235                input: Box::new(plan),
7236                projections: cleanup_projections,
7237            };
7238        }
7239
7240        if with_clause.distinct {
7241            plan = LogicalPlan::Distinct {
7242                input: Box::new(plan),
7243            };
7244        }
7245
7246        Ok((plan, new_vars))
7247    }
7248
7249    fn plan_with_recursive(
7250        &self,
7251        with_recursive: &WithRecursiveClause,
7252        _prev_plan: LogicalPlan,
7253        vars_in_scope: &[VariableInfo],
7254    ) -> Result<LogicalPlan> {
7255        // WITH RECURSIVE requires a UNION query with anchor and recursive parts
7256        match &*with_recursive.query {
7257            Query::Union { left, right, .. } => {
7258                // Plan the anchor (initial) query with current scope
7259                let initial_plan = self.rewrite_and_plan_typed(*left.clone(), vars_in_scope)?;
7260
7261                // Plan the recursive query with the CTE name added to scope
7262                // so it can reference itself
7263                let mut recursive_scope = vars_in_scope.to_vec();
7264                recursive_scope.push(VariableInfo::new(
7265                    with_recursive.name.clone(),
7266                    VariableType::Scalar,
7267                ));
7268                let recursive_plan =
7269                    self.rewrite_and_plan_typed(*right.clone(), &recursive_scope)?;
7270
7271                Ok(LogicalPlan::RecursiveCTE {
7272                    cte_name: with_recursive.name.clone(),
7273                    initial: Box::new(initial_plan),
7274                    recursive: Box::new(recursive_plan),
7275                })
7276            }
7277            _ => Err(anyhow::anyhow!(
7278                "WITH RECURSIVE requires a UNION query with anchor and recursive parts"
7279            )),
7280        }
7281    }
7282
7283    pub fn properties_to_expr(&self, variable: &str, properties: &Option<Expr>) -> Option<Expr> {
7284        let entries = match properties {
7285            Some(Expr::Map(entries)) => entries,
7286            _ => return None,
7287        };
7288
7289        if entries.is_empty() {
7290            return None;
7291        }
7292        let mut final_expr = None;
7293        for (prop, val_expr) in entries {
7294            let eq_expr = Expr::BinaryOp {
7295                left: Box::new(Expr::Property(
7296                    Box::new(Expr::Variable(variable.to_string())),
7297                    prop.clone(),
7298                )),
7299                op: BinaryOp::Eq,
7300                right: Box::new(val_expr.clone()),
7301            };
7302
7303            if let Some(e) = final_expr {
7304                final_expr = Some(Expr::BinaryOp {
7305                    left: Box::new(e),
7306                    op: BinaryOp::And,
7307                    right: Box::new(eq_expr),
7308                });
7309            } else {
7310                final_expr = Some(eq_expr);
7311            }
7312        }
7313        final_expr
7314    }
7315
7316    /// Build a filter expression from node properties and labels.
7317    ///
7318    /// This is used for TraverseMainByType where we need to filter target nodes
7319    /// by both labels and properties. Label checks use hasLabel(variable, 'label').
7320    pub fn node_filter_expr(
7321        &self,
7322        variable: &str,
7323        labels: &[String],
7324        properties: &Option<Expr>,
7325    ) -> Option<Expr> {
7326        let mut final_expr = None;
7327
7328        // Add label checks using hasLabel(variable, 'label')
7329        for label in labels {
7330            let label_check = Expr::FunctionCall {
7331                name: "hasLabel".to_string(),
7332                args: vec![
7333                    Expr::Variable(variable.to_string()),
7334                    Expr::Literal(CypherLiteral::String(label.clone())),
7335                ],
7336                distinct: false,
7337                window_spec: None,
7338            };
7339
7340            final_expr = match final_expr {
7341                Some(e) => Some(Expr::BinaryOp {
7342                    left: Box::new(e),
7343                    op: BinaryOp::And,
7344                    right: Box::new(label_check),
7345                }),
7346                None => Some(label_check),
7347            };
7348        }
7349
7350        // Add property checks
7351        if let Some(prop_expr) = self.properties_to_expr(variable, properties) {
7352            final_expr = match final_expr {
7353                Some(e) => Some(Expr::BinaryOp {
7354                    left: Box::new(e),
7355                    op: BinaryOp::And,
7356                    right: Box::new(prop_expr),
7357                }),
7358                None => Some(prop_expr),
7359            };
7360        }
7361
7362        final_expr
7363    }
7364
7365    /// Create a filter plan that ensures traversed target matches a bound variable.
7366    ///
7367    /// Used in EXISTS subquery patterns where the target is already bound.
7368    /// Compares the target's VID against the bound variable's VID.
7369    fn wrap_with_bound_target_filter(plan: LogicalPlan, target_variable: &str) -> LogicalPlan {
7370        // Compare the traverse-discovered target's VID against the bound variable's VID.
7371        // Left side: Property access on the variable from current scope.
7372        // Right side: Variable column "{var}._vid" from traverse output (outer scope).
7373        // We use Variable("{var}._vid") to access the VID column from the traverse output,
7374        // not Property(Variable("{var}"), "_vid") because the column is already flattened.
7375        let bound_check = Expr::BinaryOp {
7376            left: Box::new(Expr::Property(
7377                Box::new(Expr::Variable(target_variable.to_string())),
7378                "_vid".to_string(),
7379            )),
7380            op: BinaryOp::Eq,
7381            right: Box::new(Expr::Variable(format!("{}._vid", target_variable))),
7382        };
7383        LogicalPlan::Filter {
7384            input: Box::new(plan),
7385            predicate: bound_check,
7386            optional_variables: HashSet::new(),
7387        }
7388    }
7389
7390    /// Replace a Scan node matching the variable with a VectorKnn node
7391    fn replace_scan_with_knn(
7392        plan: LogicalPlan,
7393        variable: &str,
7394        property: &str,
7395        query: Expr,
7396        threshold: Option<f32>,
7397    ) -> LogicalPlan {
7398        match plan {
7399            LogicalPlan::Scan {
7400                label_id,
7401                labels,
7402                variable: scan_var,
7403                filter,
7404                optional,
7405            } => {
7406                if scan_var == variable {
7407                    // Inject any existing scan filter into VectorKnn?
7408                    // VectorKnn doesn't support pre-filtering natively in logical plan yet (except threshold).
7409                    // Typically filter is applied post-Knn or during Knn if supported.
7410                    // For now, we assume filter is residual or handled by `extract_vector_similarity` which separates residual.
7411                    // If `filter` is present on Scan, it must be preserved.
7412                    // We can wrap VectorKnn in Filter if Scan had filter.
7413
7414                    let knn = LogicalPlan::VectorKnn {
7415                        label_id,
7416                        variable: variable.to_string(),
7417                        property: property.to_string(),
7418                        query,
7419                        k: 100, // Default K, should push down LIMIT
7420                        threshold,
7421                    };
7422
7423                    if let Some(f) = filter {
7424                        LogicalPlan::Filter {
7425                            input: Box::new(knn),
7426                            predicate: f,
7427                            optional_variables: HashSet::new(),
7428                        }
7429                    } else {
7430                        knn
7431                    }
7432                } else {
7433                    LogicalPlan::Scan {
7434                        label_id,
7435                        labels,
7436                        variable: scan_var,
7437                        filter,
7438                        optional,
7439                    }
7440                }
7441            }
7442            LogicalPlan::Filter {
7443                input,
7444                predicate,
7445                optional_variables,
7446            } => LogicalPlan::Filter {
7447                input: Box::new(Self::replace_scan_with_knn(
7448                    *input, variable, property, query, threshold,
7449                )),
7450                predicate,
7451                optional_variables,
7452            },
7453            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7454                input: Box::new(Self::replace_scan_with_knn(
7455                    *input, variable, property, query, threshold,
7456                )),
7457                projections,
7458            },
7459            LogicalPlan::Limit { input, skip, fetch } => {
7460                // If we encounter Limit, we should ideally push K down to VectorKnn
7461                // But replace_scan_with_knn is called from plan_where_clause which is inside plan_match.
7462                // Limit comes later.
7463                // To support Limit pushdown, we need a separate optimizer pass or do it in plan_single.
7464                LogicalPlan::Limit {
7465                    input: Box::new(Self::replace_scan_with_knn(
7466                        *input, variable, property, query, threshold,
7467                    )),
7468                    skip,
7469                    fetch,
7470                }
7471            }
7472            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
7473                left: Box::new(Self::replace_scan_with_knn(
7474                    *left,
7475                    variable,
7476                    property,
7477                    query.clone(),
7478                    threshold,
7479                )),
7480                right: Box::new(Self::replace_scan_with_knn(
7481                    *right, variable, property, query, threshold,
7482                )),
7483            },
7484            other => other,
7485        }
7486    }
7487
7488    /// Find the label_id for a Scan node matching the given variable
7489    fn find_scan_label_id(plan: &LogicalPlan, variable: &str) -> Option<u16> {
7490        match plan {
7491            LogicalPlan::Scan {
7492                label_id,
7493                variable: var,
7494                ..
7495            } if var == variable => Some(*label_id),
7496            LogicalPlan::ScanAll { variable: var, .. } if var == variable => Some(0),
7497            LogicalPlan::Filter { input, .. }
7498            | LogicalPlan::Project { input, .. }
7499            | LogicalPlan::Sort { input, .. }
7500            | LogicalPlan::Limit { input, .. }
7501            | LogicalPlan::Aggregate { input, .. }
7502            | LogicalPlan::Apply { input, .. } => Self::find_scan_label_id(input, variable),
7503            LogicalPlan::CrossJoin { left, right } => Self::find_scan_label_id(left, variable)
7504                .or_else(|| Self::find_scan_label_id(right, variable)),
7505            LogicalPlan::Traverse { input, .. } => Self::find_scan_label_id(input, variable),
7506            _ => None,
7507        }
7508    }
7509
7510    /// Push a predicate into a Scan's filter for the specified variable
7511    fn push_predicate_to_scan(plan: LogicalPlan, variable: &str, predicate: Expr) -> LogicalPlan {
7512        match plan {
7513            LogicalPlan::Scan {
7514                label_id,
7515                labels,
7516                variable: var,
7517                filter,
7518                optional,
7519            } if var == variable => {
7520                // Merge the predicate with existing filter
7521                let new_filter = match filter {
7522                    Some(existing) => Some(Expr::BinaryOp {
7523                        left: Box::new(existing),
7524                        op: BinaryOp::And,
7525                        right: Box::new(predicate),
7526                    }),
7527                    None => Some(predicate),
7528                };
7529                LogicalPlan::Scan {
7530                    label_id,
7531                    labels,
7532                    variable: var,
7533                    filter: new_filter,
7534                    optional,
7535                }
7536            }
7537            LogicalPlan::ScanAll {
7538                variable: var,
7539                filter,
7540                optional,
7541            } if var == variable => {
7542                let new_filter = match filter {
7543                    Some(existing) => Some(Expr::BinaryOp {
7544                        left: Box::new(existing),
7545                        op: BinaryOp::And,
7546                        right: Box::new(predicate),
7547                    }),
7548                    None => Some(predicate),
7549                };
7550                LogicalPlan::ScanAll {
7551                    variable: var,
7552                    filter: new_filter,
7553                    optional,
7554                }
7555            }
7556            LogicalPlan::Filter {
7557                input,
7558                predicate: p,
7559                optional_variables: opt_vars,
7560            } => LogicalPlan::Filter {
7561                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7562                predicate: p,
7563                optional_variables: opt_vars,
7564            },
7565            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7566                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7567                projections,
7568            },
7569            LogicalPlan::CrossJoin { left, right } => {
7570                // Check which side has the variable
7571                if Self::find_scan_label_id(&left, variable).is_some() {
7572                    LogicalPlan::CrossJoin {
7573                        left: Box::new(Self::push_predicate_to_scan(*left, variable, predicate)),
7574                        right,
7575                    }
7576                } else {
7577                    LogicalPlan::CrossJoin {
7578                        left,
7579                        right: Box::new(Self::push_predicate_to_scan(*right, variable, predicate)),
7580                    }
7581                }
7582            }
7583            LogicalPlan::Traverse {
7584                input,
7585                edge_type_ids,
7586                direction,
7587                source_variable,
7588                target_variable,
7589                target_label_id,
7590                step_variable,
7591                min_hops,
7592                max_hops,
7593                optional,
7594                target_filter,
7595                path_variable,
7596                edge_properties,
7597                is_variable_length,
7598                optional_pattern_vars,
7599                scope_match_variables,
7600                edge_filter_expr,
7601                path_mode,
7602                qpp_steps,
7603            } => LogicalPlan::Traverse {
7604                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7605                edge_type_ids,
7606                direction,
7607                source_variable,
7608                target_variable,
7609                target_label_id,
7610                step_variable,
7611                min_hops,
7612                max_hops,
7613                optional,
7614                target_filter,
7615                path_variable,
7616                edge_properties,
7617                is_variable_length,
7618                optional_pattern_vars,
7619                scope_match_variables,
7620                edge_filter_expr,
7621                path_mode,
7622                qpp_steps,
7623            },
7624            other => other,
7625        }
7626    }
7627
7628    /// Extract predicates that reference only the specified variable
7629    fn extract_variable_predicates(predicate: &Expr, variable: &str) -> (Vec<Expr>, Option<Expr>) {
7630        let analyzer = PredicateAnalyzer::new();
7631        let analysis = analyzer.analyze(predicate, variable);
7632
7633        // Return pushable predicates and combined residual
7634        let residual = if analysis.residual.is_empty() {
7635            None
7636        } else {
7637            let mut iter = analysis.residual.into_iter();
7638            let first = iter.next().unwrap();
7639            Some(iter.fold(first, |acc, e| Expr::BinaryOp {
7640                left: Box::new(acc),
7641                op: BinaryOp::And,
7642                right: Box::new(e),
7643            }))
7644        };
7645
7646        (analysis.pushable, residual)
7647    }
7648
7649    // =====================================================================
7650    // Apply Predicate Pushdown - Helper Functions
7651    // =====================================================================
7652
7653    /// Split AND-connected predicates into a list.
7654    fn split_and_conjuncts(expr: &Expr) -> Vec<Expr> {
7655        match expr {
7656            Expr::BinaryOp {
7657                left,
7658                op: BinaryOp::And,
7659                right,
7660            } => {
7661                let mut result = Self::split_and_conjuncts(left);
7662                result.extend(Self::split_and_conjuncts(right));
7663                result
7664            }
7665            _ => vec![expr.clone()],
7666        }
7667    }
7668
7669    /// Combine predicates with AND.
7670    fn combine_predicates(predicates: Vec<Expr>) -> Option<Expr> {
7671        if predicates.is_empty() {
7672            return None;
7673        }
7674        let mut result = predicates[0].clone();
7675        for pred in predicates.iter().skip(1) {
7676            result = Expr::BinaryOp {
7677                left: Box::new(result),
7678                op: BinaryOp::And,
7679                right: Box::new(pred.clone()),
7680            };
7681        }
7682        Some(result)
7683    }
7684
7685    /// Collect all variable names referenced in an expression.
7686    fn collect_expr_variables(expr: &Expr) -> HashSet<String> {
7687        let mut vars = HashSet::new();
7688        Self::collect_expr_variables_impl(expr, &mut vars);
7689        vars
7690    }
7691
7692    fn collect_expr_variables_impl(expr: &Expr, vars: &mut HashSet<String>) {
7693        match expr {
7694            Expr::Variable(name) => {
7695                vars.insert(name.clone());
7696            }
7697            Expr::Property(inner, _) => {
7698                if let Expr::Variable(name) = inner.as_ref() {
7699                    vars.insert(name.clone());
7700                } else {
7701                    Self::collect_expr_variables_impl(inner, vars);
7702                }
7703            }
7704            Expr::BinaryOp { left, right, .. } => {
7705                Self::collect_expr_variables_impl(left, vars);
7706                Self::collect_expr_variables_impl(right, vars);
7707            }
7708            Expr::UnaryOp { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
7709            Expr::IsNull(e) | Expr::IsNotNull(e) => Self::collect_expr_variables_impl(e, vars),
7710            Expr::FunctionCall { args, .. } => {
7711                for arg in args {
7712                    Self::collect_expr_variables_impl(arg, vars);
7713                }
7714            }
7715            Expr::List(items) => {
7716                for item in items {
7717                    Self::collect_expr_variables_impl(item, vars);
7718                }
7719            }
7720            Expr::Case {
7721                expr,
7722                when_then,
7723                else_expr,
7724            } => {
7725                if let Some(e) = expr {
7726                    Self::collect_expr_variables_impl(e, vars);
7727                }
7728                for (w, t) in when_then {
7729                    Self::collect_expr_variables_impl(w, vars);
7730                    Self::collect_expr_variables_impl(t, vars);
7731                }
7732                if let Some(e) = else_expr {
7733                    Self::collect_expr_variables_impl(e, vars);
7734                }
7735            }
7736            Expr::LabelCheck { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
7737            // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
7738            // they introduce local variable bindings not in outer scope.
7739            _ => {}
7740        }
7741    }
7742
7743    /// Collect all variables produced by a logical plan.
7744    fn collect_plan_variables(plan: &LogicalPlan) -> HashSet<String> {
7745        let mut vars = HashSet::new();
7746        Self::collect_plan_variables_impl(plan, &mut vars);
7747        vars
7748    }
7749
7750    fn collect_plan_variables_impl(plan: &LogicalPlan, vars: &mut HashSet<String>) {
7751        match plan {
7752            LogicalPlan::Scan { variable, .. } => {
7753                vars.insert(variable.clone());
7754            }
7755            LogicalPlan::Traverse {
7756                target_variable,
7757                step_variable,
7758                input,
7759                path_variable,
7760                ..
7761            } => {
7762                vars.insert(target_variable.clone());
7763                if let Some(sv) = step_variable {
7764                    vars.insert(sv.clone());
7765                }
7766                if let Some(pv) = path_variable {
7767                    vars.insert(pv.clone());
7768                }
7769                Self::collect_plan_variables_impl(input, vars);
7770            }
7771            LogicalPlan::Filter { input, .. } => Self::collect_plan_variables_impl(input, vars),
7772            LogicalPlan::Project { input, projections } => {
7773                for (expr, alias) in projections {
7774                    if let Some(a) = alias {
7775                        vars.insert(a.clone());
7776                    } else if let Expr::Variable(v) = expr {
7777                        vars.insert(v.clone());
7778                    }
7779                }
7780                Self::collect_plan_variables_impl(input, vars);
7781            }
7782            LogicalPlan::Apply {
7783                input, subquery, ..
7784            } => {
7785                Self::collect_plan_variables_impl(input, vars);
7786                Self::collect_plan_variables_impl(subquery, vars);
7787            }
7788            LogicalPlan::CrossJoin { left, right } => {
7789                Self::collect_plan_variables_impl(left, vars);
7790                Self::collect_plan_variables_impl(right, vars);
7791            }
7792            LogicalPlan::Unwind {
7793                input, variable, ..
7794            } => {
7795                vars.insert(variable.clone());
7796                Self::collect_plan_variables_impl(input, vars);
7797            }
7798            LogicalPlan::Aggregate { input, .. } => {
7799                Self::collect_plan_variables_impl(input, vars);
7800            }
7801            LogicalPlan::Distinct { input } => {
7802                Self::collect_plan_variables_impl(input, vars);
7803            }
7804            LogicalPlan::Sort { input, .. } => {
7805                Self::collect_plan_variables_impl(input, vars);
7806            }
7807            LogicalPlan::Limit { input, .. } => {
7808                Self::collect_plan_variables_impl(input, vars);
7809            }
7810            LogicalPlan::VectorKnn { variable, .. } => {
7811                vars.insert(variable.clone());
7812            }
7813            LogicalPlan::ProcedureCall { yield_items, .. } => {
7814                for (name, alias) in yield_items {
7815                    vars.insert(alias.clone().unwrap_or_else(|| name.clone()));
7816                }
7817            }
7818            LogicalPlan::ShortestPath {
7819                input,
7820                path_variable,
7821                ..
7822            } => {
7823                vars.insert(path_variable.clone());
7824                Self::collect_plan_variables_impl(input, vars);
7825            }
7826            LogicalPlan::AllShortestPaths {
7827                input,
7828                path_variable,
7829                ..
7830            } => {
7831                vars.insert(path_variable.clone());
7832                Self::collect_plan_variables_impl(input, vars);
7833            }
7834            LogicalPlan::RecursiveCTE {
7835                initial, recursive, ..
7836            } => {
7837                Self::collect_plan_variables_impl(initial, vars);
7838                Self::collect_plan_variables_impl(recursive, vars);
7839            }
7840            LogicalPlan::SubqueryCall {
7841                input, subquery, ..
7842            } => {
7843                Self::collect_plan_variables_impl(input, vars);
7844                Self::collect_plan_variables_impl(subquery, vars);
7845            }
7846            _ => {}
7847        }
7848    }
7849
7850    /// Extract predicates that only reference variables from Apply's input.
7851    /// Returns (input_only_predicates, remaining_predicates).
7852    fn extract_apply_input_predicates(
7853        predicate: &Expr,
7854        input_variables: &HashSet<String>,
7855        subquery_new_variables: &HashSet<String>,
7856    ) -> (Vec<Expr>, Vec<Expr>) {
7857        let conjuncts = Self::split_and_conjuncts(predicate);
7858        let mut input_preds = Vec::new();
7859        let mut remaining = Vec::new();
7860
7861        for conj in conjuncts {
7862            let vars = Self::collect_expr_variables(&conj);
7863
7864            // Predicate only references input variables (none from subquery)
7865            let refs_input_only = vars.iter().all(|v| input_variables.contains(v));
7866            let refs_any_subquery = vars.iter().any(|v| subquery_new_variables.contains(v));
7867
7868            if refs_input_only && !refs_any_subquery && !vars.is_empty() {
7869                input_preds.push(conj);
7870            } else {
7871                remaining.push(conj);
7872            }
7873        }
7874
7875        (input_preds, remaining)
7876    }
7877
7878    /// Push eligible predicates into Apply.input_filter.
7879    /// This filters input rows BEFORE executing the correlated subquery.
7880    fn push_predicates_to_apply(plan: LogicalPlan, current_predicate: &mut Expr) -> LogicalPlan {
7881        match plan {
7882            LogicalPlan::Apply {
7883                input,
7884                subquery,
7885                input_filter,
7886            } => {
7887                // Collect variables from input plan
7888                let input_vars = Self::collect_plan_variables(&input);
7889
7890                // Collect NEW variables introduced by subquery (not in input)
7891                let subquery_vars = Self::collect_plan_variables(&subquery);
7892                let new_subquery_vars: HashSet<String> =
7893                    subquery_vars.difference(&input_vars).cloned().collect();
7894
7895                // Extract predicates that only reference input variables
7896                let (input_preds, remaining) = Self::extract_apply_input_predicates(
7897                    current_predicate,
7898                    &input_vars,
7899                    &new_subquery_vars,
7900                );
7901
7902                // Update current_predicate to only remaining predicates
7903                *current_predicate = if remaining.is_empty() {
7904                    Expr::TRUE
7905                } else {
7906                    Self::combine_predicates(remaining).unwrap()
7907                };
7908
7909                // Combine extracted predicates with existing input_filter
7910                let new_input_filter = if input_preds.is_empty() {
7911                    input_filter
7912                } else {
7913                    let extracted = Self::combine_predicates(input_preds).unwrap();
7914                    match input_filter {
7915                        Some(existing) => Some(Expr::BinaryOp {
7916                            left: Box::new(existing),
7917                            op: BinaryOp::And,
7918                            right: Box::new(extracted),
7919                        }),
7920                        None => Some(extracted),
7921                    }
7922                };
7923
7924                // Recurse into input plan
7925                let new_input = Self::push_predicates_to_apply(*input, current_predicate);
7926
7927                LogicalPlan::Apply {
7928                    input: Box::new(new_input),
7929                    subquery,
7930                    input_filter: new_input_filter,
7931                }
7932            }
7933            // Recurse into other plan nodes
7934            LogicalPlan::Filter {
7935                input,
7936                predicate,
7937                optional_variables,
7938            } => LogicalPlan::Filter {
7939                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7940                predicate,
7941                optional_variables,
7942            },
7943            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7944                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7945                projections,
7946            },
7947            LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
7948                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7949                order_by,
7950            },
7951            LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
7952                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7953                skip,
7954                fetch,
7955            },
7956            LogicalPlan::Aggregate {
7957                input,
7958                group_by,
7959                aggregates,
7960            } => LogicalPlan::Aggregate {
7961                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7962                group_by,
7963                aggregates,
7964            },
7965            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
7966                left: Box::new(Self::push_predicates_to_apply(*left, current_predicate)),
7967                right: Box::new(Self::push_predicates_to_apply(*right, current_predicate)),
7968            },
7969            LogicalPlan::Traverse {
7970                input,
7971                edge_type_ids,
7972                direction,
7973                source_variable,
7974                target_variable,
7975                target_label_id,
7976                step_variable,
7977                min_hops,
7978                max_hops,
7979                optional,
7980                target_filter,
7981                path_variable,
7982                edge_properties,
7983                is_variable_length,
7984                optional_pattern_vars,
7985                scope_match_variables,
7986                edge_filter_expr,
7987                path_mode,
7988                qpp_steps,
7989            } => LogicalPlan::Traverse {
7990                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7991                edge_type_ids,
7992                direction,
7993                source_variable,
7994                target_variable,
7995                target_label_id,
7996                step_variable,
7997                min_hops,
7998                max_hops,
7999                optional,
8000                target_filter,
8001                path_variable,
8002                edge_properties,
8003                is_variable_length,
8004                optional_pattern_vars,
8005                scope_match_variables,
8006                edge_filter_expr,
8007                path_mode,
8008                qpp_steps,
8009            },
8010            other => other,
8011        }
8012    }
8013}
8014
8015/// Get the expected column name for an aggregate expression.
8016///
8017/// This is the single source of truth for aggregate column naming, used by:
8018/// - Logical planner (to create column references)
8019/// - Physical planner (to rename DataFusion's auto-generated column names)
8020/// - Fallback executor (to name result columns)
8021pub fn aggregate_column_name(expr: &Expr) -> String {
8022    expr.to_string_repr()
8023}
8024
/// Output produced by `EXPLAIN` — a human-readable plan with index and cost info.
///
/// Assembled by `QueryPlanner::explain_logical_plan`. The type derives serde's
/// `Serialize`/`Deserialize`, so it can be shipped across an API boundary.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExplainOutput {
    /// Debug-formatted logical plan tree (the pretty `{:#?}` rendering of the
    /// `LogicalPlan`).
    pub plan_text: String,
    /// Index availability report for each scan in the plan.
    pub index_usage: Vec<IndexUsage>,
    /// Rough row and cost estimates for the full plan.
    pub cost_estimates: CostEstimates,
    /// Planner warnings (e.g., missing index, forced full scan).
    ///
    /// NOTE: `explain_logical_plan` currently always leaves this empty.
    pub warnings: Vec<String>,
    /// Suggested indexes that would improve this query.
    pub suggestions: Vec<IndexSuggestion>,
}
8039
/// Suggestion for creating an index to improve query performance.
///
/// Produced while building an `ExplainOutput`; one entry per suggested
/// property.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexSuggestion {
    /// Label or edge type that would benefit from the index.
    ///
    /// Temporal suggestions do not resolve a concrete label and store the
    /// placeholder text `"(detected from temporal query)"` here instead.
    pub label_or_type: String,
    /// Property to index.
    pub property: String,
    /// Recommended index type (e.g., `"SCALAR"`, `"VECTOR"`).
    pub index_type: String,
    /// Human-readable explanation of the performance benefit.
    pub reason: String,
    /// Ready-to-execute Cypher statement to create the index.
    ///
    /// Temporal suggestions emit a template containing the placeholder label
    /// `YourLabel`, which must be replaced before the statement is run.
    pub create_statement: String,
}
8054
/// Index availability report for a single scan operator.
///
/// Entries are produced by `QueryPlanner::collect_index_usage`, one per
/// index that a scan or vector search in the plan relies on.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexUsage {
    /// Name of the label the index belongs to (`"?"` when a vector search's
    /// label id cannot be resolved to a name).
    pub label_or_type: String,
    /// Name of the indexed property.
    pub property: String,
    /// Kind of index, as an upper-case tag; the planner currently emits
    /// `"HASH"` and `"VECTOR"`.
    pub index_type: String,
    /// Whether the index was actually used for this scan.
    pub used: bool,
    /// Human-readable explanation of why the index was or was not used.
    pub reason: Option<String>,
}
8066
/// Rough cost and row count estimates for a complete logical plan.
///
/// NOTE: `QueryPlanner::estimate_costs` currently fills both fields with fixed
/// values that do not depend on the plan, so treat them as placeholders.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CostEstimates {
    /// Estimated number of rows the plan will produce.
    pub estimated_rows: f64,
    /// Abstract cost units (lower is cheaper).
    pub estimated_cost: f64,
}
8075
8076impl QueryPlanner {
8077    /// Plan a query and produce an EXPLAIN report (plan text, index usage, costs).
8078    pub fn explain_plan(&self, ast: Query) -> Result<ExplainOutput> {
8079        let plan = self.plan(ast)?;
8080        self.explain_logical_plan(&plan)
8081    }
8082
8083    /// Produce an EXPLAIN report for an already-planned logical plan.
8084    pub fn explain_logical_plan(&self, plan: &LogicalPlan) -> Result<ExplainOutput> {
8085        let index_usage = self.analyze_index_usage(plan)?;
8086        let cost_estimates = self.estimate_costs(plan)?;
8087        let suggestions = self.collect_index_suggestions(plan);
8088        let warnings = Vec::new();
8089        let plan_text = format!("{:#?}", plan);
8090
8091        Ok(ExplainOutput {
8092            plan_text,
8093            index_usage,
8094            cost_estimates,
8095            warnings,
8096            suggestions,
8097        })
8098    }
8099
8100    fn analyze_index_usage(&self, plan: &LogicalPlan) -> Result<Vec<IndexUsage>> {
8101        let mut usage = Vec::new();
8102        self.collect_index_usage(plan, &mut usage);
8103        Ok(usage)
8104    }
8105
8106    fn collect_index_usage(&self, plan: &LogicalPlan, usage: &mut Vec<IndexUsage>) {
8107        match plan {
8108            LogicalPlan::Scan {
8109                label_id,
8110                filter: Some(filter),
8111                ..
8112            } => {
8113                // Detect indexed-property pushdown — issue #57. Run the same
8114                // analyzer the physical planner uses; if it reports a
8115                // hash-index hit, surface it in EXPLAIN.
8116                if let Some(label_name) = self.schema.label_name_by_id(*label_id) {
8117                    let analyzer = crate::query::pushdown::IndexAwareAnalyzer::new(&self.schema);
8118                    // The variable name is the scan's binding variable; we
8119                    // reach for it via the Scan node directly.
8120                    if let LogicalPlan::Scan { variable, .. } = plan {
8121                        let strategy = analyzer.analyze(filter, variable, *label_id);
8122                        for prop in strategy.hash_index_columns {
8123                            usage.push(IndexUsage {
8124                                label_or_type: label_name.to_string(),
8125                                property: prop,
8126                                index_type: "HASH".to_string(),
8127                                used: true,
8128                                reason: Some(
8129                                    "Hash index point lookup pushed into Lance scan".to_string(),
8130                                ),
8131                            });
8132                        }
8133                    }
8134                }
8135            }
8136            LogicalPlan::Scan { .. } => {}
8137            LogicalPlan::VectorKnn {
8138                label_id, property, ..
8139            } => {
8140                let label_name = self.schema.label_name_by_id(*label_id).unwrap_or("?");
8141                usage.push(IndexUsage {
8142                    label_or_type: label_name.to_string(),
8143                    property: property.clone(),
8144                    index_type: "VECTOR".to_string(),
8145                    used: true,
8146                    reason: None,
8147                });
8148            }
8149            LogicalPlan::Explain { plan } => self.collect_index_usage(plan, usage),
8150            LogicalPlan::Filter { input, .. } => self.collect_index_usage(input, usage),
8151            LogicalPlan::Project { input, .. } => self.collect_index_usage(input, usage),
8152            LogicalPlan::Limit { input, .. } => self.collect_index_usage(input, usage),
8153            LogicalPlan::Sort { input, .. } => self.collect_index_usage(input, usage),
8154            LogicalPlan::Aggregate { input, .. } => self.collect_index_usage(input, usage),
8155            LogicalPlan::Traverse { input, .. } => self.collect_index_usage(input, usage),
8156            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
8157                self.collect_index_usage(left, usage);
8158                self.collect_index_usage(right, usage);
8159            }
8160            _ => {}
8161        }
8162    }
8163
8164    fn estimate_costs(&self, _plan: &LogicalPlan) -> Result<CostEstimates> {
8165        Ok(CostEstimates {
8166            estimated_rows: 100.0,
8167            estimated_cost: 10.0,
8168        })
8169    }
8170
8171    /// Collect index suggestions based on query patterns.
8172    ///
8173    /// Currently detects:
8174    /// - Temporal predicates from `uni.validAt()` function calls
8175    /// - Temporal predicates from `VALID_AT` macro expansion
8176    fn collect_index_suggestions(&self, plan: &LogicalPlan) -> Vec<IndexSuggestion> {
8177        let mut suggestions = Vec::new();
8178        self.collect_temporal_suggestions(plan, &mut suggestions);
8179        suggestions
8180    }
8181
8182    /// Recursively collect temporal index suggestions from the plan.
8183    fn collect_temporal_suggestions(
8184        &self,
8185        plan: &LogicalPlan,
8186        suggestions: &mut Vec<IndexSuggestion>,
8187    ) {
8188        match plan {
8189            LogicalPlan::Filter {
8190                input, predicate, ..
8191            } => {
8192                // Check for temporal patterns in the predicate
8193                self.detect_temporal_pattern(predicate, suggestions);
8194                // Recurse into input
8195                self.collect_temporal_suggestions(input, suggestions);
8196            }
8197            LogicalPlan::Explain { plan } => self.collect_temporal_suggestions(plan, suggestions),
8198            LogicalPlan::Project { input, .. } => {
8199                self.collect_temporal_suggestions(input, suggestions)
8200            }
8201            LogicalPlan::Limit { input, .. } => {
8202                self.collect_temporal_suggestions(input, suggestions)
8203            }
8204            LogicalPlan::Sort { input, .. } => {
8205                self.collect_temporal_suggestions(input, suggestions)
8206            }
8207            LogicalPlan::Aggregate { input, .. } => {
8208                self.collect_temporal_suggestions(input, suggestions)
8209            }
8210            LogicalPlan::Traverse { input, .. } => {
8211                self.collect_temporal_suggestions(input, suggestions)
8212            }
8213            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
8214                self.collect_temporal_suggestions(left, suggestions);
8215                self.collect_temporal_suggestions(right, suggestions);
8216            }
8217            _ => {}
8218        }
8219    }
8220
8221    /// Detect temporal predicate patterns and suggest indexes.
8222    ///
8223    /// Detects two patterns:
8224    /// 1. `uni.validAt(node, 'start_prop', 'end_prop', time)` function call
8225    /// 2. `node.valid_from <= time AND (node.valid_to IS NULL OR node.valid_to > time)` from VALID_AT macro
8226    fn detect_temporal_pattern(&self, expr: &Expr, suggestions: &mut Vec<IndexSuggestion>) {
8227        match expr {
8228            // Pattern 1: uni.temporal.validAt() function call
8229            Expr::FunctionCall { name, args, .. }
8230                if (name.eq_ignore_ascii_case("uni.temporal.validAt")
8231                    || name.eq_ignore_ascii_case("validAt"))
8232                    && args.len() >= 2 =>
8233            {
8234                // args[0] = node, args[1] = start_prop, args[2] = end_prop, args[3] = time
8235                let start_prop = if let Some(Expr::Literal(CypherLiteral::String(s))) = args.get(1)
8236                {
8237                    s.clone()
8238                } else {
8239                    "valid_from".to_string()
8240                };
8241
8242                // Try to extract label from the node expression
8243                if let Some(var) = args.first().and_then(|e| e.extract_variable()) {
8244                    self.suggest_temporal_index(&var, &start_prop, suggestions);
8245                }
8246            }
8247
8248            // Pattern 2: VALID_AT macro expansion - look for property <= time pattern
8249            Expr::BinaryOp {
8250                left,
8251                op: BinaryOp::And,
8252                right,
8253            } => {
8254                // Check left side for `prop <= time` pattern (temporal start condition)
8255                if let Expr::BinaryOp {
8256                    left: prop_expr,
8257                    op: BinaryOp::LtEq,
8258                    ..
8259                } = left.as_ref()
8260                    && let Expr::Property(base, prop_name) = prop_expr.as_ref()
8261                    && (prop_name == "valid_from"
8262                        || prop_name.contains("start")
8263                        || prop_name.contains("from")
8264                        || prop_name.contains("begin"))
8265                    && let Some(var) = base.extract_variable()
8266                {
8267                    self.suggest_temporal_index(&var, prop_name, suggestions);
8268                }
8269
8270                // Recurse into both sides of AND
8271                self.detect_temporal_pattern(left.as_ref(), suggestions);
8272                self.detect_temporal_pattern(right.as_ref(), suggestions);
8273            }
8274
8275            // Recurse into other binary ops
8276            Expr::BinaryOp { left, right, .. } => {
8277                self.detect_temporal_pattern(left.as_ref(), suggestions);
8278                self.detect_temporal_pattern(right.as_ref(), suggestions);
8279            }
8280
8281            _ => {}
8282        }
8283    }
8284
8285    /// Suggest a scalar index for a temporal property if one doesn't already exist.
8286    fn suggest_temporal_index(
8287        &self,
8288        _variable: &str,
8289        property: &str,
8290        suggestions: &mut Vec<IndexSuggestion>,
8291    ) {
8292        // Check if a scalar index already exists for this property
8293        // We need to check all labels since we may not know the exact label from the variable
8294        let mut has_index = false;
8295
8296        for index in &self.schema.indexes {
8297            if let IndexDefinition::Scalar(config) = index
8298                && config.properties.contains(&property.to_string())
8299            {
8300                has_index = true;
8301                break;
8302            }
8303        }
8304
8305        if !has_index {
8306            // Avoid duplicate suggestions
8307            let already_suggested = suggestions.iter().any(|s| s.property == property);
8308            if !already_suggested {
8309                suggestions.push(IndexSuggestion {
8310                    label_or_type: "(detected from temporal query)".to_string(),
8311                    property: property.to_string(),
8312                    index_type: "SCALAR (BTree)".to_string(),
8313                    reason: format!(
8314                        "Temporal queries using '{}' can benefit from a scalar index for range scans",
8315                        property
8316                    ),
8317                    create_statement: format!(
8318                        "CREATE INDEX idx_{} FOR (n:YourLabel) ON (n.{})",
8319                        property, property
8320                    ),
8321                });
8322            }
8323        }
8324    }
8325
8326    /// Helper functions for expression normalization
8327    /// Normalize an expression for storage: strip variable prefixes
8328    /// For simple property: u.email -> "email"
8329    /// For expressions: lower(u.email) -> "lower(email)"
8330    fn normalize_expression_for_storage(expr: &Expr) -> String {
8331        match expr {
8332            Expr::Property(base, prop) if matches!(**base, Expr::Variable(_)) => prop.clone(),
8333            _ => {
8334                // Serialize expression and strip variable prefix
8335                let expr_str = expr.to_string_repr();
8336                Self::strip_variable_prefix(&expr_str)
8337            }
8338        }
8339    }
8340
8341    /// Strip variable references like "u.prop" from expression strings
8342    /// Converts "lower(u.email)" to "lower(email)"
8343    fn strip_variable_prefix(expr_str: &str) -> String {
8344        use regex::Regex;
8345        // Match patterns like "word.property" and replace with just "property"
8346        let re = Regex::new(r"\b\w+\.(\w+)").unwrap();
8347        re.replace_all(expr_str, "$1").to_string()
8348    }
8349
8350    /// Plan a schema command from the new AST
8351    fn plan_schema_command(&self, cmd: SchemaCommand) -> Result<LogicalPlan> {
8352        match cmd {
8353            SchemaCommand::CreateVectorIndex(c) => {
8354                // Parse index type from options (default: IvfPq)
8355                let opt = |key: &str| {
8356                    c.options
8357                        .get(key)
8358                        .and_then(|v| v.as_str())
8359                        .and_then(|s| s.parse::<u32>().ok())
8360                };
8361                let opt_u8 = |key: &str| -> Option<u8> {
8362                    c.options
8363                        .get(key)
8364                        .and_then(|v| v.as_str())
8365                        .and_then(|s| s.parse::<u8>().ok())
8366                };
8367                let index_type = match c.options.get("type").and_then(|v| v.as_str()) {
8368                    Some("flat") => VectorIndexType::Flat,
8369                    Some("ivf_flat") => VectorIndexType::IvfFlat {
8370                        num_partitions: opt("partitions").unwrap_or(256),
8371                    },
8372                    Some("ivf_sq") => VectorIndexType::IvfSq {
8373                        num_partitions: opt("partitions").unwrap_or(256),
8374                    },
8375                    Some("ivf_rq") => VectorIndexType::IvfRq {
8376                        num_partitions: opt("partitions").unwrap_or(256),
8377                        num_bits: opt_u8("num_bits"),
8378                    },
8379                    Some("hnsw_flat") => VectorIndexType::HnswFlat {
8380                        m: opt("m").unwrap_or(16),
8381                        ef_construction: opt("ef_construction").unwrap_or(200),
8382                        num_partitions: opt("partitions"),
8383                    },
8384                    Some("hnsw") | Some("hnsw_sq") => VectorIndexType::HnswSq {
8385                        m: opt("m").unwrap_or(16),
8386                        ef_construction: opt("ef_construction").unwrap_or(200),
8387                        num_partitions: opt("partitions"),
8388                    },
8389                    Some("hnsw_pq") => VectorIndexType::HnswPq {
8390                        m: opt("m").unwrap_or(16),
8391                        ef_construction: opt("ef_construction").unwrap_or(200),
8392                        num_sub_vectors: opt("sub_vectors").unwrap_or(16),
8393                        num_partitions: opt("partitions"),
8394                    },
8395                    _ => VectorIndexType::IvfPq {
8396                        num_partitions: opt("partitions").unwrap_or(256),
8397                        num_sub_vectors: opt("sub_vectors").unwrap_or(16),
8398                        bits_per_subvector: opt_u8("num_bits").unwrap_or(8),
8399                    },
8400                };
8401
8402                // Parse embedding config from options
8403                let embedding_config = if let Some(emb_val) = c.options.get("embedding") {
8404                    Self::parse_embedding_config(emb_val)?
8405                } else {
8406                    None
8407                };
8408
8409                let config = VectorIndexConfig {
8410                    name: c.name,
8411                    label: c.label,
8412                    property: c.property,
8413                    metric: DistanceMetric::Cosine,
8414                    index_type,
8415                    embedding_config,
8416                    metadata: Default::default(),
8417                };
8418                Ok(LogicalPlan::CreateVectorIndex {
8419                    config,
8420                    if_not_exists: c.if_not_exists,
8421                })
8422            }
8423            SchemaCommand::CreateFullTextIndex(cfg) => Ok(LogicalPlan::CreateFullTextIndex {
8424                config: FullTextIndexConfig {
8425                    name: cfg.name,
8426                    label: cfg.label,
8427                    properties: cfg.properties,
8428                    tokenizer: TokenizerConfig::Standard,
8429                    with_positions: true,
8430                    metadata: Default::default(),
8431                },
8432                if_not_exists: cfg.if_not_exists,
8433            }),
8434            SchemaCommand::CreateScalarIndex(cfg) => {
8435                // Convert expressions to storage strings (strip variable prefix)
8436                let properties: Vec<String> = cfg
8437                    .expressions
8438                    .iter()
8439                    .map(Self::normalize_expression_for_storage)
8440                    .collect();
8441
8442                Ok(LogicalPlan::CreateScalarIndex {
8443                    config: ScalarIndexConfig {
8444                        name: cfg.name,
8445                        label: cfg.label,
8446                        properties,
8447                        index_type: ScalarIndexType::BTree,
8448                        where_clause: cfg.where_clause.map(|e| e.to_string_repr()),
8449                        metadata: Default::default(),
8450                    },
8451                    if_not_exists: cfg.if_not_exists,
8452                })
8453            }
8454            SchemaCommand::CreateJsonFtsIndex(cfg) => {
8455                let with_positions = cfg
8456                    .options
8457                    .get("with_positions")
8458                    .and_then(|v| v.as_bool())
8459                    .unwrap_or(false);
8460                Ok(LogicalPlan::CreateJsonFtsIndex {
8461                    config: JsonFtsIndexConfig {
8462                        name: cfg.name,
8463                        label: cfg.label,
8464                        column: cfg.column,
8465                        paths: Vec::new(),
8466                        with_positions,
8467                        metadata: Default::default(),
8468                    },
8469                    if_not_exists: cfg.if_not_exists,
8470                })
8471            }
8472            SchemaCommand::DropIndex(drop) => Ok(LogicalPlan::DropIndex {
8473                name: drop.name,
8474                if_exists: false, // new AST doesn't have if_exists for DROP INDEX yet
8475            }),
8476            SchemaCommand::CreateConstraint(c) => Ok(LogicalPlan::CreateConstraint(c)),
8477            SchemaCommand::DropConstraint(c) => Ok(LogicalPlan::DropConstraint(c)),
8478            SchemaCommand::CreateLabel(c) => Ok(LogicalPlan::CreateLabel(c)),
8479            SchemaCommand::CreateEdgeType(c) => Ok(LogicalPlan::CreateEdgeType(c)),
8480            SchemaCommand::AlterLabel(c) => Ok(LogicalPlan::AlterLabel(c)),
8481            SchemaCommand::AlterEdgeType(c) => Ok(LogicalPlan::AlterEdgeType(c)),
8482            SchemaCommand::DropLabel(c) => Ok(LogicalPlan::DropLabel(c)),
8483            SchemaCommand::DropEdgeType(c) => Ok(LogicalPlan::DropEdgeType(c)),
8484            SchemaCommand::ShowConstraints(c) => Ok(LogicalPlan::ShowConstraints(c)),
8485            SchemaCommand::ShowIndexes(c) => Ok(LogicalPlan::ShowIndexes { filter: c.filter }),
8486            SchemaCommand::ShowDatabase => Ok(LogicalPlan::ShowDatabase),
8487            SchemaCommand::ShowConfig => Ok(LogicalPlan::ShowConfig),
8488            SchemaCommand::ShowStatistics => Ok(LogicalPlan::ShowStatistics),
8489            SchemaCommand::Vacuum => Ok(LogicalPlan::Vacuum),
8490            SchemaCommand::Checkpoint => Ok(LogicalPlan::Checkpoint),
8491            SchemaCommand::Backup { path } => Ok(LogicalPlan::Backup {
8492                destination: path,
8493                options: HashMap::new(),
8494            }),
8495            SchemaCommand::CopyTo(cmd) => Ok(LogicalPlan::CopyTo {
8496                label: cmd.label,
8497                path: cmd.path,
8498                format: cmd.format,
8499                options: cmd.options,
8500            }),
8501            SchemaCommand::CopyFrom(cmd) => Ok(LogicalPlan::CopyFrom {
8502                label: cmd.label,
8503                path: cmd.path,
8504                format: cmd.format,
8505                options: cmd.options,
8506            }),
8507        }
8508    }
8509
8510    fn parse_embedding_config(emb_val: &Value) -> Result<Option<EmbeddingConfig>> {
8511        let obj = emb_val
8512            .as_object()
8513            .ok_or_else(|| anyhow!("embedding option must be an object"))?;
8514
8515        // Parse alias (required)
8516        let alias = obj
8517            .get("alias")
8518            .and_then(|v| v.as_str())
8519            .ok_or_else(|| anyhow!("embedding.alias is required"))?;
8520
8521        // Parse source properties (required)
8522        let source_properties = obj
8523            .get("source")
8524            .and_then(|v| v.as_array())
8525            .ok_or_else(|| anyhow!("embedding.source is required and must be an array"))?
8526            .iter()
8527            .filter_map(|v| v.as_str().map(|s| s.to_string()))
8528            .collect::<Vec<_>>();
8529
8530        if source_properties.is_empty() {
8531            return Err(anyhow!(
8532                "embedding.source must contain at least one property"
8533            ));
8534        }
8535
8536        let batch_size = obj
8537            .get("batch_size")
8538            .and_then(|v| v.as_u64())
8539            .map(|v| v as usize)
8540            .unwrap_or(32);
8541
8542        let document_prefix = obj
8543            .get("document_prefix")
8544            .and_then(|v| v.as_str())
8545            .map(|s| s.to_string());
8546
8547        let query_prefix = obj
8548            .get("query_prefix")
8549            .and_then(|v| v.as_str())
8550            .map(|s| s.to_string());
8551
8552        Ok(Some(EmbeddingConfig {
8553            alias: alias.to_string(),
8554            source_properties,
8555            batch_size,
8556            document_prefix,
8557            query_prefix,
8558        }))
8559    }
8560}
8561
8562/// Collect all properties referenced anywhere in the LogicalPlan tree.
8563///
8564/// This is critical for window functions: properties must be materialized
8565/// at the Scan node so they're available for window operations later.
8566///
8567/// Returns a mapping of variable name → property names (e.g., "e" → {"dept", "salary"}).
8568pub fn collect_properties_from_plan(plan: &LogicalPlan) -> HashMap<String, HashSet<String>> {
8569    let mut properties: HashMap<String, HashSet<String>> = HashMap::new();
8570    collect_properties_recursive(plan, &mut properties);
8571    properties
8572}
8573
8574/// Recursively walk the LogicalPlan tree and collect all property references.
8575fn collect_properties_recursive(
8576    plan: &LogicalPlan,
8577    properties: &mut HashMap<String, HashSet<String>>,
8578) {
8579    match plan {
8580        LogicalPlan::Window {
8581            input,
8582            window_exprs,
8583        } => {
8584            // Collect from window expressions
8585            for expr in window_exprs {
8586                collect_properties_from_expr_into(expr, properties);
8587            }
8588            collect_properties_recursive(input, properties);
8589        }
8590        LogicalPlan::Project { input, projections } => {
8591            for (expr, _alias) in projections {
8592                collect_properties_from_expr_into(expr, properties);
8593            }
8594            collect_properties_recursive(input, properties);
8595        }
8596        LogicalPlan::Sort { input, order_by } => {
8597            for sort_item in order_by {
8598                collect_properties_from_expr_into(&sort_item.expr, properties);
8599            }
8600            collect_properties_recursive(input, properties);
8601        }
8602        LogicalPlan::Filter {
8603            input, predicate, ..
8604        } => {
8605            collect_properties_from_expr_into(predicate, properties);
8606            collect_properties_recursive(input, properties);
8607        }
8608        LogicalPlan::Aggregate {
8609            input,
8610            group_by,
8611            aggregates,
8612        } => {
8613            for expr in group_by {
8614                collect_properties_from_expr_into(expr, properties);
8615            }
8616            for expr in aggregates {
8617                collect_properties_from_expr_into(expr, properties);
8618            }
8619            collect_properties_recursive(input, properties);
8620        }
8621        LogicalPlan::Scan {
8622            filter: Some(expr), ..
8623        } => {
8624            collect_properties_from_expr_into(expr, properties);
8625        }
8626        LogicalPlan::Scan { filter: None, .. } => {}
8627        LogicalPlan::ExtIdLookup {
8628            filter: Some(expr), ..
8629        } => {
8630            collect_properties_from_expr_into(expr, properties);
8631        }
8632        LogicalPlan::ExtIdLookup { filter: None, .. } => {}
8633        LogicalPlan::ScanAll {
8634            filter: Some(expr), ..
8635        } => {
8636            collect_properties_from_expr_into(expr, properties);
8637        }
8638        LogicalPlan::ScanAll { filter: None, .. } => {}
8639        LogicalPlan::ScanMainByLabels {
8640            filter: Some(expr), ..
8641        } => {
8642            collect_properties_from_expr_into(expr, properties);
8643        }
8644        LogicalPlan::ScanMainByLabels { filter: None, .. } => {}
8645        LogicalPlan::TraverseMainByType {
8646            input,
8647            target_filter,
8648            ..
8649        } => {
8650            if let Some(expr) = target_filter {
8651                collect_properties_from_expr_into(expr, properties);
8652            }
8653            collect_properties_recursive(input, properties);
8654        }
8655        LogicalPlan::Traverse {
8656            input,
8657            target_filter,
8658            step_variable: _,
8659            ..
8660        } => {
8661            if let Some(expr) = target_filter {
8662                collect_properties_from_expr_into(expr, properties);
8663            }
8664            // Note: Edge properties (step_variable) will be collected from expressions
8665            // that reference them. The edge_properties field in LogicalPlan is populated
8666            // later during physical planning based on this collected map.
8667            collect_properties_recursive(input, properties);
8668        }
8669        LogicalPlan::Unwind { input, expr, .. } => {
8670            collect_properties_from_expr_into(expr, properties);
8671            collect_properties_recursive(input, properties);
8672        }
8673        LogicalPlan::Create { input, pattern } => {
8674            // Mark variables referenced in CREATE patterns with "*" so plan_scan
8675            // adds structural projections (bare entity columns). Without this,
8676            // execute_create_pattern() can't find bound variables and creates
8677            // spurious new nodes instead of using existing MATCH'd ones.
8678            mark_pattern_variables(pattern, properties);
8679            collect_properties_recursive(input, properties);
8680        }
8681        LogicalPlan::CreateBatch { input, patterns } => {
8682            for pattern in patterns {
8683                mark_pattern_variables(pattern, properties);
8684            }
8685            collect_properties_recursive(input, properties);
8686        }
8687        LogicalPlan::Merge {
8688            input,
8689            pattern,
8690            on_match,
8691            on_create,
8692        } => {
8693            mark_pattern_variables(pattern, properties);
8694            if let Some(set_clause) = on_match {
8695                mark_set_item_variables(&set_clause.items, properties);
8696            }
8697            if let Some(set_clause) = on_create {
8698                mark_set_item_variables(&set_clause.items, properties);
8699            }
8700            collect_properties_recursive(input, properties);
8701        }
8702        LogicalPlan::Set { input, items } => {
8703            mark_set_item_variables(items, properties);
8704            collect_properties_recursive(input, properties);
8705        }
8706        LogicalPlan::Remove { input, items } => {
8707            for item in items {
8708                match item {
8709                    RemoveItem::Property(expr) => {
8710                        // REMOVE n.prop — collect the property and mark the variable
8711                        // with "*" so full structural projection is applied.
8712                        collect_properties_from_expr_into(expr, properties);
8713                        if let Expr::Property(base, _) = expr
8714                            && let Expr::Variable(var) = base.as_ref()
8715                        {
8716                            properties
8717                                .entry(var.clone())
8718                                .or_default()
8719                                .insert("*".to_string());
8720                        }
8721                    }
8722                    RemoveItem::Labels { variable, .. } => {
8723                        // REMOVE n:Label — mark n with "*"
8724                        properties
8725                            .entry(variable.clone())
8726                            .or_default()
8727                            .insert("*".to_string());
8728                    }
8729                }
8730            }
8731            collect_properties_recursive(input, properties);
8732        }
8733        LogicalPlan::Delete { input, items, .. } => {
8734            for expr in items {
8735                collect_properties_from_expr_into(expr, properties);
8736            }
8737            collect_properties_recursive(input, properties);
8738        }
8739        LogicalPlan::Foreach {
8740            input, list, body, ..
8741        } => {
8742            collect_properties_from_expr_into(list, properties);
8743            for plan in body {
8744                collect_properties_recursive(plan, properties);
8745            }
8746            collect_properties_recursive(input, properties);
8747        }
8748        LogicalPlan::Limit { input, .. } => {
8749            collect_properties_recursive(input, properties);
8750        }
8751        LogicalPlan::CrossJoin { left, right } => {
8752            collect_properties_recursive(left, properties);
8753            collect_properties_recursive(right, properties);
8754        }
8755        LogicalPlan::Apply {
8756            input,
8757            subquery,
8758            input_filter,
8759        } => {
8760            if let Some(expr) = input_filter {
8761                collect_properties_from_expr_into(expr, properties);
8762            }
8763            collect_properties_recursive(input, properties);
8764            collect_properties_recursive(subquery, properties);
8765        }
8766        LogicalPlan::Union { left, right, .. } => {
8767            collect_properties_recursive(left, properties);
8768            collect_properties_recursive(right, properties);
8769        }
8770        LogicalPlan::RecursiveCTE {
8771            initial, recursive, ..
8772        } => {
8773            collect_properties_recursive(initial, properties);
8774            collect_properties_recursive(recursive, properties);
8775        }
8776        LogicalPlan::ProcedureCall { arguments, .. } => {
8777            for arg in arguments {
8778                collect_properties_from_expr_into(arg, properties);
8779            }
8780        }
8781        LogicalPlan::VectorKnn { query, .. } => {
8782            collect_properties_from_expr_into(query, properties);
8783        }
8784        LogicalPlan::InvertedIndexLookup { terms, .. } => {
8785            collect_properties_from_expr_into(terms, properties);
8786        }
8787        LogicalPlan::ShortestPath { input, .. } => {
8788            collect_properties_recursive(input, properties);
8789        }
8790        LogicalPlan::AllShortestPaths { input, .. } => {
8791            collect_properties_recursive(input, properties);
8792        }
8793        LogicalPlan::Distinct { input } => {
8794            collect_properties_recursive(input, properties);
8795        }
8796        LogicalPlan::QuantifiedPattern {
8797            input,
8798            pattern_plan,
8799            ..
8800        } => {
8801            collect_properties_recursive(input, properties);
8802            collect_properties_recursive(pattern_plan, properties);
8803        }
8804        LogicalPlan::BindZeroLengthPath { input, .. } => {
8805            collect_properties_recursive(input, properties);
8806        }
8807        LogicalPlan::BindPath { input, .. } => {
8808            collect_properties_recursive(input, properties);
8809        }
8810        LogicalPlan::SubqueryCall { input, subquery } => {
8811            collect_properties_recursive(input, properties);
8812            collect_properties_recursive(subquery, properties);
8813        }
8814        LogicalPlan::LocyProject {
8815            input, projections, ..
8816        } => {
8817            for (expr, _alias) in projections {
8818                match expr {
8819                    // Bare variable in LocyProject: only need _vid for node variables
8820                    // (plan_locy_project extracts VID directly). Adding "*" would create
8821                    // a structural Struct column that conflicts with derived scan columns.
8822                    Expr::Variable(name) if !name.contains('.') => {
8823                        properties
8824                            .entry(name.clone())
8825                            .or_default()
8826                            .insert("_vid".to_string());
8827                    }
8828                    _ => collect_properties_from_expr_into(expr, properties),
8829                }
8830            }
8831            collect_properties_recursive(input, properties);
8832        }
8833        LogicalPlan::LocyFold {
8834            input,
8835            fold_bindings,
8836            ..
8837        } => {
8838            for (_name, expr) in fold_bindings {
8839                collect_properties_from_expr_into(expr, properties);
8840            }
8841            collect_properties_recursive(input, properties);
8842        }
8843        LogicalPlan::LocyBestBy {
8844            input, criteria, ..
8845        } => {
8846            for (expr, _asc) in criteria {
8847                collect_properties_from_expr_into(expr, properties);
8848            }
8849            collect_properties_recursive(input, properties);
8850        }
8851        LogicalPlan::LocyPriority { input, .. } => {
8852            collect_properties_recursive(input, properties);
8853        }
8854        LogicalPlan::LocyModelInvoke { input, .. } => {
8855            // Model invocations don't introduce new property accesses
8856            // — feature expressions are lifted to hidden YIELD items
8857            // by `extract_model_invocations` (uni-locy typecheck) and
8858            // their property refs are already collected via the
8859            // wrapped LocyProject's projection walk.
8860            collect_properties_recursive(input, properties);
8861        }
8862        // DDL and other plans don't reference properties
8863        _ => {}
8864    }
8865}
8866
8867/// Mark target variables from SET items with "*" and collect value expressions.
8868fn mark_set_item_variables(items: &[SetItem], properties: &mut HashMap<String, HashSet<String>>) {
8869    for item in items {
8870        match item {
8871            SetItem::Property { expr, value } => {
8872                // SET n.prop = val — mark n with STRUCT_ONLY_SENTINEL so the
8873                // scan builds the bare `n` struct column (needed for executor
8874                // `row.get(var_name)`) WITHOUT pulling the full schema. The
8875                // explicit `prop` is collected via `collect_properties_from_expr_into`
8876                // below and joins the variable's HashSet alongside the sentinel.
8877                //
8878                // If the same variable is also referenced bare elsewhere
8879                // (e.g. `SET n.x = 1 RETURN n`), `collect_properties_from_expr_into`
8880                // inserts "*" through the bare-Variable path; "*" dominates
8881                // the sentinel in `resolve_properties`, so the full schema
8882                // is still pulled when actually required.
8883                collect_properties_from_expr_into(expr, properties);
8884                collect_properties_from_expr_into(value, properties);
8885                if let Expr::Property(base, _) = expr
8886                    && let Expr::Variable(var) = base.as_ref()
8887                {
8888                    properties
8889                        .entry(var.clone())
8890                        .or_default()
8891                        .insert(STRUCT_ONLY_SENTINEL.to_string());
8892                }
8893            }
8894            SetItem::Labels { variable, .. } => {
8895                // SET n:Label — need full access to n
8896                properties
8897                    .entry(variable.clone())
8898                    .or_default()
8899                    .insert("*".to_string());
8900            }
8901            SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
8902                // SET n = {props} or SET n += {props}
8903                properties
8904                    .entry(variable.clone())
8905                    .or_default()
8906                    .insert("*".to_string());
8907                collect_properties_from_expr_into(value, properties);
8908            }
8909        }
8910    }
8911}
8912
8913/// Mark all variables in a CREATE/MERGE pattern with "*" so that plan_scan
8914/// adds structural projections (bare entity Struct columns) for them.
8915/// This is needed so that execute_create_pattern() can find bound variables
8916/// in the row HashMap and reuse existing nodes instead of creating new ones.
8917fn mark_pattern_variables(pattern: &Pattern, properties: &mut HashMap<String, HashSet<String>>) {
8918    for path in &pattern.paths {
8919        if let Some(ref v) = path.variable {
8920            properties
8921                .entry(v.clone())
8922                .or_default()
8923                .insert("*".to_string());
8924        }
8925        for element in &path.elements {
8926            match element {
8927                PatternElement::Node(n) => {
8928                    if let Some(ref v) = n.variable {
8929                        properties
8930                            .entry(v.clone())
8931                            .or_default()
8932                            .insert("*".to_string());
8933                    }
8934                    // Also collect properties from inline property expressions
8935                    if let Some(ref props) = n.properties {
8936                        collect_properties_from_expr_into(props, properties);
8937                    }
8938                }
8939                PatternElement::Relationship(r) => {
8940                    if let Some(ref v) = r.variable {
8941                        properties
8942                            .entry(v.clone())
8943                            .or_default()
8944                            .insert("*".to_string());
8945                    }
8946                    if let Some(ref props) = r.properties {
8947                        collect_properties_from_expr_into(props, properties);
8948                    }
8949                }
8950                PatternElement::Parenthesized { pattern, .. } => {
8951                    let sub = Pattern {
8952                        paths: vec![pattern.as_ref().clone()],
8953                    };
8954                    mark_pattern_variables(&sub, properties);
8955                }
8956            }
8957        }
8958    }
8959}
8960
8961/// Collect properties from an expression into a HashMap.
8962fn collect_properties_from_expr_into(
8963    expr: &Expr,
8964    properties: &mut HashMap<String, HashSet<String>>,
8965) {
8966    match expr {
8967        Expr::PatternComprehension {
8968            where_clause,
8969            map_expr,
8970            ..
8971        } => {
8972            // Collect properties from the WHERE clause and map expression.
8973            // The pattern itself creates local bindings that don't need
8974            // property collection from the outer scope.
8975            if let Some(where_expr) = where_clause {
8976                collect_properties_from_expr_into(where_expr, properties);
8977            }
8978            collect_properties_from_expr_into(map_expr, properties);
8979        }
8980        Expr::Variable(name) => {
8981            // Handle transformed property expressions like "e.dept" (after transform_window_expr_properties)
8982            if let Some((var, prop)) = name.split_once('.') {
8983                properties
8984                    .entry(var.to_string())
8985                    .or_default()
8986                    .insert(prop.to_string());
8987            } else {
8988                // Bare variable (e.g., RETURN n) — needs all properties materialized
8989                properties
8990                    .entry(name.clone())
8991                    .or_default()
8992                    .insert("*".to_string());
8993            }
8994        }
8995        Expr::Property(base, name) => {
8996            // Extract variable name from the base expression
8997            if let Expr::Variable(var) = base.as_ref() {
8998                properties
8999                    .entry(var.clone())
9000                    .or_default()
9001                    .insert(name.clone());
9002                // Don't recurse into Variable — that would mark it as a bare
9003                // variable reference (adding "*") when it's just a property base.
9004            } else {
9005                // Recurse for complex base expressions (nested property, function call, etc.)
9006                collect_properties_from_expr_into(base, properties);
9007            }
9008        }
9009        Expr::BinaryOp { left, right, .. } => {
9010            collect_properties_from_expr_into(left, properties);
9011            collect_properties_from_expr_into(right, properties);
9012        }
9013        Expr::FunctionCall {
9014            name,
9015            args,
9016            window_spec,
9017            ..
9018        } => {
9019            // Analyze function for property requirements (pushdown hydration)
9020            analyze_function_property_requirements(name, args, properties);
9021
9022            // Collect from arguments
9023            for arg in args {
9024                collect_properties_from_expr_into(arg, properties);
9025            }
9026
9027            // Collect from window spec (PARTITION BY, ORDER BY)
9028            if let Some(spec) = window_spec {
9029                for part_expr in &spec.partition_by {
9030                    collect_properties_from_expr_into(part_expr, properties);
9031                }
9032                for sort_item in &spec.order_by {
9033                    collect_properties_from_expr_into(&sort_item.expr, properties);
9034                }
9035            }
9036        }
9037        Expr::UnaryOp { expr, .. } => {
9038            collect_properties_from_expr_into(expr, properties);
9039        }
9040        Expr::List(items) => {
9041            for item in items {
9042                collect_properties_from_expr_into(item, properties);
9043            }
9044        }
9045        Expr::Map(entries) => {
9046            for (_key, value) in entries {
9047                collect_properties_from_expr_into(value, properties);
9048            }
9049        }
9050        Expr::ListComprehension {
9051            list,
9052            where_clause,
9053            map_expr,
9054            ..
9055        } => {
9056            collect_properties_from_expr_into(list, properties);
9057            if let Some(where_expr) = where_clause {
9058                collect_properties_from_expr_into(where_expr, properties);
9059            }
9060            collect_properties_from_expr_into(map_expr, properties);
9061        }
9062        Expr::Case {
9063            expr,
9064            when_then,
9065            else_expr,
9066        } => {
9067            if let Some(scrutinee_expr) = expr {
9068                collect_properties_from_expr_into(scrutinee_expr, properties);
9069            }
9070            for (when, then) in when_then {
9071                collect_properties_from_expr_into(when, properties);
9072                collect_properties_from_expr_into(then, properties);
9073            }
9074            if let Some(default_expr) = else_expr {
9075                collect_properties_from_expr_into(default_expr, properties);
9076            }
9077        }
9078        Expr::Quantifier {
9079            list, predicate, ..
9080        } => {
9081            collect_properties_from_expr_into(list, properties);
9082            collect_properties_from_expr_into(predicate, properties);
9083        }
9084        Expr::Reduce {
9085            init, list, expr, ..
9086        } => {
9087            collect_properties_from_expr_into(init, properties);
9088            collect_properties_from_expr_into(list, properties);
9089            collect_properties_from_expr_into(expr, properties);
9090        }
9091        Expr::Exists { query, .. } => {
9092            // Walk into EXISTS body to collect property references for outer-scope variables.
9093            // This ensures correlated properties (e.g., a.city inside EXISTS where a is outer)
9094            // are included in the outer scan's property list. Extra properties collected for
9095            // inner-only variables are harmless — the outer scan ignores unknown variable names.
9096            collect_properties_from_subquery(query, properties);
9097        }
9098        Expr::CountSubquery(query) | Expr::CollectSubquery(query) => {
9099            collect_properties_from_subquery(query, properties);
9100        }
9101        Expr::IsNull(expr) | Expr::IsNotNull(expr) | Expr::IsUnique(expr) => {
9102            collect_properties_from_expr_into(expr, properties);
9103        }
9104        Expr::In { expr, list } => {
9105            collect_properties_from_expr_into(expr, properties);
9106            collect_properties_from_expr_into(list, properties);
9107        }
9108        Expr::ArrayIndex { array, index } => {
9109            if let Expr::Variable(var) = array.as_ref() {
9110                if let Expr::Literal(CypherLiteral::String(prop_name)) = index.as_ref() {
9111                    // Static string key: e['name'] → only need that specific property
9112                    properties
9113                        .entry(var.clone())
9114                        .or_default()
9115                        .insert(prop_name.clone());
9116                } else {
9117                    // Dynamic property access: e[prop] → need all properties
9118                    properties
9119                        .entry(var.clone())
9120                        .or_default()
9121                        .insert("*".to_string());
9122                }
9123            }
9124            collect_properties_from_expr_into(array, properties);
9125            collect_properties_from_expr_into(index, properties);
9126        }
9127        Expr::ArraySlice { array, start, end } => {
9128            collect_properties_from_expr_into(array, properties);
9129            if let Some(start_expr) = start {
9130                collect_properties_from_expr_into(start_expr, properties);
9131            }
9132            if let Some(end_expr) = end {
9133                collect_properties_from_expr_into(end_expr, properties);
9134            }
9135        }
9136        Expr::ValidAt {
9137            entity,
9138            timestamp,
9139            start_prop,
9140            end_prop,
9141        } => {
9142            // Extract property requirements from ValidAt expression
9143            if let Expr::Variable(var) = entity.as_ref() {
9144                if let Some(prop) = start_prop {
9145                    properties
9146                        .entry(var.clone())
9147                        .or_default()
9148                        .insert(prop.clone());
9149                }
9150                if let Some(prop) = end_prop {
9151                    properties
9152                        .entry(var.clone())
9153                        .or_default()
9154                        .insert(prop.clone());
9155                }
9156            }
9157            collect_properties_from_expr_into(entity, properties);
9158            collect_properties_from_expr_into(timestamp, properties);
9159        }
9160        Expr::MapProjection { base, items } => {
9161            collect_properties_from_expr_into(base, properties);
9162            for item in items {
9163                match item {
9164                    uni_cypher::ast::MapProjectionItem::Property(prop) => {
9165                        if let Expr::Variable(var) = base.as_ref() {
9166                            properties
9167                                .entry(var.clone())
9168                                .or_default()
9169                                .insert(prop.clone());
9170                        }
9171                    }
9172                    uni_cypher::ast::MapProjectionItem::AllProperties => {
9173                        if let Expr::Variable(var) = base.as_ref() {
9174                            properties
9175                                .entry(var.clone())
9176                                .or_default()
9177                                .insert("*".to_string());
9178                        }
9179                    }
9180                    uni_cypher::ast::MapProjectionItem::LiteralEntry(_, expr) => {
9181                        collect_properties_from_expr_into(expr, properties);
9182                    }
9183                    uni_cypher::ast::MapProjectionItem::Variable(_) => {}
9184                }
9185            }
9186        }
9187        Expr::LabelCheck { expr, .. } => {
9188            collect_properties_from_expr_into(expr, properties);
9189        }
9190        // Parameters reference outer-scope variables (e.g., $p in correlated subqueries).
9191        // Mark them with "*" so the outer scan produces structural projections that
9192        // extract_row_params can resolve.
9193        Expr::Parameter(name) => {
9194            properties
9195                .entry(name.clone())
9196                .or_default()
9197                .insert("*".to_string());
9198        }
9199        // Literals and wildcard don't reference properties
9200        Expr::Literal(_) | Expr::Wildcard => {}
9201    }
9202}
9203
9204/// Walk a subquery (EXISTS/COUNT/COLLECT body) and collect property references.
9205///
9206/// This is needed so that correlated property accesses like `a.city` inside
9207/// `WHERE EXISTS { (a)-[:KNOWS]->(b) WHERE b.city = a.city }` cause the outer
9208/// scan to include `a.city` in its projected columns.
9209fn collect_properties_from_subquery(
9210    query: &Query,
9211    properties: &mut HashMap<String, HashSet<String>>,
9212) {
9213    match query {
9214        Query::Single(stmt) => {
9215            for clause in &stmt.clauses {
9216                match clause {
9217                    Clause::Match(m) => {
9218                        if let Some(ref wc) = m.where_clause {
9219                            collect_properties_from_expr_into(wc, properties);
9220                        }
9221                    }
9222                    Clause::With(w) => {
9223                        for item in &w.items {
9224                            if let ReturnItem::Expr { expr, .. } = item {
9225                                collect_properties_from_expr_into(expr, properties);
9226                            }
9227                        }
9228                        if let Some(ref wc) = w.where_clause {
9229                            collect_properties_from_expr_into(wc, properties);
9230                        }
9231                    }
9232                    Clause::Return(r) => {
9233                        for item in &r.items {
9234                            if let ReturnItem::Expr { expr, .. } = item {
9235                                collect_properties_from_expr_into(expr, properties);
9236                            }
9237                        }
9238                    }
9239                    _ => {}
9240                }
9241            }
9242        }
9243        Query::Union { left, right, .. } => {
9244            collect_properties_from_subquery(left, properties);
9245            collect_properties_from_subquery(right, properties);
9246        }
9247        _ => {}
9248    }
9249}
9250
9251/// Analyze function calls to extract property requirements for pushdown hydration
9252///
9253/// This function examines function calls and their arguments to determine which properties
9254/// need to be loaded for entity arguments. For example:
9255/// - validAt(e, 'start', 'end', ts) -> e needs {start, end}
9256/// - keys(n) -> n needs all properties (*)
9257///
9258/// The extracted requirements are added to the properties map for later use during
9259/// scan planning.
9260fn analyze_function_property_requirements(
9261    name: &str,
9262    args: &[Expr],
9263    properties: &mut HashMap<String, HashSet<String>>,
9264) {
9265    use crate::query::function_props::get_function_spec;
9266
9267    /// Helper to mark a variable as needing all properties.
9268    fn mark_wildcard(var: &str, properties: &mut HashMap<String, HashSet<String>>) {
9269        properties
9270            .entry(var.to_string())
9271            .or_default()
9272            .insert("*".to_string());
9273    }
9274
9275    // System-managed timestamp functions: require only the corresponding
9276    // `_created_at` / `_updated_at` column, not full entity materialization.
9277    if name.eq_ignore_ascii_case("created_at") || name.eq_ignore_ascii_case("updated_at") {
9278        if let Some(Expr::Variable(var)) = args.first() {
9279            let col = if name.eq_ignore_ascii_case("created_at") {
9280                "_created_at"
9281            } else {
9282                "_updated_at"
9283            };
9284            properties
9285                .entry(var.clone())
9286                .or_default()
9287                .insert(col.to_string());
9288        }
9289        return;
9290    }
9291
9292    let Some(spec) = get_function_spec(name) else {
9293        // Unknown function: conservatively require all properties for variable args
9294        for arg in args {
9295            if let Expr::Variable(var) = arg {
9296                mark_wildcard(var, properties);
9297            }
9298        }
9299        return;
9300    };
9301
9302    // Extract property names from string literal arguments
9303    for &(prop_arg_idx, entity_arg_idx) in spec.property_name_args {
9304        let entity_arg = args.get(entity_arg_idx);
9305        let prop_arg = args.get(prop_arg_idx);
9306
9307        match (entity_arg, prop_arg) {
9308            (Some(Expr::Variable(var)), Some(Expr::Literal(CypherLiteral::String(prop)))) => {
9309                properties
9310                    .entry(var.clone())
9311                    .or_default()
9312                    .insert(prop.clone());
9313            }
9314            (Some(Expr::Variable(var)), Some(Expr::Parameter(_))) => {
9315                // Parameter property name: need all properties
9316                mark_wildcard(var, properties);
9317            }
9318            _ => {}
9319        }
9320    }
9321
9322    // Handle full entity requirement (keys(), properties())
9323    if spec.needs_full_entity {
9324        for &idx in spec.entity_args {
9325            if let Some(Expr::Variable(var)) = args.get(idx) {
9326                mark_wildcard(var, properties);
9327            }
9328        }
9329    }
9330}
9331
9332// ============================================================================
9333// Phase 5a-impl — fork-aware fusion rewrite
9334// ============================================================================
9335
/// Trait that exposes the per-fork "is there a fork-local index for
/// `(label, column)`?" lookup. Implemented for `StorageManager` so
/// callers don't need to depend on the fork module directly; tests
/// can mock by implementing it on a `HashMap`.
pub trait ForkIndexLookup {
    /// Look up the fork-local index for the pair (`label` name, `column`).
    ///
    /// Returns the kind of the index when there is one, `None` otherwise.
    fn fork_index_for(
        &self,
        label: &str,
        column: &str,
    ) -> Option<uni_store::fork::ForkLocalIndexKind>;

    /// Phase 5b followup: resolve a label id, then dispatch to
    /// `fork_index_for`. Used by the rewrite when wrapping
    /// `VectorKnn` and `InvertedIndexLookup` nodes which carry
    /// `label_id: u16` rather than the label name. Default returns
    /// `None`; the `StorageManager` impl resolves via its
    /// `schema_manager`.
    ///
    /// Implementors that cannot map ids to names may keep the default;
    /// callers then treat the node as having no fork-local index.
    fn fork_index_for_label_id(
        &self,
        _label_id: u16,
        _column: &str,
    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
        None
    }
}
9361
9362impl ForkIndexLookup for uni_store::storage::StorageManager {
9363    fn fork_index_for(
9364        &self,
9365        label: &str,
9366        column: &str,
9367    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9368        self.fork_index_exists(label, column)
9369    }
9370
9371    fn fork_index_for_label_id(
9372        &self,
9373        label_id: u16,
9374        column: &str,
9375    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9376        let schema = self.schema_manager().schema();
9377        let label_name = schema.label_name_by_id(label_id)?;
9378        self.fork_index_exists(label_name, column)
9379    }
9380}
9381
9382/// Fold a trailing `SET var.prop = value` into the freshly-created entity's
9383/// inline property map, eliminating the separate `Set` write pass.
9384///
9385/// Rewrites `CREATE (a)-[r:T]->(b) SET r.x = e.v` into the equivalent of
9386/// `CREATE (a)-[r:T {x: e.v}]->(b)`, so the plan collapses from `Set → Create`
9387/// to a single `Create`. This removes an entire read-modify-write operator
9388/// (`MutationSetExec`) — measured at ~38% of per-edge `UNWIND … CREATE … SET`
9389/// execution — that the bulk write path never pays.
9390///
9391/// # Examples
9392///
9393/// ```ignore
9394/// // CREATE (a)-[r:LINK]->(b) SET r.role = e.role   ==>
9395/// // CREATE (a)-[r:LINK {role: e.role}]->(b)
9396/// let fused = fuse_create_set(plan);
9397/// ```
9398///
9399/// The fold is **all-or-nothing per `SET` clause** and only fires when every
9400/// item is safe:
9401/// - the item is the simple `Variable.property = value` form (not `+=`, label
9402///   set `SET n:L`, or whole-entity map assignment `SET n = {...}`),
9403/// - the target variable is introduced by the immediately-preceding
9404///   `Create`/`CreateBatch` (a MATCHed variable is left untouched),
9405/// - the target element's inline properties are absent or a map literal (a
9406///   parameter-map form such as `CREATE (n $props)` cannot be merged),
9407/// - the value references no variable created in the same statement, so
9408///   evaluating it at create time is observably identical to SET time.
9409///
9410/// When any item fails these checks the whole `Set` node is preserved, keeping
9411/// semantics unchanged. The pass is idempotent: a plan with no fusable
9412/// `Set`/`Create` adjacency passes through untouched.
9413#[must_use]
9414pub fn fuse_create_set(plan: LogicalPlan) -> LogicalPlan {
9415    match plan {
9416        LogicalPlan::Set { input, items } => {
9417            // Fuse any deeper adjacency first so chained
9418            // `CREATE … SET … CREATE … SET` collapses bottom-up.
9419            let input = fuse_create_set(*input);
9420            match input {
9421                LogicalPlan::Create {
9422                    input: child,
9423                    pattern,
9424                } => {
9425                    let bound_vars = crate::query::df_planner::collect_plan_variables(&child);
9426                    match try_fuse_set_items(std::slice::from_ref(&pattern), &items, &bound_vars) {
9427                        Some(mut patterns) => LogicalPlan::Create {
9428                            input: child,
9429                            // try_fuse_set_items returns exactly as many patterns
9430                            // as it was given (one here).
9431                            pattern: patterns
9432                                .pop()
9433                                .expect("one pattern in yields one pattern out"),
9434                        },
9435                        None => LogicalPlan::Set {
9436                            input: Box::new(LogicalPlan::Create {
9437                                input: child,
9438                                pattern,
9439                            }),
9440                            items,
9441                        },
9442                    }
9443                }
9444                LogicalPlan::CreateBatch {
9445                    input: child,
9446                    patterns,
9447                } => {
9448                    let bound_vars = crate::query::df_planner::collect_plan_variables(&child);
9449                    match try_fuse_set_items(&patterns, &items, &bound_vars) {
9450                        Some(fused) => LogicalPlan::CreateBatch {
9451                            input: child,
9452                            patterns: fused,
9453                        },
9454                        None => LogicalPlan::Set {
9455                            input: Box::new(LogicalPlan::CreateBatch {
9456                                input: child,
9457                                patterns,
9458                            }),
9459                            items,
9460                        },
9461                    }
9462                }
9463                other => LogicalPlan::Set {
9464                    input: Box::new(other),
9465                    items,
9466                },
9467            }
9468        }
9469        // Recurse through the operators that can sit above a write clause so a
9470        // `Set` under RETURN/ORDER BY/LIMIT is still reached. This mirrors the
9471        // pragmatic recursion of `rewrite_for_fork_fusion`: variants that never
9472        // sit above a write clause fall through `other => other` unchanged.
9473        LogicalPlan::Project { input, projections } => LogicalPlan::Project {
9474            input: Box::new(fuse_create_set(*input)),
9475            projections,
9476        },
9477        LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
9478            input: Box::new(fuse_create_set(*input)),
9479            skip,
9480            fetch,
9481        },
9482        LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
9483            input: Box::new(fuse_create_set(*input)),
9484            order_by,
9485        },
9486        LogicalPlan::Filter {
9487            input,
9488            predicate,
9489            optional_variables,
9490        } => LogicalPlan::Filter {
9491            input: Box::new(fuse_create_set(*input)),
9492            predicate,
9493            optional_variables,
9494        },
9495        LogicalPlan::Create { input, pattern } => LogicalPlan::Create {
9496            input: Box::new(fuse_create_set(*input)),
9497            pattern,
9498        },
9499        LogicalPlan::CreateBatch { input, patterns } => LogicalPlan::CreateBatch {
9500            input: Box::new(fuse_create_set(*input)),
9501            patterns,
9502        },
9503        other => other,
9504    }
9505}
9506
9507/// Try to fold every `SET` item into the given CREATE patterns.
9508///
9509/// Returns the rewritten patterns when *all* items fuse safely (see
9510/// [`fuse_create_set`] for the conditions); returns `None` the moment any item
9511/// is unfusable, so the caller can keep the original `Set` node untouched.
9512///
9513/// `bound_vars` are the variables produced by the CREATE's input plan (e.g. an
9514/// upstream MATCH). A CREATE pattern may *reuse* such a variable as an endpoint
9515/// (`MATCH (a) CREATE (a)-[r:T]->(b)`), so `pattern_variable_names` alone cannot
9516/// tell a freshly-created variable from a reused one. Reused variables are
9517/// excluded from `owner`: a `SET` on them must not fuse, because the executor
9518/// skips inline properties on already-bound elements (which would silently drop
9519/// the write).
9520fn try_fuse_set_items(
9521    patterns: &[Pattern],
9522    items: &[SetItem],
9523    bound_vars: &HashSet<String>,
9524) -> Option<Vec<Pattern>> {
9525    // Map each freshly-created variable to the index of the pattern that
9526    // introduces it, skipping any variable already bound upstream.
9527    let mut owner: HashMap<String, usize> = HashMap::new();
9528    for (idx, pattern) in patterns.iter().enumerate() {
9529        for var in crate::query::df_graph::mutation_common::pattern_variable_names(pattern) {
9530            if bound_vars.contains(&var) {
9531                continue;
9532            }
9533            owner.entry(var).or_insert(idx);
9534        }
9535    }
9536
9537    let mut out = patterns.to_vec();
9538    for item in items {
9539        let SetItem::Property { expr, value } = item else {
9540            return None; // `+=`, label set, or whole-entity map assignment
9541        };
9542        let Expr::Property(base, prop) = expr else {
9543            return None; // not a property target
9544        };
9545        let Expr::Variable(var) = base.as_ref() else {
9546            return None; // e.g. `n[expr].x` or a deeper path
9547        };
9548        let Some(&idx) = owner.get(var) else {
9549            return None; // target is a MATCHed (not created) variable
9550        };
9551        // Evaluating the value at create time must equal evaluating it at SET
9552        // time: reject any reference to a variable created in this statement
9553        // (its value may not yet exist when the element is constructed).
9554        if collect_expr_variables(value)
9555            .iter()
9556            .any(|referenced| owner.contains_key(referenced))
9557        {
9558            return None;
9559        }
9560        if !merge_pattern_property(&mut out[idx], var, prop, value) {
9561            return None; // element absent or has a non-map property form
9562        }
9563    }
9564    Some(out)
9565}
9566
9567/// Merge `var.prop = value` into the matching element's inline property map.
9568///
9569/// Returns `false` (leaving the pattern unchanged) when the variable's element
9570/// is not found or its existing properties are a non-map expression that cannot
9571/// be merged. Any pre-existing entry for `prop` is replaced so the SET's
9572/// last-write-wins precedence is preserved.
9573fn merge_pattern_property(pattern: &mut Pattern, var: &str, prop: &str, value: &Expr) -> bool {
9574    for path in &mut pattern.paths {
9575        if merge_into_elements(&mut path.elements, var, prop, value) {
9576            return true;
9577        }
9578    }
9579    false
9580}
9581
9582/// Recursive worker for [`merge_pattern_property`] over a list of elements.
9583fn merge_into_elements(
9584    elements: &mut [PatternElement],
9585    var: &str,
9586    prop: &str,
9587    value: &Expr,
9588) -> bool {
9589    for element in elements {
9590        match element {
9591            PatternElement::Node(n) if n.variable.as_deref() == Some(var) => {
9592                return set_map_property(&mut n.properties, prop, value.clone());
9593            }
9594            PatternElement::Relationship(r) if r.variable.as_deref() == Some(var) => {
9595                return set_map_property(&mut r.properties, prop, value.clone());
9596            }
9597            PatternElement::Parenthesized { pattern, .. } => {
9598                if merge_into_elements(&mut pattern.elements, var, prop, value) {
9599                    return true;
9600                }
9601            }
9602            _ => {}
9603        }
9604    }
9605    false
9606}
9607
9608/// Set `prop = value` on an optional inline property map, last-write-wins.
9609///
9610/// Returns `false` without mutating when the properties are present but are not
9611/// a map literal (e.g. `CREATE (n $params)`), which cannot accept a single key.
9612fn set_map_property(props: &mut Option<Expr>, prop: &str, value: Expr) -> bool {
9613    match props {
9614        None => {
9615            *props = Some(Expr::Map(vec![(prop.to_string(), value)]));
9616            true
9617        }
9618        Some(Expr::Map(entries)) => {
9619            entries.retain(|(k, _)| k != prop);
9620            entries.push((prop.to_string(), value));
9621            true
9622        }
9623        Some(_) => false,
9624    }
9625}
9626
/// Walk a [`LogicalPlan`] tree and rewrite each `Scan` whose target
/// `(label, column)` has a registered fork-local index into the
/// matching `FusedIndexScan` variant.
///
/// Public entry point; all of the work is delegated to `rewrite_node`,
/// which additionally wraps `ProcedureCall`, `VectorKnn` and
/// `InvertedIndexLookup` leaves in `FusedIndexScanWrapped` when the lookup
/// reports a matching index, and turns a `Sort` over a single-label `Scan`
/// into a `SortedKWayMerge` fused scan when a `Sorted` index covers the sole
/// sort key.
///
/// Phase 5a-impl Step 4 covers `VidUidForkFirst`; Steps 5 and 6 add
/// `BtreeUnion` and `SortedKWayMerge` by extending `kind_for_filter`.
/// NOTE(review): `kind_for_filter` is not referenced by `rewrite_node`, which
/// goes through `equality_target_column` / `into_fusion_kind` — confirm
/// whether this sentence is stale.
///
/// `lookup` answers "is there a fork-local index for this label/column?";
/// see [`ForkIndexLookup`].
///
/// Idempotent: a tree that already contains `FusedIndexScan` nodes
/// passes through unchanged.
#[must_use]
pub fn rewrite_for_fork_fusion<L: ForkIndexLookup>(plan: LogicalPlan, lookup: &L) -> LogicalPlan {
    rewrite_node(plan, lookup)
}
9640
9641fn rewrite_node<L: ForkIndexLookup>(plan: LogicalPlan, lookup: &L) -> LogicalPlan {
9642    match plan {
9643        LogicalPlan::Scan {
9644            label_id,
9645            labels,
9646            variable,
9647            filter,
9648            optional,
9649        } => {
9650            // VidUid fusion only fires on a single-label scan with an
9651            // equality filter on a registered UID column. BTree and
9652            // Sorted will extend this match in Steps 5 and 6.
9653            let kind = if labels.len() == 1
9654                && let Some(col) = filter
9655                    .as_ref()
9656                    .and_then(|f| equality_target_column(f, &variable))
9657                && let Some(idx_kind) = lookup.fork_index_for(&labels[0], &col)
9658            {
9659                into_fusion_kind(idx_kind)
9660            } else {
9661                None
9662            };
9663            match kind {
9664                Some(kind) => LogicalPlan::FusedIndexScan {
9665                    label_id,
9666                    labels,
9667                    variable,
9668                    filter,
9669                    optional,
9670                    kind,
9671                },
9672                None => LogicalPlan::Scan {
9673                    label_id,
9674                    labels,
9675                    variable,
9676                    filter,
9677                    optional,
9678                },
9679            }
9680        }
9681        // Phase 5b followup: wrap lossy leaf operators when a
9682        // matching fork-local index has been registered. The wrap
9683        // preserves the original node's fields (the physical
9684        // planner unwraps and recurses); only the explain-plan
9685        // surface and runtime-stats operator name change. The
9686        // actual fusion still happens at the `BranchedBackend`
9687        // layer via Lance's per-branch reads.
9688        //
9689        // The CALL-style vector/FTS queries land as `ProcedureCall`
9690        // (not the dedicated `VectorKnn`/`InvertedIndexLookup`
9691        // operators); recognize those by procedure name and the
9692        // shape of their first two arguments (`label, column, ...`).
9693        LogicalPlan::ProcedureCall {
9694            procedure_name,
9695            arguments,
9696            yield_items,
9697        } => {
9698            let kind = procedure_call_fusion_kind(&procedure_name, &arguments, lookup);
9699            let inner = LogicalPlan::ProcedureCall {
9700                procedure_name,
9701                arguments,
9702                yield_items,
9703            };
9704            match kind {
9705                Some(kind) => LogicalPlan::FusedIndexScanWrapped {
9706                    inner: Box::new(inner),
9707                    kind,
9708                },
9709                None => inner,
9710            }
9711        }
9712        LogicalPlan::VectorKnn {
9713            label_id,
9714            variable,
9715            property,
9716            query,
9717            k,
9718            threshold,
9719        } => {
9720            if let Some(idx_kind) = lookup.fork_index_for_label_id(label_id, &property)
9721                && let Some(kind) = into_fusion_kind(idx_kind)
9722            {
9723                LogicalPlan::FusedIndexScanWrapped {
9724                    inner: Box::new(LogicalPlan::VectorKnn {
9725                        label_id,
9726                        variable,
9727                        property,
9728                        query,
9729                        k,
9730                        threshold,
9731                    }),
9732                    kind,
9733                }
9734            } else {
9735                LogicalPlan::VectorKnn {
9736                    label_id,
9737                    variable,
9738                    property,
9739                    query,
9740                    k,
9741                    threshold,
9742                }
9743            }
9744        }
9745        LogicalPlan::InvertedIndexLookup {
9746            label_id,
9747            variable,
9748            property,
9749            terms,
9750        } => {
9751            if let Some(idx_kind) = lookup.fork_index_for_label_id(label_id, &property)
9752                && let Some(kind) = into_fusion_kind(idx_kind)
9753            {
9754                LogicalPlan::FusedIndexScanWrapped {
9755                    inner: Box::new(LogicalPlan::InvertedIndexLookup {
9756                        label_id,
9757                        variable,
9758                        property,
9759                        terms,
9760                    }),
9761                    kind,
9762                }
9763            } else {
9764                LogicalPlan::InvertedIndexLookup {
9765                    label_id,
9766                    variable,
9767                    property,
9768                    terms,
9769                }
9770            }
9771        }
9772        // Tree-recursive variants — only the ones that can carry a
9773        // Scan in their subtree need to recurse here. Adding more is
9774        // safe (a missing recursion just means fusion doesn't fire
9775        // for that nested context, not incorrect results).
9776        LogicalPlan::Filter {
9777            input,
9778            predicate,
9779            optional_variables,
9780        } => LogicalPlan::Filter {
9781            input: Box::new(rewrite_node(*input, lookup)),
9782            predicate,
9783            optional_variables,
9784        },
9785        LogicalPlan::Project { input, projections } => LogicalPlan::Project {
9786            input: Box::new(rewrite_node(*input, lookup)),
9787            projections,
9788        },
9789        LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
9790            input: Box::new(rewrite_node(*input, lookup)),
9791            skip,
9792            fetch,
9793        },
9794        LogicalPlan::Sort { input, order_by } => {
9795            // Phase 5a-impl Sorted fusion: when the immediate child
9796            // is a single-label Scan AND the sole sort key is a
9797            // single-column property reference on that scan's
9798            // variable AND the column has a fork-local Sorted index
9799            // registered, rewrite to FusedIndexScan { SortedKWayMerge }.
9800            // Otherwise recurse normally.
9801            let new_input = match (*input, &order_by[..]) {
9802                (
9803                    LogicalPlan::Scan {
9804                        label_id,
9805                        labels,
9806                        variable,
9807                        filter,
9808                        optional,
9809                    },
9810                    [single_sort],
9811                ) if labels.len() == 1
9812                    && let Some(col) = column_of_scan_variable(&single_sort.expr, &variable)
9813                    && let Some(uni_store::fork::ForkLocalIndexKind::Sorted) =
9814                        lookup.fork_index_for(&labels[0], &col) =>
9815                {
9816                    LogicalPlan::FusedIndexScan {
9817                        label_id,
9818                        labels,
9819                        variable,
9820                        filter,
9821                        optional,
9822                        kind: FusionKind::SortedKWayMerge,
9823                    }
9824                }
9825                (other_input, _) => rewrite_node(other_input, lookup),
9826            };
9827            LogicalPlan::Sort {
9828                input: Box::new(new_input),
9829                order_by,
9830            }
9831        }
9832        LogicalPlan::Union { left, right, all } => LogicalPlan::Union {
9833            left: Box::new(rewrite_node(*left, lookup)),
9834            right: Box::new(rewrite_node(*right, lookup)),
9835            all,
9836        },
9837        // Everything else passes through unchanged. Adding more
9838        // arms is purely additive — fusion just doesn't fire inside
9839        // un-recursed-into subtrees.
9840        other => other,
9841    }
9842}
9843
9844/// Phase 5b followup: inspect a CALL-style procedure invocation
9845/// for a `(label, column)` pair and check whether a fork-local
9846/// index has been registered for it.
9847///
9848/// Recognizes:
9849/// - `uni.vector.query(label, column, query_vec, k)` → `AnnRerank`
9850///   when a `Vector` fork-local index exists.
9851/// - `uni.fts.query(label, column, query, k)` → `Bm25Rrf` when a
9852///   `FullText` fork-local index exists.
9853///
9854/// Returns `None` for any other procedure (no rewrite) or when the
9855/// registry has no matching entry.
9856fn procedure_call_fusion_kind<L: ForkIndexLookup>(
9857    procedure_name: &str,
9858    arguments: &[Expr],
9859    lookup: &L,
9860) -> Option<FusionKind> {
9861    if arguments.len() < 2 {
9862        return None;
9863    }
9864    let label = match &arguments[0] {
9865        Expr::Literal(uni_cypher::ast::CypherLiteral::String(s)) => s.as_str(),
9866        _ => return None,
9867    };
9868    let column = match &arguments[1] {
9869        Expr::Literal(uni_cypher::ast::CypherLiteral::String(s)) => s.as_str(),
9870        _ => return None,
9871    };
9872    let expected = match procedure_name {
9873        "uni.vector.query" => uni_store::fork::ForkLocalIndexKind::Vector,
9874        "uni.fts.query" => uni_store::fork::ForkLocalIndexKind::FullText,
9875        _ => return None,
9876    };
9877    let registered = lookup.fork_index_for(label, column)?;
9878    if registered != expected {
9879        return None;
9880    }
9881    into_fusion_kind(registered)
9882}
9883
9884/// Map a fork-local index kind to its planner-side fusion variant.
9885/// Returns `None` for any future `ForkLocalIndexKind` we don't yet
9886/// know how to fuse — the caller falls back to a regular Scan.
9887fn into_fusion_kind(kind: uni_store::fork::ForkLocalIndexKind) -> Option<FusionKind> {
9888    use uni_store::fork::ForkLocalIndexKind as K;
9889    match kind {
9890        K::VidUid => Some(FusionKind::VidUidForkFirst),
9891        K::ScalarBtree => Some(FusionKind::BtreeUnion),
9892        K::Sorted => Some(FusionKind::SortedKWayMerge),
9893        K::Vector => Some(FusionKind::AnnRerank),
9894        K::FullText => Some(FusionKind::Bm25Rrf),
9895        // `ForkLocalIndexKind` is `#[non_exhaustive]`; future kinds
9896        // we don't yet handle are silently passed through as a
9897        // regular Scan so a forward-incompatible binary doesn't
9898        // panic — just misses the fusion opportunity.
9899        _ => None,
9900    }
9901}
9902
9903/// Inspect a Scan filter `Expr` for a single-column equality predicate
9904/// against the scan's variable. Returns the column name if the
9905/// predicate matches the shape `variable.column = <literal_or_param>`
9906/// (or its commuted form). Returns `None` for any other shape — fusion
9907/// only fires on the simple case in Phase 5a-impl.
9908fn equality_target_column(filter: &Expr, scan_variable: &str) -> Option<String> {
9909    let (lhs, rhs) = match filter {
9910        Expr::BinaryOp {
9911            left,
9912            op: uni_cypher::ast::BinaryOp::Eq,
9913            right,
9914        } => (left.as_ref(), right.as_ref()),
9915        _ => return None,
9916    };
9917    // Try lhs = column-of-scan-var, rhs = literal/param; or commuted.
9918    if let Some(col) = column_of_scan_variable(lhs, scan_variable)
9919        && is_constant_or_param(rhs)
9920    {
9921        return Some(col);
9922    }
9923    if let Some(col) = column_of_scan_variable(rhs, scan_variable)
9924        && is_constant_or_param(lhs)
9925    {
9926        return Some(col);
9927    }
9928    None
9929}
9930
9931fn column_of_scan_variable(expr: &Expr, scan_variable: &str) -> Option<String> {
9932    if let Expr::Property(base, prop) = expr
9933        && let Expr::Variable(v) = base.as_ref()
9934        && v == scan_variable
9935    {
9936        return Some(prop.clone());
9937    }
9938    None
9939}
9940
9941fn is_constant_or_param(expr: &Expr) -> bool {
9942    matches!(expr, Expr::Literal(_) | Expr::Parameter(_))
9943}
9944
#[cfg(test)]
mod pushdown_tests {
    use super::*;

    /// Build a variable-reference expression named `name`.
    fn var(name: &str) -> Expr {
        Expr::Variable(name.to_string())
    }

    /// Build a string-literal expression holding `text`.
    fn str_lit(text: &str) -> Expr {
        Expr::Literal(CypherLiteral::String(text.to_string()))
    }

    /// Build a query-parameter expression named `name`.
    fn param(name: &str) -> Expr {
        Expr::Parameter(name.to_string())
    }

    /// Build the expected property-name set from string slices.
    fn prop_set(names: &[&str]) -> HashSet<String> {
        names.iter().map(|name| name.to_string()).collect()
    }

    /// Literal property names passed to `validAt` are collected verbatim.
    #[test]
    fn test_validat_extracts_property_names() {
        // validAt(e, 'start', 'end', ts) → e: {start, end}
        let args = vec![var("e"), str_lit("start"), str_lit("end"), var("ts")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("uni.temporal.validAt", &args, &mut properties);

        assert!(properties.contains_key("e"));
        assert_eq!(properties["e"], prop_set(&["start", "end"]));
    }

    /// `keys(n)` needs every property of `n`.
    #[test]
    fn test_keys_requires_wildcard() {
        // keys(n) → n: {*}
        let args = vec![var("n")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("keys", &args, &mut properties);

        assert!(properties.contains_key("n"));
        assert_eq!(properties["n"], prop_set(&["*"]));
    }

    /// `properties(n)` needs every property of `n`.
    #[test]
    fn test_properties_requires_wildcard() {
        // properties(n) → n: {*}
        let args = vec![var("n")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("properties", &args, &mut properties);

        assert!(properties.contains_key("n"));
        assert_eq!(properties["n"], prop_set(&["*"]));
    }

    /// Unrecognized functions conservatively request the wildcard.
    #[test]
    fn test_unknown_function_conservative() {
        // customUdf(e) → e: {*}
        let args = vec![var("e")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("customUdf", &args, &mut properties);

        assert!(properties.contains_key("e"));
        assert_eq!(properties["e"], prop_set(&["*"]));
    }

    /// Property names supplied as parameters cannot be resolved at plan
    /// time, so the wildcard must be present.
    #[test]
    fn test_parameter_property_name() {
        // validAt(e, $start, $end, ts) → e: {*}
        let args = vec![var("e"), param("start"), param("end"), var("ts")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("uni.temporal.validAt", &args, &mut properties);

        assert!(properties.contains_key("e"));
        assert!(properties["e"].contains("*"));
    }

    /// The dedicated `Expr::ValidAt` variant contributes its start/end
    /// property names for the entity variable.
    #[test]
    fn test_validat_expr_extracts_properties() {
        let validat_expr = Expr::ValidAt {
            entity: Box::new(var("e")),
            timestamp: Box::new(var("ts")),
            start_prop: Some("valid_from".to_string()),
            end_prop: Some("valid_to".to_string()),
        };
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&validat_expr, &mut properties);

        assert!(properties.contains_key("e"));
        let collected = &properties["e"];
        assert!(collected.contains("valid_from"));
        assert!(collected.contains("valid_to"));
    }

    /// Dynamic indexing into a variable requests the wildcard.
    #[test]
    fn test_array_index_requires_wildcard() {
        // e[prop] → e: {*}
        let array_index_expr = Expr::ArrayIndex {
            array: Box::new(var("e")),
            index: Box::new(var("prop")),
        };
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&array_index_expr, &mut properties);

        assert!(properties.contains_key("e"));
        assert!(properties["e"].contains("*"));
    }

    /// A plain property access records just that property.
    #[test]
    fn test_property_access_extraction() {
        // e.name → e: {name}
        let prop_access = Expr::Property(Box::new(var("e")), "name".to_string());
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&prop_access, &mut properties);

        assert!(properties.contains_key("e"));
        assert!(properties["e"].contains("name"));
    }
}