Skip to main content

uni_query/query/
planner.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use crate::query::pushdown::{PredicateAnalyzer, try_label_or_to_union, try_type_or_to_union};
5use anyhow::{Result, anyhow};
6use arrow_array::RecordBatch;
7use arrow_schema::{DataType, SchemaRef};
8use parking_lot::RwLock;
9use std::collections::{HashMap, HashSet};
10use std::sync::Arc;
11use uni_common::Value;
12use uni_common::core::schema::{
13    EmbeddingConfig, FullTextIndexConfig, IndexDefinition, JsonFtsIndexConfig, ScalarIndexConfig,
14    ScalarIndexType, Schema, TokenizerConfig, VectorIndexConfig,
15};
16use uni_cypher::ast::{
17    AlterEdgeType, AlterLabel, BinaryOp, CallKind, Clause, CreateConstraint, CreateEdgeType,
18    CreateLabel, CypherLiteral, Direction, DropConstraint, DropEdgeType, DropLabel, Expr,
19    MatchClause, MergeClause, NodePattern, PathPattern, Pattern, PatternElement, Query,
20    RelationshipPattern, RemoveItem, ReturnClause, ReturnItem, SchemaCommand, SetClause, SetItem,
21    ShortestPathMode, ShowConstraints, SortItem, Statement, WindowSpec, WithClause,
22    WithRecursiveClause,
23};
24
/// Sentinel column name inserted into a variable's property set to request
/// that the planner build the bare struct column (`add_structural_projection`)
/// WITHOUT pulling the full schema.
///
/// Emitted by `mark_set_item_variables` for `SetItem::Property` targets only.
/// Other SET variants (`Labels`, `Variable`, `VariablePlus`) and REMOVE still
/// emit `"*"` because they replace/merge the whole node.
///
/// **Union semantics:** When both `"*"` and the sentinel appear in the same
/// variable's HashSet (e.g. `SET n.x = 1 RETURN n` collects both), `"*"`
/// dominates — schema expansion still happens. The sentinel only changes
/// behavior when it is the sole structural marker present.
///
/// **Reserved-name convention:** the double-underscore prefix marks this as
/// internal. Schema validation should reject user-declared properties with
/// this name (deferred follow-up — until that lands, a user property with
/// this exact name would presumably collide with the sentinel; TODO confirm).
pub(crate) const STRUCT_ONLY_SENTINEL: &str = "__set_struct__";
42
/// Semantic category of a variable that is in scope while a query is planned.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VariableType {
    /// A node binding, e.g. `n` in `MATCH (n)` or `CREATE (n)`.
    Node,
    /// A relationship binding, e.g. `r` in `MATCH ()-[r]->()`.
    Edge,
    /// A path binding, e.g. `p` in `MATCH p = (a)-[*]->(b)`.
    Path,
    /// A scalar binding (`WITH expr AS x`, `UNWIND list AS item`, ...).
    /// It may hold a map or a dynamic value, so property access is allowed.
    Scalar,
    /// A scalar known to originate from a non-graph literal (int, float,
    /// bool, string, list). Property access on it is rejected at compile time.
    ScalarLiteral,
    /// A variable imported from an outer scope whose type is unknown (the
    /// string variables handed to `plan_with_scope`). It is compatible with
    /// every concrete type so that subqueries may re-bind it.
    Imported,
}

impl VariableType {
    /// Reports whether a variable of type `self` may be used where `expected`
    /// is required.
    ///
    /// Three situations are accepted:
    /// * `self` is `Imported` — its real type is unknown at plan time;
    /// * `self` is `ScalarLiteral` and `expected` is `Scalar`;
    /// * both types are identical.
    ///
    /// The relation is not symmetric: a `Scalar` does not satisfy an expected
    /// `ScalarLiteral`, and nothing but `Imported` satisfies an expected
    /// `Imported`.
    fn is_compatible_with(self, expected: VariableType) -> bool {
        match (self, expected) {
            (VariableType::Imported, _) => true,
            (VariableType::ScalarLiteral, VariableType::Scalar) => true,
            (actual, wanted) => actual == wanted,
        }
    }
}
74
75/// Information about a variable in scope during planning.
76#[derive(Debug, Clone)]
77pub struct VariableInfo {
78    /// Variable name as written in the query.
79    pub name: String,
80    /// Semantic type of the variable.
81    pub var_type: VariableType,
82    /// True if this is a variable-length path (VLP) step variable.
83    ///
84    /// VLP step variables are typed as Edge but semantically hold edge lists.
85    pub is_vlp: bool,
86}
87
88impl VariableInfo {
89    pub fn new(name: String, var_type: VariableType) -> Self {
90        Self {
91            name,
92            var_type,
93            is_vlp: false,
94        }
95    }
96}
97
98/// Find a variable in scope by name.
99fn find_var_in_scope<'a>(vars: &'a [VariableInfo], name: &str) -> Option<&'a VariableInfo> {
100    vars.iter().find(|v| v.name == name)
101}
102
103/// Check if a variable is in scope.
104fn is_var_in_scope(vars: &[VariableInfo], name: &str) -> bool {
105    find_var_in_scope(vars, name).is_some()
106}
107
108/// Check if an expression contains a pattern predicate.
109fn contains_pattern_predicate(expr: &Expr) -> bool {
110    if matches!(
111        expr,
112        Expr::Exists {
113            from_pattern_predicate: true,
114            ..
115        }
116    ) {
117        return true;
118    }
119    let mut found = false;
120    expr.for_each_child(&mut |child| {
121        if !found {
122            found = contains_pattern_predicate(child);
123        }
124    });
125    found
126}
127
128/// Add a variable to scope with type conflict validation.
129/// Returns an error if the variable already exists with a different type.
130fn add_var_to_scope(
131    vars: &mut Vec<VariableInfo>,
132    name: &str,
133    var_type: VariableType,
134) -> Result<()> {
135    if name.is_empty() {
136        return Ok(());
137    }
138
139    if let Some(existing) = vars.iter_mut().find(|v| v.name == name) {
140        if existing.var_type == VariableType::Imported {
141            // Imported vars upgrade to the concrete type
142            existing.var_type = var_type;
143        } else if var_type == VariableType::Imported || existing.var_type == var_type {
144            // New type is Imported (keep existing) or same type — no conflict
145        } else if matches!(
146            existing.var_type,
147            VariableType::Scalar | VariableType::ScalarLiteral
148        ) && matches!(var_type, VariableType::Node | VariableType::Edge)
149        {
150            // Scalar can be used as Node/Edge in CREATE context — a scalar
151            // holding a node/edge reference is valid for pattern use
152            existing.var_type = var_type;
153        } else {
154            return Err(anyhow!(
155                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as {:?}",
156                name,
157                existing.var_type,
158                var_type
159            ));
160        }
161    } else {
162        vars.push(VariableInfo::new(name.to_string(), var_type));
163    }
164    Ok(())
165}
166
167/// Convert VariableInfo vec to String vec for backward compatibility
168fn vars_to_strings(vars: &[VariableInfo]) -> Vec<String> {
169    vars.iter().map(|v| v.name.clone()).collect()
170}
171
172fn infer_with_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
173    match expr {
174        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
175            .map(|info| info.var_type)
176            .unwrap_or(VariableType::Scalar),
177        Expr::Literal(CypherLiteral::Null) => VariableType::Imported,
178        // Known non-graph literals: property access is NOT valid on these.
179        Expr::Literal(CypherLiteral::Integer(_))
180        | Expr::Literal(CypherLiteral::Float(_))
181        | Expr::Literal(CypherLiteral::String(_))
182        | Expr::Literal(CypherLiteral::Bool(_))
183        | Expr::Literal(CypherLiteral::Bytes(_)) => VariableType::ScalarLiteral,
184        Expr::FunctionCall { name, args, .. } => {
185            let lower = name.to_lowercase();
186            if lower == "coalesce" {
187                infer_coalesce_type(args, vars_in_scope)
188            } else if lower == "collect" && !args.is_empty() {
189                let collected = infer_with_output_type(&args[0], vars_in_scope);
190                if matches!(
191                    collected,
192                    VariableType::Node
193                        | VariableType::Edge
194                        | VariableType::Path
195                        | VariableType::Imported
196                ) {
197                    collected
198                } else {
199                    VariableType::Scalar
200                }
201            } else {
202                VariableType::Scalar
203            }
204        }
205        // WITH list literals/expressions produce scalar list values. Preserving
206        // entity typing here causes invalid node/edge reuse in later MATCH clauses
207        // (e.g. WITH [n] AS users; MATCH (users)-->() should fail at compile time).
208        // Lists are ScalarLiteral since property access is not valid on them.
209        Expr::List(_) => VariableType::ScalarLiteral,
210        _ => VariableType::Scalar,
211    }
212}
213
214fn infer_coalesce_type(args: &[Expr], vars_in_scope: &[VariableInfo]) -> VariableType {
215    let mut resolved: Option<VariableType> = None;
216    let mut saw_imported = false;
217    for arg in args {
218        let t = infer_with_output_type(arg, vars_in_scope);
219        match t {
220            VariableType::Node | VariableType::Edge | VariableType::Path => {
221                if let Some(existing) = resolved {
222                    if existing != t {
223                        return VariableType::Scalar;
224                    }
225                } else {
226                    resolved = Some(t);
227                }
228            }
229            VariableType::Imported => saw_imported = true,
230            VariableType::Scalar | VariableType::ScalarLiteral => {}
231        }
232    }
233    if let Some(t) = resolved {
234        t
235    } else if saw_imported {
236        VariableType::Imported
237    } else {
238        VariableType::Scalar
239    }
240}
241
242fn infer_unwind_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
243    match expr {
244        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
245            .map(|info| info.var_type)
246            .unwrap_or(VariableType::Scalar),
247        Expr::FunctionCall { name, args, .. }
248            if name.eq_ignore_ascii_case("collect") && !args.is_empty() =>
249        {
250            infer_with_output_type(&args[0], vars_in_scope)
251        }
252        Expr::List(items) => {
253            let mut inferred: Option<VariableType> = None;
254            for item in items {
255                let t = infer_with_output_type(item, vars_in_scope);
256                if !matches!(
257                    t,
258                    VariableType::Node
259                        | VariableType::Edge
260                        | VariableType::Path
261                        | VariableType::Imported
262                ) {
263                    return VariableType::Scalar;
264                }
265                if let Some(existing) = inferred {
266                    if existing != t
267                        && t != VariableType::Imported
268                        && existing != VariableType::Imported
269                    {
270                        return VariableType::Scalar;
271                    }
272                    if existing == VariableType::Imported && t != VariableType::Imported {
273                        inferred = Some(t);
274                    }
275                } else {
276                    inferred = Some(t);
277                }
278            }
279            inferred.unwrap_or(VariableType::Scalar)
280        }
281        _ => VariableType::Scalar,
282    }
283}
284
285/// Collect all variable names referenced in an expression
286fn collect_expr_variables(expr: &Expr) -> Vec<String> {
287    let mut vars = Vec::new();
288    collect_expr_variables_inner(expr, &mut vars);
289    vars
290}
291
292/// Collect the names of `$param` references in a constant-foldable expression.
293///
294/// Walks the variants that `eval_const_numeric_expr` accepts (the only shapes a
295/// successfully-folded `LIMIT`/`SKIP` expression can take): parameters,
296/// literals, unary/binary arithmetic, and the whitelisted numeric functions.
297/// Used to tell the plan cache which parameter values were baked into the plan.
298fn collect_expr_parameters(expr: &Expr, names: &mut Vec<String>) {
299    match expr {
300        Expr::Parameter(name) => {
301            if !names.contains(name) {
302                names.push(name.clone());
303            }
304        }
305        Expr::UnaryOp { expr: e, .. } => collect_expr_parameters(e, names),
306        Expr::BinaryOp { left, right, .. } => {
307            collect_expr_parameters(left, names);
308            collect_expr_parameters(right, names);
309        }
310        Expr::FunctionCall { args, .. } => {
311            for a in args {
312                collect_expr_parameters(a, names);
313            }
314        }
315        _ => {}
316    }
317}
318
319fn collect_expr_variables_inner(expr: &Expr, vars: &mut Vec<String>) {
320    let mut add_var = |name: &String| {
321        if !vars.contains(name) {
322            vars.push(name.clone());
323        }
324    };
325
326    match expr {
327        Expr::Variable(name) => add_var(name),
328        Expr::Property(base, _) => collect_expr_variables_inner(base, vars),
329        Expr::BinaryOp { left, right, .. } => {
330            collect_expr_variables_inner(left, vars);
331            collect_expr_variables_inner(right, vars);
332        }
333        Expr::UnaryOp { expr: e, .. }
334        | Expr::IsNull(e)
335        | Expr::IsNotNull(e)
336        | Expr::IsUnique(e) => collect_expr_variables_inner(e, vars),
337        Expr::FunctionCall { args, .. } => {
338            for a in args {
339                collect_expr_variables_inner(a, vars);
340            }
341        }
342        Expr::List(items) => {
343            for item in items {
344                collect_expr_variables_inner(item, vars);
345            }
346        }
347        Expr::In { expr: e, list } => {
348            collect_expr_variables_inner(e, vars);
349            collect_expr_variables_inner(list, vars);
350        }
351        Expr::Case {
352            expr: case_expr,
353            when_then,
354            else_expr,
355        } => {
356            if let Some(e) = case_expr {
357                collect_expr_variables_inner(e, vars);
358            }
359            for (w, t) in when_then {
360                collect_expr_variables_inner(w, vars);
361                collect_expr_variables_inner(t, vars);
362            }
363            if let Some(e) = else_expr {
364                collect_expr_variables_inner(e, vars);
365            }
366        }
367        Expr::Map(entries) => {
368            for (_, v) in entries {
369                collect_expr_variables_inner(v, vars);
370            }
371        }
372        Expr::LabelCheck { expr, .. } => collect_expr_variables_inner(expr, vars),
373        Expr::ArrayIndex { array, index } => {
374            collect_expr_variables_inner(array, vars);
375            collect_expr_variables_inner(index, vars);
376        }
377        Expr::ArraySlice { array, start, end } => {
378            collect_expr_variables_inner(array, vars);
379            if let Some(s) = start {
380                collect_expr_variables_inner(s, vars);
381            }
382            if let Some(e) = end {
383                collect_expr_variables_inner(e, vars);
384            }
385        }
386        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
387        // they introduce local variable bindings not in outer scope.
388        _ => {}
389    }
390}
391
392/// Rewrite ORDER BY expressions to resolve projection aliases back to their source expressions.
393///
394/// Example: `RETURN r AS rel ORDER BY rel.id` becomes `ORDER BY r.id` so Sort can run
395/// before the final RETURN projection without losing alias semantics.
396fn rewrite_order_by_expr_with_aliases(expr: &Expr, aliases: &HashMap<String, Expr>) -> Expr {
397    let repr = expr.to_string_repr();
398    if let Some(rewritten) = aliases.get(&repr) {
399        return rewritten.clone();
400    }
401
402    match expr {
403        Expr::Variable(name) => aliases.get(name).cloned().unwrap_or_else(|| expr.clone()),
404        Expr::Property(base, prop) => Expr::Property(
405            Box::new(rewrite_order_by_expr_with_aliases(base, aliases)),
406            prop.clone(),
407        ),
408        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
409            left: Box::new(rewrite_order_by_expr_with_aliases(left, aliases)),
410            op: *op,
411            right: Box::new(rewrite_order_by_expr_with_aliases(right, aliases)),
412        },
413        Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
414            op: *op,
415            expr: Box::new(rewrite_order_by_expr_with_aliases(inner, aliases)),
416        },
417        Expr::FunctionCall {
418            name,
419            args,
420            distinct,
421            window_spec,
422        } => Expr::FunctionCall {
423            name: name.clone(),
424            args: args
425                .iter()
426                .map(|a| rewrite_order_by_expr_with_aliases(a, aliases))
427                .collect(),
428            distinct: *distinct,
429            window_spec: window_spec.clone(),
430        },
431        Expr::List(items) => Expr::List(
432            items
433                .iter()
434                .map(|item| rewrite_order_by_expr_with_aliases(item, aliases))
435                .collect(),
436        ),
437        Expr::Map(entries) => Expr::Map(
438            entries
439                .iter()
440                .map(|(k, v)| (k.clone(), rewrite_order_by_expr_with_aliases(v, aliases)))
441                .collect(),
442        ),
443        Expr::Case {
444            expr: case_expr,
445            when_then,
446            else_expr,
447        } => Expr::Case {
448            expr: case_expr
449                .as_ref()
450                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
451            when_then: when_then
452                .iter()
453                .map(|(w, t)| {
454                    (
455                        rewrite_order_by_expr_with_aliases(w, aliases),
456                        rewrite_order_by_expr_with_aliases(t, aliases),
457                    )
458                })
459                .collect(),
460            else_expr: else_expr
461                .as_ref()
462                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
463        },
464        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
465        // they introduce local variable bindings that could shadow aliases.
466        _ => expr.clone(),
467    }
468}
469
470/// Validate function call argument types.
471/// Returns error if type constraints are violated.
472fn validate_function_call(name: &str, args: &[Expr], vars_in_scope: &[VariableInfo]) -> Result<()> {
473    let name_lower = name.to_lowercase();
474
475    // labels() requires Node
476    if name_lower == "labels"
477        && let Some(Expr::Variable(var_name)) = args.first()
478        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
479        && !info.var_type.is_compatible_with(VariableType::Node)
480    {
481        return Err(anyhow!(
482            "SyntaxError: InvalidArgumentType - labels() requires a node argument"
483        ));
484    }
485
486    // type() requires Edge
487    if name_lower == "type"
488        && let Some(Expr::Variable(var_name)) = args.first()
489        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
490        && !info.var_type.is_compatible_with(VariableType::Edge)
491    {
492        return Err(anyhow!(
493            "SyntaxError: InvalidArgumentType - type() requires a relationship argument"
494        ));
495    }
496
497    // properties() requires Node/Edge/Map (not scalar literals)
498    if name_lower == "properties"
499        && let Some(arg) = args.first()
500    {
501        match arg {
502            Expr::Literal(CypherLiteral::Integer(_))
503            | Expr::Literal(CypherLiteral::Float(_))
504            | Expr::Literal(CypherLiteral::String(_))
505            | Expr::Literal(CypherLiteral::Bool(_))
506            | Expr::List(_) => {
507                return Err(anyhow!(
508                    "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
509                ));
510            }
511            Expr::Variable(var_name) => {
512                if let Some(info) = find_var_in_scope(vars_in_scope, var_name)
513                    && matches!(
514                        info.var_type,
515                        VariableType::Scalar | VariableType::ScalarLiteral
516                    )
517                {
518                    return Err(anyhow!(
519                        "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
520                    ));
521                }
522            }
523            _ => {}
524        }
525    }
526
527    // nodes()/relationships() require Path
528    if (name_lower == "nodes" || name_lower == "relationships")
529        && let Some(Expr::Variable(var_name)) = args.first()
530        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
531        && !info.var_type.is_compatible_with(VariableType::Path)
532    {
533        return Err(anyhow!(
534            "SyntaxError: InvalidArgumentType - {}() requires a path argument",
535            name_lower
536        ));
537    }
538
539    // size() does NOT accept Path arguments (length() on paths IS valid — returns relationship count)
540    if name_lower == "size"
541        && let Some(Expr::Variable(var_name)) = args.first()
542        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
543        && info.var_type == VariableType::Path
544    {
545        return Err(anyhow!(
546            "SyntaxError: InvalidArgumentType - size() requires a string, list, or map argument"
547        ));
548    }
549
550    // length()/size() do NOT accept Node or single-Edge arguments.
551    // VLP step variables (e.g. `r` in `-[r*1..2]->`) are typed as Edge
552    // but are actually edge lists — size()/length() is valid on those.
553    if (name_lower == "length" || name_lower == "size")
554        && let Some(Expr::Variable(var_name)) = args.first()
555        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
556        && (info.var_type == VariableType::Node
557            || (info.var_type == VariableType::Edge && !info.is_vlp))
558    {
559        return Err(anyhow!(
560            "SyntaxError: InvalidArgumentType - {}() requires a string, list, or path argument",
561            name_lower
562        ));
563    }
564
565    Ok(())
566}
567
568/// Check if an expression is a non-boolean literal.
569fn is_non_boolean_literal(expr: &Expr) -> bool {
570    matches!(
571        expr,
572        Expr::Literal(CypherLiteral::Integer(_))
573            | Expr::Literal(CypherLiteral::Float(_))
574            | Expr::Literal(CypherLiteral::String(_))
575            | Expr::List(_)
576            | Expr::Map(_)
577    )
578}
579
580/// Validate boolean expressions (AND/OR/NOT require boolean arguments).
581fn validate_boolean_expression(expr: &Expr) -> Result<()> {
582    // Check AND/OR/XOR operands and NOT operand for non-boolean literals
583    if let Expr::BinaryOp { left, op, right } = expr
584        && matches!(op, BinaryOp::And | BinaryOp::Or | BinaryOp::Xor)
585    {
586        let op_name = format!("{op:?}").to_uppercase();
587        for operand in [left.as_ref(), right.as_ref()] {
588            if is_non_boolean_literal(operand) {
589                return Err(anyhow!(
590                    "SyntaxError: InvalidArgumentType - {} requires boolean arguments",
591                    op_name
592                ));
593            }
594        }
595    }
596    if let Expr::UnaryOp {
597        op: uni_cypher::ast::UnaryOp::Not,
598        expr: inner,
599    } = expr
600        && is_non_boolean_literal(inner)
601    {
602        return Err(anyhow!(
603            "SyntaxError: InvalidArgumentType - NOT requires a boolean argument"
604        ));
605    }
606    let mut result = Ok(());
607    expr.for_each_child(&mut |child| {
608        if result.is_ok() {
609            result = validate_boolean_expression(child);
610        }
611    });
612    result
613}
614
615/// Validate that all variables used in an expression are in scope.
616fn validate_expression_variables(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
617    let used_vars = collect_expr_variables(expr);
618    for var in used_vars {
619        if !is_var_in_scope(vars_in_scope, &var) {
620            return Err(anyhow!(
621                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
622                var
623            ));
624        }
625    }
626    Ok(())
627}
628
629/// Check if a function name (lowercase) is an aggregate function.
630fn is_aggregate_function_name(name: &str) -> bool {
631    matches!(
632        name.to_lowercase().as_str(),
633        "count"
634            | "sum"
635            | "avg"
636            | "min"
637            | "max"
638            | "collect"
639            | "stdev"
640            | "stddev"
641            | "stdevp"
642            | "stddevp"
643            | "variance"
644            | "variancep"
645            | "percentiledisc"
646            | "percentilecont"
647            | "btic_min"
648            | "btic_max"
649            | "btic_span_agg"
650            | "btic_count_at"
651    ) || uni_cypher::is_known_plugin_aggregate(name)
652}
653
654/// Returns true if the expression is a window function (FunctionCall with window_spec).
655fn is_window_function(expr: &Expr) -> bool {
656    matches!(
657        expr,
658        Expr::FunctionCall {
659            window_spec: Some(_),
660            ..
661        }
662    )
663}
664
665/// Returns true when `expr` reports `is_aggregate()` but is NOT itself a bare
666/// aggregate FunctionCall (or CountSubquery/CollectSubquery). In other words,
667/// the aggregate lives *inside* a wrapper expression (e.g. a ListComprehension,
668/// size() call, BinaryOp, etc.).
669fn is_compound_aggregate(expr: &Expr) -> bool {
670    if !expr.is_aggregate() {
671        return false;
672    }
673    match expr {
674        Expr::FunctionCall {
675            name, window_spec, ..
676        } => {
677            // A bare aggregate FunctionCall is NOT compound
678            if window_spec.is_some() {
679                return true; // window wrapping an aggregate — treat as compound
680            }
681            !is_aggregate_function_name(name)
682        }
683        // Subquery aggregates are "bare" (not compound)
684        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => false,
685        // Everything else (ListComprehension, BinaryOp, etc.) is compound
686        _ => true,
687    }
688}
689
690/// Recursively collect all bare aggregate FunctionCall sub-expressions from
691/// `expr`. Stops recursing into the *arguments* of an aggregate (we only want
692/// the outermost aggregate boundaries).
693///
694/// For `ListComprehension`, `Quantifier`, and `Reduce`, only the `list` field
695/// is searched because the body (`map_expr`, `predicate`, `expr`) references
696/// the loop variable, not outer-scope aggregates.
697fn extract_inner_aggregates(expr: &Expr) -> Vec<Expr> {
698    let mut out = Vec::new();
699    extract_inner_aggregates_rec(expr, &mut out);
700    out
701}
702
703fn extract_inner_aggregates_rec(expr: &Expr, out: &mut Vec<Expr>) {
704    match expr {
705        Expr::FunctionCall {
706            name, window_spec, ..
707        } if window_spec.is_none() && is_aggregate_function_name(name) => {
708            // Found a bare aggregate — collect it and stop recursing
709            out.push(expr.clone());
710        }
711        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
712            out.push(expr.clone());
713        }
714        // For list comprehension, only search the `list` source for aggregates
715        Expr::ListComprehension { list, .. } => {
716            extract_inner_aggregates_rec(list, out);
717        }
718        // For quantifier, only search the `list` source
719        Expr::Quantifier { list, .. } => {
720            extract_inner_aggregates_rec(list, out);
721        }
722        // For reduce, search `init` and `list` (not the body `expr`)
723        Expr::Reduce { init, list, .. } => {
724            extract_inner_aggregates_rec(init, out);
725            extract_inner_aggregates_rec(list, out);
726        }
727        // Standard recursive cases
728        Expr::FunctionCall { args, .. } => {
729            for arg in args {
730                extract_inner_aggregates_rec(arg, out);
731            }
732        }
733        Expr::BinaryOp { left, right, .. } => {
734            extract_inner_aggregates_rec(left, out);
735            extract_inner_aggregates_rec(right, out);
736        }
737        Expr::UnaryOp { expr: e, .. }
738        | Expr::IsNull(e)
739        | Expr::IsNotNull(e)
740        | Expr::IsUnique(e) => extract_inner_aggregates_rec(e, out),
741        Expr::Property(base, _) => extract_inner_aggregates_rec(base, out),
742        Expr::List(items) => {
743            for item in items {
744                extract_inner_aggregates_rec(item, out);
745            }
746        }
747        Expr::Case {
748            expr: case_expr,
749            when_then,
750            else_expr,
751        } => {
752            if let Some(e) = case_expr {
753                extract_inner_aggregates_rec(e, out);
754            }
755            for (w, t) in when_then {
756                extract_inner_aggregates_rec(w, out);
757                extract_inner_aggregates_rec(t, out);
758            }
759            if let Some(e) = else_expr {
760                extract_inner_aggregates_rec(e, out);
761            }
762        }
763        Expr::In {
764            expr: in_expr,
765            list,
766        } => {
767            extract_inner_aggregates_rec(in_expr, out);
768            extract_inner_aggregates_rec(list, out);
769        }
770        Expr::ArrayIndex { array, index } => {
771            extract_inner_aggregates_rec(array, out);
772            extract_inner_aggregates_rec(index, out);
773        }
774        Expr::ArraySlice { array, start, end } => {
775            extract_inner_aggregates_rec(array, out);
776            if let Some(s) = start {
777                extract_inner_aggregates_rec(s, out);
778            }
779            if let Some(e) = end {
780                extract_inner_aggregates_rec(e, out);
781            }
782        }
783        Expr::Map(entries) => {
784            for (_, v) in entries {
785                extract_inner_aggregates_rec(v, out);
786            }
787        }
788        _ => {}
789    }
790}
791
792/// Return a copy of `expr` with every inner aggregate FunctionCall replaced by
793/// `Expr::Variable(aggregate_column_name(agg))`.
794///
795/// For `ListComprehension`/`Quantifier`/`Reduce`, only the `list` field is
796/// rewritten (the body references the loop variable, not outer-scope columns).
797fn replace_aggregates_with_columns(expr: &Expr) -> Expr {
798    match expr {
799        Expr::FunctionCall {
800            name, window_spec, ..
801        } if window_spec.is_none() && is_aggregate_function_name(name) => {
802            // Replace bare aggregate with column reference
803            Expr::Variable(aggregate_column_name(expr))
804        }
805        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
806            Expr::Variable(aggregate_column_name(expr))
807        }
808        Expr::ListComprehension {
809            variable,
810            list,
811            where_clause,
812            map_expr,
813        } => Expr::ListComprehension {
814            variable: variable.clone(),
815            list: Box::new(replace_aggregates_with_columns(list)),
816            where_clause: where_clause.clone(), // don't touch — references loop var
817            map_expr: map_expr.clone(),         // don't touch — references loop var
818        },
819        Expr::Quantifier {
820            quantifier,
821            variable,
822            list,
823            predicate,
824        } => Expr::Quantifier {
825            quantifier: *quantifier,
826            variable: variable.clone(),
827            list: Box::new(replace_aggregates_with_columns(list)),
828            predicate: predicate.clone(), // don't touch — references loop var
829        },
830        Expr::Reduce {
831            accumulator,
832            init,
833            variable,
834            list,
835            expr: body,
836        } => Expr::Reduce {
837            accumulator: accumulator.clone(),
838            init: Box::new(replace_aggregates_with_columns(init)),
839            variable: variable.clone(),
840            list: Box::new(replace_aggregates_with_columns(list)),
841            expr: body.clone(), // don't touch — references loop var
842        },
843        Expr::FunctionCall {
844            name,
845            args,
846            distinct,
847            window_spec,
848        } => Expr::FunctionCall {
849            name: name.clone(),
850            args: args.iter().map(replace_aggregates_with_columns).collect(),
851            distinct: *distinct,
852            window_spec: window_spec.clone(),
853        },
854        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
855            left: Box::new(replace_aggregates_with_columns(left)),
856            op: *op,
857            right: Box::new(replace_aggregates_with_columns(right)),
858        },
859        Expr::UnaryOp { op, expr: e } => Expr::UnaryOp {
860            op: *op,
861            expr: Box::new(replace_aggregates_with_columns(e)),
862        },
863        Expr::IsNull(e) => Expr::IsNull(Box::new(replace_aggregates_with_columns(e))),
864        Expr::IsNotNull(e) => Expr::IsNotNull(Box::new(replace_aggregates_with_columns(e))),
865        Expr::IsUnique(e) => Expr::IsUnique(Box::new(replace_aggregates_with_columns(e))),
866        Expr::Property(base, prop) => Expr::Property(
867            Box::new(replace_aggregates_with_columns(base)),
868            prop.clone(),
869        ),
870        Expr::List(items) => {
871            Expr::List(items.iter().map(replace_aggregates_with_columns).collect())
872        }
873        Expr::Case {
874            expr: case_expr,
875            when_then,
876            else_expr,
877        } => Expr::Case {
878            expr: case_expr
879                .as_ref()
880                .map(|e| Box::new(replace_aggregates_with_columns(e))),
881            when_then: when_then
882                .iter()
883                .map(|(w, t)| {
884                    (
885                        replace_aggregates_with_columns(w),
886                        replace_aggregates_with_columns(t),
887                    )
888                })
889                .collect(),
890            else_expr: else_expr
891                .as_ref()
892                .map(|e| Box::new(replace_aggregates_with_columns(e))),
893        },
894        Expr::In {
895            expr: in_expr,
896            list,
897        } => Expr::In {
898            expr: Box::new(replace_aggregates_with_columns(in_expr)),
899            list: Box::new(replace_aggregates_with_columns(list)),
900        },
901        Expr::ArrayIndex { array, index } => Expr::ArrayIndex {
902            array: Box::new(replace_aggregates_with_columns(array)),
903            index: Box::new(replace_aggregates_with_columns(index)),
904        },
905        Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
906            array: Box::new(replace_aggregates_with_columns(array)),
907            start: start
908                .as_ref()
909                .map(|e| Box::new(replace_aggregates_with_columns(e))),
910            end: end
911                .as_ref()
912                .map(|e| Box::new(replace_aggregates_with_columns(e))),
913        },
914        Expr::Map(entries) => Expr::Map(
915            entries
916                .iter()
917                .map(|(k, v)| (k.clone(), replace_aggregates_with_columns(v)))
918                .collect(),
919        ),
920        // Leaf expressions — return as-is
921        other => other.clone(),
922    }
923}
924
925/// Check if an expression contains any aggregate function (recursively).
926fn contains_aggregate_recursive(expr: &Expr) -> bool {
927    match expr {
928        Expr::FunctionCall { name, args, .. } => {
929            is_aggregate_function_name(name) || args.iter().any(contains_aggregate_recursive)
930        }
931        Expr::BinaryOp { left, right, .. } => {
932            contains_aggregate_recursive(left) || contains_aggregate_recursive(right)
933        }
934        Expr::UnaryOp { expr: e, .. }
935        | Expr::IsNull(e)
936        | Expr::IsNotNull(e)
937        | Expr::IsUnique(e) => contains_aggregate_recursive(e),
938        Expr::List(items) => items.iter().any(contains_aggregate_recursive),
939        Expr::Case {
940            expr,
941            when_then,
942            else_expr,
943        } => {
944            expr.as_deref().is_some_and(contains_aggregate_recursive)
945                || when_then.iter().any(|(w, t)| {
946                    contains_aggregate_recursive(w) || contains_aggregate_recursive(t)
947                })
948                || else_expr
949                    .as_deref()
950                    .is_some_and(contains_aggregate_recursive)
951        }
952        Expr::In { expr, list } => {
953            contains_aggregate_recursive(expr) || contains_aggregate_recursive(list)
954        }
955        Expr::Property(base, _) => contains_aggregate_recursive(base),
956        Expr::ListComprehension { list, .. } => {
957            // Only check the list source — where_clause/map_expr reference the loop variable
958            contains_aggregate_recursive(list)
959        }
960        Expr::Quantifier { list, .. } => contains_aggregate_recursive(list),
961        Expr::Reduce { init, list, .. } => {
962            contains_aggregate_recursive(init) || contains_aggregate_recursive(list)
963        }
964        Expr::ArrayIndex { array, index } => {
965            contains_aggregate_recursive(array) || contains_aggregate_recursive(index)
966        }
967        Expr::ArraySlice { array, start, end } => {
968            contains_aggregate_recursive(array)
969                || start.as_deref().is_some_and(contains_aggregate_recursive)
970                || end.as_deref().is_some_and(contains_aggregate_recursive)
971        }
972        Expr::Map(entries) => entries.iter().any(|(_, v)| contains_aggregate_recursive(v)),
973        _ => false,
974    }
975}
976
977/// Check if an expression contains a non-deterministic function (e.g. rand()).
978fn contains_non_deterministic(expr: &Expr) -> bool {
979    if matches!(expr, Expr::FunctionCall { name, .. } if name.eq_ignore_ascii_case("rand")) {
980        return true;
981    }
982    let mut found = false;
983    expr.for_each_child(&mut |child| {
984        if !found {
985            found = contains_non_deterministic(child);
986        }
987    });
988    found
989}
990
991fn collect_aggregate_reprs(expr: &Expr, out: &mut HashSet<String>) {
992    match expr {
993        Expr::FunctionCall { name, args, .. } => {
994            if is_aggregate_function_name(name) {
995                out.insert(expr.to_string_repr());
996                return;
997            }
998            for arg in args {
999                collect_aggregate_reprs(arg, out);
1000            }
1001        }
1002        Expr::BinaryOp { left, right, .. } => {
1003            collect_aggregate_reprs(left, out);
1004            collect_aggregate_reprs(right, out);
1005        }
1006        Expr::UnaryOp { expr, .. }
1007        | Expr::IsNull(expr)
1008        | Expr::IsNotNull(expr)
1009        | Expr::IsUnique(expr) => collect_aggregate_reprs(expr, out),
1010        Expr::List(items) => {
1011            for item in items {
1012                collect_aggregate_reprs(item, out);
1013            }
1014        }
1015        Expr::Case {
1016            expr,
1017            when_then,
1018            else_expr,
1019        } => {
1020            if let Some(e) = expr {
1021                collect_aggregate_reprs(e, out);
1022            }
1023            for (w, t) in when_then {
1024                collect_aggregate_reprs(w, out);
1025                collect_aggregate_reprs(t, out);
1026            }
1027            if let Some(e) = else_expr {
1028                collect_aggregate_reprs(e, out);
1029            }
1030        }
1031        Expr::In { expr, list } => {
1032            collect_aggregate_reprs(expr, out);
1033            collect_aggregate_reprs(list, out);
1034        }
1035        Expr::Property(base, _) => collect_aggregate_reprs(base, out),
1036        Expr::ListComprehension { list, .. } => {
1037            collect_aggregate_reprs(list, out);
1038        }
1039        Expr::Quantifier { list, .. } => {
1040            collect_aggregate_reprs(list, out);
1041        }
1042        Expr::Reduce { init, list, .. } => {
1043            collect_aggregate_reprs(init, out);
1044            collect_aggregate_reprs(list, out);
1045        }
1046        Expr::ArrayIndex { array, index } => {
1047            collect_aggregate_reprs(array, out);
1048            collect_aggregate_reprs(index, out);
1049        }
1050        Expr::ArraySlice { array, start, end } => {
1051            collect_aggregate_reprs(array, out);
1052            if let Some(s) = start {
1053                collect_aggregate_reprs(s, out);
1054            }
1055            if let Some(e) = end {
1056                collect_aggregate_reprs(e, out);
1057            }
1058        }
1059        _ => {}
1060    }
1061}
1062
/// A variable or property reference that `collect_non_aggregate_refs` found
/// outside of any aggregate function call.
#[derive(Debug, Clone)]
enum NonAggregateRef {
    /// A bare variable reference, carrying the variable name.
    Var(String),
    /// A property access.
    Property {
        /// `to_string_repr()` of the whole property expression.
        repr: String,
        /// Name of the base variable when the property's base is a plain
        /// `Expr::Variable`; `None` for any other base expression.
        base_var: Option<String>,
    },
}
1071
1072fn collect_non_aggregate_refs(expr: &Expr, inside_agg: bool, out: &mut Vec<NonAggregateRef>) {
1073    match expr {
1074        Expr::FunctionCall { name, args, .. } => {
1075            if is_aggregate_function_name(name) {
1076                return;
1077            }
1078            for arg in args {
1079                collect_non_aggregate_refs(arg, inside_agg, out);
1080            }
1081        }
1082        Expr::Variable(v) if !inside_agg => out.push(NonAggregateRef::Var(v.clone())),
1083        Expr::Property(base, _) if !inside_agg => {
1084            let base_var = if let Expr::Variable(v) = base.as_ref() {
1085                Some(v.clone())
1086            } else {
1087                None
1088            };
1089            out.push(NonAggregateRef::Property {
1090                repr: expr.to_string_repr(),
1091                base_var,
1092            });
1093        }
1094        Expr::BinaryOp { left, right, .. } => {
1095            collect_non_aggregate_refs(left, inside_agg, out);
1096            collect_non_aggregate_refs(right, inside_agg, out);
1097        }
1098        Expr::UnaryOp { expr, .. }
1099        | Expr::IsNull(expr)
1100        | Expr::IsNotNull(expr)
1101        | Expr::IsUnique(expr) => collect_non_aggregate_refs(expr, inside_agg, out),
1102        Expr::List(items) => {
1103            for item in items {
1104                collect_non_aggregate_refs(item, inside_agg, out);
1105            }
1106        }
1107        Expr::Case {
1108            expr,
1109            when_then,
1110            else_expr,
1111        } => {
1112            if let Some(e) = expr {
1113                collect_non_aggregate_refs(e, inside_agg, out);
1114            }
1115            for (w, t) in when_then {
1116                collect_non_aggregate_refs(w, inside_agg, out);
1117                collect_non_aggregate_refs(t, inside_agg, out);
1118            }
1119            if let Some(e) = else_expr {
1120                collect_non_aggregate_refs(e, inside_agg, out);
1121            }
1122        }
1123        Expr::In { expr, list } => {
1124            collect_non_aggregate_refs(expr, inside_agg, out);
1125            collect_non_aggregate_refs(list, inside_agg, out);
1126        }
1127        // For ListComprehension/Quantifier/Reduce, only recurse into the `list`
1128        // source. The body references the loop variable, not outer-scope vars.
1129        Expr::ListComprehension { list, .. } => {
1130            collect_non_aggregate_refs(list, inside_agg, out);
1131        }
1132        Expr::Quantifier { list, .. } => {
1133            collect_non_aggregate_refs(list, inside_agg, out);
1134        }
1135        Expr::Reduce { init, list, .. } => {
1136            collect_non_aggregate_refs(init, inside_agg, out);
1137            collect_non_aggregate_refs(list, inside_agg, out);
1138        }
1139        _ => {}
1140    }
1141}
1142
1143fn validate_with_order_by_aggregate_item(
1144    expr: &Expr,
1145    projected_aggregate_reprs: &HashSet<String>,
1146    projected_simple_reprs: &HashSet<String>,
1147    projected_aliases: &HashSet<String>,
1148) -> Result<()> {
1149    let mut aggregate_reprs = HashSet::new();
1150    collect_aggregate_reprs(expr, &mut aggregate_reprs);
1151    for agg in aggregate_reprs {
1152        if !projected_aggregate_reprs.contains(&agg) {
1153            return Err(anyhow!(
1154                "SyntaxError: UndefinedVariable - Aggregation expression '{}' is not projected in WITH",
1155                agg
1156            ));
1157        }
1158    }
1159
1160    let mut refs = Vec::new();
1161    collect_non_aggregate_refs(expr, false, &mut refs);
1162    refs.retain(|r| match r {
1163        NonAggregateRef::Var(v) => !projected_aliases.contains(v),
1164        NonAggregateRef::Property { repr, .. } => !projected_simple_reprs.contains(repr),
1165    });
1166
1167    let mut dedup = HashSet::new();
1168    refs.retain(|r| {
1169        let key = match r {
1170            NonAggregateRef::Var(v) => format!("v:{v}"),
1171            NonAggregateRef::Property { repr, .. } => format!("p:{repr}"),
1172        };
1173        dedup.insert(key)
1174    });
1175
1176    if refs.len() > 1 {
1177        return Err(anyhow!(
1178            "SyntaxError: AmbiguousAggregationExpression - ORDER BY item mixes aggregation with multiple non-grouping references"
1179        ));
1180    }
1181
1182    if let Some(r) = refs.first() {
1183        return match r {
1184            NonAggregateRef::Var(v) => Err(anyhow!(
1185                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1186                v
1187            )),
1188            NonAggregateRef::Property { base_var, .. } => Err(anyhow!(
1189                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1190                base_var
1191                    .clone()
1192                    .unwrap_or_else(|| "<property-base>".to_string())
1193            )),
1194        };
1195    }
1196
1197    Ok(())
1198}
1199
1200/// Validate that no aggregation functions appear in WHERE clause.
1201fn validate_no_aggregation_in_where(predicate: &Expr) -> Result<()> {
1202    if contains_aggregate_recursive(predicate) {
1203        return Err(anyhow!(
1204            "SyntaxError: InvalidAggregation - Aggregation functions not allowed in WHERE"
1205        ));
1206    }
1207    Ok(())
1208}
1209
/// A numeric constant produced by constant-expression evaluation: either a
/// 64-bit signed integer or a 64-bit float.
#[derive(Debug, Clone, Copy)]
enum ConstNumber {
    /// Integer-valued constant.
    Int(i64),
    /// Float-valued constant.
    Float(f64),
}

impl ConstNumber {
    /// Return the value as an `f64`; integers are converted with an `as` cast.
    fn to_f64(self) -> f64 {
        match self {
            Self::Float(float) => float,
            Self::Int(int) => int as f64,
        }
    }
}
1224
1225fn eval_const_numeric_expr(
1226    expr: &Expr,
1227    params: &HashMap<String, uni_common::Value>,
1228) -> Result<ConstNumber> {
1229    match expr {
1230        Expr::Literal(CypherLiteral::Integer(n)) => Ok(ConstNumber::Int(*n)),
1231        Expr::Literal(CypherLiteral::Float(f)) => Ok(ConstNumber::Float(*f)),
1232        Expr::Parameter(name) => match params.get(name) {
1233            Some(uni_common::Value::Int(n)) => Ok(ConstNumber::Int(*n)),
1234            Some(uni_common::Value::Float(f)) => Ok(ConstNumber::Float(*f)),
1235            Some(uni_common::Value::Null) => Err(anyhow!(
1236                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got null",
1237                name
1238            )),
1239            Some(other) => Err(anyhow!(
1240                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got {:?}",
1241                name,
1242                other
1243            )),
1244            None => Err(anyhow!(
1245                "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1246            )),
1247        },
1248        Expr::UnaryOp {
1249            op: uni_cypher::ast::UnaryOp::Neg,
1250            expr,
1251        } => match eval_const_numeric_expr(expr, params)? {
1252            ConstNumber::Int(v) => Ok(ConstNumber::Int(-v)),
1253            ConstNumber::Float(v) => Ok(ConstNumber::Float(-v)),
1254        },
1255        Expr::BinaryOp { left, op, right } => {
1256            let l = eval_const_numeric_expr(left, params)?;
1257            let r = eval_const_numeric_expr(right, params)?;
1258            match op {
1259                BinaryOp::Add => match (l, r) {
1260                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a + b)),
1261                    _ => Ok(ConstNumber::Float(l.to_f64() + r.to_f64())),
1262                },
1263                BinaryOp::Sub => match (l, r) {
1264                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a - b)),
1265                    _ => Ok(ConstNumber::Float(l.to_f64() - r.to_f64())),
1266                },
1267                BinaryOp::Mul => match (l, r) {
1268                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a * b)),
1269                    _ => Ok(ConstNumber::Float(l.to_f64() * r.to_f64())),
1270                },
1271                BinaryOp::Div => Ok(ConstNumber::Float(l.to_f64() / r.to_f64())),
1272                BinaryOp::Mod => match (l, r) {
1273                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a % b)),
1274                    _ => Ok(ConstNumber::Float(l.to_f64() % r.to_f64())),
1275                },
1276                BinaryOp::Pow => Ok(ConstNumber::Float(l.to_f64().powf(r.to_f64()))),
1277                _ => Err(anyhow!(
1278                    "SyntaxError: InvalidArgumentType - unsupported operator in constant expression"
1279                )),
1280            }
1281        }
1282        Expr::FunctionCall { name, args, .. } => {
1283            let lower = name.to_lowercase();
1284            match lower.as_str() {
1285                "rand" if args.is_empty() => {
1286                    use rand::RngExt;
1287                    let mut rng = rand::rng();
1288                    Ok(ConstNumber::Float(rng.random::<f64>()))
1289                }
1290                "tointeger" | "toint" if args.len() == 1 => {
1291                    match eval_const_numeric_expr(&args[0], params)? {
1292                        ConstNumber::Int(v) => Ok(ConstNumber::Int(v)),
1293                        ConstNumber::Float(v) => Ok(ConstNumber::Int(v.trunc() as i64)),
1294                    }
1295                }
1296                "ceil" if args.len() == 1 => Ok(ConstNumber::Float(
1297                    eval_const_numeric_expr(&args[0], params)?.to_f64().ceil(),
1298                )),
1299                "floor" if args.len() == 1 => Ok(ConstNumber::Float(
1300                    eval_const_numeric_expr(&args[0], params)?.to_f64().floor(),
1301                )),
1302                "abs" if args.len() == 1 => match eval_const_numeric_expr(&args[0], params)? {
1303                    ConstNumber::Int(v) => Ok(ConstNumber::Int(v.abs())),
1304                    ConstNumber::Float(v) => Ok(ConstNumber::Float(v.abs())),
1305                },
1306                _ => Err(anyhow!(
1307                    "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1308                )),
1309            }
1310        }
1311        _ => Err(anyhow!(
1312            "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1313        )),
1314    }
1315}
1316
1317/// Parse and validate a non-negative integer expression for SKIP or LIMIT.
1318/// Returns `Ok(Some(n))` for valid constants, or an error for negative/float/non-constant values.
1319fn parse_non_negative_integer(
1320    expr: &Expr,
1321    clause_name: &str,
1322    params: &HashMap<String, uni_common::Value>,
1323) -> Result<Option<usize>> {
1324    let referenced_vars = collect_expr_variables(expr);
1325    if !referenced_vars.is_empty() {
1326        return Err(anyhow!(
1327            "SyntaxError: NonConstantExpression - {} requires expression independent of row variables",
1328            clause_name
1329        ));
1330    }
1331
1332    let value = eval_const_numeric_expr(expr, params)?;
1333    let as_int = match value {
1334        ConstNumber::Int(v) => v,
1335        ConstNumber::Float(v) => {
1336            if !v.is_finite() || (v.fract().abs() > f64::EPSILON) {
1337                return Err(anyhow!(
1338                    "SyntaxError: InvalidArgumentType - {} requires integer, got float",
1339                    clause_name
1340                ));
1341            }
1342            v as i64
1343        }
1344    };
1345    if as_int < 0 {
1346        return Err(anyhow!(
1347            "SyntaxError: NegativeIntegerArgument - {} requires non-negative integer",
1348            clause_name
1349        ));
1350    }
1351    Ok(Some(as_int as usize))
1352}
1353
1354/// Validate that aggregation functions are not nested.
1355fn validate_no_nested_aggregation(expr: &Expr) -> Result<()> {
1356    if let Expr::FunctionCall { name, args, .. } = expr
1357        && is_aggregate_function_name(name)
1358    {
1359        for arg in args {
1360            if contains_aggregate_recursive(arg) {
1361                return Err(anyhow!(
1362                    "SyntaxError: NestedAggregation - Cannot nest aggregation functions"
1363                ));
1364            }
1365            if contains_non_deterministic(arg) {
1366                return Err(anyhow!(
1367                    "SyntaxError: NonConstantExpression - Non-deterministic function inside aggregation"
1368                ));
1369            }
1370        }
1371    }
1372    let mut result = Ok(());
1373    expr.for_each_child(&mut |child| {
1374        if result.is_ok() {
1375            result = validate_no_nested_aggregation(child);
1376        }
1377    });
1378    result
1379}
1380
1381/// Validate that an expression does not access properties or labels of
1382/// deleted entities. `type(r)` on a deleted relationship is allowed per
1383/// OpenCypher spec, but `n.prop` and `labels(n)` are not.
1384fn validate_no_deleted_entity_access(expr: &Expr, deleted_vars: &HashSet<String>) -> Result<()> {
1385    // Check n.prop on a deleted variable
1386    if let Expr::Property(inner, _) = expr
1387        && let Expr::Variable(name) = inner.as_ref()
1388        && deleted_vars.contains(name)
1389    {
1390        return Err(anyhow!(
1391            "EntityNotFound: DeletedEntityAccess - Cannot access properties of deleted entity '{}'",
1392            name
1393        ));
1394    }
1395    // Check labels(n) or keys(n) on a deleted variable
1396    if let Expr::FunctionCall { name, args, .. } = expr
1397        && matches!(name.to_lowercase().as_str(), "labels" | "keys")
1398        && args.len() == 1
1399        && let Expr::Variable(var) = &args[0]
1400        && deleted_vars.contains(var)
1401    {
1402        return Err(anyhow!(
1403            "EntityNotFound: DeletedEntityAccess - Cannot access {} of deleted entity '{}'",
1404            name.to_lowercase(),
1405            var
1406        ));
1407    }
1408    let mut result = Ok(());
1409    expr.for_each_child(&mut |child| {
1410        if result.is_ok() {
1411            result = validate_no_deleted_entity_access(child, deleted_vars);
1412        }
1413    });
1414    result
1415}
1416
1417/// Validate that all variables referenced in properties are defined,
1418/// either in scope or in the local CREATE variable list.
1419fn validate_property_variables(
1420    properties: &Option<Expr>,
1421    vars_in_scope: &[VariableInfo],
1422    create_vars: &[&str],
1423) -> Result<()> {
1424    if let Some(props) = properties {
1425        for var in collect_expr_variables(props) {
1426            if !is_var_in_scope(vars_in_scope, &var) && !create_vars.contains(&var.as_str()) {
1427                return Err(anyhow!(
1428                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1429                    var
1430                ));
1431            }
1432        }
1433    }
1434    Ok(())
1435}
1436
1437/// Check that a variable name is not already bound in scope or in the local CREATE list.
1438/// Used to prevent rebinding in CREATE clauses.
1439fn check_not_already_bound(
1440    name: &str,
1441    vars_in_scope: &[VariableInfo],
1442    create_vars: &[&str],
1443) -> Result<()> {
1444    if is_var_in_scope(vars_in_scope, name) {
1445        return Err(anyhow!(
1446            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1447            name
1448        ));
1449    }
1450    if create_vars.contains(&name) {
1451        return Err(anyhow!(
1452            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined in CREATE",
1453            name
1454        ));
1455    }
1456    Ok(())
1457}
1458
1459fn build_merge_scope(pattern: &Pattern, vars_in_scope: &[VariableInfo]) -> Vec<VariableInfo> {
1460    let mut scope = vars_in_scope.to_vec();
1461
1462    for path in &pattern.paths {
1463        if let Some(path_var) = &path.variable
1464            && !path_var.is_empty()
1465            && !is_var_in_scope(&scope, path_var)
1466        {
1467            scope.push(VariableInfo::new(path_var.clone(), VariableType::Path));
1468        }
1469        for element in &path.elements {
1470            match element {
1471                PatternElement::Node(n) => {
1472                    if let Some(v) = &n.variable
1473                        && !v.is_empty()
1474                        && !is_var_in_scope(&scope, v)
1475                    {
1476                        scope.push(VariableInfo::new(v.clone(), VariableType::Node));
1477                    }
1478                }
1479                PatternElement::Relationship(r) => {
1480                    if let Some(v) = &r.variable
1481                        && !v.is_empty()
1482                        && !is_var_in_scope(&scope, v)
1483                    {
1484                        scope.push(VariableInfo::new(v.clone(), VariableType::Edge));
1485                    }
1486                }
1487                PatternElement::Parenthesized { .. } => {}
1488            }
1489        }
1490    }
1491
1492    scope
1493}
1494
1495fn validate_merge_set_item(item: &SetItem, vars_in_scope: &[VariableInfo]) -> Result<()> {
1496    match item {
1497        SetItem::Property { expr, value } => {
1498            validate_expression_variables(expr, vars_in_scope)?;
1499            validate_expression(expr, vars_in_scope)?;
1500            validate_expression_variables(value, vars_in_scope)?;
1501            validate_expression(value, vars_in_scope)?;
1502            if contains_pattern_predicate(expr) || contains_pattern_predicate(value) {
1503                return Err(anyhow!(
1504                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1505                ));
1506            }
1507        }
1508        SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
1509            if !is_var_in_scope(vars_in_scope, variable) {
1510                return Err(anyhow!(
1511                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1512                    variable
1513                ));
1514            }
1515            validate_expression_variables(value, vars_in_scope)?;
1516            validate_expression(value, vars_in_scope)?;
1517            if contains_pattern_predicate(value) {
1518                return Err(anyhow!(
1519                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1520                ));
1521            }
1522        }
1523        SetItem::Labels { variable, .. } => {
1524            if !is_var_in_scope(vars_in_scope, variable) {
1525                return Err(anyhow!(
1526                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1527                    variable
1528                ));
1529            }
1530        }
1531    }
1532
1533    Ok(())
1534}
1535
1536/// Reject MERGE patterns containing null property values (e.g. `MERGE ({k: null})`).
1537/// The OpenCypher spec requires all property values in MERGE to be non-null.
1538fn reject_null_merge_properties(properties: &Option<Expr>) -> Result<()> {
1539    if let Some(Expr::Map(entries)) = properties {
1540        for (key, value) in entries {
1541            if matches!(value, Expr::Literal(CypherLiteral::Null)) {
1542                return Err(anyhow!(
1543                    "SemanticError: MergeReadOwnWrites - MERGE cannot use null property value for '{}'",
1544                    key
1545                ));
1546            }
1547        }
1548    }
1549    Ok(())
1550}
1551
1552/// Flatten every label name appearing in a `Pattern` (across all paths
1553/// and node elements). Used by the M5 follow-up #6 write-rejection
1554/// guard to refuse CREATE/MERGE that names a virtual catalog-resolved
1555/// label.
1556fn collect_pattern_labels(pattern: &uni_cypher::ast::Pattern) -> Vec<String> {
1557    let mut out = Vec::new();
1558    for path in &pattern.paths {
1559        for element in &path.elements {
1560            if let PatternElement::Node(n) = element {
1561                for l in n.labels.names() {
1562                    out.push(l.clone());
1563                }
1564            }
1565        }
1566    }
1567    out
1568}
1569
1570fn validate_merge_clause(merge_clause: &MergeClause, vars_in_scope: &[VariableInfo]) -> Result<()> {
1571    for path in &merge_clause.pattern.paths {
1572        for element in &path.elements {
1573            match element {
1574                PatternElement::Node(n) => {
1575                    if let Some(Expr::Parameter(_)) = &n.properties {
1576                        return Err(anyhow!(
1577                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
1578                        ));
1579                    }
1580                    reject_null_merge_properties(&n.properties)?;
1581                    // VariableAlreadyBound: reject if a bound variable is used
1582                    // as a standalone MERGE node or introduces new labels/properties.
1583                    // Bare endpoint references like (a) in MERGE (a)-[:R]->(b) are valid.
1584                    if let Some(variable) = &n.variable
1585                        && !variable.is_empty()
1586                        && is_var_in_scope(vars_in_scope, variable)
1587                    {
1588                        let is_standalone = path.elements.len() == 1;
1589                        let has_new_labels = !n.labels.is_empty();
1590                        let has_new_properties = n.properties.is_some();
1591                        if is_standalone || has_new_labels || has_new_properties {
1592                            return Err(anyhow!(
1593                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1594                                variable
1595                            ));
1596                        }
1597                    }
1598                }
1599                PatternElement::Relationship(r) => {
1600                    if let Some(variable) = &r.variable
1601                        && !variable.is_empty()
1602                        && is_var_in_scope(vars_in_scope, variable)
1603                    {
1604                        return Err(anyhow!(
1605                            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1606                            variable
1607                        ));
1608                    }
1609                    if r.types.len() != 1 {
1610                        return Err(anyhow!(
1611                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for MERGE"
1612                        ));
1613                    }
1614                    if r.range.is_some() {
1615                        return Err(anyhow!(
1616                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
1617                        ));
1618                    }
1619                    if let Some(Expr::Parameter(_)) = &r.properties {
1620                        return Err(anyhow!(
1621                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
1622                        ));
1623                    }
1624                    reject_null_merge_properties(&r.properties)?;
1625                }
1626                PatternElement::Parenthesized { .. } => {}
1627            }
1628        }
1629    }
1630
1631    let merge_scope = build_merge_scope(&merge_clause.pattern, vars_in_scope);
1632    for item in &merge_clause.on_create {
1633        validate_merge_set_item(item, &merge_scope)?;
1634    }
1635    for item in &merge_clause.on_match {
1636        validate_merge_set_item(item, &merge_scope)?;
1637    }
1638
1639    Ok(())
1640}
1641
1642/// Recursively validate an expression for type errors, undefined variables, etc.
1643fn validate_expression(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
1644    // Validate boolean operators and nested aggregation first
1645    validate_boolean_expression(expr)?;
1646    validate_no_nested_aggregation(expr)?;
1647
1648    // Helper to validate multiple expressions
1649    fn validate_all(exprs: &[Expr], vars: &[VariableInfo]) -> Result<()> {
1650        for e in exprs {
1651            validate_expression(e, vars)?;
1652        }
1653        Ok(())
1654    }
1655
1656    match expr {
1657        Expr::FunctionCall { name, args, .. } => {
1658            validate_function_call(name, args, vars_in_scope)?;
1659            validate_all(args, vars_in_scope)
1660        }
1661        Expr::BinaryOp { left, right, .. } => {
1662            validate_expression(left, vars_in_scope)?;
1663            validate_expression(right, vars_in_scope)
1664        }
1665        Expr::UnaryOp { expr: e, .. }
1666        | Expr::IsNull(e)
1667        | Expr::IsNotNull(e)
1668        | Expr::IsUnique(e) => validate_expression(e, vars_in_scope),
1669        Expr::Property(base, prop) => {
1670            if let Expr::Variable(var_name) = base.as_ref()
1671                && let Some(var_info) = find_var_in_scope(vars_in_scope, var_name)
1672            {
1673                // Paths don't have properties
1674                if var_info.var_type == VariableType::Path {
1675                    return Err(anyhow!(
1676                        "SyntaxError: InvalidArgumentType - Type mismatch: expected Node or Relationship but was Path for property access '{}.{}'",
1677                        var_name,
1678                        prop
1679                    ));
1680                }
1681                // Known non-graph literals (int, float, bool, string, list) don't have properties
1682                if var_info.var_type == VariableType::ScalarLiteral {
1683                    return Err(anyhow!(
1684                        "TypeError: InvalidArgumentType - Property access on a non-graph element is not allowed"
1685                    ));
1686                }
1687            }
1688            validate_expression(base, vars_in_scope)
1689        }
1690        Expr::List(items) => validate_all(items, vars_in_scope),
1691        Expr::Case {
1692            expr: case_expr,
1693            when_then,
1694            else_expr,
1695        } => {
1696            if let Some(e) = case_expr {
1697                validate_expression(e, vars_in_scope)?;
1698            }
1699            for (w, t) in when_then {
1700                validate_expression(w, vars_in_scope)?;
1701                validate_expression(t, vars_in_scope)?;
1702            }
1703            if let Some(e) = else_expr {
1704                validate_expression(e, vars_in_scope)?;
1705            }
1706            Ok(())
1707        }
1708        Expr::In { expr: e, list } => {
1709            validate_expression(e, vars_in_scope)?;
1710            validate_expression(list, vars_in_scope)
1711        }
1712        Expr::Exists {
1713            query,
1714            from_pattern_predicate: true,
1715        } => {
1716            // Pattern predicates cannot introduce new named variables.
1717            // Extract named vars from inner MATCH pattern, check each is in scope.
1718            if let Query::Single(stmt) = query.as_ref() {
1719                for clause in &stmt.clauses {
1720                    if let Clause::Match(m) = clause {
1721                        for path in &m.pattern.paths {
1722                            for elem in &path.elements {
1723                                match elem {
1724                                    PatternElement::Node(n) => {
1725                                        if let Some(var) = &n.variable
1726                                            && !is_var_in_scope(vars_in_scope, var)
1727                                        {
1728                                            return Err(anyhow!(
1729                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1730                                                var
1731                                            ));
1732                                        }
1733                                    }
1734                                    PatternElement::Relationship(r) => {
1735                                        if let Some(var) = &r.variable
1736                                            && !is_var_in_scope(vars_in_scope, var)
1737                                        {
1738                                            return Err(anyhow!(
1739                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1740                                                var
1741                                            ));
1742                                        }
1743                                    }
1744                                    _ => {}
1745                                }
1746                            }
1747                        }
1748                    }
1749                }
1750            }
1751            Ok(())
1752        }
1753        _ => Ok(()),
1754    }
1755}
1756
1757/// One step (hop) in a Quantified Path Pattern sub-pattern.
1758///
1759/// Used by `LogicalPlan::Traverse` when `qpp_steps` is `Some`.
1760#[derive(Debug, Clone)]
1761pub struct QppStepInfo {
1762    /// Edge type IDs that this step can traverse.
1763    pub edge_type_ids: Vec<u32>,
1764    /// Traversal direction for this step.
1765    pub direction: Direction,
1766    /// Optional label constraint on the target node.
1767    pub target_label: Option<String>,
1768}
1769
/// Per-type fusion strategy used by `LogicalPlan::FusedIndexScan`
/// (Phase 5a-impl, extended in Phase 5b).
///
/// The enum is `#[non_exhaustive]` so that strategies can be added — as
/// Phase 5b did with `AnnRerank` and `Bm25Rrf` — without breaking
/// downstream pattern-match exhaustiveness.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum FusionKind {
    /// Parent BTree hits unioned with fork-local BTree hits, deduplicated
    /// by VID.
    BtreeUnion,
    /// k-way merge of the pre-sorted parent and fork streams (ORDER BY).
    SortedKWayMerge,
    /// UID lookup that probes the fork first and falls back to the parent
    /// on a miss. Used when a fork rebinds an external UID and queries
    /// must see the fork's binding before the parent's.
    VidUidForkFirst,
    /// Phase 5b — vector ANN rerank: the top-k from the primary's index
    /// and the top-k from the fork-local index are merged and reranked by
    /// exact distance. Recall ≥ 95% per spec §8.2.
    AnnRerank,
    /// Phase 5b — BM25 reciprocal rank fusion: ranked lists from the
    /// primary's and the fork-local FTS indexes are combined via standard
    /// RRF (`score = sum 1 / (k_rrf + rank_i)`, k_rrf = 60).
    Bm25Rrf,
}
1794
1795/// Logical query plan produced by [`QueryPlanner`].
1796///
1797/// Each variant represents one step in the Cypher execution pipeline.
1798/// Plans are tree-structured — leaf nodes produce rows, intermediate nodes
1799/// transform or join them, and the root node defines the final output.
1800#[derive(Debug, Clone)]
1801pub enum LogicalPlan {
1802    /// UNION / UNION ALL of two sub-plans.
1803    Union {
1804        left: Box<LogicalPlan>,
1805        right: Box<LogicalPlan>,
1806        /// When `true`, duplicate rows are preserved (UNION ALL semantics).
1807        all: bool,
1808    },
1809    /// Scan vertices of a single labeled dataset.
1810    Scan {
1811        label_id: u16,
1812        labels: Vec<String>,
1813        variable: String,
1814        filter: Option<Expr>,
1815        optional: bool,
1816    },
1817    /// Phase 5a-impl: fused scan over both primary's index and the
1818    /// forked session's fork-local index. Emitted by the planner only
1819    /// when (a) the session is forked AND (b) `StorageManager::fork_index_exists`
1820    /// returns `Some(_)` for the target column. Otherwise the planner
1821    /// keeps emitting `Scan` and Lance's `base_paths` chain transparently
1822    /// covers parent-inherited indexes.
1823    ///
1824    /// `kind` selects the per-type fusion strategy:
1825    /// - `BtreeUnion` — union of parent + fork hits, dedup by VID.
1826    /// - `SortedKWayMerge` — k-way merge of two pre-sorted streams.
1827    /// - `VidUidForkFirst` — probe fork's branch first, fall back to
1828    ///   parent's UID index on miss.
1829    FusedIndexScan {
1830        label_id: u16,
1831        labels: Vec<String>,
1832        variable: String,
1833        filter: Option<Expr>,
1834        optional: bool,
1835        kind: FusionKind,
1836    },
1837    /// Phase 5b followup: planner-side observability marker for the
1838    /// lossy fusion types. Wraps the original `VectorKnn` or
1839    /// `InvertedIndexLookup` (or any future leaf operator whose
1840    /// shape differs from `Scan`) without changing its fields, so
1841    /// the physical planner can decay it to `inner` unchanged.
1842    ///
1843    /// Runtime behavior is identical to running `inner` directly;
1844    /// the wrap is purely for explain-plan and runtime-stats
1845    /// observability. The actual fusion happens at the
1846    /// `BranchedBackend` layer (per-branch Lance reads via
1847    /// `base_paths`), exactly as in Phase 5b's core ship.
1848    FusedIndexScanWrapped {
1849        inner: Box<LogicalPlan>,
1850        kind: FusionKind,
1851    },
1852    /// Lookup vertices by ext_id using the main vertices table.
1853    /// Used when a query references ext_id without specifying a label.
1854    ExtIdLookup {
1855        variable: String,
1856        ext_id: String,
1857        filter: Option<Expr>,
1858        optional: bool,
1859    },
1860    /// Scan all vertices from main table (MATCH (n) without label).
1861    /// Used for schemaless queries that don't specify any label.
1862    ScanAll {
1863        variable: String,
1864        filter: Option<Expr>,
1865        optional: bool,
1866    },
1867    /// Scan main table filtering by label name (MATCH (n:Unknown)).
1868    /// Used for labels not defined in schema (schemaless support).
1869    /// Scan main vertices table by label name(s) for schemaless support.
1870    /// When labels has multiple entries, uses intersection semantics (must have ALL labels).
1871    ScanMainByLabels {
1872        labels: Vec<String>,
1873        variable: String,
1874        filter: Option<Expr>,
1875        optional: bool,
1876    },
1877    /// Produces exactly one empty row (used to bootstrap pipelines with no source).
1878    Empty,
1879    /// UNWIND: expand a list expression into one row per element.
1880    Unwind {
1881        input: Box<LogicalPlan>,
1882        expr: Expr,
1883        variable: String,
1884    },
1885    Traverse {
1886        input: Box<LogicalPlan>,
1887        edge_type_ids: Vec<u32>,
1888        direction: Direction,
1889        source_variable: String,
1890        target_variable: String,
1891        target_label_id: u16,
1892        step_variable: Option<String>,
1893        min_hops: usize,
1894        max_hops: usize,
1895        optional: bool,
1896        target_filter: Option<Expr>,
1897        path_variable: Option<String>,
1898        edge_properties: HashSet<String>,
1899        /// Whether this is a variable-length pattern (has `*` range specifier).
1900        /// When true, step_variable holds a list of edges (even for *1..1).
1901        is_variable_length: bool,
1902        /// All variables from this OPTIONAL MATCH pattern.
1903        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
1904        /// This ensures proper multi-hop OPTIONAL MATCH semantics.
1905        optional_pattern_vars: HashSet<String>,
1906        /// Variable names (node + edge) from the current MATCH clause scope.
1907        /// Used for relationship uniqueness scoping: only edge ID columns whose
1908        /// associated variable is in this set participate in uniqueness filtering.
1909        /// Variables from previous disconnected MATCH clauses are excluded.
1910        scope_match_variables: HashSet<String>,
1911        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
1912        edge_filter_expr: Option<Expr>,
1913        /// Path traversal semantics (Trail by default for OpenCypher).
1914        path_mode: crate::query::df_graph::nfa::PathMode,
1915        /// QPP steps for multi-hop quantified path patterns.
1916        /// `None` for simple VLP patterns; `Some` for QPP with per-step edge types/constraints.
1917        /// When present, `min_hops`/`max_hops` are derived from iterations × steps.len().
1918        qpp_steps: Option<Vec<QppStepInfo>>,
1919    },
1920    /// Traverse main edges table filtering by type name(s) (`MATCH (a)-[:Unknown]->(b)`).
1921    /// Used for edge types not defined in schema (schemaless support).
1922    /// Supports OR relationship types like `[:KNOWS|HATES]` via multiple type_names.
1923    TraverseMainByType {
1924        type_names: Vec<String>,
1925        input: Box<LogicalPlan>,
1926        direction: Direction,
1927        source_variable: String,
1928        target_variable: String,
1929        step_variable: Option<String>,
1930        min_hops: usize,
1931        max_hops: usize,
1932        optional: bool,
1933        target_filter: Option<Expr>,
1934        path_variable: Option<String>,
1935        /// Whether this is a variable-length pattern (has `*` range specifier).
1936        /// When true, step_variable holds a list of edges (even for *1..1).
1937        is_variable_length: bool,
1938        /// All variables from this OPTIONAL MATCH pattern.
1939        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
1940        optional_pattern_vars: HashSet<String>,
1941        /// Variables belonging to the current MATCH clause scope.
1942        /// Used for relationship uniqueness scoping: only edge columns whose
1943        /// associated variable is in this set participate in uniqueness filtering.
1944        scope_match_variables: HashSet<String>,
1945        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
1946        edge_filter_expr: Option<Expr>,
1947        /// Path traversal semantics (Trail by default for OpenCypher).
1948        path_mode: crate::query::df_graph::nfa::PathMode,
1949    },
1950    Filter {
1951        input: Box<LogicalPlan>,
1952        predicate: Expr,
1953        /// Variables from OPTIONAL MATCH that should preserve NULL rows.
1954        /// When evaluating the filter, if any of these variables are NULL,
1955        /// the row is preserved regardless of the predicate result.
1956        optional_variables: HashSet<String>,
1957    },
1958    Create {
1959        input: Box<LogicalPlan>,
1960        pattern: Pattern,
1961    },
1962    /// Batched CREATE operations for multiple consecutive CREATE clauses.
1963    ///
1964    /// This variant combines multiple CREATE patterns into a single plan node
1965    /// to avoid deep recursion when executing many CREATEs sequentially.
1966    CreateBatch {
1967        input: Box<LogicalPlan>,
1968        patterns: Vec<Pattern>,
1969    },
1970    Merge {
1971        input: Box<LogicalPlan>,
1972        pattern: Pattern,
1973        on_match: Option<SetClause>,
1974        on_create: Option<SetClause>,
1975    },
1976    Set {
1977        input: Box<LogicalPlan>,
1978        items: Vec<SetItem>,
1979    },
1980    Remove {
1981        input: Box<LogicalPlan>,
1982        items: Vec<RemoveItem>,
1983    },
1984    Delete {
1985        input: Box<LogicalPlan>,
1986        items: Vec<Expr>,
1987        detach: bool,
1988    },
1989    /// FOREACH (variable IN list | clauses)
1990    Foreach {
1991        input: Box<LogicalPlan>,
1992        variable: String,
1993        list: Expr,
1994        body: Vec<LogicalPlan>,
1995    },
1996    Sort {
1997        input: Box<LogicalPlan>,
1998        order_by: Vec<SortItem>,
1999    },
2000    Limit {
2001        input: Box<LogicalPlan>,
2002        skip: Option<usize>,
2003        fetch: Option<usize>,
2004    },
2005    Aggregate {
2006        input: Box<LogicalPlan>,
2007        group_by: Vec<Expr>,
2008        aggregates: Vec<Expr>,
2009    },
2010    Distinct {
2011        input: Box<LogicalPlan>,
2012    },
2013    Window {
2014        input: Box<LogicalPlan>,
2015        window_exprs: Vec<Expr>,
2016    },
2017    Project {
2018        input: Box<LogicalPlan>,
2019        projections: Vec<(Expr, Option<String>)>,
2020    },
2021    CrossJoin {
2022        left: Box<LogicalPlan>,
2023        right: Box<LogicalPlan>,
2024    },
2025    Apply {
2026        input: Box<LogicalPlan>,
2027        subquery: Box<LogicalPlan>,
2028        input_filter: Option<Expr>,
2029    },
2030    RecursiveCTE {
2031        cte_name: String,
2032        initial: Box<LogicalPlan>,
2033        recursive: Box<LogicalPlan>,
2034    },
2035    ProcedureCall {
2036        procedure_name: String,
2037        arguments: Vec<Expr>,
2038        yield_items: Vec<(String, Option<String>)>,
2039    },
2040    SubqueryCall {
2041        input: Box<LogicalPlan>,
2042        subquery: Box<LogicalPlan>,
2043    },
2044    VectorKnn {
2045        label_id: u16,
2046        variable: String,
2047        property: String,
2048        query: Expr,
2049        k: usize,
2050        threshold: Option<f32>,
2051    },
2052    InvertedIndexLookup {
2053        label_id: u16,
2054        variable: String,
2055        property: String,
2056        terms: Expr,
2057    },
2058    ShortestPath {
2059        input: Box<LogicalPlan>,
2060        edge_type_ids: Vec<u32>,
2061        direction: Direction,
2062        source_variable: String,
2063        target_variable: String,
2064        target_label_id: u16,
2065        path_variable: String,
2066        /// Minimum number of hops (edges) in the path. Default is 1.
2067        min_hops: u32,
2068        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
2069        max_hops: u32,
2070    },
2071    /// allShortestPaths() - Returns all paths with minimum length
2072    AllShortestPaths {
2073        input: Box<LogicalPlan>,
2074        edge_type_ids: Vec<u32>,
2075        direction: Direction,
2076        source_variable: String,
2077        target_variable: String,
2078        target_label_id: u16,
2079        path_variable: String,
2080        /// Minimum number of hops (edges) in the path. Default is 1.
2081        min_hops: u32,
2082        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
2083        max_hops: u32,
2084    },
2085    QuantifiedPattern {
2086        input: Box<LogicalPlan>,
2087        pattern_plan: Box<LogicalPlan>, // Plan for one iteration
2088        min_iterations: u32,
2089        max_iterations: u32,
2090        path_variable: Option<String>,
2091        start_variable: String, // Input variable for iteration (e.g. 'a' in (a)-[:R]->(b))
2092        binding_variable: String, // Output variable of iteration (e.g. 'b')
2093    },
2094    // DDL Plans
2095    CreateVectorIndex {
2096        config: VectorIndexConfig,
2097        if_not_exists: bool,
2098    },
2099    CreateFullTextIndex {
2100        config: FullTextIndexConfig,
2101        if_not_exists: bool,
2102    },
2103    CreateScalarIndex {
2104        config: ScalarIndexConfig,
2105        if_not_exists: bool,
2106    },
2107    CreateJsonFtsIndex {
2108        config: JsonFtsIndexConfig,
2109        if_not_exists: bool,
2110    },
2111    DropIndex {
2112        name: String,
2113        if_exists: bool,
2114    },
2115    ShowIndexes {
2116        filter: Option<String>,
2117    },
2118    Copy {
2119        target: String,
2120        source: String,
2121        is_export: bool,
2122        options: HashMap<String, Value>,
2123    },
2124    Backup {
2125        destination: String,
2126        options: HashMap<String, Value>,
2127    },
2128    Explain {
2129        plan: Box<LogicalPlan>,
2130    },
2131    // Admin Plans
2132    ShowDatabase,
2133    ShowConfig,
2134    ShowStatistics,
2135    Vacuum,
2136    Checkpoint,
2137    CopyTo {
2138        label: String,
2139        path: String,
2140        format: String,
2141        options: HashMap<String, Value>,
2142    },
2143    CopyFrom {
2144        label: String,
2145        path: String,
2146        format: String,
2147        options: HashMap<String, Value>,
2148    },
2149    // Schema DDL
2150    CreateLabel(CreateLabel),
2151    CreateEdgeType(CreateEdgeType),
2152    AlterLabel(AlterLabel),
2153    AlterEdgeType(AlterEdgeType),
2154    DropLabel(DropLabel),
2155    DropEdgeType(DropEdgeType),
2156    // Constraints
2157    CreateConstraint(CreateConstraint),
2158    DropConstraint(DropConstraint),
2159    ShowConstraints(ShowConstraints),
2160    /// Bind a zero-length path (single node pattern with path variable).
2161    /// E.g., `p = (a)` creates a Path with one node and zero edges.
2162    BindZeroLengthPath {
2163        input: Box<LogicalPlan>,
2164        node_variable: String,
2165        path_variable: String,
2166    },
2167    /// Bind a fixed-length path from already-computed node and edge columns.
2168    /// E.g., `p = (a)-[r]->(b)` or `p = (a)-[r1]->(b)-[r2]->(c)`.
2169    BindPath {
2170        input: Box<LogicalPlan>,
2171        node_variables: Vec<String>,
2172        edge_variables: Vec<String>,
2173        path_variable: String,
2174    },
2175
2176    // ── Locy variants ──────────────────────────────────────────
2177    /// Top-level Locy program: stratified rules + commands.
2178    LocyProgram {
2179        strata: Vec<super::planner_locy_types::LocyStratum>,
2180        commands: Vec<super::planner_locy_types::LocyCommand>,
2181        derived_scan_registry: Arc<super::df_graph::locy_fixpoint::DerivedScanRegistry>,
2182        max_iterations: usize,
2183        timeout: std::time::Duration,
2184        max_derived_bytes: usize,
2185        deterministic_best_by: bool,
2186        strict_probability_domain: bool,
2187        probability_epsilon: f64,
2188        exact_probability: bool,
2189        max_bdd_variables: usize,
2190        top_k_proofs: usize,
2191        /// Active probability semiring (rollout D-7). Defaults to
2192        /// `AddMultProb` (Phase 1/2 byte-identical behavior). `BddExact`
2193        /// is selected by `LocyConfig::resolve()` when `exact_probability`
2194        /// is true.
2195        semiring_kind: uni_locy::SemiringKind,
2196        /// Phase B Slice 3: per-evaluation registry of neural classifiers
2197        /// keyed by model name. Empty for programs without `CREATE MODEL`.
2198        classifier_registry: Arc<uni_locy::ClassifierRegistry>,
2199        /// Phase B follow-up: optional memoization cache. `None` →
2200        /// runtime creates a fresh per-query cache; `Some` → shared
2201        /// across queries (caller-managed).
2202        classifier_cache: Option<Arc<uni_locy::ModelInvocationCache>>,
2203        /// Phase C B1-B3 follow-up: per-query side-channel store
2204        /// for per-invocation (raw, calibrated, confidence_band)
2205        /// records. Flows alongside `classifier_cache` into
2206        /// `LocyProgramExec`.
2207        classifier_provenance_store: Option<Arc<uni_locy::NeuralProvenanceStore>>,
2208    },
2209    /// FOLD operator: lattice-join non-key columns per KEY group.
2210    LocyFold {
2211        input: Box<LogicalPlan>,
2212        key_columns: Vec<String>,
2213        fold_bindings: Vec<(String, Expr)>,
2214        strict_probability_domain: bool,
2215        probability_epsilon: f64,
2216    },
2217    /// BEST BY operator: select best row per KEY group by ordered criteria.
2218    LocyBestBy {
2219        input: Box<LogicalPlan>,
2220        key_columns: Vec<String>,
2221        /// (expression, ascending) pairs.
2222        criteria: Vec<(Expr, bool)>,
2223    },
2224    /// PRIORITY operator: keep only highest-priority clause's rows per KEY group.
2225    LocyPriority {
2226        input: Box<LogicalPlan>,
2227        key_columns: Vec<String>,
2228    },
2229    /// Scan a derived relation's in-memory buffer during fixpoint iteration.
2230    LocyDerivedScan {
2231        scan_index: usize,
2232        data: Arc<RwLock<Vec<RecordBatch>>>,
2233        schema: SchemaRef,
2234    },
2235    /// Compact projection for Locy YIELD — emits ONLY the listed expressions,
2236    /// without carrying through helper/property columns like the regular Project.
2237    LocyProject {
2238        input: Box<LogicalPlan>,
2239        projections: Vec<(Expr, Option<String>)>,
2240        /// Expected output Arrow type per projection (for CAST support).
2241        target_types: Vec<DataType>,
2242    },
2243    /// Phase B A4: invoke registered neural classifiers against the
2244    /// input batches and overwrite the per-invocation placeholder
2245    /// column with each row's predicted probability. Wraps a Locy
2246    /// clause body plan when `CompiledClause.model_invocations` is
2247    /// non-empty; transparent (passes batches through unchanged) when
2248    /// the list is empty.
2249    ///
2250    /// Registry and cache are carried on the node so that
2251    /// `execute_subplan` — which spins up a fresh
2252    /// `HybridPhysicalPlanner` per call — can lower it to a physical
2253    /// `LocyModelInvokeExec` without depending on planner-side
2254    /// runtime state.
2255    LocyModelInvoke {
2256        input: Box<LogicalPlan>,
2257        invocations: Vec<uni_locy::ModelInvocation>,
2258        classifier_registry: Arc<uni_locy::ClassifierRegistry>,
2259        classifier_cache: Option<Arc<uni_locy::ModelInvocationCache>>,
2260        /// Phase C B1-B3 follow-up: per-query side-channel store
2261        /// for per-invocation (raw, calibrated, confidence_band)
2262        /// records. `LocyModelInvokeExec` writes here after each
2263        /// classifier call; EXPLAIN reads via collect_neural_calls
2264        /// to surface NeuralProvenance for ALONG/FOLD-position
2265        /// invocations and Mode B re-execution paths.
2266        classifier_provenance_store: Option<Arc<uni_locy::NeuralProvenanceStore>>,
2267        /// Phase D D3 runtime: one handle per `path_context.source_rule`
2268        /// referenced by any invocation on this node. The handle's
2269        /// `data: Arc<RwLock<Vec<RecordBatch>>>` is shared with the
2270        /// `DerivedScanRegistry`; the source rule's derived facts are
2271        /// already converged by the time this node executes (the
2272        /// dependency-graph builder ensures source rules sit in
2273        /// earlier strata).
2274        path_context_handles: std::collections::HashMap<
2275            String,
2276            super::df_graph::locy_model_invoke::PathContextHandle,
2277        >,
2278    },
2279}
2280
2281/// Extracted vector similarity predicate info for optimization
2282struct VectorSimilarityPredicate {
2283    variable: String,
2284    property: String,
2285    query: Expr,
2286    threshold: Option<f32>,
2287}
2288
2289/// Result of extracting vector_similarity from a predicate
2290struct VectorSimilarityExtraction {
2291    /// The extracted vector similarity predicate
2292    predicate: VectorSimilarityPredicate,
2293    /// Remaining predicates that couldn't be optimized (if any)
2294    residual: Option<Expr>,
2295}
2296
2297/// Try to extract a vector_similarity predicate from an expression.
2298/// Matches patterns like:
2299/// - vector_similarity(n.embedding, [1,2,3]) > 0.8
2300/// - n.embedding ~= $query
2301///
2302/// Also handles AND predicates.
2303fn extract_vector_similarity(expr: &Expr) -> Option<VectorSimilarityExtraction> {
2304    match expr {
2305        Expr::BinaryOp { left, op, right } => {
2306            // Handle AND: check both sides for vector_similarity
2307            if matches!(op, BinaryOp::And) {
2308                // Try left side first
2309                if let Some(vs) = extract_simple_vector_similarity(left) {
2310                    return Some(VectorSimilarityExtraction {
2311                        predicate: vs,
2312                        residual: Some(right.as_ref().clone()),
2313                    });
2314                }
2315                // Try right side
2316                if let Some(vs) = extract_simple_vector_similarity(right) {
2317                    return Some(VectorSimilarityExtraction {
2318                        predicate: vs,
2319                        residual: Some(left.as_ref().clone()),
2320                    });
2321                }
2322                // Recursively check within left/right for nested ANDs
2323                if let Some(mut extraction) = extract_vector_similarity(left) {
2324                    extraction.residual = Some(combine_with_and(
2325                        extraction.residual,
2326                        right.as_ref().clone(),
2327                    ));
2328                    return Some(extraction);
2329                }
2330                if let Some(mut extraction) = extract_vector_similarity(right) {
2331                    extraction.residual =
2332                        Some(combine_with_and(extraction.residual, left.as_ref().clone()));
2333                    return Some(extraction);
2334                }
2335                return None;
2336            }
2337
2338            // Simple case: direct vector_similarity comparison
2339            if let Some(vs) = extract_simple_vector_similarity(expr) {
2340                return Some(VectorSimilarityExtraction {
2341                    predicate: vs,
2342                    residual: None,
2343                });
2344            }
2345            None
2346        }
2347        _ => None,
2348    }
2349}
2350
2351/// Helper to combine an optional expression with another using AND
2352fn combine_with_and(opt_expr: Option<Expr>, other: Expr) -> Expr {
2353    match opt_expr {
2354        Some(e) => Expr::BinaryOp {
2355            left: Box::new(e),
2356            op: BinaryOp::And,
2357            right: Box::new(other),
2358        },
2359        None => other,
2360    }
2361}
2362
2363/// Extract a simple vector_similarity comparison (no AND)
2364fn extract_simple_vector_similarity(expr: &Expr) -> Option<VectorSimilarityPredicate> {
2365    match expr {
2366        Expr::BinaryOp { left, op, right } => {
2367            // Pattern: vector_similarity(...) > threshold or vector_similarity(...) >= threshold
2368            if matches!(op, BinaryOp::Gt | BinaryOp::GtEq)
2369                && let (Some(vs), Some(thresh)) = (
2370                    extract_vector_similarity_call(left),
2371                    extract_float_literal(right),
2372                )
2373            {
2374                return Some(VectorSimilarityPredicate {
2375                    variable: vs.0,
2376                    property: vs.1,
2377                    query: vs.2,
2378                    threshold: Some(thresh),
2379                });
2380            }
2381            // Pattern: threshold < vector_similarity(...) or threshold <= vector_similarity(...)
2382            if matches!(op, BinaryOp::Lt | BinaryOp::LtEq)
2383                && let (Some(thresh), Some(vs)) = (
2384                    extract_float_literal(left),
2385                    extract_vector_similarity_call(right),
2386                )
2387            {
2388                return Some(VectorSimilarityPredicate {
2389                    variable: vs.0,
2390                    property: vs.1,
2391                    query: vs.2,
2392                    threshold: Some(thresh),
2393                });
2394            }
2395            // Pattern: n.embedding ~= query
2396            if matches!(op, BinaryOp::ApproxEq)
2397                && let Expr::Property(var_expr, prop) = left.as_ref()
2398                && let Expr::Variable(var) = var_expr.as_ref()
2399            {
2400                return Some(VectorSimilarityPredicate {
2401                    variable: var.clone(),
2402                    property: prop.clone(),
2403                    query: right.as_ref().clone(),
2404                    threshold: None,
2405                });
2406            }
2407            None
2408        }
2409        _ => None,
2410    }
2411}
2412
2413/// Extract (variable, property, query_expr) from vector_similarity(n.prop, query)
2414fn extract_vector_similarity_call(expr: &Expr) -> Option<(String, String, Expr)> {
2415    if let Expr::FunctionCall { name, args, .. } = expr
2416        && name.eq_ignore_ascii_case("vector_similarity")
2417        && args.len() == 2
2418    {
2419        // First arg should be Property(Identifier(var), prop)
2420        if let Expr::Property(var_expr, prop) = &args[0]
2421            && let Expr::Variable(var) = var_expr.as_ref()
2422        {
2423            // Second arg is query
2424            return Some((var.clone(), prop.clone(), args[1].clone()));
2425        }
2426    }
2427    None
2428}
2429
2430/// Extract a float value from a literal expression
2431fn extract_float_literal(expr: &Expr) -> Option<f32> {
2432    match expr {
2433        Expr::Literal(CypherLiteral::Integer(i)) => Some(*i as f32),
2434        Expr::Literal(CypherLiteral::Float(f)) => Some(*f as f32),
2435        _ => None,
2436    }
2437}
2438
/// Translates a parsed Cypher AST into a [`LogicalPlan`].
///
/// `QueryPlanner` applies semantic validation (variable scoping, label
/// resolution, type checking) and produces a plan tree that the executor
/// can run against storage.
///
/// Build one with [`QueryPlanner::new`] and customise it with the consuming
/// `with_*` builder methods. Planning methods take `&self`, so per-plan
/// bookkeeping uses interior mutability (an atomic counter and a mutex).
#[derive(Debug)]
pub struct QueryPlanner {
    /// Shared graph schema that labels and property metadata are resolved
    /// against.
    schema: Arc<Schema>,
    /// Cache of parsed generation expressions, keyed by (label_name, gen_col_name).
    /// Filled once in `new`; expressions that fail to parse are not cached.
    gen_expr_cache: HashMap<(String, String), Expr>,
    /// Counter for generating unique anonymous variable names
    /// (`_anon_<n>`, see `next_anon_var`).
    anon_counter: std::sync::atomic::AtomicUsize,
    /// Query parameters for resolving `$param` in SKIP/LIMIT. Empty unless
    /// supplied through `with_params`.
    params: HashMap<String, uni_common::Value>,
    /// Optional plugin registry consulted when label / edge-type / identifier
    /// resolution misses the local schema (M5b — Catalog / ReplacementScan).
    plugin_registry: Option<Arc<uni_plugin::PluginRegistry>>,
    /// Gate for replacement-scan dispatch on unknown identifiers (M5b).
    /// `false` by default; toggled through `with_replacement_scans`.
    replacement_scans_enabled: bool,
    /// Names of parameters folded into a `LIMIT`/`SKIP` position during the
    /// plan. The resulting `LogicalPlan::Limit` bakes the concrete values in, so
    /// a plan cache keyed on query text must additionally key on these
    /// parameters' values (see `folded_limit_skip_params`). Interior-mutable
    /// because `plan` takes `&self`.
    folded_limit_skip_params: std::sync::Mutex<std::collections::BTreeSet<String>>,
}
2465
/// Borrowed bundle of arguments describing one relationship traversal.
///
/// NOTE(review): the consumers of this struct are outside the visible part
/// of the file; the per-field notes below are inferred from names and types
/// and should be confirmed against the traversal-planning code.
struct TraverseParams<'a> {
    /// Relationship pattern being traversed.
    rel: &'a RelationshipPattern,
    /// Node pattern the traversal arrives at (presumably the far end of `rel`).
    target_node: &'a NodePattern,
    /// Presumably `true` when the pattern comes from an OPTIONAL MATCH — TODO confirm.
    optional: bool,
    /// Name of the path variable bound to this pattern, if any.
    path_variable: Option<String>,
    /// All variables from this OPTIONAL MATCH pattern.
    /// Used to ensure multi-hop patterns correctly NULL all vars when any hop fails.
    optional_pattern_vars: HashSet<String>,
}
2475
2476impl QueryPlanner {
2477    /// Create a new planner for the given schema.
2478    ///
2479    /// Pre-parses all generation expressions defined in the schema so that
2480    /// repeated plan calls avoid redundant parsing.
2481    pub fn new(schema: Arc<Schema>) -> Self {
2482        // Pre-parse all generation expressions for caching
2483        let mut gen_expr_cache = HashMap::new();
2484        for (label, props) in &schema.properties {
2485            for (gen_col, meta) in props {
2486                if let Some(expr_str) = &meta.generation_expression
2487                    && let Ok(parsed_expr) = uni_cypher::parse_expression(expr_str)
2488                {
2489                    gen_expr_cache.insert((label.clone(), gen_col.clone()), parsed_expr);
2490                }
2491            }
2492        }
2493        Self {
2494            schema,
2495            gen_expr_cache,
2496            anon_counter: std::sync::atomic::AtomicUsize::new(0),
2497            params: HashMap::new(),
2498            plugin_registry: None,
2499            replacement_scans_enabled: false,
2500            folded_limit_skip_params: std::sync::Mutex::new(std::collections::BTreeSet::new()),
2501        }
2502    }
2503
2504    /// Graph schema this planner resolves labels and property types against.
2505    pub(crate) fn schema(&self) -> &Schema {
2506        &self.schema
2507    }
2508
2509    /// Record the parameters referenced by a successfully-folded `LIMIT`/`SKIP`
2510    /// expression so the caller's plan cache can key on their values.
2511    fn note_folded_limit_skip(&self, expr: &Expr) {
2512        let mut names = Vec::new();
2513        collect_expr_parameters(expr, &mut names);
2514        if !names.is_empty()
2515            && let Ok(mut acc) = self.folded_limit_skip_params.lock()
2516        {
2517            acc.extend(names);
2518        }
2519    }
2520
2521    /// Parameter names folded into `LIMIT`/`SKIP` positions during the last
2522    /// [`plan`](Self::plan).
2523    ///
2524    /// The cached plan bakes these values in, so a text-keyed plan cache must
2525    /// fold their current values into its key — otherwise two calls differing
2526    /// only in a LIMIT/SKIP parameter would wrongly share one cached plan.
2527    /// Returns an empty vector when no parameter was folded.
2528    #[must_use]
2529    pub fn folded_limit_skip_params(&self) -> Vec<String> {
2530        self.folded_limit_skip_params
2531            .lock()
2532            .map(|acc| acc.iter().cloned().collect())
2533            .unwrap_or_default()
2534    }
2535
2536    /// Set query parameters for resolving `$param` references in SKIP/LIMIT.
2537    pub fn with_params(mut self, params: HashMap<String, uni_common::Value>) -> Self {
2538        self.params = params;
2539        self
2540    }
2541
2542    /// Attach a plugin registry for catalog / replacement-scan fallbacks
2543    /// (M5b). When absent, label / edge-type resolution behaves exactly as
2544    /// before; when present, an unknown label is offered to each
2545    /// `CatalogProvider` before erroring.
2546    #[must_use]
2547    pub fn with_plugin_registry(mut self, registry: Arc<uni_plugin::PluginRegistry>) -> Self {
2548        self.plugin_registry = Some(registry);
2549        self
2550    }
2551
2552    /// Enable replacement-scan dispatch on unknown identifiers (M5b §4.23).
2553    /// Default off; opt-in only.
2554    #[must_use]
2555    pub fn with_replacement_scans(mut self, enabled: bool) -> Self {
2556        self.replacement_scans_enabled = enabled;
2557        self
2558    }
2559
2560    /// Allocate (or look up) a virtual label ID for `name` by consulting
2561    /// every registered `CatalogProvider` and then every registered
2562    /// `ReplacementScanProvider` (only the latter when the replacement-
2563    /// scan gate is on). On a first claim the catalog table is stashed
2564    /// on the host's [`uni_plugin::PluginRegistry`] under a freshly
2565    /// allocated virtual ID; subsequent calls with the same name return
2566    /// the cached ID and refresh the stashed table.
2567    ///
2568    /// Returns `None` if no provider claims the label or no plugin
2569    /// registry is attached. Returns `Some((id, table))` on a hit; the
2570    /// `id` lies in `[VIRTUAL_LABEL_ID_START, VIRTUAL_LABEL_ID_SENTINEL)`.
2571    /// Errors are surfaced as `Some(Err(_))`-equivalent via `Result`.
2572    fn allocate_virtual_label(
2573        &self,
2574        name: &str,
2575    ) -> Result<Option<(u16, Arc<dyn uni_plugin::traits::catalog::CatalogTable>)>> {
2576        let Some(registry) = self.plugin_registry.as_ref() else {
2577            return Ok(None);
2578        };
2579        // 1. CatalogProvider (always consulted, no gate — Batch 2 semantics).
2580        let mut claimed: Option<Arc<dyn uni_plugin::traits::catalog::CatalogTable>> = None;
2581        for cat in registry.catalogs() {
2582            if let Some(t) = cat.resolve_label(name) {
2583                claimed = Some(t);
2584                break;
2585            }
2586        }
2587        // 2. ReplacementScanProvider (gated). Only consult if no
2588        //    CatalogProvider already claimed.
2589        if claimed.is_none() {
2590            use uni_plugin::traits::catalog::{Replacement, ReplacementRequest};
2591            if let Some(Replacement::CatalogTable(t)) =
2592                self.consult_replacement_scan(ReplacementRequest::Label(name))
2593            {
2594                claimed = Some(t);
2595            }
2596        }
2597        let Some(table) = claimed else {
2598            return Ok(None);
2599        };
2600        let id = registry
2601            .register_virtual_label(name, Arc::clone(&table))
2602            .map_err(|e| anyhow!("virtual label registration failed for `{name}`: {e}"))?;
2603        Ok(Some((id, table)))
2604    }
2605
2606    /// Reject any write operation that names a label currently allocated
2607    /// as a virtual (catalog-backed) label. Catalog tables are read-only
2608    /// in this milestone — there is no write-back path through
2609    /// `CatalogTable::scan` to the originating provider, so silently
2610    /// allowing the write would produce ghosted state on the host side
2611    /// without affecting the external catalog. Errors with a clear,
2612    /// actionable message.
2613    fn reject_virtual_label_writes(&self, labels: &[String], op: &str) -> Result<()> {
2614        let Some(registry) = self.plugin_registry.as_ref() else {
2615            return Ok(());
2616        };
2617        for label in labels {
2618            if registry.virtual_label_by_name(label).is_some() {
2619                return Err(anyhow!(
2620                    "Cannot {op} on virtual (catalog-resolved) label `{label}` — virtual \
2621                     labels are read-only; write back via the originating catalog \
2622                     instead"
2623                ));
2624            }
2625        }
2626        Ok(())
2627    }
2628
2629    /// Edge-type analog of [`Self::allocate_virtual_label`].
2630    fn allocate_virtual_edge_type(
2631        &self,
2632        name: &str,
2633    ) -> Result<Option<(u32, Arc<dyn uni_plugin::traits::catalog::CatalogTable>)>> {
2634        let Some(registry) = self.plugin_registry.as_ref() else {
2635            return Ok(None);
2636        };
2637        let mut claimed: Option<Arc<dyn uni_plugin::traits::catalog::CatalogTable>> = None;
2638        for cat in registry.catalogs() {
2639            if let Some(t) = cat.resolve_edge_type(name) {
2640                claimed = Some(t);
2641                break;
2642            }
2643        }
2644        let Some(table) = claimed else {
2645            return Ok(None);
2646        };
2647        let id = registry
2648            .register_virtual_edge_type(name, Arc::clone(&table))
2649            .map_err(|e| anyhow!("virtual edge-type registration failed for `{name}`: {e}"))?;
2650        Ok(Some((id, table)))
2651    }
2652
2653    /// Try to resolve an unknown identifier through replacement-scan providers
2654    /// (gated by [`Self::with_replacement_scans`]). Returns the first
2655    /// [`Replacement`] any registered provider produces, or `None` if the
2656    /// gate is off, no registry is attached, or no provider claims the
2657    /// identifier. First-match wins (mirrors DuckDB).
2658    pub(crate) fn consult_replacement_scan(
2659        &self,
2660        request: uni_plugin::traits::catalog::ReplacementRequest<'_>,
2661    ) -> Option<uni_plugin::traits::catalog::Replacement> {
2662        if !self.replacement_scans_enabled {
2663            return None;
2664        }
2665        let registry = self.plugin_registry.as_ref()?;
2666        for r in registry.replacement_scans().iter() {
2667            if let Some(replacement) = r.replace(&request) {
2668                tracing::debug!(
2669                    target: "uni.plugin.registry",
2670                    ?request,
2671                    ?replacement,
2672                    "identifier resolved via ReplacementScanProvider"
2673                );
2674                return Some(replacement);
2675            }
2676        }
2677        None
2678    }
2679
2680    /// Resolve a user-typed procedure name against the attached plugin
2681    /// registry, applying the same namespace-prefix rules as
2682    /// `ProcedureRegistry::resolve_user_procedure` (host-coupled
2683    /// procedure dispatch). Returns `true` if any namespace claims the
2684    /// name. Used by the procedure-call replacement-scan gate to decide
2685    /// whether to consult before substituting.
2686    fn procedure_resolves(&self, user_name: &str) -> bool {
2687        let Some(registry) = self.plugin_registry.as_ref() else {
2688            return false;
2689        };
2690        // Try every namespace/local split (first-dot → last-dot) so dotted
2691        // plugin ids resolve alongside the first-dot M9/builtin convention.
2692        // Mirrors `ProcedureRegistry::resolve_user_procedure`.
2693        if uni_plugin::QName::candidate_splits(user_name).any(|q| registry.procedure(&q).is_some())
2694        {
2695            return true;
2696        }
2697        let stripped = user_name.strip_prefix("uni.").unwrap_or(user_name);
2698        for plugin_id in ["uni", "builtin", "apoc-core", "custom"] {
2699            if registry
2700                .procedure(&uni_plugin::QName::new(plugin_id, stripped))
2701                .is_some()
2702            {
2703                return true;
2704            }
2705        }
2706        false
2707    }
2708
2709    /// Construct a [`uni_plugin::QName`] from a user-typed identifier for
2710    /// passing to [`Replacement`]-scan providers. If the name is dotted,
2711    /// the last segment is the local and the rest is the namespace
2712    /// (mirroring `QName::parse`). Bare names — which Cypher allows for
2713    /// procedures (`CALL foo()`) and functions (`RETURN foo(x)`) — are
2714    /// encoded with the conventional `"user"` namespace; providers that
2715    /// want to match a bare-typed name should inspect `.local()`.
2716    fn qname_from_user(name: &str) -> uni_plugin::QName {
2717        uni_plugin::QName::parse(name).unwrap_or_else(|_| uni_plugin::QName::new("user", name))
2718    }
2719
2720    /// Apply `ReplacementScanProvider`-driven function rewrites to the
2721    /// query's AST. When the gate is off or no registry is attached, the
2722    /// walker is short-circuited and the query is returned unchanged.
2723    /// Otherwise, every [`uni_cypher::ast::Expr::FunctionCall`] is offered
2724    /// to registered providers (first-match wins); a returned
2725    /// `Replacement::Function(new_qname)` substitutes the name in place.
2726    /// Rewrite depth is capped at 1 — the rewritten name is NOT re-
2727    /// consulted (a chained `A→B→A` provider therefore stops after the
2728    /// first hop). Wrong-variant returns (`CatalogTable`, `Procedure`)
2729    /// error immediately.
2730    fn rewrite_function_calls_in_query(
2731        &self,
2732        query: uni_cypher::ast::Query,
2733    ) -> Result<uni_cypher::ast::Query> {
2734        if !self.replacement_scans_enabled || self.plugin_registry.is_none() {
2735            return Ok(query);
2736        }
2737        let mut rename = |name: &str| -> Result<Option<String>> {
2738            let qname = Self::qname_from_user(name);
2739            use uni_plugin::traits::catalog::{Replacement, ReplacementRequest};
2740            match self.consult_replacement_scan(ReplacementRequest::Function(&qname)) {
2741                Some(Replacement::Function(new_qname)) => {
2742                    // Cypher function-call dispatch is bare-name-keyed
2743                    // (the per-category translators in `df_expr` match on
2744                    // `name.to_uppercase()` against bare local strings —
2745                    // "UPPER", "ABS", etc.). When the provider returns a
2746                    // synthetic-namespace target (`builtin.*` or `user.*`),
2747                    // strip the namespace so the AST name is what those
2748                    // dispatchers expect; for plugin-namespaced targets,
2749                    // preserve the full dotted form (matches how users
2750                    // type them).
2751                    let rewritten = match new_qname.namespace() {
2752                        "builtin" | "user" => new_qname.local().to_string(),
2753                        _ => new_qname.to_string(),
2754                    };
2755                    tracing::debug!(
2756                        target: "uni.plugin.registry",
2757                        from = %name,
2758                        to = %rewritten,
2759                        "function call rerouted via ReplacementScanProvider"
2760                    );
2761                    Ok(Some(rewritten))
2762                }
2763                Some(other) => Err(anyhow!(
2764                    "ReplacementScanProvider returned wrong variant for Function \
2765                     request `{}`: expected `Function`, got {:?}",
2766                    name,
2767                    other
2768                )),
2769                None => Ok(None),
2770            }
2771        };
2772        crate::query::rewrite::function_rename::rewrite_function_calls_in_query(query, &mut rename)
2773    }
2774
2775    /// Plan a Cypher query with no pre-bound variables.
2776    pub fn plan(&self, query: Query) -> Result<LogicalPlan> {
2777        self.plan_with_scope(query, Vec::new())
2778    }
2779
2780    /// Plan a Cypher query with a set of externally pre-bound variable names.
2781    ///
2782    /// `vars` lists variable names already in scope before this query executes
2783    /// (e.g., from an enclosing Locy rule body).
2784    pub fn plan_with_scope(&self, query: Query, vars: Vec<String>) -> Result<LogicalPlan> {
2785        // Apply query rewrites before planning
2786        let rewritten_query = crate::query::rewrite::rewrite_query(query)?;
2787        // M5 follow-up #5: function-call rewrite via ReplacementScanProvider.
2788        // Done as an AST pass *before* planning so the rewritten name flows
2789        // through every downstream stage (translation, UDF resolution,
2790        // execution) as if the user had typed it. No-op when the gate is
2791        // off or no provider claims the call. First-match wins; hard-cap
2792        // at one rewrite per call site (the rewritten name is NOT re-
2793        // consulted) — see `rewrite_function_calls_in_query`.
2794        let rewritten_query = self.rewrite_function_calls_in_query(rewritten_query)?;
2795        if Self::has_mixed_union_modes(&rewritten_query) {
2796            return Err(anyhow!(
2797                "SyntaxError: InvalidClauseComposition - Cannot mix UNION and UNION ALL in the same query"
2798            ));
2799        }
2800
2801        match rewritten_query {
2802            Query::Single(stmt) => self.plan_single(stmt, vars),
2803            Query::Union { left, right, all } => {
2804                let l = self.plan_with_scope(*left, vars.clone())?;
2805                let r = self.plan_with_scope(*right, vars)?;
2806
2807                // Validate that both sides have the same column names
2808                let left_cols = Self::extract_projection_columns(&l);
2809                let right_cols = Self::extract_projection_columns(&r);
2810
2811                if left_cols != right_cols {
2812                    return Err(anyhow!(
2813                        "SyntaxError: DifferentColumnsInUnion - UNION queries must have same column names"
2814                    ));
2815                }
2816
2817                Ok(LogicalPlan::Union {
2818                    left: Box::new(l),
2819                    right: Box::new(r),
2820                    all,
2821                })
2822            }
2823            Query::Schema(cmd) => self.plan_schema_command(*cmd),
2824            Query::Explain(inner) => {
2825                let inner_plan = self.plan_with_scope(*inner, vars)?;
2826                Ok(LogicalPlan::Explain {
2827                    plan: Box::new(inner_plan),
2828                })
2829            }
2830            Query::TimeTravel { .. } => {
2831                unreachable!("TimeTravel should be resolved at API layer before planning")
2832            }
2833        }
2834    }
2835
2836    fn collect_union_modes(query: &Query, out: &mut HashSet<bool>) {
2837        match query {
2838            Query::Union { left, right, all } => {
2839                out.insert(*all);
2840                Self::collect_union_modes(left, out);
2841                Self::collect_union_modes(right, out);
2842            }
2843            Query::Explain(inner) => Self::collect_union_modes(inner, out),
2844            Query::TimeTravel { query, .. } => Self::collect_union_modes(query, out),
2845            Query::Single(_) | Query::Schema(_) => {}
2846        }
2847    }
2848
2849    fn has_mixed_union_modes(query: &Query) -> bool {
2850        let mut modes = HashSet::new();
2851        Self::collect_union_modes(query, &mut modes);
2852        modes.len() > 1
2853    }
2854
2855    fn next_anon_var(&self) -> String {
2856        let id = self
2857            .anon_counter
2858            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
2859        format!("_anon_{}", id)
2860    }
2861
2862    /// Extract projection column names from a logical plan.
2863    /// Used for UNION column validation.
2864    fn extract_projection_columns(plan: &LogicalPlan) -> Vec<String> {
2865        match plan {
2866            LogicalPlan::Project { projections, .. } => projections
2867                .iter()
2868                .map(|(expr, alias)| alias.clone().unwrap_or_else(|| expr.to_string_repr()))
2869                .collect(),
2870            LogicalPlan::Limit { input, .. }
2871            | LogicalPlan::Sort { input, .. }
2872            | LogicalPlan::Distinct { input, .. }
2873            | LogicalPlan::Filter { input, .. } => Self::extract_projection_columns(input),
2874            LogicalPlan::Union { left, right, .. } => {
2875                let left_cols = Self::extract_projection_columns(left);
2876                if left_cols.is_empty() {
2877                    Self::extract_projection_columns(right)
2878                } else {
2879                    left_cols
2880                }
2881            }
2882            LogicalPlan::Aggregate {
2883                group_by,
2884                aggregates,
2885                ..
2886            } => {
2887                let mut cols: Vec<String> = group_by.iter().map(|e| e.to_string_repr()).collect();
2888                cols.extend(aggregates.iter().map(|e| e.to_string_repr()));
2889                cols
2890            }
2891            _ => Vec::new(),
2892        }
2893    }
2894
2895    fn plan_return_clause(
2896        &self,
2897        return_clause: &ReturnClause,
2898        plan: LogicalPlan,
2899        vars_in_scope: &[VariableInfo],
2900    ) -> Result<LogicalPlan> {
2901        let mut plan = plan;
2902        let mut group_by = Vec::new();
2903        let mut aggregates = Vec::new();
2904        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
2905        let mut has_agg = false;
2906        let mut projections = Vec::new();
2907        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
2908        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
2909        let mut projected_aliases: HashSet<String> = HashSet::new();
2910
2911        for item in &return_clause.items {
2912            match item {
2913                ReturnItem::All => {
2914                    // RETURN * - add all user-named variables in scope
2915                    // (anonymous variables like _anon_0 are excluded)
2916                    let user_vars: Vec<_> = vars_in_scope
2917                        .iter()
2918                        .filter(|v| !v.name.starts_with("_anon_"))
2919                        .collect();
2920                    if user_vars.is_empty() {
2921                        return Err(anyhow!(
2922                            "SyntaxError: NoVariablesInScope - RETURN * is not allowed when there are no variables in scope"
2923                        ));
2924                    }
2925                    for v in user_vars {
2926                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2927                        if !group_by.contains(&Expr::Variable(v.name.clone())) {
2928                            group_by.push(Expr::Variable(v.name.clone()));
2929                        }
2930                        projected_aliases.insert(v.name.clone());
2931                        projected_simple_reprs.insert(v.name.clone());
2932                    }
2933                }
2934                ReturnItem::Expr {
2935                    expr,
2936                    alias,
2937                    source_text,
2938                } => {
2939                    if matches!(expr, Expr::Wildcard) {
2940                        for v in vars_in_scope {
2941                            projections
2942                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2943                            if !group_by.contains(&Expr::Variable(v.name.clone())) {
2944                                group_by.push(Expr::Variable(v.name.clone()));
2945                            }
2946                            projected_aliases.insert(v.name.clone());
2947                            projected_simple_reprs.insert(v.name.clone());
2948                        }
2949                    } else {
2950                        // Validate expression variables are defined
2951                        validate_expression_variables(expr, vars_in_scope)?;
2952                        // Validate function argument types and boolean operators
2953                        validate_expression(expr, vars_in_scope)?;
2954                        // Pattern predicates are not allowed in RETURN
2955                        if contains_pattern_predicate(expr) {
2956                            return Err(anyhow!(
2957                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in RETURN"
2958                            ));
2959                        }
2960
2961                        // Use source text as column name when no explicit alias
2962                        let effective_alias = alias.clone().or_else(|| source_text.clone());
2963                        projections.push((expr.clone(), effective_alias));
2964                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
2965                            // Bare aggregate — push directly
2966                            has_agg = true;
2967                            aggregates.push(expr.clone());
2968                            projected_aggregate_reprs.insert(expr.to_string_repr());
2969                        } else if !is_window_function(expr)
2970                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
2971                        {
2972                            // Compound aggregate or expression containing aggregates —
2973                            // extract the inner bare aggregates for the Aggregate node
2974                            has_agg = true;
2975                            compound_agg_exprs.push(expr.clone());
2976                            for inner in extract_inner_aggregates(expr) {
2977                                let repr = inner.to_string_repr();
2978                                if !projected_aggregate_reprs.contains(&repr) {
2979                                    aggregates.push(inner);
2980                                    projected_aggregate_reprs.insert(repr);
2981                                }
2982                            }
2983                        } else if !group_by.contains(expr) {
2984                            group_by.push(expr.clone());
2985                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
2986                                projected_simple_reprs.insert(expr.to_string_repr());
2987                            }
2988                        }
2989
2990                        if let Some(a) = alias {
2991                            if projected_aliases.contains(a) {
2992                                return Err(anyhow!(
2993                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2994                                    a
2995                                ));
2996                            }
2997                            projected_aliases.insert(a.clone());
2998                        } else if let Expr::Variable(v) = expr {
2999                            if projected_aliases.contains(v) {
3000                                return Err(anyhow!(
3001                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
3002                                    v
3003                                ));
3004                            }
3005                            projected_aliases.insert(v.clone());
3006                        }
3007                    }
3008                }
3009            }
3010        }
3011
3012        // Validate compound aggregate expressions: non-aggregate refs must be
3013        // individually present in the group_by as simple variables or properties.
3014        if has_agg {
3015            let group_by_reprs: HashSet<String> =
3016                group_by.iter().map(|e| e.to_string_repr()).collect();
3017            for expr in &compound_agg_exprs {
3018                let mut refs = Vec::new();
3019                collect_non_aggregate_refs(expr, false, &mut refs);
3020                for r in &refs {
3021                    let is_covered = match r {
3022                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
3023                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
3024                    };
3025                    if !is_covered {
3026                        return Err(anyhow!(
3027                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
3028                        ));
3029                    }
3030                }
3031            }
3032        }
3033
3034        if has_agg {
3035            plan = LogicalPlan::Aggregate {
3036                input: Box::new(plan),
3037                group_by,
3038                aggregates,
3039            };
3040        }
3041
3042        let mut window_exprs = Vec::new();
3043        for (expr, _) in &projections {
3044            Self::collect_window_functions(expr, &mut window_exprs);
3045        }
3046
3047        if let Some(order_by) = &return_clause.order_by {
3048            for item in order_by {
3049                Self::collect_window_functions(&item.expr, &mut window_exprs);
3050            }
3051        }
3052
3053        let has_window_exprs = !window_exprs.is_empty();
3054
3055        if has_window_exprs {
3056            // Before creating the Window node, we need to ensure all properties
3057            // referenced by window functions are available. Create a Project node
3058            // that loads these properties.
3059            let mut props_needed_for_window: Vec<Expr> = Vec::new();
3060            for window_expr in &window_exprs {
3061                Self::collect_properties_from_expr(window_expr, &mut props_needed_for_window);
3062            }
3063
3064            // Also include non-window expressions from projections that might be needed
3065            // Preserve qualified names (e.g., "e.salary") as aliases for properties
3066            let non_window_projections: Vec<_> = projections
3067                .iter()
3068                .filter_map(|(expr, alias)| {
3069                    // Keep expressions that don't have window_spec
3070                    let keep = if let Expr::FunctionCall { window_spec, .. } = expr {
3071                        window_spec.is_none()
3072                    } else {
3073                        true
3074                    };
3075
3076                    if keep {
3077                        // For property references, use the qualified name as alias
3078                        let new_alias = if matches!(expr, Expr::Property(..)) {
3079                            Some(expr.to_string_repr())
3080                        } else {
3081                            alias.clone()
3082                        };
3083                        Some((expr.clone(), new_alias))
3084                    } else {
3085                        None
3086                    }
3087                })
3088                .collect();
3089
3090            if !non_window_projections.is_empty() || !props_needed_for_window.is_empty() {
3091                let mut intermediate_projections = non_window_projections;
3092                // Add any additional property references needed by window functions
3093                // IMPORTANT: Preserve qualified names (e.g., "e.salary") as aliases so window functions can reference them
3094                for prop in &props_needed_for_window {
3095                    if !intermediate_projections
3096                        .iter()
3097                        .any(|(e, _)| e.to_string_repr() == prop.to_string_repr())
3098                    {
3099                        let qualified_name = prop.to_string_repr();
3100                        intermediate_projections.push((prop.clone(), Some(qualified_name)));
3101                    }
3102                }
3103
3104                if !intermediate_projections.is_empty() {
3105                    plan = LogicalPlan::Project {
3106                        input: Box::new(plan),
3107                        projections: intermediate_projections,
3108                    };
3109                }
3110            }
3111
3112            // Transform property expressions in window functions to use qualified variable names
3113            // so that e.dept becomes "e.dept" variable that can be looked up from the row HashMap
3114            let transformed_window_exprs: Vec<Expr> = window_exprs
3115                .into_iter()
3116                .map(Self::transform_window_expr_properties)
3117                .collect();
3118
3119            plan = LogicalPlan::Window {
3120                input: Box::new(plan),
3121                window_exprs: transformed_window_exprs,
3122            };
3123        }
3124
3125        if let Some(order_by) = &return_clause.order_by {
3126            let alias_exprs: HashMap<String, Expr> = projections
3127                .iter()
3128                .filter_map(|(expr, alias)| {
3129                    alias.as_ref().map(|a| {
3130                        // ORDER BY is planned before the final RETURN projection.
3131                        // In aggregate contexts, aliases must resolve to the
3132                        // post-aggregate output columns, not raw aggregate calls.
3133                        let rewritten = if has_agg && !has_window_exprs {
3134                            if expr.is_aggregate() && !is_compound_aggregate(expr) {
3135                                Expr::Variable(aggregate_column_name(expr))
3136                            } else if is_compound_aggregate(expr)
3137                                || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
3138                            {
3139                                replace_aggregates_with_columns(expr)
3140                            } else {
3141                                Expr::Variable(expr.to_string_repr())
3142                            }
3143                        } else {
3144                            expr.clone()
3145                        };
3146                        (a.clone(), rewritten)
3147                    })
3148                })
3149                .collect();
3150
3151            // Build an extended scope that includes RETURN aliases so ORDER BY
3152            // can reference them (e.g. RETURN n.age AS age ORDER BY age).
3153            let order_by_scope: Vec<VariableInfo> = if return_clause.distinct {
3154                // DISTINCT in RETURN narrows ORDER BY visibility to returned columns.
3155                // Keep aliases and directly returned variables in scope.
3156                let mut scope = Vec::new();
3157                for (expr, alias) in &projections {
3158                    if let Some(a) = alias
3159                        && !is_var_in_scope(&scope, a)
3160                    {
3161                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
3162                    }
3163                    if let Expr::Variable(v) = expr
3164                        && !is_var_in_scope(&scope, v)
3165                    {
3166                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
3167                    }
3168                }
3169                scope
3170            } else {
3171                let mut scope = vars_in_scope.to_vec();
3172                for (expr, alias) in &projections {
3173                    if let Some(a) = alias
3174                        && !is_var_in_scope(&scope, a)
3175                    {
3176                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
3177                    } else if let Expr::Variable(v) = expr
3178                        && !is_var_in_scope(&scope, v)
3179                    {
3180                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
3181                    }
3182                }
3183                scope
3184            };
3185            // Validate ORDER BY expressions against the extended scope
3186            for item in order_by {
3187                // DISTINCT allows ORDER BY on the same projected expression
3188                // even when underlying variables are not otherwise visible.
3189                let matches_projected_expr = return_clause.distinct
3190                    && projections
3191                        .iter()
3192                        .any(|(expr, _)| expr.to_string_repr() == item.expr.to_string_repr());
3193                if !matches_projected_expr {
3194                    validate_expression_variables(&item.expr, &order_by_scope)?;
3195                    validate_expression(&item.expr, &order_by_scope)?;
3196                }
3197                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
3198                if has_aggregate_in_item && !has_agg {
3199                    return Err(anyhow!(
3200                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY after RETURN"
3201                    ));
3202                }
3203                if has_agg && has_aggregate_in_item {
3204                    validate_with_order_by_aggregate_item(
3205                        &item.expr,
3206                        &projected_aggregate_reprs,
3207                        &projected_simple_reprs,
3208                        &projected_aliases,
3209                    )?;
3210                }
3211            }
3212            let rewritten_order_by: Vec<SortItem> = order_by
3213                .iter()
3214                .map(|item| SortItem {
3215                    expr: {
3216                        let mut rewritten =
3217                            rewrite_order_by_expr_with_aliases(&item.expr, &alias_exprs);
3218                        if has_agg && !has_window_exprs {
3219                            rewritten = replace_aggregates_with_columns(&rewritten);
3220                        }
3221                        rewritten
3222                    },
3223                    ascending: item.ascending,
3224                })
3225                .collect();
3226            plan = LogicalPlan::Sort {
3227                input: Box::new(plan),
3228                order_by: rewritten_order_by,
3229            };
3230        }
3231
3232        if return_clause.skip.is_some() || return_clause.limit.is_some() {
3233            let skip = return_clause
3234                .skip
3235                .as_ref()
3236                .map(|e| {
3237                    self.note_folded_limit_skip(e);
3238                    parse_non_negative_integer(e, "SKIP", &self.params)
3239                })
3240                .transpose()?
3241                .flatten();
3242            let fetch = return_clause
3243                .limit
3244                .as_ref()
3245                .map(|e| {
3246                    self.note_folded_limit_skip(e);
3247                    parse_non_negative_integer(e, "LIMIT", &self.params)
3248                })
3249                .transpose()?
3250                .flatten();
3251
3252            plan = LogicalPlan::Limit {
3253                input: Box::new(plan),
3254                skip,
3255                fetch,
3256            };
3257        }
3258
3259        if !projections.is_empty() {
3260            // If we created an Aggregate or Window node, we need to adjust the final projections
3261            // to reference aggregate/window function results as columns instead of re-evaluating them
3262            let final_projections = if has_agg || has_window_exprs {
3263                projections
3264                    .into_iter()
3265                    .map(|(expr, alias)| {
3266                        // Check if this expression is an aggregate function
3267                        if expr.is_aggregate() && !is_compound_aggregate(&expr) && !has_window_exprs
3268                        {
3269                            // Bare aggregate — replace with column reference
3270                            let col_name = aggregate_column_name(&expr);
3271                            (Expr::Variable(col_name), alias)
3272                        } else if !has_window_exprs
3273                            && (is_compound_aggregate(&expr)
3274                                || (!expr.is_aggregate() && contains_aggregate_recursive(&expr)))
3275                        {
3276                            // Compound aggregate — replace inner aggregates with
3277                            // column references, keep outer expression for Project
3278                            (replace_aggregates_with_columns(&expr), alias)
3279                        }
3280                        // For grouped RETURN projections, reference the pre-computed
3281                        // group-by output column instead of re-evaluating the expression
3282                        // against the aggregate schema (which no longer has original vars).
3283                        else if has_agg
3284                            && !has_window_exprs
3285                            && !matches!(expr, Expr::Variable(_) | Expr::Property(_, _))
3286                        {
3287                            (Expr::Variable(expr.to_string_repr()), alias)
3288                        }
3289                        // Check if this expression is a window function
3290                        else if let Expr::FunctionCall {
3291                            window_spec: Some(_),
3292                            ..
3293                        } = &expr
3294                        {
3295                            // Replace window function with a column reference to its result
3296                            // The column name in the Window output is the full expression string
3297                            let window_col_name = expr.to_string_repr();
3298                            // Keep the original alias for the final output
3299                            (Expr::Variable(window_col_name), alias)
3300                        } else {
3301                            (expr, alias)
3302                        }
3303                    })
3304                    .collect()
3305            } else {
3306                projections
3307            };
3308
3309            plan = LogicalPlan::Project {
3310                input: Box::new(plan),
3311                projections: final_projections,
3312            };
3313        }
3314
3315        if return_clause.distinct {
3316            plan = LogicalPlan::Distinct {
3317                input: Box::new(plan),
3318            };
3319        }
3320
3321        Ok(plan)
3322    }
3323
3324    fn plan_single(&self, query: Statement, initial_vars: Vec<String>) -> Result<LogicalPlan> {
3325        let typed_vars: Vec<VariableInfo> = initial_vars
3326            .into_iter()
3327            .map(|name| VariableInfo::new(name, VariableType::Imported))
3328            .collect();
3329        self.plan_single_typed(query, typed_vars)
3330    }
3331
3332    /// Rewrite a query then plan it, preserving typed variable scope when possible.
3333    ///
3334    /// For `Query::Single` statements, uses `plan_single_typed` to carry typed
3335    /// variable info through and avoid false type-conflict errors in subqueries.
3336    /// For unions and other compound queries, falls back to `plan_with_scope`.
3337    fn rewrite_and_plan_typed(
3338        &self,
3339        query: Query,
3340        typed_vars: &[VariableInfo],
3341    ) -> Result<LogicalPlan> {
3342        let rewritten = crate::query::rewrite::rewrite_query(query)?;
3343        match rewritten {
3344            Query::Single(stmt) => self.plan_single_typed(stmt, typed_vars.to_vec()),
3345            other => self.plan_with_scope(other, vars_to_strings(typed_vars)),
3346        }
3347    }
3348
3349    fn plan_single_typed(
3350        &self,
3351        query: Statement,
3352        initial_vars: Vec<VariableInfo>,
3353    ) -> Result<LogicalPlan> {
3354        let mut plan = LogicalPlan::Empty;
3355
3356        if !initial_vars.is_empty() {
3357            // Project bound variables from outer scope as parameters.
3358            // These come from the enclosing query's row (passed as sub_params in EXISTS evaluation).
3359            // Use Parameter expressions to read from params, not Variable which would read from input row.
3360            let projections = initial_vars
3361                .iter()
3362                .map(|v| (Expr::Parameter(v.name.clone()), Some(v.name.clone())))
3363                .collect();
3364            plan = LogicalPlan::Project {
3365                input: Box::new(plan),
3366                projections,
3367            };
3368        }
3369
3370        let mut vars_in_scope: Vec<VariableInfo> = initial_vars;
3371        // Track variables introduced by CREATE clauses so we can distinguish
3372        // MATCH-introduced variables (which cannot be re-created as bare nodes)
3373        // from CREATE-introduced variables (which can be referenced as bare nodes).
3374        let mut create_introduced_vars: HashSet<String> = HashSet::new();
3375        // Track variables targeted by DELETE so we can reject property/label
3376        // access on deleted entities in subsequent RETURN clauses.
3377        let mut deleted_vars: HashSet<String> = HashSet::new();
3378
3379        let clause_count = query.clauses.len();
3380        for (clause_idx, clause) in query.clauses.into_iter().enumerate() {
3381            match clause {
3382                Clause::Match(match_clause) => {
3383                    plan = self.plan_match_clause(&match_clause, plan, &mut vars_in_scope)?;
3384                }
3385                Clause::Unwind(unwind) => {
3386                    plan = LogicalPlan::Unwind {
3387                        input: Box::new(plan),
3388                        expr: unwind.expr.clone(),
3389                        variable: unwind.variable.clone(),
3390                    };
3391                    let unwind_out_type = infer_unwind_output_type(&unwind.expr, &vars_in_scope);
3392                    add_var_to_scope(&mut vars_in_scope, &unwind.variable, unwind_out_type)?;
3393                }
3394                Clause::Call(call_clause) => {
3395                    match &call_clause.kind {
3396                        CallKind::Procedure {
3397                            procedure,
3398                            arguments,
3399                        } => {
3400                            // Validate that procedure arguments don't contain aggregation functions
3401                            for arg in arguments {
3402                                if contains_aggregate_recursive(arg) {
3403                                    return Err(anyhow!(
3404                                        "SyntaxError: InvalidAggregation - Aggregation expressions are not allowed as arguments to procedure calls"
3405                                    ));
3406                                }
3407                            }
3408
3409                            let has_yield_star = call_clause.yield_items.len() == 1
3410                                && call_clause.yield_items[0].name == "*"
3411                                && call_clause.yield_items[0].alias.is_none();
3412                            if has_yield_star && clause_idx + 1 < clause_count {
3413                                return Err(anyhow!(
3414                                    "SyntaxError: UnexpectedSyntax - YIELD * is only allowed in standalone procedure calls"
3415                                ));
3416                            }
3417
3418                            // Validate for duplicate yield names (VariableAlreadyBound)
3419                            let mut yield_names = Vec::new();
3420                            for item in &call_clause.yield_items {
3421                                if item.name == "*" {
3422                                    continue;
3423                                }
3424                                let output_name = item.alias.as_ref().unwrap_or(&item.name);
3425                                if yield_names.contains(output_name) {
3426                                    return Err(anyhow!(
3427                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already appears in YIELD clause",
3428                                        output_name
3429                                    ));
3430                                }
3431                                // Check against existing scope (in-query CALL must not shadow)
3432                                if clause_idx > 0
3433                                    && vars_in_scope.iter().any(|v| v.name == *output_name)
3434                                {
3435                                    return Err(anyhow!(
3436                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already declared in outer scope",
3437                                        output_name
3438                                    ));
3439                                }
3440                                yield_names.push(output_name.clone());
3441                            }
3442
3443                            let mut yields = Vec::new();
3444                            for item in &call_clause.yield_items {
3445                                if item.name == "*" {
3446                                    continue;
3447                                }
3448                                yields.push((item.name.clone(), item.alias.clone()));
3449                                let var_name = item.alias.as_ref().unwrap_or(&item.name);
3450                                // Use Imported because procedure return types are unknown
3451                                // at plan time (could be nodes, edges, or scalars)
3452                                add_var_to_scope(
3453                                    &mut vars_in_scope,
3454                                    var_name,
3455                                    VariableType::Imported,
3456                                )?;
3457                            }
3458                            // M5 follow-up #5: if replacement-scan dispatch is
3459                            // enabled and the procedure name does not resolve
3460                            // against the plugin registry, consult registered
3461                            // `ReplacementScanProvider`s. A `Replacement::Procedure`
3462                            // substitutes the call's target name in the logical
3463                            // plan; the rewritten name must itself resolve or
3464                            // we error immediately (no second-tier consult — caps
3465                            // rewrite depth at one).
3466                            let procedure_name = if self.replacement_scans_enabled
3467                                && !self.procedure_resolves(procedure)
3468                            {
3469                                use uni_plugin::traits::catalog::{
3470                                    Replacement, ReplacementRequest,
3471                                };
3472                                let qname = Self::qname_from_user(procedure);
3473                                match self
3474                                    .consult_replacement_scan(ReplacementRequest::Procedure(&qname))
3475                                {
3476                                    Some(Replacement::Procedure(new_qname)) => {
3477                                        let rewritten = new_qname.to_string();
3478                                        if !self.procedure_resolves(&rewritten) {
3479                                            return Err(anyhow!(
3480                                                "ReplacementScanProvider rerouted procedure \
3481                                                 `{}` to `{}`, which also did not resolve",
3482                                                procedure,
3483                                                rewritten
3484                                            ));
3485                                        }
3486                                        tracing::debug!(
3487                                            target: "uni.plugin.registry",
3488                                            from = %procedure,
3489                                            to = %rewritten,
3490                                            "procedure rerouted via ReplacementScanProvider"
3491                                        );
3492                                        rewritten
3493                                    }
3494                                    Some(other) => {
3495                                        return Err(anyhow!(
3496                                            "ReplacementScanProvider returned wrong variant \
3497                                             for Procedure request `{}`: expected \
3498                                             `Procedure`, got {:?}",
3499                                            procedure,
3500                                            other
3501                                        ));
3502                                    }
3503                                    None => procedure.clone(),
3504                                }
3505                            } else {
3506                                procedure.clone()
3507                            };
3508                            let proc_plan = LogicalPlan::ProcedureCall {
3509                                procedure_name,
3510                                arguments: arguments.clone(),
3511                                yield_items: yields.clone(),
3512                            };
3513
3514                            if matches!(plan, LogicalPlan::Empty) {
3515                                // Standalone CALL (first clause) — use directly
3516                                plan = proc_plan;
3517                            } else if yields.is_empty() {
3518                                // In-query CALL with no YIELD (void procedure):
3519                                // preserve the input rows unchanged
3520                            } else {
3521                                // In-query CALL with YIELD: cross-join input × procedure output
3522                                plan = LogicalPlan::Apply {
3523                                    input: Box::new(plan),
3524                                    subquery: Box::new(proc_plan),
3525                                    input_filter: None,
3526                                };
3527                            }
3528                        }
3529                        CallKind::Subquery(query) => {
3530                            let subquery_plan =
3531                                self.rewrite_and_plan_typed(*query.clone(), &vars_in_scope)?;
3532
3533                            // Extract variables from subquery RETURN clause
3534                            let subquery_vars = Self::collect_plan_variables(&subquery_plan);
3535
3536                            // Add new variables to scope (as Scalar since they come from subquery projection)
3537                            for var in subquery_vars {
3538                                if !is_var_in_scope(&vars_in_scope, &var) {
3539                                    add_var_to_scope(
3540                                        &mut vars_in_scope,
3541                                        &var,
3542                                        VariableType::Scalar,
3543                                    )?;
3544                                }
3545                            }
3546
3547                            plan = LogicalPlan::SubqueryCall {
3548                                input: Box::new(plan),
3549                                subquery: Box::new(subquery_plan),
3550                            };
3551                        }
3552                    }
3553                }
3554                Clause::Merge(merge_clause) => {
3555                    validate_merge_clause(&merge_clause, &vars_in_scope)?;
3556                    // M5 follow-up #6: virtual (catalog-resolved) labels are
3557                    // read-only — reject MERGE that names one.
3558                    let merge_labels = collect_pattern_labels(&merge_clause.pattern);
3559                    self.reject_virtual_label_writes(&merge_labels, "MERGE")?;
3560
3561                    plan = LogicalPlan::Merge {
3562                        input: Box::new(plan),
3563                        pattern: merge_clause.pattern.clone(),
3564                        on_match: Some(SetClause {
3565                            items: merge_clause.on_match.clone(),
3566                        }),
3567                        on_create: Some(SetClause {
3568                            items: merge_clause.on_create.clone(),
3569                        }),
3570                    };
3571
3572                    for path in &merge_clause.pattern.paths {
3573                        if let Some(path_var) = &path.variable
3574                            && !path_var.is_empty()
3575                            && !is_var_in_scope(&vars_in_scope, path_var)
3576                        {
3577                            add_var_to_scope(&mut vars_in_scope, path_var, VariableType::Path)?;
3578                        }
3579                        for element in &path.elements {
3580                            if let PatternElement::Node(n) = element {
3581                                if let Some(v) = &n.variable
3582                                    && !is_var_in_scope(&vars_in_scope, v)
3583                                {
3584                                    add_var_to_scope(&mut vars_in_scope, v, VariableType::Node)?;
3585                                }
3586                            } else if let PatternElement::Relationship(r) = element
3587                                && let Some(v) = &r.variable
3588                                && !is_var_in_scope(&vars_in_scope, v)
3589                            {
3590                                add_var_to_scope(&mut vars_in_scope, v, VariableType::Edge)?;
3591                            }
3592                        }
3593                    }
3594                }
3595                Clause::Create(create_clause) => {
3596                    // M5 follow-up #6: virtual (catalog-resolved) labels are
3597                    // read-only — reject CREATE that names one.
3598                    let create_labels = collect_pattern_labels(&create_clause.pattern);
3599                    self.reject_virtual_label_writes(&create_labels, "CREATE")?;
3600                    // Validate CREATE patterns:
3601                    // - Nodes with labels/properties are "creations" - can't rebind existing variables
3602                    // - Bare nodes (v) are "references" if bound, "creations" if not
3603                    // - Relationships are always creations - can't rebind
3604                    // - Within CREATE, each new variable can only be defined once
3605                    // - Variables used in properties must be defined
3606                    let mut create_vars: Vec<&str> = Vec::new();
3607                    for path in &create_clause.pattern.paths {
3608                        let is_standalone_node = path.elements.len() == 1;
3609                        for element in &path.elements {
3610                            match element {
3611                                PatternElement::Node(n) => {
3612                                    validate_property_variables(
3613                                        &n.properties,
3614                                        &vars_in_scope,
3615                                        &create_vars,
3616                                    )?;
3617
3618                                    if let Some(v) = n.variable.as_deref()
3619                                        && !v.is_empty()
3620                                    {
3621                                        // A node is a "creation" if it has labels or properties
3622                                        let is_creation =
3623                                            !n.labels.is_empty() || n.properties.is_some();
3624
3625                                        if is_creation {
3626                                            check_not_already_bound(
3627                                                v,
3628                                                &vars_in_scope,
3629                                                &create_vars,
3630                                            )?;
3631                                            create_vars.push(v);
3632                                        } else if is_standalone_node
3633                                            && is_var_in_scope(&vars_in_scope, v)
3634                                            && !create_introduced_vars.contains(v)
3635                                        {
3636                                            // Standalone bare node referencing a variable from a
3637                                            // non-CREATE clause (e.g. MATCH (a) CREATE (a)) — invalid.
3638                                            // Bare nodes used as relationship endpoints
3639                                            // (e.g. CREATE (a)-[:R]->(b)) are valid references.
3640                                            return Err(anyhow!(
3641                                                "SyntaxError: VariableAlreadyBound - '{}'",
3642                                                v
3643                                            ));
3644                                        } else if !create_vars.contains(&v) {
3645                                            // New bare variable — register it
3646                                            create_vars.push(v);
3647                                        }
3648                                        // else: bare reference to same-CREATE or previous-CREATE variable — OK
3649                                    }
3650                                }
3651                                PatternElement::Relationship(r) => {
3652                                    validate_property_variables(
3653                                        &r.properties,
3654                                        &vars_in_scope,
3655                                        &create_vars,
3656                                    )?;
3657
3658                                    if let Some(v) = r.variable.as_deref()
3659                                        && !v.is_empty()
3660                                    {
3661                                        check_not_already_bound(v, &vars_in_scope, &create_vars)?;
3662                                        create_vars.push(v);
3663                                    }
3664
3665                                    // Validate relationship constraints for CREATE
3666                                    if r.types.len() != 1 {
3667                                        return Err(anyhow!(
3668                                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for CREATE"
3669                                        ));
3670                                    }
3671                                    if r.direction == Direction::Both {
3672                                        return Err(anyhow!(
3673                                            "SyntaxError: RequiresDirectedRelationship - Only directed relationships are supported in CREATE"
3674                                        ));
3675                                    }
3676                                    if r.range.is_some() {
3677                                        return Err(anyhow!(
3678                                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
3679                                        ));
3680                                    }
3681                                }
3682                                PatternElement::Parenthesized { .. } => {}
3683                            }
3684                        }
3685                    }
3686
3687                    // Batch consecutive CREATEs to avoid deep recursion
3688                    match &mut plan {
3689                        LogicalPlan::CreateBatch { patterns, .. } => {
3690                            // Append to existing batch
3691                            patterns.push(create_clause.pattern.clone());
3692                        }
3693                        LogicalPlan::Create { input, pattern } => {
3694                            // Convert single Create to CreateBatch with both patterns
3695                            let first_pattern = pattern.clone();
3696                            plan = LogicalPlan::CreateBatch {
3697                                input: input.clone(),
3698                                patterns: vec![first_pattern, create_clause.pattern.clone()],
3699                            };
3700                        }
3701                        _ => {
3702                            // Start new Create (may become batch if more CREATEs follow)
3703                            plan = LogicalPlan::Create {
3704                                input: Box::new(plan),
3705                                pattern: create_clause.pattern.clone(),
3706                            };
3707                        }
3708                    }
3709                    // Add variables from created nodes and relationships to scope
3710                    for path in &create_clause.pattern.paths {
3711                        for element in &path.elements {
3712                            match element {
3713                                PatternElement::Node(n) => {
3714                                    if let Some(var) = &n.variable
3715                                        && !var.is_empty()
3716                                    {
3717                                        create_introduced_vars.insert(var.clone());
3718                                        add_var_to_scope(
3719                                            &mut vars_in_scope,
3720                                            var,
3721                                            VariableType::Node,
3722                                        )?;
3723                                    }
3724                                }
3725                                PatternElement::Relationship(r) => {
3726                                    if let Some(var) = &r.variable
3727                                        && !var.is_empty()
3728                                    {
3729                                        create_introduced_vars.insert(var.clone());
3730                                        add_var_to_scope(
3731                                            &mut vars_in_scope,
3732                                            var,
3733                                            VariableType::Edge,
3734                                        )?;
3735                                    }
3736                                }
3737                                PatternElement::Parenthesized { .. } => {
3738                                    // Skip for now - not commonly used in CREATE
3739                                }
3740                            }
3741                        }
3742                    }
3743                }
3744                Clause::Set(set_clause) => {
3745                    // Validate SET value expressions
3746                    for item in &set_clause.items {
3747                        match item {
3748                            SetItem::Property { value, .. }
3749                            | SetItem::Variable { value, .. }
3750                            | SetItem::VariablePlus { value, .. } => {
3751                                validate_expression_variables(value, &vars_in_scope)?;
3752                                validate_expression(value, &vars_in_scope)?;
3753                                if contains_pattern_predicate(value) {
3754                                    return Err(anyhow!(
3755                                        "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
3756                                    ));
3757                                }
3758                            }
3759                            SetItem::Labels { .. } => {}
3760                        }
3761                    }
3762                    plan = LogicalPlan::Set {
3763                        input: Box::new(plan),
3764                        items: set_clause.items.clone(),
3765                    };
3766                }
3767                Clause::Remove(remove_clause) => {
3768                    plan = LogicalPlan::Remove {
3769                        input: Box::new(plan),
3770                        items: remove_clause.items.clone(),
3771                    };
3772                }
3773                Clause::Delete(delete_clause) => {
3774                    // Validate DELETE targets
3775                    for item in &delete_clause.items {
3776                        // DELETE n:Label is invalid syntax (label expressions not allowed)
3777                        if matches!(item, Expr::LabelCheck { .. }) {
3778                            return Err(anyhow!(
3779                                "SyntaxError: InvalidDelete - DELETE requires a simple variable reference, not a label expression"
3780                            ));
3781                        }
3782                        let vars_used = collect_expr_variables(item);
3783                        // Reject expressions with no variable references (e.g. DELETE 1+1)
3784                        if vars_used.is_empty() {
3785                            return Err(anyhow!(
3786                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, not a literal expression"
3787                            ));
3788                        }
3789                        for var in &vars_used {
3790                            // Check if variable is defined
3791                            if find_var_in_scope(&vars_in_scope, var).is_none() {
3792                                return Err(anyhow!(
3793                                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
3794                                    var
3795                                ));
3796                            }
3797                        }
3798                        // Strict type check only for simple variable references —
3799                        // complex expressions (property access, array index, etc.)
3800                        // may resolve to a node/edge at runtime even if the base
3801                        // variable is typed as Scalar (e.g. nodes(p)[0]).
3802                        if let Expr::Variable(name) = item
3803                            && let Some(info) = find_var_in_scope(&vars_in_scope, name)
3804                            && matches!(
3805                                info.var_type,
3806                                VariableType::Scalar | VariableType::ScalarLiteral
3807                            )
3808                        {
3809                            return Err(anyhow!(
3810                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, '{}' is a scalar value",
3811                                name
3812                            ));
3813                        }
3814                    }
3815                    // Track deleted variables for later validation
3816                    for item in &delete_clause.items {
3817                        if let Expr::Variable(name) = item {
3818                            deleted_vars.insert(name.clone());
3819                        }
3820                    }
3821                    plan = LogicalPlan::Delete {
3822                        input: Box::new(plan),
3823                        items: delete_clause.items.clone(),
3824                        detach: delete_clause.detach,
3825                    };
3826                }
3827                Clause::With(with_clause) => {
3828                    let (new_plan, new_vars) =
3829                        self.plan_with_clause(&with_clause, plan, &vars_in_scope)?;
3830                    plan = new_plan;
3831                    vars_in_scope = new_vars;
3832                }
3833                Clause::WithRecursive(with_recursive) => {
3834                    // Plan the recursive CTE
3835                    plan = self.plan_with_recursive(&with_recursive, plan, &vars_in_scope)?;
3836                    // Add the CTE name to the scope (as Scalar since it's a table reference)
3837                    add_var_to_scope(
3838                        &mut vars_in_scope,
3839                        &with_recursive.name,
3840                        VariableType::Scalar,
3841                    )?;
3842                }
3843                Clause::Return(return_clause) => {
3844                    // Check for property/label access on deleted entities
3845                    if !deleted_vars.is_empty() {
3846                        for item in &return_clause.items {
3847                            if let ReturnItem::Expr { expr, .. } = item {
3848                                validate_no_deleted_entity_access(expr, &deleted_vars)?;
3849                            }
3850                        }
3851                    }
3852                    plan = self.plan_return_clause(&return_clause, plan, &vars_in_scope)?;
3853                } // All Clause variants are handled above - no catch-all needed
3854            }
3855        }
3856
3857        // Wrap write operations without RETURN in Limit(0) per OpenCypher spec.
3858        // CREATE (n) should return 0 rows, but CREATE (n) RETURN n should return 1 row.
3859        // If RETURN was used, the plan will have been wrapped in Project, so we only
3860        // wrap terminal Create/CreateBatch/Delete/Set/Remove nodes.
3861        let plan = match &plan {
3862            LogicalPlan::Create { .. }
3863            | LogicalPlan::CreateBatch { .. }
3864            | LogicalPlan::Delete { .. }
3865            | LogicalPlan::Set { .. }
3866            | LogicalPlan::Remove { .. }
3867            | LogicalPlan::Merge { .. } => LogicalPlan::Limit {
3868                input: Box::new(plan),
3869                skip: None,
3870                fetch: Some(0),
3871            },
3872            _ => plan,
3873        };
3874
3875        Ok(plan)
3876    }
3877
3878    fn collect_properties_from_expr(expr: &Expr, collected: &mut Vec<Expr>) {
3879        match expr {
3880            Expr::Property(_, _)
3881                if !collected
3882                    .iter()
3883                    .any(|e| e.to_string_repr() == expr.to_string_repr()) =>
3884            {
3885                collected.push(expr.clone());
3886            }
3887            Expr::Property(_, _) => {}
3888            Expr::Variable(_) => {
3889                // Variables are already available, don't need to project them
3890            }
3891            Expr::BinaryOp { left, right, .. } => {
3892                Self::collect_properties_from_expr(left, collected);
3893                Self::collect_properties_from_expr(right, collected);
3894            }
3895            Expr::FunctionCall {
3896                args, window_spec, ..
3897            } => {
3898                for arg in args {
3899                    Self::collect_properties_from_expr(arg, collected);
3900                }
3901                if let Some(spec) = window_spec {
3902                    for partition_expr in &spec.partition_by {
3903                        Self::collect_properties_from_expr(partition_expr, collected);
3904                    }
3905                    for sort_item in &spec.order_by {
3906                        Self::collect_properties_from_expr(&sort_item.expr, collected);
3907                    }
3908                }
3909            }
3910            Expr::List(items) => {
3911                for item in items {
3912                    Self::collect_properties_from_expr(item, collected);
3913                }
3914            }
3915            Expr::UnaryOp { expr: e, .. }
3916            | Expr::IsNull(e)
3917            | Expr::IsNotNull(e)
3918            | Expr::IsUnique(e) => {
3919                Self::collect_properties_from_expr(e, collected);
3920            }
3921            Expr::Case {
3922                expr,
3923                when_then,
3924                else_expr,
3925            } => {
3926                if let Some(e) = expr {
3927                    Self::collect_properties_from_expr(e, collected);
3928                }
3929                for (w, t) in when_then {
3930                    Self::collect_properties_from_expr(w, collected);
3931                    Self::collect_properties_from_expr(t, collected);
3932                }
3933                if let Some(e) = else_expr {
3934                    Self::collect_properties_from_expr(e, collected);
3935                }
3936            }
3937            Expr::In { expr, list } => {
3938                Self::collect_properties_from_expr(expr, collected);
3939                Self::collect_properties_from_expr(list, collected);
3940            }
3941            Expr::ArrayIndex { array, index } => {
3942                Self::collect_properties_from_expr(array, collected);
3943                Self::collect_properties_from_expr(index, collected);
3944            }
3945            Expr::ArraySlice { array, start, end } => {
3946                Self::collect_properties_from_expr(array, collected);
3947                if let Some(s) = start {
3948                    Self::collect_properties_from_expr(s, collected);
3949                }
3950                if let Some(e) = end {
3951                    Self::collect_properties_from_expr(e, collected);
3952                }
3953            }
3954            _ => {}
3955        }
3956    }
3957
3958    fn collect_window_functions(expr: &Expr, collected: &mut Vec<Expr>) {
3959        if let Expr::FunctionCall { window_spec, .. } = expr {
3960            // Collect any function with a window spec (OVER clause)
3961            if window_spec.is_some() {
3962                if !collected
3963                    .iter()
3964                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3965                {
3966                    collected.push(expr.clone());
3967                }
3968                return;
3969            }
3970        }
3971
3972        match expr {
3973            Expr::BinaryOp { left, right, .. } => {
3974                Self::collect_window_functions(left, collected);
3975                Self::collect_window_functions(right, collected);
3976            }
3977            Expr::FunctionCall { args, .. } => {
3978                for arg in args {
3979                    Self::collect_window_functions(arg, collected);
3980                }
3981            }
3982            Expr::List(items) => {
3983                for i in items {
3984                    Self::collect_window_functions(i, collected);
3985                }
3986            }
3987            Expr::Map(items) => {
3988                for (_, i) in items {
3989                    Self::collect_window_functions(i, collected);
3990                }
3991            }
3992            Expr::IsNull(e) | Expr::IsNotNull(e) | Expr::UnaryOp { expr: e, .. } => {
3993                Self::collect_window_functions(e, collected);
3994            }
3995            Expr::Case {
3996                expr,
3997                when_then,
3998                else_expr,
3999            } => {
4000                if let Some(e) = expr {
4001                    Self::collect_window_functions(e, collected);
4002                }
4003                for (w, t) in when_then {
4004                    Self::collect_window_functions(w, collected);
4005                    Self::collect_window_functions(t, collected);
4006                }
4007                if let Some(e) = else_expr {
4008                    Self::collect_window_functions(e, collected);
4009                }
4010            }
4011            Expr::Reduce {
4012                init, list, expr, ..
4013            } => {
4014                Self::collect_window_functions(init, collected);
4015                Self::collect_window_functions(list, collected);
4016                Self::collect_window_functions(expr, collected);
4017            }
4018            Expr::Quantifier {
4019                list, predicate, ..
4020            } => {
4021                Self::collect_window_functions(list, collected);
4022                Self::collect_window_functions(predicate, collected);
4023            }
4024            Expr::In { expr, list } => {
4025                Self::collect_window_functions(expr, collected);
4026                Self::collect_window_functions(list, collected);
4027            }
4028            Expr::ArrayIndex { array, index } => {
4029                Self::collect_window_functions(array, collected);
4030                Self::collect_window_functions(index, collected);
4031            }
4032            Expr::ArraySlice { array, start, end } => {
4033                Self::collect_window_functions(array, collected);
4034                if let Some(s) = start {
4035                    Self::collect_window_functions(s, collected);
4036                }
4037                if let Some(e) = end {
4038                    Self::collect_window_functions(e, collected);
4039                }
4040            }
4041            Expr::Property(e, _) => Self::collect_window_functions(e, collected),
4042            Expr::CountSubquery(_) | Expr::Exists { .. } => {}
4043            _ => {}
4044        }
4045    }
4046
4047    /// Transform property expressions in manual window functions to use qualified variable names.
4048    ///
4049    /// Converts `Expr::Property(Expr::Variable("e"), "dept")` to `Expr::Variable("e.dept")`
4050    /// so the executor can look up values directly from the row HashMap after the
4051    /// intermediate projection has materialized these properties with qualified names.
4052    ///
4053    /// Transforms ALL window functions (both manual and aggregate).
4054    /// Properties like `e.dept` become variables like `Expr::Variable("e.dept")`.
4055    fn transform_window_expr_properties(expr: Expr) -> Expr {
4056        let Expr::FunctionCall {
4057            name,
4058            args,
4059            window_spec: Some(spec),
4060            distinct,
4061        } = expr
4062        else {
4063            return expr;
4064        };
4065
4066        // Transform arguments for ALL window functions
4067        // Both manual (ROW_NUMBER, etc.) and aggregate (SUM, AVG, etc.) need this
4068        let transformed_args = args
4069            .into_iter()
4070            .map(Self::transform_property_to_variable)
4071            .collect();
4072
4073        // CRITICAL: ALL window functions (manual and aggregate) need partition_by/order_by transformed
4074        let transformed_partition_by = spec
4075            .partition_by
4076            .into_iter()
4077            .map(Self::transform_property_to_variable)
4078            .collect();
4079
4080        let transformed_order_by = spec
4081            .order_by
4082            .into_iter()
4083            .map(|item| SortItem {
4084                expr: Self::transform_property_to_variable(item.expr),
4085                ascending: item.ascending,
4086            })
4087            .collect();
4088
4089        Expr::FunctionCall {
4090            name,
4091            args: transformed_args,
4092            window_spec: Some(WindowSpec {
4093                partition_by: transformed_partition_by,
4094                order_by: transformed_order_by,
4095            }),
4096            distinct,
4097        }
4098    }
4099
4100    /// Transform a property expression to a variable expression with qualified name.
4101    ///
4102    /// `Expr::Property(Expr::Variable("e"), "dept")` becomes `Expr::Variable("e.dept")`
4103    fn transform_property_to_variable(expr: Expr) -> Expr {
4104        let Expr::Property(base, prop) = expr else {
4105            return expr;
4106        };
4107
4108        match *base {
4109            Expr::Variable(var) => Expr::Variable(format!("{}.{}", var, prop)),
4110            other => Expr::Property(Box::new(Self::transform_property_to_variable(other)), prop),
4111        }
4112    }
4113
4114    /// Transform VALID_AT macro into function call
4115    ///
4116    /// `e VALID_AT timestamp` becomes `uni.temporal.validAt(e, 'valid_from', 'valid_to', timestamp)`
4117    /// `e VALID_AT(timestamp, 'start', 'end')` becomes `uni.temporal.validAt(e, 'start', 'end', timestamp)`
4118    fn transform_valid_at_to_function(expr: Expr) -> Expr {
4119        match expr {
4120            Expr::ValidAt {
4121                entity,
4122                timestamp,
4123                start_prop,
4124                end_prop,
4125            } => {
4126                let start = start_prop.unwrap_or_else(|| "valid_from".to_string());
4127                let end = end_prop.unwrap_or_else(|| "valid_to".to_string());
4128
4129                Expr::FunctionCall {
4130                    name: "uni.temporal.validAt".to_string(),
4131                    args: vec![
4132                        Self::transform_valid_at_to_function(*entity),
4133                        Expr::Literal(CypherLiteral::String(start)),
4134                        Expr::Literal(CypherLiteral::String(end)),
4135                        Self::transform_valid_at_to_function(*timestamp),
4136                    ],
4137                    distinct: false,
4138                    window_spec: None,
4139                }
4140            }
4141            // Recursively transform nested expressions
4142            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
4143                left: Box::new(Self::transform_valid_at_to_function(*left)),
4144                op,
4145                right: Box::new(Self::transform_valid_at_to_function(*right)),
4146            },
4147            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
4148                op,
4149                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
4150            },
4151            Expr::FunctionCall {
4152                name,
4153                args,
4154                distinct,
4155                window_spec,
4156            } => Expr::FunctionCall {
4157                name,
4158                args: args
4159                    .into_iter()
4160                    .map(Self::transform_valid_at_to_function)
4161                    .collect(),
4162                distinct,
4163                window_spec,
4164            },
4165            Expr::Property(base, prop) => {
4166                Expr::Property(Box::new(Self::transform_valid_at_to_function(*base)), prop)
4167            }
4168            Expr::List(items) => Expr::List(
4169                items
4170                    .into_iter()
4171                    .map(Self::transform_valid_at_to_function)
4172                    .collect(),
4173            ),
4174            Expr::In { expr, list } => Expr::In {
4175                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
4176                list: Box::new(Self::transform_valid_at_to_function(*list)),
4177            },
4178            Expr::IsNull(e) => Expr::IsNull(Box::new(Self::transform_valid_at_to_function(*e))),
4179            Expr::IsNotNull(e) => {
4180                Expr::IsNotNull(Box::new(Self::transform_valid_at_to_function(*e)))
4181            }
4182            Expr::IsUnique(e) => Expr::IsUnique(Box::new(Self::transform_valid_at_to_function(*e))),
4183            // Other cases: return as-is
4184            other => other,
4185        }
4186    }
4187
4188    /// Rewrite system-metadata function calls (`id(v)`, `created_at(v)`,
4189    /// `updated_at(v)`) to direct property access on the corresponding
4190    /// internal column (`v._vid`, `v._created_at`, `v._updated_at`). This
4191    /// normalization enables predicate pushdown via the Property pattern
4192    /// recognized by `PredicateAnalyzer`.
4193    ///
4194    /// All three functions share the same shape: single-arg, argument
4195    /// must be a node/edge variable, returns the column value directly.
4196    fn rewrite_id_to_vid(expr: Expr, vars_in_scope: &[VariableInfo]) -> Expr {
4197        match expr {
4198            Expr::FunctionCall {
4199                name,
4200                args,
4201                distinct,
4202                window_spec,
4203            } if args.len() == 1 && Self::metadata_function_column(&name, None).is_some() => {
4204                if let Expr::Variable(ref var) = args[0] {
4205                    // `id()` resolves to `_eid` for an edge binding and `_vid`
4206                    // for a node — edge rows expose `_eid`, not `_vid`. Mirror
4207                    // the projection path (`df_expr.rs` translate of `id`).
4208                    let var_type = find_var_in_scope(vars_in_scope, var).map(|v| v.var_type);
4209                    let column = Self::metadata_function_column(&name, var_type)
4210                        .unwrap()
4211                        .to_string();
4212                    Expr::Property(Box::new(Expr::Variable(var.clone())), column)
4213                } else {
4214                    Expr::FunctionCall {
4215                        name,
4216                        args,
4217                        distinct,
4218                        window_spec,
4219                    }
4220                }
4221            }
4222            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
4223                left: Box::new(Self::rewrite_id_to_vid(*left, vars_in_scope)),
4224                op,
4225                right: Box::new(Self::rewrite_id_to_vid(*right, vars_in_scope)),
4226            },
4227            Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
4228                op,
4229                expr: Box::new(Self::rewrite_id_to_vid(*inner, vars_in_scope)),
4230            },
4231            other => other,
4232        }
4233    }
4234
4235    /// Return the internal column name for a system-metadata function, or
4236    /// `None` if the name is not one of the recognised metadata functions.
4237    ///
4238    /// `id()` maps to `_eid` when its argument is a relationship
4239    /// (`VariableType::Edge`) and `_vid` otherwise; `var_type` is `None` when the
4240    /// caller only needs the is-metadata-function test.
4241    fn metadata_function_column(
4242        name: &str,
4243        var_type: Option<VariableType>,
4244    ) -> Option<&'static str> {
4245        if name.eq_ignore_ascii_case("id") {
4246            if matches!(var_type, Some(VariableType::Edge)) {
4247                Some("_eid")
4248            } else {
4249                Some("_vid")
4250            }
4251        } else if name.eq_ignore_ascii_case("created_at") {
4252            Some("_created_at")
4253        } else if name.eq_ignore_ascii_case("updated_at") {
4254            Some("_updated_at")
4255        } else {
4256            None
4257        }
4258    }
4259
4260    /// Plan a MATCH clause, handling both shortestPath and regular patterns.
4261    fn plan_match_clause(
4262        &self,
4263        match_clause: &MatchClause,
4264        plan: LogicalPlan,
4265        vars_in_scope: &mut Vec<VariableInfo>,
4266    ) -> Result<LogicalPlan> {
4267        let mut plan = plan;
4268
4269        if match_clause.pattern.paths.is_empty() {
4270            return Err(anyhow!("Empty pattern"));
4271        }
4272
4273        // Track variables introduced by this OPTIONAL MATCH
4274        let vars_before_pattern = vars_in_scope.len();
4275
4276        for path in &match_clause.pattern.paths {
4277            if let Some(mode) = &path.shortest_path_mode {
4278                plan =
4279                    self.plan_shortest_path(path, plan, vars_in_scope, mode, vars_before_pattern)?;
4280            } else {
4281                plan = self.plan_path(
4282                    path,
4283                    plan,
4284                    vars_in_scope,
4285                    match_clause.optional,
4286                    vars_before_pattern,
4287                )?;
4288            }
4289        }
4290
4291        // Collect variables introduced by this OPTIONAL MATCH pattern
4292        let optional_vars: HashSet<String> = if match_clause.optional {
4293            vars_in_scope[vars_before_pattern..]
4294                .iter()
4295                .map(|v| v.name.clone())
4296                .collect()
4297        } else {
4298            HashSet::new()
4299        };
4300
4301        // Handle WHERE clause with vector_similarity and predicate pushdown
4302        if let Some(predicate) = &match_clause.where_clause {
4303            plan = self.plan_where_clause(predicate, plan, vars_in_scope, optional_vars)?;
4304        }
4305
4306        Ok(plan)
4307    }
4308
4309    /// Plan a shortestPath pattern.
4310    fn plan_shortest_path(
4311        &self,
4312        path: &PathPattern,
4313        plan: LogicalPlan,
4314        vars_in_scope: &mut Vec<VariableInfo>,
4315        mode: &ShortestPathMode,
4316        _vars_before_pattern: usize,
4317    ) -> Result<LogicalPlan> {
4318        let mut plan = plan;
4319        let elements = &path.elements;
4320
4321        // Pattern must be: node-rel-node-rel-...-node (odd number of elements >= 3)
4322        if elements.len() < 3 || elements.len().is_multiple_of(2) {
4323            return Err(anyhow!(
4324                "shortestPath requires at least one relationship: (a)-[*]->(b)"
4325            ));
4326        }
4327
4328        let source_node = match &elements[0] {
4329            PatternElement::Node(n) => n,
4330            _ => return Err(anyhow!("ShortestPath must start with a node")),
4331        };
4332        let rel = match &elements[1] {
4333            PatternElement::Relationship(r) => r,
4334            _ => {
4335                return Err(anyhow!(
4336                    "ShortestPath middle element must be a relationship"
4337                ));
4338            }
4339        };
4340        let target_node = match &elements[2] {
4341            PatternElement::Node(n) => n,
4342            _ => return Err(anyhow!("ShortestPath must end with a node")),
4343        };
4344
4345        let source_var = source_node
4346            .variable
4347            .clone()
4348            .ok_or_else(|| anyhow!("Source node must have variable in shortestPath"))?;
4349        let target_var = target_node
4350            .variable
4351            .clone()
4352            .ok_or_else(|| anyhow!("Target node must have variable in shortestPath"))?;
4353        let path_var = path
4354            .variable
4355            .clone()
4356            .ok_or_else(|| anyhow!("shortestPath must be assigned to a variable"))?;
4357
4358        let source_bound = is_var_in_scope(vars_in_scope, &source_var);
4359        let target_bound = is_var_in_scope(vars_in_scope, &target_var);
4360
4361        // Plan source node if not bound
4362        if !source_bound {
4363            plan = self.plan_unbound_node(source_node, &source_var, plan, false)?;
4364        } else if let Some(prop_filter) =
4365            self.properties_to_expr(&source_var, &source_node.properties)
4366        {
4367            plan = LogicalPlan::Filter {
4368                input: Box::new(plan),
4369                predicate: prop_filter,
4370                optional_variables: HashSet::new(),
4371            };
4372        }
4373
4374        // Plan target node if not bound
4375        let target_label_id = if !target_bound {
4376            // Use first label for target_label_id
4377            let target_label_name = target_node
4378                .labels
4379                .first()
4380                .ok_or_else(|| anyhow!("Target node must have label if not already bound"))?;
4381            // Native lookup first; then consult `CatalogProvider` /
4382            // `ReplacementScanProvider` and allocate a virtual label-id
4383            // (M5b follow-up #6). Virtual ids dispatch to
4384            // `CatalogVertexScanExec` at physical-plan time.
4385            let target_label_id =
4386                if let Some(meta) = self.schema.get_label_case_insensitive(target_label_name) {
4387                    meta.id
4388                } else if let Some((vid, _)) = self.allocate_virtual_label(target_label_name)? {
4389                    vid
4390                } else {
4391                    return Err(anyhow!("Label {} not found", target_label_name));
4392                };
4393
4394            let target_scan = LogicalPlan::Scan {
4395                label_id: target_label_id,
4396                labels: target_node.labels.names().to_vec(),
4397                variable: target_var.clone(),
4398                filter: self.properties_to_expr(&target_var, &target_node.properties),
4399                optional: false,
4400            };
4401
4402            plan = Self::join_with_plan(plan, target_scan);
4403            target_label_id
4404        } else {
4405            if let Some(prop_filter) = self.properties_to_expr(&target_var, &target_node.properties)
4406            {
4407                plan = LogicalPlan::Filter {
4408                    input: Box::new(plan),
4409                    predicate: prop_filter,
4410                    optional_variables: HashSet::new(),
4411                };
4412            }
4413            0 // Wildcard for already-bound target
4414        };
4415
4416        // Add ShortestPath operator
4417        let edge_type_ids = if rel.types.is_empty() {
4418            // If no type specified, fetch all edge types (both schema and schemaless)
4419            self.schema.all_edge_type_ids()
4420        } else {
4421            let mut ids = Vec::new();
4422            for type_name in &rel.types {
4423                let id = if let Some(meta) = self.schema.edge_types.get(type_name) {
4424                    meta.id
4425                } else if let Some((vid, _)) = self.allocate_virtual_edge_type(type_name)? {
4426                    vid
4427                } else {
4428                    return Err(anyhow!("Edge type {} not found", type_name));
4429                };
4430                ids.push(id);
4431            }
4432            ids
4433        };
4434
4435        // Extract hop constraints from relationship pattern
4436        let min_hops = rel.range.as_ref().and_then(|r| r.min).unwrap_or(1);
4437        let max_hops = rel.range.as_ref().and_then(|r| r.max).unwrap_or(u32::MAX);
4438
4439        let sp_plan = match mode {
4440            ShortestPathMode::Shortest => LogicalPlan::ShortestPath {
4441                input: Box::new(plan),
4442                edge_type_ids,
4443                direction: rel.direction.clone(),
4444                source_variable: source_var.clone(),
4445                target_variable: target_var.clone(),
4446                target_label_id,
4447                path_variable: path_var.clone(),
4448                min_hops,
4449                max_hops,
4450            },
4451            ShortestPathMode::AllShortest => LogicalPlan::AllShortestPaths {
4452                input: Box::new(plan),
4453                edge_type_ids,
4454                direction: rel.direction.clone(),
4455                source_variable: source_var.clone(),
4456                target_variable: target_var.clone(),
4457                target_label_id,
4458                path_variable: path_var.clone(),
4459                min_hops,
4460                max_hops,
4461            },
4462        };
4463
4464        if !source_bound {
4465            add_var_to_scope(vars_in_scope, &source_var, VariableType::Node)?;
4466        }
4467        if !target_bound {
4468            add_var_to_scope(vars_in_scope, &target_var, VariableType::Node)?;
4469        }
4470        add_var_to_scope(vars_in_scope, &path_var, VariableType::Path)?;
4471
4472        Ok(sp_plan)
4473    }
4474    /// Plan a MATCH pattern into a LogicalPlan (Scan → Traverse chains).
4475    ///
4476    /// This is a public entry point for the Locy plan builder to reuse the
4477    /// existing pattern-planning logic for clause bodies.
4478    pub fn plan_pattern(
4479        &self,
4480        pattern: &Pattern,
4481        initial_vars: &[VariableInfo],
4482    ) -> Result<LogicalPlan> {
4483        let mut vars_in_scope: Vec<VariableInfo> = initial_vars.to_vec();
4484        let vars_before_pattern = vars_in_scope.len();
4485        let mut plan = LogicalPlan::Empty;
4486        for path in &pattern.paths {
4487            plan = self.plan_path(path, plan, &mut vars_in_scope, false, vars_before_pattern)?;
4488        }
4489        Ok(plan)
4490    }
4491
4492    /// Plan a regular MATCH path (not shortestPath).
4493    fn plan_path(
4494        &self,
4495        path: &PathPattern,
4496        plan: LogicalPlan,
4497        vars_in_scope: &mut Vec<VariableInfo>,
4498        optional: bool,
4499        vars_before_pattern: usize,
4500    ) -> Result<LogicalPlan> {
4501        let mut plan = plan;
4502        let elements = &path.elements;
4503        let mut i = 0;
4504
4505        let path_variable = path.variable.clone();
4506
4507        // Check for VariableAlreadyBound: path variable already in scope
4508        if let Some(pv) = &path_variable
4509            && !pv.is_empty()
4510            && is_var_in_scope(vars_in_scope, pv)
4511        {
4512            return Err(anyhow!(
4513                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4514                pv
4515            ));
4516        }
4517
4518        // Check for VariableAlreadyBound: path variable conflicts with element variables
4519        if let Some(pv) = &path_variable
4520            && !pv.is_empty()
4521        {
4522            for element in elements {
4523                match element {
4524                    PatternElement::Node(n) => {
4525                        if let Some(v) = &n.variable
4526                            && v == pv
4527                        {
4528                            return Err(anyhow!(
4529                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4530                                pv
4531                            ));
4532                        }
4533                    }
4534                    PatternElement::Relationship(r) => {
4535                        if let Some(v) = &r.variable
4536                            && v == pv
4537                        {
4538                            return Err(anyhow!(
4539                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4540                                pv
4541                            ));
4542                        }
4543                    }
4544                    PatternElement::Parenthesized { .. } => {}
4545                }
4546            }
4547        }
4548
4549        // For OPTIONAL MATCH, extract all variables from this pattern upfront.
4550        // When any hop fails in a multi-hop pattern, ALL these variables should be NULL.
4551        let mut optional_pattern_vars: HashSet<String> = if optional {
4552            let mut vars = HashSet::new();
4553            for element in elements {
4554                match element {
4555                    PatternElement::Node(n) => {
4556                        if let Some(v) = &n.variable
4557                            && !v.is_empty()
4558                            && !is_var_in_scope(vars_in_scope, v)
4559                        {
4560                            vars.insert(v.clone());
4561                        }
4562                    }
4563                    PatternElement::Relationship(r) => {
4564                        if let Some(v) = &r.variable
4565                            && !v.is_empty()
4566                            && !is_var_in_scope(vars_in_scope, v)
4567                        {
4568                            vars.insert(v.clone());
4569                        }
4570                    }
4571                    PatternElement::Parenthesized { pattern, .. } => {
4572                        // Also check nested patterns
4573                        for nested_elem in &pattern.elements {
4574                            match nested_elem {
4575                                PatternElement::Node(n) => {
4576                                    if let Some(v) = &n.variable
4577                                        && !v.is_empty()
4578                                        && !is_var_in_scope(vars_in_scope, v)
4579                                    {
4580                                        vars.insert(v.clone());
4581                                    }
4582                                }
4583                                PatternElement::Relationship(r) => {
4584                                    if let Some(v) = &r.variable
4585                                        && !v.is_empty()
4586                                        && !is_var_in_scope(vars_in_scope, v)
4587                                    {
4588                                        vars.insert(v.clone());
4589                                    }
4590                                }
4591                                _ => {}
4592                            }
4593                        }
4594                    }
4595                }
4596            }
4597            // Include path variable if present
4598            if let Some(pv) = &path_variable
4599                && !pv.is_empty()
4600            {
4601                vars.insert(pv.clone());
4602            }
4603            vars
4604        } else {
4605            HashSet::new()
4606        };
4607
4608        // Pre-scan path elements for bound edge variables from previous MATCH clauses.
4609        // These must participate in Trail mode (relationship uniqueness) enforcement
4610        // across ALL segments in this path, so that VLP segments like [*0..1] don't
4611        // traverse through edges already claimed by a bound relationship [r].
4612        let path_bound_edge_vars: HashSet<String> = {
4613            let mut bound = HashSet::new();
4614            for element in elements {
4615                if let PatternElement::Relationship(rel) = element
4616                    && let Some(ref var_name) = rel.variable
4617                    && !var_name.is_empty()
4618                    && vars_in_scope[..vars_before_pattern]
4619                        .iter()
4620                        .any(|v| v.name == *var_name)
4621                {
4622                    bound.insert(var_name.clone());
4623                }
4624            }
4625            bound
4626        };
4627
4628        // Track if any traverses were added (for zero-length path detection)
4629        let mut had_traverses = false;
4630        // Track the node variable for zero-length path binding
4631        let mut single_node_variable: Option<String> = None;
4632        // Collect node/edge variables for BindPath (fixed-length path binding)
4633        let mut path_node_vars: Vec<String> = Vec::new();
4634        let mut path_edge_vars: Vec<String> = Vec::new();
4635        // Track the last processed outer node variable for QPP source binding.
4636        // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is `a`, not `x`.
4637        let mut last_outer_node_var: Option<String> = None;
4638
4639        // Multi-hop path variables are now supported - path is accumulated across hops
4640        while i < elements.len() {
4641            let element = &elements[i];
4642            match element {
4643                PatternElement::Node(n) => {
4644                    let mut variable = n.variable.clone().unwrap_or_default();
4645                    if variable.is_empty() {
4646                        variable = self.next_anon_var();
4647                    }
4648                    // Track first node variable for zero-length path
4649                    if single_node_variable.is_none() {
4650                        single_node_variable = Some(variable.clone());
4651                    }
4652                    let is_bound =
4653                        !variable.is_empty() && is_var_in_scope(vars_in_scope, &variable);
4654                    if optional && !is_bound {
4655                        optional_pattern_vars.insert(variable.clone());
4656                    }
4657
4658                    if is_bound {
4659                        // Check for type conflict - can't use an Edge/Path as a Node
4660                        if let Some(info) = find_var_in_scope(vars_in_scope, &variable)
4661                            && !info.var_type.is_compatible_with(VariableType::Node)
4662                        {
4663                            return Err(anyhow!(
4664                                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4665                                variable,
4666                                info.var_type
4667                            ));
4668                        }
4669                        if let Some(node_filter) =
4670                            self.node_filter_expr(&variable, &n.labels, &n.properties)
4671                        {
4672                            plan = LogicalPlan::Filter {
4673                                input: Box::new(plan),
4674                                predicate: node_filter,
4675                                optional_variables: HashSet::new(),
4676                            };
4677                        }
4678                    } else {
4679                        plan = self.plan_unbound_node(n, &variable, plan, optional)?;
4680                        if !variable.is_empty() {
4681                            add_var_to_scope(vars_in_scope, &variable, VariableType::Node)?;
4682                        }
4683                    }
4684
4685                    // Track source node for BindPath
4686                    if path_variable.is_some() && path_node_vars.is_empty() {
4687                        path_node_vars.push(variable.clone());
4688                    }
4689
4690                    // Look ahead for relationships
4691                    let mut current_source_var = variable;
4692                    last_outer_node_var = Some(current_source_var.clone());
4693                    i += 1;
4694                    while i < elements.len() {
4695                        if let PatternElement::Relationship(r) = &elements[i] {
4696                            if i + 1 < elements.len() {
4697                                let target_node_part = &elements[i + 1];
4698                                if let PatternElement::Node(n_target) = target_node_part {
4699                                    // For VLP traversals, pass path_variable through
4700                                    // For fixed-length, we use BindPath instead
4701                                    let is_vlp = r.range.is_some();
4702                                    let traverse_path_var =
4703                                        if is_vlp { path_variable.clone() } else { None };
4704
4705                                    // If we're about to start a VLP segment and there are
4706                                    // collected fixed-hop path vars, create an intermediate
4707                                    // BindPath for the fixed prefix first. The VLP will then
4708                                    // extend this existing path.
4709                                    if is_vlp
4710                                        && let Some(pv) = path_variable.as_ref()
4711                                        && !path_node_vars.is_empty()
4712                                    {
4713                                        plan = LogicalPlan::BindPath {
4714                                            input: Box::new(plan),
4715                                            node_variables: std::mem::take(&mut path_node_vars),
4716                                            edge_variables: std::mem::take(&mut path_edge_vars),
4717                                            path_variable: pv.clone(),
4718                                        };
4719                                        if !is_var_in_scope(vars_in_scope, pv) {
4720                                            add_var_to_scope(
4721                                                vars_in_scope,
4722                                                pv,
4723                                                VariableType::Path,
4724                                            )?;
4725                                        }
4726                                    }
4727
4728                                    // Plan the traverse from the current source node
4729                                    let target_was_bound =
4730                                        n_target.variable.as_ref().is_some_and(|v| {
4731                                            !v.is_empty() && is_var_in_scope(vars_in_scope, v)
4732                                        });
4733                                    let (new_plan, target_var, effective_target) = self
4734                                        .plan_traverse_with_source(
4735                                            plan,
4736                                            vars_in_scope,
4737                                            TraverseParams {
4738                                                rel: r,
4739                                                target_node: n_target,
4740                                                optional,
4741                                                path_variable: traverse_path_var,
4742                                                optional_pattern_vars: optional_pattern_vars
4743                                                    .clone(),
4744                                            },
4745                                            &current_source_var,
4746                                            vars_before_pattern,
4747                                            &path_bound_edge_vars,
4748                                        )?;
4749                                    plan = new_plan;
4750                                    if optional && !target_was_bound {
4751                                        optional_pattern_vars.insert(target_var.clone());
4752                                    }
4753
4754                                    // Track edge/target node for BindPath
4755                                    if path_variable.is_some() && !is_vlp {
4756                                        // Use the edge variable if given, otherwise use
4757                                        // the internal tracking column pattern.
4758                                        // Use effective_target (which may be __rebound_x
4759                                        // for bound-target traversals) to match the actual
4760                                        // column name produced by GraphTraverseExec.
4761                                        if let Some(ev) = &r.variable {
4762                                            path_edge_vars.push(ev.clone());
4763                                        } else {
4764                                            path_edge_vars
4765                                                .push(format!("__eid_to_{}", effective_target));
4766                                        }
4767                                        path_node_vars.push(target_var.clone());
4768                                    }
4769
4770                                    current_source_var = target_var;
4771                                    last_outer_node_var = Some(current_source_var.clone());
4772                                    had_traverses = true;
4773                                    i += 2;
4774                                } else {
4775                                    return Err(anyhow!("Relationship must be followed by a node"));
4776                                }
4777                            } else {
4778                                return Err(anyhow!("Relationship cannot be the last element"));
4779                            }
4780                        } else {
4781                            break;
4782                        }
4783                    }
4784                }
4785                PatternElement::Relationship(_) => {
4786                    return Err(anyhow!("Pattern must start with a node"));
4787                }
4788                PatternElement::Parenthesized { pattern, range } => {
4789                    // Quantified pattern: ((a)-[:REL]->(b)){n,m}
4790                    // Validate: odd number of elements (node-rel-node[-rel-node]*)
4791                    if pattern.elements.len() < 3 || pattern.elements.len() % 2 == 0 {
4792                        return Err(anyhow!(
4793                            "Quantified pattern must have node-relationship-node structure (odd number >= 3 elements)"
4794                        ));
4795                    }
4796
4797                    let source_node = match &pattern.elements[0] {
4798                        PatternElement::Node(n) => n,
4799                        _ => return Err(anyhow!("Quantified pattern must start with a node")),
4800                    };
4801
4802                    // Extract all relationship-node pairs (QPP steps)
4803                    let mut qpp_rels: Vec<(&RelationshipPattern, &NodePattern)> = Vec::new();
4804                    for pair_idx in (1..pattern.elements.len()).step_by(2) {
4805                        let rel = match &pattern.elements[pair_idx] {
4806                            PatternElement::Relationship(r) => r,
4807                            _ => {
4808                                return Err(anyhow!(
4809                                    "Quantified pattern element at position {} must be a relationship",
4810                                    pair_idx
4811                                ));
4812                            }
4813                        };
4814                        let node = match &pattern.elements[pair_idx + 1] {
4815                            PatternElement::Node(n) => n,
4816                            _ => {
4817                                return Err(anyhow!(
4818                                    "Quantified pattern element at position {} must be a node",
4819                                    pair_idx + 1
4820                                ));
4821                            }
4822                        };
4823                        // Reject nested quantifiers
4824                        if rel.range.is_some() {
4825                            return Err(anyhow!(
4826                                "Nested quantifiers not supported: ((a)-[:REL*n]->(b)){{m}}"
4827                            ));
4828                        }
4829                        qpp_rels.push((rel, node));
4830                    }
4831
4832                    // Check if there's an outer target node after the Parenthesized element.
4833                    // In syntax like `(a)((x)-[:LINK]->(y)){2,4}(b)`, the `(b)` is the outer
4834                    // target that should receive the traversal result.
4835                    let inner_target_node = qpp_rels.last().unwrap().1;
4836                    let outer_target_node = if i + 1 < elements.len() {
4837                        match &elements[i + 1] {
4838                            PatternElement::Node(n) => Some(n),
4839                            _ => None,
4840                        }
4841                    } else {
4842                        None
4843                    };
4844                    // Use the outer target for variable binding and filters; inner target
4845                    // labels are used for state constraints within the NFA.
4846                    let target_node = outer_target_node.unwrap_or(inner_target_node);
4847
4848                    // For simple 3-element single-hop QPP without intermediate label constraints,
4849                    // fall back to existing VLP behavior (copy range to relationship).
4850                    let use_simple_vlp = qpp_rels.len() == 1
4851                        && inner_target_node
4852                            .labels
4853                            .first()
4854                            .and_then(|l| self.schema.get_label_case_insensitive(l))
4855                            .is_none();
4856
4857                    // Plan source node.
4858                    // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is the preceding
4859                    // outer node `a`, NOT the inner `x`. If there's a preceding outer
4860                    // node variable, use it; otherwise fall back to the inner source.
4861                    let source_variable = if let Some(ref outer_src) = last_outer_node_var {
4862                        // The preceding outer node is already bound and in scope
4863                        // Apply any property filters from the inner source node
4864                        if let Some(prop_filter) =
4865                            self.properties_to_expr(outer_src, &source_node.properties)
4866                        {
4867                            plan = LogicalPlan::Filter {
4868                                input: Box::new(plan),
4869                                predicate: prop_filter,
4870                                optional_variables: HashSet::new(),
4871                            };
4872                        }
4873                        outer_src.clone()
4874                    } else {
4875                        let sv = source_node
4876                            .variable
4877                            .clone()
4878                            .filter(|v| !v.is_empty())
4879                            .unwrap_or_else(|| self.next_anon_var());
4880
4881                        if is_var_in_scope(vars_in_scope, &sv) {
4882                            // Source is already bound, apply property filter if needed
4883                            if let Some(prop_filter) =
4884                                self.properties_to_expr(&sv, &source_node.properties)
4885                            {
4886                                plan = LogicalPlan::Filter {
4887                                    input: Box::new(plan),
4888                                    predicate: prop_filter,
4889                                    optional_variables: HashSet::new(),
4890                                };
4891                            }
4892                        } else {
4893                            // Source is unbound, scan it
4894                            plan = self.plan_unbound_node(source_node, &sv, plan, optional)?;
4895                            add_var_to_scope(vars_in_scope, &sv, VariableType::Node)?;
4896                            if optional {
4897                                optional_pattern_vars.insert(sv.clone());
4898                            }
4899                        }
4900                        sv
4901                    };
4902
4903                    if use_simple_vlp {
4904                        // Simple single-hop QPP: apply range to relationship and use VLP path
4905                        let mut relationship = qpp_rels[0].0.clone();
4906                        relationship.range = range.clone();
4907
4908                        let target_was_bound = target_node
4909                            .variable
4910                            .as_ref()
4911                            .is_some_and(|v| !v.is_empty() && is_var_in_scope(vars_in_scope, v));
4912                        let (new_plan, target_var, _effective_target) = self
4913                            .plan_traverse_with_source(
4914                                plan,
4915                                vars_in_scope,
4916                                TraverseParams {
4917                                    rel: &relationship,
4918                                    target_node,
4919                                    optional,
4920                                    path_variable: path_variable.clone(),
4921                                    optional_pattern_vars: optional_pattern_vars.clone(),
4922                                },
4923                                &source_variable,
4924                                vars_before_pattern,
4925                                &path_bound_edge_vars,
4926                            )?;
4927                        plan = new_plan;
4928                        if optional && !target_was_bound {
4929                            optional_pattern_vars.insert(target_var);
4930                        }
4931                    } else {
4932                        // Multi-hop QPP: build QppStepInfo list and create Traverse with qpp_steps
4933                        let mut qpp_step_infos = Vec::new();
4934                        let mut all_edge_type_ids = Vec::new();
4935
4936                        for (rel, node) in &qpp_rels {
4937                            let mut step_edge_type_ids = Vec::new();
4938                            if rel.types.is_empty() {
4939                                step_edge_type_ids = self.schema.all_edge_type_ids();
4940                            } else {
4941                                for type_name in &rel.types {
4942                                    if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4943                                        step_edge_type_ids.push(edge_meta.id);
4944                                    }
4945                                }
4946                            }
4947                            all_edge_type_ids.extend_from_slice(&step_edge_type_ids);
4948
4949                            let target_label = node.labels.first().and_then(|l| {
4950                                self.schema.get_label_case_insensitive(l).map(|_| l.clone())
4951                            });
4952
4953                            qpp_step_infos.push(QppStepInfo {
4954                                edge_type_ids: step_edge_type_ids,
4955                                direction: rel.direction.clone(),
4956                                target_label,
4957                            });
4958                        }
4959
4960                        // Deduplicate edge type IDs for adjacency warming
4961                        all_edge_type_ids.sort_unstable();
4962                        all_edge_type_ids.dedup();
4963
4964                        // Compute iteration bounds from range
4965                        let hops_per_iter = qpp_step_infos.len();
4966                        const QPP_DEFAULT_MAX_HOPS: usize = 100;
4967                        let (min_iter, max_iter) = if let Some(range) = range {
4968                            let min = range.min.unwrap_or(1) as usize;
4969                            let max = range
4970                                .max
4971                                .map(|m| m as usize)
4972                                .unwrap_or(QPP_DEFAULT_MAX_HOPS / hops_per_iter);
4973                            (min, max)
4974                        } else {
4975                            (1, 1)
4976                        };
4977                        let min_hops = min_iter * hops_per_iter;
4978                        let max_hops = max_iter * hops_per_iter;
4979
4980                        // Target variable from the last node in the QPP sub-pattern
4981                        let target_variable = target_node
4982                            .variable
4983                            .clone()
4984                            .filter(|v| !v.is_empty())
4985                            .unwrap_or_else(|| self.next_anon_var());
4986
4987                        let target_is_bound = is_var_in_scope(vars_in_scope, &target_variable);
4988
4989                        // Determine target label for the final node
4990                        let target_label_meta = target_node
4991                            .labels
4992                            .first()
4993                            .and_then(|l| self.schema.get_label_case_insensitive(l));
4994
4995                        // Collect scope match variables
4996                        let mut scope_match_variables: HashSet<String> = vars_in_scope
4997                            [vars_before_pattern..]
4998                            .iter()
4999                            .map(|v| v.name.clone())
5000                            .collect();
5001                        scope_match_variables.insert(target_variable.clone());
5002
5003                        // Handle bound target: use rebound variable for traverse
5004                        let rebound_target_var = if target_is_bound {
5005                            Some(target_variable.clone())
5006                        } else {
5007                            None
5008                        };
5009                        let effective_target_var = if let Some(ref bv) = rebound_target_var {
5010                            format!("__rebound_{}", bv)
5011                        } else {
5012                            target_variable.clone()
5013                        };
5014
5015                        plan = LogicalPlan::Traverse {
5016                            input: Box::new(plan),
5017                            edge_type_ids: all_edge_type_ids,
5018                            direction: qpp_rels[0].0.direction.clone(),
5019                            source_variable: source_variable.to_string(),
5020                            target_variable: effective_target_var.clone(),
5021                            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
5022                            step_variable: None, // QPP doesn't expose intermediate edges
5023                            min_hops,
5024                            max_hops,
5025                            optional,
5026                            target_filter: self.node_filter_expr(
5027                                &target_variable,
5028                                &target_node.labels,
5029                                &target_node.properties,
5030                            ),
5031                            path_variable: path_variable.clone(),
5032                            edge_properties: HashSet::new(),
5033                            is_variable_length: true,
5034                            optional_pattern_vars: optional_pattern_vars.clone(),
5035                            scope_match_variables,
5036                            edge_filter_expr: None,
5037                            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5038                            qpp_steps: Some(qpp_step_infos),
5039                        };
5040
5041                        // Handle bound target: filter rebound results against original variable
5042                        if let Some(ref btv) = rebound_target_var {
5043                            // Filter: __rebound_x._vid = x._vid
5044                            let filter_pred = Expr::BinaryOp {
5045                                left: Box::new(Expr::Property(
5046                                    Box::new(Expr::Variable(effective_target_var.clone())),
5047                                    "_vid".to_string(),
5048                                )),
5049                                op: BinaryOp::Eq,
5050                                right: Box::new(Expr::Property(
5051                                    Box::new(Expr::Variable(btv.clone())),
5052                                    "_vid".to_string(),
5053                                )),
5054                            };
5055                            plan = LogicalPlan::Filter {
5056                                input: Box::new(plan),
5057                                predicate: filter_pred,
5058                                optional_variables: if optional {
5059                                    optional_pattern_vars.clone()
5060                                } else {
5061                                    HashSet::new()
5062                                },
5063                            };
5064                        }
5065
5066                        // Add target variable to scope
5067                        if !target_is_bound {
5068                            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5069                        }
5070
5071                        // Add path variable to scope
5072                        if let Some(ref pv) = path_variable
5073                            && !pv.is_empty()
5074                            && !is_var_in_scope(vars_in_scope, pv)
5075                        {
5076                            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5077                        }
5078                    }
5079                    had_traverses = true;
5080
5081                    // Skip the outer target node if we consumed it
5082                    if outer_target_node.is_some() {
5083                        i += 2; // skip both Parenthesized and the following Node
5084                    } else {
5085                        i += 1;
5086                    }
5087                }
5088            }
5089        }
5090
5091        // If this is a single-node pattern with a path variable, bind the zero-length path
5092        // E.g., `p = (a)` should create a Path with one node and zero edges
5093        if let Some(ref path_var) = path_variable
5094            && !path_var.is_empty()
5095            && !had_traverses
5096            && let Some(node_var) = single_node_variable
5097        {
5098            plan = LogicalPlan::BindZeroLengthPath {
5099                input: Box::new(plan),
5100                node_variable: node_var,
5101                path_variable: path_var.clone(),
5102            };
5103            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
5104        }
5105
5106        // Bind fixed-length path from collected node/edge variables
5107        if let Some(ref path_var) = path_variable
5108            && !path_var.is_empty()
5109            && had_traverses
5110            && !path_node_vars.is_empty()
5111            && !is_var_in_scope(vars_in_scope, path_var)
5112        {
5113            plan = LogicalPlan::BindPath {
5114                input: Box::new(plan),
5115                node_variables: path_node_vars,
5116                edge_variables: path_edge_vars,
5117                path_variable: path_var.clone(),
5118            };
5119            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
5120        }
5121
5122        Ok(plan)
5123    }
5124
5125    /// Plan a traverse with an explicit source variable name.
5126    ///
5127    /// Returns `(plan, target_variable, effective_target_variable)` where:
5128    /// - `target_variable` is the semantic variable name for downstream scope
5129    /// - `effective_target_variable` is the actual column-name prefix used by
5130    ///   the traverse (may be `__rebound_x` for bound-target patterns)
5131    fn plan_traverse_with_source(
5132        &self,
5133        plan: LogicalPlan,
5134        vars_in_scope: &mut Vec<VariableInfo>,
5135        params: TraverseParams<'_>,
5136        source_variable: &str,
5137        vars_before_pattern: usize,
5138        path_bound_edge_vars: &HashSet<String>,
5139    ) -> Result<(LogicalPlan, String, String)> {
5140        // Check for parameter used as relationship predicate
5141        if let Some(Expr::Parameter(_)) = &params.rel.properties {
5142            return Err(anyhow!(
5143                "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
5144            ));
5145        }
5146
5147        let mut edge_type_ids = Vec::new();
5148        let mut dst_labels = Vec::new();
5149        let mut unknown_types = Vec::new();
5150
5151        if params.rel.types.is_empty() {
5152            // All types - include both schema and schemaless edge types
5153            // This ensures MATCH (a)-[r]->(b) finds edges even when no schema is defined
5154            edge_type_ids = self.schema.all_edge_type_ids();
5155            for meta in self.schema.edge_types.values() {
5156                dst_labels.extend(meta.dst_labels.iter().cloned());
5157            }
5158        } else {
5159            for type_name in &params.rel.types {
5160                if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
5161                    // Known type - use standard Traverse with type_id
5162                    edge_type_ids.push(edge_meta.id);
5163                    dst_labels.extend(edge_meta.dst_labels.iter().cloned());
5164                } else if let Some((vid, _)) = self.allocate_virtual_edge_type(type_name)? {
5165                    // M5b.3: virtual edge type (plugin-registered CatalogTable).
5166                    // Resolving it into `edge_type_ids` (not `unknown_types`)
5167                    // lets the regular `Traverse` planner build a structured
5168                    // plan that the physical planner can dispatch to a
5169                    // `CatalogEdgeScanExec` mid-pattern.
5170                    edge_type_ids.push(vid);
5171                } else {
5172                    // Unknown type - will use TraverseMainByType
5173                    unknown_types.push(type_name.clone());
5174                }
5175            }
5176        }
5177
5178        // Deduplicate edge type IDs and unknown types ([:T|:T] → [:T])
5179        edge_type_ids.sort_unstable();
5180        edge_type_ids.dedup();
5181        unknown_types.sort_unstable();
5182        unknown_types.dedup();
5183
5184        let mut target_variable = params.target_node.variable.clone().unwrap_or_default();
5185        if target_variable.is_empty() {
5186            target_variable = self.next_anon_var();
5187        }
5188        let target_is_bound =
5189            !target_variable.is_empty() && is_var_in_scope(vars_in_scope, &target_variable);
5190
5191        // Check for VariableTypeConflict: relationship variable used as node
5192        // e.g., ()-[r]-(r) where r is both the edge and a node endpoint
5193        if let Some(rel_var) = &params.rel.variable
5194            && !rel_var.is_empty()
5195            && rel_var == &target_variable
5196        {
5197            return Err(anyhow!(
5198                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as relationship, cannot use as node",
5199                rel_var
5200            ));
5201        }
5202
5203        // Check for VariableTypeConflict/RelationshipUniquenessViolation
5204        // e.g., (r)-[r]-() or r = ()-[]-(), ()-[r]-()
5205        // Also: (a)-[r]->()-[r]->(a) where r is reused as relationship in same pattern
5206        // BUT: MATCH (a)-[r]->() WITH r MATCH ()-[r]->() is ALLOWED (r is bound from previous clause)
5207        let mut bound_edge_var: Option<String> = None;
5208        let mut bound_edge_list_var: Option<String> = None;
5209        if let Some(rel_var) = &params.rel.variable
5210            && !rel_var.is_empty()
5211            && let Some(info) = find_var_in_scope(vars_in_scope, rel_var)
5212        {
5213            let is_from_previous_clause = vars_in_scope[..vars_before_pattern]
5214                .iter()
5215                .any(|v| v.name == *rel_var);
5216
5217            if info.var_type == VariableType::Edge {
5218                // Check if this edge variable comes from a previous clause (before this MATCH)
5219                if is_from_previous_clause {
5220                    // Edge variable bound from previous clause - this is allowed
5221                    // We'll filter the traversal to match this specific edge
5222                    bound_edge_var = Some(rel_var.clone());
5223                } else {
5224                    // Same relationship variable used twice in the same MATCH clause
5225                    return Err(anyhow!(
5226                        "SyntaxError: RelationshipUniquenessViolation - Relationship variable '{}' is already used in this pattern",
5227                        rel_var
5228                    ));
5229                }
5230            } else if params.rel.range.is_some()
5231                && is_from_previous_clause
5232                && matches!(
5233                    info.var_type,
5234                    VariableType::Scalar | VariableType::ScalarLiteral
5235                )
5236            {
5237                // Allow VLP rebound against a previously bound relationship list
5238                // (e.g. WITH [r1, r2] AS rs ... MATCH ()-[rs*]->()).
5239                bound_edge_list_var = Some(rel_var.clone());
5240            } else if !info.var_type.is_compatible_with(VariableType::Edge) {
5241                return Err(anyhow!(
5242                    "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as relationship",
5243                    rel_var,
5244                    info.var_type
5245                ));
5246            }
5247        }
5248
5249        // Check for VariableTypeConflict: target node variable already bound as non-Node
5250        // e.g., ()-[r]-()-[]-(r) where r was added as Edge, now used as target node
5251        if target_is_bound
5252            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5253            && !info.var_type.is_compatible_with(VariableType::Node)
5254        {
5255            return Err(anyhow!(
5256                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
5257                target_variable,
5258                info.var_type
5259            ));
5260        }
5261
5262        // If all requested types are unknown (schemaless), use TraverseMainByType
5263        // This allows queries like MATCH (a)-[:UnknownType]->(b) to work
5264        // Also supports OR relationship types like MATCH (a)-[:KNOWS|HATES]->(b)
5265        if !unknown_types.is_empty() && edge_type_ids.is_empty() {
5266            // All types are unknown - use schemaless traversal
5267
5268            let is_variable_length = params.rel.range.is_some();
5269
5270            const DEFAULT_MAX_HOPS: usize = 100;
5271            let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
5272                let min = range.min.unwrap_or(1) as usize;
5273                let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
5274                (min, max)
5275            } else {
5276                (1, 1)
5277            };
5278
5279            // For both single-hop and variable-length paths:
5280            // - step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
5281            //   Single-hop: step_var holds a single edge object
5282            //   VLP: step_var holds a list of edge objects
5283            // - path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
5284            let step_var = params.rel.variable.clone();
5285            let path_var = params.path_variable.clone();
5286
5287            // Compute scope_match_variables for relationship uniqueness scoping.
5288            let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
5289                .iter()
5290                .map(|v| v.name.clone())
5291                .collect();
5292            if let Some(ref sv) = step_var {
5293                // Only add the step variable to scope if it's NOT rebound from a previous clause.
5294                // Rebound edges (bound_edge_var is set) should not participate in uniqueness
5295                // filtering because the second MATCH intentionally reuses the same edge.
5296                if bound_edge_var.is_none() {
5297                    scope_match_variables.insert(sv.clone());
5298                }
5299            }
5300            scope_match_variables.insert(target_variable.clone());
5301            // Include bound edge variables from this path for cross-segment Trail mode
5302            // enforcement. This ensures VLP segments like [*0..1] don't traverse through
5303            // edges already claimed by a bound relationship [r] in the same path.
5304            // Exclude the CURRENT segment's bound edge: the schemaless path doesn't use
5305            // __rebound_ renaming, so the BFS must be free to match the bound edge itself.
5306            scope_match_variables.extend(
5307                path_bound_edge_vars
5308                    .iter()
5309                    .filter(|v| bound_edge_var.as_ref() != Some(*v))
5310                    .cloned(),
5311            );
5312
5313            let mut plan = LogicalPlan::TraverseMainByType {
5314                type_names: unknown_types,
5315                input: Box::new(plan),
5316                direction: params.rel.direction.clone(),
5317                source_variable: source_variable.to_string(),
5318                target_variable: target_variable.clone(),
5319                step_variable: step_var.clone(),
5320                min_hops,
5321                max_hops,
5322                optional: params.optional,
5323                target_filter: self.node_filter_expr(
5324                    &target_variable,
5325                    &params.target_node.labels,
5326                    &params.target_node.properties,
5327                ),
5328                path_variable: path_var.clone(),
5329                is_variable_length,
5330                optional_pattern_vars: params.optional_pattern_vars.clone(),
5331                scope_match_variables,
5332                edge_filter_expr: if is_variable_length {
5333                    let filter_var = step_var
5334                        .clone()
5335                        .unwrap_or_else(|| "__anon_edge".to_string());
5336                    self.properties_to_expr(&filter_var, &params.rel.properties)
5337                } else {
5338                    None
5339                },
5340                path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5341            };
5342
5343            // Only apply bound target filter for Imported variables (from outer scope/subquery).
5344            // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
5345            // uses Parameter which requires the value to be in params (subquery context).
5346            if target_is_bound
5347                && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5348                && info.var_type == VariableType::Imported
5349            {
5350                plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
5351            }
5352
5353            // Apply relationship property predicates for fixed-length schemaless
5354            // traversals (e.g., [r:KNOWS {name: 'monkey'}]).
5355            // For VLP, predicates are stored inline in edge_filter_expr (above).
5356            // For fixed-length, wrap as a Filter node for post-traverse evaluation.
5357            if !is_variable_length
5358                && let Some(edge_var_name) = step_var.as_ref()
5359                && let Some(edge_prop_filter) =
5360                    self.properties_to_expr(edge_var_name, &params.rel.properties)
5361            {
5362                let filter_optional_vars = if params.optional {
5363                    params.optional_pattern_vars.clone()
5364                } else {
5365                    HashSet::new()
5366                };
5367                plan = LogicalPlan::Filter {
5368                    input: Box::new(plan),
5369                    predicate: edge_prop_filter,
5370                    optional_variables: filter_optional_vars,
5371                };
5372            }
5373
5374            // Add the bound variables to scope
5375            if let Some(sv) = &step_var {
5376                add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5377                if is_variable_length
5378                    && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5379                {
5380                    info.is_vlp = true;
5381                }
5382            }
5383            if let Some(pv) = &path_var
5384                && !is_var_in_scope(vars_in_scope, pv)
5385            {
5386                add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5387            }
5388            if !is_var_in_scope(vars_in_scope, &target_variable) {
5389                add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5390            }
5391
5392            return Ok((plan, target_variable.clone(), target_variable));
5393        }
5394
5395        // If we have a mix of known and unknown types, error for now
5396        // (could be extended to Union of Traverse + TraverseMainByType)
5397        if !unknown_types.is_empty() {
5398            return Err(anyhow!(
5399                "Mixed known and unknown edge types not yet supported. Unknown: {:?}",
5400                unknown_types
5401            ));
5402        }
5403
5404        // Resolve target label to either a schema id or a virtual id from the
5405        // plugin registry. Mid-pattern virtual-label dispatch (M5b.3) requires
5406        // the virtual id to flow into `Traverse.target_label_id` so the
5407        // physical planner can layer a `CatalogVertexScanExec` join on the
5408        // traverse output. Mirrors the schema-then-virtual fallthrough used
5409        // by single-vertex `Scan` planning (~`plan_node_pattern` below).
5410        let mut virtual_target_label_id: Option<u16> = None;
5411        let target_label_meta = if let Some(label_name) = params.target_node.labels.first() {
5412            // Use first label for target_label_id
5413            // For schemaless support, allow unknown target labels
5414            match self.schema.get_label_case_insensitive(label_name) {
5415                Some(meta) => Some(meta),
5416                None => {
5417                    if let Some((vid, _)) = self.allocate_virtual_label(label_name)? {
5418                        virtual_target_label_id = Some(vid);
5419                    }
5420                    None
5421                }
5422            }
5423        } else if !target_is_bound {
5424            // Infer from edge type(s)
5425            let unique_dsts: Vec<_> = dst_labels
5426                .into_iter()
5427                .collect::<HashSet<_>>()
5428                .into_iter()
5429                .collect();
5430            if unique_dsts.len() == 1 {
5431                let label_name = &unique_dsts[0];
5432                self.schema.get_label_case_insensitive(label_name)
5433            } else {
5434                // Multiple or no destination labels inferred - allow any target
5435                // This supports patterns like MATCH (a)-[:EDGE_TYPE]-(b) WHERE b:Label
5436                // where the edge type can connect to multiple labels
5437                None
5438            }
5439        } else {
5440            None
5441        };
5442
5443        // Check if this is a variable-length pattern (has range specifier like *1..3)
5444        let is_variable_length = params.rel.range.is_some();
5445
5446        // For VLP patterns, default min to 1 and max to a reasonable limit.
5447        // For single-hop patterns (no range), both are 1.
5448        const DEFAULT_MAX_HOPS: usize = 100;
5449        let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
5450            let min = range.min.unwrap_or(1) as usize;
5451            let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
5452            (min, max)
5453        } else {
5454            (1, 1)
5455        };
5456
5457        // step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
5458        //   Single-hop: step_var holds a single edge object
5459        //   VLP: step_var holds a list of edge objects
5460        // path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
5461        let step_var = params.rel.variable.clone();
5462        let path_var = params.path_variable.clone();
5463
5464        // If we have a bound edge variable from a previous clause, use a temp variable
5465        // for the Traverse step, then filter to match the bound edge
5466        let rebound_var = bound_edge_var
5467            .as_ref()
5468            .or(bound_edge_list_var.as_ref())
5469            .cloned();
5470        let effective_step_var = if let Some(ref bv) = rebound_var {
5471            Some(format!("__rebound_{}", bv))
5472        } else {
5473            step_var.clone()
5474        };
5475
5476        // If we have a bound target variable from a previous clause (e.g. WITH),
5477        // use a temp variable for the Traverse step, then filter to match the bound
5478        // target — mirroring the bound edge pattern above.
5479        let rebound_target_var = if target_is_bound && !target_variable.is_empty() {
5480            let is_imported = find_var_in_scope(vars_in_scope, &target_variable)
5481                .map(|info| info.var_type == VariableType::Imported)
5482                .unwrap_or(false);
5483            if !is_imported {
5484                Some(target_variable.clone())
5485            } else {
5486                None
5487            }
5488        } else {
5489            None
5490        };
5491
5492        let effective_target_var = if let Some(ref bv) = rebound_target_var {
5493            format!("__rebound_{}", bv)
5494        } else {
5495            target_variable.clone()
5496        };
5497
5498        // Collect all variables (node + edge) from the current MATCH clause scope
5499        // for relationship uniqueness scoping. Edge ID columns (both named `r._eid`
5500        // and anonymous `__eid_to_target`) are only included in uniqueness filtering
5501        // if their associated variable is in this set. This prevents relationship
5502        // uniqueness from being enforced across disconnected MATCH clauses.
5503        let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
5504            .iter()
5505            .map(|v| v.name.clone())
5506            .collect();
5507        // Include the current traverse's edge variable (not yet added to vars_in_scope)
5508        if let Some(ref sv) = effective_step_var {
5509            scope_match_variables.insert(sv.clone());
5510        }
5511        // Include the target variable (not yet added to vars_in_scope)
5512        scope_match_variables.insert(effective_target_var.clone());
5513        // Include bound edge variables from this path for cross-segment Trail mode
5514        // enforcement (same as the schemaless path above).
5515        scope_match_variables.extend(path_bound_edge_vars.iter().cloned());
5516
5517        let mut plan = LogicalPlan::Traverse {
5518            input: Box::new(plan),
5519            edge_type_ids,
5520            direction: params.rel.direction.clone(),
5521            source_variable: source_variable.to_string(),
5522            target_variable: effective_target_var.clone(),
5523            target_label_id: target_label_meta
5524                .map(|m| m.id)
5525                .or(virtual_target_label_id)
5526                .unwrap_or(0),
5527            step_variable: effective_step_var.clone(),
5528            min_hops,
5529            max_hops,
5530            optional: params.optional,
5531            target_filter: self.node_filter_expr(
5532                &target_variable,
5533                &params.target_node.labels,
5534                &params.target_node.properties,
5535            ),
5536            path_variable: path_var.clone(),
5537            edge_properties: HashSet::new(),
5538            is_variable_length,
5539            optional_pattern_vars: params.optional_pattern_vars.clone(),
5540            scope_match_variables,
5541            edge_filter_expr: if is_variable_length {
5542                // Use the step variable name, or a fallback for anonymous edges.
5543                // The variable name is used by properties_to_expr to build
5544                // `var.prop = value` expressions. For BFS property checking,
5545                // only the property name and value matter (the variable name
5546                // is stripped during extraction).
5547                let filter_var = effective_step_var
5548                    .clone()
5549                    .unwrap_or_else(|| "__anon_edge".to_string());
5550                self.properties_to_expr(&filter_var, &params.rel.properties)
5551            } else {
5552                None
5553            },
5554            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5555            qpp_steps: None,
5556        };
5557
5558        // Pre-compute optional variables set for filter nodes in this traverse.
5559        // Used by relationship property filters and bound-edge filters below.
5560        let filter_optional_vars = if params.optional {
5561            params.optional_pattern_vars.clone()
5562        } else {
5563            HashSet::new()
5564        };
5565
5566        // Apply relationship property predicates (e.g. [r {k: v}]).
5567        // For VLP, predicates are stored inline in edge_filter_expr (above).
5568        // For fixed-length, wrap as a Filter node for post-traverse evaluation.
5569        if !is_variable_length
5570            && let Some(edge_var_name) = effective_step_var.as_ref()
5571            && let Some(edge_prop_filter) =
5572                self.properties_to_expr(edge_var_name, &params.rel.properties)
5573        {
5574            plan = LogicalPlan::Filter {
5575                input: Box::new(plan),
5576                predicate: edge_prop_filter,
5577                optional_variables: filter_optional_vars.clone(),
5578            };
5579        }
5580
5581        // Only apply bound target filter for Imported variables (from outer scope/subquery).
5582        // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
5583        // uses Parameter which requires the value to be in params (subquery context).
5584        if target_is_bound
5585            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5586            && info.var_type == VariableType::Imported
5587        {
5588            plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
5589        }
5590
5591        // If we have a bound edge variable, add a filter to match it
5592        if let Some(ref bv) = bound_edge_var {
5593            let temp_var = format!("__rebound_{}", bv);
5594            let bound_check = Expr::BinaryOp {
5595                left: Box::new(Expr::Property(
5596                    Box::new(Expr::Variable(temp_var)),
5597                    "_eid".to_string(),
5598                )),
5599                op: BinaryOp::Eq,
5600                right: Box::new(Expr::Property(
5601                    Box::new(Expr::Variable(bv.clone())),
5602                    "_eid".to_string(),
5603                )),
5604            };
5605            plan = LogicalPlan::Filter {
5606                input: Box::new(plan),
5607                predicate: bound_check,
5608                optional_variables: filter_optional_vars.clone(),
5609            };
5610        }
5611
5612        // If we have a bound relationship list variable for a VLP pattern,
5613        // add a filter to match the traversed relationship list exactly.
5614        if let Some(ref bv) = bound_edge_list_var {
5615            let temp_var = format!("__rebound_{}", bv);
5616            let temp_eids = Expr::ListComprehension {
5617                variable: "__rebound_edge".to_string(),
5618                list: Box::new(Expr::Variable(temp_var)),
5619                where_clause: None,
5620                map_expr: Box::new(Expr::FunctionCall {
5621                    name: "toInteger".to_string(),
5622                    args: vec![Expr::Property(
5623                        Box::new(Expr::Variable("__rebound_edge".to_string())),
5624                        "_eid".to_string(),
5625                    )],
5626                    distinct: false,
5627                    window_spec: None,
5628                }),
5629            };
5630            let bound_eids = Expr::ListComprehension {
5631                variable: "__bound_edge".to_string(),
5632                list: Box::new(Expr::Variable(bv.clone())),
5633                where_clause: None,
5634                map_expr: Box::new(Expr::FunctionCall {
5635                    name: "toInteger".to_string(),
5636                    args: vec![Expr::Property(
5637                        Box::new(Expr::Variable("__bound_edge".to_string())),
5638                        "_eid".to_string(),
5639                    )],
5640                    distinct: false,
5641                    window_spec: None,
5642                }),
5643            };
5644            let bound_list_check = Expr::BinaryOp {
5645                left: Box::new(temp_eids),
5646                op: BinaryOp::Eq,
5647                right: Box::new(bound_eids),
5648            };
5649            plan = LogicalPlan::Filter {
5650                input: Box::new(plan),
5651                predicate: bound_list_check,
5652                optional_variables: filter_optional_vars.clone(),
5653            };
5654        }
5655
5656        // If we have a bound target variable (non-imported), add a filter to constrain
5657        // the traversal output to match the previously bound target node.
5658        if let Some(ref bv) = rebound_target_var {
5659            let temp_var = format!("__rebound_{}", bv);
5660            let bound_check = Expr::BinaryOp {
5661                left: Box::new(Expr::Property(
5662                    Box::new(Expr::Variable(temp_var.clone())),
5663                    "_vid".to_string(),
5664                )),
5665                op: BinaryOp::Eq,
5666                right: Box::new(Expr::Property(
5667                    Box::new(Expr::Variable(bv.clone())),
5668                    "_vid".to_string(),
5669                )),
5670            };
5671            // For OPTIONAL MATCH, include the rebound variable in optional_variables
5672            // so that OptionalFilterExec excludes it from the grouping key and
5673            // properly nullifies it in recovery rows when all matches are filtered out.
5674            // Without this, each traverse result creates its own group (keyed by
5675            // __rebound_c._vid), and null-row recovery emits a spurious null row
5676            // for every non-matching target instead of one per source group.
5677            let mut rebound_filter_vars = filter_optional_vars;
5678            if params.optional {
5679                rebound_filter_vars.insert(temp_var);
5680            }
5681            plan = LogicalPlan::Filter {
5682                input: Box::new(plan),
5683                predicate: bound_check,
5684                optional_variables: rebound_filter_vars,
5685            };
5686        }
5687
5688        // Add the bound variables to scope
5689        // Skip adding the edge variable if it's already bound from a previous clause
5690        if let Some(sv) = &step_var
5691            && bound_edge_var.is_none()
5692            && bound_edge_list_var.is_none()
5693        {
5694            add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5695            if is_variable_length
5696                && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5697            {
5698                info.is_vlp = true;
5699            }
5700        }
5701        if let Some(pv) = &path_var
5702            && !is_var_in_scope(vars_in_scope, pv)
5703        {
5704            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5705        }
5706        if !is_var_in_scope(vars_in_scope, &target_variable) {
5707            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5708        }
5709
5710        Ok((plan, target_variable, effective_target_var))
5711    }
5712
5713    /// Combine a new scan plan with an existing plan.
5714    ///
5715    /// If the existing plan is `Empty`, returns the new plan directly.
5716    /// Otherwise, wraps them in a `CrossJoin`.
5717    fn join_with_plan(existing: LogicalPlan, new: LogicalPlan) -> LogicalPlan {
5718        if matches!(existing, LogicalPlan::Empty) {
5719            new
5720        } else {
5721            LogicalPlan::CrossJoin {
5722                left: Box::new(existing),
5723                right: Box::new(new),
5724            }
5725        }
5726    }
5727
5728    /// Split node map predicates into scan-pushable and residual filters.
5729    ///
5730    /// A predicate is scan-pushable when its value expression references only
5731    /// the node variable itself (or no variables). Predicates referencing other
5732    /// in-scope variables (correlated predicates) are returned as residual so
5733    /// they can be applied after joining with the existing plan.
5734    fn split_node_property_filters_for_scan(
5735        &self,
5736        variable: &str,
5737        properties: &Option<Expr>,
5738    ) -> (Option<Expr>, Option<Expr>) {
5739        let entries = match properties {
5740            Some(Expr::Map(entries)) => entries,
5741            _ => return (None, None),
5742        };
5743
5744        if entries.is_empty() {
5745            return (None, None);
5746        }
5747
5748        let mut pushdown_entries = Vec::new();
5749        let mut residual_entries = Vec::new();
5750
5751        for (prop, val_expr) in entries {
5752            let vars = collect_expr_variables(val_expr);
5753            if vars.iter().all(|v| v == variable) {
5754                pushdown_entries.push((prop.clone(), val_expr.clone()));
5755            } else {
5756                residual_entries.push((prop.clone(), val_expr.clone()));
5757            }
5758        }
5759
5760        let pushdown_map = if pushdown_entries.is_empty() {
5761            None
5762        } else {
5763            Some(Expr::Map(pushdown_entries))
5764        };
5765        let residual_map = if residual_entries.is_empty() {
5766            None
5767        } else {
5768            Some(Expr::Map(residual_entries))
5769        };
5770
5771        (
5772            self.properties_to_expr(variable, &pushdown_map),
5773            self.properties_to_expr(variable, &residual_map),
5774        )
5775    }
5776
5777    /// Decide whether per-label `Scan` branches for a label disjunction can
5778    /// safely be combined under `LogicalPlan::Union`. Returns `true` iff every
5779    /// label in `labels` is registered in the schema AND every pair shares an
5780    /// identical property name+type set.
5781    ///
5782    /// When this returns `false`, the disjunction must fall back to a single
5783    /// `ScanMainByLabels` over all labels — otherwise DataFusion's
5784    /// `UnionExec::try_new` panics in `union_schema` because the per-label
5785    /// `GraphScanExec` outputs (`_vid` + `_labels` + per-label projected
5786    /// properties) have different field counts. Issue rustic-ai/uni-db#62.
5787    ///
5788    /// We deliberately compare full schema property sets rather than only the
5789    /// properties referenced by the current query: at this logical-planning
5790    /// stage we have not yet collected `all_properties`, and `*` wildcards
5791    /// (e.g. from unknown function calls) would expand per-label downstream
5792    /// in `df_planner::resolve_properties` even when the query text only
5793    /// touches common columns.
5794    fn label_branches_share_property_schema(&self, labels: &[String]) -> bool {
5795        if labels.len() < 2 {
5796            return true;
5797        }
5798        let mut iter = labels.iter();
5799        let first = iter.next().expect("len >= 2");
5800        let Some(first_props) = self.schema.properties.get(first) else {
5801            return false;
5802        };
5803        for label in iter {
5804            let Some(props) = self.schema.properties.get(label) else {
5805                return false;
5806            };
5807            if props.len() != first_props.len() {
5808                return false;
5809            }
5810            for (name, meta) in first_props {
5811                let Some(other_meta) = props.get(name) else {
5812                    return false;
5813                };
5814                if meta.r#type != other_meta.r#type {
5815                    return false;
5816                }
5817            }
5818        }
5819        true
5820    }
5821
5822    /// Plan an unbound node (creates a Scan, ScanAll, ScanMainByLabel, ExtIdLookup, or CrossJoin).
5823    fn plan_unbound_node(
5824        &self,
5825        node: &NodePattern,
5826        variable: &str,
5827        plan: LogicalPlan,
5828        optional: bool,
5829    ) -> Result<LogicalPlan> {
5830        // Properties handling
5831        let properties = match &node.properties {
5832            Some(Expr::Map(entries)) => entries.as_slice(),
5833            Some(Expr::Parameter(_)) => {
5834                return Err(anyhow!(
5835                    "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
5836                ));
5837            }
5838            Some(_) => return Err(anyhow!("Node properties must be a Map")),
5839            None => &[],
5840        };
5841
5842        let has_existing_scope = !matches!(plan, LogicalPlan::Empty);
5843
5844        let apply_residual_filter = |input: LogicalPlan, residual: Option<Expr>| -> LogicalPlan {
5845            if let Some(predicate) = residual {
5846                LogicalPlan::Filter {
5847                    input: Box::new(input),
5848                    predicate,
5849                    optional_variables: HashSet::new(),
5850                }
5851            } else {
5852                input
5853            }
5854        };
5855
5856        let (node_scan_filter, node_residual_filter) = if has_existing_scope {
5857            self.split_node_property_filters_for_scan(variable, &node.properties)
5858        } else {
5859            (self.properties_to_expr(variable, &node.properties), None)
5860        };
5861
5862        // Check for ext_id in properties when no label is specified
5863        if node.labels.is_empty() {
5864            // Try to find ext_id property for main table lookup
5865            if let Some((_, ext_id_value)) = properties.iter().find(|(k, _)| k == "ext_id") {
5866                // Extract the ext_id value as a string
5867                let ext_id = match ext_id_value {
5868                    Expr::Literal(CypherLiteral::String(s)) => s.clone(),
5869                    _ => {
5870                        return Err(anyhow!("ext_id must be a string literal for direct lookup"));
5871                    }
5872                };
5873
5874                // Build filter for remaining properties (excluding ext_id)
5875                let remaining_props: Vec<_> = properties
5876                    .iter()
5877                    .filter(|(k, _)| k != "ext_id")
5878                    .cloned()
5879                    .collect();
5880
5881                let remaining_expr = if remaining_props.is_empty() {
5882                    None
5883                } else {
5884                    Some(Expr::Map(remaining_props))
5885                };
5886
5887                let (prop_filter, residual_filter) = if has_existing_scope {
5888                    self.split_node_property_filters_for_scan(variable, &remaining_expr)
5889                } else {
5890                    (self.properties_to_expr(variable, &remaining_expr), None)
5891                };
5892
5893                let ext_id_lookup = LogicalPlan::ExtIdLookup {
5894                    variable: variable.to_string(),
5895                    ext_id,
5896                    filter: prop_filter,
5897                    optional,
5898                };
5899
5900                let joined = Self::join_with_plan(plan, ext_id_lookup);
5901                return Ok(apply_residual_filter(joined, residual_filter));
5902            }
5903
5904            // No ext_id: create ScanAll for unlabeled node pattern
5905            let scan_all = LogicalPlan::ScanAll {
5906                variable: variable.to_string(),
5907                filter: node_scan_filter,
5908                optional,
5909            };
5910
5911            let joined = Self::join_with_plan(plan, scan_all);
5912            return Ok(apply_residual_filter(joined, node_residual_filter));
5913        }
5914
5915        // Label disjunction `(n:A|B|C)` — emit Union of label-scoped Scans.
5916        //
5917        // Storage fact: a multi-labeled vertex is fanned out into every
5918        // per-label table it carries (uni-store/src/runtime/writer.rs's
5919        // `push_vertex_to_labels`), so the same vid can appear in both the
5920        // `A` scan and the `B` scan of a disjunctive query. Use
5921        // `Union { all: false }` so the combined result deduplicates by row
5922        // contents (which include the vid) rather than emitting the same
5923        // vertex twice. The single-label-disjunction case (`Disjunction(["A"])`)
5924        // is encoded the same way the parser already encodes single edge
5925        // types, and reduces to one Scan with no Union wrapping.
5926        if node.labels.is_proper_disjunction() {
5927            let label_names: Vec<String> = node.labels.names().to_vec();
5928
5929            // Per-label branches under a `Union` only line up when every
5930            // branch produces the same Arrow schema. The narrow-scan
5931            // `Scan` path resolves columns *per label*, so heterogeneous
5932            // property sets (or any schemaless label in the mix) yield
5933            // mismatched widths and DataFusion's `UnionExec::try_new`
5934            // panics inside `union_schema` (issue rustic-ai/uni-db#62).
5935            //
5936            // For those cases, lower every branch to a *single-label*
5937            // `ScanMainByLabels` instead. The schemaless main-table scan
5938            // resolves columns from `all_properties` directly (no per-label
5939            // expansion), so all branches emit a uniform schema and the
5940            // outer `Union { all: false }` deduplicates correctly. We
5941            // keep the per-branch Union shape (rather than collapsing to
5942            // a single multi-label scan) because multi-label
5943            // `ScanMainByLabels` has AND/intersection semantics — wrong
5944            // for a disjunction.
5945            let use_main_table_branches = !self.label_branches_share_property_schema(&label_names);
5946
5947            let mut branches: Vec<LogicalPlan> = Vec::with_capacity(label_names.len());
5948            for label_name in &label_names {
5949                let branch = if use_main_table_branches {
5950                    LogicalPlan::ScanMainByLabels {
5951                        labels: vec![label_name.clone()],
5952                        variable: variable.to_string(),
5953                        filter: node_scan_filter.clone(),
5954                        optional,
5955                    }
5956                } else {
5957                    let meta = self
5958                        .schema
5959                        .get_label_case_insensitive(label_name)
5960                        .expect("share_property_schema true implies all labels in schema");
5961                    LogicalPlan::Scan {
5962                        label_id: meta.id,
5963                        labels: vec![label_name.clone()],
5964                        variable: variable.to_string(),
5965                        filter: node_scan_filter.clone(),
5966                        optional,
5967                    }
5968                };
5969                branches.push(branch);
5970            }
5971            // Left-leaning Union: Union(Union(A, B), C). All inner
5972            // unions dedupe by row, so the outer one does too.
5973            let mut iter = branches.into_iter();
5974            let mut union_plan = iter
5975                .next()
5976                .expect("is_proper_disjunction implies at least 2 labels");
5977            for next in iter {
5978                union_plan = LogicalPlan::Union {
5979                    left: Box::new(union_plan),
5980                    right: Box::new(next),
5981                    all: false,
5982                };
5983            }
5984            let joined = Self::join_with_plan(plan, union_plan);
5985            return Ok(apply_residual_filter(joined, node_residual_filter));
5986        }
5987
5988        // Use first label for label_id (primary label for dataset selection)
5989        let label_name = &node.labels[0];
5990
5991        // Check if label exists in schema
5992        if let Some(label_meta) = self.schema.get_label_case_insensitive(label_name) {
5993            // Known label: use standard Scan
5994            let scan = LogicalPlan::Scan {
5995                label_id: label_meta.id,
5996                labels: node.labels.names().to_vec(),
5997                variable: variable.to_string(),
5998                filter: node_scan_filter,
5999                optional,
6000            };
6001
6002            let joined = Self::join_with_plan(plan, scan);
6003            Ok(apply_residual_filter(joined, node_residual_filter))
6004        } else {
6005            // Unknown label. Try a CatalogProvider / ReplacementScanProvider
6006            // claim first: on success allocate a virtual label-ID and emit a
6007            // regular `Scan` against the virtual id (`df_planner` dispatches
6008            // to `CatalogVertexScanExec`). When no provider claims and the
6009            // replacement-scan gate is on, strict-mode errors. When the gate
6010            // is off and no provider claims, preserve today's silent-empty
6011            // schemaless `ScanMainByLabels` behavior bit-for-bit.
6012            if let Some((virtual_id, _)) = self.allocate_virtual_label(label_name)? {
6013                let scan = LogicalPlan::Scan {
6014                    label_id: virtual_id,
6015                    labels: node.labels.names().to_vec(),
6016                    variable: variable.to_string(),
6017                    filter: node_scan_filter,
6018                    optional,
6019                };
6020                let joined = Self::join_with_plan(plan, scan);
6021                return Ok(apply_residual_filter(joined, node_residual_filter));
6022            }
6023            if self.replacement_scans_enabled {
6024                return Err(anyhow!(
6025                    "Label `{}` is not defined in schema and no \
6026                     CatalogProvider or ReplacementScanProvider claimed it; \
6027                     strict-mode (replacement_scans=true) requires the label \
6028                     to resolve",
6029                    label_name
6030                ));
6031            }
6032
6033            let scan_main = LogicalPlan::ScanMainByLabels {
6034                labels: node.labels.names().to_vec(),
6035                variable: variable.to_string(),
6036                filter: node_scan_filter,
6037                optional,
6038            };
6039
6040            let joined = Self::join_with_plan(plan, scan_main);
6041            Ok(apply_residual_filter(joined, node_residual_filter))
6042        }
6043    }
6044
6045    /// Plan a WHERE clause with vector_similarity extraction and predicate pushdown.
6046    ///
6047    /// When `optional_vars` is non-empty, the Filter will preserve rows where
6048    /// any of those variables are NULL (for OPTIONAL MATCH semantics).
6049    fn plan_where_clause(
6050        &self,
6051        predicate: &Expr,
6052        plan: LogicalPlan,
6053        vars_in_scope: &[VariableInfo],
6054        optional_vars: HashSet<String>,
6055    ) -> Result<LogicalPlan> {
6056        // Validate no aggregation functions in WHERE clause
6057        validate_no_aggregation_in_where(predicate)?;
6058
6059        // Validate all variables used are in scope
6060        validate_expression_variables(predicate, vars_in_scope)?;
6061
6062        // Validate expression types (function args, boolean operators)
6063        validate_expression(predicate, vars_in_scope)?;
6064
6065        // Check that WHERE predicate isn't a bare node/edge/path variable
6066        if let Expr::Variable(var_name) = predicate
6067            && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
6068            && matches!(
6069                info.var_type,
6070                VariableType::Node | VariableType::Edge | VariableType::Path
6071            )
6072        {
6073            return Err(anyhow!(
6074                "SyntaxError: InvalidArgumentType - Type mismatch: expected Boolean but was {:?}",
6075                info.var_type
6076            ));
6077        }
6078
6079        let mut plan = plan;
6080
6081        // Transform VALID_AT macro to function call
6082        let transformed_predicate = Self::transform_valid_at_to_function(predicate.clone());
6083
6084        // Rewrite id(var) to var._vid (or var._eid for an edge) so
6085        // PredicateAnalyzer can push it down.
6086        let transformed_predicate = Self::rewrite_id_to_vid(transformed_predicate, vars_in_scope);
6087
6088        let mut current_predicate =
6089            self.rewrite_predicates_using_indexes(&transformed_predicate, &plan, vars_in_scope)?;
6090
6091        // 1. Try to extract vector_similarity predicate for optimization
6092        if let Some(extraction) = extract_vector_similarity(&current_predicate) {
6093            let vs = &extraction.predicate;
6094            if Self::find_scan_label_id(&plan, &vs.variable).is_some() {
6095                plan = Self::replace_scan_with_knn(
6096                    plan,
6097                    &vs.variable,
6098                    &vs.property,
6099                    vs.query.clone(),
6100                    vs.threshold,
6101                );
6102                if let Some(residual) = extraction.residual {
6103                    current_predicate = residual;
6104                } else {
6105                    current_predicate = Expr::TRUE;
6106                }
6107            }
6108        }
6109
6110        // 2. Label/type disjunction → narrow-scan rewrite.
6111        //
6112        // `WHERE n:A OR n:B` and `WHERE type(r) = 'A' OR type(r) = 'B'`
6113        // are functionally identical to the inline forms `(n:A|B)` and
6114        // `[r:A|B]`, but a literal pattern lowering would route them
6115        // through `Filter(LabelCheck OR LabelCheck)` over `ScanAll` —
6116        // a full vertex/edge scan plus residual filter, missing the
6117        // narrow-scan fast-path that the inline forms get for free.
6118        // Detect those OR-chains here and rewrite the upstream
6119        // `ScanAll` / `Traverse` accordingly.
6120        let conjuncts = Self::split_and_conjuncts(&current_predicate);
6121        let mut keep: Vec<Expr> = Vec::with_capacity(conjuncts.len());
6122        for conj in conjuncts {
6123            let mut consumed = false;
6124            for var in vars_in_scope {
6125                if optional_vars.contains(&var.name) {
6126                    continue;
6127                }
6128                // Node label disjunction → Union of label-scoped Scans.
6129                if Self::is_scan_all_for(&plan, &var.name)
6130                    && let Some(labels) = try_label_or_to_union(&conj, &var.name)
6131                {
6132                    plan = self.replace_scan_all_with_label_union(plan, &var.name, &labels, false);
6133                    consumed = true;
6134                    break;
6135                }
6136                // Edge type disjunction → merge into Traverse.edge_type_ids.
6137                if let Some(types) = try_type_or_to_union(&conj, &var.name)
6138                    && Self::merge_traverse_types_for(&plan, &var.name, &types).is_some()
6139                {
6140                    let mut ids: Vec<u32> = Vec::with_capacity(types.len());
6141                    let mut all_known = true;
6142                    for t in &types {
6143                        match self.schema.edge_types.get(t) {
6144                            Some(meta) => ids.push(meta.id),
6145                            None => {
6146                                all_known = false;
6147                                break;
6148                            }
6149                        }
6150                    }
6151                    if all_known {
6152                        plan = Self::set_traverse_edge_type_ids(plan, &var.name, ids);
6153                        consumed = true;
6154                        break;
6155                    }
6156                }
6157            }
6158            if !consumed {
6159                keep.push(conj);
6160            }
6161        }
6162        current_predicate = Self::combine_predicates(keep).unwrap_or(Expr::TRUE);
6163
6164        // 3. Push eligible predicates to Scan OR Traverse filters
6165        // Note: Do NOT push predicates on optional variables (from OPTIONAL MATCH) to
6166        // Traverse's target_filter, because target_filter filtering doesn't preserve NULL
6167        // rows. Let them stay in the Filter operator which handles NULL preservation.
6168        for var in vars_in_scope {
6169            // Skip pushdown for optional variables - they need NULL preservation in Filter
6170            if optional_vars.contains(&var.name) {
6171                continue;
6172            }
6173
6174            // Check if var is produced by a Scan
6175            if Self::find_scan_label_id(&plan, &var.name).is_some() {
6176                let (pushable, residual) =
6177                    Self::extract_variable_predicates(&current_predicate, &var.name);
6178
6179                for pred in pushable {
6180                    plan = Self::push_predicate_to_scan(plan, &var.name, pred);
6181                }
6182
6183                if let Some(r) = residual {
6184                    current_predicate = r;
6185                } else {
6186                    current_predicate = Expr::TRUE;
6187                }
6188            } else if Self::is_traverse_target(&plan, &var.name) {
6189                // Push to Traverse
6190                let (pushable, residual) =
6191                    Self::extract_variable_predicates(&current_predicate, &var.name);
6192
6193                for pred in pushable {
6194                    plan = Self::push_predicate_to_traverse(plan, &var.name, pred);
6195                }
6196
6197                if let Some(r) = residual {
6198                    current_predicate = r;
6199                } else {
6200                    current_predicate = Expr::TRUE;
6201                }
6202            }
6203        }
6204
6205        // 4. Push predicates to Apply.input_filter
6206        // This filters input rows BEFORE executing correlated subqueries.
6207        plan = Self::push_predicates_to_apply(plan, &mut current_predicate);
6208
6209        // 5. Add Filter node for any remaining predicates
6210        if !current_predicate.is_true_literal() {
6211            plan = LogicalPlan::Filter {
6212                input: Box::new(plan),
6213                predicate: current_predicate,
6214                optional_variables: optional_vars,
6215            };
6216        }
6217
6218        Ok(plan)
6219    }
6220
6221    fn rewrite_predicates_using_indexes(
6222        &self,
6223        predicate: &Expr,
6224        plan: &LogicalPlan,
6225        vars_in_scope: &[VariableInfo],
6226    ) -> Result<Expr> {
6227        let mut rewritten = predicate.clone();
6228
6229        for var in vars_in_scope {
6230            if let Some(label_id) = Self::find_scan_label_id(plan, &var.name) {
6231                // Find label name
6232                let label_name = self.schema.label_name_by_id(label_id).map(str::to_owned);
6233
6234                if let Some(label) = label_name
6235                    && let Some(props) = self.schema.properties.get(&label)
6236                {
6237                    for (gen_col, meta) in props {
6238                        if meta.generation_expression.is_some() {
6239                            // Use cached parsed expression
6240                            if let Some(schema_expr) =
6241                                self.gen_expr_cache.get(&(label.clone(), gen_col.clone()))
6242                            {
6243                                // Rewrite 'rewritten' replacing occurrences of schema_expr with gen_col
6244                                rewritten = Self::replace_expression(
6245                                    rewritten,
6246                                    schema_expr,
6247                                    &var.name,
6248                                    gen_col,
6249                                );
6250                            }
6251                        }
6252                    }
6253                }
6254            }
6255        }
6256        Ok(rewritten)
6257    }
6258
6259    fn replace_expression(expr: Expr, schema_expr: &Expr, query_var: &str, gen_col: &str) -> Expr {
6260        // First, normalize schema_expr to use query_var
6261        let schema_var = schema_expr.extract_variable();
6262
6263        if let Some(s_var) = schema_var {
6264            let target_expr = schema_expr.substitute_variable(&s_var, query_var);
6265
6266            if expr == target_expr {
6267                return Expr::Property(
6268                    Box::new(Expr::Variable(query_var.to_string())),
6269                    gen_col.to_string(),
6270                );
6271            }
6272        }
6273
6274        // Recurse
6275        match expr {
6276            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
6277                left: Box::new(Self::replace_expression(
6278                    *left,
6279                    schema_expr,
6280                    query_var,
6281                    gen_col,
6282                )),
6283                op,
6284                right: Box::new(Self::replace_expression(
6285                    *right,
6286                    schema_expr,
6287                    query_var,
6288                    gen_col,
6289                )),
6290            },
6291            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
6292                op,
6293                expr: Box::new(Self::replace_expression(
6294                    *expr,
6295                    schema_expr,
6296                    query_var,
6297                    gen_col,
6298                )),
6299            },
6300            Expr::FunctionCall {
6301                name,
6302                args,
6303                distinct,
6304                window_spec,
6305            } => Expr::FunctionCall {
6306                name,
6307                args: args
6308                    .into_iter()
6309                    .map(|a| Self::replace_expression(a, schema_expr, query_var, gen_col))
6310                    .collect(),
6311                distinct,
6312                window_spec,
6313            },
6314            Expr::IsNull(expr) => Expr::IsNull(Box::new(Self::replace_expression(
6315                *expr,
6316                schema_expr,
6317                query_var,
6318                gen_col,
6319            ))),
6320            Expr::IsNotNull(expr) => Expr::IsNotNull(Box::new(Self::replace_expression(
6321                *expr,
6322                schema_expr,
6323                query_var,
6324                gen_col,
6325            ))),
6326            Expr::IsUnique(expr) => Expr::IsUnique(Box::new(Self::replace_expression(
6327                *expr,
6328                schema_expr,
6329                query_var,
6330                gen_col,
6331            ))),
6332            Expr::ArrayIndex {
6333                array: e,
6334                index: idx,
6335            } => Expr::ArrayIndex {
6336                array: Box::new(Self::replace_expression(
6337                    *e,
6338                    schema_expr,
6339                    query_var,
6340                    gen_col,
6341                )),
6342                index: Box::new(Self::replace_expression(
6343                    *idx,
6344                    schema_expr,
6345                    query_var,
6346                    gen_col,
6347                )),
6348            },
6349            Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
6350                array: Box::new(Self::replace_expression(
6351                    *array,
6352                    schema_expr,
6353                    query_var,
6354                    gen_col,
6355                )),
6356                start: start.map(|s| {
6357                    Box::new(Self::replace_expression(
6358                        *s,
6359                        schema_expr,
6360                        query_var,
6361                        gen_col,
6362                    ))
6363                }),
6364                end: end.map(|e| {
6365                    Box::new(Self::replace_expression(
6366                        *e,
6367                        schema_expr,
6368                        query_var,
6369                        gen_col,
6370                    ))
6371                }),
6372            },
6373            Expr::List(exprs) => Expr::List(
6374                exprs
6375                    .into_iter()
6376                    .map(|e| Self::replace_expression(e, schema_expr, query_var, gen_col))
6377                    .collect(),
6378            ),
6379            Expr::Map(entries) => Expr::Map(
6380                entries
6381                    .into_iter()
6382                    .map(|(k, v)| {
6383                        (
6384                            k,
6385                            Self::replace_expression(v, schema_expr, query_var, gen_col),
6386                        )
6387                    })
6388                    .collect(),
6389            ),
6390            Expr::Property(e, prop) => Expr::Property(
6391                Box::new(Self::replace_expression(
6392                    *e,
6393                    schema_expr,
6394                    query_var,
6395                    gen_col,
6396                )),
6397                prop,
6398            ),
6399            Expr::Case {
6400                expr: case_expr,
6401                when_then,
6402                else_expr,
6403            } => Expr::Case {
6404                expr: case_expr.map(|e| {
6405                    Box::new(Self::replace_expression(
6406                        *e,
6407                        schema_expr,
6408                        query_var,
6409                        gen_col,
6410                    ))
6411                }),
6412                when_then: when_then
6413                    .into_iter()
6414                    .map(|(w, t)| {
6415                        (
6416                            Self::replace_expression(w, schema_expr, query_var, gen_col),
6417                            Self::replace_expression(t, schema_expr, query_var, gen_col),
6418                        )
6419                    })
6420                    .collect(),
6421                else_expr: else_expr.map(|e| {
6422                    Box::new(Self::replace_expression(
6423                        *e,
6424                        schema_expr,
6425                        query_var,
6426                        gen_col,
6427                    ))
6428                }),
6429            },
6430            Expr::Reduce {
6431                accumulator,
6432                init,
6433                variable: reduce_var,
6434                list,
6435                expr: reduce_expr,
6436            } => Expr::Reduce {
6437                accumulator,
6438                init: Box::new(Self::replace_expression(
6439                    *init,
6440                    schema_expr,
6441                    query_var,
6442                    gen_col,
6443                )),
6444                variable: reduce_var,
6445                list: Box::new(Self::replace_expression(
6446                    *list,
6447                    schema_expr,
6448                    query_var,
6449                    gen_col,
6450                )),
6451                expr: Box::new(Self::replace_expression(
6452                    *reduce_expr,
6453                    schema_expr,
6454                    query_var,
6455                    gen_col,
6456                )),
6457            },
6458
6459            // Leaf nodes (Identifier, Literal, Parameter, etc.) need no recursion
6460            _ => expr,
6461        }
6462    }
6463
6464    /// Returns `true` iff `variable` is bound to a `ScanAll` operator
6465    /// (somewhere under `plan`). Used to gate the
6466    /// `WHERE n:A OR n:B` → `Union(Scan{A}, Scan{B})` rewrite — we only
6467    /// fire it when the variable is currently doing a full vertex scan,
6468    /// not when it's already bound to a labeled `Scan`.
6469    fn is_scan_all_for(plan: &LogicalPlan, variable: &str) -> bool {
6470        match plan {
6471            LogicalPlan::ScanAll { variable: var, .. } => var == variable,
6472            LogicalPlan::Filter { input, .. }
6473            | LogicalPlan::Project { input, .. }
6474            | LogicalPlan::Sort { input, .. }
6475            | LogicalPlan::Limit { input, .. }
6476            | LogicalPlan::Aggregate { input, .. }
6477            | LogicalPlan::Apply { input, .. }
6478            | LogicalPlan::Traverse { input, .. } => Self::is_scan_all_for(input, variable),
6479            LogicalPlan::CrossJoin { left, right } => {
6480                Self::is_scan_all_for(left, variable) || Self::is_scan_all_for(right, variable)
6481            }
6482            LogicalPlan::Union { left, right, .. } => {
6483                Self::is_scan_all_for(left, variable) || Self::is_scan_all_for(right, variable)
6484            }
6485            _ => false,
6486        }
6487    }
6488
6489    /// Replace the `ScanAll` for `variable` in `plan` with a left-leaning
6490    /// `Union` of label-scoped `Scan` (or `ScanMainByLabels` for unknown
6491    /// labels) operators built from `labels`. Used by the
6492    /// `WHERE n:A OR n:B` rewrite.
6493    fn replace_scan_all_with_label_union(
6494        &self,
6495        plan: LogicalPlan,
6496        variable: &str,
6497        labels: &[String],
6498        optional: bool,
6499    ) -> LogicalPlan {
6500        match plan {
6501            LogicalPlan::ScanAll {
6502                variable: var,
6503                filter,
6504                optional: scan_optional,
6505            } if var == variable => {
6506                // Heterogeneous (or any-schemaless) disjunction: route every
6507                // branch through a single-label `ScanMainByLabels` so all
6508                // branches emit a uniform schemaless schema. Avoids the
6509                // DataFusion `union_schema` panic. See `plan_unbound_node`
6510                // and issue rustic-ai/uni-db#62.
6511                let use_main_table_branches = !self.label_branches_share_property_schema(labels);
6512
6513                let mut branches: Vec<LogicalPlan> = Vec::with_capacity(labels.len());
6514                for label in labels {
6515                    let branch = if use_main_table_branches {
6516                        LogicalPlan::ScanMainByLabels {
6517                            labels: vec![label.clone()],
6518                            variable: variable.to_string(),
6519                            filter: filter.clone(),
6520                            optional: scan_optional || optional,
6521                        }
6522                    } else {
6523                        let meta = self
6524                            .schema
6525                            .get_label_case_insensitive(label)
6526                            .expect("share_property_schema true implies all labels in schema");
6527                        LogicalPlan::Scan {
6528                            label_id: meta.id,
6529                            labels: vec![label.clone()],
6530                            variable: variable.to_string(),
6531                            filter: filter.clone(),
6532                            optional: scan_optional || optional,
6533                        }
6534                    };
6535                    branches.push(branch);
6536                }
6537                let mut iter = branches.into_iter();
6538                let mut union_plan = iter.next().expect("at least one label");
6539                for next in iter {
6540                    union_plan = LogicalPlan::Union {
6541                        left: Box::new(union_plan),
6542                        right: Box::new(next),
6543                        all: false,
6544                    };
6545                }
6546                union_plan
6547            }
6548            LogicalPlan::Filter {
6549                input,
6550                predicate,
6551                optional_variables,
6552            } => LogicalPlan::Filter {
6553                input: Box::new(
6554                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6555                ),
6556                predicate,
6557                optional_variables,
6558            },
6559            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6560                input: Box::new(
6561                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6562                ),
6563                projections,
6564            },
6565            LogicalPlan::CrossJoin { left, right } => {
6566                if Self::is_scan_all_for(&left, variable) {
6567                    LogicalPlan::CrossJoin {
6568                        left: Box::new(
6569                            self.replace_scan_all_with_label_union(
6570                                *left, variable, labels, optional,
6571                            ),
6572                        ),
6573                        right,
6574                    }
6575                } else {
6576                    LogicalPlan::CrossJoin {
6577                        left,
6578                        right: Box::new(
6579                            self.replace_scan_all_with_label_union(
6580                                *right, variable, labels, optional,
6581                            ),
6582                        ),
6583                    }
6584                }
6585            }
6586            LogicalPlan::Traverse {
6587                input,
6588                edge_type_ids,
6589                direction,
6590                source_variable,
6591                target_variable,
6592                target_label_id,
6593                step_variable,
6594                min_hops,
6595                max_hops,
6596                optional: trav_optional,
6597                target_filter,
6598                path_variable,
6599                edge_properties,
6600                is_variable_length,
6601                optional_pattern_vars,
6602                scope_match_variables,
6603                edge_filter_expr,
6604                path_mode,
6605                qpp_steps,
6606            } => LogicalPlan::Traverse {
6607                input: Box::new(
6608                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6609                ),
6610                edge_type_ids,
6611                direction,
6612                source_variable,
6613                target_variable,
6614                target_label_id,
6615                step_variable,
6616                min_hops,
6617                max_hops,
6618                optional: trav_optional,
6619                target_filter,
6620                path_variable,
6621                edge_properties,
6622                is_variable_length,
6623                optional_pattern_vars,
6624                scope_match_variables,
6625                edge_filter_expr,
6626                path_mode,
6627                qpp_steps,
6628            },
6629            other => other,
6630        }
6631    }
6632
6633    /// Returns `Some(())` iff `variable` is the `step_variable` (i.e. the
6634    /// edge variable) of some `Traverse` operator in `plan`. Used to gate
6635    /// the `WHERE type(r) = 'A' OR type(r) = 'B'` rewrite — we need a
6636    /// Traverse whose types we can merge into.
6637    fn merge_traverse_types_for(
6638        plan: &LogicalPlan,
6639        edge_var: &str,
6640        _types: &[String],
6641    ) -> Option<()> {
6642        match plan {
6643            LogicalPlan::Traverse {
6644                step_variable,
6645                input,
6646                ..
6647            } => {
6648                if step_variable.as_deref() == Some(edge_var) {
6649                    Some(())
6650                } else {
6651                    Self::merge_traverse_types_for(input, edge_var, _types)
6652                }
6653            }
6654            LogicalPlan::Filter { input, .. }
6655            | LogicalPlan::Project { input, .. }
6656            | LogicalPlan::Sort { input, .. }
6657            | LogicalPlan::Limit { input, .. }
6658            | LogicalPlan::Aggregate { input, .. }
6659            | LogicalPlan::Apply { input, .. } => {
6660                Self::merge_traverse_types_for(input, edge_var, _types)
6661            }
6662            LogicalPlan::CrossJoin { left, right } | LogicalPlan::Union { left, right, .. } => {
6663                Self::merge_traverse_types_for(left, edge_var, _types)
6664                    .or_else(|| Self::merge_traverse_types_for(right, edge_var, _types))
6665            }
6666            _ => None,
6667        }
6668    }
6669
6670    /// Replace `edge_type_ids` on the Traverse whose `step_variable`
6671    /// equals `edge_var`. Used by the type-OR rewrite.
6672    fn set_traverse_edge_type_ids(
6673        plan: LogicalPlan,
6674        edge_var: &str,
6675        new_ids: Vec<u32>,
6676    ) -> LogicalPlan {
6677        match plan {
6678            LogicalPlan::Traverse {
6679                input,
6680                edge_type_ids,
6681                direction,
6682                source_variable,
6683                target_variable,
6684                target_label_id,
6685                step_variable,
6686                min_hops,
6687                max_hops,
6688                optional,
6689                target_filter,
6690                path_variable,
6691                edge_properties,
6692                is_variable_length,
6693                optional_pattern_vars,
6694                scope_match_variables,
6695                edge_filter_expr,
6696                path_mode,
6697                qpp_steps,
6698            } => {
6699                let matches_var = step_variable.as_deref() == Some(edge_var);
6700                let recursed_input = if matches_var {
6701                    input
6702                } else {
6703                    Box::new(Self::set_traverse_edge_type_ids(
6704                        *input,
6705                        edge_var,
6706                        new_ids.clone(),
6707                    ))
6708                };
6709                LogicalPlan::Traverse {
6710                    input: recursed_input,
6711                    edge_type_ids: if matches_var { new_ids } else { edge_type_ids },
6712                    direction,
6713                    source_variable,
6714                    target_variable,
6715                    target_label_id,
6716                    step_variable,
6717                    min_hops,
6718                    max_hops,
6719                    optional,
6720                    target_filter,
6721                    path_variable,
6722                    edge_properties,
6723                    is_variable_length,
6724                    optional_pattern_vars,
6725                    scope_match_variables,
6726                    edge_filter_expr,
6727                    path_mode,
6728                    qpp_steps,
6729                }
6730            }
6731            LogicalPlan::Filter {
6732                input,
6733                predicate,
6734                optional_variables,
6735            } => LogicalPlan::Filter {
6736                input: Box::new(Self::set_traverse_edge_type_ids(*input, edge_var, new_ids)),
6737                predicate,
6738                optional_variables,
6739            },
6740            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6741                input: Box::new(Self::set_traverse_edge_type_ids(*input, edge_var, new_ids)),
6742                projections,
6743            },
6744            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6745                left: Box::new(Self::set_traverse_edge_type_ids(
6746                    *left,
6747                    edge_var,
6748                    new_ids.clone(),
6749                )),
6750                right: Box::new(Self::set_traverse_edge_type_ids(*right, edge_var, new_ids)),
6751            },
6752            other => other,
6753        }
6754    }
6755
6756    /// Check if the variable is the target of a Traverse node
6757    fn is_traverse_target(plan: &LogicalPlan, variable: &str) -> bool {
6758        match plan {
6759            LogicalPlan::Traverse {
6760                target_variable,
6761                input,
6762                ..
6763            } => target_variable == variable || Self::is_traverse_target(input, variable),
6764            LogicalPlan::Filter { input, .. }
6765            | LogicalPlan::Project { input, .. }
6766            | LogicalPlan::Sort { input, .. }
6767            | LogicalPlan::Limit { input, .. }
6768            | LogicalPlan::Aggregate { input, .. }
6769            | LogicalPlan::Apply { input, .. } => Self::is_traverse_target(input, variable),
6770            LogicalPlan::CrossJoin { left, right } => {
6771                Self::is_traverse_target(left, variable)
6772                    || Self::is_traverse_target(right, variable)
6773            }
6774            _ => false,
6775        }
6776    }
6777
6778    /// Push a predicate into a Traverse's target_filter for the specified variable
6779    fn push_predicate_to_traverse(
6780        plan: LogicalPlan,
6781        variable: &str,
6782        predicate: Expr,
6783    ) -> LogicalPlan {
6784        match plan {
6785            LogicalPlan::Traverse {
6786                input,
6787                edge_type_ids,
6788                direction,
6789                source_variable,
6790                target_variable,
6791                target_label_id,
6792                step_variable,
6793                min_hops,
6794                max_hops,
6795                optional,
6796                target_filter,
6797                path_variable,
6798                edge_properties,
6799                is_variable_length,
6800                optional_pattern_vars,
6801                scope_match_variables,
6802                edge_filter_expr,
6803                path_mode,
6804                qpp_steps,
6805            } => {
6806                if target_variable == variable {
6807                    // Found the traverse producing this variable
6808                    let new_filter = match target_filter {
6809                        Some(existing) => Some(Expr::BinaryOp {
6810                            left: Box::new(existing),
6811                            op: BinaryOp::And,
6812                            right: Box::new(predicate),
6813                        }),
6814                        None => Some(predicate),
6815                    };
6816                    LogicalPlan::Traverse {
6817                        input,
6818                        edge_type_ids,
6819                        direction,
6820                        source_variable,
6821                        target_variable,
6822                        target_label_id,
6823                        step_variable,
6824                        min_hops,
6825                        max_hops,
6826                        optional,
6827                        target_filter: new_filter,
6828                        path_variable,
6829                        edge_properties,
6830                        is_variable_length,
6831                        optional_pattern_vars,
6832                        scope_match_variables,
6833                        edge_filter_expr,
6834                        path_mode,
6835                        qpp_steps,
6836                    }
6837                } else {
6838                    // Recurse into input
6839                    LogicalPlan::Traverse {
6840                        input: Box::new(Self::push_predicate_to_traverse(
6841                            *input, variable, predicate,
6842                        )),
6843                        edge_type_ids,
6844                        direction,
6845                        source_variable,
6846                        target_variable,
6847                        target_label_id,
6848                        step_variable,
6849                        min_hops,
6850                        max_hops,
6851                        optional,
6852                        target_filter,
6853                        path_variable,
6854                        edge_properties,
6855                        is_variable_length,
6856                        optional_pattern_vars,
6857                        scope_match_variables,
6858                        edge_filter_expr,
6859                        path_mode,
6860                        qpp_steps,
6861                    }
6862                }
6863            }
6864            LogicalPlan::Filter {
6865                input,
6866                predicate: p,
6867                optional_variables: opt_vars,
6868            } => LogicalPlan::Filter {
6869                input: Box::new(Self::push_predicate_to_traverse(
6870                    *input, variable, predicate,
6871                )),
6872                predicate: p,
6873                optional_variables: opt_vars,
6874            },
6875            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6876                input: Box::new(Self::push_predicate_to_traverse(
6877                    *input, variable, predicate,
6878                )),
6879                projections,
6880            },
6881            LogicalPlan::CrossJoin { left, right } => {
6882                // Check which side has the variable
6883                if Self::is_traverse_target(&left, variable) {
6884                    LogicalPlan::CrossJoin {
6885                        left: Box::new(Self::push_predicate_to_traverse(
6886                            *left, variable, predicate,
6887                        )),
6888                        right,
6889                    }
6890                } else {
6891                    LogicalPlan::CrossJoin {
6892                        left,
6893                        right: Box::new(Self::push_predicate_to_traverse(
6894                            *right, variable, predicate,
6895                        )),
6896                    }
6897                }
6898            }
6899            other => other,
6900        }
6901    }
6902
6903    /// Plan a WITH clause, handling aggregations and projections.
6904    fn plan_with_clause(
6905        &self,
6906        with_clause: &WithClause,
6907        plan: LogicalPlan,
6908        vars_in_scope: &[VariableInfo],
6909    ) -> Result<(LogicalPlan, Vec<VariableInfo>)> {
6910        let mut plan = plan;
6911        let mut group_by: Vec<Expr> = Vec::new();
6912        let mut aggregates: Vec<Expr> = Vec::new();
6913        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
6914        let mut has_agg = false;
6915        let mut projections = Vec::new();
6916        let mut new_vars: Vec<VariableInfo> = Vec::new();
6917        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
6918        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
6919        let mut projected_aliases: HashSet<String> = HashSet::new();
6920        let mut has_unaliased_non_variable_expr = false;
6921
6922        for item in &with_clause.items {
6923            match item {
6924                ReturnItem::All => {
6925                    // WITH * - add all variables in scope
6926                    for v in vars_in_scope {
6927                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
6928                        projected_aliases.insert(v.name.clone());
6929                        projected_simple_reprs.insert(v.name.clone());
6930                    }
6931                    new_vars.extend(vars_in_scope.iter().cloned());
6932                }
6933                ReturnItem::Expr { expr, alias, .. } => {
6934                    if matches!(expr, Expr::Wildcard) {
6935                        for v in vars_in_scope {
6936                            projections
6937                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
6938                            projected_aliases.insert(v.name.clone());
6939                            projected_simple_reprs.insert(v.name.clone());
6940                        }
6941                        new_vars.extend(vars_in_scope.iter().cloned());
6942                    } else {
6943                        // Validate expression variables and syntax
6944                        validate_expression_variables(expr, vars_in_scope)?;
6945                        validate_expression(expr, vars_in_scope)?;
6946                        // Pattern predicates are not allowed in WITH
6947                        if contains_pattern_predicate(expr) {
6948                            return Err(anyhow!(
6949                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in WITH"
6950                            ));
6951                        }
6952
6953                        projections.push((expr.clone(), alias.clone()));
6954                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
6955                            // Bare aggregate — push directly
6956                            has_agg = true;
6957                            aggregates.push(expr.clone());
6958                            projected_aggregate_reprs.insert(expr.to_string_repr());
6959                        } else if !is_window_function(expr)
6960                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
6961                        {
6962                            // Compound aggregate or expression containing aggregates
6963                            has_agg = true;
6964                            compound_agg_exprs.push(expr.clone());
6965                            for inner in extract_inner_aggregates(expr) {
6966                                let repr = inner.to_string_repr();
6967                                if !projected_aggregate_reprs.contains(&repr) {
6968                                    aggregates.push(inner);
6969                                    projected_aggregate_reprs.insert(repr);
6970                                }
6971                            }
6972                        } else if !group_by.contains(expr) {
6973                            group_by.push(expr.clone());
6974                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
6975                                projected_simple_reprs.insert(expr.to_string_repr());
6976                            }
6977                        }
6978
6979                        // Preserve non-scalar type information when WITH aliases
6980                        // entity/path-capable expressions.
6981                        if let Some(a) = alias {
6982                            if projected_aliases.contains(a) {
6983                                return Err(anyhow!(
6984                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
6985                                    a
6986                                ));
6987                            }
6988                            let inferred = infer_with_output_type(expr, vars_in_scope);
6989                            new_vars.push(VariableInfo::new(a.clone(), inferred));
6990                            projected_aliases.insert(a.clone());
6991                        } else if let Expr::Variable(v) = expr {
6992                            if projected_aliases.contains(v) {
6993                                return Err(anyhow!(
6994                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
6995                                    v
6996                                ));
6997                            }
6998                            // Preserve the original type if the variable is just passed through
6999                            if let Some(existing) = find_var_in_scope(vars_in_scope, v) {
7000                                new_vars.push(existing.clone());
7001                            } else {
7002                                new_vars.push(VariableInfo::new(v.clone(), VariableType::Scalar));
7003                            }
7004                            projected_aliases.insert(v.clone());
7005                        } else {
7006                            has_unaliased_non_variable_expr = true;
7007                        }
7008                    }
7009                }
7010            }
7011        }
7012
7013        // Collect extra variables that need to survive the projection stage
7014        // for later WHERE / ORDER BY evaluation, then strip them afterwards.
7015        let projected_names: HashSet<&str> = new_vars.iter().map(|v| v.name.as_str()).collect();
7016        let mut passthrough_extras: Vec<String> = Vec::new();
7017        let mut seen_passthrough: HashSet<String> = HashSet::new();
7018
7019        if let Some(predicate) = &with_clause.where_clause {
7020            for name in collect_expr_variables(predicate) {
7021                if !projected_names.contains(name.as_str())
7022                    && find_var_in_scope(vars_in_scope, &name).is_some()
7023                    && seen_passthrough.insert(name.clone())
7024                {
7025                    passthrough_extras.push(name);
7026                }
7027            }
7028        }
7029
7030        // Non-aggregating WITH allows ORDER BY to reference incoming variables.
7031        // Carry those variables through the projection so Sort can resolve them.
7032        if !has_agg && let Some(order_by) = &with_clause.order_by {
7033            for item in order_by {
7034                for name in collect_expr_variables(&item.expr) {
7035                    if !projected_names.contains(name.as_str())
7036                        && find_var_in_scope(vars_in_scope, &name).is_some()
7037                        && seen_passthrough.insert(name.clone())
7038                    {
7039                        passthrough_extras.push(name);
7040                    }
7041                }
7042            }
7043        }
7044
7045        let needs_cleanup = !passthrough_extras.is_empty();
7046        for extra in &passthrough_extras {
7047            projections.push((Expr::Variable(extra.clone()), Some(extra.clone())));
7048        }
7049
7050        // Validate compound aggregate expressions: non-aggregate refs must be
7051        // individually present in the group_by as simple variables or properties.
7052        if has_agg {
7053            let group_by_reprs: HashSet<String> =
7054                group_by.iter().map(|e| e.to_string_repr()).collect();
7055            for expr in &compound_agg_exprs {
7056                let mut refs = Vec::new();
7057                collect_non_aggregate_refs(expr, false, &mut refs);
7058                for r in &refs {
7059                    let is_covered = match r {
7060                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
7061                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
7062                    };
7063                    if !is_covered {
7064                        return Err(anyhow!(
7065                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
7066                        ));
7067                    }
7068                }
7069            }
7070        }
7071
7072        if has_agg {
7073            plan = LogicalPlan::Aggregate {
7074                input: Box::new(plan),
7075                group_by,
7076                aggregates,
7077            };
7078
7079            // Insert a renaming Project so downstream clauses (WHERE, RETURN)
7080            // can reference the WITH aliases instead of raw column names.
7081            let rename_projections: Vec<(Expr, Option<String>)> = projections
7082                .iter()
7083                .map(|(expr, alias)| {
7084                    if expr.is_aggregate() && !is_compound_aggregate(expr) {
7085                        // Bare aggregate — reference by column name
7086                        (Expr::Variable(aggregate_column_name(expr)), alias.clone())
7087                    } else if is_compound_aggregate(expr)
7088                        || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
7089                    {
7090                        // Compound aggregate — replace inner aggregates with
7091                        // column references, keep outer expression
7092                        (replace_aggregates_with_columns(expr), alias.clone())
7093                    } else {
7094                        (Expr::Variable(expr.to_string_repr()), alias.clone())
7095                    }
7096                })
7097                .collect();
7098            plan = LogicalPlan::Project {
7099                input: Box::new(plan),
7100                projections: rename_projections,
7101            };
7102        } else if !projections.is_empty() {
7103            plan = LogicalPlan::Project {
7104                input: Box::new(plan),
7105                projections: projections.clone(),
7106            };
7107        }
7108
7109        // Apply the WHERE filter (post-projection, with extras still visible).
7110        if let Some(predicate) = &with_clause.where_clause {
7111            plan = LogicalPlan::Filter {
7112                input: Box::new(plan),
7113                predicate: predicate.clone(),
7114                optional_variables: HashSet::new(),
7115            };
7116        }
7117
7118        // Validate and apply ORDER BY for WITH clause.
7119        // Keep pre-WITH vars in scope for parser compatibility, then apply
7120        // stricter checks for aggregate-containing ORDER BY items.
7121        if let Some(order_by) = &with_clause.order_by {
7122            // Build a mapping from aliases and projected expression reprs to
7123            // output columns of the preceding Project/Aggregate pipeline.
7124            let with_order_aliases: HashMap<String, Expr> = projections
7125                .iter()
7126                .flat_map(|(expr, alias)| {
7127                    let output_col = if let Some(a) = alias {
7128                        a.clone()
7129                    } else if expr.is_aggregate() && !is_compound_aggregate(expr) {
7130                        aggregate_column_name(expr)
7131                    } else {
7132                        expr.to_string_repr()
7133                    };
7134
7135                    let mut entries = Vec::new();
7136                    // ORDER BY alias
7137                    if let Some(a) = alias {
7138                        entries.push((a.clone(), Expr::Variable(output_col.clone())));
7139                    }
7140                    // ORDER BY projected expression (e.g. me.age)
7141                    entries.push((expr.to_string_repr(), Expr::Variable(output_col)));
7142                    entries
7143                })
7144                .collect();
7145
7146            let order_by_scope: Vec<VariableInfo> = {
7147                let mut scope = new_vars.clone();
7148                for v in vars_in_scope {
7149                    if !is_var_in_scope(&scope, &v.name) {
7150                        scope.push(v.clone());
7151                    }
7152                }
7153                scope
7154            };
7155            for item in order_by {
7156                validate_expression_variables(&item.expr, &order_by_scope)?;
7157                validate_expression(&item.expr, &order_by_scope)?;
7158                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
7159                if has_aggregate_in_item && !has_agg {
7160                    return Err(anyhow!(
7161                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY of WITH"
7162                    ));
7163                }
7164                if has_agg && has_aggregate_in_item {
7165                    validate_with_order_by_aggregate_item(
7166                        &item.expr,
7167                        &projected_aggregate_reprs,
7168                        &projected_simple_reprs,
7169                        &projected_aliases,
7170                    )?;
7171                }
7172            }
7173            let rewritten_order_by: Vec<SortItem> = order_by
7174                .iter()
7175                .map(|item| {
7176                    let mut expr =
7177                        rewrite_order_by_expr_with_aliases(&item.expr, &with_order_aliases);
7178                    if has_agg {
7179                        // Rewrite any aggregate calls to the aggregate output
7180                        // columns produced by Aggregate.
7181                        expr = replace_aggregates_with_columns(&expr);
7182                        // Then re-map projected property expressions to aliases
7183                        // from the WITH projection.
7184                        expr = rewrite_order_by_expr_with_aliases(&expr, &with_order_aliases);
7185                    }
7186                    SortItem {
7187                        expr,
7188                        ascending: item.ascending,
7189                    }
7190                })
7191                .collect();
7192            plan = LogicalPlan::Sort {
7193                input: Box::new(plan),
7194                order_by: rewritten_order_by,
7195            };
7196        }
7197
7198        // Non-variable expressions in WITH must be aliased.
7199        // This check is intentionally placed after ORDER BY validation so
7200        // higher-priority semantic errors (e.g., ambiguous aggregation in
7201        // ORDER BY) can surface first.
7202        if has_unaliased_non_variable_expr {
7203            return Err(anyhow!(
7204                "SyntaxError: NoExpressionAlias - All non-variable expressions in WITH must be aliased"
7205            ));
7206        }
7207
7208        // Validate and apply SKIP/LIMIT for WITH clause
7209        let skip = with_clause
7210            .skip
7211            .as_ref()
7212            .map(|e| {
7213                self.note_folded_limit_skip(e);
7214                parse_non_negative_integer(e, "SKIP", &self.params)
7215            })
7216            .transpose()?
7217            .flatten();
7218        let fetch = with_clause
7219            .limit
7220            .as_ref()
7221            .map(|e| {
7222                self.note_folded_limit_skip(e);
7223                parse_non_negative_integer(e, "LIMIT", &self.params)
7224            })
7225            .transpose()?
7226            .flatten();
7227
7228        if skip.is_some() || fetch.is_some() {
7229            plan = LogicalPlan::Limit {
7230                input: Box::new(plan),
7231                skip,
7232                fetch,
7233            };
7234        }
7235
7236        // Strip passthrough columns that were only needed by WHERE / ORDER BY.
7237        if needs_cleanup {
7238            let cleanup_projections: Vec<(Expr, Option<String>)> = new_vars
7239                .iter()
7240                .map(|v| (Expr::Variable(v.name.clone()), Some(v.name.clone())))
7241                .collect();
7242            plan = LogicalPlan::Project {
7243                input: Box::new(plan),
7244                projections: cleanup_projections,
7245            };
7246        }
7247
7248        if with_clause.distinct {
7249            plan = LogicalPlan::Distinct {
7250                input: Box::new(plan),
7251            };
7252        }
7253
7254        Ok((plan, new_vars))
7255    }
7256
7257    fn plan_with_recursive(
7258        &self,
7259        with_recursive: &WithRecursiveClause,
7260        _prev_plan: LogicalPlan,
7261        vars_in_scope: &[VariableInfo],
7262    ) -> Result<LogicalPlan> {
7263        // WITH RECURSIVE requires a UNION query with anchor and recursive parts
7264        match &*with_recursive.query {
7265            Query::Union { left, right, .. } => {
7266                // Plan the anchor (initial) query with current scope
7267                let initial_plan = self.rewrite_and_plan_typed(*left.clone(), vars_in_scope)?;
7268
7269                // Plan the recursive query with the CTE name added to scope
7270                // so it can reference itself
7271                let mut recursive_scope = vars_in_scope.to_vec();
7272                recursive_scope.push(VariableInfo::new(
7273                    with_recursive.name.clone(),
7274                    VariableType::Scalar,
7275                ));
7276                let recursive_plan =
7277                    self.rewrite_and_plan_typed(*right.clone(), &recursive_scope)?;
7278
7279                Ok(LogicalPlan::RecursiveCTE {
7280                    cte_name: with_recursive.name.clone(),
7281                    initial: Box::new(initial_plan),
7282                    recursive: Box::new(recursive_plan),
7283                })
7284            }
7285            _ => Err(anyhow::anyhow!(
7286                "WITH RECURSIVE requires a UNION query with anchor and recursive parts"
7287            )),
7288        }
7289    }
7290
7291    pub fn properties_to_expr(&self, variable: &str, properties: &Option<Expr>) -> Option<Expr> {
7292        let entries = match properties {
7293            Some(Expr::Map(entries)) => entries,
7294            _ => return None,
7295        };
7296
7297        if entries.is_empty() {
7298            return None;
7299        }
7300        let mut final_expr = None;
7301        for (prop, val_expr) in entries {
7302            let eq_expr = Expr::BinaryOp {
7303                left: Box::new(Expr::Property(
7304                    Box::new(Expr::Variable(variable.to_string())),
7305                    prop.clone(),
7306                )),
7307                op: BinaryOp::Eq,
7308                right: Box::new(val_expr.clone()),
7309            };
7310
7311            if let Some(e) = final_expr {
7312                final_expr = Some(Expr::BinaryOp {
7313                    left: Box::new(e),
7314                    op: BinaryOp::And,
7315                    right: Box::new(eq_expr),
7316                });
7317            } else {
7318                final_expr = Some(eq_expr);
7319            }
7320        }
7321        final_expr
7322    }
7323
7324    /// Build a filter expression from node properties and labels.
7325    ///
7326    /// This is used for TraverseMainByType where we need to filter target nodes
7327    /// by both labels and properties. Label checks use hasLabel(variable, 'label').
7328    pub fn node_filter_expr(
7329        &self,
7330        variable: &str,
7331        labels: &[String],
7332        properties: &Option<Expr>,
7333    ) -> Option<Expr> {
7334        let mut final_expr = None;
7335
7336        // Add label checks using hasLabel(variable, 'label')
7337        for label in labels {
7338            let label_check = Expr::FunctionCall {
7339                name: "hasLabel".to_string(),
7340                args: vec![
7341                    Expr::Variable(variable.to_string()),
7342                    Expr::Literal(CypherLiteral::String(label.clone())),
7343                ],
7344                distinct: false,
7345                window_spec: None,
7346            };
7347
7348            final_expr = match final_expr {
7349                Some(e) => Some(Expr::BinaryOp {
7350                    left: Box::new(e),
7351                    op: BinaryOp::And,
7352                    right: Box::new(label_check),
7353                }),
7354                None => Some(label_check),
7355            };
7356        }
7357
7358        // Add property checks
7359        if let Some(prop_expr) = self.properties_to_expr(variable, properties) {
7360            final_expr = match final_expr {
7361                Some(e) => Some(Expr::BinaryOp {
7362                    left: Box::new(e),
7363                    op: BinaryOp::And,
7364                    right: Box::new(prop_expr),
7365                }),
7366                None => Some(prop_expr),
7367            };
7368        }
7369
7370        final_expr
7371    }
7372
7373    /// Create a filter plan that ensures traversed target matches a bound variable.
7374    ///
7375    /// Used in EXISTS subquery patterns where the target is already bound.
7376    /// Compares the target's VID against the bound variable's VID.
7377    fn wrap_with_bound_target_filter(plan: LogicalPlan, target_variable: &str) -> LogicalPlan {
7378        // Compare the traverse-discovered target's VID against the bound variable's VID.
7379        // Left side: Property access on the variable from current scope.
7380        // Right side: Variable column "{var}._vid" from traverse output (outer scope).
7381        // We use Variable("{var}._vid") to access the VID column from the traverse output,
7382        // not Property(Variable("{var}"), "_vid") because the column is already flattened.
7383        let bound_check = Expr::BinaryOp {
7384            left: Box::new(Expr::Property(
7385                Box::new(Expr::Variable(target_variable.to_string())),
7386                "_vid".to_string(),
7387            )),
7388            op: BinaryOp::Eq,
7389            right: Box::new(Expr::Variable(format!("{}._vid", target_variable))),
7390        };
7391        LogicalPlan::Filter {
7392            input: Box::new(plan),
7393            predicate: bound_check,
7394            optional_variables: HashSet::new(),
7395        }
7396    }
7397
7398    /// Replace a Scan node matching the variable with a VectorKnn node
7399    fn replace_scan_with_knn(
7400        plan: LogicalPlan,
7401        variable: &str,
7402        property: &str,
7403        query: Expr,
7404        threshold: Option<f32>,
7405    ) -> LogicalPlan {
7406        match plan {
7407            LogicalPlan::Scan {
7408                label_id,
7409                labels,
7410                variable: scan_var,
7411                filter,
7412                optional,
7413            } => {
7414                if scan_var == variable {
7415                    // Inject any existing scan filter into VectorKnn?
7416                    // VectorKnn doesn't support pre-filtering natively in logical plan yet (except threshold).
7417                    // Typically filter is applied post-Knn or during Knn if supported.
7418                    // For now, we assume filter is residual or handled by `extract_vector_similarity` which separates residual.
7419                    // If `filter` is present on Scan, it must be preserved.
7420                    // We can wrap VectorKnn in Filter if Scan had filter.
7421
7422                    let knn = LogicalPlan::VectorKnn {
7423                        label_id,
7424                        variable: variable.to_string(),
7425                        property: property.to_string(),
7426                        query,
7427                        k: 100, // Default K, should push down LIMIT
7428                        threshold,
7429                    };
7430
7431                    if let Some(f) = filter {
7432                        LogicalPlan::Filter {
7433                            input: Box::new(knn),
7434                            predicate: f,
7435                            optional_variables: HashSet::new(),
7436                        }
7437                    } else {
7438                        knn
7439                    }
7440                } else {
7441                    LogicalPlan::Scan {
7442                        label_id,
7443                        labels,
7444                        variable: scan_var,
7445                        filter,
7446                        optional,
7447                    }
7448                }
7449            }
7450            LogicalPlan::Filter {
7451                input,
7452                predicate,
7453                optional_variables,
7454            } => LogicalPlan::Filter {
7455                input: Box::new(Self::replace_scan_with_knn(
7456                    *input, variable, property, query, threshold,
7457                )),
7458                predicate,
7459                optional_variables,
7460            },
7461            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7462                input: Box::new(Self::replace_scan_with_knn(
7463                    *input, variable, property, query, threshold,
7464                )),
7465                projections,
7466            },
7467            LogicalPlan::Limit { input, skip, fetch } => {
7468                // If we encounter Limit, we should ideally push K down to VectorKnn
7469                // But replace_scan_with_knn is called from plan_where_clause which is inside plan_match.
7470                // Limit comes later.
7471                // To support Limit pushdown, we need a separate optimizer pass or do it in plan_single.
7472                LogicalPlan::Limit {
7473                    input: Box::new(Self::replace_scan_with_knn(
7474                        *input, variable, property, query, threshold,
7475                    )),
7476                    skip,
7477                    fetch,
7478                }
7479            }
7480            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
7481                left: Box::new(Self::replace_scan_with_knn(
7482                    *left,
7483                    variable,
7484                    property,
7485                    query.clone(),
7486                    threshold,
7487                )),
7488                right: Box::new(Self::replace_scan_with_knn(
7489                    *right, variable, property, query, threshold,
7490                )),
7491            },
7492            other => other,
7493        }
7494    }
7495
7496    /// Find the label_id for a Scan node matching the given variable
7497    fn find_scan_label_id(plan: &LogicalPlan, variable: &str) -> Option<u16> {
7498        match plan {
7499            LogicalPlan::Scan {
7500                label_id,
7501                variable: var,
7502                ..
7503            } if var == variable => Some(*label_id),
7504            LogicalPlan::ScanAll { variable: var, .. } if var == variable => Some(0),
7505            LogicalPlan::Filter { input, .. }
7506            | LogicalPlan::Project { input, .. }
7507            | LogicalPlan::Sort { input, .. }
7508            | LogicalPlan::Limit { input, .. }
7509            | LogicalPlan::Aggregate { input, .. }
7510            | LogicalPlan::Apply { input, .. } => Self::find_scan_label_id(input, variable),
7511            LogicalPlan::CrossJoin { left, right } => Self::find_scan_label_id(left, variable)
7512                .or_else(|| Self::find_scan_label_id(right, variable)),
7513            LogicalPlan::Traverse { input, .. } => Self::find_scan_label_id(input, variable),
7514            _ => None,
7515        }
7516    }
7517
7518    /// Push a predicate into a Scan's filter for the specified variable
7519    fn push_predicate_to_scan(plan: LogicalPlan, variable: &str, predicate: Expr) -> LogicalPlan {
7520        match plan {
7521            LogicalPlan::Scan {
7522                label_id,
7523                labels,
7524                variable: var,
7525                filter,
7526                optional,
7527            } if var == variable => {
7528                // Merge the predicate with existing filter
7529                let new_filter = match filter {
7530                    Some(existing) => Some(Expr::BinaryOp {
7531                        left: Box::new(existing),
7532                        op: BinaryOp::And,
7533                        right: Box::new(predicate),
7534                    }),
7535                    None => Some(predicate),
7536                };
7537                LogicalPlan::Scan {
7538                    label_id,
7539                    labels,
7540                    variable: var,
7541                    filter: new_filter,
7542                    optional,
7543                }
7544            }
7545            LogicalPlan::ScanAll {
7546                variable: var,
7547                filter,
7548                optional,
7549            } if var == variable => {
7550                let new_filter = match filter {
7551                    Some(existing) => Some(Expr::BinaryOp {
7552                        left: Box::new(existing),
7553                        op: BinaryOp::And,
7554                        right: Box::new(predicate),
7555                    }),
7556                    None => Some(predicate),
7557                };
7558                LogicalPlan::ScanAll {
7559                    variable: var,
7560                    filter: new_filter,
7561                    optional,
7562                }
7563            }
7564            LogicalPlan::Filter {
7565                input,
7566                predicate: p,
7567                optional_variables: opt_vars,
7568            } => LogicalPlan::Filter {
7569                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7570                predicate: p,
7571                optional_variables: opt_vars,
7572            },
7573            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7574                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7575                projections,
7576            },
7577            LogicalPlan::CrossJoin { left, right } => {
7578                // Check which side has the variable
7579                if Self::find_scan_label_id(&left, variable).is_some() {
7580                    LogicalPlan::CrossJoin {
7581                        left: Box::new(Self::push_predicate_to_scan(*left, variable, predicate)),
7582                        right,
7583                    }
7584                } else {
7585                    LogicalPlan::CrossJoin {
7586                        left,
7587                        right: Box::new(Self::push_predicate_to_scan(*right, variable, predicate)),
7588                    }
7589                }
7590            }
7591            LogicalPlan::Traverse {
7592                input,
7593                edge_type_ids,
7594                direction,
7595                source_variable,
7596                target_variable,
7597                target_label_id,
7598                step_variable,
7599                min_hops,
7600                max_hops,
7601                optional,
7602                target_filter,
7603                path_variable,
7604                edge_properties,
7605                is_variable_length,
7606                optional_pattern_vars,
7607                scope_match_variables,
7608                edge_filter_expr,
7609                path_mode,
7610                qpp_steps,
7611            } => LogicalPlan::Traverse {
7612                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7613                edge_type_ids,
7614                direction,
7615                source_variable,
7616                target_variable,
7617                target_label_id,
7618                step_variable,
7619                min_hops,
7620                max_hops,
7621                optional,
7622                target_filter,
7623                path_variable,
7624                edge_properties,
7625                is_variable_length,
7626                optional_pattern_vars,
7627                scope_match_variables,
7628                edge_filter_expr,
7629                path_mode,
7630                qpp_steps,
7631            },
7632            other => other,
7633        }
7634    }
7635
7636    /// Extract predicates that reference only the specified variable
7637    fn extract_variable_predicates(predicate: &Expr, variable: &str) -> (Vec<Expr>, Option<Expr>) {
7638        let analyzer = PredicateAnalyzer::new();
7639        let analysis = analyzer.analyze(predicate, variable);
7640
7641        // Return pushable predicates and combined residual
7642        let residual = if analysis.residual.is_empty() {
7643            None
7644        } else {
7645            let mut iter = analysis.residual.into_iter();
7646            let first = iter.next().unwrap();
7647            Some(iter.fold(first, |acc, e| Expr::BinaryOp {
7648                left: Box::new(acc),
7649                op: BinaryOp::And,
7650                right: Box::new(e),
7651            }))
7652        };
7653
7654        (analysis.pushable, residual)
7655    }
7656
7657    // =====================================================================
7658    // Apply Predicate Pushdown - Helper Functions
7659    // =====================================================================
7660
7661    /// Split AND-connected predicates into a list.
7662    fn split_and_conjuncts(expr: &Expr) -> Vec<Expr> {
7663        match expr {
7664            Expr::BinaryOp {
7665                left,
7666                op: BinaryOp::And,
7667                right,
7668            } => {
7669                let mut result = Self::split_and_conjuncts(left);
7670                result.extend(Self::split_and_conjuncts(right));
7671                result
7672            }
7673            _ => vec![expr.clone()],
7674        }
7675    }
7676
7677    /// Combine predicates with AND.
7678    fn combine_predicates(predicates: Vec<Expr>) -> Option<Expr> {
7679        if predicates.is_empty() {
7680            return None;
7681        }
7682        let mut result = predicates[0].clone();
7683        for pred in predicates.iter().skip(1) {
7684            result = Expr::BinaryOp {
7685                left: Box::new(result),
7686                op: BinaryOp::And,
7687                right: Box::new(pred.clone()),
7688            };
7689        }
7690        Some(result)
7691    }
7692
7693    /// Collect all variable names referenced in an expression.
7694    fn collect_expr_variables(expr: &Expr) -> HashSet<String> {
7695        let mut vars = HashSet::new();
7696        Self::collect_expr_variables_impl(expr, &mut vars);
7697        vars
7698    }
7699
7700    fn collect_expr_variables_impl(expr: &Expr, vars: &mut HashSet<String>) {
7701        match expr {
7702            Expr::Variable(name) => {
7703                vars.insert(name.clone());
7704            }
7705            Expr::Property(inner, _) => {
7706                if let Expr::Variable(name) = inner.as_ref() {
7707                    vars.insert(name.clone());
7708                } else {
7709                    Self::collect_expr_variables_impl(inner, vars);
7710                }
7711            }
7712            Expr::BinaryOp { left, right, .. } => {
7713                Self::collect_expr_variables_impl(left, vars);
7714                Self::collect_expr_variables_impl(right, vars);
7715            }
7716            Expr::UnaryOp { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
7717            Expr::IsNull(e) | Expr::IsNotNull(e) => Self::collect_expr_variables_impl(e, vars),
7718            Expr::FunctionCall { args, .. } => {
7719                for arg in args {
7720                    Self::collect_expr_variables_impl(arg, vars);
7721                }
7722            }
7723            Expr::List(items) => {
7724                for item in items {
7725                    Self::collect_expr_variables_impl(item, vars);
7726                }
7727            }
7728            Expr::Case {
7729                expr,
7730                when_then,
7731                else_expr,
7732            } => {
7733                if let Some(e) = expr {
7734                    Self::collect_expr_variables_impl(e, vars);
7735                }
7736                for (w, t) in when_then {
7737                    Self::collect_expr_variables_impl(w, vars);
7738                    Self::collect_expr_variables_impl(t, vars);
7739                }
7740                if let Some(e) = else_expr {
7741                    Self::collect_expr_variables_impl(e, vars);
7742                }
7743            }
7744            Expr::LabelCheck { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
7745            // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
7746            // they introduce local variable bindings not in outer scope.
7747            _ => {}
7748        }
7749    }
7750
7751    /// Collect all variables produced by a logical plan.
7752    fn collect_plan_variables(plan: &LogicalPlan) -> HashSet<String> {
7753        let mut vars = HashSet::new();
7754        Self::collect_plan_variables_impl(plan, &mut vars);
7755        vars
7756    }
7757
7758    fn collect_plan_variables_impl(plan: &LogicalPlan, vars: &mut HashSet<String>) {
7759        match plan {
7760            LogicalPlan::Scan { variable, .. } => {
7761                vars.insert(variable.clone());
7762            }
7763            LogicalPlan::Traverse {
7764                target_variable,
7765                step_variable,
7766                input,
7767                path_variable,
7768                ..
7769            } => {
7770                vars.insert(target_variable.clone());
7771                if let Some(sv) = step_variable {
7772                    vars.insert(sv.clone());
7773                }
7774                if let Some(pv) = path_variable {
7775                    vars.insert(pv.clone());
7776                }
7777                Self::collect_plan_variables_impl(input, vars);
7778            }
7779            LogicalPlan::Filter { input, .. } => Self::collect_plan_variables_impl(input, vars),
7780            LogicalPlan::Project { input, projections } => {
7781                for (expr, alias) in projections {
7782                    if let Some(a) = alias {
7783                        vars.insert(a.clone());
7784                    } else if let Expr::Variable(v) = expr {
7785                        vars.insert(v.clone());
7786                    }
7787                }
7788                Self::collect_plan_variables_impl(input, vars);
7789            }
7790            LogicalPlan::Apply {
7791                input, subquery, ..
7792            } => {
7793                Self::collect_plan_variables_impl(input, vars);
7794                Self::collect_plan_variables_impl(subquery, vars);
7795            }
7796            LogicalPlan::CrossJoin { left, right } => {
7797                Self::collect_plan_variables_impl(left, vars);
7798                Self::collect_plan_variables_impl(right, vars);
7799            }
7800            LogicalPlan::Unwind {
7801                input, variable, ..
7802            } => {
7803                vars.insert(variable.clone());
7804                Self::collect_plan_variables_impl(input, vars);
7805            }
7806            LogicalPlan::Aggregate { input, .. } => {
7807                Self::collect_plan_variables_impl(input, vars);
7808            }
7809            LogicalPlan::Distinct { input } => {
7810                Self::collect_plan_variables_impl(input, vars);
7811            }
7812            LogicalPlan::Sort { input, .. } => {
7813                Self::collect_plan_variables_impl(input, vars);
7814            }
7815            LogicalPlan::Limit { input, .. } => {
7816                Self::collect_plan_variables_impl(input, vars);
7817            }
7818            LogicalPlan::VectorKnn { variable, .. } => {
7819                vars.insert(variable.clone());
7820            }
7821            LogicalPlan::ProcedureCall { yield_items, .. } => {
7822                for (name, alias) in yield_items {
7823                    vars.insert(alias.clone().unwrap_or_else(|| name.clone()));
7824                }
7825            }
7826            LogicalPlan::ShortestPath {
7827                input,
7828                path_variable,
7829                ..
7830            } => {
7831                vars.insert(path_variable.clone());
7832                Self::collect_plan_variables_impl(input, vars);
7833            }
7834            LogicalPlan::AllShortestPaths {
7835                input,
7836                path_variable,
7837                ..
7838            } => {
7839                vars.insert(path_variable.clone());
7840                Self::collect_plan_variables_impl(input, vars);
7841            }
7842            LogicalPlan::RecursiveCTE {
7843                initial, recursive, ..
7844            } => {
7845                Self::collect_plan_variables_impl(initial, vars);
7846                Self::collect_plan_variables_impl(recursive, vars);
7847            }
7848            LogicalPlan::SubqueryCall {
7849                input, subquery, ..
7850            } => {
7851                Self::collect_plan_variables_impl(input, vars);
7852                Self::collect_plan_variables_impl(subquery, vars);
7853            }
7854            _ => {}
7855        }
7856    }
7857
7858    /// Extract predicates that only reference variables from Apply's input.
7859    /// Returns (input_only_predicates, remaining_predicates).
7860    fn extract_apply_input_predicates(
7861        predicate: &Expr,
7862        input_variables: &HashSet<String>,
7863        subquery_new_variables: &HashSet<String>,
7864    ) -> (Vec<Expr>, Vec<Expr>) {
7865        let conjuncts = Self::split_and_conjuncts(predicate);
7866        let mut input_preds = Vec::new();
7867        let mut remaining = Vec::new();
7868
7869        for conj in conjuncts {
7870            let vars = Self::collect_expr_variables(&conj);
7871
7872            // Predicate only references input variables (none from subquery)
7873            let refs_input_only = vars.iter().all(|v| input_variables.contains(v));
7874            let refs_any_subquery = vars.iter().any(|v| subquery_new_variables.contains(v));
7875
7876            if refs_input_only && !refs_any_subquery && !vars.is_empty() {
7877                input_preds.push(conj);
7878            } else {
7879                remaining.push(conj);
7880            }
7881        }
7882
7883        (input_preds, remaining)
7884    }
7885
7886    /// Push eligible predicates into Apply.input_filter.
7887    /// This filters input rows BEFORE executing the correlated subquery.
7888    fn push_predicates_to_apply(plan: LogicalPlan, current_predicate: &mut Expr) -> LogicalPlan {
7889        match plan {
7890            LogicalPlan::Apply {
7891                input,
7892                subquery,
7893                input_filter,
7894            } => {
7895                // Collect variables from input plan
7896                let input_vars = Self::collect_plan_variables(&input);
7897
7898                // Collect NEW variables introduced by subquery (not in input)
7899                let subquery_vars = Self::collect_plan_variables(&subquery);
7900                let new_subquery_vars: HashSet<String> =
7901                    subquery_vars.difference(&input_vars).cloned().collect();
7902
7903                // Extract predicates that only reference input variables
7904                let (input_preds, remaining) = Self::extract_apply_input_predicates(
7905                    current_predicate,
7906                    &input_vars,
7907                    &new_subquery_vars,
7908                );
7909
7910                // Update current_predicate to only remaining predicates
7911                *current_predicate = if remaining.is_empty() {
7912                    Expr::TRUE
7913                } else {
7914                    Self::combine_predicates(remaining).unwrap()
7915                };
7916
7917                // Combine extracted predicates with existing input_filter
7918                let new_input_filter = if input_preds.is_empty() {
7919                    input_filter
7920                } else {
7921                    let extracted = Self::combine_predicates(input_preds).unwrap();
7922                    match input_filter {
7923                        Some(existing) => Some(Expr::BinaryOp {
7924                            left: Box::new(existing),
7925                            op: BinaryOp::And,
7926                            right: Box::new(extracted),
7927                        }),
7928                        None => Some(extracted),
7929                    }
7930                };
7931
7932                // Recurse into input plan
7933                let new_input = Self::push_predicates_to_apply(*input, current_predicate);
7934
7935                LogicalPlan::Apply {
7936                    input: Box::new(new_input),
7937                    subquery,
7938                    input_filter: new_input_filter,
7939                }
7940            }
7941            // Recurse into other plan nodes
7942            LogicalPlan::Filter {
7943                input,
7944                predicate,
7945                optional_variables,
7946            } => LogicalPlan::Filter {
7947                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7948                predicate,
7949                optional_variables,
7950            },
7951            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7952                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7953                projections,
7954            },
7955            LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
7956                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7957                order_by,
7958            },
7959            LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
7960                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7961                skip,
7962                fetch,
7963            },
7964            LogicalPlan::Aggregate {
7965                input,
7966                group_by,
7967                aggregates,
7968            } => LogicalPlan::Aggregate {
7969                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7970                group_by,
7971                aggregates,
7972            },
7973            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
7974                left: Box::new(Self::push_predicates_to_apply(*left, current_predicate)),
7975                right: Box::new(Self::push_predicates_to_apply(*right, current_predicate)),
7976            },
7977            LogicalPlan::Traverse {
7978                input,
7979                edge_type_ids,
7980                direction,
7981                source_variable,
7982                target_variable,
7983                target_label_id,
7984                step_variable,
7985                min_hops,
7986                max_hops,
7987                optional,
7988                target_filter,
7989                path_variable,
7990                edge_properties,
7991                is_variable_length,
7992                optional_pattern_vars,
7993                scope_match_variables,
7994                edge_filter_expr,
7995                path_mode,
7996                qpp_steps,
7997            } => LogicalPlan::Traverse {
7998                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7999                edge_type_ids,
8000                direction,
8001                source_variable,
8002                target_variable,
8003                target_label_id,
8004                step_variable,
8005                min_hops,
8006                max_hops,
8007                optional,
8008                target_filter,
8009                path_variable,
8010                edge_properties,
8011                is_variable_length,
8012                optional_pattern_vars,
8013                scope_match_variables,
8014                edge_filter_expr,
8015                path_mode,
8016                qpp_steps,
8017            },
8018            other => other,
8019        }
8020    }
8021}
8022
8023/// Get the expected column name for an aggregate expression.
8024///
8025/// This is the single source of truth for aggregate column naming, used by:
8026/// - Logical planner (to create column references)
8027/// - Physical planner (to rename DataFusion's auto-generated column names)
8028/// - Fallback executor (to name result columns)
8029pub fn aggregate_column_name(expr: &Expr) -> String {
8030    expr.to_string_repr()
8031}
8032
/// Output produced by `EXPLAIN` — a human-readable plan with index and cost info.
///
/// Built by `QueryPlanner::explain_logical_plan`; serializable via serde.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExplainOutput {
    /// Pretty-printed (`{:#?}`) `Debug` rendering of the logical plan tree.
    pub plan_text: String,
    /// Indexes the planner reports as used by operators in the plan.
    pub index_usage: Vec<IndexUsage>,
    /// Rough row and cost estimates for the full plan.
    pub cost_estimates: CostEstimates,
    /// Planner warnings (e.g., missing index, forced full scan).
    /// `explain_logical_plan` currently always leaves this empty.
    pub warnings: Vec<String>,
    /// Suggested indexes that would improve this query.
    pub suggestions: Vec<IndexSuggestion>,
}
8047
/// Suggestion for creating an index to improve query performance.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexSuggestion {
    /// Label or edge type that would benefit from the index. Temporal
    /// suggestions use the placeholder text `"(detected from temporal query)"`
    /// because the label is not resolved from the variable.
    pub label_or_type: String,
    /// Property to index.
    pub property: String,
    /// Recommended index type as display text (e.g., `"SCALAR (BTree)"`).
    pub index_type: String,
    /// Human-readable explanation of the performance benefit.
    pub reason: String,
    /// Cypher statement template for creating the index. Temporal suggestions
    /// use the placeholder label `YourLabel`, which must be replaced before
    /// the statement is executed.
    pub create_statement: String,
}
8062
/// Index availability report for a single scan operator.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexUsage {
    /// Name of the label the operator targets (`"?"` when a vector search's
    /// label id cannot be resolved to a name).
    pub label_or_type: String,
    /// Indexed property the entry refers to.
    pub property: String,
    /// Kind of index as display text; the planner currently emits `"HASH"`
    /// and `"VECTOR"`.
    pub index_type: String,
    /// Whether the index was actually used for this scan.
    pub used: bool,
    /// Human-readable explanation of why the index was or was not used.
    pub reason: Option<String>,
}
8074
/// Rough cost and row count estimates for a complete logical plan.
///
/// The values produced by `QueryPlanner::estimate_costs` are currently fixed
/// constants that do not depend on the plan.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CostEstimates {
    /// Estimated number of rows the plan will produce.
    pub estimated_rows: f64,
    /// Abstract cost units (lower is cheaper).
    pub estimated_cost: f64,
}
8083
8084impl QueryPlanner {
8085    /// Plan a query and produce an EXPLAIN report (plan text, index usage, costs).
8086    pub fn explain_plan(&self, ast: Query) -> Result<ExplainOutput> {
8087        let plan = self.plan(ast)?;
8088        self.explain_logical_plan(&plan)
8089    }
8090
8091    /// Produce an EXPLAIN report for an already-planned logical plan.
8092    pub fn explain_logical_plan(&self, plan: &LogicalPlan) -> Result<ExplainOutput> {
8093        let index_usage = self.analyze_index_usage(plan)?;
8094        let cost_estimates = self.estimate_costs(plan)?;
8095        let suggestions = self.collect_index_suggestions(plan);
8096        let warnings = Vec::new();
8097        let plan_text = format!("{:#?}", plan);
8098
8099        Ok(ExplainOutput {
8100            plan_text,
8101            index_usage,
8102            cost_estimates,
8103            warnings,
8104            suggestions,
8105        })
8106    }
8107
8108    fn analyze_index_usage(&self, plan: &LogicalPlan) -> Result<Vec<IndexUsage>> {
8109        let mut usage = Vec::new();
8110        self.collect_index_usage(plan, &mut usage);
8111        Ok(usage)
8112    }
8113
8114    fn collect_index_usage(&self, plan: &LogicalPlan, usage: &mut Vec<IndexUsage>) {
8115        match plan {
8116            LogicalPlan::Scan {
8117                label_id,
8118                filter: Some(filter),
8119                ..
8120            } => {
8121                // Detect indexed-property pushdown — issue #57. Run the same
8122                // analyzer the physical planner uses; if it reports a
8123                // hash-index hit, surface it in EXPLAIN.
8124                if let Some(label_name) = self.schema.label_name_by_id(*label_id) {
8125                    let analyzer = crate::query::pushdown::IndexAwareAnalyzer::new(&self.schema);
8126                    // The variable name is the scan's binding variable; we
8127                    // reach for it via the Scan node directly.
8128                    if let LogicalPlan::Scan { variable, .. } = plan {
8129                        let strategy = analyzer.analyze(filter, variable, *label_id);
8130                        for prop in strategy.hash_index_columns {
8131                            usage.push(IndexUsage {
8132                                label_or_type: label_name.to_string(),
8133                                property: prop,
8134                                index_type: "HASH".to_string(),
8135                                used: true,
8136                                reason: Some(
8137                                    "Hash index point lookup pushed into Lance scan".to_string(),
8138                                ),
8139                            });
8140                        }
8141                    }
8142                }
8143            }
8144            LogicalPlan::Scan { .. } => {}
8145            LogicalPlan::VectorKnn {
8146                label_id, property, ..
8147            } => {
8148                let label_name = self.schema.label_name_by_id(*label_id).unwrap_or("?");
8149                usage.push(IndexUsage {
8150                    label_or_type: label_name.to_string(),
8151                    property: property.clone(),
8152                    index_type: "VECTOR".to_string(),
8153                    used: true,
8154                    reason: None,
8155                });
8156            }
8157            LogicalPlan::Explain { plan } => self.collect_index_usage(plan, usage),
8158            LogicalPlan::Filter { input, .. } => self.collect_index_usage(input, usage),
8159            LogicalPlan::Project { input, .. } => self.collect_index_usage(input, usage),
8160            LogicalPlan::Limit { input, .. } => self.collect_index_usage(input, usage),
8161            LogicalPlan::Sort { input, .. } => self.collect_index_usage(input, usage),
8162            LogicalPlan::Aggregate { input, .. } => self.collect_index_usage(input, usage),
8163            LogicalPlan::Traverse { input, .. } => self.collect_index_usage(input, usage),
8164            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
8165                self.collect_index_usage(left, usage);
8166                self.collect_index_usage(right, usage);
8167            }
8168            _ => {}
8169        }
8170    }
8171
8172    fn estimate_costs(&self, _plan: &LogicalPlan) -> Result<CostEstimates> {
8173        Ok(CostEstimates {
8174            estimated_rows: 100.0,
8175            estimated_cost: 10.0,
8176        })
8177    }
8178
8179    /// Collect index suggestions based on query patterns.
8180    ///
8181    /// Currently detects:
8182    /// - Temporal predicates from `uni.validAt()` function calls
8183    /// - Temporal predicates from `VALID_AT` macro expansion
8184    fn collect_index_suggestions(&self, plan: &LogicalPlan) -> Vec<IndexSuggestion> {
8185        let mut suggestions = Vec::new();
8186        self.collect_temporal_suggestions(plan, &mut suggestions);
8187        suggestions
8188    }
8189
8190    /// Recursively collect temporal index suggestions from the plan.
8191    fn collect_temporal_suggestions(
8192        &self,
8193        plan: &LogicalPlan,
8194        suggestions: &mut Vec<IndexSuggestion>,
8195    ) {
8196        match plan {
8197            LogicalPlan::Filter {
8198                input, predicate, ..
8199            } => {
8200                // Check for temporal patterns in the predicate
8201                self.detect_temporal_pattern(predicate, suggestions);
8202                // Recurse into input
8203                self.collect_temporal_suggestions(input, suggestions);
8204            }
8205            LogicalPlan::Explain { plan } => self.collect_temporal_suggestions(plan, suggestions),
8206            LogicalPlan::Project { input, .. } => {
8207                self.collect_temporal_suggestions(input, suggestions)
8208            }
8209            LogicalPlan::Limit { input, .. } => {
8210                self.collect_temporal_suggestions(input, suggestions)
8211            }
8212            LogicalPlan::Sort { input, .. } => {
8213                self.collect_temporal_suggestions(input, suggestions)
8214            }
8215            LogicalPlan::Aggregate { input, .. } => {
8216                self.collect_temporal_suggestions(input, suggestions)
8217            }
8218            LogicalPlan::Traverse { input, .. } => {
8219                self.collect_temporal_suggestions(input, suggestions)
8220            }
8221            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
8222                self.collect_temporal_suggestions(left, suggestions);
8223                self.collect_temporal_suggestions(right, suggestions);
8224            }
8225            _ => {}
8226        }
8227    }
8228
8229    /// Detect temporal predicate patterns and suggest indexes.
8230    ///
8231    /// Detects two patterns:
8232    /// 1. `uni.validAt(node, 'start_prop', 'end_prop', time)` function call
8233    /// 2. `node.valid_from <= time AND (node.valid_to IS NULL OR node.valid_to > time)` from VALID_AT macro
8234    fn detect_temporal_pattern(&self, expr: &Expr, suggestions: &mut Vec<IndexSuggestion>) {
8235        match expr {
8236            // Pattern 1: uni.temporal.validAt() function call
8237            Expr::FunctionCall { name, args, .. }
8238                if (name.eq_ignore_ascii_case("uni.temporal.validAt")
8239                    || name.eq_ignore_ascii_case("validAt"))
8240                    && args.len() >= 2 =>
8241            {
8242                // args[0] = node, args[1] = start_prop, args[2] = end_prop, args[3] = time
8243                let start_prop = if let Some(Expr::Literal(CypherLiteral::String(s))) = args.get(1)
8244                {
8245                    s.clone()
8246                } else {
8247                    "valid_from".to_string()
8248                };
8249
8250                // Try to extract label from the node expression
8251                if let Some(var) = args.first().and_then(|e| e.extract_variable()) {
8252                    self.suggest_temporal_index(&var, &start_prop, suggestions);
8253                }
8254            }
8255
8256            // Pattern 2: VALID_AT macro expansion - look for property <= time pattern
8257            Expr::BinaryOp {
8258                left,
8259                op: BinaryOp::And,
8260                right,
8261            } => {
8262                // Check left side for `prop <= time` pattern (temporal start condition)
8263                if let Expr::BinaryOp {
8264                    left: prop_expr,
8265                    op: BinaryOp::LtEq,
8266                    ..
8267                } = left.as_ref()
8268                    && let Expr::Property(base, prop_name) = prop_expr.as_ref()
8269                    && (prop_name == "valid_from"
8270                        || prop_name.contains("start")
8271                        || prop_name.contains("from")
8272                        || prop_name.contains("begin"))
8273                    && let Some(var) = base.extract_variable()
8274                {
8275                    self.suggest_temporal_index(&var, prop_name, suggestions);
8276                }
8277
8278                // Recurse into both sides of AND
8279                self.detect_temporal_pattern(left.as_ref(), suggestions);
8280                self.detect_temporal_pattern(right.as_ref(), suggestions);
8281            }
8282
8283            // Recurse into other binary ops
8284            Expr::BinaryOp { left, right, .. } => {
8285                self.detect_temporal_pattern(left.as_ref(), suggestions);
8286                self.detect_temporal_pattern(right.as_ref(), suggestions);
8287            }
8288
8289            _ => {}
8290        }
8291    }
8292
8293    /// Suggest a scalar index for a temporal property if one doesn't already exist.
8294    fn suggest_temporal_index(
8295        &self,
8296        _variable: &str,
8297        property: &str,
8298        suggestions: &mut Vec<IndexSuggestion>,
8299    ) {
8300        // Check if a scalar index already exists for this property
8301        // We need to check all labels since we may not know the exact label from the variable
8302        let mut has_index = false;
8303
8304        for index in &self.schema.indexes {
8305            if let IndexDefinition::Scalar(config) = index
8306                && config.properties.contains(&property.to_string())
8307            {
8308                has_index = true;
8309                break;
8310            }
8311        }
8312
8313        if !has_index {
8314            // Avoid duplicate suggestions
8315            let already_suggested = suggestions.iter().any(|s| s.property == property);
8316            if !already_suggested {
8317                suggestions.push(IndexSuggestion {
8318                    label_or_type: "(detected from temporal query)".to_string(),
8319                    property: property.to_string(),
8320                    index_type: "SCALAR (BTree)".to_string(),
8321                    reason: format!(
8322                        "Temporal queries using '{}' can benefit from a scalar index for range scans",
8323                        property
8324                    ),
8325                    create_statement: format!(
8326                        "CREATE INDEX idx_{} FOR (n:YourLabel) ON (n.{})",
8327                        property, property
8328                    ),
8329                });
8330            }
8331        }
8332    }
8333
8334    /// Helper functions for expression normalization
8335    /// Normalize an expression for storage: strip variable prefixes
8336    /// For simple property: u.email -> "email"
8337    /// For expressions: lower(u.email) -> "lower(email)"
8338    fn normalize_expression_for_storage(expr: &Expr) -> String {
8339        match expr {
8340            Expr::Property(base, prop) if matches!(**base, Expr::Variable(_)) => prop.clone(),
8341            _ => {
8342                // Serialize expression and strip variable prefix
8343                let expr_str = expr.to_string_repr();
8344                Self::strip_variable_prefix(&expr_str)
8345            }
8346        }
8347    }
8348
8349    /// Strip variable references like "u.prop" from expression strings
8350    /// Converts "lower(u.email)" to "lower(email)"
8351    fn strip_variable_prefix(expr_str: &str) -> String {
8352        use regex::Regex;
8353        // Match patterns like "word.property" and replace with just "property"
8354        let re = Regex::new(r"\b\w+\.(\w+)").unwrap();
8355        re.replace_all(expr_str, "$1").to_string()
8356    }
8357
8358    /// Plan a schema command from the new AST
8359    fn plan_schema_command(&self, cmd: SchemaCommand) -> Result<LogicalPlan> {
8360        match cmd {
8361            SchemaCommand::CreateVectorIndex(c) => {
8362                use uni_common::vector_index_opts::{
8363                    VectorIndexOpts, build_vector_index_type, parse_vector_metric,
8364                };
8365                // Accept either a numeric value (`partitions: 256`) or a quoted string
8366                // (`partitions: '256'`) — Cypher map literals produce the former.
8367                let opt = |key: &str| -> Option<u32> {
8368                    c.options.get(key).and_then(|v| {
8369                        v.as_u64()
8370                            .map(|n| n as u32)
8371                            .or_else(|| v.as_str().and_then(|s| s.parse::<u32>().ok()))
8372                    })
8373                };
8374                let opt_u8 = |key: &str| -> Option<u8> {
8375                    c.options.get(key).and_then(|v| {
8376                        v.as_u64()
8377                            .map(|n| n as u8)
8378                            .or_else(|| v.as_str().and_then(|s| s.parse::<u8>().ok()))
8379                    })
8380                };
8381                let opt_u64 = |key: &str| -> Option<u64> {
8382                    c.options.get(key).and_then(|v| {
8383                        v.as_u64()
8384                            .or_else(|| v.as_str().and_then(|s| s.parse::<u64>().ok()))
8385                    })
8386                };
8387                // Single source of truth (shared with the `uni.create_vector_index`
8388                // procedure) so dense / native-multivector / MUVERA behave identically.
8389                let index_type = build_vector_index_type(&VectorIndexOpts {
8390                    type_name: c.options.get("type").and_then(|v| v.as_str()),
8391                    partitions: opt("partitions"),
8392                    m: opt("m"),
8393                    ef_construction: opt("ef_construction"),
8394                    sub_vectors: opt("sub_vectors"),
8395                    num_bits: opt_u8("num_bits"),
8396                    k_sim: opt("k_sim"),
8397                    reps: opt("reps"),
8398                    d_proj: opt("d_proj"),
8399                    seed: opt_u64("seed"),
8400                    inner: c.options.get("inner").and_then(|v| v.as_str()),
8401                });
8402
8403                // Parse embedding config from options
8404                let embedding_config = if let Some(emb_val) = c.options.get("embedding") {
8405                    Self::parse_embedding_config(emb_val)?
8406                } else {
8407                    None
8408                };
8409
8410                // Parse the distance metric from OPTIONS (default Cosine).
8411                let metric = parse_vector_metric(c.options.get("metric").and_then(|v| v.as_str()))?;
8412
8413                let config = VectorIndexConfig {
8414                    name: c.name,
8415                    label: c.label,
8416                    property: c.property,
8417                    metric,
8418                    index_type,
8419                    embedding_config,
8420                    metadata: Default::default(),
8421                };
8422                Ok(LogicalPlan::CreateVectorIndex {
8423                    config,
8424                    if_not_exists: c.if_not_exists,
8425                })
8426            }
8427            SchemaCommand::CreateFullTextIndex(cfg) => Ok(LogicalPlan::CreateFullTextIndex {
8428                config: FullTextIndexConfig {
8429                    name: cfg.name,
8430                    label: cfg.label,
8431                    properties: cfg.properties,
8432                    tokenizer: TokenizerConfig::Standard,
8433                    with_positions: true,
8434                    metadata: Default::default(),
8435                },
8436                if_not_exists: cfg.if_not_exists,
8437            }),
8438            SchemaCommand::CreateScalarIndex(cfg) => {
8439                // Convert expressions to storage strings (strip variable prefix)
8440                let properties: Vec<String> = cfg
8441                    .expressions
8442                    .iter()
8443                    .map(Self::normalize_expression_for_storage)
8444                    .collect();
8445
8446                Ok(LogicalPlan::CreateScalarIndex {
8447                    config: ScalarIndexConfig {
8448                        name: cfg.name,
8449                        label: cfg.label,
8450                        properties,
8451                        index_type: ScalarIndexType::BTree,
8452                        where_clause: cfg.where_clause.map(|e| e.to_string_repr()),
8453                        metadata: Default::default(),
8454                    },
8455                    if_not_exists: cfg.if_not_exists,
8456                })
8457            }
8458            SchemaCommand::CreateJsonFtsIndex(cfg) => {
8459                let with_positions = cfg
8460                    .options
8461                    .get("with_positions")
8462                    .and_then(|v| v.as_bool())
8463                    .unwrap_or(false);
8464                Ok(LogicalPlan::CreateJsonFtsIndex {
8465                    config: JsonFtsIndexConfig {
8466                        name: cfg.name,
8467                        label: cfg.label,
8468                        column: cfg.column,
8469                        paths: Vec::new(),
8470                        with_positions,
8471                        metadata: Default::default(),
8472                    },
8473                    if_not_exists: cfg.if_not_exists,
8474                })
8475            }
8476            SchemaCommand::DropIndex(drop) => Ok(LogicalPlan::DropIndex {
8477                name: drop.name,
8478                if_exists: false, // new AST doesn't have if_exists for DROP INDEX yet
8479            }),
8480            SchemaCommand::CreateConstraint(c) => Ok(LogicalPlan::CreateConstraint(c)),
8481            SchemaCommand::DropConstraint(c) => Ok(LogicalPlan::DropConstraint(c)),
8482            SchemaCommand::CreateLabel(c) => Ok(LogicalPlan::CreateLabel(c)),
8483            SchemaCommand::CreateEdgeType(c) => Ok(LogicalPlan::CreateEdgeType(c)),
8484            SchemaCommand::AlterLabel(c) => Ok(LogicalPlan::AlterLabel(c)),
8485            SchemaCommand::AlterEdgeType(c) => Ok(LogicalPlan::AlterEdgeType(c)),
8486            SchemaCommand::DropLabel(c) => Ok(LogicalPlan::DropLabel(c)),
8487            SchemaCommand::DropEdgeType(c) => Ok(LogicalPlan::DropEdgeType(c)),
8488            SchemaCommand::ShowConstraints(c) => Ok(LogicalPlan::ShowConstraints(c)),
8489            SchemaCommand::ShowIndexes(c) => Ok(LogicalPlan::ShowIndexes { filter: c.filter }),
8490            SchemaCommand::ShowDatabase => Ok(LogicalPlan::ShowDatabase),
8491            SchemaCommand::ShowConfig => Ok(LogicalPlan::ShowConfig),
8492            SchemaCommand::ShowStatistics => Ok(LogicalPlan::ShowStatistics),
8493            SchemaCommand::Vacuum => Ok(LogicalPlan::Vacuum),
8494            SchemaCommand::Checkpoint => Ok(LogicalPlan::Checkpoint),
8495            SchemaCommand::Backup { path } => Ok(LogicalPlan::Backup {
8496                destination: path,
8497                options: HashMap::new(),
8498            }),
8499            SchemaCommand::CopyTo(cmd) => Ok(LogicalPlan::CopyTo {
8500                label: cmd.label,
8501                path: cmd.path,
8502                format: cmd.format,
8503                options: cmd.options,
8504            }),
8505            SchemaCommand::CopyFrom(cmd) => Ok(LogicalPlan::CopyFrom {
8506                label: cmd.label,
8507                path: cmd.path,
8508                format: cmd.format,
8509                options: cmd.options,
8510            }),
8511        }
8512    }
8513
8514    fn parse_embedding_config(emb_val: &Value) -> Result<Option<EmbeddingConfig>> {
8515        let obj = emb_val
8516            .as_object()
8517            .ok_or_else(|| anyhow!("embedding option must be an object"))?;
8518
8519        // Parse alias (required)
8520        let alias = obj
8521            .get("alias")
8522            .and_then(|v| v.as_str())
8523            .ok_or_else(|| anyhow!("embedding.alias is required"))?;
8524
8525        // Parse source properties (required)
8526        let source_properties = obj
8527            .get("source")
8528            .and_then(|v| v.as_array())
8529            .ok_or_else(|| anyhow!("embedding.source is required and must be an array"))?
8530            .iter()
8531            .filter_map(|v| v.as_str().map(|s| s.to_string()))
8532            .collect::<Vec<_>>();
8533
8534        if source_properties.is_empty() {
8535            return Err(anyhow!(
8536                "embedding.source must contain at least one property"
8537            ));
8538        }
8539
8540        let batch_size = obj
8541            .get("batch_size")
8542            .and_then(|v| v.as_u64())
8543            .map(|v| v as usize)
8544            .unwrap_or(32);
8545
8546        let document_prefix = obj
8547            .get("document_prefix")
8548            .and_then(|v| v.as_str())
8549            .map(|s| s.to_string());
8550
8551        let query_prefix = obj
8552            .get("query_prefix")
8553            .and_then(|v| v.as_str())
8554            .map(|s| s.to_string());
8555
8556        Ok(Some(EmbeddingConfig {
8557            alias: alias.to_string(),
8558            source_properties,
8559            batch_size,
8560            document_prefix,
8561            query_prefix,
8562        }))
8563    }
8564}
8565
8566/// Collect all properties referenced anywhere in the LogicalPlan tree.
8567///
8568/// This is critical for window functions: properties must be materialized
8569/// at the Scan node so they're available for window operations later.
8570///
8571/// Returns a mapping of variable name → property names (e.g., "e" → {"dept", "salary"}).
8572pub fn collect_properties_from_plan(plan: &LogicalPlan) -> HashMap<String, HashSet<String>> {
8573    let mut properties: HashMap<String, HashSet<String>> = HashMap::new();
8574    collect_properties_recursive(plan, &mut properties);
8575    properties
8576}
8577
8578/// Recursively walk the LogicalPlan tree and collect all property references.
8579fn collect_properties_recursive(
8580    plan: &LogicalPlan,
8581    properties: &mut HashMap<String, HashSet<String>>,
8582) {
8583    match plan {
8584        LogicalPlan::Window {
8585            input,
8586            window_exprs,
8587        } => {
8588            // Collect from window expressions
8589            for expr in window_exprs {
8590                collect_properties_from_expr_into(expr, properties);
8591            }
8592            collect_properties_recursive(input, properties);
8593        }
8594        LogicalPlan::Project { input, projections } => {
8595            for (expr, _alias) in projections {
8596                collect_properties_from_expr_into(expr, properties);
8597            }
8598            collect_properties_recursive(input, properties);
8599        }
8600        LogicalPlan::Sort { input, order_by } => {
8601            for sort_item in order_by {
8602                collect_properties_from_expr_into(&sort_item.expr, properties);
8603            }
8604            collect_properties_recursive(input, properties);
8605        }
8606        LogicalPlan::Filter {
8607            input, predicate, ..
8608        } => {
8609            collect_properties_from_expr_into(predicate, properties);
8610            collect_properties_recursive(input, properties);
8611        }
8612        LogicalPlan::Aggregate {
8613            input,
8614            group_by,
8615            aggregates,
8616        } => {
8617            for expr in group_by {
8618                collect_properties_from_expr_into(expr, properties);
8619            }
8620            for expr in aggregates {
8621                collect_properties_from_expr_into(expr, properties);
8622            }
8623            collect_properties_recursive(input, properties);
8624        }
8625        LogicalPlan::Scan {
8626            filter: Some(expr), ..
8627        } => {
8628            collect_properties_from_expr_into(expr, properties);
8629        }
8630        LogicalPlan::Scan { filter: None, .. } => {}
8631        LogicalPlan::ExtIdLookup {
8632            filter: Some(expr), ..
8633        } => {
8634            collect_properties_from_expr_into(expr, properties);
8635        }
8636        LogicalPlan::ExtIdLookup { filter: None, .. } => {}
8637        LogicalPlan::ScanAll {
8638            filter: Some(expr), ..
8639        } => {
8640            collect_properties_from_expr_into(expr, properties);
8641        }
8642        LogicalPlan::ScanAll { filter: None, .. } => {}
8643        LogicalPlan::ScanMainByLabels {
8644            filter: Some(expr), ..
8645        } => {
8646            collect_properties_from_expr_into(expr, properties);
8647        }
8648        LogicalPlan::ScanMainByLabels { filter: None, .. } => {}
8649        LogicalPlan::TraverseMainByType {
8650            input,
8651            target_filter,
8652            ..
8653        } => {
8654            if let Some(expr) = target_filter {
8655                collect_properties_from_expr_into(expr, properties);
8656            }
8657            collect_properties_recursive(input, properties);
8658        }
8659        LogicalPlan::Traverse {
8660            input,
8661            target_filter,
8662            step_variable: _,
8663            ..
8664        } => {
8665            if let Some(expr) = target_filter {
8666                collect_properties_from_expr_into(expr, properties);
8667            }
8668            // Note: Edge properties (step_variable) will be collected from expressions
8669            // that reference them. The edge_properties field in LogicalPlan is populated
8670            // later during physical planning based on this collected map.
8671            collect_properties_recursive(input, properties);
8672        }
8673        LogicalPlan::Unwind { input, expr, .. } => {
8674            collect_properties_from_expr_into(expr, properties);
8675            collect_properties_recursive(input, properties);
8676        }
8677        LogicalPlan::Create { input, pattern } => {
8678            // Mark variables referenced in CREATE patterns with "*" so plan_scan
8679            // adds structural projections (bare entity columns). Without this,
8680            // execute_create_pattern() can't find bound variables and creates
8681            // spurious new nodes instead of using existing MATCH'd ones.
8682            mark_pattern_variables(pattern, properties);
8683            collect_properties_recursive(input, properties);
8684        }
8685        LogicalPlan::CreateBatch { input, patterns } => {
8686            for pattern in patterns {
8687                mark_pattern_variables(pattern, properties);
8688            }
8689            collect_properties_recursive(input, properties);
8690        }
8691        LogicalPlan::Merge {
8692            input,
8693            pattern,
8694            on_match,
8695            on_create,
8696        } => {
8697            mark_pattern_variables(pattern, properties);
8698            if let Some(set_clause) = on_match {
8699                mark_set_item_variables(&set_clause.items, properties);
8700            }
8701            if let Some(set_clause) = on_create {
8702                mark_set_item_variables(&set_clause.items, properties);
8703            }
8704            collect_properties_recursive(input, properties);
8705        }
8706        LogicalPlan::Set { input, items } => {
8707            mark_set_item_variables(items, properties);
8708            collect_properties_recursive(input, properties);
8709        }
8710        LogicalPlan::Remove { input, items } => {
8711            for item in items {
8712                match item {
8713                    RemoveItem::Property(expr) => {
8714                        // REMOVE n.prop — collect the property and mark the variable
8715                        // with "*" so full structural projection is applied.
8716                        collect_properties_from_expr_into(expr, properties);
8717                        if let Expr::Property(base, _) = expr
8718                            && let Expr::Variable(var) = base.as_ref()
8719                        {
8720                            properties
8721                                .entry(var.clone())
8722                                .or_default()
8723                                .insert("*".to_string());
8724                        }
8725                    }
8726                    RemoveItem::Labels { variable, .. } => {
8727                        // REMOVE n:Label — mark n with "*"
8728                        properties
8729                            .entry(variable.clone())
8730                            .or_default()
8731                            .insert("*".to_string());
8732                    }
8733                }
8734            }
8735            collect_properties_recursive(input, properties);
8736        }
8737        LogicalPlan::Delete { input, items, .. } => {
8738            for expr in items {
8739                collect_properties_from_expr_into(expr, properties);
8740            }
8741            collect_properties_recursive(input, properties);
8742        }
8743        LogicalPlan::Foreach {
8744            input, list, body, ..
8745        } => {
8746            collect_properties_from_expr_into(list, properties);
8747            for plan in body {
8748                collect_properties_recursive(plan, properties);
8749            }
8750            collect_properties_recursive(input, properties);
8751        }
8752        LogicalPlan::Limit { input, .. } => {
8753            collect_properties_recursive(input, properties);
8754        }
8755        LogicalPlan::CrossJoin { left, right } => {
8756            collect_properties_recursive(left, properties);
8757            collect_properties_recursive(right, properties);
8758        }
8759        LogicalPlan::Apply {
8760            input,
8761            subquery,
8762            input_filter,
8763        } => {
8764            if let Some(expr) = input_filter {
8765                collect_properties_from_expr_into(expr, properties);
8766            }
8767            collect_properties_recursive(input, properties);
8768            collect_properties_recursive(subquery, properties);
8769        }
8770        LogicalPlan::Union { left, right, .. } => {
8771            collect_properties_recursive(left, properties);
8772            collect_properties_recursive(right, properties);
8773        }
8774        LogicalPlan::RecursiveCTE {
8775            initial, recursive, ..
8776        } => {
8777            collect_properties_recursive(initial, properties);
8778            collect_properties_recursive(recursive, properties);
8779        }
8780        LogicalPlan::ProcedureCall { arguments, .. } => {
8781            for arg in arguments {
8782                collect_properties_from_expr_into(arg, properties);
8783            }
8784        }
8785        LogicalPlan::VectorKnn { query, .. } => {
8786            collect_properties_from_expr_into(query, properties);
8787        }
8788        LogicalPlan::InvertedIndexLookup { terms, .. } => {
8789            collect_properties_from_expr_into(terms, properties);
8790        }
8791        LogicalPlan::ShortestPath { input, .. } => {
8792            collect_properties_recursive(input, properties);
8793        }
8794        LogicalPlan::AllShortestPaths { input, .. } => {
8795            collect_properties_recursive(input, properties);
8796        }
8797        LogicalPlan::Distinct { input } => {
8798            collect_properties_recursive(input, properties);
8799        }
8800        LogicalPlan::QuantifiedPattern {
8801            input,
8802            pattern_plan,
8803            ..
8804        } => {
8805            collect_properties_recursive(input, properties);
8806            collect_properties_recursive(pattern_plan, properties);
8807        }
8808        LogicalPlan::BindZeroLengthPath { input, .. } => {
8809            collect_properties_recursive(input, properties);
8810        }
8811        LogicalPlan::BindPath { input, .. } => {
8812            collect_properties_recursive(input, properties);
8813        }
8814        LogicalPlan::SubqueryCall { input, subquery } => {
8815            collect_properties_recursive(input, properties);
8816            collect_properties_recursive(subquery, properties);
8817        }
8818        LogicalPlan::LocyProject {
8819            input, projections, ..
8820        } => {
8821            for (expr, _alias) in projections {
8822                match expr {
8823                    // Bare variable in LocyProject: only need _vid for node variables
8824                    // (plan_locy_project extracts VID directly). Adding "*" would create
8825                    // a structural Struct column that conflicts with derived scan columns.
8826                    Expr::Variable(name) if !name.contains('.') => {
8827                        properties
8828                            .entry(name.clone())
8829                            .or_default()
8830                            .insert("_vid".to_string());
8831                    }
8832                    _ => collect_properties_from_expr_into(expr, properties),
8833                }
8834            }
8835            collect_properties_recursive(input, properties);
8836        }
8837        LogicalPlan::LocyFold {
8838            input,
8839            fold_bindings,
8840            ..
8841        } => {
8842            for (_name, expr) in fold_bindings {
8843                collect_properties_from_expr_into(expr, properties);
8844            }
8845            collect_properties_recursive(input, properties);
8846        }
8847        LogicalPlan::LocyBestBy {
8848            input, criteria, ..
8849        } => {
8850            for (expr, _asc) in criteria {
8851                collect_properties_from_expr_into(expr, properties);
8852            }
8853            collect_properties_recursive(input, properties);
8854        }
8855        LogicalPlan::LocyPriority { input, .. } => {
8856            collect_properties_recursive(input, properties);
8857        }
8858        LogicalPlan::LocyModelInvoke { input, .. } => {
8859            // Model invocations don't introduce new property accesses
8860            // — feature expressions are lifted to hidden YIELD items
8861            // by `extract_model_invocations` (uni-locy typecheck) and
8862            // their property refs are already collected via the
8863            // wrapped LocyProject's projection walk.
8864            collect_properties_recursive(input, properties);
8865        }
8866        // DDL and other plans don't reference properties
8867        _ => {}
8868    }
8869}
8870
8871/// Mark target variables from SET items with "*" and collect value expressions.
8872fn mark_set_item_variables(items: &[SetItem], properties: &mut HashMap<String, HashSet<String>>) {
8873    for item in items {
8874        match item {
8875            SetItem::Property { expr, value } => {
8876                // SET n.prop = val — mark n with STRUCT_ONLY_SENTINEL so the
8877                // scan builds the bare `n` struct column (needed for executor
8878                // `row.get(var_name)`) WITHOUT pulling the full schema. The
8879                // explicit `prop` is collected via `collect_properties_from_expr_into`
8880                // below and joins the variable's HashSet alongside the sentinel.
8881                //
8882                // If the same variable is also referenced bare elsewhere
8883                // (e.g. `SET n.x = 1 RETURN n`), `collect_properties_from_expr_into`
8884                // inserts "*" through the bare-Variable path; "*" dominates
8885                // the sentinel in `resolve_properties`, so the full schema
8886                // is still pulled when actually required.
8887                collect_properties_from_expr_into(expr, properties);
8888                collect_properties_from_expr_into(value, properties);
8889                if let Expr::Property(base, _) = expr
8890                    && let Expr::Variable(var) = base.as_ref()
8891                {
8892                    properties
8893                        .entry(var.clone())
8894                        .or_default()
8895                        .insert(STRUCT_ONLY_SENTINEL.to_string());
8896                }
8897            }
8898            SetItem::Labels { variable, .. } => {
8899                // SET n:Label — need full access to n
8900                properties
8901                    .entry(variable.clone())
8902                    .or_default()
8903                    .insert("*".to_string());
8904            }
8905            SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
8906                // SET n = {props} or SET n += {props}
8907                properties
8908                    .entry(variable.clone())
8909                    .or_default()
8910                    .insert("*".to_string());
8911                collect_properties_from_expr_into(value, properties);
8912            }
8913        }
8914    }
8915}
8916
8917/// Mark all variables in a CREATE/MERGE pattern with "*" so that plan_scan
8918/// adds structural projections (bare entity Struct columns) for them.
8919/// This is needed so that execute_create_pattern() can find bound variables
8920/// in the row HashMap and reuse existing nodes instead of creating new ones.
8921fn mark_pattern_variables(pattern: &Pattern, properties: &mut HashMap<String, HashSet<String>>) {
8922    for path in &pattern.paths {
8923        if let Some(ref v) = path.variable {
8924            properties
8925                .entry(v.clone())
8926                .or_default()
8927                .insert("*".to_string());
8928        }
8929        for element in &path.elements {
8930            match element {
8931                PatternElement::Node(n) => {
8932                    if let Some(ref v) = n.variable {
8933                        properties
8934                            .entry(v.clone())
8935                            .or_default()
8936                            .insert("*".to_string());
8937                    }
8938                    // Also collect properties from inline property expressions
8939                    if let Some(ref props) = n.properties {
8940                        collect_properties_from_expr_into(props, properties);
8941                    }
8942                }
8943                PatternElement::Relationship(r) => {
8944                    if let Some(ref v) = r.variable {
8945                        properties
8946                            .entry(v.clone())
8947                            .or_default()
8948                            .insert("*".to_string());
8949                    }
8950                    if let Some(ref props) = r.properties {
8951                        collect_properties_from_expr_into(props, properties);
8952                    }
8953                }
8954                PatternElement::Parenthesized { pattern, .. } => {
8955                    let sub = Pattern {
8956                        paths: vec![pattern.as_ref().clone()],
8957                    };
8958                    mark_pattern_variables(&sub, properties);
8959                }
8960            }
8961        }
8962    }
8963}
8964
8965/// Collect properties from an expression into a HashMap.
8966fn collect_properties_from_expr_into(
8967    expr: &Expr,
8968    properties: &mut HashMap<String, HashSet<String>>,
8969) {
8970    match expr {
8971        Expr::PatternComprehension {
8972            where_clause,
8973            map_expr,
8974            ..
8975        } => {
8976            // Collect properties from the WHERE clause and map expression.
8977            // The pattern itself creates local bindings that don't need
8978            // property collection from the outer scope.
8979            if let Some(where_expr) = where_clause {
8980                collect_properties_from_expr_into(where_expr, properties);
8981            }
8982            collect_properties_from_expr_into(map_expr, properties);
8983        }
8984        Expr::Variable(name) => {
8985            // Handle transformed property expressions like "e.dept" (after transform_window_expr_properties)
8986            if let Some((var, prop)) = name.split_once('.') {
8987                properties
8988                    .entry(var.to_string())
8989                    .or_default()
8990                    .insert(prop.to_string());
8991            } else {
8992                // Bare variable (e.g., RETURN n) — needs all properties materialized
8993                properties
8994                    .entry(name.clone())
8995                    .or_default()
8996                    .insert("*".to_string());
8997            }
8998        }
8999        Expr::Property(base, name) => {
9000            // Extract variable name from the base expression
9001            if let Expr::Variable(var) = base.as_ref() {
9002                properties
9003                    .entry(var.clone())
9004                    .or_default()
9005                    .insert(name.clone());
9006                // Don't recurse into Variable — that would mark it as a bare
9007                // variable reference (adding "*") when it's just a property base.
9008            } else {
9009                // Recurse for complex base expressions (nested property, function call, etc.)
9010                collect_properties_from_expr_into(base, properties);
9011            }
9012        }
9013        Expr::BinaryOp { left, right, .. } => {
9014            collect_properties_from_expr_into(left, properties);
9015            collect_properties_from_expr_into(right, properties);
9016        }
9017        Expr::FunctionCall {
9018            name,
9019            args,
9020            window_spec,
9021            ..
9022        } => {
9023            // Analyze function for property requirements (pushdown hydration)
9024            analyze_function_property_requirements(name, args, properties);
9025
9026            // Collect from arguments
9027            for arg in args {
9028                collect_properties_from_expr_into(arg, properties);
9029            }
9030
9031            // Collect from window spec (PARTITION BY, ORDER BY)
9032            if let Some(spec) = window_spec {
9033                for part_expr in &spec.partition_by {
9034                    collect_properties_from_expr_into(part_expr, properties);
9035                }
9036                for sort_item in &spec.order_by {
9037                    collect_properties_from_expr_into(&sort_item.expr, properties);
9038                }
9039            }
9040        }
9041        Expr::UnaryOp { expr, .. } => {
9042            collect_properties_from_expr_into(expr, properties);
9043        }
9044        Expr::List(items) => {
9045            for item in items {
9046                collect_properties_from_expr_into(item, properties);
9047            }
9048        }
9049        Expr::Map(entries) => {
9050            for (_key, value) in entries {
9051                collect_properties_from_expr_into(value, properties);
9052            }
9053        }
9054        Expr::ListComprehension {
9055            list,
9056            where_clause,
9057            map_expr,
9058            ..
9059        } => {
9060            collect_properties_from_expr_into(list, properties);
9061            if let Some(where_expr) = where_clause {
9062                collect_properties_from_expr_into(where_expr, properties);
9063            }
9064            collect_properties_from_expr_into(map_expr, properties);
9065        }
9066        Expr::Case {
9067            expr,
9068            when_then,
9069            else_expr,
9070        } => {
9071            if let Some(scrutinee_expr) = expr {
9072                collect_properties_from_expr_into(scrutinee_expr, properties);
9073            }
9074            for (when, then) in when_then {
9075                collect_properties_from_expr_into(when, properties);
9076                collect_properties_from_expr_into(then, properties);
9077            }
9078            if let Some(default_expr) = else_expr {
9079                collect_properties_from_expr_into(default_expr, properties);
9080            }
9081        }
9082        Expr::Quantifier {
9083            list, predicate, ..
9084        } => {
9085            collect_properties_from_expr_into(list, properties);
9086            collect_properties_from_expr_into(predicate, properties);
9087        }
9088        Expr::Reduce {
9089            init, list, expr, ..
9090        } => {
9091            collect_properties_from_expr_into(init, properties);
9092            collect_properties_from_expr_into(list, properties);
9093            collect_properties_from_expr_into(expr, properties);
9094        }
9095        Expr::Exists { query, .. } => {
9096            // Walk into EXISTS body to collect property references for outer-scope variables.
9097            // This ensures correlated properties (e.g., a.city inside EXISTS where a is outer)
9098            // are included in the outer scan's property list. Extra properties collected for
9099            // inner-only variables are harmless — the outer scan ignores unknown variable names.
9100            collect_properties_from_subquery(query, properties);
9101        }
9102        Expr::CountSubquery(query) | Expr::CollectSubquery(query) => {
9103            collect_properties_from_subquery(query, properties);
9104        }
9105        Expr::IsNull(expr) | Expr::IsNotNull(expr) | Expr::IsUnique(expr) => {
9106            collect_properties_from_expr_into(expr, properties);
9107        }
9108        Expr::In { expr, list } => {
9109            collect_properties_from_expr_into(expr, properties);
9110            collect_properties_from_expr_into(list, properties);
9111        }
9112        Expr::ArrayIndex { array, index } => {
9113            if let Expr::Variable(var) = array.as_ref() {
9114                if let Expr::Literal(CypherLiteral::String(prop_name)) = index.as_ref() {
9115                    // Static string key: e['name'] → only need that specific property
9116                    properties
9117                        .entry(var.clone())
9118                        .or_default()
9119                        .insert(prop_name.clone());
9120                } else {
9121                    // Dynamic property access: e[prop] → need all properties
9122                    properties
9123                        .entry(var.clone())
9124                        .or_default()
9125                        .insert("*".to_string());
9126                }
9127            }
9128            collect_properties_from_expr_into(array, properties);
9129            collect_properties_from_expr_into(index, properties);
9130        }
9131        Expr::ArraySlice { array, start, end } => {
9132            collect_properties_from_expr_into(array, properties);
9133            if let Some(start_expr) = start {
9134                collect_properties_from_expr_into(start_expr, properties);
9135            }
9136            if let Some(end_expr) = end {
9137                collect_properties_from_expr_into(end_expr, properties);
9138            }
9139        }
9140        Expr::ValidAt {
9141            entity,
9142            timestamp,
9143            start_prop,
9144            end_prop,
9145        } => {
9146            // Extract property requirements from ValidAt expression
9147            if let Expr::Variable(var) = entity.as_ref() {
9148                if let Some(prop) = start_prop {
9149                    properties
9150                        .entry(var.clone())
9151                        .or_default()
9152                        .insert(prop.clone());
9153                }
9154                if let Some(prop) = end_prop {
9155                    properties
9156                        .entry(var.clone())
9157                        .or_default()
9158                        .insert(prop.clone());
9159                }
9160            }
9161            collect_properties_from_expr_into(entity, properties);
9162            collect_properties_from_expr_into(timestamp, properties);
9163        }
9164        Expr::MapProjection { base, items } => {
9165            collect_properties_from_expr_into(base, properties);
9166            for item in items {
9167                match item {
9168                    uni_cypher::ast::MapProjectionItem::Property(prop) => {
9169                        if let Expr::Variable(var) = base.as_ref() {
9170                            properties
9171                                .entry(var.clone())
9172                                .or_default()
9173                                .insert(prop.clone());
9174                        }
9175                    }
9176                    uni_cypher::ast::MapProjectionItem::AllProperties => {
9177                        if let Expr::Variable(var) = base.as_ref() {
9178                            properties
9179                                .entry(var.clone())
9180                                .or_default()
9181                                .insert("*".to_string());
9182                        }
9183                    }
9184                    uni_cypher::ast::MapProjectionItem::LiteralEntry(_, expr) => {
9185                        collect_properties_from_expr_into(expr, properties);
9186                    }
9187                    uni_cypher::ast::MapProjectionItem::Variable(_) => {}
9188                }
9189            }
9190        }
9191        Expr::LabelCheck { expr, .. } => {
9192            collect_properties_from_expr_into(expr, properties);
9193        }
9194        // Parameters reference outer-scope variables (e.g., $p in correlated subqueries).
9195        // Mark them with "*" so the outer scan produces structural projections that
9196        // extract_row_params can resolve.
9197        Expr::Parameter(name) => {
9198            properties
9199                .entry(name.clone())
9200                .or_default()
9201                .insert("*".to_string());
9202        }
9203        // Literals and wildcard don't reference properties
9204        Expr::Literal(_) | Expr::Wildcard => {}
9205    }
9206}
9207
9208/// Walk a subquery (EXISTS/COUNT/COLLECT body) and collect property references.
9209///
9210/// This is needed so that correlated property accesses like `a.city` inside
9211/// `WHERE EXISTS { (a)-[:KNOWS]->(b) WHERE b.city = a.city }` cause the outer
9212/// scan to include `a.city` in its projected columns.
9213fn collect_properties_from_subquery(
9214    query: &Query,
9215    properties: &mut HashMap<String, HashSet<String>>,
9216) {
9217    match query {
9218        Query::Single(stmt) => {
9219            for clause in &stmt.clauses {
9220                match clause {
9221                    Clause::Match(m) => {
9222                        if let Some(ref wc) = m.where_clause {
9223                            collect_properties_from_expr_into(wc, properties);
9224                        }
9225                    }
9226                    Clause::With(w) => {
9227                        for item in &w.items {
9228                            if let ReturnItem::Expr { expr, .. } = item {
9229                                collect_properties_from_expr_into(expr, properties);
9230                            }
9231                        }
9232                        if let Some(ref wc) = w.where_clause {
9233                            collect_properties_from_expr_into(wc, properties);
9234                        }
9235                    }
9236                    Clause::Return(r) => {
9237                        for item in &r.items {
9238                            if let ReturnItem::Expr { expr, .. } = item {
9239                                collect_properties_from_expr_into(expr, properties);
9240                            }
9241                        }
9242                    }
9243                    _ => {}
9244                }
9245            }
9246        }
9247        Query::Union { left, right, .. } => {
9248            collect_properties_from_subquery(left, properties);
9249            collect_properties_from_subquery(right, properties);
9250        }
9251        _ => {}
9252    }
9253}
9254
9255/// Analyze function calls to extract property requirements for pushdown hydration
9256///
9257/// This function examines function calls and their arguments to determine which properties
9258/// need to be loaded for entity arguments. For example:
9259/// - validAt(e, 'start', 'end', ts) -> e needs {start, end}
9260/// - keys(n) -> n needs all properties (*)
9261///
9262/// The extracted requirements are added to the properties map for later use during
9263/// scan planning.
9264fn analyze_function_property_requirements(
9265    name: &str,
9266    args: &[Expr],
9267    properties: &mut HashMap<String, HashSet<String>>,
9268) {
9269    use crate::query::function_props::get_function_spec;
9270
9271    /// Helper to mark a variable as needing all properties.
9272    fn mark_wildcard(var: &str, properties: &mut HashMap<String, HashSet<String>>) {
9273        properties
9274            .entry(var.to_string())
9275            .or_default()
9276            .insert("*".to_string());
9277    }
9278
9279    // System-managed timestamp functions: require only the corresponding
9280    // `_created_at` / `_updated_at` column, not full entity materialization.
9281    if name.eq_ignore_ascii_case("created_at") || name.eq_ignore_ascii_case("updated_at") {
9282        if let Some(Expr::Variable(var)) = args.first() {
9283            let col = if name.eq_ignore_ascii_case("created_at") {
9284                "_created_at"
9285            } else {
9286                "_updated_at"
9287            };
9288            properties
9289                .entry(var.clone())
9290                .or_default()
9291                .insert(col.to_string());
9292        }
9293        return;
9294    }
9295
9296    let Some(spec) = get_function_spec(name) else {
9297        // Unknown function: conservatively require all properties for variable args
9298        for arg in args {
9299            if let Expr::Variable(var) = arg {
9300                mark_wildcard(var, properties);
9301            }
9302        }
9303        return;
9304    };
9305
9306    // Extract property names from string literal arguments
9307    for &(prop_arg_idx, entity_arg_idx) in spec.property_name_args {
9308        let entity_arg = args.get(entity_arg_idx);
9309        let prop_arg = args.get(prop_arg_idx);
9310
9311        match (entity_arg, prop_arg) {
9312            (Some(Expr::Variable(var)), Some(Expr::Literal(CypherLiteral::String(prop)))) => {
9313                properties
9314                    .entry(var.clone())
9315                    .or_default()
9316                    .insert(prop.clone());
9317            }
9318            (Some(Expr::Variable(var)), Some(Expr::Parameter(_))) => {
9319                // Parameter property name: need all properties
9320                mark_wildcard(var, properties);
9321            }
9322            _ => {}
9323        }
9324    }
9325
9326    // Handle full entity requirement (keys(), properties())
9327    if spec.needs_full_entity {
9328        for &idx in spec.entity_args {
9329            if let Some(Expr::Variable(var)) = args.get(idx) {
9330                mark_wildcard(var, properties);
9331            }
9332        }
9333    }
9334}
9335
9336// ============================================================================
9337// Phase 5a-impl — fork-aware fusion rewrite
9338// ============================================================================
9339
9340/// Trait that exposes the per-fork "is there a fork-local index for
9341/// `(label, column)`?" lookup. Implemented for `StorageManager` so
9342/// callers don't need to depend on the fork module directly; tests
9343/// can mock by implementing it on a `HashMap`.
9344pub trait ForkIndexLookup {
9345    fn fork_index_for(
9346        &self,
9347        label: &str,
9348        column: &str,
9349    ) -> Option<uni_store::fork::ForkLocalIndexKind>;
9350
9351    /// Phase 5b followup: resolve a label id, then dispatch to
9352    /// `fork_index_for`. Used by the rewrite when wrapping
9353    /// `VectorKnn` and `InvertedIndexLookup` nodes which carry
9354    /// `label_id: u16` rather than the label name. Default returns
9355    /// `None`; the `StorageManager` impl resolves via its
9356    /// `schema_manager`.
9357    fn fork_index_for_label_id(
9358        &self,
9359        _label_id: u16,
9360        _column: &str,
9361    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9362        None
9363    }
9364}
9365
9366impl ForkIndexLookup for uni_store::storage::StorageManager {
9367    fn fork_index_for(
9368        &self,
9369        label: &str,
9370        column: &str,
9371    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9372        self.fork_index_exists(label, column)
9373    }
9374
9375    fn fork_index_for_label_id(
9376        &self,
9377        label_id: u16,
9378        column: &str,
9379    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9380        let schema = self.schema_manager().schema();
9381        let label_name = schema.label_name_by_id(label_id)?;
9382        self.fork_index_exists(label_name, column)
9383    }
9384}
9385
9386/// Fold a trailing `SET var.prop = value` into the freshly-created entity's
9387/// inline property map, eliminating the separate `Set` write pass.
9388///
9389/// Rewrites `CREATE (a)-[r:T]->(b) SET r.x = e.v` into the equivalent of
9390/// `CREATE (a)-[r:T {x: e.v}]->(b)`, so the plan collapses from `Set → Create`
9391/// to a single `Create`. This removes an entire read-modify-write operator
9392/// (`MutationSetExec`) — measured at ~38% of per-edge `UNWIND … CREATE … SET`
9393/// execution — that the bulk write path never pays.
9394///
9395/// # Examples
9396///
9397/// ```ignore
9398/// // CREATE (a)-[r:LINK]->(b) SET r.role = e.role   ==>
9399/// // CREATE (a)-[r:LINK {role: e.role}]->(b)
9400/// let fused = fuse_create_set(plan);
9401/// ```
9402///
9403/// The fold is **all-or-nothing per `SET` clause** and only fires when every
9404/// item is safe:
9405/// - the item is the simple `Variable.property = value` form (not `+=`, label
9406///   set `SET n:L`, or whole-entity map assignment `SET n = {...}`),
9407/// - the target variable is introduced by the immediately-preceding
9408///   `Create`/`CreateBatch` (a MATCHed variable is left untouched),
9409/// - the target element's inline properties are absent or a map literal (a
9410///   parameter-map form such as `CREATE (n $props)` cannot be merged),
9411/// - the value references no variable created in the same statement, so
9412///   evaluating it at create time is observably identical to SET time.
9413///
9414/// When any item fails these checks the whole `Set` node is preserved, keeping
9415/// semantics unchanged. The pass is idempotent: a plan with no fusable
9416/// `Set`/`Create` adjacency passes through untouched.
9417#[must_use]
9418pub fn fuse_create_set(plan: LogicalPlan) -> LogicalPlan {
9419    match plan {
9420        LogicalPlan::Set { input, items } => {
9421            // Fuse any deeper adjacency first so chained
9422            // `CREATE … SET … CREATE … SET` collapses bottom-up.
9423            let input = fuse_create_set(*input);
9424            match input {
9425                LogicalPlan::Create {
9426                    input: child,
9427                    pattern,
9428                } => {
9429                    let bound_vars = crate::query::df_planner::collect_plan_variables(&child);
9430                    match try_fuse_set_items(std::slice::from_ref(&pattern), &items, &bound_vars) {
9431                        Some(mut patterns) => LogicalPlan::Create {
9432                            input: child,
9433                            // try_fuse_set_items returns exactly as many patterns
9434                            // as it was given (one here).
9435                            pattern: patterns
9436                                .pop()
9437                                .expect("one pattern in yields one pattern out"),
9438                        },
9439                        None => LogicalPlan::Set {
9440                            input: Box::new(LogicalPlan::Create {
9441                                input: child,
9442                                pattern,
9443                            }),
9444                            items,
9445                        },
9446                    }
9447                }
9448                LogicalPlan::CreateBatch {
9449                    input: child,
9450                    patterns,
9451                } => {
9452                    let bound_vars = crate::query::df_planner::collect_plan_variables(&child);
9453                    match try_fuse_set_items(&patterns, &items, &bound_vars) {
9454                        Some(fused) => LogicalPlan::CreateBatch {
9455                            input: child,
9456                            patterns: fused,
9457                        },
9458                        None => LogicalPlan::Set {
9459                            input: Box::new(LogicalPlan::CreateBatch {
9460                                input: child,
9461                                patterns,
9462                            }),
9463                            items,
9464                        },
9465                    }
9466                }
9467                other => LogicalPlan::Set {
9468                    input: Box::new(other),
9469                    items,
9470                },
9471            }
9472        }
9473        // Recurse through the operators that can sit above a write clause so a
9474        // `Set` under RETURN/ORDER BY/LIMIT is still reached. This mirrors the
9475        // pragmatic recursion of `rewrite_for_fork_fusion`: variants that never
9476        // sit above a write clause fall through `other => other` unchanged.
9477        LogicalPlan::Project { input, projections } => LogicalPlan::Project {
9478            input: Box::new(fuse_create_set(*input)),
9479            projections,
9480        },
9481        LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
9482            input: Box::new(fuse_create_set(*input)),
9483            skip,
9484            fetch,
9485        },
9486        LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
9487            input: Box::new(fuse_create_set(*input)),
9488            order_by,
9489        },
9490        LogicalPlan::Filter {
9491            input,
9492            predicate,
9493            optional_variables,
9494        } => LogicalPlan::Filter {
9495            input: Box::new(fuse_create_set(*input)),
9496            predicate,
9497            optional_variables,
9498        },
9499        LogicalPlan::Create { input, pattern } => LogicalPlan::Create {
9500            input: Box::new(fuse_create_set(*input)),
9501            pattern,
9502        },
9503        LogicalPlan::CreateBatch { input, patterns } => LogicalPlan::CreateBatch {
9504            input: Box::new(fuse_create_set(*input)),
9505            patterns,
9506        },
9507        other => other,
9508    }
9509}
9510
9511/// Try to fold every `SET` item into the given CREATE patterns.
9512///
9513/// Returns the rewritten patterns when *all* items fuse safely (see
9514/// [`fuse_create_set`] for the conditions); returns `None` the moment any item
9515/// is unfusable, so the caller can keep the original `Set` node untouched.
9516///
9517/// `bound_vars` are the variables produced by the CREATE's input plan (e.g. an
9518/// upstream MATCH). A CREATE pattern may *reuse* such a variable as an endpoint
9519/// (`MATCH (a) CREATE (a)-[r:T]->(b)`), so `pattern_variable_names` alone cannot
9520/// tell a freshly-created variable from a reused one. Reused variables are
9521/// excluded from `owner`: a `SET` on them must not fuse, because the executor
9522/// skips inline properties on already-bound elements (which would silently drop
9523/// the write).
9524fn try_fuse_set_items(
9525    patterns: &[Pattern],
9526    items: &[SetItem],
9527    bound_vars: &HashSet<String>,
9528) -> Option<Vec<Pattern>> {
9529    // Map each freshly-created variable to the index of the pattern that
9530    // introduces it, skipping any variable already bound upstream.
9531    let mut owner: HashMap<String, usize> = HashMap::new();
9532    for (idx, pattern) in patterns.iter().enumerate() {
9533        for var in crate::query::df_graph::mutation_common::pattern_variable_names(pattern) {
9534            if bound_vars.contains(&var) {
9535                continue;
9536            }
9537            owner.entry(var).or_insert(idx);
9538        }
9539    }
9540
9541    let mut out = patterns.to_vec();
9542    for item in items {
9543        let SetItem::Property { expr, value } = item else {
9544            return None; // `+=`, label set, or whole-entity map assignment
9545        };
9546        let Expr::Property(base, prop) = expr else {
9547            return None; // not a property target
9548        };
9549        let Expr::Variable(var) = base.as_ref() else {
9550            return None; // e.g. `n[expr].x` or a deeper path
9551        };
9552        let Some(&idx) = owner.get(var) else {
9553            return None; // target is a MATCHed (not created) variable
9554        };
9555        // Evaluating the value at create time must equal evaluating it at SET
9556        // time: reject any reference to a variable created in this statement
9557        // (its value may not yet exist when the element is constructed).
9558        if collect_expr_variables(value)
9559            .iter()
9560            .any(|referenced| owner.contains_key(referenced))
9561        {
9562            return None;
9563        }
9564        if !merge_pattern_property(&mut out[idx], var, prop, value) {
9565            return None; // element absent or has a non-map property form
9566        }
9567    }
9568    Some(out)
9569}
9570
9571/// Merge `var.prop = value` into the matching element's inline property map.
9572///
9573/// Returns `false` (leaving the pattern unchanged) when the variable's element
9574/// is not found or its existing properties are a non-map expression that cannot
9575/// be merged. Any pre-existing entry for `prop` is replaced so the SET's
9576/// last-write-wins precedence is preserved.
9577fn merge_pattern_property(pattern: &mut Pattern, var: &str, prop: &str, value: &Expr) -> bool {
9578    for path in &mut pattern.paths {
9579        if merge_into_elements(&mut path.elements, var, prop, value) {
9580            return true;
9581        }
9582    }
9583    false
9584}
9585
9586/// Recursive worker for [`merge_pattern_property`] over a list of elements.
9587fn merge_into_elements(
9588    elements: &mut [PatternElement],
9589    var: &str,
9590    prop: &str,
9591    value: &Expr,
9592) -> bool {
9593    for element in elements {
9594        match element {
9595            PatternElement::Node(n) if n.variable.as_deref() == Some(var) => {
9596                return set_map_property(&mut n.properties, prop, value.clone());
9597            }
9598            PatternElement::Relationship(r) if r.variable.as_deref() == Some(var) => {
9599                return set_map_property(&mut r.properties, prop, value.clone());
9600            }
9601            PatternElement::Parenthesized { pattern, .. } => {
9602                if merge_into_elements(&mut pattern.elements, var, prop, value) {
9603                    return true;
9604                }
9605            }
9606            _ => {}
9607        }
9608    }
9609    false
9610}
9611
9612/// Set `prop = value` on an optional inline property map, last-write-wins.
9613///
9614/// Returns `false` without mutating when the properties are present but are not
9615/// a map literal (e.g. `CREATE (n $params)`), which cannot accept a single key.
9616fn set_map_property(props: &mut Option<Expr>, prop: &str, value: Expr) -> bool {
9617    match props {
9618        None => {
9619            *props = Some(Expr::Map(vec![(prop.to_string(), value)]));
9620            true
9621        }
9622        Some(Expr::Map(entries)) => {
9623            entries.retain(|(k, _)| k != prop);
9624            entries.push((prop.to_string(), value));
9625            true
9626        }
9627        Some(_) => false,
9628    }
9629}
9630
9631/// Walk a [`LogicalPlan`] tree and rewrite each `Scan` whose target
9632/// `(label, column)` has a registered fork-local index into the
9633/// matching `FusedIndexScan` variant.
9634///
9635/// Phase 5a-impl Step 4 covers `VidUidForkFirst`; Steps 5 and 6 add
9636/// `BtreeUnion` and `SortedKWayMerge` by extending `kind_for_filter`.
9637///
9638/// Idempotent: a tree that already contains `FusedIndexScan` nodes
9639/// passes through unchanged.
9640#[must_use]
9641pub fn rewrite_for_fork_fusion<L: ForkIndexLookup>(plan: LogicalPlan, lookup: &L) -> LogicalPlan {
9642    rewrite_node(plan, lookup)
9643}
9644
9645fn rewrite_node<L: ForkIndexLookup>(plan: LogicalPlan, lookup: &L) -> LogicalPlan {
9646    match plan {
9647        LogicalPlan::Scan {
9648            label_id,
9649            labels,
9650            variable,
9651            filter,
9652            optional,
9653        } => {
9654            // VidUid fusion only fires on a single-label scan with an
9655            // equality filter on a registered UID column. BTree and
9656            // Sorted will extend this match in Steps 5 and 6.
9657            let kind = if labels.len() == 1
9658                && let Some(col) = filter
9659                    .as_ref()
9660                    .and_then(|f| equality_target_column(f, &variable))
9661                && let Some(idx_kind) = lookup.fork_index_for(&labels[0], &col)
9662            {
9663                into_fusion_kind(idx_kind)
9664            } else {
9665                None
9666            };
9667            match kind {
9668                Some(kind) => LogicalPlan::FusedIndexScan {
9669                    label_id,
9670                    labels,
9671                    variable,
9672                    filter,
9673                    optional,
9674                    kind,
9675                },
9676                None => LogicalPlan::Scan {
9677                    label_id,
9678                    labels,
9679                    variable,
9680                    filter,
9681                    optional,
9682                },
9683            }
9684        }
9685        // Phase 5b followup: wrap lossy leaf operators when a
9686        // matching fork-local index has been registered. The wrap
9687        // preserves the original node's fields (the physical
9688        // planner unwraps and recurses); only the explain-plan
9689        // surface and runtime-stats operator name change. The
9690        // actual fusion still happens at the `BranchedBackend`
9691        // layer via Lance's per-branch reads.
9692        //
9693        // The CALL-style vector/FTS queries land as `ProcedureCall`
9694        // (not the dedicated `VectorKnn`/`InvertedIndexLookup`
9695        // operators); recognize those by procedure name and the
9696        // shape of their first two arguments (`label, column, ...`).
9697        LogicalPlan::ProcedureCall {
9698            procedure_name,
9699            arguments,
9700            yield_items,
9701        } => {
9702            let kind = procedure_call_fusion_kind(&procedure_name, &arguments, lookup);
9703            let inner = LogicalPlan::ProcedureCall {
9704                procedure_name,
9705                arguments,
9706                yield_items,
9707            };
9708            match kind {
9709                Some(kind) => LogicalPlan::FusedIndexScanWrapped {
9710                    inner: Box::new(inner),
9711                    kind,
9712                },
9713                None => inner,
9714            }
9715        }
9716        LogicalPlan::VectorKnn {
9717            label_id,
9718            variable,
9719            property,
9720            query,
9721            k,
9722            threshold,
9723        } => {
9724            if let Some(idx_kind) = lookup.fork_index_for_label_id(label_id, &property)
9725                && let Some(kind) = into_fusion_kind(idx_kind)
9726            {
9727                LogicalPlan::FusedIndexScanWrapped {
9728                    inner: Box::new(LogicalPlan::VectorKnn {
9729                        label_id,
9730                        variable,
9731                        property,
9732                        query,
9733                        k,
9734                        threshold,
9735                    }),
9736                    kind,
9737                }
9738            } else {
9739                LogicalPlan::VectorKnn {
9740                    label_id,
9741                    variable,
9742                    property,
9743                    query,
9744                    k,
9745                    threshold,
9746                }
9747            }
9748        }
9749        LogicalPlan::InvertedIndexLookup {
9750            label_id,
9751            variable,
9752            property,
9753            terms,
9754        } => {
9755            if let Some(idx_kind) = lookup.fork_index_for_label_id(label_id, &property)
9756                && let Some(kind) = into_fusion_kind(idx_kind)
9757            {
9758                LogicalPlan::FusedIndexScanWrapped {
9759                    inner: Box::new(LogicalPlan::InvertedIndexLookup {
9760                        label_id,
9761                        variable,
9762                        property,
9763                        terms,
9764                    }),
9765                    kind,
9766                }
9767            } else {
9768                LogicalPlan::InvertedIndexLookup {
9769                    label_id,
9770                    variable,
9771                    property,
9772                    terms,
9773                }
9774            }
9775        }
9776        // Tree-recursive variants — only the ones that can carry a
9777        // Scan in their subtree need to recurse here. Adding more is
9778        // safe (a missing recursion just means fusion doesn't fire
9779        // for that nested context, not incorrect results).
9780        LogicalPlan::Filter {
9781            input,
9782            predicate,
9783            optional_variables,
9784        } => LogicalPlan::Filter {
9785            input: Box::new(rewrite_node(*input, lookup)),
9786            predicate,
9787            optional_variables,
9788        },
9789        LogicalPlan::Project { input, projections } => LogicalPlan::Project {
9790            input: Box::new(rewrite_node(*input, lookup)),
9791            projections,
9792        },
9793        LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
9794            input: Box::new(rewrite_node(*input, lookup)),
9795            skip,
9796            fetch,
9797        },
9798        LogicalPlan::Sort { input, order_by } => {
9799            // Phase 5a-impl Sorted fusion: when the immediate child
9800            // is a single-label Scan AND the sole sort key is a
9801            // single-column property reference on that scan's
9802            // variable AND the column has a fork-local Sorted index
9803            // registered, rewrite to FusedIndexScan { SortedKWayMerge }.
9804            // Otherwise recurse normally.
9805            let new_input = match (*input, &order_by[..]) {
9806                (
9807                    LogicalPlan::Scan {
9808                        label_id,
9809                        labels,
9810                        variable,
9811                        filter,
9812                        optional,
9813                    },
9814                    [single_sort],
9815                ) if labels.len() == 1
9816                    && let Some(col) = column_of_scan_variable(&single_sort.expr, &variable)
9817                    && let Some(uni_store::fork::ForkLocalIndexKind::Sorted) =
9818                        lookup.fork_index_for(&labels[0], &col) =>
9819                {
9820                    LogicalPlan::FusedIndexScan {
9821                        label_id,
9822                        labels,
9823                        variable,
9824                        filter,
9825                        optional,
9826                        kind: FusionKind::SortedKWayMerge,
9827                    }
9828                }
9829                (other_input, _) => rewrite_node(other_input, lookup),
9830            };
9831            LogicalPlan::Sort {
9832                input: Box::new(new_input),
9833                order_by,
9834            }
9835        }
9836        LogicalPlan::Union { left, right, all } => LogicalPlan::Union {
9837            left: Box::new(rewrite_node(*left, lookup)),
9838            right: Box::new(rewrite_node(*right, lookup)),
9839            all,
9840        },
9841        // Everything else passes through unchanged. Adding more
9842        // arms is purely additive — fusion just doesn't fire inside
9843        // un-recursed-into subtrees.
9844        other => other,
9845    }
9846}
9847
9848/// Phase 5b followup: inspect a CALL-style procedure invocation
9849/// for a `(label, column)` pair and check whether a fork-local
9850/// index has been registered for it.
9851///
9852/// Recognizes:
9853/// - `uni.vector.query(label, column, query_vec, k)` → `AnnRerank`
9854///   when a `Vector` fork-local index exists.
9855/// - `uni.fts.query(label, column, query, k)` → `Bm25Rrf` when a
9856///   `FullText` fork-local index exists.
9857///
9858/// Returns `None` for any other procedure (no rewrite) or when the
9859/// registry has no matching entry.
9860fn procedure_call_fusion_kind<L: ForkIndexLookup>(
9861    procedure_name: &str,
9862    arguments: &[Expr],
9863    lookup: &L,
9864) -> Option<FusionKind> {
9865    if arguments.len() < 2 {
9866        return None;
9867    }
9868    let label = match &arguments[0] {
9869        Expr::Literal(uni_cypher::ast::CypherLiteral::String(s)) => s.as_str(),
9870        _ => return None,
9871    };
9872    let column = match &arguments[1] {
9873        Expr::Literal(uni_cypher::ast::CypherLiteral::String(s)) => s.as_str(),
9874        _ => return None,
9875    };
9876    let expected = match procedure_name {
9877        "uni.vector.query" => uni_store::fork::ForkLocalIndexKind::Vector,
9878        "uni.fts.query" => uni_store::fork::ForkLocalIndexKind::FullText,
9879        _ => return None,
9880    };
9881    let registered = lookup.fork_index_for(label, column)?;
9882    if registered != expected {
9883        return None;
9884    }
9885    into_fusion_kind(registered)
9886}
9887
9888/// Map a fork-local index kind to its planner-side fusion variant.
9889/// Returns `None` for any future `ForkLocalIndexKind` we don't yet
9890/// know how to fuse — the caller falls back to a regular Scan.
9891fn into_fusion_kind(kind: uni_store::fork::ForkLocalIndexKind) -> Option<FusionKind> {
9892    use uni_store::fork::ForkLocalIndexKind as K;
9893    match kind {
9894        K::VidUid => Some(FusionKind::VidUidForkFirst),
9895        K::ScalarBtree => Some(FusionKind::BtreeUnion),
9896        K::Sorted => Some(FusionKind::SortedKWayMerge),
9897        K::Vector => Some(FusionKind::AnnRerank),
9898        K::FullText => Some(FusionKind::Bm25Rrf),
9899        // `ForkLocalIndexKind` is `#[non_exhaustive]`; future kinds
9900        // we don't yet handle are silently passed through as a
9901        // regular Scan so a forward-incompatible binary doesn't
9902        // panic — just misses the fusion opportunity.
9903        _ => None,
9904    }
9905}
9906
9907/// Inspect a Scan filter `Expr` for a single-column equality predicate
9908/// against the scan's variable. Returns the column name if the
9909/// predicate matches the shape `variable.column = <literal_or_param>`
9910/// (or its commuted form). Returns `None` for any other shape — fusion
9911/// only fires on the simple case in Phase 5a-impl.
9912fn equality_target_column(filter: &Expr, scan_variable: &str) -> Option<String> {
9913    let (lhs, rhs) = match filter {
9914        Expr::BinaryOp {
9915            left,
9916            op: uni_cypher::ast::BinaryOp::Eq,
9917            right,
9918        } => (left.as_ref(), right.as_ref()),
9919        _ => return None,
9920    };
9921    // Try lhs = column-of-scan-var, rhs = literal/param; or commuted.
9922    if let Some(col) = column_of_scan_variable(lhs, scan_variable)
9923        && is_constant_or_param(rhs)
9924    {
9925        return Some(col);
9926    }
9927    if let Some(col) = column_of_scan_variable(rhs, scan_variable)
9928        && is_constant_or_param(lhs)
9929    {
9930        return Some(col);
9931    }
9932    None
9933}
9934
9935fn column_of_scan_variable(expr: &Expr, scan_variable: &str) -> Option<String> {
9936    if let Expr::Property(base, prop) = expr
9937        && let Expr::Variable(v) = base.as_ref()
9938        && v == scan_variable
9939    {
9940        return Some(prop.clone());
9941    }
9942    None
9943}
9944
9945fn is_constant_or_param(expr: &Expr) -> bool {
9946    matches!(expr, Expr::Literal(_) | Expr::Parameter(_))
9947}
9948
#[cfg(test)]
mod pushdown_tests {
    use super::*;

    /// Builds an `Expr::Variable` referring to `name`.
    fn var(name: &str) -> Expr {
        Expr::Variable(name.to_string())
    }

    /// Builds a string-literal expression holding `text`.
    fn string_literal(text: &str) -> Expr {
        Expr::Literal(CypherLiteral::String(text.to_string()))
    }

    /// Builds an `Expr::Parameter` named `name`.
    fn param(name: &str) -> Expr {
        Expr::Parameter(name.to_string())
    }

    /// Collects `names` into an owned set for comparison against the
    /// property set recorded for a variable.
    fn name_set(names: &[&str]) -> HashSet<String> {
        names.iter().map(|&name| name.to_owned()).collect()
    }

    #[test]
    fn test_validat_extracts_property_names() {
        // validAt(e, 'start', 'end', ts) → e: {start, end}
        let args = vec![
            var("e"),
            string_literal("start"),
            string_literal("end"),
            var("ts"),
        ];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("uni.temporal.validAt", &args, &mut properties);

        assert!(properties.contains_key("e"));
        assert_eq!(properties["e"], name_set(&["start", "end"]));
    }

    #[test]
    fn test_keys_requires_wildcard() {
        // keys(n) → n: {*}
        let args = vec![var("n")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("keys", &args, &mut properties);

        assert!(properties.contains_key("n"));
        assert_eq!(properties["n"], name_set(&["*"]));
    }

    #[test]
    fn test_properties_requires_wildcard() {
        // properties(n) → n: {*}
        let args = vec![var("n")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("properties", &args, &mut properties);

        assert!(properties.contains_key("n"));
        assert_eq!(properties["n"], name_set(&["*"]));
    }

    #[test]
    fn test_unknown_function_conservative() {
        // customUdf(e) → e: {*}
        let args = vec![var("e")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("customUdf", &args, &mut properties);

        assert!(properties.contains_key("e"));
        assert_eq!(properties["e"], name_set(&["*"]));
    }

    #[test]
    fn test_parameter_property_name() {
        // validAt(e, $start, $end, ts) → e: {*}
        let args = vec![var("e"), param("start"), param("end"), var("ts")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("uni.temporal.validAt", &args, &mut properties);

        assert!(properties.contains_key("e"));
        assert!(properties["e"].contains("*"));
    }

    #[test]
    fn test_validat_expr_extracts_properties() {
        // The dedicated `Expr::ValidAt` variant should surface both of
        // its named boundary properties for the entity variable.
        let validat_expr = Expr::ValidAt {
            entity: Box::new(var("e")),
            timestamp: Box::new(var("ts")),
            start_prop: Some("valid_from".to_string()),
            end_prop: Some("valid_to".to_string()),
        };
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&validat_expr, &mut properties);

        assert!(properties.contains_key("e"));
        let recorded = &properties["e"];
        assert!(recorded.contains("valid_from"));
        assert!(recorded.contains("valid_to"));
    }

    #[test]
    fn test_array_index_requires_wildcard() {
        // e[prop] → e: {*}
        let array_index_expr = Expr::ArrayIndex {
            array: Box::new(var("e")),
            index: Box::new(var("prop")),
        };
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&array_index_expr, &mut properties);

        assert!(properties.contains_key("e"));
        assert!(properties["e"].contains("*"));
    }

    #[test]
    fn test_property_access_extraction() {
        // e.name → e: {name}
        let prop_access = Expr::Property(Box::new(var("e")), "name".to_string());
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&prop_access, &mut properties);

        assert!(properties.contains_key("e"));
        assert!(properties["e"].contains("name"));
    }
}