Skip to main content

uni_query/query/
planner.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use crate::query::pushdown::{PredicateAnalyzer, try_label_or_to_union, try_type_or_to_union};
5use anyhow::{Result, anyhow};
6use arrow_array::RecordBatch;
7use arrow_schema::{DataType, SchemaRef};
8use parking_lot::RwLock;
9use std::collections::{HashMap, HashSet};
10use std::sync::Arc;
11use uni_common::Value;
12use uni_common::core::schema::{
13    EmbeddingConfig, FullTextIndexConfig, IndexDefinition, JsonFtsIndexConfig, ScalarIndexConfig,
14    ScalarIndexType, Schema, SparseVectorIndexConfig, TokenizerConfig, VectorIndexConfig,
15};
16use uni_cypher::ast::{
17    AlterEdgeType, AlterLabel, BinaryOp, CallKind, Clause, CreateConstraint, CreateEdgeType,
18    CreateLabel, CypherLiteral, Direction, DropConstraint, DropEdgeType, DropLabel, Expr,
19    MatchClause, MergeClause, NodePattern, PathPattern, Pattern, PatternElement, Query,
20    RelationshipPattern, RemoveItem, ReturnClause, ReturnItem, SchemaCommand, SetClause, SetItem,
21    ShortestPathMode, ShowConstraints, SortItem, Statement, WindowSpec, WithClause,
22    WithRecursiveClause,
23};
24
/// Sentinel column name inserted into a variable's property set to request
/// that the planner build the bare struct column (`add_structural_projection`)
/// WITHOUT pulling the full schema.
///
/// Emitted by `mark_set_item_variables` for `SetItem::Property` targets only.
/// Other SET variants (`Labels`, `Variable`, `VariablePlus`) and REMOVE still
/// emit `"*"` because they replace/merge the whole node.
///
/// **Union semantics:** When both `"*"` and the sentinel appear in the same
/// variable's HashSet (e.g. `SET n.x = 1 RETURN n` collects both), `"*"`
/// dominates — schema expansion still happens. The sentinel only changes
/// behavior when it is the sole structural marker present.
///
/// **Reserved-name convention:** the double-underscore prefix and suffix mark
/// this name as internal. Schema validation should reject user-declared
/// properties with this name (deferred follow-up).
pub(crate) const STRUCT_ONLY_SENTINEL: &str = "__set_struct__";
42
/// Semantic category of a variable that is in scope, used for semantic validation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VariableType {
    /// A node variable, e.g. from `MATCH (n)` or `CREATE (n)`.
    Node,
    /// An edge/relationship variable, e.g. from `MATCH ()-[r]->()`.
    Edge,
    /// A path variable, e.g. from `MATCH p = (a)-[*]->(b)`.
    Path,
    /// A scalar variable, e.g. from `WITH expr AS x` or `UNWIND list AS item`.
    /// It may hold a map or dynamic value, so property access is allowed.
    Scalar,
    /// A scalar produced by a known non-graph literal (int, float, bool,
    /// string, list). Property access is NOT allowed on these at compile time.
    ScalarLiteral,
    /// A variable imported from an outer scope whose type is unknown (the
    /// string vars of `plan_with_scope`). It is compatible with any concrete
    /// type, which lets subqueries re-bind the variable.
    Imported,
}

impl VariableType {
    /// Reports whether a variable of type `self` may be used where `expected`
    /// is required.
    ///
    /// - `Imported` is accepted everywhere, because its real type is unknown
    ///   at plan time.
    /// - `ScalarLiteral` is accepted as itself and additionally as `Scalar`.
    /// - Every other type is accepted only as exactly itself.
    fn is_compatible_with(self, expected: VariableType) -> bool {
        match self {
            VariableType::Imported => true,
            VariableType::ScalarLiteral => matches!(
                expected,
                VariableType::ScalarLiteral | VariableType::Scalar
            ),
            concrete => concrete == expected,
        }
    }
}
74
75/// Information about a variable in scope during planning.
76#[derive(Debug, Clone)]
77pub struct VariableInfo {
78    /// Variable name as written in the query.
79    pub name: String,
80    /// Semantic type of the variable.
81    pub var_type: VariableType,
82    /// True if this is a variable-length path (VLP) step variable.
83    ///
84    /// VLP step variables are typed as Edge but semantically hold edge lists.
85    pub is_vlp: bool,
86}
87
88impl VariableInfo {
89    pub fn new(name: String, var_type: VariableType) -> Self {
90        Self {
91            name,
92            var_type,
93            is_vlp: false,
94        }
95    }
96}
97
98/// Find a variable in scope by name.
99fn find_var_in_scope<'a>(vars: &'a [VariableInfo], name: &str) -> Option<&'a VariableInfo> {
100    vars.iter().find(|v| v.name == name)
101}
102
103/// Check if a variable is in scope.
104fn is_var_in_scope(vars: &[VariableInfo], name: &str) -> bool {
105    find_var_in_scope(vars, name).is_some()
106}
107
108/// Check if an expression contains a pattern predicate.
109fn contains_pattern_predicate(expr: &Expr) -> bool {
110    if matches!(
111        expr,
112        Expr::Exists {
113            from_pattern_predicate: true,
114            ..
115        }
116    ) {
117        return true;
118    }
119    let mut found = false;
120    expr.for_each_child(&mut |child| {
121        if !found {
122            found = contains_pattern_predicate(child);
123        }
124    });
125    found
126}
127
128/// Add a variable to scope with type conflict validation.
129/// Returns an error if the variable already exists with a different type.
130fn add_var_to_scope(
131    vars: &mut Vec<VariableInfo>,
132    name: &str,
133    var_type: VariableType,
134) -> Result<()> {
135    if name.is_empty() {
136        return Ok(());
137    }
138
139    if let Some(existing) = vars.iter_mut().find(|v| v.name == name) {
140        if existing.var_type == VariableType::Imported {
141            // Imported vars upgrade to the concrete type
142            existing.var_type = var_type;
143        } else if var_type == VariableType::Imported || existing.var_type == var_type {
144            // New type is Imported (keep existing) or same type — no conflict
145        } else if matches!(
146            existing.var_type,
147            VariableType::Scalar | VariableType::ScalarLiteral
148        ) && matches!(var_type, VariableType::Node | VariableType::Edge)
149        {
150            // Scalar can be used as Node/Edge in CREATE context — a scalar
151            // holding a node/edge reference is valid for pattern use
152            existing.var_type = var_type;
153        } else {
154            return Err(anyhow!(
155                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as {:?}",
156                name,
157                existing.var_type,
158                var_type
159            ));
160        }
161    } else {
162        vars.push(VariableInfo::new(name.to_string(), var_type));
163    }
164    Ok(())
165}
166
167/// Convert VariableInfo vec to String vec for backward compatibility
168fn vars_to_strings(vars: &[VariableInfo]) -> Vec<String> {
169    vars.iter().map(|v| v.name.clone()).collect()
170}
171
172fn infer_with_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
173    match expr {
174        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
175            .map(|info| info.var_type)
176            .unwrap_or(VariableType::Scalar),
177        Expr::Literal(CypherLiteral::Null) => VariableType::Imported,
178        // Known non-graph literals: property access is NOT valid on these.
179        Expr::Literal(CypherLiteral::Integer(_))
180        | Expr::Literal(CypherLiteral::Float(_))
181        | Expr::Literal(CypherLiteral::String(_))
182        | Expr::Literal(CypherLiteral::Bool(_))
183        | Expr::Literal(CypherLiteral::Bytes(_)) => VariableType::ScalarLiteral,
184        Expr::FunctionCall { name, args, .. } => {
185            let lower = name.to_lowercase();
186            if lower == "coalesce" {
187                infer_coalesce_type(args, vars_in_scope)
188            } else if lower == "collect" && !args.is_empty() {
189                let collected = infer_with_output_type(&args[0], vars_in_scope);
190                if matches!(
191                    collected,
192                    VariableType::Node
193                        | VariableType::Edge
194                        | VariableType::Path
195                        | VariableType::Imported
196                ) {
197                    collected
198                } else {
199                    VariableType::Scalar
200                }
201            } else {
202                VariableType::Scalar
203            }
204        }
205        // WITH list literals/expressions produce scalar list values. Preserving
206        // entity typing here causes invalid node/edge reuse in later MATCH clauses
207        // (e.g. WITH [n] AS users; MATCH (users)-->() should fail at compile time).
208        // Lists are ScalarLiteral since property access is not valid on them.
209        Expr::List(_) => VariableType::ScalarLiteral,
210        _ => VariableType::Scalar,
211    }
212}
213
214fn infer_coalesce_type(args: &[Expr], vars_in_scope: &[VariableInfo]) -> VariableType {
215    let mut resolved: Option<VariableType> = None;
216    let mut saw_imported = false;
217    for arg in args {
218        let t = infer_with_output_type(arg, vars_in_scope);
219        match t {
220            VariableType::Node | VariableType::Edge | VariableType::Path => {
221                if let Some(existing) = resolved {
222                    if existing != t {
223                        return VariableType::Scalar;
224                    }
225                } else {
226                    resolved = Some(t);
227                }
228            }
229            VariableType::Imported => saw_imported = true,
230            VariableType::Scalar | VariableType::ScalarLiteral => {}
231        }
232    }
233    if let Some(t) = resolved {
234        t
235    } else if saw_imported {
236        VariableType::Imported
237    } else {
238        VariableType::Scalar
239    }
240}
241
242fn infer_unwind_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
243    match expr {
244        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
245            .map(|info| info.var_type)
246            .unwrap_or(VariableType::Scalar),
247        Expr::FunctionCall { name, args, .. }
248            if name.eq_ignore_ascii_case("collect") && !args.is_empty() =>
249        {
250            infer_with_output_type(&args[0], vars_in_scope)
251        }
252        Expr::List(items) => {
253            let mut inferred: Option<VariableType> = None;
254            for item in items {
255                let t = infer_with_output_type(item, vars_in_scope);
256                if !matches!(
257                    t,
258                    VariableType::Node
259                        | VariableType::Edge
260                        | VariableType::Path
261                        | VariableType::Imported
262                ) {
263                    return VariableType::Scalar;
264                }
265                if let Some(existing) = inferred {
266                    if existing != t
267                        && t != VariableType::Imported
268                        && existing != VariableType::Imported
269                    {
270                        return VariableType::Scalar;
271                    }
272                    if existing == VariableType::Imported && t != VariableType::Imported {
273                        inferred = Some(t);
274                    }
275                } else {
276                    inferred = Some(t);
277                }
278            }
279            inferred.unwrap_or(VariableType::Scalar)
280        }
281        _ => VariableType::Scalar,
282    }
283}
284
285/// Collect all variable names referenced in an expression
286fn collect_expr_variables(expr: &Expr) -> Vec<String> {
287    let mut vars = Vec::new();
288    collect_expr_variables_inner(expr, &mut vars);
289    vars
290}
291
292/// Collect the names of `$param` references in a constant-foldable expression.
293///
294/// Walks the variants that `eval_const_numeric_expr` accepts (the only shapes a
295/// successfully-folded `LIMIT`/`SKIP` expression can take): parameters,
296/// literals, unary/binary arithmetic, and the whitelisted numeric functions.
297/// Used to tell the plan cache which parameter values were baked into the plan.
298fn collect_expr_parameters(expr: &Expr, names: &mut Vec<String>) {
299    match expr {
300        Expr::Parameter(name) => {
301            if !names.contains(name) {
302                names.push(name.clone());
303            }
304        }
305        Expr::UnaryOp { expr: e, .. } => collect_expr_parameters(e, names),
306        Expr::BinaryOp { left, right, .. } => {
307            collect_expr_parameters(left, names);
308            collect_expr_parameters(right, names);
309        }
310        Expr::FunctionCall { args, .. } => {
311            for a in args {
312                collect_expr_parameters(a, names);
313            }
314        }
315        _ => {}
316    }
317}
318
319fn collect_expr_variables_inner(expr: &Expr, vars: &mut Vec<String>) {
320    let mut add_var = |name: &String| {
321        if !vars.contains(name) {
322            vars.push(name.clone());
323        }
324    };
325
326    match expr {
327        Expr::Variable(name) => add_var(name),
328        Expr::Property(base, _) => collect_expr_variables_inner(base, vars),
329        Expr::BinaryOp { left, right, .. } => {
330            collect_expr_variables_inner(left, vars);
331            collect_expr_variables_inner(right, vars);
332        }
333        Expr::UnaryOp { expr: e, .. }
334        | Expr::IsNull(e)
335        | Expr::IsNotNull(e)
336        | Expr::IsUnique(e) => collect_expr_variables_inner(e, vars),
337        Expr::FunctionCall { args, .. } => {
338            for a in args {
339                collect_expr_variables_inner(a, vars);
340            }
341        }
342        Expr::List(items) => {
343            for item in items {
344                collect_expr_variables_inner(item, vars);
345            }
346        }
347        Expr::In { expr: e, list } => {
348            collect_expr_variables_inner(e, vars);
349            collect_expr_variables_inner(list, vars);
350        }
351        Expr::Case {
352            expr: case_expr,
353            when_then,
354            else_expr,
355        } => {
356            if let Some(e) = case_expr {
357                collect_expr_variables_inner(e, vars);
358            }
359            for (w, t) in when_then {
360                collect_expr_variables_inner(w, vars);
361                collect_expr_variables_inner(t, vars);
362            }
363            if let Some(e) = else_expr {
364                collect_expr_variables_inner(e, vars);
365            }
366        }
367        Expr::Map(entries) => {
368            for (_, v) in entries {
369                collect_expr_variables_inner(v, vars);
370            }
371        }
372        Expr::LabelCheck { expr, .. } => collect_expr_variables_inner(expr, vars),
373        Expr::ArrayIndex { array, index } => {
374            collect_expr_variables_inner(array, vars);
375            collect_expr_variables_inner(index, vars);
376        }
377        Expr::ArraySlice { array, start, end } => {
378            collect_expr_variables_inner(array, vars);
379            if let Some(s) = start {
380                collect_expr_variables_inner(s, vars);
381            }
382            if let Some(e) = end {
383                collect_expr_variables_inner(e, vars);
384            }
385        }
386        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
387        // they introduce local variable bindings not in outer scope.
388        _ => {}
389    }
390}
391
392/// Rewrite ORDER BY expressions to resolve projection aliases back to their source expressions.
393///
394/// Example: `RETURN r AS rel ORDER BY rel.id` becomes `ORDER BY r.id` so Sort can run
395/// before the final RETURN projection without losing alias semantics.
396fn rewrite_order_by_expr_with_aliases(expr: &Expr, aliases: &HashMap<String, Expr>) -> Expr {
397    let repr = expr.to_string_repr();
398    if let Some(rewritten) = aliases.get(&repr) {
399        return rewritten.clone();
400    }
401
402    match expr {
403        Expr::Variable(name) => aliases.get(name).cloned().unwrap_or_else(|| expr.clone()),
404        Expr::Property(base, prop) => Expr::Property(
405            Box::new(rewrite_order_by_expr_with_aliases(base, aliases)),
406            prop.clone(),
407        ),
408        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
409            left: Box::new(rewrite_order_by_expr_with_aliases(left, aliases)),
410            op: *op,
411            right: Box::new(rewrite_order_by_expr_with_aliases(right, aliases)),
412        },
413        Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
414            op: *op,
415            expr: Box::new(rewrite_order_by_expr_with_aliases(inner, aliases)),
416        },
417        Expr::FunctionCall {
418            name,
419            args,
420            distinct,
421            window_spec,
422        } => Expr::FunctionCall {
423            name: name.clone(),
424            args: args
425                .iter()
426                .map(|a| rewrite_order_by_expr_with_aliases(a, aliases))
427                .collect(),
428            distinct: *distinct,
429            window_spec: window_spec.clone(),
430        },
431        Expr::List(items) => Expr::List(
432            items
433                .iter()
434                .map(|item| rewrite_order_by_expr_with_aliases(item, aliases))
435                .collect(),
436        ),
437        Expr::Map(entries) => Expr::Map(
438            entries
439                .iter()
440                .map(|(k, v)| (k.clone(), rewrite_order_by_expr_with_aliases(v, aliases)))
441                .collect(),
442        ),
443        Expr::Case {
444            expr: case_expr,
445            when_then,
446            else_expr,
447        } => Expr::Case {
448            expr: case_expr
449                .as_ref()
450                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
451            when_then: when_then
452                .iter()
453                .map(|(w, t)| {
454                    (
455                        rewrite_order_by_expr_with_aliases(w, aliases),
456                        rewrite_order_by_expr_with_aliases(t, aliases),
457                    )
458                })
459                .collect(),
460            else_expr: else_expr
461                .as_ref()
462                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
463        },
464        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
465        // they introduce local variable bindings that could shadow aliases.
466        _ => expr.clone(),
467    }
468}
469
470/// Validate function call argument types.
471/// Returns error if type constraints are violated.
472fn validate_function_call(name: &str, args: &[Expr], vars_in_scope: &[VariableInfo]) -> Result<()> {
473    let name_lower = name.to_lowercase();
474
475    // labels() requires Node
476    if name_lower == "labels"
477        && let Some(Expr::Variable(var_name)) = args.first()
478        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
479        && !info.var_type.is_compatible_with(VariableType::Node)
480    {
481        return Err(anyhow!(
482            "SyntaxError: InvalidArgumentType - labels() requires a node argument"
483        ));
484    }
485
486    // type() requires Edge
487    if name_lower == "type"
488        && let Some(Expr::Variable(var_name)) = args.first()
489        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
490        && !info.var_type.is_compatible_with(VariableType::Edge)
491    {
492        return Err(anyhow!(
493            "SyntaxError: InvalidArgumentType - type() requires a relationship argument"
494        ));
495    }
496
497    // properties() requires Node/Edge/Map (not scalar literals)
498    if name_lower == "properties"
499        && let Some(arg) = args.first()
500    {
501        match arg {
502            Expr::Literal(CypherLiteral::Integer(_))
503            | Expr::Literal(CypherLiteral::Float(_))
504            | Expr::Literal(CypherLiteral::String(_))
505            | Expr::Literal(CypherLiteral::Bool(_))
506            | Expr::List(_) => {
507                return Err(anyhow!(
508                    "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
509                ));
510            }
511            Expr::Variable(var_name) => {
512                if let Some(info) = find_var_in_scope(vars_in_scope, var_name)
513                    && matches!(
514                        info.var_type,
515                        VariableType::Scalar | VariableType::ScalarLiteral
516                    )
517                {
518                    return Err(anyhow!(
519                        "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
520                    ));
521                }
522            }
523            _ => {}
524        }
525    }
526
527    // nodes()/relationships() require Path
528    if (name_lower == "nodes" || name_lower == "relationships")
529        && let Some(Expr::Variable(var_name)) = args.first()
530        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
531        && !info.var_type.is_compatible_with(VariableType::Path)
532    {
533        return Err(anyhow!(
534            "SyntaxError: InvalidArgumentType - {}() requires a path argument",
535            name_lower
536        ));
537    }
538
539    // size() does NOT accept Path arguments (length() on paths IS valid — returns relationship count)
540    if name_lower == "size"
541        && let Some(Expr::Variable(var_name)) = args.first()
542        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
543        && info.var_type == VariableType::Path
544    {
545        return Err(anyhow!(
546            "SyntaxError: InvalidArgumentType - size() requires a string, list, or map argument"
547        ));
548    }
549
550    // length()/size() do NOT accept Node or single-Edge arguments.
551    // VLP step variables (e.g. `r` in `-[r*1..2]->`) are typed as Edge
552    // but are actually edge lists — size()/length() is valid on those.
553    if (name_lower == "length" || name_lower == "size")
554        && let Some(Expr::Variable(var_name)) = args.first()
555        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
556        && (info.var_type == VariableType::Node
557            || (info.var_type == VariableType::Edge && !info.is_vlp))
558    {
559        return Err(anyhow!(
560            "SyntaxError: InvalidArgumentType - {}() requires a string, list, or path argument",
561            name_lower
562        ));
563    }
564
565    Ok(())
566}
567
568/// Check if an expression is a non-boolean literal.
569fn is_non_boolean_literal(expr: &Expr) -> bool {
570    matches!(
571        expr,
572        Expr::Literal(CypherLiteral::Integer(_))
573            | Expr::Literal(CypherLiteral::Float(_))
574            | Expr::Literal(CypherLiteral::String(_))
575            | Expr::List(_)
576            | Expr::Map(_)
577    )
578}
579
580/// Validate boolean expressions (AND/OR/NOT require boolean arguments).
581fn validate_boolean_expression(expr: &Expr) -> Result<()> {
582    // Check AND/OR/XOR operands and NOT operand for non-boolean literals
583    if let Expr::BinaryOp { left, op, right } = expr
584        && matches!(op, BinaryOp::And | BinaryOp::Or | BinaryOp::Xor)
585    {
586        let op_name = format!("{op:?}").to_uppercase();
587        for operand in [left.as_ref(), right.as_ref()] {
588            if is_non_boolean_literal(operand) {
589                return Err(anyhow!(
590                    "SyntaxError: InvalidArgumentType - {} requires boolean arguments",
591                    op_name
592                ));
593            }
594        }
595    }
596    if let Expr::UnaryOp {
597        op: uni_cypher::ast::UnaryOp::Not,
598        expr: inner,
599    } = expr
600        && is_non_boolean_literal(inner)
601    {
602        return Err(anyhow!(
603            "SyntaxError: InvalidArgumentType - NOT requires a boolean argument"
604        ));
605    }
606    let mut result = Ok(());
607    expr.for_each_child(&mut |child| {
608        if result.is_ok() {
609            result = validate_boolean_expression(child);
610        }
611    });
612    result
613}
614
615/// Validate that all variables used in an expression are in scope.
616fn validate_expression_variables(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
617    let used_vars = collect_expr_variables(expr);
618    for var in used_vars {
619        if !is_var_in_scope(vars_in_scope, &var) {
620            return Err(anyhow!(
621                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
622                var
623            ));
624        }
625    }
626    Ok(())
627}
628
629/// Check if a function name (lowercase) is an aggregate function.
630fn is_aggregate_function_name(name: &str) -> bool {
631    matches!(
632        name.to_lowercase().as_str(),
633        "count"
634            | "sum"
635            | "avg"
636            | "min"
637            | "max"
638            | "collect"
639            | "stdev"
640            | "stddev"
641            | "stdevp"
642            | "stddevp"
643            | "variance"
644            | "variancep"
645            | "percentiledisc"
646            | "percentilecont"
647            | "btic_min"
648            | "btic_max"
649            | "btic_span_agg"
650            | "btic_count_at"
651    ) || uni_cypher::is_known_plugin_aggregate(name)
652}
653
654/// Returns true if the expression is a window function (FunctionCall with window_spec).
655fn is_window_function(expr: &Expr) -> bool {
656    matches!(
657        expr,
658        Expr::FunctionCall {
659            window_spec: Some(_),
660            ..
661        }
662    )
663}
664
665/// Returns true when `expr` reports `is_aggregate()` but is NOT itself a bare
666/// aggregate FunctionCall (or CountSubquery/CollectSubquery). In other words,
667/// the aggregate lives *inside* a wrapper expression (e.g. a ListComprehension,
668/// size() call, BinaryOp, etc.).
669fn is_compound_aggregate(expr: &Expr) -> bool {
670    if !expr.is_aggregate() {
671        return false;
672    }
673    match expr {
674        Expr::FunctionCall {
675            name, window_spec, ..
676        } => {
677            // A bare aggregate FunctionCall is NOT compound
678            if window_spec.is_some() {
679                return true; // window wrapping an aggregate — treat as compound
680            }
681            !is_aggregate_function_name(name)
682        }
683        // Subquery aggregates are "bare" (not compound)
684        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => false,
685        // Everything else (ListComprehension, BinaryOp, etc.) is compound
686        _ => true,
687    }
688}
689
690/// Recursively collect all bare aggregate FunctionCall sub-expressions from
691/// `expr`. Stops recursing into the *arguments* of an aggregate (we only want
692/// the outermost aggregate boundaries).
693///
694/// For `ListComprehension`, `Quantifier`, and `Reduce`, only the `list` field
695/// is searched because the body (`map_expr`, `predicate`, `expr`) references
696/// the loop variable, not outer-scope aggregates.
697fn extract_inner_aggregates(expr: &Expr) -> Vec<Expr> {
698    let mut out = Vec::new();
699    extract_inner_aggregates_rec(expr, &mut out);
700    out
701}
702
703fn extract_inner_aggregates_rec(expr: &Expr, out: &mut Vec<Expr>) {
704    match expr {
705        Expr::FunctionCall {
706            name, window_spec, ..
707        } if window_spec.is_none() && is_aggregate_function_name(name) => {
708            // Found a bare aggregate — collect it and stop recursing
709            out.push(expr.clone());
710        }
711        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
712            out.push(expr.clone());
713        }
714        // For list comprehension, only search the `list` source for aggregates
715        Expr::ListComprehension { list, .. } => {
716            extract_inner_aggregates_rec(list, out);
717        }
718        // For quantifier, only search the `list` source
719        Expr::Quantifier { list, .. } => {
720            extract_inner_aggregates_rec(list, out);
721        }
722        // For reduce, search `init` and `list` (not the body `expr`)
723        Expr::Reduce { init, list, .. } => {
724            extract_inner_aggregates_rec(init, out);
725            extract_inner_aggregates_rec(list, out);
726        }
727        // Standard recursive cases
728        Expr::FunctionCall { args, .. } => {
729            for arg in args {
730                extract_inner_aggregates_rec(arg, out);
731            }
732        }
733        Expr::BinaryOp { left, right, .. } => {
734            extract_inner_aggregates_rec(left, out);
735            extract_inner_aggregates_rec(right, out);
736        }
737        Expr::UnaryOp { expr: e, .. }
738        | Expr::IsNull(e)
739        | Expr::IsNotNull(e)
740        | Expr::IsUnique(e) => extract_inner_aggregates_rec(e, out),
741        Expr::Property(base, _) => extract_inner_aggregates_rec(base, out),
742        Expr::List(items) => {
743            for item in items {
744                extract_inner_aggregates_rec(item, out);
745            }
746        }
747        Expr::Case {
748            expr: case_expr,
749            when_then,
750            else_expr,
751        } => {
752            if let Some(e) = case_expr {
753                extract_inner_aggregates_rec(e, out);
754            }
755            for (w, t) in when_then {
756                extract_inner_aggregates_rec(w, out);
757                extract_inner_aggregates_rec(t, out);
758            }
759            if let Some(e) = else_expr {
760                extract_inner_aggregates_rec(e, out);
761            }
762        }
763        Expr::In {
764            expr: in_expr,
765            list,
766        } => {
767            extract_inner_aggregates_rec(in_expr, out);
768            extract_inner_aggregates_rec(list, out);
769        }
770        Expr::ArrayIndex { array, index } => {
771            extract_inner_aggregates_rec(array, out);
772            extract_inner_aggregates_rec(index, out);
773        }
774        Expr::ArraySlice { array, start, end } => {
775            extract_inner_aggregates_rec(array, out);
776            if let Some(s) = start {
777                extract_inner_aggregates_rec(s, out);
778            }
779            if let Some(e) = end {
780                extract_inner_aggregates_rec(e, out);
781            }
782        }
783        Expr::Map(entries) => {
784            for (_, v) in entries {
785                extract_inner_aggregates_rec(v, out);
786            }
787        }
788        _ => {}
789    }
790}
791
792/// Return a copy of `expr` with every inner aggregate FunctionCall replaced by
793/// `Expr::Variable(aggregate_column_name(agg))`.
794///
795/// For `ListComprehension`/`Quantifier`/`Reduce`, only the `list` field is
796/// rewritten (the body references the loop variable, not outer-scope columns).
797fn replace_aggregates_with_columns(expr: &Expr) -> Expr {
798    match expr {
799        Expr::FunctionCall {
800            name, window_spec, ..
801        } if window_spec.is_none() && is_aggregate_function_name(name) => {
802            // Replace bare aggregate with column reference
803            Expr::Variable(aggregate_column_name(expr))
804        }
805        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
806            Expr::Variable(aggregate_column_name(expr))
807        }
808        Expr::ListComprehension {
809            variable,
810            list,
811            where_clause,
812            map_expr,
813        } => Expr::ListComprehension {
814            variable: variable.clone(),
815            list: Box::new(replace_aggregates_with_columns(list)),
816            where_clause: where_clause.clone(), // don't touch — references loop var
817            map_expr: map_expr.clone(),         // don't touch — references loop var
818        },
819        Expr::Quantifier {
820            quantifier,
821            variable,
822            list,
823            predicate,
824        } => Expr::Quantifier {
825            quantifier: *quantifier,
826            variable: variable.clone(),
827            list: Box::new(replace_aggregates_with_columns(list)),
828            predicate: predicate.clone(), // don't touch — references loop var
829        },
830        Expr::Reduce {
831            accumulator,
832            init,
833            variable,
834            list,
835            expr: body,
836        } => Expr::Reduce {
837            accumulator: accumulator.clone(),
838            init: Box::new(replace_aggregates_with_columns(init)),
839            variable: variable.clone(),
840            list: Box::new(replace_aggregates_with_columns(list)),
841            expr: body.clone(), // don't touch — references loop var
842        },
843        Expr::FunctionCall {
844            name,
845            args,
846            distinct,
847            window_spec,
848        } => Expr::FunctionCall {
849            name: name.clone(),
850            args: args.iter().map(replace_aggregates_with_columns).collect(),
851            distinct: *distinct,
852            window_spec: window_spec.clone(),
853        },
854        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
855            left: Box::new(replace_aggregates_with_columns(left)),
856            op: *op,
857            right: Box::new(replace_aggregates_with_columns(right)),
858        },
859        Expr::UnaryOp { op, expr: e } => Expr::UnaryOp {
860            op: *op,
861            expr: Box::new(replace_aggregates_with_columns(e)),
862        },
863        Expr::IsNull(e) => Expr::IsNull(Box::new(replace_aggregates_with_columns(e))),
864        Expr::IsNotNull(e) => Expr::IsNotNull(Box::new(replace_aggregates_with_columns(e))),
865        Expr::IsUnique(e) => Expr::IsUnique(Box::new(replace_aggregates_with_columns(e))),
866        Expr::Property(base, prop) => Expr::Property(
867            Box::new(replace_aggregates_with_columns(base)),
868            prop.clone(),
869        ),
870        Expr::List(items) => {
871            Expr::List(items.iter().map(replace_aggregates_with_columns).collect())
872        }
873        Expr::Case {
874            expr: case_expr,
875            when_then,
876            else_expr,
877        } => Expr::Case {
878            expr: case_expr
879                .as_ref()
880                .map(|e| Box::new(replace_aggregates_with_columns(e))),
881            when_then: when_then
882                .iter()
883                .map(|(w, t)| {
884                    (
885                        replace_aggregates_with_columns(w),
886                        replace_aggregates_with_columns(t),
887                    )
888                })
889                .collect(),
890            else_expr: else_expr
891                .as_ref()
892                .map(|e| Box::new(replace_aggregates_with_columns(e))),
893        },
894        Expr::In {
895            expr: in_expr,
896            list,
897        } => Expr::In {
898            expr: Box::new(replace_aggregates_with_columns(in_expr)),
899            list: Box::new(replace_aggregates_with_columns(list)),
900        },
901        Expr::ArrayIndex { array, index } => Expr::ArrayIndex {
902            array: Box::new(replace_aggregates_with_columns(array)),
903            index: Box::new(replace_aggregates_with_columns(index)),
904        },
905        Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
906            array: Box::new(replace_aggregates_with_columns(array)),
907            start: start
908                .as_ref()
909                .map(|e| Box::new(replace_aggregates_with_columns(e))),
910            end: end
911                .as_ref()
912                .map(|e| Box::new(replace_aggregates_with_columns(e))),
913        },
914        Expr::Map(entries) => Expr::Map(
915            entries
916                .iter()
917                .map(|(k, v)| (k.clone(), replace_aggregates_with_columns(v)))
918                .collect(),
919        ),
920        // Leaf expressions — return as-is
921        other => other.clone(),
922    }
923}
924
925/// Check if an expression contains any aggregate function (recursively).
926fn contains_aggregate_recursive(expr: &Expr) -> bool {
927    match expr {
928        Expr::FunctionCall { name, args, .. } => {
929            is_aggregate_function_name(name) || args.iter().any(contains_aggregate_recursive)
930        }
931        Expr::BinaryOp { left, right, .. } => {
932            contains_aggregate_recursive(left) || contains_aggregate_recursive(right)
933        }
934        Expr::UnaryOp { expr: e, .. }
935        | Expr::IsNull(e)
936        | Expr::IsNotNull(e)
937        | Expr::IsUnique(e) => contains_aggregate_recursive(e),
938        Expr::List(items) => items.iter().any(contains_aggregate_recursive),
939        Expr::Case {
940            expr,
941            when_then,
942            else_expr,
943        } => {
944            expr.as_deref().is_some_and(contains_aggregate_recursive)
945                || when_then.iter().any(|(w, t)| {
946                    contains_aggregate_recursive(w) || contains_aggregate_recursive(t)
947                })
948                || else_expr
949                    .as_deref()
950                    .is_some_and(contains_aggregate_recursive)
951        }
952        Expr::In { expr, list } => {
953            contains_aggregate_recursive(expr) || contains_aggregate_recursive(list)
954        }
955        Expr::Property(base, _) => contains_aggregate_recursive(base),
956        Expr::ListComprehension { list, .. } => {
957            // Only check the list source — where_clause/map_expr reference the loop variable
958            contains_aggregate_recursive(list)
959        }
960        Expr::Quantifier { list, .. } => contains_aggregate_recursive(list),
961        Expr::Reduce { init, list, .. } => {
962            contains_aggregate_recursive(init) || contains_aggregate_recursive(list)
963        }
964        Expr::ArrayIndex { array, index } => {
965            contains_aggregate_recursive(array) || contains_aggregate_recursive(index)
966        }
967        Expr::ArraySlice { array, start, end } => {
968            contains_aggregate_recursive(array)
969                || start.as_deref().is_some_and(contains_aggregate_recursive)
970                || end.as_deref().is_some_and(contains_aggregate_recursive)
971        }
972        Expr::Map(entries) => entries.iter().any(|(_, v)| contains_aggregate_recursive(v)),
973        _ => false,
974    }
975}
976
977/// Check if an expression contains a non-deterministic function (e.g. rand()).
978fn contains_non_deterministic(expr: &Expr) -> bool {
979    if matches!(expr, Expr::FunctionCall { name, .. } if name.eq_ignore_ascii_case("rand")) {
980        return true;
981    }
982    let mut found = false;
983    expr.for_each_child(&mut |child| {
984        if !found {
985            found = contains_non_deterministic(child);
986        }
987    });
988    found
989}
990
991fn collect_aggregate_reprs(expr: &Expr, out: &mut HashSet<String>) {
992    match expr {
993        Expr::FunctionCall { name, args, .. } => {
994            if is_aggregate_function_name(name) {
995                out.insert(expr.to_string_repr());
996                return;
997            }
998            for arg in args {
999                collect_aggregate_reprs(arg, out);
1000            }
1001        }
1002        Expr::BinaryOp { left, right, .. } => {
1003            collect_aggregate_reprs(left, out);
1004            collect_aggregate_reprs(right, out);
1005        }
1006        Expr::UnaryOp { expr, .. }
1007        | Expr::IsNull(expr)
1008        | Expr::IsNotNull(expr)
1009        | Expr::IsUnique(expr) => collect_aggregate_reprs(expr, out),
1010        Expr::List(items) => {
1011            for item in items {
1012                collect_aggregate_reprs(item, out);
1013            }
1014        }
1015        Expr::Case {
1016            expr,
1017            when_then,
1018            else_expr,
1019        } => {
1020            if let Some(e) = expr {
1021                collect_aggregate_reprs(e, out);
1022            }
1023            for (w, t) in when_then {
1024                collect_aggregate_reprs(w, out);
1025                collect_aggregate_reprs(t, out);
1026            }
1027            if let Some(e) = else_expr {
1028                collect_aggregate_reprs(e, out);
1029            }
1030        }
1031        Expr::In { expr, list } => {
1032            collect_aggregate_reprs(expr, out);
1033            collect_aggregate_reprs(list, out);
1034        }
1035        Expr::Property(base, _) => collect_aggregate_reprs(base, out),
1036        Expr::ListComprehension { list, .. } => {
1037            collect_aggregate_reprs(list, out);
1038        }
1039        Expr::Quantifier { list, .. } => {
1040            collect_aggregate_reprs(list, out);
1041        }
1042        Expr::Reduce { init, list, .. } => {
1043            collect_aggregate_reprs(init, out);
1044            collect_aggregate_reprs(list, out);
1045        }
1046        Expr::ArrayIndex { array, index } => {
1047            collect_aggregate_reprs(array, out);
1048            collect_aggregate_reprs(index, out);
1049        }
1050        Expr::ArraySlice { array, start, end } => {
1051            collect_aggregate_reprs(array, out);
1052            if let Some(s) = start {
1053                collect_aggregate_reprs(s, out);
1054            }
1055            if let Some(e) = end {
1056                collect_aggregate_reprs(e, out);
1057            }
1058        }
1059        _ => {}
1060    }
1061}
1062
/// A variable or property reference recorded by `collect_non_aggregate_refs`
/// while walking an expression outside of aggregate calls.
#[derive(Debug, Clone)]
enum NonAggregateRef {
    /// A bare variable reference; holds the variable's name.
    Var(String),
    /// A property access expression.
    Property {
        /// `to_string_repr()` of the whole property expression.
        repr: String,
        /// Name of the base variable when the property's base is a plain
        /// `Expr::Variable`; `None` for any other kind of base expression.
        base_var: Option<String>,
    },
}
1071
1072fn collect_non_aggregate_refs(expr: &Expr, inside_agg: bool, out: &mut Vec<NonAggregateRef>) {
1073    match expr {
1074        Expr::FunctionCall { name, args, .. } => {
1075            if is_aggregate_function_name(name) {
1076                return;
1077            }
1078            for arg in args {
1079                collect_non_aggregate_refs(arg, inside_agg, out);
1080            }
1081        }
1082        Expr::Variable(v) if !inside_agg => out.push(NonAggregateRef::Var(v.clone())),
1083        Expr::Property(base, _) if !inside_agg => {
1084            let base_var = if let Expr::Variable(v) = base.as_ref() {
1085                Some(v.clone())
1086            } else {
1087                None
1088            };
1089            out.push(NonAggregateRef::Property {
1090                repr: expr.to_string_repr(),
1091                base_var,
1092            });
1093        }
1094        Expr::BinaryOp { left, right, .. } => {
1095            collect_non_aggregate_refs(left, inside_agg, out);
1096            collect_non_aggregate_refs(right, inside_agg, out);
1097        }
1098        Expr::UnaryOp { expr, .. }
1099        | Expr::IsNull(expr)
1100        | Expr::IsNotNull(expr)
1101        | Expr::IsUnique(expr) => collect_non_aggregate_refs(expr, inside_agg, out),
1102        Expr::List(items) => {
1103            for item in items {
1104                collect_non_aggregate_refs(item, inside_agg, out);
1105            }
1106        }
1107        Expr::Case {
1108            expr,
1109            when_then,
1110            else_expr,
1111        } => {
1112            if let Some(e) = expr {
1113                collect_non_aggregate_refs(e, inside_agg, out);
1114            }
1115            for (w, t) in when_then {
1116                collect_non_aggregate_refs(w, inside_agg, out);
1117                collect_non_aggregate_refs(t, inside_agg, out);
1118            }
1119            if let Some(e) = else_expr {
1120                collect_non_aggregate_refs(e, inside_agg, out);
1121            }
1122        }
1123        Expr::In { expr, list } => {
1124            collect_non_aggregate_refs(expr, inside_agg, out);
1125            collect_non_aggregate_refs(list, inside_agg, out);
1126        }
1127        // For ListComprehension/Quantifier/Reduce, only recurse into the `list`
1128        // source. The body references the loop variable, not outer-scope vars.
1129        Expr::ListComprehension { list, .. } => {
1130            collect_non_aggregate_refs(list, inside_agg, out);
1131        }
1132        Expr::Quantifier { list, .. } => {
1133            collect_non_aggregate_refs(list, inside_agg, out);
1134        }
1135        Expr::Reduce { init, list, .. } => {
1136            collect_non_aggregate_refs(init, inside_agg, out);
1137            collect_non_aggregate_refs(list, inside_agg, out);
1138        }
1139        _ => {}
1140    }
1141}
1142
1143fn validate_with_order_by_aggregate_item(
1144    expr: &Expr,
1145    projected_aggregate_reprs: &HashSet<String>,
1146    projected_simple_reprs: &HashSet<String>,
1147    projected_aliases: &HashSet<String>,
1148) -> Result<()> {
1149    let mut aggregate_reprs = HashSet::new();
1150    collect_aggregate_reprs(expr, &mut aggregate_reprs);
1151    for agg in aggregate_reprs {
1152        if !projected_aggregate_reprs.contains(&agg) {
1153            return Err(anyhow!(
1154                "SyntaxError: UndefinedVariable - Aggregation expression '{}' is not projected in WITH",
1155                agg
1156            ));
1157        }
1158    }
1159
1160    let mut refs = Vec::new();
1161    collect_non_aggregate_refs(expr, false, &mut refs);
1162    refs.retain(|r| match r {
1163        NonAggregateRef::Var(v) => !projected_aliases.contains(v),
1164        NonAggregateRef::Property { repr, .. } => !projected_simple_reprs.contains(repr),
1165    });
1166
1167    let mut dedup = HashSet::new();
1168    refs.retain(|r| {
1169        let key = match r {
1170            NonAggregateRef::Var(v) => format!("v:{v}"),
1171            NonAggregateRef::Property { repr, .. } => format!("p:{repr}"),
1172        };
1173        dedup.insert(key)
1174    });
1175
1176    if refs.len() > 1 {
1177        return Err(anyhow!(
1178            "SyntaxError: AmbiguousAggregationExpression - ORDER BY item mixes aggregation with multiple non-grouping references"
1179        ));
1180    }
1181
1182    if let Some(r) = refs.first() {
1183        return match r {
1184            NonAggregateRef::Var(v) => Err(anyhow!(
1185                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1186                v
1187            )),
1188            NonAggregateRef::Property { base_var, .. } => Err(anyhow!(
1189                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1190                base_var
1191                    .clone()
1192                    .unwrap_or_else(|| "<property-base>".to_string())
1193            )),
1194        };
1195    }
1196
1197    Ok(())
1198}
1199
1200/// Validate that no aggregation functions appear in WHERE clause.
1201fn validate_no_aggregation_in_where(predicate: &Expr) -> Result<()> {
1202    if contains_aggregate_recursive(predicate) {
1203        return Err(anyhow!(
1204            "SyntaxError: InvalidAggregation - Aggregation functions not allowed in WHERE"
1205        ));
1206    }
1207    Ok(())
1208}
1209
/// Numeric result of evaluating a constant expression: either an integer or
/// a floating-point value.
#[derive(Debug, Clone, Copy)]
enum ConstNumber {
    /// A 64-bit signed integer value.
    Int(i64),
    /// A 64-bit floating-point value.
    Float(f64),
}

impl ConstNumber {
    /// Return the value as an `f64`, converting an integer with an `as` cast.
    fn to_f64(self) -> f64 {
        match self {
            Self::Float(float) => float,
            Self::Int(int) => int as f64,
        }
    }
}
1224
1225fn eval_const_numeric_expr(
1226    expr: &Expr,
1227    params: &HashMap<String, uni_common::Value>,
1228) -> Result<ConstNumber> {
1229    match expr {
1230        Expr::Literal(CypherLiteral::Integer(n)) => Ok(ConstNumber::Int(*n)),
1231        Expr::Literal(CypherLiteral::Float(f)) => Ok(ConstNumber::Float(*f)),
1232        Expr::Parameter(name) => match params.get(name) {
1233            Some(uni_common::Value::Int(n)) => Ok(ConstNumber::Int(*n)),
1234            Some(uni_common::Value::Float(f)) => Ok(ConstNumber::Float(*f)),
1235            Some(uni_common::Value::Null) => Err(anyhow!(
1236                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got null",
1237                name
1238            )),
1239            Some(other) => Err(anyhow!(
1240                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got {:?}",
1241                name,
1242                other
1243            )),
1244            None => Err(anyhow!(
1245                "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1246            )),
1247        },
1248        Expr::UnaryOp {
1249            op: uni_cypher::ast::UnaryOp::Neg,
1250            expr,
1251        } => match eval_const_numeric_expr(expr, params)? {
1252            ConstNumber::Int(v) => Ok(ConstNumber::Int(-v)),
1253            ConstNumber::Float(v) => Ok(ConstNumber::Float(-v)),
1254        },
1255        Expr::BinaryOp { left, op, right } => {
1256            let l = eval_const_numeric_expr(left, params)?;
1257            let r = eval_const_numeric_expr(right, params)?;
1258            match op {
1259                BinaryOp::Add => match (l, r) {
1260                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a + b)),
1261                    _ => Ok(ConstNumber::Float(l.to_f64() + r.to_f64())),
1262                },
1263                BinaryOp::Sub => match (l, r) {
1264                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a - b)),
1265                    _ => Ok(ConstNumber::Float(l.to_f64() - r.to_f64())),
1266                },
1267                BinaryOp::Mul => match (l, r) {
1268                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a * b)),
1269                    _ => Ok(ConstNumber::Float(l.to_f64() * r.to_f64())),
1270                },
1271                BinaryOp::Div => Ok(ConstNumber::Float(l.to_f64() / r.to_f64())),
1272                BinaryOp::Mod => match (l, r) {
1273                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a % b)),
1274                    _ => Ok(ConstNumber::Float(l.to_f64() % r.to_f64())),
1275                },
1276                BinaryOp::Pow => Ok(ConstNumber::Float(l.to_f64().powf(r.to_f64()))),
1277                _ => Err(anyhow!(
1278                    "SyntaxError: InvalidArgumentType - unsupported operator in constant expression"
1279                )),
1280            }
1281        }
1282        Expr::FunctionCall { name, args, .. } => {
1283            let lower = name.to_lowercase();
1284            match lower.as_str() {
1285                "rand" if args.is_empty() => {
1286                    use rand::RngExt;
1287                    let mut rng = rand::rng();
1288                    Ok(ConstNumber::Float(rng.random::<f64>()))
1289                }
1290                "tointeger" | "toint" if args.len() == 1 => {
1291                    match eval_const_numeric_expr(&args[0], params)? {
1292                        ConstNumber::Int(v) => Ok(ConstNumber::Int(v)),
1293                        ConstNumber::Float(v) => Ok(ConstNumber::Int(v.trunc() as i64)),
1294                    }
1295                }
1296                "ceil" if args.len() == 1 => Ok(ConstNumber::Float(
1297                    eval_const_numeric_expr(&args[0], params)?.to_f64().ceil(),
1298                )),
1299                "floor" if args.len() == 1 => Ok(ConstNumber::Float(
1300                    eval_const_numeric_expr(&args[0], params)?.to_f64().floor(),
1301                )),
1302                "abs" if args.len() == 1 => match eval_const_numeric_expr(&args[0], params)? {
1303                    ConstNumber::Int(v) => Ok(ConstNumber::Int(v.abs())),
1304                    ConstNumber::Float(v) => Ok(ConstNumber::Float(v.abs())),
1305                },
1306                _ => Err(anyhow!(
1307                    "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1308                )),
1309            }
1310        }
1311        _ => Err(anyhow!(
1312            "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1313        )),
1314    }
1315}
1316
1317/// Parse and validate a non-negative integer expression for SKIP or LIMIT.
1318/// Returns `Ok(Some(n))` for valid constants, or an error for negative/float/non-constant values.
1319fn parse_non_negative_integer(
1320    expr: &Expr,
1321    clause_name: &str,
1322    params: &HashMap<String, uni_common::Value>,
1323) -> Result<Option<usize>> {
1324    let referenced_vars = collect_expr_variables(expr);
1325    if !referenced_vars.is_empty() {
1326        return Err(anyhow!(
1327            "SyntaxError: NonConstantExpression - {} requires expression independent of row variables",
1328            clause_name
1329        ));
1330    }
1331
1332    let value = eval_const_numeric_expr(expr, params)?;
1333    let as_int = match value {
1334        ConstNumber::Int(v) => v,
1335        ConstNumber::Float(v) => {
1336            if !v.is_finite() || (v.fract().abs() > f64::EPSILON) {
1337                return Err(anyhow!(
1338                    "SyntaxError: InvalidArgumentType - {} requires integer, got float",
1339                    clause_name
1340                ));
1341            }
1342            v as i64
1343        }
1344    };
1345    if as_int < 0 {
1346        return Err(anyhow!(
1347            "SyntaxError: NegativeIntegerArgument - {} requires non-negative integer",
1348            clause_name
1349        ));
1350    }
1351    Ok(Some(as_int as usize))
1352}
1353
1354/// Validate that aggregation functions are not nested.
1355fn validate_no_nested_aggregation(expr: &Expr) -> Result<()> {
1356    if let Expr::FunctionCall { name, args, .. } = expr
1357        && is_aggregate_function_name(name)
1358    {
1359        for arg in args {
1360            if contains_aggregate_recursive(arg) {
1361                return Err(anyhow!(
1362                    "SyntaxError: NestedAggregation - Cannot nest aggregation functions"
1363                ));
1364            }
1365            if contains_non_deterministic(arg) {
1366                return Err(anyhow!(
1367                    "SyntaxError: NonConstantExpression - Non-deterministic function inside aggregation"
1368                ));
1369            }
1370        }
1371    }
1372    let mut result = Ok(());
1373    expr.for_each_child(&mut |child| {
1374        if result.is_ok() {
1375            result = validate_no_nested_aggregation(child);
1376        }
1377    });
1378    result
1379}
1380
1381/// Validate that an expression does not access properties or labels of
1382/// deleted entities. `type(r)` on a deleted relationship is allowed per
1383/// OpenCypher spec, but `n.prop` and `labels(n)` are not.
1384fn validate_no_deleted_entity_access(expr: &Expr, deleted_vars: &HashSet<String>) -> Result<()> {
1385    // Check n.prop on a deleted variable
1386    if let Expr::Property(inner, _) = expr
1387        && let Expr::Variable(name) = inner.as_ref()
1388        && deleted_vars.contains(name)
1389    {
1390        return Err(anyhow!(
1391            "EntityNotFound: DeletedEntityAccess - Cannot access properties of deleted entity '{}'",
1392            name
1393        ));
1394    }
1395    // Check labels(n) or keys(n) on a deleted variable
1396    if let Expr::FunctionCall { name, args, .. } = expr
1397        && matches!(name.to_lowercase().as_str(), "labels" | "keys")
1398        && args.len() == 1
1399        && let Expr::Variable(var) = &args[0]
1400        && deleted_vars.contains(var)
1401    {
1402        return Err(anyhow!(
1403            "EntityNotFound: DeletedEntityAccess - Cannot access {} of deleted entity '{}'",
1404            name.to_lowercase(),
1405            var
1406        ));
1407    }
1408    let mut result = Ok(());
1409    expr.for_each_child(&mut |child| {
1410        if result.is_ok() {
1411            result = validate_no_deleted_entity_access(child, deleted_vars);
1412        }
1413    });
1414    result
1415}
1416
1417/// Validate that all variables referenced in properties are defined,
1418/// either in scope or in the local CREATE variable list.
1419fn validate_property_variables(
1420    properties: &Option<Expr>,
1421    vars_in_scope: &[VariableInfo],
1422    create_vars: &[&str],
1423) -> Result<()> {
1424    if let Some(props) = properties {
1425        for var in collect_expr_variables(props) {
1426            if !is_var_in_scope(vars_in_scope, &var) && !create_vars.contains(&var.as_str()) {
1427                return Err(anyhow!(
1428                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1429                    var
1430                ));
1431            }
1432        }
1433    }
1434    Ok(())
1435}
1436
1437/// Check that a variable name is not already bound in scope or in the local CREATE list.
1438/// Used to prevent rebinding in CREATE clauses.
1439fn check_not_already_bound(
1440    name: &str,
1441    vars_in_scope: &[VariableInfo],
1442    create_vars: &[&str],
1443) -> Result<()> {
1444    if is_var_in_scope(vars_in_scope, name) {
1445        return Err(anyhow!(
1446            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1447            name
1448        ));
1449    }
1450    if create_vars.contains(&name) {
1451        return Err(anyhow!(
1452            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined in CREATE",
1453            name
1454        ));
1455    }
1456    Ok(())
1457}
1458
1459fn build_merge_scope(pattern: &Pattern, vars_in_scope: &[VariableInfo]) -> Vec<VariableInfo> {
1460    let mut scope = vars_in_scope.to_vec();
1461
1462    for path in &pattern.paths {
1463        if let Some(path_var) = &path.variable
1464            && !path_var.is_empty()
1465            && !is_var_in_scope(&scope, path_var)
1466        {
1467            scope.push(VariableInfo::new(path_var.clone(), VariableType::Path));
1468        }
1469        for element in &path.elements {
1470            match element {
1471                PatternElement::Node(n) => {
1472                    if let Some(v) = &n.variable
1473                        && !v.is_empty()
1474                        && !is_var_in_scope(&scope, v)
1475                    {
1476                        scope.push(VariableInfo::new(v.clone(), VariableType::Node));
1477                    }
1478                }
1479                PatternElement::Relationship(r) => {
1480                    if let Some(v) = &r.variable
1481                        && !v.is_empty()
1482                        && !is_var_in_scope(&scope, v)
1483                    {
1484                        scope.push(VariableInfo::new(v.clone(), VariableType::Edge));
1485                    }
1486                }
1487                PatternElement::Parenthesized { .. } => {}
1488            }
1489        }
1490    }
1491
1492    scope
1493}
1494
1495fn validate_merge_set_item(item: &SetItem, vars_in_scope: &[VariableInfo]) -> Result<()> {
1496    match item {
1497        SetItem::Property { expr, value } => {
1498            validate_expression_variables(expr, vars_in_scope)?;
1499            validate_expression(expr, vars_in_scope)?;
1500            validate_expression_variables(value, vars_in_scope)?;
1501            validate_expression(value, vars_in_scope)?;
1502            if contains_pattern_predicate(expr) || contains_pattern_predicate(value) {
1503                return Err(anyhow!(
1504                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1505                ));
1506            }
1507        }
1508        SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
1509            if !is_var_in_scope(vars_in_scope, variable) {
1510                return Err(anyhow!(
1511                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1512                    variable
1513                ));
1514            }
1515            validate_expression_variables(value, vars_in_scope)?;
1516            validate_expression(value, vars_in_scope)?;
1517            if contains_pattern_predicate(value) {
1518                return Err(anyhow!(
1519                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1520                ));
1521            }
1522        }
1523        SetItem::Labels { variable, .. } => {
1524            if !is_var_in_scope(vars_in_scope, variable) {
1525                return Err(anyhow!(
1526                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1527                    variable
1528                ));
1529            }
1530        }
1531    }
1532
1533    Ok(())
1534}
1535
1536/// Reject MERGE patterns containing null property values (e.g. `MERGE ({k: null})`).
1537/// The OpenCypher spec requires all property values in MERGE to be non-null.
1538fn reject_null_merge_properties(properties: &Option<Expr>) -> Result<()> {
1539    if let Some(Expr::Map(entries)) = properties {
1540        for (key, value) in entries {
1541            if matches!(value, Expr::Literal(CypherLiteral::Null)) {
1542                return Err(anyhow!(
1543                    "SemanticError: MergeReadOwnWrites - MERGE cannot use null property value for '{}'",
1544                    key
1545                ));
1546            }
1547        }
1548    }
1549    Ok(())
1550}
1551
1552/// Flatten every label name appearing in a `Pattern` (across all paths
1553/// and node elements). Used by the M5 follow-up #6 write-rejection
1554/// guard to refuse CREATE/MERGE that names a virtual catalog-resolved
1555/// label.
1556fn collect_pattern_labels(pattern: &uni_cypher::ast::Pattern) -> Vec<String> {
1557    let mut out = Vec::new();
1558    for path in &pattern.paths {
1559        for element in &path.elements {
1560            if let PatternElement::Node(n) = element {
1561                for l in n.labels.names() {
1562                    out.push(l.clone());
1563                }
1564            }
1565        }
1566    }
1567    out
1568}
1569
1570fn validate_merge_clause(merge_clause: &MergeClause, vars_in_scope: &[VariableInfo]) -> Result<()> {
1571    for path in &merge_clause.pattern.paths {
1572        for element in &path.elements {
1573            match element {
1574                PatternElement::Node(n) => {
1575                    if let Some(Expr::Parameter(_)) = &n.properties {
1576                        return Err(anyhow!(
1577                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
1578                        ));
1579                    }
1580                    reject_null_merge_properties(&n.properties)?;
1581                    // VariableAlreadyBound: reject if a bound variable is used
1582                    // as a standalone MERGE node or introduces new labels/properties.
1583                    // Bare endpoint references like (a) in MERGE (a)-[:R]->(b) are valid.
1584                    if let Some(variable) = &n.variable
1585                        && !variable.is_empty()
1586                        && is_var_in_scope(vars_in_scope, variable)
1587                    {
1588                        let is_standalone = path.elements.len() == 1;
1589                        let has_new_labels = !n.labels.is_empty();
1590                        let has_new_properties = n.properties.is_some();
1591                        if is_standalone || has_new_labels || has_new_properties {
1592                            return Err(anyhow!(
1593                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1594                                variable
1595                            ));
1596                        }
1597                    }
1598                }
1599                PatternElement::Relationship(r) => {
1600                    if let Some(variable) = &r.variable
1601                        && !variable.is_empty()
1602                        && is_var_in_scope(vars_in_scope, variable)
1603                    {
1604                        return Err(anyhow!(
1605                            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1606                            variable
1607                        ));
1608                    }
1609                    if r.types.len() != 1 {
1610                        return Err(anyhow!(
1611                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for MERGE"
1612                        ));
1613                    }
1614                    if r.range.is_some() {
1615                        return Err(anyhow!(
1616                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
1617                        ));
1618                    }
1619                    if let Some(Expr::Parameter(_)) = &r.properties {
1620                        return Err(anyhow!(
1621                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
1622                        ));
1623                    }
1624                    reject_null_merge_properties(&r.properties)?;
1625                }
1626                PatternElement::Parenthesized { .. } => {}
1627            }
1628        }
1629    }
1630
1631    let merge_scope = build_merge_scope(&merge_clause.pattern, vars_in_scope);
1632    for item in &merge_clause.on_create {
1633        validate_merge_set_item(item, &merge_scope)?;
1634    }
1635    for item in &merge_clause.on_match {
1636        validate_merge_set_item(item, &merge_scope)?;
1637    }
1638
1639    Ok(())
1640}
1641
1642/// Recursively validate an expression for type errors, undefined variables, etc.
1643fn validate_expression(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
1644    // Validate boolean operators and nested aggregation first
1645    validate_boolean_expression(expr)?;
1646    validate_no_nested_aggregation(expr)?;
1647
1648    // Helper to validate multiple expressions
1649    fn validate_all(exprs: &[Expr], vars: &[VariableInfo]) -> Result<()> {
1650        for e in exprs {
1651            validate_expression(e, vars)?;
1652        }
1653        Ok(())
1654    }
1655
1656    match expr {
1657        Expr::FunctionCall { name, args, .. } => {
1658            validate_function_call(name, args, vars_in_scope)?;
1659            validate_all(args, vars_in_scope)
1660        }
1661        Expr::BinaryOp { left, right, .. } => {
1662            validate_expression(left, vars_in_scope)?;
1663            validate_expression(right, vars_in_scope)
1664        }
1665        Expr::UnaryOp { expr: e, .. }
1666        | Expr::IsNull(e)
1667        | Expr::IsNotNull(e)
1668        | Expr::IsUnique(e) => validate_expression(e, vars_in_scope),
1669        Expr::Property(base, prop) => {
1670            if let Expr::Variable(var_name) = base.as_ref()
1671                && let Some(var_info) = find_var_in_scope(vars_in_scope, var_name)
1672            {
1673                // Paths don't have properties
1674                if var_info.var_type == VariableType::Path {
1675                    return Err(anyhow!(
1676                        "SyntaxError: InvalidArgumentType - Type mismatch: expected Node or Relationship but was Path for property access '{}.{}'",
1677                        var_name,
1678                        prop
1679                    ));
1680                }
1681                // Known non-graph literals (int, float, bool, string, list) don't have properties
1682                if var_info.var_type == VariableType::ScalarLiteral {
1683                    return Err(anyhow!(
1684                        "TypeError: InvalidArgumentType - Property access on a non-graph element is not allowed"
1685                    ));
1686                }
1687            }
1688            validate_expression(base, vars_in_scope)
1689        }
1690        Expr::List(items) => validate_all(items, vars_in_scope),
1691        Expr::Case {
1692            expr: case_expr,
1693            when_then,
1694            else_expr,
1695        } => {
1696            if let Some(e) = case_expr {
1697                validate_expression(e, vars_in_scope)?;
1698            }
1699            for (w, t) in when_then {
1700                validate_expression(w, vars_in_scope)?;
1701                validate_expression(t, vars_in_scope)?;
1702            }
1703            if let Some(e) = else_expr {
1704                validate_expression(e, vars_in_scope)?;
1705            }
1706            Ok(())
1707        }
1708        Expr::In { expr: e, list } => {
1709            validate_expression(e, vars_in_scope)?;
1710            validate_expression(list, vars_in_scope)
1711        }
1712        Expr::Exists {
1713            query,
1714            from_pattern_predicate: true,
1715        } => {
1716            // Pattern predicates cannot introduce new named variables.
1717            // Extract named vars from inner MATCH pattern, check each is in scope.
1718            if let Query::Single(stmt) = query.as_ref() {
1719                for clause in &stmt.clauses {
1720                    if let Clause::Match(m) = clause {
1721                        for path in &m.pattern.paths {
1722                            for elem in &path.elements {
1723                                match elem {
1724                                    PatternElement::Node(n) => {
1725                                        if let Some(var) = &n.variable
1726                                            && !is_var_in_scope(vars_in_scope, var)
1727                                        {
1728                                            return Err(anyhow!(
1729                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1730                                                var
1731                                            ));
1732                                        }
1733                                    }
1734                                    PatternElement::Relationship(r) => {
1735                                        if let Some(var) = &r.variable
1736                                            && !is_var_in_scope(vars_in_scope, var)
1737                                        {
1738                                            return Err(anyhow!(
1739                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1740                                                var
1741                                            ));
1742                                        }
1743                                    }
1744                                    _ => {}
1745                                }
1746                            }
1747                        }
1748                    }
1749                }
1750            }
1751            Ok(())
1752        }
1753        _ => Ok(()),
1754    }
1755}
1756
1757/// One step (hop) in a Quantified Path Pattern sub-pattern.
1758///
1759/// Used by `LogicalPlan::Traverse` when `qpp_steps` is `Some`.
1760#[derive(Debug, Clone)]
1761pub struct QppStepInfo {
1762    /// Edge type IDs that this step can traverse.
1763    pub edge_type_ids: Vec<u32>,
1764    /// Traversal direction for this step.
1765    pub direction: Direction,
1766    /// Optional label constraint on the target node.
1767    pub target_label: Option<String>,
1768}
1769
/// Fusion strategy selected per index type for
/// `LogicalPlan::FusedIndexScan` (introduced in Phase 5a-impl).
///
/// Marked `#[non_exhaustive]` so that strategies can be added (as the
/// Phase 5b `AnnRerank` and `Bm25Rrf` variants were) without breaking
/// exhaustive matches in downstream crates.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum FusionKind {
    /// Parent hits unioned with fork-local BTree hits, deduplicated by VID.
    BtreeUnion,
    /// k-way merge of the pre-sorted parent and fork streams (ORDER BY).
    SortedKWayMerge,
    /// UID lookup that probes the fork first and falls back to the parent
    /// on a miss. Used when a fork rebinds an external UID and queries
    /// must see the fork's binding before the parent's.
    VidUidForkFirst,
    /// Phase 5b — vector ANN rerank: the top-k from the primary's index
    /// and the top-k from the fork-local index are merged and reranked by
    /// exact distance. Recall ≥ 95% per spec §8.2.
    AnnRerank,
    /// Phase 5b — BM25 reciprocal rank fusion: the ranked lists of the
    /// primary's and the fork-local FTS indexes are combined via standard
    /// RRF (`score = sum 1 / (k_rrf + rank_i)`, k_rrf = 60).
    Bm25Rrf,
    /// M4 — hybrid RRF with a learned-sparse (SPLADE) source: emitted for
    /// `uni.search` calls whose properties map carries a `sparse` key and
    /// fused via N-ary RRF in `run_hybrid_search`. Independent of
    /// fork-local indexes.
    SparseRrf,
    /// M4 — sparse dot-product rerank: the `uni.sparse.query` counterpart
    /// of [`FusionKind::AnnRerank`], fusing the primary's and fork-local
    /// sparse indexes. Reserved: emitted once fork-local sparse indexes
    /// land (issue #95 Task #4 introduces `ForkLocalIndexKind::Sparse`).
    SparseDot,
}
1804
1805/// Logical query plan produced by [`QueryPlanner`].
1806///
1807/// Each variant represents one step in the Cypher execution pipeline.
1808/// Plans are tree-structured — leaf nodes produce rows, intermediate nodes
1809/// transform or join them, and the root node defines the final output.
1810#[derive(Debug, Clone)]
1811pub enum LogicalPlan {
1812    /// UNION / UNION ALL of two sub-plans.
1813    Union {
1814        left: Box<LogicalPlan>,
1815        right: Box<LogicalPlan>,
1816        /// When `true`, duplicate rows are preserved (UNION ALL semantics).
1817        all: bool,
1818    },
1819    /// Scan vertices of a single labeled dataset.
1820    Scan {
1821        label_id: u16,
1822        labels: Vec<String>,
1823        variable: String,
1824        filter: Option<Expr>,
1825        optional: bool,
1826    },
1827    /// Phase 5a-impl: fused scan over both primary's index and the
1828    /// forked session's fork-local index. Emitted by the planner only
1829    /// when (a) the session is forked AND (b) `StorageManager::fork_index_exists`
1830    /// returns `Some(_)` for the target column. Otherwise the planner
1831    /// keeps emitting `Scan` and Lance's `base_paths` chain transparently
1832    /// covers parent-inherited indexes.
1833    ///
1834    /// `kind` selects the per-type fusion strategy:
1835    /// - `BtreeUnion` — union of parent + fork hits, dedup by VID.
1836    /// - `SortedKWayMerge` — k-way merge of two pre-sorted streams.
1837    /// - `VidUidForkFirst` — probe fork's branch first, fall back to
1838    ///   parent's UID index on miss.
1839    FusedIndexScan {
1840        label_id: u16,
1841        labels: Vec<String>,
1842        variable: String,
1843        filter: Option<Expr>,
1844        optional: bool,
1845        kind: FusionKind,
1846    },
1847    /// Phase 5b followup: planner-side observability marker for the
1848    /// lossy fusion types. Wraps the original `VectorKnn` or
1849    /// `InvertedIndexLookup` (or any future leaf operator whose
1850    /// shape differs from `Scan`) without changing its fields, so
1851    /// the physical planner can decay it to `inner` unchanged.
1852    ///
1853    /// Runtime behavior is identical to running `inner` directly;
1854    /// the wrap is purely for explain-plan and runtime-stats
1855    /// observability. The actual fusion happens at the
1856    /// `BranchedBackend` layer (per-branch Lance reads via
1857    /// `base_paths`), exactly as in Phase 5b's core ship.
1858    FusedIndexScanWrapped {
1859        inner: Box<LogicalPlan>,
1860        kind: FusionKind,
1861    },
1862    /// Lookup vertices by ext_id using the main vertices table.
1863    /// Used when a query references ext_id without specifying a label.
1864    ExtIdLookup {
1865        variable: String,
1866        ext_id: String,
1867        filter: Option<Expr>,
1868        optional: bool,
1869    },
1870    /// Scan all vertices from main table (MATCH (n) without label).
1871    /// Used for schemaless queries that don't specify any label.
1872    ScanAll {
1873        variable: String,
1874        filter: Option<Expr>,
1875        optional: bool,
1876    },
1877    /// Scan main table filtering by label name (MATCH (n:Unknown)).
1878    /// Used for labels not defined in schema (schemaless support).
1879    /// Scan main vertices table by label name(s) for schemaless support.
1880    /// When labels has multiple entries, uses intersection semantics (must have ALL labels).
1881    ScanMainByLabels {
1882        labels: Vec<String>,
1883        variable: String,
1884        filter: Option<Expr>,
1885        optional: bool,
1886    },
1887    /// Produces exactly one empty row (used to bootstrap pipelines with no source).
1888    Empty,
1889    /// UNWIND: expand a list expression into one row per element.
1890    Unwind {
1891        input: Box<LogicalPlan>,
1892        expr: Expr,
1893        variable: String,
1894    },
1895    Traverse {
1896        input: Box<LogicalPlan>,
1897        edge_type_ids: Vec<u32>,
1898        direction: Direction,
1899        source_variable: String,
1900        target_variable: String,
1901        target_label_id: u16,
1902        step_variable: Option<String>,
1903        min_hops: usize,
1904        max_hops: usize,
1905        optional: bool,
1906        target_filter: Option<Expr>,
1907        path_variable: Option<String>,
1908        edge_properties: HashSet<String>,
1909        /// Whether this is a variable-length pattern (has `*` range specifier).
1910        /// When true, step_variable holds a list of edges (even for *1..1).
1911        is_variable_length: bool,
1912        /// All variables from this OPTIONAL MATCH pattern.
1913        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
1914        /// This ensures proper multi-hop OPTIONAL MATCH semantics.
1915        optional_pattern_vars: HashSet<String>,
1916        /// Variable names (node + edge) from the current MATCH clause scope.
1917        /// Used for relationship uniqueness scoping: only edge ID columns whose
1918        /// associated variable is in this set participate in uniqueness filtering.
1919        /// Variables from previous disconnected MATCH clauses are excluded.
1920        scope_match_variables: HashSet<String>,
1921        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
1922        edge_filter_expr: Option<Expr>,
1923        /// Path traversal semantics (Trail by default for OpenCypher).
1924        path_mode: crate::query::df_graph::nfa::PathMode,
1925        /// QPP steps for multi-hop quantified path patterns.
1926        /// `None` for simple VLP patterns; `Some` for QPP with per-step edge types/constraints.
1927        /// When present, `min_hops`/`max_hops` are derived from iterations × steps.len().
1928        qpp_steps: Option<Vec<QppStepInfo>>,
1929    },
1930    /// Traverse main edges table filtering by type name(s) (`MATCH (a)-[:Unknown]->(b)`).
1931    /// Used for edge types not defined in schema (schemaless support).
1932    /// Supports OR relationship types like `[:KNOWS|HATES]` via multiple type_names.
1933    TraverseMainByType {
1934        type_names: Vec<String>,
1935        input: Box<LogicalPlan>,
1936        direction: Direction,
1937        source_variable: String,
1938        target_variable: String,
1939        step_variable: Option<String>,
1940        min_hops: usize,
1941        max_hops: usize,
1942        optional: bool,
1943        target_filter: Option<Expr>,
1944        path_variable: Option<String>,
1945        /// Whether this is a variable-length pattern (has `*` range specifier).
1946        /// When true, step_variable holds a list of edges (even for *1..1).
1947        is_variable_length: bool,
1948        /// All variables from this OPTIONAL MATCH pattern.
1949        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
1950        optional_pattern_vars: HashSet<String>,
1951        /// Variables belonging to the current MATCH clause scope.
1952        /// Used for relationship uniqueness scoping: only edge columns whose
1953        /// associated variable is in this set participate in uniqueness filtering.
1954        scope_match_variables: HashSet<String>,
1955        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
1956        edge_filter_expr: Option<Expr>,
1957        /// Path traversal semantics (Trail by default for OpenCypher).
1958        path_mode: crate::query::df_graph::nfa::PathMode,
1959    },
1960    Filter {
1961        input: Box<LogicalPlan>,
1962        predicate: Expr,
1963        /// Variables from OPTIONAL MATCH that should preserve NULL rows.
1964        /// When evaluating the filter, if any of these variables are NULL,
1965        /// the row is preserved regardless of the predicate result.
1966        optional_variables: HashSet<String>,
1967    },
1968    Create {
1969        input: Box<LogicalPlan>,
1970        pattern: Pattern,
1971    },
1972    /// Batched CREATE operations for multiple consecutive CREATE clauses.
1973    ///
1974    /// This variant combines multiple CREATE patterns into a single plan node
1975    /// to avoid deep recursion when executing many CREATEs sequentially.
1976    CreateBatch {
1977        input: Box<LogicalPlan>,
1978        patterns: Vec<Pattern>,
1979    },
1980    Merge {
1981        input: Box<LogicalPlan>,
1982        pattern: Pattern,
1983        on_match: Option<SetClause>,
1984        on_create: Option<SetClause>,
1985    },
1986    Set {
1987        input: Box<LogicalPlan>,
1988        items: Vec<SetItem>,
1989    },
1990    Remove {
1991        input: Box<LogicalPlan>,
1992        items: Vec<RemoveItem>,
1993    },
1994    Delete {
1995        input: Box<LogicalPlan>,
1996        items: Vec<Expr>,
1997        detach: bool,
1998    },
1999    /// FOREACH (variable IN list | clauses)
2000    Foreach {
2001        input: Box<LogicalPlan>,
2002        variable: String,
2003        list: Expr,
2004        body: Vec<LogicalPlan>,
2005    },
2006    Sort {
2007        input: Box<LogicalPlan>,
2008        order_by: Vec<SortItem>,
2009    },
2010    Limit {
2011        input: Box<LogicalPlan>,
2012        skip: Option<usize>,
2013        fetch: Option<usize>,
2014    },
2015    Aggregate {
2016        input: Box<LogicalPlan>,
2017        group_by: Vec<Expr>,
2018        aggregates: Vec<Expr>,
2019    },
2020    Distinct {
2021        input: Box<LogicalPlan>,
2022    },
2023    Window {
2024        input: Box<LogicalPlan>,
2025        window_exprs: Vec<Expr>,
2026    },
2027    Project {
2028        input: Box<LogicalPlan>,
2029        projections: Vec<(Expr, Option<String>)>,
2030    },
2031    CrossJoin {
2032        left: Box<LogicalPlan>,
2033        right: Box<LogicalPlan>,
2034    },
2035    Apply {
2036        input: Box<LogicalPlan>,
2037        subquery: Box<LogicalPlan>,
2038        input_filter: Option<Expr>,
2039    },
2040    RecursiveCTE {
2041        cte_name: String,
2042        initial: Box<LogicalPlan>,
2043        recursive: Box<LogicalPlan>,
2044    },
2045    ProcedureCall {
2046        procedure_name: String,
2047        arguments: Vec<Expr>,
2048        yield_items: Vec<(String, Option<String>)>,
2049    },
2050    SubqueryCall {
2051        input: Box<LogicalPlan>,
2052        subquery: Box<LogicalPlan>,
2053    },
2054    VectorKnn {
2055        label_id: u16,
2056        variable: String,
2057        property: String,
2058        query: Expr,
2059        k: usize,
2060        threshold: Option<f32>,
2061    },
2062    InvertedIndexLookup {
2063        label_id: u16,
2064        variable: String,
2065        property: String,
2066        terms: Expr,
2067    },
2068    ShortestPath {
2069        input: Box<LogicalPlan>,
2070        edge_type_ids: Vec<u32>,
2071        direction: Direction,
2072        source_variable: String,
2073        target_variable: String,
2074        target_label_id: u16,
2075        path_variable: String,
2076        /// Minimum number of hops (edges) in the path. Default is 1.
2077        min_hops: u32,
2078        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
2079        max_hops: u32,
2080    },
2081    /// allShortestPaths() - Returns all paths with minimum length
2082    AllShortestPaths {
2083        input: Box<LogicalPlan>,
2084        edge_type_ids: Vec<u32>,
2085        direction: Direction,
2086        source_variable: String,
2087        target_variable: String,
2088        target_label_id: u16,
2089        path_variable: String,
2090        /// Minimum number of hops (edges) in the path. Default is 1.
2091        min_hops: u32,
2092        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
2093        max_hops: u32,
2094    },
2095    QuantifiedPattern {
2096        input: Box<LogicalPlan>,
2097        pattern_plan: Box<LogicalPlan>, // Plan for one iteration
2098        min_iterations: u32,
2099        max_iterations: u32,
2100        path_variable: Option<String>,
2101        start_variable: String, // Input variable for iteration (e.g. 'a' in (a)-[:R]->(b))
2102        binding_variable: String, // Output variable of iteration (e.g. 'b')
2103    },
2104    // DDL Plans
2105    CreateVectorIndex {
2106        config: VectorIndexConfig,
2107        if_not_exists: bool,
2108    },
2109    /// Scored sparse-vector (SPLADE / learned-sparse) index. Reached via
2110    /// `CREATE VECTOR INDEX … OPTIONS{type:'sparse'}`, which shares the vector
2111    /// DDL surface but is a distinct index kind.
2112    CreateSparseIndex {
2113        config: SparseVectorIndexConfig,
2114        if_not_exists: bool,
2115    },
2116    CreateFullTextIndex {
2117        config: FullTextIndexConfig,
2118        if_not_exists: bool,
2119    },
2120    CreateScalarIndex {
2121        config: ScalarIndexConfig,
2122        if_not_exists: bool,
2123    },
2124    CreateJsonFtsIndex {
2125        config: JsonFtsIndexConfig,
2126        if_not_exists: bool,
2127    },
2128    DropIndex {
2129        name: String,
2130        if_exists: bool,
2131    },
2132    ShowIndexes {
2133        filter: Option<String>,
2134    },
2135    Copy {
2136        target: String,
2137        source: String,
2138        is_export: bool,
2139        options: HashMap<String, Value>,
2140    },
2141    Backup {
2142        destination: String,
2143        options: HashMap<String, Value>,
2144    },
2145    Explain {
2146        plan: Box<LogicalPlan>,
2147    },
2148    // Admin Plans
2149    ShowDatabase,
2150    ShowConfig,
2151    ShowStatistics,
2152    Vacuum,
2153    Checkpoint,
2154    CopyTo {
2155        label: String,
2156        path: String,
2157        format: String,
2158        options: HashMap<String, Value>,
2159    },
2160    CopyFrom {
2161        label: String,
2162        path: String,
2163        format: String,
2164        options: HashMap<String, Value>,
2165    },
2166    // Schema DDL
2167    CreateLabel(CreateLabel),
2168    CreateEdgeType(CreateEdgeType),
2169    AlterLabel(AlterLabel),
2170    AlterEdgeType(AlterEdgeType),
2171    DropLabel(DropLabel),
2172    DropEdgeType(DropEdgeType),
2173    // Constraints
2174    CreateConstraint(CreateConstraint),
2175    DropConstraint(DropConstraint),
2176    ShowConstraints(ShowConstraints),
2177    /// Bind a zero-length path (single node pattern with path variable).
2178    /// E.g., `p = (a)` creates a Path with one node and zero edges.
2179    BindZeroLengthPath {
2180        input: Box<LogicalPlan>,
2181        node_variable: String,
2182        path_variable: String,
2183    },
2184    /// Bind a fixed-length path from already-computed node and edge columns.
2185    /// E.g., `p = (a)-[r]->(b)` or `p = (a)-[r1]->(b)-[r2]->(c)`.
2186    BindPath {
2187        input: Box<LogicalPlan>,
2188        node_variables: Vec<String>,
2189        edge_variables: Vec<String>,
2190        path_variable: String,
2191    },
2192
2193    // ── Locy variants ──────────────────────────────────────────
2194    /// Top-level Locy program: stratified rules + commands.
2195    LocyProgram {
2196        strata: Vec<super::planner_locy_types::LocyStratum>,
2197        commands: Vec<super::planner_locy_types::LocyCommand>,
2198        derived_scan_registry: Arc<super::df_graph::locy_fixpoint::DerivedScanRegistry>,
2199        max_iterations: usize,
2200        timeout: std::time::Duration,
2201        max_derived_bytes: usize,
2202        deterministic_best_by: bool,
2203        strict_probability_domain: bool,
2204        probability_epsilon: f64,
2205        exact_probability: bool,
2206        max_bdd_variables: usize,
2207        top_k_proofs: usize,
2208        /// Active probability semiring (rollout D-7). Defaults to
2209        /// `AddMultProb` (Phase 1/2 byte-identical behavior). `BddExact`
2210        /// is selected by `LocyConfig::resolve()` when `exact_probability`
2211        /// is true.
2212        semiring_kind: uni_locy::SemiringKind,
2213        /// Phase B Slice 3: per-evaluation registry of neural classifiers
2214        /// keyed by model name. Empty for programs without `CREATE MODEL`.
2215        classifier_registry: Arc<uni_locy::ClassifierRegistry>,
2216        /// Phase B follow-up: optional memoization cache. `None` →
2217        /// runtime creates a fresh per-query cache; `Some` → shared
2218        /// across queries (caller-managed).
2219        classifier_cache: Option<Arc<uni_locy::ModelInvocationCache>>,
2220        /// Phase C B1-B3 follow-up: per-query side-channel store
2221        /// for per-invocation (raw, calibrated, confidence_band)
2222        /// records. Flows alongside `classifier_cache` into
2223        /// `LocyProgramExec`.
2224        classifier_provenance_store: Option<Arc<uni_locy::NeuralProvenanceStore>>,
2225    },
2226    /// FOLD operator: lattice-join non-key columns per KEY group.
2227    LocyFold {
2228        input: Box<LogicalPlan>,
2229        key_columns: Vec<String>,
2230        fold_bindings: Vec<(String, Expr)>,
2231        strict_probability_domain: bool,
2232        probability_epsilon: f64,
2233    },
2234    /// BEST BY operator: select best row per KEY group by ordered criteria.
2235    LocyBestBy {
2236        input: Box<LogicalPlan>,
2237        key_columns: Vec<String>,
2238        /// (expression, ascending) pairs.
2239        criteria: Vec<(Expr, bool)>,
2240    },
2241    /// PRIORITY operator: keep only highest-priority clause's rows per KEY group.
2242    LocyPriority {
2243        input: Box<LogicalPlan>,
2244        key_columns: Vec<String>,
2245    },
2246    /// Scan a derived relation's in-memory buffer during fixpoint iteration.
2247    LocyDerivedScan {
2248        scan_index: usize,
2249        data: Arc<RwLock<Vec<RecordBatch>>>,
2250        schema: SchemaRef,
2251    },
2252    /// Compact projection for Locy YIELD — emits ONLY the listed expressions,
2253    /// without carrying through helper/property columns like the regular Project.
2254    LocyProject {
2255        input: Box<LogicalPlan>,
2256        projections: Vec<(Expr, Option<String>)>,
2257        /// Expected output Arrow type per projection (for CAST support).
2258        target_types: Vec<DataType>,
2259    },
2260    /// Phase B A4: invoke registered neural classifiers against the
2261    /// input batches and overwrite the per-invocation placeholder
2262    /// column with each row's predicted probability. Wraps a Locy
2263    /// clause body plan when `CompiledClause.model_invocations` is
2264    /// non-empty; transparent (passes batches through unchanged) when
2265    /// the list is empty.
2266    ///
2267    /// Registry and cache are carried on the node so that
2268    /// `execute_subplan` — which spins up a fresh
2269    /// `HybridPhysicalPlanner` per call — can lower it to a physical
2270    /// `LocyModelInvokeExec` without depending on planner-side
2271    /// runtime state.
2272    LocyModelInvoke {
2273        input: Box<LogicalPlan>,
2274        invocations: Vec<uni_locy::ModelInvocation>,
2275        classifier_registry: Arc<uni_locy::ClassifierRegistry>,
2276        classifier_cache: Option<Arc<uni_locy::ModelInvocationCache>>,
2277        /// Phase C B1-B3 follow-up: per-query side-channel store
2278        /// for per-invocation (raw, calibrated, confidence_band)
2279        /// records. `LocyModelInvokeExec` writes here after each
2280        /// classifier call; EXPLAIN reads via collect_neural_calls
2281        /// to surface NeuralProvenance for ALONG/FOLD-position
2282        /// invocations and Mode B re-execution paths.
2283        classifier_provenance_store: Option<Arc<uni_locy::NeuralProvenanceStore>>,
2284        /// Phase D D3 runtime: one handle per `path_context.source_rule`
2285        /// referenced by any invocation on this node. The handle's
2286        /// `data: Arc<RwLock<Vec<RecordBatch>>>` is shared with the
2287        /// `DerivedScanRegistry`; the source rule's derived facts are
2288        /// already converged by the time this node executes (the
2289        /// dependency-graph builder ensures source rules sit in
2290        /// earlier strata).
2291        path_context_handles: std::collections::HashMap<
2292            String,
2293            super::df_graph::locy_model_invoke::PathContextHandle,
2294        >,
2295    },
2296}
2297
2298/// Extracted vector similarity predicate info for optimization
2299struct VectorSimilarityPredicate {
2300    variable: String,
2301    property: String,
2302    query: Expr,
2303    threshold: Option<f32>,
2304}
2305
2306/// Result of extracting vector_similarity from a predicate
2307struct VectorSimilarityExtraction {
2308    /// The extracted vector similarity predicate
2309    predicate: VectorSimilarityPredicate,
2310    /// Remaining predicates that couldn't be optimized (if any)
2311    residual: Option<Expr>,
2312}
2313
2314/// Try to extract a vector_similarity predicate from an expression.
2315/// Matches patterns like:
2316/// - vector_similarity(n.embedding, [1,2,3]) > 0.8
2317/// - n.embedding ~= $query
2318///
2319/// Also handles AND predicates.
2320fn extract_vector_similarity(expr: &Expr) -> Option<VectorSimilarityExtraction> {
2321    match expr {
2322        Expr::BinaryOp { left, op, right } => {
2323            // Handle AND: check both sides for vector_similarity
2324            if matches!(op, BinaryOp::And) {
2325                // Try left side first
2326                if let Some(vs) = extract_simple_vector_similarity(left) {
2327                    return Some(VectorSimilarityExtraction {
2328                        predicate: vs,
2329                        residual: Some(right.as_ref().clone()),
2330                    });
2331                }
2332                // Try right side
2333                if let Some(vs) = extract_simple_vector_similarity(right) {
2334                    return Some(VectorSimilarityExtraction {
2335                        predicate: vs,
2336                        residual: Some(left.as_ref().clone()),
2337                    });
2338                }
2339                // Recursively check within left/right for nested ANDs
2340                if let Some(mut extraction) = extract_vector_similarity(left) {
2341                    extraction.residual = Some(combine_with_and(
2342                        extraction.residual,
2343                        right.as_ref().clone(),
2344                    ));
2345                    return Some(extraction);
2346                }
2347                if let Some(mut extraction) = extract_vector_similarity(right) {
2348                    extraction.residual =
2349                        Some(combine_with_and(extraction.residual, left.as_ref().clone()));
2350                    return Some(extraction);
2351                }
2352                return None;
2353            }
2354
2355            // Simple case: direct vector_similarity comparison
2356            if let Some(vs) = extract_simple_vector_similarity(expr) {
2357                return Some(VectorSimilarityExtraction {
2358                    predicate: vs,
2359                    residual: None,
2360                });
2361            }
2362            None
2363        }
2364        _ => None,
2365    }
2366}
2367
2368/// Helper to combine an optional expression with another using AND
2369fn combine_with_and(opt_expr: Option<Expr>, other: Expr) -> Expr {
2370    match opt_expr {
2371        Some(e) => Expr::BinaryOp {
2372            left: Box::new(e),
2373            op: BinaryOp::And,
2374            right: Box::new(other),
2375        },
2376        None => other,
2377    }
2378}
2379
2380/// Extract a simple vector_similarity comparison (no AND)
2381fn extract_simple_vector_similarity(expr: &Expr) -> Option<VectorSimilarityPredicate> {
2382    match expr {
2383        Expr::BinaryOp { left, op, right } => {
2384            // Pattern: vector_similarity(...) > threshold or vector_similarity(...) >= threshold
2385            if matches!(op, BinaryOp::Gt | BinaryOp::GtEq)
2386                && let (Some(vs), Some(thresh)) = (
2387                    extract_vector_similarity_call(left),
2388                    extract_float_literal(right),
2389                )
2390            {
2391                return Some(VectorSimilarityPredicate {
2392                    variable: vs.0,
2393                    property: vs.1,
2394                    query: vs.2,
2395                    threshold: Some(thresh),
2396                });
2397            }
2398            // Pattern: threshold < vector_similarity(...) or threshold <= vector_similarity(...)
2399            if matches!(op, BinaryOp::Lt | BinaryOp::LtEq)
2400                && let (Some(thresh), Some(vs)) = (
2401                    extract_float_literal(left),
2402                    extract_vector_similarity_call(right),
2403                )
2404            {
2405                return Some(VectorSimilarityPredicate {
2406                    variable: vs.0,
2407                    property: vs.1,
2408                    query: vs.2,
2409                    threshold: Some(thresh),
2410                });
2411            }
2412            // Pattern: n.embedding ~= query
2413            if matches!(op, BinaryOp::ApproxEq)
2414                && let Expr::Property(var_expr, prop) = left.as_ref()
2415                && let Expr::Variable(var) = var_expr.as_ref()
2416            {
2417                return Some(VectorSimilarityPredicate {
2418                    variable: var.clone(),
2419                    property: prop.clone(),
2420                    query: right.as_ref().clone(),
2421                    threshold: None,
2422                });
2423            }
2424            None
2425        }
2426        _ => None,
2427    }
2428}
2429
2430/// Extract (variable, property, query_expr) from vector_similarity(n.prop, query)
2431fn extract_vector_similarity_call(expr: &Expr) -> Option<(String, String, Expr)> {
2432    if let Expr::FunctionCall { name, args, .. } = expr
2433        && name.eq_ignore_ascii_case("vector_similarity")
2434        && args.len() == 2
2435    {
2436        // First arg should be Property(Identifier(var), prop)
2437        if let Expr::Property(var_expr, prop) = &args[0]
2438            && let Expr::Variable(var) = var_expr.as_ref()
2439        {
2440            // Second arg is query
2441            return Some((var.clone(), prop.clone(), args[1].clone()));
2442        }
2443    }
2444    None
2445}
2446
2447/// Extract a float value from a literal expression
2448fn extract_float_literal(expr: &Expr) -> Option<f32> {
2449    match expr {
2450        Expr::Literal(CypherLiteral::Integer(i)) => Some(*i as f32),
2451        Expr::Literal(CypherLiteral::Float(f)) => Some(*f as f32),
2452        _ => None,
2453    }
2454}
2455
2456/// Translates a parsed Cypher AST into a [`LogicalPlan`].
2457///
2458/// `QueryPlanner` applies semantic validation (variable scoping, label
2459/// resolution, type checking) and produces a plan tree that the executor
2460/// can run against storage.
2461#[derive(Debug)]
2462pub struct QueryPlanner {
2463    schema: Arc<Schema>,
2464    /// Cache of parsed generation expressions, keyed by (label_name, gen_col_name).
2465    gen_expr_cache: HashMap<(String, String), Expr>,
2466    /// Counter for generating unique anonymous variable names.
2467    anon_counter: std::sync::atomic::AtomicUsize,
2468    /// Optional query parameters for resolving $param in SKIP/LIMIT.
2469    params: HashMap<String, uni_common::Value>,
2470    /// Optional plugin registry consulted when label / edge-type / identifier
2471    /// resolution misses the local schema (M5b — Catalog / ReplacementScan).
2472    plugin_registry: Option<Arc<uni_plugin::PluginRegistry>>,
2473    /// Gate for replacement-scan dispatch on unknown identifiers (M5b).
2474    replacement_scans_enabled: bool,
2475    /// Names of parameters folded into a `LIMIT`/`SKIP` position during the
2476    /// plan. The resulting `LogicalPlan::Limit` bakes the concrete values in, so
2477    /// a plan cache keyed on query text must additionally key on these
2478    /// parameters' values (see `folded_limit_skip_params`). Interior-mutable
2479    /// because `plan` takes `&self`.
2480    folded_limit_skip_params: std::sync::Mutex<std::collections::BTreeSet<String>>,
2481}
2482
2483struct TraverseParams<'a> {
2484    rel: &'a RelationshipPattern,
2485    target_node: &'a NodePattern,
2486    optional: bool,
2487    path_variable: Option<String>,
2488    /// All variables from this OPTIONAL MATCH pattern.
2489    /// Used to ensure multi-hop patterns correctly NULL all vars when any hop fails.
2490    optional_pattern_vars: HashSet<String>,
2491}
2492
2493impl QueryPlanner {
2494    /// Create a new planner for the given schema.
2495    ///
2496    /// Pre-parses all generation expressions defined in the schema so that
2497    /// repeated plan calls avoid redundant parsing.
2498    pub fn new(schema: Arc<Schema>) -> Self {
2499        // Pre-parse all generation expressions for caching
2500        let mut gen_expr_cache = HashMap::new();
2501        for (label, props) in &schema.properties {
2502            for (gen_col, meta) in props {
2503                if let Some(expr_str) = &meta.generation_expression
2504                    && let Ok(parsed_expr) = uni_cypher::parse_expression(expr_str)
2505                {
2506                    gen_expr_cache.insert((label.clone(), gen_col.clone()), parsed_expr);
2507                }
2508            }
2509        }
2510        Self {
2511            schema,
2512            gen_expr_cache,
2513            anon_counter: std::sync::atomic::AtomicUsize::new(0),
2514            params: HashMap::new(),
2515            plugin_registry: None,
2516            replacement_scans_enabled: false,
2517            folded_limit_skip_params: std::sync::Mutex::new(std::collections::BTreeSet::new()),
2518        }
2519    }
2520
2521    /// Graph schema this planner resolves labels and property types against.
2522    pub(crate) fn schema(&self) -> &Schema {
2523        &self.schema
2524    }
2525
2526    /// Record the parameters referenced by a successfully-folded `LIMIT`/`SKIP`
2527    /// expression so the caller's plan cache can key on their values.
2528    fn note_folded_limit_skip(&self, expr: &Expr) {
2529        let mut names = Vec::new();
2530        collect_expr_parameters(expr, &mut names);
2531        if !names.is_empty()
2532            && let Ok(mut acc) = self.folded_limit_skip_params.lock()
2533        {
2534            acc.extend(names);
2535        }
2536    }
2537
2538    /// Parameter names folded into `LIMIT`/`SKIP` positions during the last
2539    /// [`plan`](Self::plan).
2540    ///
2541    /// The cached plan bakes these values in, so a text-keyed plan cache must
2542    /// fold their current values into its key — otherwise two calls differing
2543    /// only in a LIMIT/SKIP parameter would wrongly share one cached plan.
2544    /// Returns an empty vector when no parameter was folded.
2545    #[must_use]
2546    pub fn folded_limit_skip_params(&self) -> Vec<String> {
2547        self.folded_limit_skip_params
2548            .lock()
2549            .map(|acc| acc.iter().cloned().collect())
2550            .unwrap_or_default()
2551    }
2552
2553    /// Set query parameters for resolving `$param` references in SKIP/LIMIT.
2554    pub fn with_params(mut self, params: HashMap<String, uni_common::Value>) -> Self {
2555        self.params = params;
2556        self
2557    }
2558
2559    /// Attach a plugin registry for catalog / replacement-scan fallbacks
2560    /// (M5b). When absent, label / edge-type resolution behaves exactly as
2561    /// before; when present, an unknown label is offered to each
2562    /// `CatalogProvider` before erroring.
2563    #[must_use]
2564    pub fn with_plugin_registry(mut self, registry: Arc<uni_plugin::PluginRegistry>) -> Self {
2565        self.plugin_registry = Some(registry);
2566        self
2567    }
2568
2569    /// Enable replacement-scan dispatch on unknown identifiers (M5b §4.23).
2570    /// Default off; opt-in only.
2571    #[must_use]
2572    pub fn with_replacement_scans(mut self, enabled: bool) -> Self {
2573        self.replacement_scans_enabled = enabled;
2574        self
2575    }
2576
2577    /// Allocate (or look up) a virtual label ID for `name` by consulting
2578    /// every registered `CatalogProvider` and then every registered
2579    /// `ReplacementScanProvider` (only the latter when the replacement-
2580    /// scan gate is on). On a first claim the catalog table is stashed
2581    /// on the host's [`uni_plugin::PluginRegistry`] under a freshly
2582    /// allocated virtual ID; subsequent calls with the same name return
2583    /// the cached ID and refresh the stashed table.
2584    ///
2585    /// Returns `None` if no provider claims the label or no plugin
2586    /// registry is attached. Returns `Some((id, table))` on a hit; the
2587    /// `id` lies in `[VIRTUAL_LABEL_ID_START, VIRTUAL_LABEL_ID_SENTINEL)`.
2588    /// Errors are surfaced as `Some(Err(_))`-equivalent via `Result`.
2589    fn allocate_virtual_label(
2590        &self,
2591        name: &str,
2592    ) -> Result<Option<(u16, Arc<dyn uni_plugin::traits::catalog::CatalogTable>)>> {
2593        let Some(registry) = self.plugin_registry.as_ref() else {
2594            return Ok(None);
2595        };
2596        // 1. CatalogProvider (always consulted, no gate — Batch 2 semantics).
2597        let mut claimed: Option<Arc<dyn uni_plugin::traits::catalog::CatalogTable>> = None;
2598        for cat in registry.catalogs() {
2599            if let Some(t) = cat.resolve_label(name) {
2600                claimed = Some(t);
2601                break;
2602            }
2603        }
2604        // 2. ReplacementScanProvider (gated). Only consult if no
2605        //    CatalogProvider already claimed.
2606        if claimed.is_none() {
2607            use uni_plugin::traits::catalog::{Replacement, ReplacementRequest};
2608            if let Some(Replacement::CatalogTable(t)) =
2609                self.consult_replacement_scan(ReplacementRequest::Label(name))
2610            {
2611                claimed = Some(t);
2612            }
2613        }
2614        let Some(table) = claimed else {
2615            return Ok(None);
2616        };
2617        let id = registry
2618            .register_virtual_label(name, Arc::clone(&table))
2619            .map_err(|e| anyhow!("virtual label registration failed for `{name}`: {e}"))?;
2620        Ok(Some((id, table)))
2621    }
2622
2623    /// Reject any write operation that names a label currently allocated
2624    /// as a virtual (catalog-backed) label. Catalog tables are read-only
2625    /// in this milestone — there is no write-back path through
2626    /// `CatalogTable::scan` to the originating provider, so silently
2627    /// allowing the write would produce ghosted state on the host side
2628    /// without affecting the external catalog. Errors with a clear,
2629    /// actionable message.
2630    fn reject_virtual_label_writes(&self, labels: &[String], op: &str) -> Result<()> {
2631        let Some(registry) = self.plugin_registry.as_ref() else {
2632            return Ok(());
2633        };
2634        for label in labels {
2635            if registry.virtual_label_by_name(label).is_some() {
2636                return Err(anyhow!(
2637                    "Cannot {op} on virtual (catalog-resolved) label `{label}` — virtual \
2638                     labels are read-only; write back via the originating catalog \
2639                     instead"
2640                ));
2641            }
2642        }
2643        Ok(())
2644    }
2645
2646    /// Edge-type analog of [`Self::allocate_virtual_label`].
2647    fn allocate_virtual_edge_type(
2648        &self,
2649        name: &str,
2650    ) -> Result<Option<(u32, Arc<dyn uni_plugin::traits::catalog::CatalogTable>)>> {
2651        let Some(registry) = self.plugin_registry.as_ref() else {
2652            return Ok(None);
2653        };
2654        let mut claimed: Option<Arc<dyn uni_plugin::traits::catalog::CatalogTable>> = None;
2655        for cat in registry.catalogs() {
2656            if let Some(t) = cat.resolve_edge_type(name) {
2657                claimed = Some(t);
2658                break;
2659            }
2660        }
2661        let Some(table) = claimed else {
2662            return Ok(None);
2663        };
2664        let id = registry
2665            .register_virtual_edge_type(name, Arc::clone(&table))
2666            .map_err(|e| anyhow!("virtual edge-type registration failed for `{name}`: {e}"))?;
2667        Ok(Some((id, table)))
2668    }
2669
2670    /// Try to resolve an unknown identifier through replacement-scan providers
2671    /// (gated by [`Self::with_replacement_scans`]). Returns the first
2672    /// [`Replacement`] any registered provider produces, or `None` if the
2673    /// gate is off, no registry is attached, or no provider claims the
2674    /// identifier. First-match wins (mirrors DuckDB).
2675    pub(crate) fn consult_replacement_scan(
2676        &self,
2677        request: uni_plugin::traits::catalog::ReplacementRequest<'_>,
2678    ) -> Option<uni_plugin::traits::catalog::Replacement> {
2679        if !self.replacement_scans_enabled {
2680            return None;
2681        }
2682        let registry = self.plugin_registry.as_ref()?;
2683        for r in registry.replacement_scans().iter() {
2684            if let Some(replacement) = r.replace(&request) {
2685                tracing::debug!(
2686                    target: "uni.plugin.registry",
2687                    ?request,
2688                    ?replacement,
2689                    "identifier resolved via ReplacementScanProvider"
2690                );
2691                return Some(replacement);
2692            }
2693        }
2694        None
2695    }
2696
2697    /// Resolve a user-typed procedure name against the attached plugin
2698    /// registry, applying the same namespace-prefix rules as
2699    /// `ProcedureRegistry::resolve_user_procedure` (host-coupled
2700    /// procedure dispatch). Returns `true` if any namespace claims the
2701    /// name. Used by the procedure-call replacement-scan gate to decide
2702    /// whether to consult before substituting.
2703    fn procedure_resolves(&self, user_name: &str) -> bool {
2704        let Some(registry) = self.plugin_registry.as_ref() else {
2705            return false;
2706        };
2707        // Try every namespace/local split (first-dot → last-dot) so dotted
2708        // plugin ids resolve alongside the first-dot M9/builtin convention.
2709        // Mirrors `ProcedureRegistry::resolve_user_procedure`.
2710        if uni_plugin::QName::candidate_splits(user_name).any(|q| registry.procedure(&q).is_some())
2711        {
2712            return true;
2713        }
2714        let stripped = user_name.strip_prefix("uni.").unwrap_or(user_name);
2715        for plugin_id in ["uni", "builtin", "apoc-core", "custom"] {
2716            if registry
2717                .procedure(&uni_plugin::QName::new(plugin_id, stripped))
2718                .is_some()
2719            {
2720                return true;
2721            }
2722        }
2723        false
2724    }
2725
2726    /// Construct a [`uni_plugin::QName`] from a user-typed identifier for
2727    /// passing to [`Replacement`]-scan providers. If the name is dotted,
2728    /// the last segment is the local and the rest is the namespace
2729    /// (mirroring `QName::parse`). Bare names — which Cypher allows for
2730    /// procedures (`CALL foo()`) and functions (`RETURN foo(x)`) — are
2731    /// encoded with the conventional `"user"` namespace; providers that
2732    /// want to match a bare-typed name should inspect `.local()`.
2733    fn qname_from_user(name: &str) -> uni_plugin::QName {
2734        uni_plugin::QName::parse(name).unwrap_or_else(|_| uni_plugin::QName::new("user", name))
2735    }
2736
2737    /// Apply `ReplacementScanProvider`-driven function rewrites to the
2738    /// query's AST. When the gate is off or no registry is attached, the
2739    /// walker is short-circuited and the query is returned unchanged.
2740    /// Otherwise, every [`uni_cypher::ast::Expr::FunctionCall`] is offered
2741    /// to registered providers (first-match wins); a returned
2742    /// `Replacement::Function(new_qname)` substitutes the name in place.
2743    /// Rewrite depth is capped at 1 — the rewritten name is NOT re-
2744    /// consulted (a chained `A→B→A` provider therefore stops after the
2745    /// first hop). Wrong-variant returns (`CatalogTable`, `Procedure`)
2746    /// error immediately.
2747    fn rewrite_function_calls_in_query(
2748        &self,
2749        query: uni_cypher::ast::Query,
2750    ) -> Result<uni_cypher::ast::Query> {
2751        if !self.replacement_scans_enabled || self.plugin_registry.is_none() {
2752            return Ok(query);
2753        }
2754        let mut rename = |name: &str| -> Result<Option<String>> {
2755            let qname = Self::qname_from_user(name);
2756            use uni_plugin::traits::catalog::{Replacement, ReplacementRequest};
2757            match self.consult_replacement_scan(ReplacementRequest::Function(&qname)) {
2758                Some(Replacement::Function(new_qname)) => {
2759                    // Cypher function-call dispatch is bare-name-keyed
2760                    // (the per-category translators in `df_expr` match on
2761                    // `name.to_uppercase()` against bare local strings —
2762                    // "UPPER", "ABS", etc.). When the provider returns a
2763                    // synthetic-namespace target (`builtin.*` or `user.*`),
2764                    // strip the namespace so the AST name is what those
2765                    // dispatchers expect; for plugin-namespaced targets,
2766                    // preserve the full dotted form (matches how users
2767                    // type them).
2768                    let rewritten = match new_qname.namespace() {
2769                        "builtin" | "user" => new_qname.local().to_string(),
2770                        _ => new_qname.to_string(),
2771                    };
2772                    tracing::debug!(
2773                        target: "uni.plugin.registry",
2774                        from = %name,
2775                        to = %rewritten,
2776                        "function call rerouted via ReplacementScanProvider"
2777                    );
2778                    Ok(Some(rewritten))
2779                }
2780                Some(other) => Err(anyhow!(
2781                    "ReplacementScanProvider returned wrong variant for Function \
2782                     request `{}`: expected `Function`, got {:?}",
2783                    name,
2784                    other
2785                )),
2786                None => Ok(None),
2787            }
2788        };
2789        crate::query::rewrite::function_rename::rewrite_function_calls_in_query(query, &mut rename)
2790    }
2791
2792    /// Plan a Cypher query with no pre-bound variables.
2793    pub fn plan(&self, query: Query) -> Result<LogicalPlan> {
2794        self.plan_with_scope(query, Vec::new())
2795    }
2796
2797    /// Plan a Cypher query with a set of externally pre-bound variable names.
2798    ///
2799    /// `vars` lists variable names already in scope before this query executes
2800    /// (e.g., from an enclosing Locy rule body).
2801    pub fn plan_with_scope(&self, query: Query, vars: Vec<String>) -> Result<LogicalPlan> {
2802        // Apply query rewrites before planning
2803        let rewritten_query = crate::query::rewrite::rewrite_query(query)?;
2804        // M5 follow-up #5: function-call rewrite via ReplacementScanProvider.
2805        // Done as an AST pass *before* planning so the rewritten name flows
2806        // through every downstream stage (translation, UDF resolution,
2807        // execution) as if the user had typed it. No-op when the gate is
2808        // off or no provider claims the call. First-match wins; hard-cap
2809        // at one rewrite per call site (the rewritten name is NOT re-
2810        // consulted) — see `rewrite_function_calls_in_query`.
2811        let rewritten_query = self.rewrite_function_calls_in_query(rewritten_query)?;
2812        if Self::has_mixed_union_modes(&rewritten_query) {
2813            return Err(anyhow!(
2814                "SyntaxError: InvalidClauseComposition - Cannot mix UNION and UNION ALL in the same query"
2815            ));
2816        }
2817
2818        match rewritten_query {
2819            Query::Single(stmt) => self.plan_single(stmt, vars),
2820            Query::Union { left, right, all } => {
2821                let l = self.plan_with_scope(*left, vars.clone())?;
2822                let r = self.plan_with_scope(*right, vars)?;
2823
2824                // Validate that both sides have the same column names
2825                let left_cols = Self::extract_projection_columns(&l);
2826                let right_cols = Self::extract_projection_columns(&r);
2827
2828                if left_cols != right_cols {
2829                    return Err(anyhow!(
2830                        "SyntaxError: DifferentColumnsInUnion - UNION queries must have same column names"
2831                    ));
2832                }
2833
2834                Ok(LogicalPlan::Union {
2835                    left: Box::new(l),
2836                    right: Box::new(r),
2837                    all,
2838                })
2839            }
2840            Query::Schema(cmd) => self.plan_schema_command(*cmd),
2841            Query::Explain(inner) => {
2842                let inner_plan = self.plan_with_scope(*inner, vars)?;
2843                Ok(LogicalPlan::Explain {
2844                    plan: Box::new(inner_plan),
2845                })
2846            }
2847            Query::TimeTravel { .. } => {
2848                unreachable!("TimeTravel should be resolved at API layer before planning")
2849            }
2850        }
2851    }
2852
2853    fn collect_union_modes(query: &Query, out: &mut HashSet<bool>) {
2854        match query {
2855            Query::Union { left, right, all } => {
2856                out.insert(*all);
2857                Self::collect_union_modes(left, out);
2858                Self::collect_union_modes(right, out);
2859            }
2860            Query::Explain(inner) => Self::collect_union_modes(inner, out),
2861            Query::TimeTravel { query, .. } => Self::collect_union_modes(query, out),
2862            Query::Single(_) | Query::Schema(_) => {}
2863        }
2864    }
2865
2866    fn has_mixed_union_modes(query: &Query) -> bool {
2867        let mut modes = HashSet::new();
2868        Self::collect_union_modes(query, &mut modes);
2869        modes.len() > 1
2870    }
2871
2872    fn next_anon_var(&self) -> String {
2873        let id = self
2874            .anon_counter
2875            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
2876        format!("_anon_{}", id)
2877    }
2878
2879    /// Extract projection column names from a logical plan.
2880    /// Used for UNION column validation.
2881    fn extract_projection_columns(plan: &LogicalPlan) -> Vec<String> {
2882        match plan {
2883            LogicalPlan::Project { projections, .. } => projections
2884                .iter()
2885                .map(|(expr, alias)| alias.clone().unwrap_or_else(|| expr.to_string_repr()))
2886                .collect(),
2887            LogicalPlan::Limit { input, .. }
2888            | LogicalPlan::Sort { input, .. }
2889            | LogicalPlan::Distinct { input, .. }
2890            | LogicalPlan::Filter { input, .. } => Self::extract_projection_columns(input),
2891            LogicalPlan::Union { left, right, .. } => {
2892                let left_cols = Self::extract_projection_columns(left);
2893                if left_cols.is_empty() {
2894                    Self::extract_projection_columns(right)
2895                } else {
2896                    left_cols
2897                }
2898            }
2899            LogicalPlan::Aggregate {
2900                group_by,
2901                aggregates,
2902                ..
2903            } => {
2904                let mut cols: Vec<String> = group_by.iter().map(|e| e.to_string_repr()).collect();
2905                cols.extend(aggregates.iter().map(|e| e.to_string_repr()));
2906                cols
2907            }
2908            _ => Vec::new(),
2909        }
2910    }
2911
2912    fn plan_return_clause(
2913        &self,
2914        return_clause: &ReturnClause,
2915        plan: LogicalPlan,
2916        vars_in_scope: &[VariableInfo],
2917    ) -> Result<LogicalPlan> {
2918        let mut plan = plan;
2919        let mut group_by = Vec::new();
2920        let mut aggregates = Vec::new();
2921        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
2922        let mut has_agg = false;
2923        let mut projections = Vec::new();
2924        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
2925        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
2926        let mut projected_aliases: HashSet<String> = HashSet::new();
2927
2928        for item in &return_clause.items {
2929            match item {
2930                ReturnItem::All => {
2931                    // RETURN * - add all user-named variables in scope
2932                    // (anonymous variables like _anon_0 are excluded)
2933                    let user_vars: Vec<_> = vars_in_scope
2934                        .iter()
2935                        .filter(|v| !v.name.starts_with("_anon_"))
2936                        .collect();
2937                    if user_vars.is_empty() {
2938                        return Err(anyhow!(
2939                            "SyntaxError: NoVariablesInScope - RETURN * is not allowed when there are no variables in scope"
2940                        ));
2941                    }
2942                    for v in user_vars {
2943                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2944                        if !group_by.contains(&Expr::Variable(v.name.clone())) {
2945                            group_by.push(Expr::Variable(v.name.clone()));
2946                        }
2947                        projected_aliases.insert(v.name.clone());
2948                        projected_simple_reprs.insert(v.name.clone());
2949                    }
2950                }
2951                ReturnItem::Expr {
2952                    expr,
2953                    alias,
2954                    source_text,
2955                } => {
2956                    if matches!(expr, Expr::Wildcard) {
2957                        for v in vars_in_scope {
2958                            projections
2959                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2960                            if !group_by.contains(&Expr::Variable(v.name.clone())) {
2961                                group_by.push(Expr::Variable(v.name.clone()));
2962                            }
2963                            projected_aliases.insert(v.name.clone());
2964                            projected_simple_reprs.insert(v.name.clone());
2965                        }
2966                    } else {
2967                        // Validate expression variables are defined
2968                        validate_expression_variables(expr, vars_in_scope)?;
2969                        // Validate function argument types and boolean operators
2970                        validate_expression(expr, vars_in_scope)?;
2971                        // Pattern predicates are not allowed in RETURN
2972                        if contains_pattern_predicate(expr) {
2973                            return Err(anyhow!(
2974                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in RETURN"
2975                            ));
2976                        }
2977
2978                        // Use source text as column name when no explicit alias
2979                        let effective_alias = alias.clone().or_else(|| source_text.clone());
2980                        projections.push((expr.clone(), effective_alias));
2981                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
2982                            // Bare aggregate — push directly
2983                            has_agg = true;
2984                            aggregates.push(expr.clone());
2985                            projected_aggregate_reprs.insert(expr.to_string_repr());
2986                        } else if !is_window_function(expr)
2987                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
2988                        {
2989                            // Compound aggregate or expression containing aggregates —
2990                            // extract the inner bare aggregates for the Aggregate node
2991                            has_agg = true;
2992                            compound_agg_exprs.push(expr.clone());
2993                            for inner in extract_inner_aggregates(expr) {
2994                                let repr = inner.to_string_repr();
2995                                if !projected_aggregate_reprs.contains(&repr) {
2996                                    aggregates.push(inner);
2997                                    projected_aggregate_reprs.insert(repr);
2998                                }
2999                            }
3000                        } else if !group_by.contains(expr) {
3001                            group_by.push(expr.clone());
3002                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
3003                                projected_simple_reprs.insert(expr.to_string_repr());
3004                            }
3005                        }
3006
3007                        if let Some(a) = alias {
3008                            if projected_aliases.contains(a) {
3009                                return Err(anyhow!(
3010                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
3011                                    a
3012                                ));
3013                            }
3014                            projected_aliases.insert(a.clone());
3015                        } else if let Expr::Variable(v) = expr {
3016                            if projected_aliases.contains(v) {
3017                                return Err(anyhow!(
3018                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
3019                                    v
3020                                ));
3021                            }
3022                            projected_aliases.insert(v.clone());
3023                        }
3024                    }
3025                }
3026            }
3027        }
3028
3029        // Validate compound aggregate expressions: non-aggregate refs must be
3030        // individually present in the group_by as simple variables or properties.
3031        if has_agg {
3032            let group_by_reprs: HashSet<String> =
3033                group_by.iter().map(|e| e.to_string_repr()).collect();
3034            for expr in &compound_agg_exprs {
3035                let mut refs = Vec::new();
3036                collect_non_aggregate_refs(expr, false, &mut refs);
3037                for r in &refs {
3038                    let is_covered = match r {
3039                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
3040                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
3041                    };
3042                    if !is_covered {
3043                        return Err(anyhow!(
3044                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
3045                        ));
3046                    }
3047                }
3048            }
3049        }
3050
3051        if has_agg {
3052            plan = LogicalPlan::Aggregate {
3053                input: Box::new(plan),
3054                group_by,
3055                aggregates,
3056            };
3057        }
3058
3059        let mut window_exprs = Vec::new();
3060        for (expr, _) in &projections {
3061            Self::collect_window_functions(expr, &mut window_exprs);
3062        }
3063
3064        if let Some(order_by) = &return_clause.order_by {
3065            for item in order_by {
3066                Self::collect_window_functions(&item.expr, &mut window_exprs);
3067            }
3068        }
3069
3070        let has_window_exprs = !window_exprs.is_empty();
3071
3072        if has_window_exprs {
3073            // Before creating the Window node, we need to ensure all properties
3074            // referenced by window functions are available. Create a Project node
3075            // that loads these properties.
3076            let mut props_needed_for_window: Vec<Expr> = Vec::new();
3077            for window_expr in &window_exprs {
3078                Self::collect_properties_from_expr(window_expr, &mut props_needed_for_window);
3079            }
3080
3081            // Also include non-window expressions from projections that might be needed
3082            // Preserve qualified names (e.g., "e.salary") as aliases for properties
3083            let non_window_projections: Vec<_> = projections
3084                .iter()
3085                .filter_map(|(expr, alias)| {
3086                    // Keep expressions that don't have window_spec
3087                    let keep = if let Expr::FunctionCall { window_spec, .. } = expr {
3088                        window_spec.is_none()
3089                    } else {
3090                        true
3091                    };
3092
3093                    if keep {
3094                        // For property references, use the qualified name as alias
3095                        let new_alias = if matches!(expr, Expr::Property(..)) {
3096                            Some(expr.to_string_repr())
3097                        } else {
3098                            alias.clone()
3099                        };
3100                        Some((expr.clone(), new_alias))
3101                    } else {
3102                        None
3103                    }
3104                })
3105                .collect();
3106
3107            if !non_window_projections.is_empty() || !props_needed_for_window.is_empty() {
3108                let mut intermediate_projections = non_window_projections;
3109                // Add any additional property references needed by window functions
3110                // IMPORTANT: Preserve qualified names (e.g., "e.salary") as aliases so window functions can reference them
3111                for prop in &props_needed_for_window {
3112                    if !intermediate_projections
3113                        .iter()
3114                        .any(|(e, _)| e.to_string_repr() == prop.to_string_repr())
3115                    {
3116                        let qualified_name = prop.to_string_repr();
3117                        intermediate_projections.push((prop.clone(), Some(qualified_name)));
3118                    }
3119                }
3120
3121                if !intermediate_projections.is_empty() {
3122                    plan = LogicalPlan::Project {
3123                        input: Box::new(plan),
3124                        projections: intermediate_projections,
3125                    };
3126                }
3127            }
3128
3129            // Transform property expressions in window functions to use qualified variable names
3130            // so that e.dept becomes "e.dept" variable that can be looked up from the row HashMap
3131            let transformed_window_exprs: Vec<Expr> = window_exprs
3132                .into_iter()
3133                .map(Self::transform_window_expr_properties)
3134                .collect();
3135
3136            plan = LogicalPlan::Window {
3137                input: Box::new(plan),
3138                window_exprs: transformed_window_exprs,
3139            };
3140        }
3141
3142        if let Some(order_by) = &return_clause.order_by {
3143            let alias_exprs: HashMap<String, Expr> = projections
3144                .iter()
3145                .filter_map(|(expr, alias)| {
3146                    alias.as_ref().map(|a| {
3147                        // ORDER BY is planned before the final RETURN projection.
3148                        // In aggregate contexts, aliases must resolve to the
3149                        // post-aggregate output columns, not raw aggregate calls.
3150                        let rewritten = if has_agg && !has_window_exprs {
3151                            if expr.is_aggregate() && !is_compound_aggregate(expr) {
3152                                Expr::Variable(aggregate_column_name(expr))
3153                            } else if is_compound_aggregate(expr)
3154                                || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
3155                            {
3156                                replace_aggregates_with_columns(expr)
3157                            } else {
3158                                Expr::Variable(expr.to_string_repr())
3159                            }
3160                        } else {
3161                            expr.clone()
3162                        };
3163                        (a.clone(), rewritten)
3164                    })
3165                })
3166                .collect();
3167
3168            // Build an extended scope that includes RETURN aliases so ORDER BY
3169            // can reference them (e.g. RETURN n.age AS age ORDER BY age).
3170            let order_by_scope: Vec<VariableInfo> = if return_clause.distinct {
3171                // DISTINCT in RETURN narrows ORDER BY visibility to returned columns.
3172                // Keep aliases and directly returned variables in scope.
3173                let mut scope = Vec::new();
3174                for (expr, alias) in &projections {
3175                    if let Some(a) = alias
3176                        && !is_var_in_scope(&scope, a)
3177                    {
3178                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
3179                    }
3180                    if let Expr::Variable(v) = expr
3181                        && !is_var_in_scope(&scope, v)
3182                    {
3183                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
3184                    }
3185                }
3186                scope
3187            } else {
3188                let mut scope = vars_in_scope.to_vec();
3189                for (expr, alias) in &projections {
3190                    if let Some(a) = alias
3191                        && !is_var_in_scope(&scope, a)
3192                    {
3193                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
3194                    } else if let Expr::Variable(v) = expr
3195                        && !is_var_in_scope(&scope, v)
3196                    {
3197                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
3198                    }
3199                }
3200                scope
3201            };
3202            // Validate ORDER BY expressions against the extended scope
3203            for item in order_by {
3204                // DISTINCT allows ORDER BY on the same projected expression
3205                // even when underlying variables are not otherwise visible.
3206                let matches_projected_expr = return_clause.distinct
3207                    && projections
3208                        .iter()
3209                        .any(|(expr, _)| expr.to_string_repr() == item.expr.to_string_repr());
3210                if !matches_projected_expr {
3211                    validate_expression_variables(&item.expr, &order_by_scope)?;
3212                    validate_expression(&item.expr, &order_by_scope)?;
3213                }
3214                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
3215                if has_aggregate_in_item && !has_agg {
3216                    return Err(anyhow!(
3217                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY after RETURN"
3218                    ));
3219                }
3220                if has_agg && has_aggregate_in_item {
3221                    validate_with_order_by_aggregate_item(
3222                        &item.expr,
3223                        &projected_aggregate_reprs,
3224                        &projected_simple_reprs,
3225                        &projected_aliases,
3226                    )?;
3227                }
3228            }
3229            let rewritten_order_by: Vec<SortItem> = order_by
3230                .iter()
3231                .map(|item| SortItem {
3232                    expr: {
3233                        let mut rewritten =
3234                            rewrite_order_by_expr_with_aliases(&item.expr, &alias_exprs);
3235                        if has_agg && !has_window_exprs {
3236                            rewritten = replace_aggregates_with_columns(&rewritten);
3237                        }
3238                        rewritten
3239                    },
3240                    ascending: item.ascending,
3241                })
3242                .collect();
3243            plan = LogicalPlan::Sort {
3244                input: Box::new(plan),
3245                order_by: rewritten_order_by,
3246            };
3247        }
3248
3249        if return_clause.skip.is_some() || return_clause.limit.is_some() {
3250            let skip = return_clause
3251                .skip
3252                .as_ref()
3253                .map(|e| {
3254                    self.note_folded_limit_skip(e);
3255                    parse_non_negative_integer(e, "SKIP", &self.params)
3256                })
3257                .transpose()?
3258                .flatten();
3259            let fetch = return_clause
3260                .limit
3261                .as_ref()
3262                .map(|e| {
3263                    self.note_folded_limit_skip(e);
3264                    parse_non_negative_integer(e, "LIMIT", &self.params)
3265                })
3266                .transpose()?
3267                .flatten();
3268
3269            plan = LogicalPlan::Limit {
3270                input: Box::new(plan),
3271                skip,
3272                fetch,
3273            };
3274        }
3275
3276        if !projections.is_empty() {
3277            // If we created an Aggregate or Window node, we need to adjust the final projections
3278            // to reference aggregate/window function results as columns instead of re-evaluating them
3279            let final_projections = if has_agg || has_window_exprs {
3280                projections
3281                    .into_iter()
3282                    .map(|(expr, alias)| {
3283                        // Check if this expression is an aggregate function
3284                        if expr.is_aggregate() && !is_compound_aggregate(&expr) && !has_window_exprs
3285                        {
3286                            // Bare aggregate — replace with column reference
3287                            let col_name = aggregate_column_name(&expr);
3288                            (Expr::Variable(col_name), alias)
3289                        } else if !has_window_exprs
3290                            && (is_compound_aggregate(&expr)
3291                                || (!expr.is_aggregate() && contains_aggregate_recursive(&expr)))
3292                        {
3293                            // Compound aggregate — replace inner aggregates with
3294                            // column references, keep outer expression for Project
3295                            (replace_aggregates_with_columns(&expr), alias)
3296                        }
3297                        // For grouped RETURN projections, reference the pre-computed
3298                        // group-by output column instead of re-evaluating the expression
3299                        // against the aggregate schema (which no longer has original vars).
3300                        else if has_agg
3301                            && !has_window_exprs
3302                            && !matches!(expr, Expr::Variable(_) | Expr::Property(_, _))
3303                        {
3304                            (Expr::Variable(expr.to_string_repr()), alias)
3305                        }
3306                        // Check if this expression is a window function
3307                        else if let Expr::FunctionCall {
3308                            window_spec: Some(_),
3309                            ..
3310                        } = &expr
3311                        {
3312                            // Replace window function with a column reference to its result
3313                            // The column name in the Window output is the full expression string
3314                            let window_col_name = expr.to_string_repr();
3315                            // Keep the original alias for the final output
3316                            (Expr::Variable(window_col_name), alias)
3317                        } else {
3318                            (expr, alias)
3319                        }
3320                    })
3321                    .collect()
3322            } else {
3323                projections
3324            };
3325
3326            plan = LogicalPlan::Project {
3327                input: Box::new(plan),
3328                projections: final_projections,
3329            };
3330        }
3331
3332        if return_clause.distinct {
3333            plan = LogicalPlan::Distinct {
3334                input: Box::new(plan),
3335            };
3336        }
3337
3338        Ok(plan)
3339    }
3340
3341    fn plan_single(&self, query: Statement, initial_vars: Vec<String>) -> Result<LogicalPlan> {
3342        let typed_vars: Vec<VariableInfo> = initial_vars
3343            .into_iter()
3344            .map(|name| VariableInfo::new(name, VariableType::Imported))
3345            .collect();
3346        self.plan_single_typed(query, typed_vars)
3347    }
3348
3349    /// Rewrite a query then plan it, preserving typed variable scope when possible.
3350    ///
3351    /// For `Query::Single` statements, uses `plan_single_typed` to carry typed
3352    /// variable info through and avoid false type-conflict errors in subqueries.
3353    /// For unions and other compound queries, falls back to `plan_with_scope`.
3354    fn rewrite_and_plan_typed(
3355        &self,
3356        query: Query,
3357        typed_vars: &[VariableInfo],
3358    ) -> Result<LogicalPlan> {
3359        let rewritten = crate::query::rewrite::rewrite_query(query)?;
3360        match rewritten {
3361            Query::Single(stmt) => self.plan_single_typed(stmt, typed_vars.to_vec()),
3362            other => self.plan_with_scope(other, vars_to_strings(typed_vars)),
3363        }
3364    }
3365
3366    fn plan_single_typed(
3367        &self,
3368        query: Statement,
3369        initial_vars: Vec<VariableInfo>,
3370    ) -> Result<LogicalPlan> {
3371        let mut plan = LogicalPlan::Empty;
3372
3373        if !initial_vars.is_empty() {
3374            // Project bound variables from outer scope as parameters.
3375            // These come from the enclosing query's row (passed as sub_params in EXISTS evaluation).
3376            // Use Parameter expressions to read from params, not Variable which would read from input row.
3377            let projections = initial_vars
3378                .iter()
3379                .map(|v| (Expr::Parameter(v.name.clone()), Some(v.name.clone())))
3380                .collect();
3381            plan = LogicalPlan::Project {
3382                input: Box::new(plan),
3383                projections,
3384            };
3385        }
3386
3387        let mut vars_in_scope: Vec<VariableInfo> = initial_vars;
3388        // Track variables introduced by CREATE clauses so we can distinguish
3389        // MATCH-introduced variables (which cannot be re-created as bare nodes)
3390        // from CREATE-introduced variables (which can be referenced as bare nodes).
3391        let mut create_introduced_vars: HashSet<String> = HashSet::new();
3392        // Track variables targeted by DELETE so we can reject property/label
3393        // access on deleted entities in subsequent RETURN clauses.
3394        let mut deleted_vars: HashSet<String> = HashSet::new();
3395
3396        let clause_count = query.clauses.len();
3397        for (clause_idx, clause) in query.clauses.into_iter().enumerate() {
3398            match clause {
3399                Clause::Match(match_clause) => {
3400                    plan = self.plan_match_clause(&match_clause, plan, &mut vars_in_scope)?;
3401                }
3402                Clause::Unwind(unwind) => {
3403                    plan = LogicalPlan::Unwind {
3404                        input: Box::new(plan),
3405                        expr: unwind.expr.clone(),
3406                        variable: unwind.variable.clone(),
3407                    };
3408                    let unwind_out_type = infer_unwind_output_type(&unwind.expr, &vars_in_scope);
3409                    add_var_to_scope(&mut vars_in_scope, &unwind.variable, unwind_out_type)?;
3410                }
3411                Clause::Call(call_clause) => {
3412                    match &call_clause.kind {
3413                        CallKind::Procedure {
3414                            procedure,
3415                            arguments,
3416                        } => {
3417                            // Validate that procedure arguments don't contain aggregation functions
3418                            for arg in arguments {
3419                                if contains_aggregate_recursive(arg) {
3420                                    return Err(anyhow!(
3421                                        "SyntaxError: InvalidAggregation - Aggregation expressions are not allowed as arguments to procedure calls"
3422                                    ));
3423                                }
3424                            }
3425
3426                            let has_yield_star = call_clause.yield_items.len() == 1
3427                                && call_clause.yield_items[0].name == "*"
3428                                && call_clause.yield_items[0].alias.is_none();
3429                            if has_yield_star && clause_idx + 1 < clause_count {
3430                                return Err(anyhow!(
3431                                    "SyntaxError: UnexpectedSyntax - YIELD * is only allowed in standalone procedure calls"
3432                                ));
3433                            }
3434
3435                            // Validate for duplicate yield names (VariableAlreadyBound)
3436                            let mut yield_names = Vec::new();
3437                            for item in &call_clause.yield_items {
3438                                if item.name == "*" {
3439                                    continue;
3440                                }
3441                                let output_name = item.alias.as_ref().unwrap_or(&item.name);
3442                                if yield_names.contains(output_name) {
3443                                    return Err(anyhow!(
3444                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already appears in YIELD clause",
3445                                        output_name
3446                                    ));
3447                                }
3448                                // Check against existing scope (in-query CALL must not shadow)
3449                                if clause_idx > 0
3450                                    && vars_in_scope.iter().any(|v| v.name == *output_name)
3451                                {
3452                                    return Err(anyhow!(
3453                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already declared in outer scope",
3454                                        output_name
3455                                    ));
3456                                }
3457                                yield_names.push(output_name.clone());
3458                            }
3459
3460                            let mut yields = Vec::new();
3461                            for item in &call_clause.yield_items {
3462                                if item.name == "*" {
3463                                    continue;
3464                                }
3465                                yields.push((item.name.clone(), item.alias.clone()));
3466                                let var_name = item.alias.as_ref().unwrap_or(&item.name);
3467                                // Use Imported because procedure return types are unknown
3468                                // at plan time (could be nodes, edges, or scalars)
3469                                add_var_to_scope(
3470                                    &mut vars_in_scope,
3471                                    var_name,
3472                                    VariableType::Imported,
3473                                )?;
3474                            }
3475                            // M5 follow-up #5: if replacement-scan dispatch is
3476                            // enabled and the procedure name does not resolve
3477                            // against the plugin registry, consult registered
3478                            // `ReplacementScanProvider`s. A `Replacement::Procedure`
3479                            // substitutes the call's target name in the logical
3480                            // plan; the rewritten name must itself resolve or
3481                            // we error immediately (no second-tier consult — caps
3482                            // rewrite depth at one).
3483                            let procedure_name = if self.replacement_scans_enabled
3484                                && !self.procedure_resolves(procedure)
3485                            {
3486                                use uni_plugin::traits::catalog::{
3487                                    Replacement, ReplacementRequest,
3488                                };
3489                                let qname = Self::qname_from_user(procedure);
3490                                match self
3491                                    .consult_replacement_scan(ReplacementRequest::Procedure(&qname))
3492                                {
3493                                    Some(Replacement::Procedure(new_qname)) => {
3494                                        let rewritten = new_qname.to_string();
3495                                        if !self.procedure_resolves(&rewritten) {
3496                                            return Err(anyhow!(
3497                                                "ReplacementScanProvider rerouted procedure \
3498                                                 `{}` to `{}`, which also did not resolve",
3499                                                procedure,
3500                                                rewritten
3501                                            ));
3502                                        }
3503                                        tracing::debug!(
3504                                            target: "uni.plugin.registry",
3505                                            from = %procedure,
3506                                            to = %rewritten,
3507                                            "procedure rerouted via ReplacementScanProvider"
3508                                        );
3509                                        rewritten
3510                                    }
3511                                    Some(other) => {
3512                                        return Err(anyhow!(
3513                                            "ReplacementScanProvider returned wrong variant \
3514                                             for Procedure request `{}`: expected \
3515                                             `Procedure`, got {:?}",
3516                                            procedure,
3517                                            other
3518                                        ));
3519                                    }
3520                                    None => procedure.clone(),
3521                                }
3522                            } else {
3523                                procedure.clone()
3524                            };
3525                            let proc_plan = LogicalPlan::ProcedureCall {
3526                                procedure_name,
3527                                arguments: arguments.clone(),
3528                                yield_items: yields.clone(),
3529                            };
3530
3531                            if matches!(plan, LogicalPlan::Empty) {
3532                                // Standalone CALL (first clause) — use directly
3533                                plan = proc_plan;
3534                            } else if yields.is_empty() {
3535                                // In-query CALL with no YIELD (void procedure):
3536                                // preserve the input rows unchanged
3537                            } else {
3538                                // In-query CALL with YIELD: cross-join input × procedure output
3539                                plan = LogicalPlan::Apply {
3540                                    input: Box::new(plan),
3541                                    subquery: Box::new(proc_plan),
3542                                    input_filter: None,
3543                                };
3544                            }
3545                        }
3546                        CallKind::Subquery(query) => {
3547                            let subquery_plan =
3548                                self.rewrite_and_plan_typed(*query.clone(), &vars_in_scope)?;
3549
3550                            // Extract variables from subquery RETURN clause
3551                            let subquery_vars = Self::collect_plan_variables(&subquery_plan);
3552
3553                            // Add new variables to scope (as Scalar since they come from subquery projection)
3554                            for var in subquery_vars {
3555                                if !is_var_in_scope(&vars_in_scope, &var) {
3556                                    add_var_to_scope(
3557                                        &mut vars_in_scope,
3558                                        &var,
3559                                        VariableType::Scalar,
3560                                    )?;
3561                                }
3562                            }
3563
3564                            plan = LogicalPlan::SubqueryCall {
3565                                input: Box::new(plan),
3566                                subquery: Box::new(subquery_plan),
3567                            };
3568                        }
3569                    }
3570                }
3571                Clause::Merge(merge_clause) => {
3572                    validate_merge_clause(&merge_clause, &vars_in_scope)?;
3573                    // M5 follow-up #6: virtual (catalog-resolved) labels are
3574                    // read-only — reject MERGE that names one.
3575                    let merge_labels = collect_pattern_labels(&merge_clause.pattern);
3576                    self.reject_virtual_label_writes(&merge_labels, "MERGE")?;
3577
3578                    plan = LogicalPlan::Merge {
3579                        input: Box::new(plan),
3580                        pattern: merge_clause.pattern.clone(),
3581                        on_match: Some(SetClause {
3582                            items: merge_clause.on_match.clone(),
3583                        }),
3584                        on_create: Some(SetClause {
3585                            items: merge_clause.on_create.clone(),
3586                        }),
3587                    };
3588
3589                    for path in &merge_clause.pattern.paths {
3590                        if let Some(path_var) = &path.variable
3591                            && !path_var.is_empty()
3592                            && !is_var_in_scope(&vars_in_scope, path_var)
3593                        {
3594                            add_var_to_scope(&mut vars_in_scope, path_var, VariableType::Path)?;
3595                        }
3596                        for element in &path.elements {
3597                            if let PatternElement::Node(n) = element {
3598                                if let Some(v) = &n.variable
3599                                    && !is_var_in_scope(&vars_in_scope, v)
3600                                {
3601                                    add_var_to_scope(&mut vars_in_scope, v, VariableType::Node)?;
3602                                }
3603                            } else if let PatternElement::Relationship(r) = element
3604                                && let Some(v) = &r.variable
3605                                && !is_var_in_scope(&vars_in_scope, v)
3606                            {
3607                                add_var_to_scope(&mut vars_in_scope, v, VariableType::Edge)?;
3608                            }
3609                        }
3610                    }
3611                }
3612                Clause::Create(create_clause) => {
3613                    // M5 follow-up #6: virtual (catalog-resolved) labels are
3614                    // read-only — reject CREATE that names one.
3615                    let create_labels = collect_pattern_labels(&create_clause.pattern);
3616                    self.reject_virtual_label_writes(&create_labels, "CREATE")?;
3617                    // Validate CREATE patterns:
3618                    // - Nodes with labels/properties are "creations" - can't rebind existing variables
3619                    // - Bare nodes (v) are "references" if bound, "creations" if not
3620                    // - Relationships are always creations - can't rebind
3621                    // - Within CREATE, each new variable can only be defined once
3622                    // - Variables used in properties must be defined
3623                    let mut create_vars: Vec<&str> = Vec::new();
3624                    for path in &create_clause.pattern.paths {
3625                        let is_standalone_node = path.elements.len() == 1;
3626                        for element in &path.elements {
3627                            match element {
3628                                PatternElement::Node(n) => {
3629                                    validate_property_variables(
3630                                        &n.properties,
3631                                        &vars_in_scope,
3632                                        &create_vars,
3633                                    )?;
3634
3635                                    if let Some(v) = n.variable.as_deref()
3636                                        && !v.is_empty()
3637                                    {
3638                                        // A node is a "creation" if it has labels or properties
3639                                        let is_creation =
3640                                            !n.labels.is_empty() || n.properties.is_some();
3641
3642                                        if is_creation {
3643                                            check_not_already_bound(
3644                                                v,
3645                                                &vars_in_scope,
3646                                                &create_vars,
3647                                            )?;
3648                                            create_vars.push(v);
3649                                        } else if is_standalone_node
3650                                            && is_var_in_scope(&vars_in_scope, v)
3651                                            && !create_introduced_vars.contains(v)
3652                                        {
3653                                            // Standalone bare node referencing a variable from a
3654                                            // non-CREATE clause (e.g. MATCH (a) CREATE (a)) — invalid.
3655                                            // Bare nodes used as relationship endpoints
3656                                            // (e.g. CREATE (a)-[:R]->(b)) are valid references.
3657                                            return Err(anyhow!(
3658                                                "SyntaxError: VariableAlreadyBound - '{}'",
3659                                                v
3660                                            ));
3661                                        } else if !create_vars.contains(&v) {
3662                                            // New bare variable — register it
3663                                            create_vars.push(v);
3664                                        }
3665                                        // else: bare reference to same-CREATE or previous-CREATE variable — OK
3666                                    }
3667                                }
3668                                PatternElement::Relationship(r) => {
3669                                    validate_property_variables(
3670                                        &r.properties,
3671                                        &vars_in_scope,
3672                                        &create_vars,
3673                                    )?;
3674
3675                                    if let Some(v) = r.variable.as_deref()
3676                                        && !v.is_empty()
3677                                    {
3678                                        check_not_already_bound(v, &vars_in_scope, &create_vars)?;
3679                                        create_vars.push(v);
3680                                    }
3681
3682                                    // Validate relationship constraints for CREATE
3683                                    if r.types.len() != 1 {
3684                                        return Err(anyhow!(
3685                                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for CREATE"
3686                                        ));
3687                                    }
3688                                    if r.direction == Direction::Both {
3689                                        return Err(anyhow!(
3690                                            "SyntaxError: RequiresDirectedRelationship - Only directed relationships are supported in CREATE"
3691                                        ));
3692                                    }
3693                                    if r.range.is_some() {
3694                                        return Err(anyhow!(
3695                                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
3696                                        ));
3697                                    }
3698                                }
3699                                PatternElement::Parenthesized { .. } => {}
3700                            }
3701                        }
3702                    }
3703
3704                    // Batch consecutive CREATEs to avoid deep recursion
3705                    match &mut plan {
3706                        LogicalPlan::CreateBatch { patterns, .. } => {
3707                            // Append to existing batch
3708                            patterns.push(create_clause.pattern.clone());
3709                        }
3710                        LogicalPlan::Create { input, pattern } => {
3711                            // Convert single Create to CreateBatch with both patterns
3712                            let first_pattern = pattern.clone();
3713                            plan = LogicalPlan::CreateBatch {
3714                                input: input.clone(),
3715                                patterns: vec![first_pattern, create_clause.pattern.clone()],
3716                            };
3717                        }
3718                        _ => {
3719                            // Start new Create (may become batch if more CREATEs follow)
3720                            plan = LogicalPlan::Create {
3721                                input: Box::new(plan),
3722                                pattern: create_clause.pattern.clone(),
3723                            };
3724                        }
3725                    }
3726                    // Add variables from created nodes and relationships to scope
3727                    for path in &create_clause.pattern.paths {
3728                        for element in &path.elements {
3729                            match element {
3730                                PatternElement::Node(n) => {
3731                                    if let Some(var) = &n.variable
3732                                        && !var.is_empty()
3733                                    {
3734                                        create_introduced_vars.insert(var.clone());
3735                                        add_var_to_scope(
3736                                            &mut vars_in_scope,
3737                                            var,
3738                                            VariableType::Node,
3739                                        )?;
3740                                    }
3741                                }
3742                                PatternElement::Relationship(r) => {
3743                                    if let Some(var) = &r.variable
3744                                        && !var.is_empty()
3745                                    {
3746                                        create_introduced_vars.insert(var.clone());
3747                                        add_var_to_scope(
3748                                            &mut vars_in_scope,
3749                                            var,
3750                                            VariableType::Edge,
3751                                        )?;
3752                                    }
3753                                }
3754                                PatternElement::Parenthesized { .. } => {
3755                                    // Skip for now - not commonly used in CREATE
3756                                }
3757                            }
3758                        }
3759                    }
3760                }
3761                Clause::Set(set_clause) => {
3762                    // Validate SET value expressions
3763                    for item in &set_clause.items {
3764                        match item {
3765                            SetItem::Property { value, .. }
3766                            | SetItem::Variable { value, .. }
3767                            | SetItem::VariablePlus { value, .. } => {
3768                                validate_expression_variables(value, &vars_in_scope)?;
3769                                validate_expression(value, &vars_in_scope)?;
3770                                if contains_pattern_predicate(value) {
3771                                    return Err(anyhow!(
3772                                        "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
3773                                    ));
3774                                }
3775                            }
3776                            SetItem::Labels { .. } => {}
3777                        }
3778                    }
3779                    plan = LogicalPlan::Set {
3780                        input: Box::new(plan),
3781                        items: set_clause.items.clone(),
3782                    };
3783                }
3784                Clause::Remove(remove_clause) => {
3785                    plan = LogicalPlan::Remove {
3786                        input: Box::new(plan),
3787                        items: remove_clause.items.clone(),
3788                    };
3789                }
3790                Clause::Delete(delete_clause) => {
3791                    // Validate DELETE targets
3792                    for item in &delete_clause.items {
3793                        // DELETE n:Label is invalid syntax (label expressions not allowed)
3794                        if matches!(item, Expr::LabelCheck { .. }) {
3795                            return Err(anyhow!(
3796                                "SyntaxError: InvalidDelete - DELETE requires a simple variable reference, not a label expression"
3797                            ));
3798                        }
3799                        let vars_used = collect_expr_variables(item);
3800                        // Reject expressions with no variable references (e.g. DELETE 1+1)
3801                        if vars_used.is_empty() {
3802                            return Err(anyhow!(
3803                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, not a literal expression"
3804                            ));
3805                        }
3806                        for var in &vars_used {
3807                            // Check if variable is defined
3808                            if find_var_in_scope(&vars_in_scope, var).is_none() {
3809                                return Err(anyhow!(
3810                                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
3811                                    var
3812                                ));
3813                            }
3814                        }
3815                        // Strict type check only for simple variable references —
3816                        // complex expressions (property access, array index, etc.)
3817                        // may resolve to a node/edge at runtime even if the base
3818                        // variable is typed as Scalar (e.g. nodes(p)[0]).
3819                        if let Expr::Variable(name) = item
3820                            && let Some(info) = find_var_in_scope(&vars_in_scope, name)
3821                            && matches!(
3822                                info.var_type,
3823                                VariableType::Scalar | VariableType::ScalarLiteral
3824                            )
3825                        {
3826                            return Err(anyhow!(
3827                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, '{}' is a scalar value",
3828                                name
3829                            ));
3830                        }
3831                    }
3832                    // Track deleted variables for later validation
3833                    for item in &delete_clause.items {
3834                        if let Expr::Variable(name) = item {
3835                            deleted_vars.insert(name.clone());
3836                        }
3837                    }
3838                    plan = LogicalPlan::Delete {
3839                        input: Box::new(plan),
3840                        items: delete_clause.items.clone(),
3841                        detach: delete_clause.detach,
3842                    };
3843                }
3844                Clause::With(with_clause) => {
3845                    let (new_plan, new_vars) =
3846                        self.plan_with_clause(&with_clause, plan, &vars_in_scope)?;
3847                    plan = new_plan;
3848                    vars_in_scope = new_vars;
3849                }
3850                Clause::WithRecursive(with_recursive) => {
3851                    // Plan the recursive CTE
3852                    plan = self.plan_with_recursive(&with_recursive, plan, &vars_in_scope)?;
3853                    // Add the CTE name to the scope (as Scalar since it's a table reference)
3854                    add_var_to_scope(
3855                        &mut vars_in_scope,
3856                        &with_recursive.name,
3857                        VariableType::Scalar,
3858                    )?;
3859                }
3860                Clause::Return(return_clause) => {
3861                    // Check for property/label access on deleted entities
3862                    if !deleted_vars.is_empty() {
3863                        for item in &return_clause.items {
3864                            if let ReturnItem::Expr { expr, .. } = item {
3865                                validate_no_deleted_entity_access(expr, &deleted_vars)?;
3866                            }
3867                        }
3868                    }
3869                    plan = self.plan_return_clause(&return_clause, plan, &vars_in_scope)?;
3870                } // All Clause variants are handled above - no catch-all needed
3871            }
3872        }
3873
3874        // Wrap write operations without RETURN in Limit(0) per OpenCypher spec.
3875        // CREATE (n) should return 0 rows, but CREATE (n) RETURN n should return 1 row.
3876        // If RETURN was used, the plan will have been wrapped in Project, so we only
3877        // wrap terminal Create/CreateBatch/Delete/Set/Remove nodes.
3878        let plan = match &plan {
3879            LogicalPlan::Create { .. }
3880            | LogicalPlan::CreateBatch { .. }
3881            | LogicalPlan::Delete { .. }
3882            | LogicalPlan::Set { .. }
3883            | LogicalPlan::Remove { .. }
3884            | LogicalPlan::Merge { .. } => LogicalPlan::Limit {
3885                input: Box::new(plan),
3886                skip: None,
3887                fetch: Some(0),
3888            },
3889            _ => plan,
3890        };
3891
3892        Ok(plan)
3893    }
3894
3895    fn collect_properties_from_expr(expr: &Expr, collected: &mut Vec<Expr>) {
3896        match expr {
3897            Expr::Property(_, _)
3898                if !collected
3899                    .iter()
3900                    .any(|e| e.to_string_repr() == expr.to_string_repr()) =>
3901            {
3902                collected.push(expr.clone());
3903            }
3904            Expr::Property(_, _) => {}
3905            Expr::Variable(_) => {
3906                // Variables are already available, don't need to project them
3907            }
3908            Expr::BinaryOp { left, right, .. } => {
3909                Self::collect_properties_from_expr(left, collected);
3910                Self::collect_properties_from_expr(right, collected);
3911            }
3912            Expr::FunctionCall {
3913                args, window_spec, ..
3914            } => {
3915                for arg in args {
3916                    Self::collect_properties_from_expr(arg, collected);
3917                }
3918                if let Some(spec) = window_spec {
3919                    for partition_expr in &spec.partition_by {
3920                        Self::collect_properties_from_expr(partition_expr, collected);
3921                    }
3922                    for sort_item in &spec.order_by {
3923                        Self::collect_properties_from_expr(&sort_item.expr, collected);
3924                    }
3925                }
3926            }
3927            Expr::List(items) => {
3928                for item in items {
3929                    Self::collect_properties_from_expr(item, collected);
3930                }
3931            }
3932            Expr::UnaryOp { expr: e, .. }
3933            | Expr::IsNull(e)
3934            | Expr::IsNotNull(e)
3935            | Expr::IsUnique(e) => {
3936                Self::collect_properties_from_expr(e, collected);
3937            }
3938            Expr::Case {
3939                expr,
3940                when_then,
3941                else_expr,
3942            } => {
3943                if let Some(e) = expr {
3944                    Self::collect_properties_from_expr(e, collected);
3945                }
3946                for (w, t) in when_then {
3947                    Self::collect_properties_from_expr(w, collected);
3948                    Self::collect_properties_from_expr(t, collected);
3949                }
3950                if let Some(e) = else_expr {
3951                    Self::collect_properties_from_expr(e, collected);
3952                }
3953            }
3954            Expr::In { expr, list } => {
3955                Self::collect_properties_from_expr(expr, collected);
3956                Self::collect_properties_from_expr(list, collected);
3957            }
3958            Expr::ArrayIndex { array, index } => {
3959                Self::collect_properties_from_expr(array, collected);
3960                Self::collect_properties_from_expr(index, collected);
3961            }
3962            Expr::ArraySlice { array, start, end } => {
3963                Self::collect_properties_from_expr(array, collected);
3964                if let Some(s) = start {
3965                    Self::collect_properties_from_expr(s, collected);
3966                }
3967                if let Some(e) = end {
3968                    Self::collect_properties_from_expr(e, collected);
3969                }
3970            }
3971            _ => {}
3972        }
3973    }
3974
3975    fn collect_window_functions(expr: &Expr, collected: &mut Vec<Expr>) {
3976        if let Expr::FunctionCall { window_spec, .. } = expr {
3977            // Collect any function with a window spec (OVER clause)
3978            if window_spec.is_some() {
3979                if !collected
3980                    .iter()
3981                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3982                {
3983                    collected.push(expr.clone());
3984                }
3985                return;
3986            }
3987        }
3988
3989        match expr {
3990            Expr::BinaryOp { left, right, .. } => {
3991                Self::collect_window_functions(left, collected);
3992                Self::collect_window_functions(right, collected);
3993            }
3994            Expr::FunctionCall { args, .. } => {
3995                for arg in args {
3996                    Self::collect_window_functions(arg, collected);
3997                }
3998            }
3999            Expr::List(items) => {
4000                for i in items {
4001                    Self::collect_window_functions(i, collected);
4002                }
4003            }
4004            Expr::Map(items) => {
4005                for (_, i) in items {
4006                    Self::collect_window_functions(i, collected);
4007                }
4008            }
4009            Expr::IsNull(e) | Expr::IsNotNull(e) | Expr::UnaryOp { expr: e, .. } => {
4010                Self::collect_window_functions(e, collected);
4011            }
4012            Expr::Case {
4013                expr,
4014                when_then,
4015                else_expr,
4016            } => {
4017                if let Some(e) = expr {
4018                    Self::collect_window_functions(e, collected);
4019                }
4020                for (w, t) in when_then {
4021                    Self::collect_window_functions(w, collected);
4022                    Self::collect_window_functions(t, collected);
4023                }
4024                if let Some(e) = else_expr {
4025                    Self::collect_window_functions(e, collected);
4026                }
4027            }
4028            Expr::Reduce {
4029                init, list, expr, ..
4030            } => {
4031                Self::collect_window_functions(init, collected);
4032                Self::collect_window_functions(list, collected);
4033                Self::collect_window_functions(expr, collected);
4034            }
4035            Expr::Quantifier {
4036                list, predicate, ..
4037            } => {
4038                Self::collect_window_functions(list, collected);
4039                Self::collect_window_functions(predicate, collected);
4040            }
4041            Expr::In { expr, list } => {
4042                Self::collect_window_functions(expr, collected);
4043                Self::collect_window_functions(list, collected);
4044            }
4045            Expr::ArrayIndex { array, index } => {
4046                Self::collect_window_functions(array, collected);
4047                Self::collect_window_functions(index, collected);
4048            }
4049            Expr::ArraySlice { array, start, end } => {
4050                Self::collect_window_functions(array, collected);
4051                if let Some(s) = start {
4052                    Self::collect_window_functions(s, collected);
4053                }
4054                if let Some(e) = end {
4055                    Self::collect_window_functions(e, collected);
4056                }
4057            }
4058            Expr::Property(e, _) => Self::collect_window_functions(e, collected),
4059            Expr::CountSubquery(_) | Expr::Exists { .. } => {}
4060            _ => {}
4061        }
4062    }
4063
4064    /// Transform property expressions in manual window functions to use qualified variable names.
4065    ///
4066    /// Converts `Expr::Property(Expr::Variable("e"), "dept")` to `Expr::Variable("e.dept")`
4067    /// so the executor can look up values directly from the row HashMap after the
4068    /// intermediate projection has materialized these properties with qualified names.
4069    ///
4070    /// Transforms ALL window functions (both manual and aggregate).
4071    /// Properties like `e.dept` become variables like `Expr::Variable("e.dept")`.
4072    fn transform_window_expr_properties(expr: Expr) -> Expr {
4073        let Expr::FunctionCall {
4074            name,
4075            args,
4076            window_spec: Some(spec),
4077            distinct,
4078        } = expr
4079        else {
4080            return expr;
4081        };
4082
4083        // Transform arguments for ALL window functions
4084        // Both manual (ROW_NUMBER, etc.) and aggregate (SUM, AVG, etc.) need this
4085        let transformed_args = args
4086            .into_iter()
4087            .map(Self::transform_property_to_variable)
4088            .collect();
4089
4090        // CRITICAL: ALL window functions (manual and aggregate) need partition_by/order_by transformed
4091        let transformed_partition_by = spec
4092            .partition_by
4093            .into_iter()
4094            .map(Self::transform_property_to_variable)
4095            .collect();
4096
4097        let transformed_order_by = spec
4098            .order_by
4099            .into_iter()
4100            .map(|item| SortItem {
4101                expr: Self::transform_property_to_variable(item.expr),
4102                ascending: item.ascending,
4103            })
4104            .collect();
4105
4106        Expr::FunctionCall {
4107            name,
4108            args: transformed_args,
4109            window_spec: Some(WindowSpec {
4110                partition_by: transformed_partition_by,
4111                order_by: transformed_order_by,
4112            }),
4113            distinct,
4114        }
4115    }
4116
4117    /// Transform a property expression to a variable expression with qualified name.
4118    ///
4119    /// `Expr::Property(Expr::Variable("e"), "dept")` becomes `Expr::Variable("e.dept")`
4120    fn transform_property_to_variable(expr: Expr) -> Expr {
4121        let Expr::Property(base, prop) = expr else {
4122            return expr;
4123        };
4124
4125        match *base {
4126            Expr::Variable(var) => Expr::Variable(format!("{}.{}", var, prop)),
4127            other => Expr::Property(Box::new(Self::transform_property_to_variable(other)), prop),
4128        }
4129    }
4130
4131    /// Transform VALID_AT macro into function call
4132    ///
4133    /// `e VALID_AT timestamp` becomes `uni.temporal.validAt(e, 'valid_from', 'valid_to', timestamp)`
4134    /// `e VALID_AT(timestamp, 'start', 'end')` becomes `uni.temporal.validAt(e, 'start', 'end', timestamp)`
4135    fn transform_valid_at_to_function(expr: Expr) -> Expr {
4136        match expr {
4137            Expr::ValidAt {
4138                entity,
4139                timestamp,
4140                start_prop,
4141                end_prop,
4142            } => {
4143                let start = start_prop.unwrap_or_else(|| "valid_from".to_string());
4144                let end = end_prop.unwrap_or_else(|| "valid_to".to_string());
4145
4146                Expr::FunctionCall {
4147                    name: "uni.temporal.validAt".to_string(),
4148                    args: vec![
4149                        Self::transform_valid_at_to_function(*entity),
4150                        Expr::Literal(CypherLiteral::String(start)),
4151                        Expr::Literal(CypherLiteral::String(end)),
4152                        Self::transform_valid_at_to_function(*timestamp),
4153                    ],
4154                    distinct: false,
4155                    window_spec: None,
4156                }
4157            }
4158            // Recursively transform nested expressions
4159            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
4160                left: Box::new(Self::transform_valid_at_to_function(*left)),
4161                op,
4162                right: Box::new(Self::transform_valid_at_to_function(*right)),
4163            },
4164            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
4165                op,
4166                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
4167            },
4168            Expr::FunctionCall {
4169                name,
4170                args,
4171                distinct,
4172                window_spec,
4173            } => Expr::FunctionCall {
4174                name,
4175                args: args
4176                    .into_iter()
4177                    .map(Self::transform_valid_at_to_function)
4178                    .collect(),
4179                distinct,
4180                window_spec,
4181            },
4182            Expr::Property(base, prop) => {
4183                Expr::Property(Box::new(Self::transform_valid_at_to_function(*base)), prop)
4184            }
4185            Expr::List(items) => Expr::List(
4186                items
4187                    .into_iter()
4188                    .map(Self::transform_valid_at_to_function)
4189                    .collect(),
4190            ),
4191            Expr::In { expr, list } => Expr::In {
4192                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
4193                list: Box::new(Self::transform_valid_at_to_function(*list)),
4194            },
4195            Expr::IsNull(e) => Expr::IsNull(Box::new(Self::transform_valid_at_to_function(*e))),
4196            Expr::IsNotNull(e) => {
4197                Expr::IsNotNull(Box::new(Self::transform_valid_at_to_function(*e)))
4198            }
4199            Expr::IsUnique(e) => Expr::IsUnique(Box::new(Self::transform_valid_at_to_function(*e))),
4200            // Other cases: return as-is
4201            other => other,
4202        }
4203    }
4204
4205    /// Rewrite system-metadata function calls (`id(v)`, `created_at(v)`,
4206    /// `updated_at(v)`) to direct property access on the corresponding
4207    /// internal column (`v._vid`, `v._created_at`, `v._updated_at`). This
4208    /// normalization enables predicate pushdown via the Property pattern
4209    /// recognized by `PredicateAnalyzer`.
4210    ///
4211    /// All three functions share the same shape: single-arg, argument
4212    /// must be a node/edge variable, returns the column value directly.
4213    fn rewrite_id_to_vid(expr: Expr, vars_in_scope: &[VariableInfo]) -> Expr {
4214        match expr {
4215            Expr::FunctionCall {
4216                name,
4217                args,
4218                distinct,
4219                window_spec,
4220            } if args.len() == 1 && Self::metadata_function_column(&name, None).is_some() => {
4221                if let Expr::Variable(ref var) = args[0] {
4222                    // `id()` resolves to `_eid` for an edge binding and `_vid`
4223                    // for a node — edge rows expose `_eid`, not `_vid`. Mirror
4224                    // the projection path (`df_expr.rs` translate of `id`).
4225                    let var_type = find_var_in_scope(vars_in_scope, var).map(|v| v.var_type);
4226                    let column = Self::metadata_function_column(&name, var_type)
4227                        .unwrap()
4228                        .to_string();
4229                    Expr::Property(Box::new(Expr::Variable(var.clone())), column)
4230                } else {
4231                    Expr::FunctionCall {
4232                        name,
4233                        args,
4234                        distinct,
4235                        window_spec,
4236                    }
4237                }
4238            }
4239            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
4240                left: Box::new(Self::rewrite_id_to_vid(*left, vars_in_scope)),
4241                op,
4242                right: Box::new(Self::rewrite_id_to_vid(*right, vars_in_scope)),
4243            },
4244            Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
4245                op,
4246                expr: Box::new(Self::rewrite_id_to_vid(*inner, vars_in_scope)),
4247            },
4248            other => other,
4249        }
4250    }
4251
4252    /// Return the internal column name for a system-metadata function, or
4253    /// `None` if the name is not one of the recognised metadata functions.
4254    ///
4255    /// `id()` maps to `_eid` when its argument is a relationship
4256    /// (`VariableType::Edge`) and `_vid` otherwise; `var_type` is `None` when the
4257    /// caller only needs the is-metadata-function test.
4258    fn metadata_function_column(
4259        name: &str,
4260        var_type: Option<VariableType>,
4261    ) -> Option<&'static str> {
4262        if name.eq_ignore_ascii_case("id") {
4263            if matches!(var_type, Some(VariableType::Edge)) {
4264                Some("_eid")
4265            } else {
4266                Some("_vid")
4267            }
4268        } else if name.eq_ignore_ascii_case("created_at") {
4269            Some("_created_at")
4270        } else if name.eq_ignore_ascii_case("updated_at") {
4271            Some("_updated_at")
4272        } else {
4273            None
4274        }
4275    }
4276
4277    /// Plan a MATCH clause, handling both shortestPath and regular patterns.
4278    fn plan_match_clause(
4279        &self,
4280        match_clause: &MatchClause,
4281        plan: LogicalPlan,
4282        vars_in_scope: &mut Vec<VariableInfo>,
4283    ) -> Result<LogicalPlan> {
4284        let mut plan = plan;
4285
4286        if match_clause.pattern.paths.is_empty() {
4287            return Err(anyhow!("Empty pattern"));
4288        }
4289
4290        // Track variables introduced by this OPTIONAL MATCH
4291        let vars_before_pattern = vars_in_scope.len();
4292
4293        for path in &match_clause.pattern.paths {
4294            if let Some(mode) = &path.shortest_path_mode {
4295                plan =
4296                    self.plan_shortest_path(path, plan, vars_in_scope, mode, vars_before_pattern)?;
4297            } else {
4298                plan = self.plan_path(
4299                    path,
4300                    plan,
4301                    vars_in_scope,
4302                    match_clause.optional,
4303                    vars_before_pattern,
4304                )?;
4305            }
4306        }
4307
4308        // Collect variables introduced by this OPTIONAL MATCH pattern
4309        let optional_vars: HashSet<String> = if match_clause.optional {
4310            vars_in_scope[vars_before_pattern..]
4311                .iter()
4312                .map(|v| v.name.clone())
4313                .collect()
4314        } else {
4315            HashSet::new()
4316        };
4317
4318        // Handle WHERE clause with vector_similarity and predicate pushdown
4319        if let Some(predicate) = &match_clause.where_clause {
4320            plan = self.plan_where_clause(predicate, plan, vars_in_scope, optional_vars)?;
4321        }
4322
4323        Ok(plan)
4324    }
4325
4326    /// Plan a shortestPath pattern.
4327    fn plan_shortest_path(
4328        &self,
4329        path: &PathPattern,
4330        plan: LogicalPlan,
4331        vars_in_scope: &mut Vec<VariableInfo>,
4332        mode: &ShortestPathMode,
4333        _vars_before_pattern: usize,
4334    ) -> Result<LogicalPlan> {
4335        let mut plan = plan;
4336        let elements = &path.elements;
4337
4338        // Pattern must be: node-rel-node-rel-...-node (odd number of elements >= 3)
4339        if elements.len() < 3 || elements.len().is_multiple_of(2) {
4340            return Err(anyhow!(
4341                "shortestPath requires at least one relationship: (a)-[*]->(b)"
4342            ));
4343        }
4344
4345        let source_node = match &elements[0] {
4346            PatternElement::Node(n) => n,
4347            _ => return Err(anyhow!("ShortestPath must start with a node")),
4348        };
4349        let rel = match &elements[1] {
4350            PatternElement::Relationship(r) => r,
4351            _ => {
4352                return Err(anyhow!(
4353                    "ShortestPath middle element must be a relationship"
4354                ));
4355            }
4356        };
4357        let target_node = match &elements[2] {
4358            PatternElement::Node(n) => n,
4359            _ => return Err(anyhow!("ShortestPath must end with a node")),
4360        };
4361
4362        let source_var = source_node
4363            .variable
4364            .clone()
4365            .ok_or_else(|| anyhow!("Source node must have variable in shortestPath"))?;
4366        let target_var = target_node
4367            .variable
4368            .clone()
4369            .ok_or_else(|| anyhow!("Target node must have variable in shortestPath"))?;
4370        let path_var = path
4371            .variable
4372            .clone()
4373            .ok_or_else(|| anyhow!("shortestPath must be assigned to a variable"))?;
4374
4375        let source_bound = is_var_in_scope(vars_in_scope, &source_var);
4376        let target_bound = is_var_in_scope(vars_in_scope, &target_var);
4377
4378        // Plan source node if not bound
4379        if !source_bound {
4380            plan = self.plan_unbound_node(source_node, &source_var, plan, false)?;
4381        } else if let Some(prop_filter) =
4382            self.properties_to_expr(&source_var, &source_node.properties)
4383        {
4384            plan = LogicalPlan::Filter {
4385                input: Box::new(plan),
4386                predicate: prop_filter,
4387                optional_variables: HashSet::new(),
4388            };
4389        }
4390
4391        // Plan target node if not bound
4392        let target_label_id = if !target_bound {
4393            // Use first label for target_label_id
4394            let target_label_name = target_node
4395                .labels
4396                .first()
4397                .ok_or_else(|| anyhow!("Target node must have label if not already bound"))?;
4398            // Native lookup first; then consult `CatalogProvider` /
4399            // `ReplacementScanProvider` and allocate a virtual label-id
4400            // (M5b follow-up #6). Virtual ids dispatch to
4401            // `CatalogVertexScanExec` at physical-plan time.
4402            let target_label_id =
4403                if let Some(meta) = self.schema.get_label_case_insensitive(target_label_name) {
4404                    meta.id
4405                } else if let Some((vid, _)) = self.allocate_virtual_label(target_label_name)? {
4406                    vid
4407                } else {
4408                    return Err(anyhow!("Label {} not found", target_label_name));
4409                };
4410
4411            let target_scan = LogicalPlan::Scan {
4412                label_id: target_label_id,
4413                labels: target_node.labels.names().to_vec(),
4414                variable: target_var.clone(),
4415                filter: self.properties_to_expr(&target_var, &target_node.properties),
4416                optional: false,
4417            };
4418
4419            plan = Self::join_with_plan(plan, target_scan);
4420            target_label_id
4421        } else {
4422            if let Some(prop_filter) = self.properties_to_expr(&target_var, &target_node.properties)
4423            {
4424                plan = LogicalPlan::Filter {
4425                    input: Box::new(plan),
4426                    predicate: prop_filter,
4427                    optional_variables: HashSet::new(),
4428                };
4429            }
4430            0 // Wildcard for already-bound target
4431        };
4432
4433        // Add ShortestPath operator
4434        let edge_type_ids = if rel.types.is_empty() {
4435            // If no type specified, fetch all edge types (both schema and schemaless)
4436            self.schema.all_edge_type_ids()
4437        } else {
4438            let mut ids = Vec::new();
4439            for type_name in &rel.types {
4440                let id = if let Some(meta) = self.schema.edge_types.get(type_name) {
4441                    meta.id
4442                } else if let Some((vid, _)) = self.allocate_virtual_edge_type(type_name)? {
4443                    vid
4444                } else {
4445                    return Err(anyhow!("Edge type {} not found", type_name));
4446                };
4447                ids.push(id);
4448            }
4449            ids
4450        };
4451
4452        // Extract hop constraints from relationship pattern
4453        let min_hops = rel.range.as_ref().and_then(|r| r.min).unwrap_or(1);
4454        let max_hops = rel.range.as_ref().and_then(|r| r.max).unwrap_or(u32::MAX);
4455
4456        let sp_plan = match mode {
4457            ShortestPathMode::Shortest => LogicalPlan::ShortestPath {
4458                input: Box::new(plan),
4459                edge_type_ids,
4460                direction: rel.direction.clone(),
4461                source_variable: source_var.clone(),
4462                target_variable: target_var.clone(),
4463                target_label_id,
4464                path_variable: path_var.clone(),
4465                min_hops,
4466                max_hops,
4467            },
4468            ShortestPathMode::AllShortest => LogicalPlan::AllShortestPaths {
4469                input: Box::new(plan),
4470                edge_type_ids,
4471                direction: rel.direction.clone(),
4472                source_variable: source_var.clone(),
4473                target_variable: target_var.clone(),
4474                target_label_id,
4475                path_variable: path_var.clone(),
4476                min_hops,
4477                max_hops,
4478            },
4479        };
4480
4481        if !source_bound {
4482            add_var_to_scope(vars_in_scope, &source_var, VariableType::Node)?;
4483        }
4484        if !target_bound {
4485            add_var_to_scope(vars_in_scope, &target_var, VariableType::Node)?;
4486        }
4487        add_var_to_scope(vars_in_scope, &path_var, VariableType::Path)?;
4488
4489        Ok(sp_plan)
4490    }
4491    /// Plan a MATCH pattern into a LogicalPlan (Scan → Traverse chains).
4492    ///
4493    /// This is a public entry point for the Locy plan builder to reuse the
4494    /// existing pattern-planning logic for clause bodies.
4495    pub fn plan_pattern(
4496        &self,
4497        pattern: &Pattern,
4498        initial_vars: &[VariableInfo],
4499    ) -> Result<LogicalPlan> {
4500        let mut vars_in_scope: Vec<VariableInfo> = initial_vars.to_vec();
4501        let vars_before_pattern = vars_in_scope.len();
4502        let mut plan = LogicalPlan::Empty;
4503        for path in &pattern.paths {
4504            plan = self.plan_path(path, plan, &mut vars_in_scope, false, vars_before_pattern)?;
4505        }
4506        Ok(plan)
4507    }
4508
4509    /// Plan a regular MATCH path (not shortestPath).
4510    fn plan_path(
4511        &self,
4512        path: &PathPattern,
4513        plan: LogicalPlan,
4514        vars_in_scope: &mut Vec<VariableInfo>,
4515        optional: bool,
4516        vars_before_pattern: usize,
4517    ) -> Result<LogicalPlan> {
4518        let mut plan = plan;
4519        let elements = &path.elements;
4520        let mut i = 0;
4521
4522        let path_variable = path.variable.clone();
4523
4524        // Check for VariableAlreadyBound: path variable already in scope
4525        if let Some(pv) = &path_variable
4526            && !pv.is_empty()
4527            && is_var_in_scope(vars_in_scope, pv)
4528        {
4529            return Err(anyhow!(
4530                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4531                pv
4532            ));
4533        }
4534
4535        // Check for VariableAlreadyBound: path variable conflicts with element variables
4536        if let Some(pv) = &path_variable
4537            && !pv.is_empty()
4538        {
4539            for element in elements {
4540                match element {
4541                    PatternElement::Node(n) => {
4542                        if let Some(v) = &n.variable
4543                            && v == pv
4544                        {
4545                            return Err(anyhow!(
4546                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4547                                pv
4548                            ));
4549                        }
4550                    }
4551                    PatternElement::Relationship(r) => {
4552                        if let Some(v) = &r.variable
4553                            && v == pv
4554                        {
4555                            return Err(anyhow!(
4556                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4557                                pv
4558                            ));
4559                        }
4560                    }
4561                    PatternElement::Parenthesized { .. } => {}
4562                }
4563            }
4564        }
4565
4566        // For OPTIONAL MATCH, extract all variables from this pattern upfront.
4567        // When any hop fails in a multi-hop pattern, ALL these variables should be NULL.
4568        let mut optional_pattern_vars: HashSet<String> = if optional {
4569            let mut vars = HashSet::new();
4570            for element in elements {
4571                match element {
4572                    PatternElement::Node(n) => {
4573                        if let Some(v) = &n.variable
4574                            && !v.is_empty()
4575                            && !is_var_in_scope(vars_in_scope, v)
4576                        {
4577                            vars.insert(v.clone());
4578                        }
4579                    }
4580                    PatternElement::Relationship(r) => {
4581                        if let Some(v) = &r.variable
4582                            && !v.is_empty()
4583                            && !is_var_in_scope(vars_in_scope, v)
4584                        {
4585                            vars.insert(v.clone());
4586                        }
4587                    }
4588                    PatternElement::Parenthesized { pattern, .. } => {
4589                        // Also check nested patterns
4590                        for nested_elem in &pattern.elements {
4591                            match nested_elem {
4592                                PatternElement::Node(n) => {
4593                                    if let Some(v) = &n.variable
4594                                        && !v.is_empty()
4595                                        && !is_var_in_scope(vars_in_scope, v)
4596                                    {
4597                                        vars.insert(v.clone());
4598                                    }
4599                                }
4600                                PatternElement::Relationship(r) => {
4601                                    if let Some(v) = &r.variable
4602                                        && !v.is_empty()
4603                                        && !is_var_in_scope(vars_in_scope, v)
4604                                    {
4605                                        vars.insert(v.clone());
4606                                    }
4607                                }
4608                                _ => {}
4609                            }
4610                        }
4611                    }
4612                }
4613            }
4614            // Include path variable if present
4615            if let Some(pv) = &path_variable
4616                && !pv.is_empty()
4617            {
4618                vars.insert(pv.clone());
4619            }
4620            vars
4621        } else {
4622            HashSet::new()
4623        };
4624
4625        // Pre-scan path elements for bound edge variables from previous MATCH clauses.
4626        // These must participate in Trail mode (relationship uniqueness) enforcement
4627        // across ALL segments in this path, so that VLP segments like [*0..1] don't
4628        // traverse through edges already claimed by a bound relationship [r].
4629        let path_bound_edge_vars: HashSet<String> = {
4630            let mut bound = HashSet::new();
4631            for element in elements {
4632                if let PatternElement::Relationship(rel) = element
4633                    && let Some(ref var_name) = rel.variable
4634                    && !var_name.is_empty()
4635                    && vars_in_scope[..vars_before_pattern]
4636                        .iter()
4637                        .any(|v| v.name == *var_name)
4638                {
4639                    bound.insert(var_name.clone());
4640                }
4641            }
4642            bound
4643        };
4644
4645        // Track if any traverses were added (for zero-length path detection)
4646        let mut had_traverses = false;
4647        // Track the node variable for zero-length path binding
4648        let mut single_node_variable: Option<String> = None;
4649        // Collect node/edge variables for BindPath (fixed-length path binding)
4650        let mut path_node_vars: Vec<String> = Vec::new();
4651        let mut path_edge_vars: Vec<String> = Vec::new();
4652        // Track the last processed outer node variable for QPP source binding.
4653        // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is `a`, not `x`.
4654        let mut last_outer_node_var: Option<String> = None;
4655
4656        // Multi-hop path variables are now supported - path is accumulated across hops
4657        while i < elements.len() {
4658            let element = &elements[i];
4659            match element {
4660                PatternElement::Node(n) => {
4661                    let mut variable = n.variable.clone().unwrap_or_default();
4662                    if variable.is_empty() {
4663                        variable = self.next_anon_var();
4664                    }
4665                    // Track first node variable for zero-length path
4666                    if single_node_variable.is_none() {
4667                        single_node_variable = Some(variable.clone());
4668                    }
4669                    let is_bound =
4670                        !variable.is_empty() && is_var_in_scope(vars_in_scope, &variable);
4671                    if optional && !is_bound {
4672                        optional_pattern_vars.insert(variable.clone());
4673                    }
4674
4675                    if is_bound {
4676                        // Check for type conflict - can't use an Edge/Path as a Node
4677                        if let Some(info) = find_var_in_scope(vars_in_scope, &variable)
4678                            && !info.var_type.is_compatible_with(VariableType::Node)
4679                        {
4680                            return Err(anyhow!(
4681                                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4682                                variable,
4683                                info.var_type
4684                            ));
4685                        }
4686                        if let Some(node_filter) =
4687                            self.node_filter_expr(&variable, &n.labels, &n.properties)
4688                        {
4689                            plan = LogicalPlan::Filter {
4690                                input: Box::new(plan),
4691                                predicate: node_filter,
4692                                optional_variables: HashSet::new(),
4693                            };
4694                        }
4695                    } else {
4696                        plan = self.plan_unbound_node(n, &variable, plan, optional)?;
4697                        if !variable.is_empty() {
4698                            add_var_to_scope(vars_in_scope, &variable, VariableType::Node)?;
4699                        }
4700                    }
4701
4702                    // Track source node for BindPath
4703                    if path_variable.is_some() && path_node_vars.is_empty() {
4704                        path_node_vars.push(variable.clone());
4705                    }
4706
4707                    // Look ahead for relationships
4708                    let mut current_source_var = variable;
4709                    last_outer_node_var = Some(current_source_var.clone());
4710                    i += 1;
4711                    while i < elements.len() {
4712                        if let PatternElement::Relationship(r) = &elements[i] {
4713                            if i + 1 < elements.len() {
4714                                let target_node_part = &elements[i + 1];
4715                                if let PatternElement::Node(n_target) = target_node_part {
4716                                    // For VLP traversals, pass path_variable through
4717                                    // For fixed-length, we use BindPath instead
4718                                    let is_vlp = r.range.is_some();
4719                                    let traverse_path_var =
4720                                        if is_vlp { path_variable.clone() } else { None };
4721
4722                                    // If we're about to start a VLP segment and there are
4723                                    // collected fixed-hop path vars, create an intermediate
4724                                    // BindPath for the fixed prefix first. The VLP will then
4725                                    // extend this existing path.
4726                                    if is_vlp
4727                                        && let Some(pv) = path_variable.as_ref()
4728                                        && !path_node_vars.is_empty()
4729                                    {
4730                                        plan = LogicalPlan::BindPath {
4731                                            input: Box::new(plan),
4732                                            node_variables: std::mem::take(&mut path_node_vars),
4733                                            edge_variables: std::mem::take(&mut path_edge_vars),
4734                                            path_variable: pv.clone(),
4735                                        };
4736                                        if !is_var_in_scope(vars_in_scope, pv) {
4737                                            add_var_to_scope(
4738                                                vars_in_scope,
4739                                                pv,
4740                                                VariableType::Path,
4741                                            )?;
4742                                        }
4743                                    }
4744
4745                                    // Plan the traverse from the current source node
4746                                    let target_was_bound =
4747                                        n_target.variable.as_ref().is_some_and(|v| {
4748                                            !v.is_empty() && is_var_in_scope(vars_in_scope, v)
4749                                        });
4750                                    let (new_plan, target_var, effective_target) = self
4751                                        .plan_traverse_with_source(
4752                                            plan,
4753                                            vars_in_scope,
4754                                            TraverseParams {
4755                                                rel: r,
4756                                                target_node: n_target,
4757                                                optional,
4758                                                path_variable: traverse_path_var,
4759                                                optional_pattern_vars: optional_pattern_vars
4760                                                    .clone(),
4761                                            },
4762                                            &current_source_var,
4763                                            vars_before_pattern,
4764                                            &path_bound_edge_vars,
4765                                        )?;
4766                                    plan = new_plan;
4767                                    if optional && !target_was_bound {
4768                                        optional_pattern_vars.insert(target_var.clone());
4769                                    }
4770
4771                                    // Track edge/target node for BindPath
4772                                    if path_variable.is_some() && !is_vlp {
4773                                        // Use the edge variable if given, otherwise use
4774                                        // the internal tracking column pattern.
4775                                        // Use effective_target (which may be __rebound_x
4776                                        // for bound-target traversals) to match the actual
4777                                        // column name produced by GraphTraverseExec.
4778                                        if let Some(ev) = &r.variable {
4779                                            path_edge_vars.push(ev.clone());
4780                                        } else {
4781                                            path_edge_vars
4782                                                .push(format!("__eid_to_{}", effective_target));
4783                                        }
4784                                        path_node_vars.push(target_var.clone());
4785                                    }
4786
4787                                    current_source_var = target_var;
4788                                    last_outer_node_var = Some(current_source_var.clone());
4789                                    had_traverses = true;
4790                                    i += 2;
4791                                } else {
4792                                    return Err(anyhow!("Relationship must be followed by a node"));
4793                                }
4794                            } else {
4795                                return Err(anyhow!("Relationship cannot be the last element"));
4796                            }
4797                        } else {
4798                            break;
4799                        }
4800                    }
4801                }
4802                PatternElement::Relationship(_) => {
4803                    return Err(anyhow!("Pattern must start with a node"));
4804                }
4805                PatternElement::Parenthesized { pattern, range } => {
4806                    // Quantified pattern: ((a)-[:REL]->(b)){n,m}
4807                    // Validate: odd number of elements (node-rel-node[-rel-node]*)
4808                    if pattern.elements.len() < 3 || pattern.elements.len() % 2 == 0 {
4809                        return Err(anyhow!(
4810                            "Quantified pattern must have node-relationship-node structure (odd number >= 3 elements)"
4811                        ));
4812                    }
4813
4814                    let source_node = match &pattern.elements[0] {
4815                        PatternElement::Node(n) => n,
4816                        _ => return Err(anyhow!("Quantified pattern must start with a node")),
4817                    };
4818
4819                    // Extract all relationship-node pairs (QPP steps)
4820                    let mut qpp_rels: Vec<(&RelationshipPattern, &NodePattern)> = Vec::new();
4821                    for pair_idx in (1..pattern.elements.len()).step_by(2) {
4822                        let rel = match &pattern.elements[pair_idx] {
4823                            PatternElement::Relationship(r) => r,
4824                            _ => {
4825                                return Err(anyhow!(
4826                                    "Quantified pattern element at position {} must be a relationship",
4827                                    pair_idx
4828                                ));
4829                            }
4830                        };
4831                        let node = match &pattern.elements[pair_idx + 1] {
4832                            PatternElement::Node(n) => n,
4833                            _ => {
4834                                return Err(anyhow!(
4835                                    "Quantified pattern element at position {} must be a node",
4836                                    pair_idx + 1
4837                                ));
4838                            }
4839                        };
4840                        // Reject nested quantifiers
4841                        if rel.range.is_some() {
4842                            return Err(anyhow!(
4843                                "Nested quantifiers not supported: ((a)-[:REL*n]->(b)){{m}}"
4844                            ));
4845                        }
4846                        qpp_rels.push((rel, node));
4847                    }
4848
4849                    // Check if there's an outer target node after the Parenthesized element.
4850                    // In syntax like `(a)((x)-[:LINK]->(y)){2,4}(b)`, the `(b)` is the outer
4851                    // target that should receive the traversal result.
4852                    let inner_target_node = qpp_rels.last().unwrap().1;
4853                    let outer_target_node = if i + 1 < elements.len() {
4854                        match &elements[i + 1] {
4855                            PatternElement::Node(n) => Some(n),
4856                            _ => None,
4857                        }
4858                    } else {
4859                        None
4860                    };
4861                    // Use the outer target for variable binding and filters; inner target
4862                    // labels are used for state constraints within the NFA.
4863                    let target_node = outer_target_node.unwrap_or(inner_target_node);
4864
4865                    // For simple 3-element single-hop QPP without intermediate label constraints,
4866                    // fall back to existing VLP behavior (copy range to relationship).
4867                    let use_simple_vlp = qpp_rels.len() == 1
4868                        && inner_target_node
4869                            .labels
4870                            .first()
4871                            .and_then(|l| self.schema.get_label_case_insensitive(l))
4872                            .is_none();
4873
4874                    // Plan source node.
4875                    // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is the preceding
4876                    // outer node `a`, NOT the inner `x`. If there's a preceding outer
4877                    // node variable, use it; otherwise fall back to the inner source.
4878                    let source_variable = if let Some(ref outer_src) = last_outer_node_var {
4879                        // The preceding outer node is already bound and in scope
4880                        // Apply any property filters from the inner source node
4881                        if let Some(prop_filter) =
4882                            self.properties_to_expr(outer_src, &source_node.properties)
4883                        {
4884                            plan = LogicalPlan::Filter {
4885                                input: Box::new(plan),
4886                                predicate: prop_filter,
4887                                optional_variables: HashSet::new(),
4888                            };
4889                        }
4890                        outer_src.clone()
4891                    } else {
4892                        let sv = source_node
4893                            .variable
4894                            .clone()
4895                            .filter(|v| !v.is_empty())
4896                            .unwrap_or_else(|| self.next_anon_var());
4897
4898                        if is_var_in_scope(vars_in_scope, &sv) {
4899                            // Source is already bound, apply property filter if needed
4900                            if let Some(prop_filter) =
4901                                self.properties_to_expr(&sv, &source_node.properties)
4902                            {
4903                                plan = LogicalPlan::Filter {
4904                                    input: Box::new(plan),
4905                                    predicate: prop_filter,
4906                                    optional_variables: HashSet::new(),
4907                                };
4908                            }
4909                        } else {
4910                            // Source is unbound, scan it
4911                            plan = self.plan_unbound_node(source_node, &sv, plan, optional)?;
4912                            add_var_to_scope(vars_in_scope, &sv, VariableType::Node)?;
4913                            if optional {
4914                                optional_pattern_vars.insert(sv.clone());
4915                            }
4916                        }
4917                        sv
4918                    };
4919
4920                    if use_simple_vlp {
4921                        // Simple single-hop QPP: apply range to relationship and use VLP path
4922                        let mut relationship = qpp_rels[0].0.clone();
4923                        relationship.range = range.clone();
4924
4925                        let target_was_bound = target_node
4926                            .variable
4927                            .as_ref()
4928                            .is_some_and(|v| !v.is_empty() && is_var_in_scope(vars_in_scope, v));
4929                        let (new_plan, target_var, _effective_target) = self
4930                            .plan_traverse_with_source(
4931                                plan,
4932                                vars_in_scope,
4933                                TraverseParams {
4934                                    rel: &relationship,
4935                                    target_node,
4936                                    optional,
4937                                    path_variable: path_variable.clone(),
4938                                    optional_pattern_vars: optional_pattern_vars.clone(),
4939                                },
4940                                &source_variable,
4941                                vars_before_pattern,
4942                                &path_bound_edge_vars,
4943                            )?;
4944                        plan = new_plan;
4945                        if optional && !target_was_bound {
4946                            optional_pattern_vars.insert(target_var);
4947                        }
4948                    } else {
4949                        // Multi-hop QPP: build QppStepInfo list and create Traverse with qpp_steps
4950                        let mut qpp_step_infos = Vec::new();
4951                        let mut all_edge_type_ids = Vec::new();
4952
4953                        for (rel, node) in &qpp_rels {
4954                            let mut step_edge_type_ids = Vec::new();
4955                            if rel.types.is_empty() {
4956                                step_edge_type_ids = self.schema.all_edge_type_ids();
4957                            } else {
4958                                for type_name in &rel.types {
4959                                    if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4960                                        step_edge_type_ids.push(edge_meta.id);
4961                                    }
4962                                }
4963                            }
4964                            all_edge_type_ids.extend_from_slice(&step_edge_type_ids);
4965
4966                            let target_label = node.labels.first().and_then(|l| {
4967                                self.schema.get_label_case_insensitive(l).map(|_| l.clone())
4968                            });
4969
4970                            qpp_step_infos.push(QppStepInfo {
4971                                edge_type_ids: step_edge_type_ids,
4972                                direction: rel.direction.clone(),
4973                                target_label,
4974                            });
4975                        }
4976
4977                        // Deduplicate edge type IDs for adjacency warming
4978                        all_edge_type_ids.sort_unstable();
4979                        all_edge_type_ids.dedup();
4980
4981                        // Compute iteration bounds from range
4982                        let hops_per_iter = qpp_step_infos.len();
4983                        const QPP_DEFAULT_MAX_HOPS: usize = 100;
4984                        let (min_iter, max_iter) = if let Some(range) = range {
4985                            let min = range.min.unwrap_or(1) as usize;
4986                            let max = range
4987                                .max
4988                                .map(|m| m as usize)
4989                                .unwrap_or(QPP_DEFAULT_MAX_HOPS / hops_per_iter);
4990                            (min, max)
4991                        } else {
4992                            (1, 1)
4993                        };
4994                        let min_hops = min_iter * hops_per_iter;
4995                        let max_hops = max_iter * hops_per_iter;
4996
4997                        // Target variable: the outer target node if present, else the last inner QPP node
4998                        let target_variable = target_node
4999                            .variable
5000                            .clone()
5001                            .filter(|v| !v.is_empty())
5002                            .unwrap_or_else(|| self.next_anon_var());
5003
5004                        let target_is_bound = is_var_in_scope(vars_in_scope, &target_variable);
5005
5006                        // Determine target label for the final node
5007                        let target_label_meta = target_node
5008                            .labels
5009                            .first()
5010                            .and_then(|l| self.schema.get_label_case_insensitive(l));
5011
5012                        // Collect scope match variables
5013                        let mut scope_match_variables: HashSet<String> = vars_in_scope
5014                            [vars_before_pattern..]
5015                            .iter()
5016                            .map(|v| v.name.clone())
5017                            .collect();
5018                        scope_match_variables.insert(target_variable.clone());
5019
5020                        // Handle bound target: use rebound variable for traverse
5021                        let rebound_target_var = if target_is_bound {
5022                            Some(target_variable.clone())
5023                        } else {
5024                            None
5025                        };
5026                        let effective_target_var = if let Some(ref bv) = rebound_target_var {
5027                            format!("__rebound_{}", bv)
5028                        } else {
5029                            target_variable.clone()
5030                        };
5031
5032                        plan = LogicalPlan::Traverse {
5033                            input: Box::new(plan),
5034                            edge_type_ids: all_edge_type_ids,
5035                            direction: qpp_rels[0].0.direction.clone(),
5036                            source_variable: source_variable.to_string(),
5037                            target_variable: effective_target_var.clone(),
5038                            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
5039                            step_variable: None, // QPP doesn't expose intermediate edges
5040                            min_hops,
5041                            max_hops,
5042                            optional,
5043                            target_filter: self.node_filter_expr(
5044                                &target_variable,
5045                                &target_node.labels,
5046                                &target_node.properties,
5047                            ),
5048                            path_variable: path_variable.clone(),
5049                            edge_properties: HashSet::new(),
5050                            is_variable_length: true,
5051                            optional_pattern_vars: optional_pattern_vars.clone(),
5052                            scope_match_variables,
5053                            edge_filter_expr: None,
5054                            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5055                            qpp_steps: Some(qpp_step_infos),
5056                        };
5057
5058                        // Handle bound target: filter rebound results against original variable
5059                        if let Some(ref btv) = rebound_target_var {
5060                            // Filter: __rebound_x._vid = x._vid
5061                            let filter_pred = Expr::BinaryOp {
5062                                left: Box::new(Expr::Property(
5063                                    Box::new(Expr::Variable(effective_target_var.clone())),
5064                                    "_vid".to_string(),
5065                                )),
5066                                op: BinaryOp::Eq,
5067                                right: Box::new(Expr::Property(
5068                                    Box::new(Expr::Variable(btv.clone())),
5069                                    "_vid".to_string(),
5070                                )),
5071                            };
5072                            plan = LogicalPlan::Filter {
5073                                input: Box::new(plan),
5074                                predicate: filter_pred,
5075                                optional_variables: if optional {
5076                                    optional_pattern_vars.clone()
5077                                } else {
5078                                    HashSet::new()
5079                                },
5080                            };
5081                        }
5082
5083                        // Add target variable to scope
5084                        if !target_is_bound {
5085                            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5086                        }
5087
5088                        // Add path variable to scope
5089                        if let Some(ref pv) = path_variable
5090                            && !pv.is_empty()
5091                            && !is_var_in_scope(vars_in_scope, pv)
5092                        {
5093                            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5094                        }
5095                    }
5096                    had_traverses = true;
5097
5098                    // Skip the outer target node if we consumed it
5099                    if outer_target_node.is_some() {
5100                        i += 2; // skip both Parenthesized and the following Node
5101                    } else {
5102                        i += 1;
5103                    }
5104                }
5105            }
5106        }
5107
5108        // If this is a single-node pattern with a path variable, bind the zero-length path
5109        // E.g., `p = (a)` should create a Path with one node and zero edges
5110        if let Some(ref path_var) = path_variable
5111            && !path_var.is_empty()
5112            && !had_traverses
5113            && let Some(node_var) = single_node_variable
5114        {
5115            plan = LogicalPlan::BindZeroLengthPath {
5116                input: Box::new(plan),
5117                node_variable: node_var,
5118                path_variable: path_var.clone(),
5119            };
5120            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
5121        }
5122
5123        // Bind fixed-length path from collected node/edge variables
5124        if let Some(ref path_var) = path_variable
5125            && !path_var.is_empty()
5126            && had_traverses
5127            && !path_node_vars.is_empty()
5128            && !is_var_in_scope(vars_in_scope, path_var)
5129        {
5130            plan = LogicalPlan::BindPath {
5131                input: Box::new(plan),
5132                node_variables: path_node_vars,
5133                edge_variables: path_edge_vars,
5134                path_variable: path_var.clone(),
5135            };
5136            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
5137        }
5138
5139        Ok(plan)
5140    }
5141
5142    /// Plan a traverse with an explicit source variable name.
5143    ///
5144    /// Returns `(plan, target_variable, effective_target_variable)` where:
5145    /// - `target_variable` is the semantic variable name for downstream scope
5146    /// - `effective_target_variable` is the actual column-name prefix used by
5147    ///   the traverse (may be `__rebound_x` for bound-target patterns)
5148    fn plan_traverse_with_source(
5149        &self,
5150        plan: LogicalPlan,
5151        vars_in_scope: &mut Vec<VariableInfo>,
5152        params: TraverseParams<'_>,
5153        source_variable: &str,
5154        vars_before_pattern: usize,
5155        path_bound_edge_vars: &HashSet<String>,
5156    ) -> Result<(LogicalPlan, String, String)> {
5157        // Check for parameter used as relationship predicate
5158        if let Some(Expr::Parameter(_)) = &params.rel.properties {
5159            return Err(anyhow!(
5160                "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
5161            ));
5162        }
5163
5164        let mut edge_type_ids = Vec::new();
5165        let mut dst_labels = Vec::new();
5166        let mut unknown_types = Vec::new();
5167
5168        if params.rel.types.is_empty() {
5169            // All types - include both schema and schemaless edge types
5170            // This ensures MATCH (a)-[r]->(b) finds edges even when no schema is defined
5171            edge_type_ids = self.schema.all_edge_type_ids();
5172            for meta in self.schema.edge_types.values() {
5173                dst_labels.extend(meta.dst_labels.iter().cloned());
5174            }
5175        } else {
5176            for type_name in &params.rel.types {
5177                if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
5178                    // Known type - use standard Traverse with type_id
5179                    edge_type_ids.push(edge_meta.id);
5180                    dst_labels.extend(edge_meta.dst_labels.iter().cloned());
5181                } else if let Some((vid, _)) = self.allocate_virtual_edge_type(type_name)? {
5182                    // M5b.3: virtual edge type (plugin-registered CatalogTable).
5183                    // Resolving it into `edge_type_ids` (not `unknown_types`)
5184                    // lets the regular `Traverse` planner build a structured
5185                    // plan that the physical planner can dispatch to a
5186                    // `CatalogEdgeScanExec` mid-pattern.
5187                    edge_type_ids.push(vid);
5188                } else {
5189                    // Unknown type - will use TraverseMainByType
5190                    unknown_types.push(type_name.clone());
5191                }
5192            }
5193        }
5194
5195        // Deduplicate edge type IDs and unknown types ([:T|:T] → [:T])
5196        edge_type_ids.sort_unstable();
5197        edge_type_ids.dedup();
5198        unknown_types.sort_unstable();
5199        unknown_types.dedup();
5200
5201        let mut target_variable = params.target_node.variable.clone().unwrap_or_default();
5202        if target_variable.is_empty() {
5203            target_variable = self.next_anon_var();
5204        }
5205        let target_is_bound =
5206            !target_variable.is_empty() && is_var_in_scope(vars_in_scope, &target_variable);
5207
5208        // Check for VariableTypeConflict: relationship variable used as node
5209        // e.g., ()-[r]-(r) where r is both the edge and a node endpoint
5210        if let Some(rel_var) = &params.rel.variable
5211            && !rel_var.is_empty()
5212            && rel_var == &target_variable
5213        {
5214            return Err(anyhow!(
5215                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as relationship, cannot use as node",
5216                rel_var
5217            ));
5218        }
5219
5220        // Check for VariableTypeConflict/RelationshipUniquenessViolation
5221        // e.g., (r)-[r]-() or r = ()-[]-(), ()-[r]-()
5222        // Also: (a)-[r]->()-[r]->(a) where r is reused as relationship in same pattern
5223        // BUT: MATCH (a)-[r]->() WITH r MATCH ()-[r]->() is ALLOWED (r is bound from previous clause)
5224        let mut bound_edge_var: Option<String> = None;
5225        let mut bound_edge_list_var: Option<String> = None;
5226        if let Some(rel_var) = &params.rel.variable
5227            && !rel_var.is_empty()
5228            && let Some(info) = find_var_in_scope(vars_in_scope, rel_var)
5229        {
5230            let is_from_previous_clause = vars_in_scope[..vars_before_pattern]
5231                .iter()
5232                .any(|v| v.name == *rel_var);
5233
5234            if info.var_type == VariableType::Edge {
5235                // Check if this edge variable comes from a previous clause (before this MATCH)
5236                if is_from_previous_clause {
5237                    // Edge variable bound from previous clause - this is allowed
5238                    // We'll filter the traversal to match this specific edge
5239                    bound_edge_var = Some(rel_var.clone());
5240                } else {
5241                    // Same relationship variable used twice in the same MATCH clause
5242                    return Err(anyhow!(
5243                        "SyntaxError: RelationshipUniquenessViolation - Relationship variable '{}' is already used in this pattern",
5244                        rel_var
5245                    ));
5246                }
5247            } else if params.rel.range.is_some()
5248                && is_from_previous_clause
5249                && matches!(
5250                    info.var_type,
5251                    VariableType::Scalar | VariableType::ScalarLiteral
5252                )
5253            {
5254                // Allow VLP rebound against a previously bound relationship list
5255                // (e.g. WITH [r1, r2] AS rs ... MATCH ()-[rs*]->()).
5256                bound_edge_list_var = Some(rel_var.clone());
5257            } else if !info.var_type.is_compatible_with(VariableType::Edge) {
5258                return Err(anyhow!(
5259                    "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as relationship",
5260                    rel_var,
5261                    info.var_type
5262                ));
5263            }
5264        }
5265
5266        // Check for VariableTypeConflict: target node variable already bound as non-Node
5267        // e.g., ()-[r]-()-[]-(r) where r was added as Edge, now used as target node
5268        if target_is_bound
5269            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5270            && !info.var_type.is_compatible_with(VariableType::Node)
5271        {
5272            return Err(anyhow!(
5273                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
5274                target_variable,
5275                info.var_type
5276            ));
5277        }
5278
5279        // If all requested types are unknown (schemaless), use TraverseMainByType
5280        // This allows queries like MATCH (a)-[:UnknownType]->(b) to work
5281        // Also supports OR relationship types like MATCH (a)-[:KNOWS|HATES]->(b)
5282        if !unknown_types.is_empty() && edge_type_ids.is_empty() {
5283            // All types are unknown - use schemaless traversal
5284
5285            let is_variable_length = params.rel.range.is_some();
5286
5287            const DEFAULT_MAX_HOPS: usize = 100;
5288            let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
5289                let min = range.min.unwrap_or(1) as usize;
5290                let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
5291                (min, max)
5292            } else {
5293                (1, 1)
5294            };
5295
5296            // For both single-hop and variable-length paths:
5297            // - step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
5298            //   Single-hop: step_var holds a single edge object
5299            //   VLP: step_var holds a list of edge objects
5300            // - path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
5301            let step_var = params.rel.variable.clone();
5302            let path_var = params.path_variable.clone();
5303
5304            // Compute scope_match_variables for relationship uniqueness scoping.
5305            let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
5306                .iter()
5307                .map(|v| v.name.clone())
5308                .collect();
5309            if let Some(ref sv) = step_var {
5310                // Only add the step variable to scope if it's NOT rebound from a previous clause.
5311                // Rebound edges (bound_edge_var is set) should not participate in uniqueness
5312                // filtering because the second MATCH intentionally reuses the same edge.
5313                if bound_edge_var.is_none() {
5314                    scope_match_variables.insert(sv.clone());
5315                }
5316            }
5317            scope_match_variables.insert(target_variable.clone());
5318            // Include bound edge variables from this path for cross-segment Trail mode
5319            // enforcement. This ensures VLP segments like [*0..1] don't traverse through
5320            // edges already claimed by a bound relationship [r] in the same path.
5321            // Exclude the CURRENT segment's bound edge: the schemaless path doesn't use
5322            // __rebound_ renaming, so the BFS must be free to match the bound edge itself.
5323            scope_match_variables.extend(
5324                path_bound_edge_vars
5325                    .iter()
5326                    .filter(|v| bound_edge_var.as_ref() != Some(*v))
5327                    .cloned(),
5328            );
5329
5330            let mut plan = LogicalPlan::TraverseMainByType {
5331                type_names: unknown_types,
5332                input: Box::new(plan),
5333                direction: params.rel.direction.clone(),
5334                source_variable: source_variable.to_string(),
5335                target_variable: target_variable.clone(),
5336                step_variable: step_var.clone(),
5337                min_hops,
5338                max_hops,
5339                optional: params.optional,
5340                target_filter: self.node_filter_expr(
5341                    &target_variable,
5342                    &params.target_node.labels,
5343                    &params.target_node.properties,
5344                ),
5345                path_variable: path_var.clone(),
5346                is_variable_length,
5347                optional_pattern_vars: params.optional_pattern_vars.clone(),
5348                scope_match_variables,
5349                edge_filter_expr: if is_variable_length {
5350                    let filter_var = step_var
5351                        .clone()
5352                        .unwrap_or_else(|| "__anon_edge".to_string());
5353                    self.properties_to_expr(&filter_var, &params.rel.properties)
5354                } else {
5355                    None
5356                },
5357                path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5358            };
5359
5360            // Only apply bound target filter for Imported variables (from outer scope/subquery).
5361            // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
5362            // uses Parameter which requires the value to be in params (subquery context).
5363            if target_is_bound
5364                && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5365                && info.var_type == VariableType::Imported
5366            {
5367                plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
5368            }
5369
5370            // Apply relationship property predicates for fixed-length schemaless
5371            // traversals (e.g., [r:KNOWS {name: 'monkey'}]).
5372            // For VLP, predicates are stored inline in edge_filter_expr (above).
5373            // For fixed-length, wrap as a Filter node for post-traverse evaluation.
5374            if !is_variable_length
5375                && let Some(edge_var_name) = step_var.as_ref()
5376                && let Some(edge_prop_filter) =
5377                    self.properties_to_expr(edge_var_name, &params.rel.properties)
5378            {
5379                let filter_optional_vars = if params.optional {
5380                    params.optional_pattern_vars.clone()
5381                } else {
5382                    HashSet::new()
5383                };
5384                plan = LogicalPlan::Filter {
5385                    input: Box::new(plan),
5386                    predicate: edge_prop_filter,
5387                    optional_variables: filter_optional_vars,
5388                };
5389            }
5390
5391            // Add the bound variables to scope
5392            if let Some(sv) = &step_var {
5393                add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5394                if is_variable_length
5395                    && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5396                {
5397                    info.is_vlp = true;
5398                }
5399            }
5400            if let Some(pv) = &path_var
5401                && !is_var_in_scope(vars_in_scope, pv)
5402            {
5403                add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5404            }
5405            if !is_var_in_scope(vars_in_scope, &target_variable) {
5406                add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5407            }
5408
5409            return Ok((plan, target_variable.clone(), target_variable));
5410        }
5411
5412        // If we have a mix of known and unknown types, error for now
5413        // (could be extended to Union of Traverse + TraverseMainByType)
5414        if !unknown_types.is_empty() {
5415            return Err(anyhow!(
5416                "Mixed known and unknown edge types not yet supported. Unknown: {:?}",
5417                unknown_types
5418            ));
5419        }
5420
5421        // Resolve target label to either a schema id or a virtual id from the
5422        // plugin registry. Mid-pattern virtual-label dispatch (M5b.3) requires
5423        // the virtual id to flow into `Traverse.target_label_id` so the
5424        // physical planner can layer a `CatalogVertexScanExec` join on the
5425        // traverse output. Mirrors the schema-then-virtual fallthrough used
5426        // by single-vertex `Scan` planning (~`plan_node_pattern` below).
5427        let mut virtual_target_label_id: Option<u16> = None;
5428        let target_label_meta = if let Some(label_name) = params.target_node.labels.first() {
5429            // Use first label for target_label_id
5430            // For schemaless support, allow unknown target labels
5431            match self.schema.get_label_case_insensitive(label_name) {
5432                Some(meta) => Some(meta),
5433                None => {
5434                    if let Some((vid, _)) = self.allocate_virtual_label(label_name)? {
5435                        virtual_target_label_id = Some(vid);
5436                    }
5437                    None
5438                }
5439            }
5440        } else if !target_is_bound {
5441            // Infer from edge type(s)
5442            let unique_dsts: Vec<_> = dst_labels
5443                .into_iter()
5444                .collect::<HashSet<_>>()
5445                .into_iter()
5446                .collect();
5447            if unique_dsts.len() == 1 {
5448                let label_name = &unique_dsts[0];
5449                self.schema.get_label_case_insensitive(label_name)
5450            } else {
5451                // Multiple or no destination labels inferred - allow any target
5452                // This supports patterns like MATCH (a)-[:EDGE_TYPE]-(b) WHERE b:Label
5453                // where the edge type can connect to multiple labels
5454                None
5455            }
5456        } else {
5457            None
5458        };
5459
5460        // Check if this is a variable-length pattern (has range specifier like *1..3)
5461        let is_variable_length = params.rel.range.is_some();
5462
5463        // For VLP patterns, default min to 1 and max to a reasonable limit.
5464        // For single-hop patterns (no range), both are 1.
5465        const DEFAULT_MAX_HOPS: usize = 100;
5466        let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
5467            let min = range.min.unwrap_or(1) as usize;
5468            let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
5469            (min, max)
5470        } else {
5471            (1, 1)
5472        };
5473
5474        // step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
5475        //   Single-hop: step_var holds a single edge object
5476        //   VLP: step_var holds a list of edge objects
5477        // path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
5478        let step_var = params.rel.variable.clone();
5479        let path_var = params.path_variable.clone();
5480
5481        // If we have a bound edge variable from a previous clause, use a temp variable
5482        // for the Traverse step, then filter to match the bound edge
5483        let rebound_var = bound_edge_var
5484            .as_ref()
5485            .or(bound_edge_list_var.as_ref())
5486            .cloned();
5487        let effective_step_var = if let Some(ref bv) = rebound_var {
5488            Some(format!("__rebound_{}", bv))
5489        } else {
5490            step_var.clone()
5491        };
5492
5493        // If we have a bound target variable from a previous clause (e.g. WITH),
5494        // use a temp variable for the Traverse step, then filter to match the bound
5495        // target — mirroring the bound edge pattern above.
5496        let rebound_target_var = if target_is_bound && !target_variable.is_empty() {
5497            let is_imported = find_var_in_scope(vars_in_scope, &target_variable)
5498                .map(|info| info.var_type == VariableType::Imported)
5499                .unwrap_or(false);
5500            if !is_imported {
5501                Some(target_variable.clone())
5502            } else {
5503                None
5504            }
5505        } else {
5506            None
5507        };
5508
5509        let effective_target_var = if let Some(ref bv) = rebound_target_var {
5510            format!("__rebound_{}", bv)
5511        } else {
5512            target_variable.clone()
5513        };
5514
5515        // Collect all variables (node + edge) from the current MATCH clause scope
5516        // for relationship uniqueness scoping. Edge ID columns (both named `r._eid`
5517        // and anonymous `__eid_to_target`) are only included in uniqueness filtering
5518        // if their associated variable is in this set. This prevents relationship
5519        // uniqueness from being enforced across disconnected MATCH clauses.
5520        let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
5521            .iter()
5522            .map(|v| v.name.clone())
5523            .collect();
5524        // Include the current traverse's edge variable (not yet added to vars_in_scope)
5525        if let Some(ref sv) = effective_step_var {
5526            scope_match_variables.insert(sv.clone());
5527        }
5528        // Include the target variable (not yet added to vars_in_scope)
5529        scope_match_variables.insert(effective_target_var.clone());
5530        // Include bound edge variables from this path for cross-segment Trail mode
5531        // enforcement (same as the schemaless path above).
5532        scope_match_variables.extend(path_bound_edge_vars.iter().cloned());
5533
5534        let mut plan = LogicalPlan::Traverse {
5535            input: Box::new(plan),
5536            edge_type_ids,
5537            direction: params.rel.direction.clone(),
5538            source_variable: source_variable.to_string(),
5539            target_variable: effective_target_var.clone(),
5540            target_label_id: target_label_meta
5541                .map(|m| m.id)
5542                .or(virtual_target_label_id)
5543                .unwrap_or(0),
5544            step_variable: effective_step_var.clone(),
5545            min_hops,
5546            max_hops,
5547            optional: params.optional,
5548            target_filter: self.node_filter_expr(
5549                &target_variable,
5550                &params.target_node.labels,
5551                &params.target_node.properties,
5552            ),
5553            path_variable: path_var.clone(),
5554            edge_properties: HashSet::new(),
5555            is_variable_length,
5556            optional_pattern_vars: params.optional_pattern_vars.clone(),
5557            scope_match_variables,
5558            edge_filter_expr: if is_variable_length {
5559                // Use the step variable name, or a fallback for anonymous edges.
5560                // The variable name is used by properties_to_expr to build
5561                // `var.prop = value` expressions. For BFS property checking,
5562                // only the property name and value matter (the variable name
5563                // is stripped during extraction).
5564                let filter_var = effective_step_var
5565                    .clone()
5566                    .unwrap_or_else(|| "__anon_edge".to_string());
5567                self.properties_to_expr(&filter_var, &params.rel.properties)
5568            } else {
5569                None
5570            },
5571            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5572            qpp_steps: None,
5573        };
5574
5575        // Pre-compute optional variables set for filter nodes in this traverse.
5576        // Used by relationship property filters and bound-edge filters below.
5577        let filter_optional_vars = if params.optional {
5578            params.optional_pattern_vars.clone()
5579        } else {
5580            HashSet::new()
5581        };
5582
5583        // Apply relationship property predicates (e.g. [r {k: v}]).
5584        // For VLP, predicates are stored inline in edge_filter_expr (above).
5585        // For fixed-length, wrap as a Filter node for post-traverse evaluation.
5586        if !is_variable_length
5587            && let Some(edge_var_name) = effective_step_var.as_ref()
5588            && let Some(edge_prop_filter) =
5589                self.properties_to_expr(edge_var_name, &params.rel.properties)
5590        {
5591            plan = LogicalPlan::Filter {
5592                input: Box::new(plan),
5593                predicate: edge_prop_filter,
5594                optional_variables: filter_optional_vars.clone(),
5595            };
5596        }
5597
5598        // Only apply bound target filter for Imported variables (from outer scope/subquery).
5599        // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
5600        // uses Parameter which requires the value to be in params (subquery context).
5601        if target_is_bound
5602            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5603            && info.var_type == VariableType::Imported
5604        {
5605            plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
5606        }
5607
5608        // If we have a bound edge variable, add a filter to match it
5609        if let Some(ref bv) = bound_edge_var {
5610            let temp_var = format!("__rebound_{}", bv);
5611            let bound_check = Expr::BinaryOp {
5612                left: Box::new(Expr::Property(
5613                    Box::new(Expr::Variable(temp_var)),
5614                    "_eid".to_string(),
5615                )),
5616                op: BinaryOp::Eq,
5617                right: Box::new(Expr::Property(
5618                    Box::new(Expr::Variable(bv.clone())),
5619                    "_eid".to_string(),
5620                )),
5621            };
5622            plan = LogicalPlan::Filter {
5623                input: Box::new(plan),
5624                predicate: bound_check,
5625                optional_variables: filter_optional_vars.clone(),
5626            };
5627        }
5628
5629        // If we have a bound relationship list variable for a VLP pattern,
5630        // add a filter to match the traversed relationship list exactly.
5631        if let Some(ref bv) = bound_edge_list_var {
5632            let temp_var = format!("__rebound_{}", bv);
5633            let temp_eids = Expr::ListComprehension {
5634                variable: "__rebound_edge".to_string(),
5635                list: Box::new(Expr::Variable(temp_var)),
5636                where_clause: None,
5637                map_expr: Box::new(Expr::FunctionCall {
5638                    name: "toInteger".to_string(),
5639                    args: vec![Expr::Property(
5640                        Box::new(Expr::Variable("__rebound_edge".to_string())),
5641                        "_eid".to_string(),
5642                    )],
5643                    distinct: false,
5644                    window_spec: None,
5645                }),
5646            };
5647            let bound_eids = Expr::ListComprehension {
5648                variable: "__bound_edge".to_string(),
5649                list: Box::new(Expr::Variable(bv.clone())),
5650                where_clause: None,
5651                map_expr: Box::new(Expr::FunctionCall {
5652                    name: "toInteger".to_string(),
5653                    args: vec![Expr::Property(
5654                        Box::new(Expr::Variable("__bound_edge".to_string())),
5655                        "_eid".to_string(),
5656                    )],
5657                    distinct: false,
5658                    window_spec: None,
5659                }),
5660            };
5661            let bound_list_check = Expr::BinaryOp {
5662                left: Box::new(temp_eids),
5663                op: BinaryOp::Eq,
5664                right: Box::new(bound_eids),
5665            };
5666            plan = LogicalPlan::Filter {
5667                input: Box::new(plan),
5668                predicate: bound_list_check,
5669                optional_variables: filter_optional_vars.clone(),
5670            };
5671        }
5672
5673        // If we have a bound target variable (non-imported), add a filter to constrain
5674        // the traversal output to match the previously bound target node.
5675        if let Some(ref bv) = rebound_target_var {
5676            let temp_var = format!("__rebound_{}", bv);
5677            let bound_check = Expr::BinaryOp {
5678                left: Box::new(Expr::Property(
5679                    Box::new(Expr::Variable(temp_var.clone())),
5680                    "_vid".to_string(),
5681                )),
5682                op: BinaryOp::Eq,
5683                right: Box::new(Expr::Property(
5684                    Box::new(Expr::Variable(bv.clone())),
5685                    "_vid".to_string(),
5686                )),
5687            };
5688            // For OPTIONAL MATCH, include the rebound variable in optional_variables
5689            // so that OptionalFilterExec excludes it from the grouping key and
5690            // properly nullifies it in recovery rows when all matches are filtered out.
5691            // Without this, each traverse result creates its own group (keyed by
5692            // __rebound_c._vid), and null-row recovery emits a spurious null row
5693            // for every non-matching target instead of one per source group.
5694            let mut rebound_filter_vars = filter_optional_vars;
5695            if params.optional {
5696                rebound_filter_vars.insert(temp_var);
5697            }
5698            plan = LogicalPlan::Filter {
5699                input: Box::new(plan),
5700                predicate: bound_check,
5701                optional_variables: rebound_filter_vars,
5702            };
5703        }
5704
5705        // Add the bound variables to scope
5706        // Skip adding the edge variable if it's already bound from a previous clause
5707        if let Some(sv) = &step_var
5708            && bound_edge_var.is_none()
5709            && bound_edge_list_var.is_none()
5710        {
5711            add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5712            if is_variable_length
5713                && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5714            {
5715                info.is_vlp = true;
5716            }
5717        }
5718        if let Some(pv) = &path_var
5719            && !is_var_in_scope(vars_in_scope, pv)
5720        {
5721            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5722        }
5723        if !is_var_in_scope(vars_in_scope, &target_variable) {
5724            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5725        }
5726
5727        Ok((plan, target_variable, effective_target_var))
5728    }
5729
5730    /// Combine a new scan plan with an existing plan.
5731    ///
5732    /// If the existing plan is `Empty`, returns the new plan directly.
5733    /// Otherwise, wraps them in a `CrossJoin`.
5734    fn join_with_plan(existing: LogicalPlan, new: LogicalPlan) -> LogicalPlan {
5735        if matches!(existing, LogicalPlan::Empty) {
5736            new
5737        } else {
5738            LogicalPlan::CrossJoin {
5739                left: Box::new(existing),
5740                right: Box::new(new),
5741            }
5742        }
5743    }
5744
5745    /// Split node map predicates into scan-pushable and residual filters.
5746    ///
5747    /// A predicate is scan-pushable when its value expression references only
5748    /// the node variable itself (or no variables). Predicates referencing other
5749    /// in-scope variables (correlated predicates) are returned as residual so
5750    /// they can be applied after joining with the existing plan.
5751    fn split_node_property_filters_for_scan(
5752        &self,
5753        variable: &str,
5754        properties: &Option<Expr>,
5755    ) -> (Option<Expr>, Option<Expr>) {
5756        let entries = match properties {
5757            Some(Expr::Map(entries)) => entries,
5758            _ => return (None, None),
5759        };
5760
5761        if entries.is_empty() {
5762            return (None, None);
5763        }
5764
5765        let mut pushdown_entries = Vec::new();
5766        let mut residual_entries = Vec::new();
5767
5768        for (prop, val_expr) in entries {
5769            let vars = collect_expr_variables(val_expr);
5770            if vars.iter().all(|v| v == variable) {
5771                pushdown_entries.push((prop.clone(), val_expr.clone()));
5772            } else {
5773                residual_entries.push((prop.clone(), val_expr.clone()));
5774            }
5775        }
5776
5777        let pushdown_map = if pushdown_entries.is_empty() {
5778            None
5779        } else {
5780            Some(Expr::Map(pushdown_entries))
5781        };
5782        let residual_map = if residual_entries.is_empty() {
5783            None
5784        } else {
5785            Some(Expr::Map(residual_entries))
5786        };
5787
5788        (
5789            self.properties_to_expr(variable, &pushdown_map),
5790            self.properties_to_expr(variable, &residual_map),
5791        )
5792    }
5793
5794    /// Decide whether per-label `Scan` branches for a label disjunction can
5795    /// safely be combined under `LogicalPlan::Union`. Returns `true` iff every
5796    /// label in `labels` is registered in the schema AND every pair shares an
5797    /// identical property name+type set.
5798    ///
5799    /// When this returns `false`, the disjunction must fall back to a single
5800    /// `ScanMainByLabels` over all labels — otherwise DataFusion's
5801    /// `UnionExec::try_new` panics in `union_schema` because the per-label
5802    /// `GraphScanExec` outputs (`_vid` + `_labels` + per-label projected
5803    /// properties) have different field counts. Issue rustic-ai/uni-db#62.
5804    ///
5805    /// We deliberately compare full schema property sets rather than only the
5806    /// properties referenced by the current query: at this logical-planning
5807    /// stage we have not yet collected `all_properties`, and `*` wildcards
5808    /// (e.g. from unknown function calls) would expand per-label downstream
5809    /// in `df_planner::resolve_properties` even when the query text only
5810    /// touches common columns.
5811    fn label_branches_share_property_schema(&self, labels: &[String]) -> bool {
5812        if labels.len() < 2 {
5813            return true;
5814        }
5815        let mut iter = labels.iter();
5816        let first = iter.next().expect("len >= 2");
5817        let Some(first_props) = self.schema.properties.get(first) else {
5818            return false;
5819        };
5820        for label in iter {
5821            let Some(props) = self.schema.properties.get(label) else {
5822                return false;
5823            };
5824            if props.len() != first_props.len() {
5825                return false;
5826            }
5827            for (name, meta) in first_props {
5828                let Some(other_meta) = props.get(name) else {
5829                    return false;
5830                };
5831                if meta.r#type != other_meta.r#type {
5832                    return false;
5833                }
5834            }
5835        }
5836        true
5837    }
5838
5839    /// Plan an unbound node (creates a Scan, ScanAll, ScanMainByLabel, ExtIdLookup, or CrossJoin).
5840    fn plan_unbound_node(
5841        &self,
5842        node: &NodePattern,
5843        variable: &str,
5844        plan: LogicalPlan,
5845        optional: bool,
5846    ) -> Result<LogicalPlan> {
5847        // Properties handling
5848        let properties = match &node.properties {
5849            Some(Expr::Map(entries)) => entries.as_slice(),
5850            Some(Expr::Parameter(_)) => {
5851                return Err(anyhow!(
5852                    "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
5853                ));
5854            }
5855            Some(_) => return Err(anyhow!("Node properties must be a Map")),
5856            None => &[],
5857        };
5858
5859        let has_existing_scope = !matches!(plan, LogicalPlan::Empty);
5860
5861        let apply_residual_filter = |input: LogicalPlan, residual: Option<Expr>| -> LogicalPlan {
5862            if let Some(predicate) = residual {
5863                LogicalPlan::Filter {
5864                    input: Box::new(input),
5865                    predicate,
5866                    optional_variables: HashSet::new(),
5867                }
5868            } else {
5869                input
5870            }
5871        };
5872
5873        let (node_scan_filter, node_residual_filter) = if has_existing_scope {
5874            self.split_node_property_filters_for_scan(variable, &node.properties)
5875        } else {
5876            (self.properties_to_expr(variable, &node.properties), None)
5877        };
5878
5879        // Check for ext_id in properties when no label is specified
5880        if node.labels.is_empty() {
5881            // Try to find ext_id property for main table lookup
5882            if let Some((_, ext_id_value)) = properties.iter().find(|(k, _)| k == "ext_id") {
5883                // Extract the ext_id value as a string
5884                let ext_id = match ext_id_value {
5885                    Expr::Literal(CypherLiteral::String(s)) => s.clone(),
5886                    _ => {
5887                        return Err(anyhow!("ext_id must be a string literal for direct lookup"));
5888                    }
5889                };
5890
5891                // Build filter for remaining properties (excluding ext_id)
5892                let remaining_props: Vec<_> = properties
5893                    .iter()
5894                    .filter(|(k, _)| k != "ext_id")
5895                    .cloned()
5896                    .collect();
5897
5898                let remaining_expr = if remaining_props.is_empty() {
5899                    None
5900                } else {
5901                    Some(Expr::Map(remaining_props))
5902                };
5903
5904                let (prop_filter, residual_filter) = if has_existing_scope {
5905                    self.split_node_property_filters_for_scan(variable, &remaining_expr)
5906                } else {
5907                    (self.properties_to_expr(variable, &remaining_expr), None)
5908                };
5909
5910                let ext_id_lookup = LogicalPlan::ExtIdLookup {
5911                    variable: variable.to_string(),
5912                    ext_id,
5913                    filter: prop_filter,
5914                    optional,
5915                };
5916
5917                let joined = Self::join_with_plan(plan, ext_id_lookup);
5918                return Ok(apply_residual_filter(joined, residual_filter));
5919            }
5920
5921            // No ext_id: create ScanAll for unlabeled node pattern
5922            let scan_all = LogicalPlan::ScanAll {
5923                variable: variable.to_string(),
5924                filter: node_scan_filter,
5925                optional,
5926            };
5927
5928            let joined = Self::join_with_plan(plan, scan_all);
5929            return Ok(apply_residual_filter(joined, node_residual_filter));
5930        }
5931
5932        // Label disjunction `(n:A|B|C)` — emit Union of label-scoped Scans.
5933        //
5934        // Storage fact: a multi-labeled vertex is fanned out into every
5935        // per-label table it carries (uni-store/src/runtime/writer.rs's
5936        // `push_vertex_to_labels`), so the same vid can appear in both the
5937        // `A` scan and the `B` scan of a disjunctive query. Use
5938        // `Union { all: false }` so the combined result deduplicates by row
5939        // contents (which include the vid) rather than emitting the same
5940        // vertex twice. The single-label-disjunction case (`Disjunction(["A"])`)
5941        // is encoded the same way the parser already encodes single edge
5942        // types, and reduces to one Scan with no Union wrapping.
5943        if node.labels.is_proper_disjunction() {
5944            let label_names: Vec<String> = node.labels.names().to_vec();
5945
5946            // Per-label branches under a `Union` only line up when every
5947            // branch produces the same Arrow schema. The narrow-scan
5948            // `Scan` path resolves columns *per label*, so heterogeneous
5949            // property sets (or any schemaless label in the mix) yield
5950            // mismatched widths and DataFusion's `UnionExec::try_new`
5951            // panics inside `union_schema` (issue rustic-ai/uni-db#62).
5952            //
5953            // For those cases, lower every branch to a *single-label*
5954            // `ScanMainByLabels` instead. The schemaless main-table scan
5955            // resolves columns from `all_properties` directly (no per-label
5956            // expansion), so all branches emit a uniform schema and the
5957            // outer `Union { all: false }` deduplicates correctly. We
5958            // keep the per-branch Union shape (rather than collapsing to
5959            // a single multi-label scan) because multi-label
5960            // `ScanMainByLabels` has AND/intersection semantics — wrong
5961            // for a disjunction.
5962            let use_main_table_branches = !self.label_branches_share_property_schema(&label_names);
5963
5964            let mut branches: Vec<LogicalPlan> = Vec::with_capacity(label_names.len());
5965            for label_name in &label_names {
5966                let branch = if use_main_table_branches {
5967                    LogicalPlan::ScanMainByLabels {
5968                        labels: vec![label_name.clone()],
5969                        variable: variable.to_string(),
5970                        filter: node_scan_filter.clone(),
5971                        optional,
5972                    }
5973                } else {
5974                    let meta = self
5975                        .schema
5976                        .get_label_case_insensitive(label_name)
5977                        .expect("share_property_schema true implies all labels in schema");
5978                    LogicalPlan::Scan {
5979                        label_id: meta.id,
5980                        labels: vec![label_name.clone()],
5981                        variable: variable.to_string(),
5982                        filter: node_scan_filter.clone(),
5983                        optional,
5984                    }
5985                };
5986                branches.push(branch);
5987            }
5988            // Left-leaning Union: Union(Union(A, B), C). All inner
5989            // unions dedupe by row, so the outer one does too.
5990            let mut iter = branches.into_iter();
5991            let mut union_plan = iter
5992                .next()
5993                .expect("is_proper_disjunction implies at least 2 labels");
5994            for next in iter {
5995                union_plan = LogicalPlan::Union {
5996                    left: Box::new(union_plan),
5997                    right: Box::new(next),
5998                    all: false,
5999                };
6000            }
6001            let joined = Self::join_with_plan(plan, union_plan);
6002            return Ok(apply_residual_filter(joined, node_residual_filter));
6003        }
6004
6005        // Use first label for label_id (primary label for dataset selection)
6006        let label_name = &node.labels[0];
6007
6008        // Check if label exists in schema
6009        if let Some(label_meta) = self.schema.get_label_case_insensitive(label_name) {
6010            // Known label: use standard Scan
6011            let scan = LogicalPlan::Scan {
6012                label_id: label_meta.id,
6013                labels: node.labels.names().to_vec(),
6014                variable: variable.to_string(),
6015                filter: node_scan_filter,
6016                optional,
6017            };
6018
6019            let joined = Self::join_with_plan(plan, scan);
6020            Ok(apply_residual_filter(joined, node_residual_filter))
6021        } else {
6022            // Unknown label. Try a CatalogProvider / ReplacementScanProvider
6023            // claim first: on success allocate a virtual label-ID and emit a
6024            // regular `Scan` against the virtual id (`df_planner` dispatches
6025            // to `CatalogVertexScanExec`). When no provider claims and the
6026            // replacement-scan gate is on, strict-mode errors. When the gate
6027            // is off and no provider claims, preserve today's silent-empty
6028            // schemaless `ScanMainByLabels` behavior bit-for-bit.
6029            if let Some((virtual_id, _)) = self.allocate_virtual_label(label_name)? {
6030                let scan = LogicalPlan::Scan {
6031                    label_id: virtual_id,
6032                    labels: node.labels.names().to_vec(),
6033                    variable: variable.to_string(),
6034                    filter: node_scan_filter,
6035                    optional,
6036                };
6037                let joined = Self::join_with_plan(plan, scan);
6038                return Ok(apply_residual_filter(joined, node_residual_filter));
6039            }
6040            if self.replacement_scans_enabled {
6041                return Err(anyhow!(
6042                    "Label `{}` is not defined in schema and no \
6043                     CatalogProvider or ReplacementScanProvider claimed it; \
6044                     strict-mode (replacement_scans=true) requires the label \
6045                     to resolve",
6046                    label_name
6047                ));
6048            }
6049
6050            let scan_main = LogicalPlan::ScanMainByLabels {
6051                labels: node.labels.names().to_vec(),
6052                variable: variable.to_string(),
6053                filter: node_scan_filter,
6054                optional,
6055            };
6056
6057            let joined = Self::join_with_plan(plan, scan_main);
6058            Ok(apply_residual_filter(joined, node_residual_filter))
6059        }
6060    }
6061
6062    /// Plan a WHERE clause with vector_similarity extraction and predicate pushdown.
6063    ///
6064    /// When `optional_vars` is non-empty, the Filter will preserve rows where
6065    /// any of those variables are NULL (for OPTIONAL MATCH semantics).
6066    fn plan_where_clause(
6067        &self,
6068        predicate: &Expr,
6069        plan: LogicalPlan,
6070        vars_in_scope: &[VariableInfo],
6071        optional_vars: HashSet<String>,
6072    ) -> Result<LogicalPlan> {
6073        // Validate no aggregation functions in WHERE clause
6074        validate_no_aggregation_in_where(predicate)?;
6075
6076        // Validate all variables used are in scope
6077        validate_expression_variables(predicate, vars_in_scope)?;
6078
6079        // Validate expression types (function args, boolean operators)
6080        validate_expression(predicate, vars_in_scope)?;
6081
6082        // Check that WHERE predicate isn't a bare node/edge/path variable
6083        if let Expr::Variable(var_name) = predicate
6084            && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
6085            && matches!(
6086                info.var_type,
6087                VariableType::Node | VariableType::Edge | VariableType::Path
6088            )
6089        {
6090            return Err(anyhow!(
6091                "SyntaxError: InvalidArgumentType - Type mismatch: expected Boolean but was {:?}",
6092                info.var_type
6093            ));
6094        }
6095
6096        let mut plan = plan;
6097
6098        // Transform VALID_AT macro to function call
6099        let transformed_predicate = Self::transform_valid_at_to_function(predicate.clone());
6100
6101        // Rewrite id(var) to var._vid (or var._eid for an edge) so
6102        // PredicateAnalyzer can push it down.
6103        let transformed_predicate = Self::rewrite_id_to_vid(transformed_predicate, vars_in_scope);
6104
6105        let mut current_predicate =
6106            self.rewrite_predicates_using_indexes(&transformed_predicate, &plan, vars_in_scope)?;
6107
6108        // 1. Try to extract vector_similarity predicate for optimization
6109        if let Some(extraction) = extract_vector_similarity(&current_predicate) {
6110            let vs = &extraction.predicate;
6111            if Self::find_scan_label_id(&plan, &vs.variable).is_some() {
6112                plan = Self::replace_scan_with_knn(
6113                    plan,
6114                    &vs.variable,
6115                    &vs.property,
6116                    vs.query.clone(),
6117                    vs.threshold,
6118                );
6119                if let Some(residual) = extraction.residual {
6120                    current_predicate = residual;
6121                } else {
6122                    current_predicate = Expr::TRUE;
6123                }
6124            }
6125        }
6126
6127        // 2. Label/type disjunction → narrow-scan rewrite.
6128        //
6129        // `WHERE n:A OR n:B` and `WHERE type(r) = 'A' OR type(r) = 'B'`
6130        // are functionally identical to the inline forms `(n:A|B)` and
6131        // `[r:A|B]`, but a literal pattern lowering would route them
6132        // through `Filter(LabelCheck OR LabelCheck)` over `ScanAll` —
6133        // a full vertex/edge scan plus residual filter, missing the
6134        // narrow-scan fast-path that the inline forms get for free.
6135        // Detect those OR-chains here and rewrite the upstream
6136        // `ScanAll` / `Traverse` accordingly.
6137        let conjuncts = Self::split_and_conjuncts(&current_predicate);
6138        let mut keep: Vec<Expr> = Vec::with_capacity(conjuncts.len());
6139        for conj in conjuncts {
6140            let mut consumed = false;
6141            for var in vars_in_scope {
6142                if optional_vars.contains(&var.name) {
6143                    continue;
6144                }
6145                // Node label disjunction → Union of label-scoped Scans.
6146                if Self::is_scan_all_for(&plan, &var.name)
6147                    && let Some(labels) = try_label_or_to_union(&conj, &var.name)
6148                {
6149                    plan = self.replace_scan_all_with_label_union(plan, &var.name, &labels, false);
6150                    consumed = true;
6151                    break;
6152                }
6153                // Edge type disjunction → merge into Traverse.edge_type_ids.
6154                if let Some(types) = try_type_or_to_union(&conj, &var.name)
6155                    && Self::merge_traverse_types_for(&plan, &var.name, &types).is_some()
6156                {
6157                    let mut ids: Vec<u32> = Vec::with_capacity(types.len());
6158                    let mut all_known = true;
6159                    for t in &types {
6160                        match self.schema.edge_types.get(t) {
6161                            Some(meta) => ids.push(meta.id),
6162                            None => {
6163                                all_known = false;
6164                                break;
6165                            }
6166                        }
6167                    }
6168                    if all_known {
6169                        plan = Self::set_traverse_edge_type_ids(plan, &var.name, ids);
6170                        consumed = true;
6171                        break;
6172                    }
6173                }
6174            }
6175            if !consumed {
6176                keep.push(conj);
6177            }
6178        }
6179        current_predicate = Self::combine_predicates(keep).unwrap_or(Expr::TRUE);
6180
6181        // 3. Push eligible predicates to Scan OR Traverse filters
6182        // Note: Do NOT push predicates on optional variables (from OPTIONAL MATCH) to
6183        // Traverse's target_filter, because target_filter filtering doesn't preserve NULL
6184        // rows. Let them stay in the Filter operator which handles NULL preservation.
6185        for var in vars_in_scope {
6186            // Skip pushdown for optional variables - they need NULL preservation in Filter
6187            if optional_vars.contains(&var.name) {
6188                continue;
6189            }
6190
6191            // Check if var is produced by a Scan
6192            if Self::find_scan_label_id(&plan, &var.name).is_some() {
6193                let (pushable, residual) =
6194                    Self::extract_variable_predicates(&current_predicate, &var.name);
6195
6196                for pred in pushable {
6197                    plan = Self::push_predicate_to_scan(plan, &var.name, pred);
6198                }
6199
6200                if let Some(r) = residual {
6201                    current_predicate = r;
6202                } else {
6203                    current_predicate = Expr::TRUE;
6204                }
6205            } else if Self::is_traverse_target(&plan, &var.name) {
6206                // Push to Traverse
6207                let (pushable, residual) =
6208                    Self::extract_variable_predicates(&current_predicate, &var.name);
6209
6210                for pred in pushable {
6211                    plan = Self::push_predicate_to_traverse(plan, &var.name, pred);
6212                }
6213
6214                if let Some(r) = residual {
6215                    current_predicate = r;
6216                } else {
6217                    current_predicate = Expr::TRUE;
6218                }
6219            }
6220        }
6221
6222        // 4. Push predicates to Apply.input_filter
6223        // This filters input rows BEFORE executing correlated subqueries.
6224        plan = Self::push_predicates_to_apply(plan, &mut current_predicate);
6225
6226        // 5. Add Filter node for any remaining predicates
6227        if !current_predicate.is_true_literal() {
6228            plan = LogicalPlan::Filter {
6229                input: Box::new(plan),
6230                predicate: current_predicate,
6231                optional_variables: optional_vars,
6232            };
6233        }
6234
6235        Ok(plan)
6236    }
6237
6238    fn rewrite_predicates_using_indexes(
6239        &self,
6240        predicate: &Expr,
6241        plan: &LogicalPlan,
6242        vars_in_scope: &[VariableInfo],
6243    ) -> Result<Expr> {
6244        let mut rewritten = predicate.clone();
6245
6246        for var in vars_in_scope {
6247            if let Some(label_id) = Self::find_scan_label_id(plan, &var.name) {
6248                // Find label name
6249                let label_name = self.schema.label_name_by_id(label_id).map(str::to_owned);
6250
6251                if let Some(label) = label_name
6252                    && let Some(props) = self.schema.properties.get(&label)
6253                {
6254                    for (gen_col, meta) in props {
6255                        if meta.generation_expression.is_some() {
6256                            // Use cached parsed expression
6257                            if let Some(schema_expr) =
6258                                self.gen_expr_cache.get(&(label.clone(), gen_col.clone()))
6259                            {
6260                                // Rewrite 'rewritten' replacing occurrences of schema_expr with gen_col
6261                                rewritten = Self::replace_expression(
6262                                    rewritten,
6263                                    schema_expr,
6264                                    &var.name,
6265                                    gen_col,
6266                                );
6267                            }
6268                        }
6269                    }
6270                }
6271            }
6272        }
6273        Ok(rewritten)
6274    }
6275
6276    fn replace_expression(expr: Expr, schema_expr: &Expr, query_var: &str, gen_col: &str) -> Expr {
6277        // First, normalize schema_expr to use query_var
6278        let schema_var = schema_expr.extract_variable();
6279
6280        if let Some(s_var) = schema_var {
6281            let target_expr = schema_expr.substitute_variable(&s_var, query_var);
6282
6283            if expr == target_expr {
6284                return Expr::Property(
6285                    Box::new(Expr::Variable(query_var.to_string())),
6286                    gen_col.to_string(),
6287                );
6288            }
6289        }
6290
6291        // Recurse
6292        match expr {
6293            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
6294                left: Box::new(Self::replace_expression(
6295                    *left,
6296                    schema_expr,
6297                    query_var,
6298                    gen_col,
6299                )),
6300                op,
6301                right: Box::new(Self::replace_expression(
6302                    *right,
6303                    schema_expr,
6304                    query_var,
6305                    gen_col,
6306                )),
6307            },
6308            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
6309                op,
6310                expr: Box::new(Self::replace_expression(
6311                    *expr,
6312                    schema_expr,
6313                    query_var,
6314                    gen_col,
6315                )),
6316            },
6317            Expr::FunctionCall {
6318                name,
6319                args,
6320                distinct,
6321                window_spec,
6322            } => Expr::FunctionCall {
6323                name,
6324                args: args
6325                    .into_iter()
6326                    .map(|a| Self::replace_expression(a, schema_expr, query_var, gen_col))
6327                    .collect(),
6328                distinct,
6329                window_spec,
6330            },
6331            Expr::IsNull(expr) => Expr::IsNull(Box::new(Self::replace_expression(
6332                *expr,
6333                schema_expr,
6334                query_var,
6335                gen_col,
6336            ))),
6337            Expr::IsNotNull(expr) => Expr::IsNotNull(Box::new(Self::replace_expression(
6338                *expr,
6339                schema_expr,
6340                query_var,
6341                gen_col,
6342            ))),
6343            Expr::IsUnique(expr) => Expr::IsUnique(Box::new(Self::replace_expression(
6344                *expr,
6345                schema_expr,
6346                query_var,
6347                gen_col,
6348            ))),
6349            Expr::ArrayIndex {
6350                array: e,
6351                index: idx,
6352            } => Expr::ArrayIndex {
6353                array: Box::new(Self::replace_expression(
6354                    *e,
6355                    schema_expr,
6356                    query_var,
6357                    gen_col,
6358                )),
6359                index: Box::new(Self::replace_expression(
6360                    *idx,
6361                    schema_expr,
6362                    query_var,
6363                    gen_col,
6364                )),
6365            },
6366            Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
6367                array: Box::new(Self::replace_expression(
6368                    *array,
6369                    schema_expr,
6370                    query_var,
6371                    gen_col,
6372                )),
6373                start: start.map(|s| {
6374                    Box::new(Self::replace_expression(
6375                        *s,
6376                        schema_expr,
6377                        query_var,
6378                        gen_col,
6379                    ))
6380                }),
6381                end: end.map(|e| {
6382                    Box::new(Self::replace_expression(
6383                        *e,
6384                        schema_expr,
6385                        query_var,
6386                        gen_col,
6387                    ))
6388                }),
6389            },
6390            Expr::List(exprs) => Expr::List(
6391                exprs
6392                    .into_iter()
6393                    .map(|e| Self::replace_expression(e, schema_expr, query_var, gen_col))
6394                    .collect(),
6395            ),
6396            Expr::Map(entries) => Expr::Map(
6397                entries
6398                    .into_iter()
6399                    .map(|(k, v)| {
6400                        (
6401                            k,
6402                            Self::replace_expression(v, schema_expr, query_var, gen_col),
6403                        )
6404                    })
6405                    .collect(),
6406            ),
6407            Expr::Property(e, prop) => Expr::Property(
6408                Box::new(Self::replace_expression(
6409                    *e,
6410                    schema_expr,
6411                    query_var,
6412                    gen_col,
6413                )),
6414                prop,
6415            ),
6416            Expr::Case {
6417                expr: case_expr,
6418                when_then,
6419                else_expr,
6420            } => Expr::Case {
6421                expr: case_expr.map(|e| {
6422                    Box::new(Self::replace_expression(
6423                        *e,
6424                        schema_expr,
6425                        query_var,
6426                        gen_col,
6427                    ))
6428                }),
6429                when_then: when_then
6430                    .into_iter()
6431                    .map(|(w, t)| {
6432                        (
6433                            Self::replace_expression(w, schema_expr, query_var, gen_col),
6434                            Self::replace_expression(t, schema_expr, query_var, gen_col),
6435                        )
6436                    })
6437                    .collect(),
6438                else_expr: else_expr.map(|e| {
6439                    Box::new(Self::replace_expression(
6440                        *e,
6441                        schema_expr,
6442                        query_var,
6443                        gen_col,
6444                    ))
6445                }),
6446            },
6447            Expr::Reduce {
6448                accumulator,
6449                init,
6450                variable: reduce_var,
6451                list,
6452                expr: reduce_expr,
6453            } => Expr::Reduce {
6454                accumulator,
6455                init: Box::new(Self::replace_expression(
6456                    *init,
6457                    schema_expr,
6458                    query_var,
6459                    gen_col,
6460                )),
6461                variable: reduce_var,
6462                list: Box::new(Self::replace_expression(
6463                    *list,
6464                    schema_expr,
6465                    query_var,
6466                    gen_col,
6467                )),
6468                expr: Box::new(Self::replace_expression(
6469                    *reduce_expr,
6470                    schema_expr,
6471                    query_var,
6472                    gen_col,
6473                )),
6474            },
6475
6476            // Leaf nodes (Identifier, Literal, Parameter, etc.) need no recursion
6477            _ => expr,
6478        }
6479    }
6480
6481    /// Returns `true` iff `variable` is bound to a `ScanAll` operator
6482    /// (somewhere under `plan`). Used to gate the
6483    /// `WHERE n:A OR n:B` → `Union(Scan{A}, Scan{B})` rewrite — we only
6484    /// fire it when the variable is currently doing a full vertex scan,
6485    /// not when it's already bound to a labeled `Scan`.
6486    fn is_scan_all_for(plan: &LogicalPlan, variable: &str) -> bool {
6487        match plan {
6488            LogicalPlan::ScanAll { variable: var, .. } => var == variable,
6489            LogicalPlan::Filter { input, .. }
6490            | LogicalPlan::Project { input, .. }
6491            | LogicalPlan::Sort { input, .. }
6492            | LogicalPlan::Limit { input, .. }
6493            | LogicalPlan::Aggregate { input, .. }
6494            | LogicalPlan::Apply { input, .. }
6495            | LogicalPlan::Traverse { input, .. } => Self::is_scan_all_for(input, variable),
6496            LogicalPlan::CrossJoin { left, right } => {
6497                Self::is_scan_all_for(left, variable) || Self::is_scan_all_for(right, variable)
6498            }
6499            LogicalPlan::Union { left, right, .. } => {
6500                Self::is_scan_all_for(left, variable) || Self::is_scan_all_for(right, variable)
6501            }
6502            _ => false,
6503        }
6504    }
6505
6506    /// Replace the `ScanAll` for `variable` in `plan` with a left-leaning
6507    /// `Union` of label-scoped `Scan` (or `ScanMainByLabels` for unknown
6508    /// labels) operators built from `labels`. Used by the
6509    /// `WHERE n:A OR n:B` rewrite.
6510    fn replace_scan_all_with_label_union(
6511        &self,
6512        plan: LogicalPlan,
6513        variable: &str,
6514        labels: &[String],
6515        optional: bool,
6516    ) -> LogicalPlan {
6517        match plan {
6518            LogicalPlan::ScanAll {
6519                variable: var,
6520                filter,
6521                optional: scan_optional,
6522            } if var == variable => {
6523                // Heterogeneous (or any-schemaless) disjunction: route every
6524                // branch through a single-label `ScanMainByLabels` so all
6525                // branches emit a uniform schemaless schema. Avoids the
6526                // DataFusion `union_schema` panic. See `plan_unbound_node`
6527                // and issue rustic-ai/uni-db#62.
6528                let use_main_table_branches = !self.label_branches_share_property_schema(labels);
6529
6530                let mut branches: Vec<LogicalPlan> = Vec::with_capacity(labels.len());
6531                for label in labels {
6532                    let branch = if use_main_table_branches {
6533                        LogicalPlan::ScanMainByLabels {
6534                            labels: vec![label.clone()],
6535                            variable: variable.to_string(),
6536                            filter: filter.clone(),
6537                            optional: scan_optional || optional,
6538                        }
6539                    } else {
6540                        let meta = self
6541                            .schema
6542                            .get_label_case_insensitive(label)
6543                            .expect("share_property_schema true implies all labels in schema");
6544                        LogicalPlan::Scan {
6545                            label_id: meta.id,
6546                            labels: vec![label.clone()],
6547                            variable: variable.to_string(),
6548                            filter: filter.clone(),
6549                            optional: scan_optional || optional,
6550                        }
6551                    };
6552                    branches.push(branch);
6553                }
6554                let mut iter = branches.into_iter();
6555                let mut union_plan = iter.next().expect("at least one label");
6556                for next in iter {
6557                    union_plan = LogicalPlan::Union {
6558                        left: Box::new(union_plan),
6559                        right: Box::new(next),
6560                        all: false,
6561                    };
6562                }
6563                union_plan
6564            }
6565            LogicalPlan::Filter {
6566                input,
6567                predicate,
6568                optional_variables,
6569            } => LogicalPlan::Filter {
6570                input: Box::new(
6571                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6572                ),
6573                predicate,
6574                optional_variables,
6575            },
6576            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6577                input: Box::new(
6578                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6579                ),
6580                projections,
6581            },
6582            LogicalPlan::CrossJoin { left, right } => {
6583                if Self::is_scan_all_for(&left, variable) {
6584                    LogicalPlan::CrossJoin {
6585                        left: Box::new(
6586                            self.replace_scan_all_with_label_union(
6587                                *left, variable, labels, optional,
6588                            ),
6589                        ),
6590                        right,
6591                    }
6592                } else {
6593                    LogicalPlan::CrossJoin {
6594                        left,
6595                        right: Box::new(
6596                            self.replace_scan_all_with_label_union(
6597                                *right, variable, labels, optional,
6598                            ),
6599                        ),
6600                    }
6601                }
6602            }
6603            LogicalPlan::Traverse {
6604                input,
6605                edge_type_ids,
6606                direction,
6607                source_variable,
6608                target_variable,
6609                target_label_id,
6610                step_variable,
6611                min_hops,
6612                max_hops,
6613                optional: trav_optional,
6614                target_filter,
6615                path_variable,
6616                edge_properties,
6617                is_variable_length,
6618                optional_pattern_vars,
6619                scope_match_variables,
6620                edge_filter_expr,
6621                path_mode,
6622                qpp_steps,
6623            } => LogicalPlan::Traverse {
6624                input: Box::new(
6625                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6626                ),
6627                edge_type_ids,
6628                direction,
6629                source_variable,
6630                target_variable,
6631                target_label_id,
6632                step_variable,
6633                min_hops,
6634                max_hops,
6635                optional: trav_optional,
6636                target_filter,
6637                path_variable,
6638                edge_properties,
6639                is_variable_length,
6640                optional_pattern_vars,
6641                scope_match_variables,
6642                edge_filter_expr,
6643                path_mode,
6644                qpp_steps,
6645            },
6646            other => other,
6647        }
6648    }
6649
6650    /// Returns `Some(())` iff `variable` is the `step_variable` (i.e. the
6651    /// edge variable) of some `Traverse` operator in `plan`. Used to gate
6652    /// the `WHERE type(r) = 'A' OR type(r) = 'B'` rewrite — we need a
6653    /// Traverse whose types we can merge into.
6654    fn merge_traverse_types_for(
6655        plan: &LogicalPlan,
6656        edge_var: &str,
6657        _types: &[String],
6658    ) -> Option<()> {
6659        match plan {
6660            LogicalPlan::Traverse {
6661                step_variable,
6662                input,
6663                ..
6664            } => {
6665                if step_variable.as_deref() == Some(edge_var) {
6666                    Some(())
6667                } else {
6668                    Self::merge_traverse_types_for(input, edge_var, _types)
6669                }
6670            }
6671            LogicalPlan::Filter { input, .. }
6672            | LogicalPlan::Project { input, .. }
6673            | LogicalPlan::Sort { input, .. }
6674            | LogicalPlan::Limit { input, .. }
6675            | LogicalPlan::Aggregate { input, .. }
6676            | LogicalPlan::Apply { input, .. } => {
6677                Self::merge_traverse_types_for(input, edge_var, _types)
6678            }
6679            LogicalPlan::CrossJoin { left, right } | LogicalPlan::Union { left, right, .. } => {
6680                Self::merge_traverse_types_for(left, edge_var, _types)
6681                    .or_else(|| Self::merge_traverse_types_for(right, edge_var, _types))
6682            }
6683            _ => None,
6684        }
6685    }
6686
6687    /// Replace `edge_type_ids` on the Traverse whose `step_variable`
6688    /// equals `edge_var`. Used by the type-OR rewrite.
6689    fn set_traverse_edge_type_ids(
6690        plan: LogicalPlan,
6691        edge_var: &str,
6692        new_ids: Vec<u32>,
6693    ) -> LogicalPlan {
6694        match plan {
6695            LogicalPlan::Traverse {
6696                input,
6697                edge_type_ids,
6698                direction,
6699                source_variable,
6700                target_variable,
6701                target_label_id,
6702                step_variable,
6703                min_hops,
6704                max_hops,
6705                optional,
6706                target_filter,
6707                path_variable,
6708                edge_properties,
6709                is_variable_length,
6710                optional_pattern_vars,
6711                scope_match_variables,
6712                edge_filter_expr,
6713                path_mode,
6714                qpp_steps,
6715            } => {
6716                let matches_var = step_variable.as_deref() == Some(edge_var);
6717                let recursed_input = if matches_var {
6718                    input
6719                } else {
6720                    Box::new(Self::set_traverse_edge_type_ids(
6721                        *input,
6722                        edge_var,
6723                        new_ids.clone(),
6724                    ))
6725                };
6726                LogicalPlan::Traverse {
6727                    input: recursed_input,
6728                    edge_type_ids: if matches_var { new_ids } else { edge_type_ids },
6729                    direction,
6730                    source_variable,
6731                    target_variable,
6732                    target_label_id,
6733                    step_variable,
6734                    min_hops,
6735                    max_hops,
6736                    optional,
6737                    target_filter,
6738                    path_variable,
6739                    edge_properties,
6740                    is_variable_length,
6741                    optional_pattern_vars,
6742                    scope_match_variables,
6743                    edge_filter_expr,
6744                    path_mode,
6745                    qpp_steps,
6746                }
6747            }
6748            LogicalPlan::Filter {
6749                input,
6750                predicate,
6751                optional_variables,
6752            } => LogicalPlan::Filter {
6753                input: Box::new(Self::set_traverse_edge_type_ids(*input, edge_var, new_ids)),
6754                predicate,
6755                optional_variables,
6756            },
6757            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6758                input: Box::new(Self::set_traverse_edge_type_ids(*input, edge_var, new_ids)),
6759                projections,
6760            },
6761            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6762                left: Box::new(Self::set_traverse_edge_type_ids(
6763                    *left,
6764                    edge_var,
6765                    new_ids.clone(),
6766                )),
6767                right: Box::new(Self::set_traverse_edge_type_ids(*right, edge_var, new_ids)),
6768            },
6769            other => other,
6770        }
6771    }
6772
6773    /// Check if the variable is the target of a Traverse node
6774    fn is_traverse_target(plan: &LogicalPlan, variable: &str) -> bool {
6775        match plan {
6776            LogicalPlan::Traverse {
6777                target_variable,
6778                input,
6779                ..
6780            } => target_variable == variable || Self::is_traverse_target(input, variable),
6781            LogicalPlan::Filter { input, .. }
6782            | LogicalPlan::Project { input, .. }
6783            | LogicalPlan::Sort { input, .. }
6784            | LogicalPlan::Limit { input, .. }
6785            | LogicalPlan::Aggregate { input, .. }
6786            | LogicalPlan::Apply { input, .. } => Self::is_traverse_target(input, variable),
6787            LogicalPlan::CrossJoin { left, right } => {
6788                Self::is_traverse_target(left, variable)
6789                    || Self::is_traverse_target(right, variable)
6790            }
6791            _ => false,
6792        }
6793    }
6794
6795    /// Push a predicate into a Traverse's target_filter for the specified variable
6796    fn push_predicate_to_traverse(
6797        plan: LogicalPlan,
6798        variable: &str,
6799        predicate: Expr,
6800    ) -> LogicalPlan {
6801        match plan {
6802            LogicalPlan::Traverse {
6803                input,
6804                edge_type_ids,
6805                direction,
6806                source_variable,
6807                target_variable,
6808                target_label_id,
6809                step_variable,
6810                min_hops,
6811                max_hops,
6812                optional,
6813                target_filter,
6814                path_variable,
6815                edge_properties,
6816                is_variable_length,
6817                optional_pattern_vars,
6818                scope_match_variables,
6819                edge_filter_expr,
6820                path_mode,
6821                qpp_steps,
6822            } => {
6823                if target_variable == variable {
6824                    // Found the traverse producing this variable
6825                    let new_filter = match target_filter {
6826                        Some(existing) => Some(Expr::BinaryOp {
6827                            left: Box::new(existing),
6828                            op: BinaryOp::And,
6829                            right: Box::new(predicate),
6830                        }),
6831                        None => Some(predicate),
6832                    };
6833                    LogicalPlan::Traverse {
6834                        input,
6835                        edge_type_ids,
6836                        direction,
6837                        source_variable,
6838                        target_variable,
6839                        target_label_id,
6840                        step_variable,
6841                        min_hops,
6842                        max_hops,
6843                        optional,
6844                        target_filter: new_filter,
6845                        path_variable,
6846                        edge_properties,
6847                        is_variable_length,
6848                        optional_pattern_vars,
6849                        scope_match_variables,
6850                        edge_filter_expr,
6851                        path_mode,
6852                        qpp_steps,
6853                    }
6854                } else {
6855                    // Recurse into input
6856                    LogicalPlan::Traverse {
6857                        input: Box::new(Self::push_predicate_to_traverse(
6858                            *input, variable, predicate,
6859                        )),
6860                        edge_type_ids,
6861                        direction,
6862                        source_variable,
6863                        target_variable,
6864                        target_label_id,
6865                        step_variable,
6866                        min_hops,
6867                        max_hops,
6868                        optional,
6869                        target_filter,
6870                        path_variable,
6871                        edge_properties,
6872                        is_variable_length,
6873                        optional_pattern_vars,
6874                        scope_match_variables,
6875                        edge_filter_expr,
6876                        path_mode,
6877                        qpp_steps,
6878                    }
6879                }
6880            }
6881            LogicalPlan::Filter {
6882                input,
6883                predicate: p,
6884                optional_variables: opt_vars,
6885            } => LogicalPlan::Filter {
6886                input: Box::new(Self::push_predicate_to_traverse(
6887                    *input, variable, predicate,
6888                )),
6889                predicate: p,
6890                optional_variables: opt_vars,
6891            },
6892            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6893                input: Box::new(Self::push_predicate_to_traverse(
6894                    *input, variable, predicate,
6895                )),
6896                projections,
6897            },
6898            LogicalPlan::CrossJoin { left, right } => {
6899                // Check which side has the variable
6900                if Self::is_traverse_target(&left, variable) {
6901                    LogicalPlan::CrossJoin {
6902                        left: Box::new(Self::push_predicate_to_traverse(
6903                            *left, variable, predicate,
6904                        )),
6905                        right,
6906                    }
6907                } else {
6908                    LogicalPlan::CrossJoin {
6909                        left,
6910                        right: Box::new(Self::push_predicate_to_traverse(
6911                            *right, variable, predicate,
6912                        )),
6913                    }
6914                }
6915            }
6916            other => other,
6917        }
6918    }
6919
6920    /// Plan a WITH clause, handling aggregations and projections.
6921    fn plan_with_clause(
6922        &self,
6923        with_clause: &WithClause,
6924        plan: LogicalPlan,
6925        vars_in_scope: &[VariableInfo],
6926    ) -> Result<(LogicalPlan, Vec<VariableInfo>)> {
6927        let mut plan = plan;
6928        let mut group_by: Vec<Expr> = Vec::new();
6929        let mut aggregates: Vec<Expr> = Vec::new();
6930        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
6931        let mut has_agg = false;
6932        let mut projections = Vec::new();
6933        let mut new_vars: Vec<VariableInfo> = Vec::new();
6934        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
6935        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
6936        let mut projected_aliases: HashSet<String> = HashSet::new();
6937        let mut has_unaliased_non_variable_expr = false;
6938
6939        for item in &with_clause.items {
6940            match item {
6941                ReturnItem::All => {
6942                    // WITH * - add all variables in scope
6943                    for v in vars_in_scope {
6944                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
6945                        projected_aliases.insert(v.name.clone());
6946                        projected_simple_reprs.insert(v.name.clone());
6947                    }
6948                    new_vars.extend(vars_in_scope.iter().cloned());
6949                }
6950                ReturnItem::Expr { expr, alias, .. } => {
6951                    if matches!(expr, Expr::Wildcard) {
6952                        for v in vars_in_scope {
6953                            projections
6954                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
6955                            projected_aliases.insert(v.name.clone());
6956                            projected_simple_reprs.insert(v.name.clone());
6957                        }
6958                        new_vars.extend(vars_in_scope.iter().cloned());
6959                    } else {
6960                        // Validate expression variables and syntax
6961                        validate_expression_variables(expr, vars_in_scope)?;
6962                        validate_expression(expr, vars_in_scope)?;
6963                        // Pattern predicates are not allowed in WITH
6964                        if contains_pattern_predicate(expr) {
6965                            return Err(anyhow!(
6966                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in WITH"
6967                            ));
6968                        }
6969
6970                        projections.push((expr.clone(), alias.clone()));
6971                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
6972                            // Bare aggregate — push directly
6973                            has_agg = true;
6974                            aggregates.push(expr.clone());
6975                            projected_aggregate_reprs.insert(expr.to_string_repr());
6976                        } else if !is_window_function(expr)
6977                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
6978                        {
6979                            // Compound aggregate or expression containing aggregates
6980                            has_agg = true;
6981                            compound_agg_exprs.push(expr.clone());
6982                            for inner in extract_inner_aggregates(expr) {
6983                                let repr = inner.to_string_repr();
6984                                if !projected_aggregate_reprs.contains(&repr) {
6985                                    aggregates.push(inner);
6986                                    projected_aggregate_reprs.insert(repr);
6987                                }
6988                            }
6989                        } else if !group_by.contains(expr) {
6990                            group_by.push(expr.clone());
6991                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
6992                                projected_simple_reprs.insert(expr.to_string_repr());
6993                            }
6994                        }
6995
6996                        // Preserve non-scalar type information when WITH aliases
6997                        // entity/path-capable expressions.
6998                        if let Some(a) = alias {
6999                            if projected_aliases.contains(a) {
7000                                return Err(anyhow!(
7001                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
7002                                    a
7003                                ));
7004                            }
7005                            let inferred = infer_with_output_type(expr, vars_in_scope);
7006                            new_vars.push(VariableInfo::new(a.clone(), inferred));
7007                            projected_aliases.insert(a.clone());
7008                        } else if let Expr::Variable(v) = expr {
7009                            if projected_aliases.contains(v) {
7010                                return Err(anyhow!(
7011                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
7012                                    v
7013                                ));
7014                            }
7015                            // Preserve the original type if the variable is just passed through
7016                            if let Some(existing) = find_var_in_scope(vars_in_scope, v) {
7017                                new_vars.push(existing.clone());
7018                            } else {
7019                                new_vars.push(VariableInfo::new(v.clone(), VariableType::Scalar));
7020                            }
7021                            projected_aliases.insert(v.clone());
7022                        } else {
7023                            has_unaliased_non_variable_expr = true;
7024                        }
7025                    }
7026                }
7027            }
7028        }
7029
7030        // Collect extra variables that need to survive the projection stage
7031        // for later WHERE / ORDER BY evaluation, then strip them afterwards.
7032        let projected_names: HashSet<&str> = new_vars.iter().map(|v| v.name.as_str()).collect();
7033        let mut passthrough_extras: Vec<String> = Vec::new();
7034        let mut seen_passthrough: HashSet<String> = HashSet::new();
7035
7036        if let Some(predicate) = &with_clause.where_clause {
7037            for name in collect_expr_variables(predicate) {
7038                if !projected_names.contains(name.as_str())
7039                    && find_var_in_scope(vars_in_scope, &name).is_some()
7040                    && seen_passthrough.insert(name.clone())
7041                {
7042                    passthrough_extras.push(name);
7043                }
7044            }
7045        }
7046
7047        // Non-aggregating WITH allows ORDER BY to reference incoming variables.
7048        // Carry those variables through the projection so Sort can resolve them.
7049        if !has_agg && let Some(order_by) = &with_clause.order_by {
7050            for item in order_by {
7051                for name in collect_expr_variables(&item.expr) {
7052                    if !projected_names.contains(name.as_str())
7053                        && find_var_in_scope(vars_in_scope, &name).is_some()
7054                        && seen_passthrough.insert(name.clone())
7055                    {
7056                        passthrough_extras.push(name);
7057                    }
7058                }
7059            }
7060        }
7061
7062        let needs_cleanup = !passthrough_extras.is_empty();
7063        for extra in &passthrough_extras {
7064            projections.push((Expr::Variable(extra.clone()), Some(extra.clone())));
7065        }
7066
7067        // Validate compound aggregate expressions: non-aggregate refs must be
7068        // individually present in the group_by as simple variables or properties.
7069        if has_agg {
7070            let group_by_reprs: HashSet<String> =
7071                group_by.iter().map(|e| e.to_string_repr()).collect();
7072            for expr in &compound_agg_exprs {
7073                let mut refs = Vec::new();
7074                collect_non_aggregate_refs(expr, false, &mut refs);
7075                for r in &refs {
7076                    let is_covered = match r {
7077                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
7078                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
7079                    };
7080                    if !is_covered {
7081                        return Err(anyhow!(
7082                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
7083                        ));
7084                    }
7085                }
7086            }
7087        }
7088
7089        if has_agg {
7090            plan = LogicalPlan::Aggregate {
7091                input: Box::new(plan),
7092                group_by,
7093                aggregates,
7094            };
7095
7096            // Insert a renaming Project so downstream clauses (WHERE, RETURN)
7097            // can reference the WITH aliases instead of raw column names.
7098            let rename_projections: Vec<(Expr, Option<String>)> = projections
7099                .iter()
7100                .map(|(expr, alias)| {
7101                    if expr.is_aggregate() && !is_compound_aggregate(expr) {
7102                        // Bare aggregate — reference by column name
7103                        (Expr::Variable(aggregate_column_name(expr)), alias.clone())
7104                    } else if is_compound_aggregate(expr)
7105                        || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
7106                    {
7107                        // Compound aggregate — replace inner aggregates with
7108                        // column references, keep outer expression
7109                        (replace_aggregates_with_columns(expr), alias.clone())
7110                    } else {
7111                        (Expr::Variable(expr.to_string_repr()), alias.clone())
7112                    }
7113                })
7114                .collect();
7115            plan = LogicalPlan::Project {
7116                input: Box::new(plan),
7117                projections: rename_projections,
7118            };
7119        } else if !projections.is_empty() {
7120            plan = LogicalPlan::Project {
7121                input: Box::new(plan),
7122                projections: projections.clone(),
7123            };
7124        }
7125
7126        // Apply the WHERE filter (post-projection, with extras still visible).
7127        if let Some(predicate) = &with_clause.where_clause {
7128            plan = LogicalPlan::Filter {
7129                input: Box::new(plan),
7130                predicate: predicate.clone(),
7131                optional_variables: HashSet::new(),
7132            };
7133        }
7134
7135        // Validate and apply ORDER BY for WITH clause.
7136        // Keep pre-WITH vars in scope for parser compatibility, then apply
7137        // stricter checks for aggregate-containing ORDER BY items.
7138        if let Some(order_by) = &with_clause.order_by {
7139            // Build a mapping from aliases and projected expression reprs to
7140            // output columns of the preceding Project/Aggregate pipeline.
7141            let with_order_aliases: HashMap<String, Expr> = projections
7142                .iter()
7143                .flat_map(|(expr, alias)| {
7144                    let output_col = if let Some(a) = alias {
7145                        a.clone()
7146                    } else if expr.is_aggregate() && !is_compound_aggregate(expr) {
7147                        aggregate_column_name(expr)
7148                    } else {
7149                        expr.to_string_repr()
7150                    };
7151
7152                    let mut entries = Vec::new();
7153                    // ORDER BY alias
7154                    if let Some(a) = alias {
7155                        entries.push((a.clone(), Expr::Variable(output_col.clone())));
7156                    }
7157                    // ORDER BY projected expression (e.g. me.age)
7158                    entries.push((expr.to_string_repr(), Expr::Variable(output_col)));
7159                    entries
7160                })
7161                .collect();
7162
7163            let order_by_scope: Vec<VariableInfo> = {
7164                let mut scope = new_vars.clone();
7165                for v in vars_in_scope {
7166                    if !is_var_in_scope(&scope, &v.name) {
7167                        scope.push(v.clone());
7168                    }
7169                }
7170                scope
7171            };
7172            for item in order_by {
7173                validate_expression_variables(&item.expr, &order_by_scope)?;
7174                validate_expression(&item.expr, &order_by_scope)?;
7175                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
7176                if has_aggregate_in_item && !has_agg {
7177                    return Err(anyhow!(
7178                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY of WITH"
7179                    ));
7180                }
7181                if has_agg && has_aggregate_in_item {
7182                    validate_with_order_by_aggregate_item(
7183                        &item.expr,
7184                        &projected_aggregate_reprs,
7185                        &projected_simple_reprs,
7186                        &projected_aliases,
7187                    )?;
7188                }
7189            }
7190            let rewritten_order_by: Vec<SortItem> = order_by
7191                .iter()
7192                .map(|item| {
7193                    let mut expr =
7194                        rewrite_order_by_expr_with_aliases(&item.expr, &with_order_aliases);
7195                    if has_agg {
7196                        // Rewrite any aggregate calls to the aggregate output
7197                        // columns produced by Aggregate.
7198                        expr = replace_aggregates_with_columns(&expr);
7199                        // Then re-map projected property expressions to aliases
7200                        // from the WITH projection.
7201                        expr = rewrite_order_by_expr_with_aliases(&expr, &with_order_aliases);
7202                    }
7203                    SortItem {
7204                        expr,
7205                        ascending: item.ascending,
7206                    }
7207                })
7208                .collect();
7209            plan = LogicalPlan::Sort {
7210                input: Box::new(plan),
7211                order_by: rewritten_order_by,
7212            };
7213        }
7214
7215        // Non-variable expressions in WITH must be aliased.
7216        // This check is intentionally placed after ORDER BY validation so
7217        // higher-priority semantic errors (e.g., ambiguous aggregation in
7218        // ORDER BY) can surface first.
7219        if has_unaliased_non_variable_expr {
7220            return Err(anyhow!(
7221                "SyntaxError: NoExpressionAlias - All non-variable expressions in WITH must be aliased"
7222            ));
7223        }
7224
7225        // Validate and apply SKIP/LIMIT for WITH clause
7226        let skip = with_clause
7227            .skip
7228            .as_ref()
7229            .map(|e| {
7230                self.note_folded_limit_skip(e);
7231                parse_non_negative_integer(e, "SKIP", &self.params)
7232            })
7233            .transpose()?
7234            .flatten();
7235        let fetch = with_clause
7236            .limit
7237            .as_ref()
7238            .map(|e| {
7239                self.note_folded_limit_skip(e);
7240                parse_non_negative_integer(e, "LIMIT", &self.params)
7241            })
7242            .transpose()?
7243            .flatten();
7244
7245        if skip.is_some() || fetch.is_some() {
7246            plan = LogicalPlan::Limit {
7247                input: Box::new(plan),
7248                skip,
7249                fetch,
7250            };
7251        }
7252
7253        // Strip passthrough columns that were only needed by WHERE / ORDER BY.
7254        if needs_cleanup {
7255            let cleanup_projections: Vec<(Expr, Option<String>)> = new_vars
7256                .iter()
7257                .map(|v| (Expr::Variable(v.name.clone()), Some(v.name.clone())))
7258                .collect();
7259            plan = LogicalPlan::Project {
7260                input: Box::new(plan),
7261                projections: cleanup_projections,
7262            };
7263        }
7264
7265        if with_clause.distinct {
7266            plan = LogicalPlan::Distinct {
7267                input: Box::new(plan),
7268            };
7269        }
7270
7271        Ok((plan, new_vars))
7272    }
7273
7274    fn plan_with_recursive(
7275        &self,
7276        with_recursive: &WithRecursiveClause,
7277        _prev_plan: LogicalPlan,
7278        vars_in_scope: &[VariableInfo],
7279    ) -> Result<LogicalPlan> {
7280        // WITH RECURSIVE requires a UNION query with anchor and recursive parts
7281        match &*with_recursive.query {
7282            Query::Union { left, right, .. } => {
7283                // Plan the anchor (initial) query with current scope
7284                let initial_plan = self.rewrite_and_plan_typed(*left.clone(), vars_in_scope)?;
7285
7286                // Plan the recursive query with the CTE name added to scope
7287                // so it can reference itself
7288                let mut recursive_scope = vars_in_scope.to_vec();
7289                recursive_scope.push(VariableInfo::new(
7290                    with_recursive.name.clone(),
7291                    VariableType::Scalar,
7292                ));
7293                let recursive_plan =
7294                    self.rewrite_and_plan_typed(*right.clone(), &recursive_scope)?;
7295
7296                Ok(LogicalPlan::RecursiveCTE {
7297                    cte_name: with_recursive.name.clone(),
7298                    initial: Box::new(initial_plan),
7299                    recursive: Box::new(recursive_plan),
7300                })
7301            }
7302            _ => Err(anyhow::anyhow!(
7303                "WITH RECURSIVE requires a UNION query with anchor and recursive parts"
7304            )),
7305        }
7306    }
7307
7308    pub fn properties_to_expr(&self, variable: &str, properties: &Option<Expr>) -> Option<Expr> {
7309        let entries = match properties {
7310            Some(Expr::Map(entries)) => entries,
7311            _ => return None,
7312        };
7313
7314        if entries.is_empty() {
7315            return None;
7316        }
7317        let mut final_expr = None;
7318        for (prop, val_expr) in entries {
7319            let eq_expr = Expr::BinaryOp {
7320                left: Box::new(Expr::Property(
7321                    Box::new(Expr::Variable(variable.to_string())),
7322                    prop.clone(),
7323                )),
7324                op: BinaryOp::Eq,
7325                right: Box::new(val_expr.clone()),
7326            };
7327
7328            if let Some(e) = final_expr {
7329                final_expr = Some(Expr::BinaryOp {
7330                    left: Box::new(e),
7331                    op: BinaryOp::And,
7332                    right: Box::new(eq_expr),
7333                });
7334            } else {
7335                final_expr = Some(eq_expr);
7336            }
7337        }
7338        final_expr
7339    }
7340
7341    /// Build a filter expression from node properties and labels.
7342    ///
7343    /// This is used for TraverseMainByType where we need to filter target nodes
7344    /// by both labels and properties. Label checks use hasLabel(variable, 'label').
7345    pub fn node_filter_expr(
7346        &self,
7347        variable: &str,
7348        labels: &[String],
7349        properties: &Option<Expr>,
7350    ) -> Option<Expr> {
7351        let mut final_expr = None;
7352
7353        // Add label checks using hasLabel(variable, 'label')
7354        for label in labels {
7355            let label_check = Expr::FunctionCall {
7356                name: "hasLabel".to_string(),
7357                args: vec![
7358                    Expr::Variable(variable.to_string()),
7359                    Expr::Literal(CypherLiteral::String(label.clone())),
7360                ],
7361                distinct: false,
7362                window_spec: None,
7363            };
7364
7365            final_expr = match final_expr {
7366                Some(e) => Some(Expr::BinaryOp {
7367                    left: Box::new(e),
7368                    op: BinaryOp::And,
7369                    right: Box::new(label_check),
7370                }),
7371                None => Some(label_check),
7372            };
7373        }
7374
7375        // Add property checks
7376        if let Some(prop_expr) = self.properties_to_expr(variable, properties) {
7377            final_expr = match final_expr {
7378                Some(e) => Some(Expr::BinaryOp {
7379                    left: Box::new(e),
7380                    op: BinaryOp::And,
7381                    right: Box::new(prop_expr),
7382                }),
7383                None => Some(prop_expr),
7384            };
7385        }
7386
7387        final_expr
7388    }
7389
7390    /// Create a filter plan that ensures traversed target matches a bound variable.
7391    ///
7392    /// Used in EXISTS subquery patterns where the target is already bound.
7393    /// Compares the target's VID against the bound variable's VID.
7394    fn wrap_with_bound_target_filter(plan: LogicalPlan, target_variable: &str) -> LogicalPlan {
7395        // Compare the traverse-discovered target's VID against the bound variable's VID.
7396        // Left side: Property access on the variable from current scope.
7397        // Right side: Variable column "{var}._vid" from traverse output (outer scope).
7398        // We use Variable("{var}._vid") to access the VID column from the traverse output,
7399        // not Property(Variable("{var}"), "_vid") because the column is already flattened.
7400        let bound_check = Expr::BinaryOp {
7401            left: Box::new(Expr::Property(
7402                Box::new(Expr::Variable(target_variable.to_string())),
7403                "_vid".to_string(),
7404            )),
7405            op: BinaryOp::Eq,
7406            right: Box::new(Expr::Variable(format!("{}._vid", target_variable))),
7407        };
7408        LogicalPlan::Filter {
7409            input: Box::new(plan),
7410            predicate: bound_check,
7411            optional_variables: HashSet::new(),
7412        }
7413    }
7414
7415    /// Replace a Scan node matching the variable with a VectorKnn node
7416    fn replace_scan_with_knn(
7417        plan: LogicalPlan,
7418        variable: &str,
7419        property: &str,
7420        query: Expr,
7421        threshold: Option<f32>,
7422    ) -> LogicalPlan {
7423        match plan {
7424            LogicalPlan::Scan {
7425                label_id,
7426                labels,
7427                variable: scan_var,
7428                filter,
7429                optional,
7430            } => {
7431                if scan_var == variable {
7432                    // Inject any existing scan filter into VectorKnn?
7433                    // VectorKnn doesn't support pre-filtering natively in logical plan yet (except threshold).
7434                    // Typically filter is applied post-Knn or during Knn if supported.
7435                    // For now, we assume filter is residual or handled by `extract_vector_similarity` which separates residual.
7436                    // If `filter` is present on Scan, it must be preserved.
7437                    // We can wrap VectorKnn in Filter if Scan had filter.
7438
7439                    let knn = LogicalPlan::VectorKnn {
7440                        label_id,
7441                        variable: variable.to_string(),
7442                        property: property.to_string(),
7443                        query,
7444                        k: 100, // Default K, should push down LIMIT
7445                        threshold,
7446                    };
7447
7448                    if let Some(f) = filter {
7449                        LogicalPlan::Filter {
7450                            input: Box::new(knn),
7451                            predicate: f,
7452                            optional_variables: HashSet::new(),
7453                        }
7454                    } else {
7455                        knn
7456                    }
7457                } else {
7458                    LogicalPlan::Scan {
7459                        label_id,
7460                        labels,
7461                        variable: scan_var,
7462                        filter,
7463                        optional,
7464                    }
7465                }
7466            }
7467            LogicalPlan::Filter {
7468                input,
7469                predicate,
7470                optional_variables,
7471            } => LogicalPlan::Filter {
7472                input: Box::new(Self::replace_scan_with_knn(
7473                    *input, variable, property, query, threshold,
7474                )),
7475                predicate,
7476                optional_variables,
7477            },
7478            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7479                input: Box::new(Self::replace_scan_with_knn(
7480                    *input, variable, property, query, threshold,
7481                )),
7482                projections,
7483            },
7484            LogicalPlan::Limit { input, skip, fetch } => {
7485                // If we encounter Limit, we should ideally push K down to VectorKnn
7486                // But replace_scan_with_knn is called from plan_where_clause which is inside plan_match.
7487                // Limit comes later.
7488                // To support Limit pushdown, we need a separate optimizer pass or do it in plan_single.
7489                LogicalPlan::Limit {
7490                    input: Box::new(Self::replace_scan_with_knn(
7491                        *input, variable, property, query, threshold,
7492                    )),
7493                    skip,
7494                    fetch,
7495                }
7496            }
7497            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
7498                left: Box::new(Self::replace_scan_with_knn(
7499                    *left,
7500                    variable,
7501                    property,
7502                    query.clone(),
7503                    threshold,
7504                )),
7505                right: Box::new(Self::replace_scan_with_knn(
7506                    *right, variable, property, query, threshold,
7507                )),
7508            },
7509            other => other,
7510        }
7511    }
7512
7513    /// Find the label_id for a Scan node matching the given variable
7514    fn find_scan_label_id(plan: &LogicalPlan, variable: &str) -> Option<u16> {
7515        match plan {
7516            LogicalPlan::Scan {
7517                label_id,
7518                variable: var,
7519                ..
7520            } if var == variable => Some(*label_id),
7521            LogicalPlan::ScanAll { variable: var, .. } if var == variable => Some(0),
7522            LogicalPlan::Filter { input, .. }
7523            | LogicalPlan::Project { input, .. }
7524            | LogicalPlan::Sort { input, .. }
7525            | LogicalPlan::Limit { input, .. }
7526            | LogicalPlan::Aggregate { input, .. }
7527            | LogicalPlan::Apply { input, .. } => Self::find_scan_label_id(input, variable),
7528            LogicalPlan::CrossJoin { left, right } => Self::find_scan_label_id(left, variable)
7529                .or_else(|| Self::find_scan_label_id(right, variable)),
7530            LogicalPlan::Traverse { input, .. } => Self::find_scan_label_id(input, variable),
7531            _ => None,
7532        }
7533    }
7534
7535    /// Push a predicate into a Scan's filter for the specified variable
7536    fn push_predicate_to_scan(plan: LogicalPlan, variable: &str, predicate: Expr) -> LogicalPlan {
7537        match plan {
7538            LogicalPlan::Scan {
7539                label_id,
7540                labels,
7541                variable: var,
7542                filter,
7543                optional,
7544            } if var == variable => {
7545                // Merge the predicate with existing filter
7546                let new_filter = match filter {
7547                    Some(existing) => Some(Expr::BinaryOp {
7548                        left: Box::new(existing),
7549                        op: BinaryOp::And,
7550                        right: Box::new(predicate),
7551                    }),
7552                    None => Some(predicate),
7553                };
7554                LogicalPlan::Scan {
7555                    label_id,
7556                    labels,
7557                    variable: var,
7558                    filter: new_filter,
7559                    optional,
7560                }
7561            }
7562            LogicalPlan::ScanAll {
7563                variable: var,
7564                filter,
7565                optional,
7566            } if var == variable => {
7567                let new_filter = match filter {
7568                    Some(existing) => Some(Expr::BinaryOp {
7569                        left: Box::new(existing),
7570                        op: BinaryOp::And,
7571                        right: Box::new(predicate),
7572                    }),
7573                    None => Some(predicate),
7574                };
7575                LogicalPlan::ScanAll {
7576                    variable: var,
7577                    filter: new_filter,
7578                    optional,
7579                }
7580            }
7581            LogicalPlan::Filter {
7582                input,
7583                predicate: p,
7584                optional_variables: opt_vars,
7585            } => LogicalPlan::Filter {
7586                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7587                predicate: p,
7588                optional_variables: opt_vars,
7589            },
7590            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7591                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7592                projections,
7593            },
7594            LogicalPlan::CrossJoin { left, right } => {
7595                // Check which side has the variable
7596                if Self::find_scan_label_id(&left, variable).is_some() {
7597                    LogicalPlan::CrossJoin {
7598                        left: Box::new(Self::push_predicate_to_scan(*left, variable, predicate)),
7599                        right,
7600                    }
7601                } else {
7602                    LogicalPlan::CrossJoin {
7603                        left,
7604                        right: Box::new(Self::push_predicate_to_scan(*right, variable, predicate)),
7605                    }
7606                }
7607            }
7608            LogicalPlan::Traverse {
7609                input,
7610                edge_type_ids,
7611                direction,
7612                source_variable,
7613                target_variable,
7614                target_label_id,
7615                step_variable,
7616                min_hops,
7617                max_hops,
7618                optional,
7619                target_filter,
7620                path_variable,
7621                edge_properties,
7622                is_variable_length,
7623                optional_pattern_vars,
7624                scope_match_variables,
7625                edge_filter_expr,
7626                path_mode,
7627                qpp_steps,
7628            } => LogicalPlan::Traverse {
7629                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7630                edge_type_ids,
7631                direction,
7632                source_variable,
7633                target_variable,
7634                target_label_id,
7635                step_variable,
7636                min_hops,
7637                max_hops,
7638                optional,
7639                target_filter,
7640                path_variable,
7641                edge_properties,
7642                is_variable_length,
7643                optional_pattern_vars,
7644                scope_match_variables,
7645                edge_filter_expr,
7646                path_mode,
7647                qpp_steps,
7648            },
7649            other => other,
7650        }
7651    }
7652
7653    /// Extract predicates that reference only the specified variable
7654    fn extract_variable_predicates(predicate: &Expr, variable: &str) -> (Vec<Expr>, Option<Expr>) {
7655        let analyzer = PredicateAnalyzer::new();
7656        let analysis = analyzer.analyze(predicate, variable);
7657
7658        // Return pushable predicates and combined residual
7659        let residual = if analysis.residual.is_empty() {
7660            None
7661        } else {
7662            let mut iter = analysis.residual.into_iter();
7663            let first = iter.next().unwrap();
7664            Some(iter.fold(first, |acc, e| Expr::BinaryOp {
7665                left: Box::new(acc),
7666                op: BinaryOp::And,
7667                right: Box::new(e),
7668            }))
7669        };
7670
7671        (analysis.pushable, residual)
7672    }
7673
7674    // =====================================================================
7675    // Apply Predicate Pushdown - Helper Functions
7676    // =====================================================================
7677
7678    /// Split AND-connected predicates into a list.
7679    fn split_and_conjuncts(expr: &Expr) -> Vec<Expr> {
7680        match expr {
7681            Expr::BinaryOp {
7682                left,
7683                op: BinaryOp::And,
7684                right,
7685            } => {
7686                let mut result = Self::split_and_conjuncts(left);
7687                result.extend(Self::split_and_conjuncts(right));
7688                result
7689            }
7690            _ => vec![expr.clone()],
7691        }
7692    }
7693
7694    /// Combine predicates with AND.
7695    fn combine_predicates(predicates: Vec<Expr>) -> Option<Expr> {
7696        if predicates.is_empty() {
7697            return None;
7698        }
7699        let mut result = predicates[0].clone();
7700        for pred in predicates.iter().skip(1) {
7701            result = Expr::BinaryOp {
7702                left: Box::new(result),
7703                op: BinaryOp::And,
7704                right: Box::new(pred.clone()),
7705            };
7706        }
7707        Some(result)
7708    }
7709
7710    /// Collect all variable names referenced in an expression.
7711    fn collect_expr_variables(expr: &Expr) -> HashSet<String> {
7712        let mut vars = HashSet::new();
7713        Self::collect_expr_variables_impl(expr, &mut vars);
7714        vars
7715    }
7716
7717    fn collect_expr_variables_impl(expr: &Expr, vars: &mut HashSet<String>) {
7718        match expr {
7719            Expr::Variable(name) => {
7720                vars.insert(name.clone());
7721            }
7722            Expr::Property(inner, _) => {
7723                if let Expr::Variable(name) = inner.as_ref() {
7724                    vars.insert(name.clone());
7725                } else {
7726                    Self::collect_expr_variables_impl(inner, vars);
7727                }
7728            }
7729            Expr::BinaryOp { left, right, .. } => {
7730                Self::collect_expr_variables_impl(left, vars);
7731                Self::collect_expr_variables_impl(right, vars);
7732            }
7733            Expr::UnaryOp { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
7734            Expr::IsNull(e) | Expr::IsNotNull(e) => Self::collect_expr_variables_impl(e, vars),
7735            Expr::FunctionCall { args, .. } => {
7736                for arg in args {
7737                    Self::collect_expr_variables_impl(arg, vars);
7738                }
7739            }
7740            Expr::List(items) => {
7741                for item in items {
7742                    Self::collect_expr_variables_impl(item, vars);
7743                }
7744            }
7745            Expr::Case {
7746                expr,
7747                when_then,
7748                else_expr,
7749            } => {
7750                if let Some(e) = expr {
7751                    Self::collect_expr_variables_impl(e, vars);
7752                }
7753                for (w, t) in when_then {
7754                    Self::collect_expr_variables_impl(w, vars);
7755                    Self::collect_expr_variables_impl(t, vars);
7756                }
7757                if let Some(e) = else_expr {
7758                    Self::collect_expr_variables_impl(e, vars);
7759                }
7760            }
7761            Expr::LabelCheck { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
7762            // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
7763            // they introduce local variable bindings not in outer scope.
7764            _ => {}
7765        }
7766    }
7767
7768    /// Collect all variables produced by a logical plan.
7769    fn collect_plan_variables(plan: &LogicalPlan) -> HashSet<String> {
7770        let mut vars = HashSet::new();
7771        Self::collect_plan_variables_impl(plan, &mut vars);
7772        vars
7773    }
7774
7775    fn collect_plan_variables_impl(plan: &LogicalPlan, vars: &mut HashSet<String>) {
7776        match plan {
7777            LogicalPlan::Scan { variable, .. } => {
7778                vars.insert(variable.clone());
7779            }
7780            LogicalPlan::Traverse {
7781                target_variable,
7782                step_variable,
7783                input,
7784                path_variable,
7785                ..
7786            } => {
7787                vars.insert(target_variable.clone());
7788                if let Some(sv) = step_variable {
7789                    vars.insert(sv.clone());
7790                }
7791                if let Some(pv) = path_variable {
7792                    vars.insert(pv.clone());
7793                }
7794                Self::collect_plan_variables_impl(input, vars);
7795            }
7796            LogicalPlan::Filter { input, .. } => Self::collect_plan_variables_impl(input, vars),
7797            LogicalPlan::Project { input, projections } => {
7798                for (expr, alias) in projections {
7799                    if let Some(a) = alias {
7800                        vars.insert(a.clone());
7801                    } else if let Expr::Variable(v) = expr {
7802                        vars.insert(v.clone());
7803                    }
7804                }
7805                Self::collect_plan_variables_impl(input, vars);
7806            }
7807            LogicalPlan::Apply {
7808                input, subquery, ..
7809            } => {
7810                Self::collect_plan_variables_impl(input, vars);
7811                Self::collect_plan_variables_impl(subquery, vars);
7812            }
7813            LogicalPlan::CrossJoin { left, right } => {
7814                Self::collect_plan_variables_impl(left, vars);
7815                Self::collect_plan_variables_impl(right, vars);
7816            }
7817            LogicalPlan::Unwind {
7818                input, variable, ..
7819            } => {
7820                vars.insert(variable.clone());
7821                Self::collect_plan_variables_impl(input, vars);
7822            }
7823            LogicalPlan::Aggregate { input, .. } => {
7824                Self::collect_plan_variables_impl(input, vars);
7825            }
7826            LogicalPlan::Distinct { input } => {
7827                Self::collect_plan_variables_impl(input, vars);
7828            }
7829            LogicalPlan::Sort { input, .. } => {
7830                Self::collect_plan_variables_impl(input, vars);
7831            }
7832            LogicalPlan::Limit { input, .. } => {
7833                Self::collect_plan_variables_impl(input, vars);
7834            }
7835            LogicalPlan::VectorKnn { variable, .. } => {
7836                vars.insert(variable.clone());
7837            }
7838            LogicalPlan::ProcedureCall { yield_items, .. } => {
7839                for (name, alias) in yield_items {
7840                    vars.insert(alias.clone().unwrap_or_else(|| name.clone()));
7841                }
7842            }
7843            LogicalPlan::ShortestPath {
7844                input,
7845                path_variable,
7846                ..
7847            } => {
7848                vars.insert(path_variable.clone());
7849                Self::collect_plan_variables_impl(input, vars);
7850            }
7851            LogicalPlan::AllShortestPaths {
7852                input,
7853                path_variable,
7854                ..
7855            } => {
7856                vars.insert(path_variable.clone());
7857                Self::collect_plan_variables_impl(input, vars);
7858            }
7859            LogicalPlan::RecursiveCTE {
7860                initial, recursive, ..
7861            } => {
7862                Self::collect_plan_variables_impl(initial, vars);
7863                Self::collect_plan_variables_impl(recursive, vars);
7864            }
7865            LogicalPlan::SubqueryCall {
7866                input, subquery, ..
7867            } => {
7868                Self::collect_plan_variables_impl(input, vars);
7869                Self::collect_plan_variables_impl(subquery, vars);
7870            }
7871            _ => {}
7872        }
7873    }
7874
7875    /// Extract predicates that only reference variables from Apply's input.
7876    /// Returns (input_only_predicates, remaining_predicates).
7877    fn extract_apply_input_predicates(
7878        predicate: &Expr,
7879        input_variables: &HashSet<String>,
7880        subquery_new_variables: &HashSet<String>,
7881    ) -> (Vec<Expr>, Vec<Expr>) {
7882        let conjuncts = Self::split_and_conjuncts(predicate);
7883        let mut input_preds = Vec::new();
7884        let mut remaining = Vec::new();
7885
7886        for conj in conjuncts {
7887            let vars = Self::collect_expr_variables(&conj);
7888
7889            // Predicate only references input variables (none from subquery)
7890            let refs_input_only = vars.iter().all(|v| input_variables.contains(v));
7891            let refs_any_subquery = vars.iter().any(|v| subquery_new_variables.contains(v));
7892
7893            if refs_input_only && !refs_any_subquery && !vars.is_empty() {
7894                input_preds.push(conj);
7895            } else {
7896                remaining.push(conj);
7897            }
7898        }
7899
7900        (input_preds, remaining)
7901    }
7902
    /// Push eligible predicates into Apply.input_filter.
    /// This filters input rows BEFORE executing the correlated subquery.
    ///
    /// Walks `plan` top-down. At each `Apply` node, the conjuncts of
    /// `current_predicate` that reference only variables of the Apply's input
    /// are AND-ed onto that node's `input_filter`; `current_predicate` is then
    /// overwritten in place with the conjuncts that were not moved (or
    /// `Expr::TRUE` when all of them were). The walk continues into the
    /// Apply's `input`, but not into its `subquery`.
    ///
    /// `Filter`, `Project`, `Sort`, `Limit`, `Aggregate`, `CrossJoin` and
    /// `Traverse` nodes are rebuilt unchanged around their rewritten inputs;
    /// any other node is returned as-is without descending further.
    ///
    /// NOTE(review): the walk descends through `Limit` and `Aggregate`, so a
    /// predicate may end up applied below them — confirm callers only pass
    /// plans/predicates for which that is semantically valid.
    fn push_predicates_to_apply(plan: LogicalPlan, current_predicate: &mut Expr) -> LogicalPlan {
        match plan {
            LogicalPlan::Apply {
                input,
                subquery,
                input_filter,
            } => {
                // Collect variables from input plan
                let input_vars = Self::collect_plan_variables(&input);

                // Collect NEW variables introduced by subquery (not in input)
                let subquery_vars = Self::collect_plan_variables(&subquery);
                let new_subquery_vars: HashSet<String> =
                    subquery_vars.difference(&input_vars).cloned().collect();

                // Extract predicates that only reference input variables
                let (input_preds, remaining) = Self::extract_apply_input_predicates(
                    current_predicate,
                    &input_vars,
                    &new_subquery_vars,
                );

                // Update current_predicate to only remaining predicates
                // (the unwrap is safe: `remaining` is non-empty in this branch)
                *current_predicate = if remaining.is_empty() {
                    Expr::TRUE
                } else {
                    Self::combine_predicates(remaining).unwrap()
                };

                // Combine extracted predicates with existing input_filter
                let new_input_filter = if input_preds.is_empty() {
                    input_filter
                } else {
                    // Safe unwrap: `input_preds` is non-empty in this branch.
                    let extracted = Self::combine_predicates(input_preds).unwrap();
                    match input_filter {
                        Some(existing) => Some(Expr::BinaryOp {
                            left: Box::new(existing),
                            op: BinaryOp::And,
                            right: Box::new(extracted),
                        }),
                        None => Some(extracted),
                    }
                };

                // Recurse into input plan; nested Apply nodes only see the
                // predicates that were not consumed above.
                let new_input = Self::push_predicates_to_apply(*input, current_predicate);

                LogicalPlan::Apply {
                    input: Box::new(new_input),
                    subquery,
                    input_filter: new_input_filter,
                }
            }
            // Recurse into other plan nodes
            LogicalPlan::Filter {
                input,
                predicate,
                optional_variables,
            } => LogicalPlan::Filter {
                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
                predicate,
                optional_variables,
            },
            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
                projections,
            },
            LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
                order_by,
            },
            LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
                skip,
                fetch,
            },
            LogicalPlan::Aggregate {
                input,
                group_by,
                aggregates,
            } => LogicalPlan::Aggregate {
                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
                group_by,
                aggregates,
            },
            // The left side is processed first, so the right side only sees
            // whatever predicates the left side did not consume.
            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
                left: Box::new(Self::push_predicates_to_apply(*left, current_predicate)),
                right: Box::new(Self::push_predicates_to_apply(*right, current_predicate)),
            },
            // Only `input` is rewritten; every other field is moved across
            // unchanged.
            LogicalPlan::Traverse {
                input,
                edge_type_ids,
                direction,
                source_variable,
                target_variable,
                target_label_id,
                step_variable,
                min_hops,
                max_hops,
                optional,
                target_filter,
                path_variable,
                edge_properties,
                is_variable_length,
                optional_pattern_vars,
                scope_match_variables,
                edge_filter_expr,
                path_mode,
                qpp_steps,
            } => LogicalPlan::Traverse {
                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
                edge_type_ids,
                direction,
                source_variable,
                target_variable,
                target_label_id,
                step_variable,
                min_hops,
                max_hops,
                optional,
                target_filter,
                path_variable,
                edge_properties,
                is_variable_length,
                optional_pattern_vars,
                scope_match_variables,
                edge_filter_expr,
                path_mode,
                qpp_steps,
            },
            // Any other node: returned untouched, no further descent.
            other => other,
        }
    }
8038}
8039
8040/// Get the expected column name for an aggregate expression.
8041///
8042/// This is the single source of truth for aggregate column naming, used by:
8043/// - Logical planner (to create column references)
8044/// - Physical planner (to rename DataFusion's auto-generated column names)
8045/// - Fallback executor (to name result columns)
8046pub fn aggregate_column_name(expr: &Expr) -> String {
8047    expr.to_string_repr()
8048}
8049
/// Output produced by `EXPLAIN` — a human-readable plan with index and cost info.
///
/// Built by `QueryPlanner::explain_logical_plan`; serializable via serde.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExplainOutput {
    /// Pretty-printed (`{:#?}`) debug rendering of the logical plan tree.
    pub plan_text: String,
    /// Index usage entries collected from the scan operators in the plan.
    pub index_usage: Vec<IndexUsage>,
    /// Rough row and cost estimates for the full plan.
    pub cost_estimates: CostEstimates,
    /// Planner warnings (e.g., missing index, forced full scan).
    ///
    /// NOTE(review): `explain_logical_plan` currently always leaves this empty.
    pub warnings: Vec<String>,
    /// Suggested indexes that would improve this query.
    pub suggestions: Vec<IndexSuggestion>,
}
8064
/// Suggestion for creating an index to improve query performance.
///
/// Produced while building an `EXPLAIN` report; serializable via serde.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexSuggestion {
    /// Label or edge type that would benefit from the index. May be a
    /// descriptive placeholder when the planner could not resolve a concrete
    /// label.
    pub label_or_type: String,
    /// Property to index.
    pub property: String,
    /// Recommended index type as display text (e.g., `"SCALAR (BTree)"`).
    pub index_type: String,
    /// Human-readable explanation of the performance benefit.
    pub reason: String,
    /// Cypher statement template for creating the index. It may contain a
    /// placeholder label that has to be replaced before execution.
    pub create_statement: String,
}
8079
/// Index availability report for a single scan operator.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexUsage {
    /// Name of the label (or edge type) the scan targets.
    pub label_or_type: String,
    /// Property the index covers.
    pub property: String,
    /// Kind of index as display text (e.g., `"HASH"`, `"VECTOR"`).
    pub index_type: String,
    /// Whether the index was actually used for this scan.
    pub used: bool,
    /// Human-readable explanation of why the index was or was not used.
    pub reason: Option<String>,
}
8091
/// Rough cost and row count estimates for a complete logical plan.
///
/// NOTE(review): `QueryPlanner::estimate_costs` currently returns fixed
/// placeholder numbers, so these values do not yet vary with the plan.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CostEstimates {
    /// Estimated number of rows the plan will produce.
    pub estimated_rows: f64,
    /// Abstract cost units (lower is cheaper).
    pub estimated_cost: f64,
}
8100
8101impl QueryPlanner {
8102    /// Plan a query and produce an EXPLAIN report (plan text, index usage, costs).
8103    pub fn explain_plan(&self, ast: Query) -> Result<ExplainOutput> {
8104        let plan = self.plan(ast)?;
8105        self.explain_logical_plan(&plan)
8106    }
8107
8108    /// Produce an EXPLAIN report for an already-planned logical plan.
8109    pub fn explain_logical_plan(&self, plan: &LogicalPlan) -> Result<ExplainOutput> {
8110        let index_usage = self.analyze_index_usage(plan)?;
8111        let cost_estimates = self.estimate_costs(plan)?;
8112        let suggestions = self.collect_index_suggestions(plan);
8113        let warnings = Vec::new();
8114        let plan_text = format!("{:#?}", plan);
8115
8116        Ok(ExplainOutput {
8117            plan_text,
8118            index_usage,
8119            cost_estimates,
8120            warnings,
8121            suggestions,
8122        })
8123    }
8124
8125    fn analyze_index_usage(&self, plan: &LogicalPlan) -> Result<Vec<IndexUsage>> {
8126        let mut usage = Vec::new();
8127        self.collect_index_usage(plan, &mut usage);
8128        Ok(usage)
8129    }
8130
8131    fn collect_index_usage(&self, plan: &LogicalPlan, usage: &mut Vec<IndexUsage>) {
8132        match plan {
8133            LogicalPlan::Scan {
8134                label_id,
8135                filter: Some(filter),
8136                ..
8137            } => {
8138                // Detect indexed-property pushdown — issue #57. Run the same
8139                // analyzer the physical planner uses; if it reports a
8140                // hash-index hit, surface it in EXPLAIN.
8141                if let Some(label_name) = self.schema.label_name_by_id(*label_id) {
8142                    let analyzer = crate::query::pushdown::IndexAwareAnalyzer::new(&self.schema);
8143                    // The variable name is the scan's binding variable; we
8144                    // reach for it via the Scan node directly.
8145                    if let LogicalPlan::Scan { variable, .. } = plan {
8146                        let strategy = analyzer.analyze(filter, variable, *label_id);
8147                        for prop in strategy.hash_index_columns {
8148                            usage.push(IndexUsage {
8149                                label_or_type: label_name.to_string(),
8150                                property: prop,
8151                                index_type: "HASH".to_string(),
8152                                used: true,
8153                                reason: Some(
8154                                    "Hash index point lookup pushed into Lance scan".to_string(),
8155                                ),
8156                            });
8157                        }
8158                    }
8159                }
8160            }
8161            LogicalPlan::Scan { .. } => {}
8162            LogicalPlan::VectorKnn {
8163                label_id, property, ..
8164            } => {
8165                let label_name = self.schema.label_name_by_id(*label_id).unwrap_or("?");
8166                usage.push(IndexUsage {
8167                    label_or_type: label_name.to_string(),
8168                    property: property.clone(),
8169                    index_type: "VECTOR".to_string(),
8170                    used: true,
8171                    reason: None,
8172                });
8173            }
8174            LogicalPlan::Explain { plan } => self.collect_index_usage(plan, usage),
8175            LogicalPlan::Filter { input, .. } => self.collect_index_usage(input, usage),
8176            LogicalPlan::Project { input, .. } => self.collect_index_usage(input, usage),
8177            LogicalPlan::Limit { input, .. } => self.collect_index_usage(input, usage),
8178            LogicalPlan::Sort { input, .. } => self.collect_index_usage(input, usage),
8179            LogicalPlan::Aggregate { input, .. } => self.collect_index_usage(input, usage),
8180            LogicalPlan::Traverse { input, .. } => self.collect_index_usage(input, usage),
8181            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
8182                self.collect_index_usage(left, usage);
8183                self.collect_index_usage(right, usage);
8184            }
8185            _ => {}
8186        }
8187    }
8188
8189    fn estimate_costs(&self, _plan: &LogicalPlan) -> Result<CostEstimates> {
8190        Ok(CostEstimates {
8191            estimated_rows: 100.0,
8192            estimated_cost: 10.0,
8193        })
8194    }
8195
8196    /// Collect index suggestions based on query patterns.
8197    ///
8198    /// Currently detects:
8199    /// - Temporal predicates from `uni.validAt()` function calls
8200    /// - Temporal predicates from `VALID_AT` macro expansion
8201    fn collect_index_suggestions(&self, plan: &LogicalPlan) -> Vec<IndexSuggestion> {
8202        let mut suggestions = Vec::new();
8203        self.collect_temporal_suggestions(plan, &mut suggestions);
8204        suggestions
8205    }
8206
8207    /// Recursively collect temporal index suggestions from the plan.
8208    fn collect_temporal_suggestions(
8209        &self,
8210        plan: &LogicalPlan,
8211        suggestions: &mut Vec<IndexSuggestion>,
8212    ) {
8213        match plan {
8214            LogicalPlan::Filter {
8215                input, predicate, ..
8216            } => {
8217                // Check for temporal patterns in the predicate
8218                self.detect_temporal_pattern(predicate, suggestions);
8219                // Recurse into input
8220                self.collect_temporal_suggestions(input, suggestions);
8221            }
8222            LogicalPlan::Explain { plan } => self.collect_temporal_suggestions(plan, suggestions),
8223            LogicalPlan::Project { input, .. } => {
8224                self.collect_temporal_suggestions(input, suggestions)
8225            }
8226            LogicalPlan::Limit { input, .. } => {
8227                self.collect_temporal_suggestions(input, suggestions)
8228            }
8229            LogicalPlan::Sort { input, .. } => {
8230                self.collect_temporal_suggestions(input, suggestions)
8231            }
8232            LogicalPlan::Aggregate { input, .. } => {
8233                self.collect_temporal_suggestions(input, suggestions)
8234            }
8235            LogicalPlan::Traverse { input, .. } => {
8236                self.collect_temporal_suggestions(input, suggestions)
8237            }
8238            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
8239                self.collect_temporal_suggestions(left, suggestions);
8240                self.collect_temporal_suggestions(right, suggestions);
8241            }
8242            _ => {}
8243        }
8244    }
8245
8246    /// Detect temporal predicate patterns and suggest indexes.
8247    ///
8248    /// Detects two patterns:
8249    /// 1. `uni.validAt(node, 'start_prop', 'end_prop', time)` function call
8250    /// 2. `node.valid_from <= time AND (node.valid_to IS NULL OR node.valid_to > time)` from VALID_AT macro
8251    fn detect_temporal_pattern(&self, expr: &Expr, suggestions: &mut Vec<IndexSuggestion>) {
8252        match expr {
8253            // Pattern 1: uni.temporal.validAt() function call
8254            Expr::FunctionCall { name, args, .. }
8255                if (name.eq_ignore_ascii_case("uni.temporal.validAt")
8256                    || name.eq_ignore_ascii_case("validAt"))
8257                    && args.len() >= 2 =>
8258            {
8259                // args[0] = node, args[1] = start_prop, args[2] = end_prop, args[3] = time
8260                let start_prop = if let Some(Expr::Literal(CypherLiteral::String(s))) = args.get(1)
8261                {
8262                    s.clone()
8263                } else {
8264                    "valid_from".to_string()
8265                };
8266
8267                // Try to extract label from the node expression
8268                if let Some(var) = args.first().and_then(|e| e.extract_variable()) {
8269                    self.suggest_temporal_index(&var, &start_prop, suggestions);
8270                }
8271            }
8272
8273            // Pattern 2: VALID_AT macro expansion - look for property <= time pattern
8274            Expr::BinaryOp {
8275                left,
8276                op: BinaryOp::And,
8277                right,
8278            } => {
8279                // Check left side for `prop <= time` pattern (temporal start condition)
8280                if let Expr::BinaryOp {
8281                    left: prop_expr,
8282                    op: BinaryOp::LtEq,
8283                    ..
8284                } = left.as_ref()
8285                    && let Expr::Property(base, prop_name) = prop_expr.as_ref()
8286                    && (prop_name == "valid_from"
8287                        || prop_name.contains("start")
8288                        || prop_name.contains("from")
8289                        || prop_name.contains("begin"))
8290                    && let Some(var) = base.extract_variable()
8291                {
8292                    self.suggest_temporal_index(&var, prop_name, suggestions);
8293                }
8294
8295                // Recurse into both sides of AND
8296                self.detect_temporal_pattern(left.as_ref(), suggestions);
8297                self.detect_temporal_pattern(right.as_ref(), suggestions);
8298            }
8299
8300            // Recurse into other binary ops
8301            Expr::BinaryOp { left, right, .. } => {
8302                self.detect_temporal_pattern(left.as_ref(), suggestions);
8303                self.detect_temporal_pattern(right.as_ref(), suggestions);
8304            }
8305
8306            _ => {}
8307        }
8308    }
8309
8310    /// Suggest a scalar index for a temporal property if one doesn't already exist.
8311    fn suggest_temporal_index(
8312        &self,
8313        _variable: &str,
8314        property: &str,
8315        suggestions: &mut Vec<IndexSuggestion>,
8316    ) {
8317        // Check if a scalar index already exists for this property
8318        // We need to check all labels since we may not know the exact label from the variable
8319        let mut has_index = false;
8320
8321        for index in &self.schema.indexes {
8322            if let IndexDefinition::Scalar(config) = index
8323                && config.properties.contains(&property.to_string())
8324            {
8325                has_index = true;
8326                break;
8327            }
8328        }
8329
8330        if !has_index {
8331            // Avoid duplicate suggestions
8332            let already_suggested = suggestions.iter().any(|s| s.property == property);
8333            if !already_suggested {
8334                suggestions.push(IndexSuggestion {
8335                    label_or_type: "(detected from temporal query)".to_string(),
8336                    property: property.to_string(),
8337                    index_type: "SCALAR (BTree)".to_string(),
8338                    reason: format!(
8339                        "Temporal queries using '{}' can benefit from a scalar index for range scans",
8340                        property
8341                    ),
8342                    create_statement: format!(
8343                        "CREATE INDEX idx_{} FOR (n:YourLabel) ON (n.{})",
8344                        property, property
8345                    ),
8346                });
8347            }
8348        }
8349    }
8350
8351    /// Helper functions for expression normalization
8352    /// Normalize an expression for storage: strip variable prefixes
8353    /// For simple property: u.email -> "email"
8354    /// For expressions: lower(u.email) -> "lower(email)"
8355    fn normalize_expression_for_storage(expr: &Expr) -> String {
8356        match expr {
8357            Expr::Property(base, prop) if matches!(**base, Expr::Variable(_)) => prop.clone(),
8358            _ => {
8359                // Serialize expression and strip variable prefix
8360                let expr_str = expr.to_string_repr();
8361                Self::strip_variable_prefix(&expr_str)
8362            }
8363        }
8364    }
8365
8366    /// Strip variable references like "u.prop" from expression strings
8367    /// Converts "lower(u.email)" to "lower(email)"
8368    fn strip_variable_prefix(expr_str: &str) -> String {
8369        use regex::Regex;
8370        // Match patterns like "word.property" and replace with just "property"
8371        let re = Regex::new(r"\b\w+\.(\w+)").unwrap();
8372        re.replace_all(expr_str, "$1").to_string()
8373    }
8374
8375    /// Plan a schema command from the new AST
8376    fn plan_schema_command(&self, cmd: SchemaCommand) -> Result<LogicalPlan> {
8377        match cmd {
8378            SchemaCommand::CreateVectorIndex(c) => {
8379                use uni_common::vector_index_opts::{
8380                    VectorIndexOpts, build_vector_index_type, parse_vector_metric,
8381                };
8382                // `CREATE VECTOR INDEX … OPTIONS{type:'sparse'}` shares the vector DDL
8383                // surface but is a scored inverted index, not a dense ANN — route it to
8384                // the sparse path (mirrors the `uni.schema.createIndex` SPARSE arm in
8385                // `ddl_procedures.rs`). `build_vector_index_type` has no "sparse" case
8386                // and would otherwise fall through to the dense IVF_PQ default.
8387                if c.options.get("type").and_then(|v| v.as_str()) == Some("sparse") {
8388                    let dimensions = self
8389                        .schema
8390                        .properties
8391                        .get(&c.label)
8392                        .and_then(|props| props.get(&c.property))
8393                        .and_then(|meta| match &meta.r#type {
8394                            uni_common::DataType::SparseVector { dimensions } => Some(*dimensions),
8395                            _ => None,
8396                        })
8397                        .ok_or_else(|| {
8398                            anyhow::anyhow!(
8399                                "Property '{}' is not a SparseVector column; cannot create a sparse index",
8400                                c.property
8401                            )
8402                        })?;
8403                    let quantize = c
8404                        .options
8405                        .get("quantize")
8406                        .and_then(|v| v.as_bool())
8407                        .unwrap_or(true);
8408                    // `OPTIONS{type:'sparse', embedding:{alias, source}}` auto-embeds
8409                    // a text column into the sparse column (same parser as dense).
8410                    let embedding_config = match c.options.get("embedding") {
8411                        Some(emb_val) => Self::parse_embedding_config(emb_val)?,
8412                        None => None,
8413                    };
8414                    let config = SparseVectorIndexConfig {
8415                        name: c.name,
8416                        label: c.label,
8417                        property: c.property,
8418                        dimensions,
8419                        quantize,
8420                        embedding_config,
8421                        metadata: Default::default(),
8422                    };
8423                    return Ok(LogicalPlan::CreateSparseIndex {
8424                        config,
8425                        if_not_exists: c.if_not_exists,
8426                    });
8427                }
8428                // Accept either a numeric value (`partitions: 256`) or a quoted string
8429                // (`partitions: '256'`) — Cypher map literals produce the former.
8430                let opt = |key: &str| -> Option<u32> {
8431                    c.options.get(key).and_then(|v| {
8432                        v.as_u64()
8433                            .map(|n| n as u32)
8434                            .or_else(|| v.as_str().and_then(|s| s.parse::<u32>().ok()))
8435                    })
8436                };
8437                let opt_u8 = |key: &str| -> Option<u8> {
8438                    c.options.get(key).and_then(|v| {
8439                        v.as_u64()
8440                            .map(|n| n as u8)
8441                            .or_else(|| v.as_str().and_then(|s| s.parse::<u8>().ok()))
8442                    })
8443                };
8444                let opt_u64 = |key: &str| -> Option<u64> {
8445                    c.options.get(key).and_then(|v| {
8446                        v.as_u64()
8447                            .or_else(|| v.as_str().and_then(|s| s.parse::<u64>().ok()))
8448                    })
8449                };
8450                // Single source of truth (shared with the `uni.create_vector_index`
8451                // procedure) so dense / native-multivector / MUVERA behave identically.
8452                let index_type = build_vector_index_type(&VectorIndexOpts {
8453                    type_name: c.options.get("type").and_then(|v| v.as_str()),
8454                    partitions: opt("partitions"),
8455                    m: opt("m"),
8456                    ef_construction: opt("ef_construction"),
8457                    sub_vectors: opt("sub_vectors"),
8458                    num_bits: opt_u8("num_bits"),
8459                    k_sim: opt("k_sim"),
8460                    reps: opt("reps"),
8461                    d_proj: opt("d_proj"),
8462                    seed: opt_u64("seed"),
8463                    inner: c.options.get("inner").and_then(|v| v.as_str()),
8464                });
8465
8466                // Parse embedding config from options
8467                let embedding_config = if let Some(emb_val) = c.options.get("embedding") {
8468                    Self::parse_embedding_config(emb_val)?
8469                } else {
8470                    None
8471                };
8472
8473                // Parse the distance metric from OPTIONS (default Cosine).
8474                let metric = parse_vector_metric(c.options.get("metric").and_then(|v| v.as_str()))?;
8475
8476                let config = VectorIndexConfig {
8477                    name: c.name,
8478                    label: c.label,
8479                    property: c.property,
8480                    metric,
8481                    index_type,
8482                    embedding_config,
8483                    metadata: Default::default(),
8484                };
8485                Ok(LogicalPlan::CreateVectorIndex {
8486                    config,
8487                    if_not_exists: c.if_not_exists,
8488                })
8489            }
8490            SchemaCommand::CreateFullTextIndex(cfg) => Ok(LogicalPlan::CreateFullTextIndex {
8491                config: FullTextIndexConfig {
8492                    name: cfg.name,
8493                    label: cfg.label,
8494                    properties: cfg.properties,
8495                    tokenizer: TokenizerConfig::Standard,
8496                    with_positions: true,
8497                    metadata: Default::default(),
8498                },
8499                if_not_exists: cfg.if_not_exists,
8500            }),
8501            SchemaCommand::CreateScalarIndex(cfg) => {
8502                // Convert expressions to storage strings (strip variable prefix)
8503                let properties: Vec<String> = cfg
8504                    .expressions
8505                    .iter()
8506                    .map(Self::normalize_expression_for_storage)
8507                    .collect();
8508
8509                Ok(LogicalPlan::CreateScalarIndex {
8510                    config: ScalarIndexConfig {
8511                        name: cfg.name,
8512                        label: cfg.label,
8513                        properties,
8514                        index_type: ScalarIndexType::BTree,
8515                        where_clause: cfg.where_clause.map(|e| e.to_string_repr()),
8516                        metadata: Default::default(),
8517                    },
8518                    if_not_exists: cfg.if_not_exists,
8519                })
8520            }
8521            SchemaCommand::CreateJsonFtsIndex(cfg) => {
8522                let with_positions = cfg
8523                    .options
8524                    .get("with_positions")
8525                    .and_then(|v| v.as_bool())
8526                    .unwrap_or(false);
8527                Ok(LogicalPlan::CreateJsonFtsIndex {
8528                    config: JsonFtsIndexConfig {
8529                        name: cfg.name,
8530                        label: cfg.label,
8531                        column: cfg.column,
8532                        paths: Vec::new(),
8533                        with_positions,
8534                        metadata: Default::default(),
8535                    },
8536                    if_not_exists: cfg.if_not_exists,
8537                })
8538            }
8539            SchemaCommand::DropIndex(drop) => Ok(LogicalPlan::DropIndex {
8540                name: drop.name,
8541                if_exists: false, // new AST doesn't have if_exists for DROP INDEX yet
8542            }),
8543            SchemaCommand::CreateConstraint(c) => Ok(LogicalPlan::CreateConstraint(c)),
8544            SchemaCommand::DropConstraint(c) => Ok(LogicalPlan::DropConstraint(c)),
8545            SchemaCommand::CreateLabel(c) => Ok(LogicalPlan::CreateLabel(c)),
8546            SchemaCommand::CreateEdgeType(c) => Ok(LogicalPlan::CreateEdgeType(c)),
8547            SchemaCommand::AlterLabel(c) => Ok(LogicalPlan::AlterLabel(c)),
8548            SchemaCommand::AlterEdgeType(c) => Ok(LogicalPlan::AlterEdgeType(c)),
8549            SchemaCommand::DropLabel(c) => Ok(LogicalPlan::DropLabel(c)),
8550            SchemaCommand::DropEdgeType(c) => Ok(LogicalPlan::DropEdgeType(c)),
8551            SchemaCommand::ShowConstraints(c) => Ok(LogicalPlan::ShowConstraints(c)),
8552            SchemaCommand::ShowIndexes(c) => Ok(LogicalPlan::ShowIndexes { filter: c.filter }),
8553            SchemaCommand::ShowDatabase => Ok(LogicalPlan::ShowDatabase),
8554            SchemaCommand::ShowConfig => Ok(LogicalPlan::ShowConfig),
8555            SchemaCommand::ShowStatistics => Ok(LogicalPlan::ShowStatistics),
8556            SchemaCommand::Vacuum => Ok(LogicalPlan::Vacuum),
8557            SchemaCommand::Checkpoint => Ok(LogicalPlan::Checkpoint),
8558            SchemaCommand::Backup { path } => Ok(LogicalPlan::Backup {
8559                destination: path,
8560                options: HashMap::new(),
8561            }),
8562            SchemaCommand::CopyTo(cmd) => Ok(LogicalPlan::CopyTo {
8563                label: cmd.label,
8564                path: cmd.path,
8565                format: cmd.format,
8566                options: cmd.options,
8567            }),
8568            SchemaCommand::CopyFrom(cmd) => Ok(LogicalPlan::CopyFrom {
8569                label: cmd.label,
8570                path: cmd.path,
8571                format: cmd.format,
8572                options: cmd.options,
8573            }),
8574        }
8575    }
8576
8577    fn parse_embedding_config(emb_val: &Value) -> Result<Option<EmbeddingConfig>> {
8578        let obj = emb_val
8579            .as_object()
8580            .ok_or_else(|| anyhow!("embedding option must be an object"))?;
8581
8582        // Parse alias (required)
8583        let alias = obj
8584            .get("alias")
8585            .and_then(|v| v.as_str())
8586            .ok_or_else(|| anyhow!("embedding.alias is required"))?;
8587
8588        // Parse source properties (required)
8589        let source_properties = obj
8590            .get("source")
8591            .and_then(|v| v.as_array())
8592            .ok_or_else(|| anyhow!("embedding.source is required and must be an array"))?
8593            .iter()
8594            .filter_map(|v| v.as_str().map(|s| s.to_string()))
8595            .collect::<Vec<_>>();
8596
8597        if source_properties.is_empty() {
8598            return Err(anyhow!(
8599                "embedding.source must contain at least one property"
8600            ));
8601        }
8602
8603        let batch_size = obj
8604            .get("batch_size")
8605            .and_then(|v| v.as_u64())
8606            .map(|v| v as usize)
8607            .unwrap_or(32);
8608
8609        let document_prefix = obj
8610            .get("document_prefix")
8611            .and_then(|v| v.as_str())
8612            .map(|s| s.to_string());
8613
8614        let query_prefix = obj
8615            .get("query_prefix")
8616            .and_then(|v| v.as_str())
8617            .map(|s| s.to_string());
8618
8619        Ok(Some(EmbeddingConfig {
8620            alias: alias.to_string(),
8621            source_properties,
8622            batch_size,
8623            document_prefix,
8624            query_prefix,
8625        }))
8626    }
8627}
8628
8629/// Collect all properties referenced anywhere in the LogicalPlan tree.
8630///
8631/// This is critical for window functions: properties must be materialized
8632/// at the Scan node so they're available for window operations later.
8633///
8634/// Returns a mapping of variable name → property names (e.g., "e" → {"dept", "salary"}).
8635pub fn collect_properties_from_plan(plan: &LogicalPlan) -> HashMap<String, HashSet<String>> {
8636    let mut properties: HashMap<String, HashSet<String>> = HashMap::new();
8637    collect_properties_recursive(plan, &mut properties);
8638    properties
8639}
8640
8641/// Recursively walk the LogicalPlan tree and collect all property references.
8642fn collect_properties_recursive(
8643    plan: &LogicalPlan,
8644    properties: &mut HashMap<String, HashSet<String>>,
8645) {
8646    match plan {
8647        LogicalPlan::Window {
8648            input,
8649            window_exprs,
8650        } => {
8651            // Collect from window expressions
8652            for expr in window_exprs {
8653                collect_properties_from_expr_into(expr, properties);
8654            }
8655            collect_properties_recursive(input, properties);
8656        }
8657        LogicalPlan::Project { input, projections } => {
8658            for (expr, _alias) in projections {
8659                collect_properties_from_expr_into(expr, properties);
8660            }
8661            collect_properties_recursive(input, properties);
8662        }
8663        LogicalPlan::Sort { input, order_by } => {
8664            for sort_item in order_by {
8665                collect_properties_from_expr_into(&sort_item.expr, properties);
8666            }
8667            collect_properties_recursive(input, properties);
8668        }
8669        LogicalPlan::Filter {
8670            input, predicate, ..
8671        } => {
8672            collect_properties_from_expr_into(predicate, properties);
8673            collect_properties_recursive(input, properties);
8674        }
8675        LogicalPlan::Aggregate {
8676            input,
8677            group_by,
8678            aggregates,
8679        } => {
8680            for expr in group_by {
8681                collect_properties_from_expr_into(expr, properties);
8682            }
8683            for expr in aggregates {
8684                collect_properties_from_expr_into(expr, properties);
8685            }
8686            collect_properties_recursive(input, properties);
8687        }
8688        LogicalPlan::Scan {
8689            filter: Some(expr), ..
8690        } => {
8691            collect_properties_from_expr_into(expr, properties);
8692        }
8693        LogicalPlan::Scan { filter: None, .. } => {}
8694        LogicalPlan::ExtIdLookup {
8695            filter: Some(expr), ..
8696        } => {
8697            collect_properties_from_expr_into(expr, properties);
8698        }
8699        LogicalPlan::ExtIdLookup { filter: None, .. } => {}
8700        LogicalPlan::ScanAll {
8701            filter: Some(expr), ..
8702        } => {
8703            collect_properties_from_expr_into(expr, properties);
8704        }
8705        LogicalPlan::ScanAll { filter: None, .. } => {}
8706        LogicalPlan::ScanMainByLabels {
8707            filter: Some(expr), ..
8708        } => {
8709            collect_properties_from_expr_into(expr, properties);
8710        }
8711        LogicalPlan::ScanMainByLabels { filter: None, .. } => {}
8712        LogicalPlan::TraverseMainByType {
8713            input,
8714            target_filter,
8715            ..
8716        } => {
8717            if let Some(expr) = target_filter {
8718                collect_properties_from_expr_into(expr, properties);
8719            }
8720            collect_properties_recursive(input, properties);
8721        }
8722        LogicalPlan::Traverse {
8723            input,
8724            target_filter,
8725            step_variable: _,
8726            ..
8727        } => {
8728            if let Some(expr) = target_filter {
8729                collect_properties_from_expr_into(expr, properties);
8730            }
8731            // Note: Edge properties (step_variable) will be collected from expressions
8732            // that reference them. The edge_properties field in LogicalPlan is populated
8733            // later during physical planning based on this collected map.
8734            collect_properties_recursive(input, properties);
8735        }
8736        LogicalPlan::Unwind { input, expr, .. } => {
8737            collect_properties_from_expr_into(expr, properties);
8738            collect_properties_recursive(input, properties);
8739        }
8740        LogicalPlan::Create { input, pattern } => {
8741            // Mark variables referenced in CREATE patterns with "*" so plan_scan
8742            // adds structural projections (bare entity columns). Without this,
8743            // execute_create_pattern() can't find bound variables and creates
8744            // spurious new nodes instead of using existing MATCH'd ones.
8745            mark_pattern_variables(pattern, properties);
8746            collect_properties_recursive(input, properties);
8747        }
8748        LogicalPlan::CreateBatch { input, patterns } => {
8749            for pattern in patterns {
8750                mark_pattern_variables(pattern, properties);
8751            }
8752            collect_properties_recursive(input, properties);
8753        }
8754        LogicalPlan::Merge {
8755            input,
8756            pattern,
8757            on_match,
8758            on_create,
8759        } => {
8760            mark_pattern_variables(pattern, properties);
8761            if let Some(set_clause) = on_match {
8762                mark_set_item_variables(&set_clause.items, properties);
8763            }
8764            if let Some(set_clause) = on_create {
8765                mark_set_item_variables(&set_clause.items, properties);
8766            }
8767            collect_properties_recursive(input, properties);
8768        }
8769        LogicalPlan::Set { input, items } => {
8770            mark_set_item_variables(items, properties);
8771            collect_properties_recursive(input, properties);
8772        }
8773        LogicalPlan::Remove { input, items } => {
8774            for item in items {
8775                match item {
8776                    RemoveItem::Property(expr) => {
8777                        // REMOVE n.prop — collect the property and mark the variable
8778                        // with "*" so full structural projection is applied.
8779                        collect_properties_from_expr_into(expr, properties);
8780                        if let Expr::Property(base, _) = expr
8781                            && let Expr::Variable(var) = base.as_ref()
8782                        {
8783                            properties
8784                                .entry(var.clone())
8785                                .or_default()
8786                                .insert("*".to_string());
8787                        }
8788                    }
8789                    RemoveItem::Labels { variable, .. } => {
8790                        // REMOVE n:Label — mark n with "*"
8791                        properties
8792                            .entry(variable.clone())
8793                            .or_default()
8794                            .insert("*".to_string());
8795                    }
8796                }
8797            }
8798            collect_properties_recursive(input, properties);
8799        }
8800        LogicalPlan::Delete { input, items, .. } => {
8801            for expr in items {
8802                collect_properties_from_expr_into(expr, properties);
8803            }
8804            collect_properties_recursive(input, properties);
8805        }
8806        LogicalPlan::Foreach {
8807            input, list, body, ..
8808        } => {
8809            collect_properties_from_expr_into(list, properties);
8810            for plan in body {
8811                collect_properties_recursive(plan, properties);
8812            }
8813            collect_properties_recursive(input, properties);
8814        }
8815        LogicalPlan::Limit { input, .. } => {
8816            collect_properties_recursive(input, properties);
8817        }
8818        LogicalPlan::CrossJoin { left, right } => {
8819            collect_properties_recursive(left, properties);
8820            collect_properties_recursive(right, properties);
8821        }
8822        LogicalPlan::Apply {
8823            input,
8824            subquery,
8825            input_filter,
8826        } => {
8827            if let Some(expr) = input_filter {
8828                collect_properties_from_expr_into(expr, properties);
8829            }
8830            collect_properties_recursive(input, properties);
8831            collect_properties_recursive(subquery, properties);
8832        }
8833        LogicalPlan::Union { left, right, .. } => {
8834            collect_properties_recursive(left, properties);
8835            collect_properties_recursive(right, properties);
8836        }
8837        LogicalPlan::RecursiveCTE {
8838            initial, recursive, ..
8839        } => {
8840            collect_properties_recursive(initial, properties);
8841            collect_properties_recursive(recursive, properties);
8842        }
8843        LogicalPlan::ProcedureCall { arguments, .. } => {
8844            for arg in arguments {
8845                collect_properties_from_expr_into(arg, properties);
8846            }
8847        }
8848        LogicalPlan::VectorKnn { query, .. } => {
8849            collect_properties_from_expr_into(query, properties);
8850        }
8851        LogicalPlan::InvertedIndexLookup { terms, .. } => {
8852            collect_properties_from_expr_into(terms, properties);
8853        }
8854        LogicalPlan::ShortestPath { input, .. } => {
8855            collect_properties_recursive(input, properties);
8856        }
8857        LogicalPlan::AllShortestPaths { input, .. } => {
8858            collect_properties_recursive(input, properties);
8859        }
8860        LogicalPlan::Distinct { input } => {
8861            collect_properties_recursive(input, properties);
8862        }
8863        LogicalPlan::QuantifiedPattern {
8864            input,
8865            pattern_plan,
8866            ..
8867        } => {
8868            collect_properties_recursive(input, properties);
8869            collect_properties_recursive(pattern_plan, properties);
8870        }
8871        LogicalPlan::BindZeroLengthPath { input, .. } => {
8872            collect_properties_recursive(input, properties);
8873        }
8874        LogicalPlan::BindPath { input, .. } => {
8875            collect_properties_recursive(input, properties);
8876        }
8877        LogicalPlan::SubqueryCall { input, subquery } => {
8878            collect_properties_recursive(input, properties);
8879            collect_properties_recursive(subquery, properties);
8880        }
8881        LogicalPlan::LocyProject {
8882            input, projections, ..
8883        } => {
8884            for (expr, _alias) in projections {
8885                match expr {
8886                    // Bare variable in LocyProject: only need _vid for node variables
8887                    // (plan_locy_project extracts VID directly). Adding "*" would create
8888                    // a structural Struct column that conflicts with derived scan columns.
8889                    Expr::Variable(name) if !name.contains('.') => {
8890                        properties
8891                            .entry(name.clone())
8892                            .or_default()
8893                            .insert("_vid".to_string());
8894                    }
8895                    _ => collect_properties_from_expr_into(expr, properties),
8896                }
8897            }
8898            collect_properties_recursive(input, properties);
8899        }
8900        LogicalPlan::LocyFold {
8901            input,
8902            fold_bindings,
8903            ..
8904        } => {
8905            for (_name, expr) in fold_bindings {
8906                collect_properties_from_expr_into(expr, properties);
8907            }
8908            collect_properties_recursive(input, properties);
8909        }
8910        LogicalPlan::LocyBestBy {
8911            input, criteria, ..
8912        } => {
8913            for (expr, _asc) in criteria {
8914                collect_properties_from_expr_into(expr, properties);
8915            }
8916            collect_properties_recursive(input, properties);
8917        }
8918        LogicalPlan::LocyPriority { input, .. } => {
8919            collect_properties_recursive(input, properties);
8920        }
8921        LogicalPlan::LocyModelInvoke { input, .. } => {
8922            // Model invocations don't introduce new property accesses
8923            // — feature expressions are lifted to hidden YIELD items
8924            // by `extract_model_invocations` (uni-locy typecheck) and
8925            // their property refs are already collected via the
8926            // wrapped LocyProject's projection walk.
8927            collect_properties_recursive(input, properties);
8928        }
8929        // DDL and other plans don't reference properties
8930        _ => {}
8931    }
8932}
8933
8934/// Mark target variables from SET items with "*" and collect value expressions.
8935fn mark_set_item_variables(items: &[SetItem], properties: &mut HashMap<String, HashSet<String>>) {
8936    for item in items {
8937        match item {
8938            SetItem::Property { expr, value } => {
8939                // SET n.prop = val — mark n with STRUCT_ONLY_SENTINEL so the
8940                // scan builds the bare `n` struct column (needed for executor
8941                // `row.get(var_name)`) WITHOUT pulling the full schema. The
8942                // explicit `prop` is collected via `collect_properties_from_expr_into`
8943                // below and joins the variable's HashSet alongside the sentinel.
8944                //
8945                // If the same variable is also referenced bare elsewhere
8946                // (e.g. `SET n.x = 1 RETURN n`), `collect_properties_from_expr_into`
8947                // inserts "*" through the bare-Variable path; "*" dominates
8948                // the sentinel in `resolve_properties`, so the full schema
8949                // is still pulled when actually required.
8950                collect_properties_from_expr_into(expr, properties);
8951                collect_properties_from_expr_into(value, properties);
8952                if let Expr::Property(base, _) = expr
8953                    && let Expr::Variable(var) = base.as_ref()
8954                {
8955                    properties
8956                        .entry(var.clone())
8957                        .or_default()
8958                        .insert(STRUCT_ONLY_SENTINEL.to_string());
8959                }
8960            }
8961            SetItem::Labels { variable, .. } => {
8962                // SET n:Label — need full access to n
8963                properties
8964                    .entry(variable.clone())
8965                    .or_default()
8966                    .insert("*".to_string());
8967            }
8968            SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
8969                // SET n = {props} or SET n += {props}
8970                properties
8971                    .entry(variable.clone())
8972                    .or_default()
8973                    .insert("*".to_string());
8974                collect_properties_from_expr_into(value, properties);
8975            }
8976        }
8977    }
8978}
8979
8980/// Mark all variables in a CREATE/MERGE pattern with "*" so that plan_scan
8981/// adds structural projections (bare entity Struct columns) for them.
8982/// This is needed so that execute_create_pattern() can find bound variables
8983/// in the row HashMap and reuse existing nodes instead of creating new ones.
8984fn mark_pattern_variables(pattern: &Pattern, properties: &mut HashMap<String, HashSet<String>>) {
8985    for path in &pattern.paths {
8986        if let Some(ref v) = path.variable {
8987            properties
8988                .entry(v.clone())
8989                .or_default()
8990                .insert("*".to_string());
8991        }
8992        for element in &path.elements {
8993            match element {
8994                PatternElement::Node(n) => {
8995                    if let Some(ref v) = n.variable {
8996                        properties
8997                            .entry(v.clone())
8998                            .or_default()
8999                            .insert("*".to_string());
9000                    }
9001                    // Also collect properties from inline property expressions
9002                    if let Some(ref props) = n.properties {
9003                        collect_properties_from_expr_into(props, properties);
9004                    }
9005                }
9006                PatternElement::Relationship(r) => {
9007                    if let Some(ref v) = r.variable {
9008                        properties
9009                            .entry(v.clone())
9010                            .or_default()
9011                            .insert("*".to_string());
9012                    }
9013                    if let Some(ref props) = r.properties {
9014                        collect_properties_from_expr_into(props, properties);
9015                    }
9016                }
9017                PatternElement::Parenthesized { pattern, .. } => {
9018                    let sub = Pattern {
9019                        paths: vec![pattern.as_ref().clone()],
9020                    };
9021                    mark_pattern_variables(&sub, properties);
9022                }
9023            }
9024        }
9025    }
9026}
9027
9028/// Collect properties from an expression into a HashMap.
9029fn collect_properties_from_expr_into(
9030    expr: &Expr,
9031    properties: &mut HashMap<String, HashSet<String>>,
9032) {
9033    match expr {
9034        Expr::PatternComprehension {
9035            where_clause,
9036            map_expr,
9037            ..
9038        } => {
9039            // Collect properties from the WHERE clause and map expression.
9040            // The pattern itself creates local bindings that don't need
9041            // property collection from the outer scope.
9042            if let Some(where_expr) = where_clause {
9043                collect_properties_from_expr_into(where_expr, properties);
9044            }
9045            collect_properties_from_expr_into(map_expr, properties);
9046        }
9047        Expr::Variable(name) => {
9048            // Handle transformed property expressions like "e.dept" (after transform_window_expr_properties)
9049            if let Some((var, prop)) = name.split_once('.') {
9050                properties
9051                    .entry(var.to_string())
9052                    .or_default()
9053                    .insert(prop.to_string());
9054            } else {
9055                // Bare variable (e.g., RETURN n) — needs all properties materialized
9056                properties
9057                    .entry(name.clone())
9058                    .or_default()
9059                    .insert("*".to_string());
9060            }
9061        }
9062        Expr::Property(base, name) => {
9063            // Extract variable name from the base expression
9064            if let Expr::Variable(var) = base.as_ref() {
9065                properties
9066                    .entry(var.clone())
9067                    .or_default()
9068                    .insert(name.clone());
9069                // Don't recurse into Variable — that would mark it as a bare
9070                // variable reference (adding "*") when it's just a property base.
9071            } else {
9072                // Recurse for complex base expressions (nested property, function call, etc.)
9073                collect_properties_from_expr_into(base, properties);
9074            }
9075        }
9076        Expr::BinaryOp { left, right, .. } => {
9077            collect_properties_from_expr_into(left, properties);
9078            collect_properties_from_expr_into(right, properties);
9079        }
9080        Expr::FunctionCall {
9081            name,
9082            args,
9083            window_spec,
9084            ..
9085        } => {
9086            // Analyze function for property requirements (pushdown hydration)
9087            analyze_function_property_requirements(name, args, properties);
9088
9089            // Collect from arguments
9090            for arg in args {
9091                collect_properties_from_expr_into(arg, properties);
9092            }
9093
9094            // Collect from window spec (PARTITION BY, ORDER BY)
9095            if let Some(spec) = window_spec {
9096                for part_expr in &spec.partition_by {
9097                    collect_properties_from_expr_into(part_expr, properties);
9098                }
9099                for sort_item in &spec.order_by {
9100                    collect_properties_from_expr_into(&sort_item.expr, properties);
9101                }
9102            }
9103        }
9104        Expr::UnaryOp { expr, .. } => {
9105            collect_properties_from_expr_into(expr, properties);
9106        }
9107        Expr::List(items) => {
9108            for item in items {
9109                collect_properties_from_expr_into(item, properties);
9110            }
9111        }
9112        Expr::Map(entries) => {
9113            for (_key, value) in entries {
9114                collect_properties_from_expr_into(value, properties);
9115            }
9116        }
9117        Expr::ListComprehension {
9118            list,
9119            where_clause,
9120            map_expr,
9121            ..
9122        } => {
9123            collect_properties_from_expr_into(list, properties);
9124            if let Some(where_expr) = where_clause {
9125                collect_properties_from_expr_into(where_expr, properties);
9126            }
9127            collect_properties_from_expr_into(map_expr, properties);
9128        }
9129        Expr::Case {
9130            expr,
9131            when_then,
9132            else_expr,
9133        } => {
9134            if let Some(scrutinee_expr) = expr {
9135                collect_properties_from_expr_into(scrutinee_expr, properties);
9136            }
9137            for (when, then) in when_then {
9138                collect_properties_from_expr_into(when, properties);
9139                collect_properties_from_expr_into(then, properties);
9140            }
9141            if let Some(default_expr) = else_expr {
9142                collect_properties_from_expr_into(default_expr, properties);
9143            }
9144        }
9145        Expr::Quantifier {
9146            list, predicate, ..
9147        } => {
9148            collect_properties_from_expr_into(list, properties);
9149            collect_properties_from_expr_into(predicate, properties);
9150        }
9151        Expr::Reduce {
9152            init, list, expr, ..
9153        } => {
9154            collect_properties_from_expr_into(init, properties);
9155            collect_properties_from_expr_into(list, properties);
9156            collect_properties_from_expr_into(expr, properties);
9157        }
9158        Expr::Exists { query, .. } => {
9159            // Walk into EXISTS body to collect property references for outer-scope variables.
9160            // This ensures correlated properties (e.g., a.city inside EXISTS where a is outer)
9161            // are included in the outer scan's property list. Extra properties collected for
9162            // inner-only variables are harmless — the outer scan ignores unknown variable names.
9163            collect_properties_from_subquery(query, properties);
9164        }
9165        Expr::CountSubquery(query) | Expr::CollectSubquery(query) => {
9166            collect_properties_from_subquery(query, properties);
9167        }
9168        Expr::IsNull(expr) | Expr::IsNotNull(expr) | Expr::IsUnique(expr) => {
9169            collect_properties_from_expr_into(expr, properties);
9170        }
9171        Expr::In { expr, list } => {
9172            collect_properties_from_expr_into(expr, properties);
9173            collect_properties_from_expr_into(list, properties);
9174        }
9175        Expr::ArrayIndex { array, index } => {
9176            if let Expr::Variable(var) = array.as_ref() {
9177                if let Expr::Literal(CypherLiteral::String(prop_name)) = index.as_ref() {
9178                    // Static string key: e['name'] → only need that specific property
9179                    properties
9180                        .entry(var.clone())
9181                        .or_default()
9182                        .insert(prop_name.clone());
9183                } else {
9184                    // Dynamic property access: e[prop] → need all properties
9185                    properties
9186                        .entry(var.clone())
9187                        .or_default()
9188                        .insert("*".to_string());
9189                }
9190            }
9191            collect_properties_from_expr_into(array, properties);
9192            collect_properties_from_expr_into(index, properties);
9193        }
9194        Expr::ArraySlice { array, start, end } => {
9195            collect_properties_from_expr_into(array, properties);
9196            if let Some(start_expr) = start {
9197                collect_properties_from_expr_into(start_expr, properties);
9198            }
9199            if let Some(end_expr) = end {
9200                collect_properties_from_expr_into(end_expr, properties);
9201            }
9202        }
9203        Expr::ValidAt {
9204            entity,
9205            timestamp,
9206            start_prop,
9207            end_prop,
9208        } => {
9209            // Extract property requirements from ValidAt expression
9210            if let Expr::Variable(var) = entity.as_ref() {
9211                if let Some(prop) = start_prop {
9212                    properties
9213                        .entry(var.clone())
9214                        .or_default()
9215                        .insert(prop.clone());
9216                }
9217                if let Some(prop) = end_prop {
9218                    properties
9219                        .entry(var.clone())
9220                        .or_default()
9221                        .insert(prop.clone());
9222                }
9223            }
9224            collect_properties_from_expr_into(entity, properties);
9225            collect_properties_from_expr_into(timestamp, properties);
9226        }
9227        Expr::MapProjection { base, items } => {
9228            collect_properties_from_expr_into(base, properties);
9229            for item in items {
9230                match item {
9231                    uni_cypher::ast::MapProjectionItem::Property(prop) => {
9232                        if let Expr::Variable(var) = base.as_ref() {
9233                            properties
9234                                .entry(var.clone())
9235                                .or_default()
9236                                .insert(prop.clone());
9237                        }
9238                    }
9239                    uni_cypher::ast::MapProjectionItem::AllProperties => {
9240                        if let Expr::Variable(var) = base.as_ref() {
9241                            properties
9242                                .entry(var.clone())
9243                                .or_default()
9244                                .insert("*".to_string());
9245                        }
9246                    }
9247                    uni_cypher::ast::MapProjectionItem::LiteralEntry(_, expr) => {
9248                        collect_properties_from_expr_into(expr, properties);
9249                    }
9250                    uni_cypher::ast::MapProjectionItem::Variable(_) => {}
9251                }
9252            }
9253        }
9254        Expr::LabelCheck { expr, .. } => {
9255            collect_properties_from_expr_into(expr, properties);
9256        }
9257        // Parameters reference outer-scope variables (e.g., $p in correlated subqueries).
9258        // Mark them with "*" so the outer scan produces structural projections that
9259        // extract_row_params can resolve.
9260        Expr::Parameter(name) => {
9261            properties
9262                .entry(name.clone())
9263                .or_default()
9264                .insert("*".to_string());
9265        }
9266        // Literals and wildcard don't reference properties
9267        Expr::Literal(_) | Expr::Wildcard => {}
9268    }
9269}
9270
9271/// Walk a subquery (EXISTS/COUNT/COLLECT body) and collect property references.
9272///
9273/// This is needed so that correlated property accesses like `a.city` inside
9274/// `WHERE EXISTS { (a)-[:KNOWS]->(b) WHERE b.city = a.city }` cause the outer
9275/// scan to include `a.city` in its projected columns.
9276fn collect_properties_from_subquery(
9277    query: &Query,
9278    properties: &mut HashMap<String, HashSet<String>>,
9279) {
9280    match query {
9281        Query::Single(stmt) => {
9282            for clause in &stmt.clauses {
9283                match clause {
9284                    Clause::Match(m) => {
9285                        if let Some(ref wc) = m.where_clause {
9286                            collect_properties_from_expr_into(wc, properties);
9287                        }
9288                    }
9289                    Clause::With(w) => {
9290                        for item in &w.items {
9291                            if let ReturnItem::Expr { expr, .. } = item {
9292                                collect_properties_from_expr_into(expr, properties);
9293                            }
9294                        }
9295                        if let Some(ref wc) = w.where_clause {
9296                            collect_properties_from_expr_into(wc, properties);
9297                        }
9298                    }
9299                    Clause::Return(r) => {
9300                        for item in &r.items {
9301                            if let ReturnItem::Expr { expr, .. } = item {
9302                                collect_properties_from_expr_into(expr, properties);
9303                            }
9304                        }
9305                    }
9306                    _ => {}
9307                }
9308            }
9309        }
9310        Query::Union { left, right, .. } => {
9311            collect_properties_from_subquery(left, properties);
9312            collect_properties_from_subquery(right, properties);
9313        }
9314        _ => {}
9315    }
9316}
9317
9318/// Analyze function calls to extract property requirements for pushdown hydration
9319///
9320/// This function examines function calls and their arguments to determine which properties
9321/// need to be loaded for entity arguments. For example:
9322/// - validAt(e, 'start', 'end', ts) -> e needs {start, end}
9323/// - keys(n) -> n needs all properties (*)
9324///
9325/// The extracted requirements are added to the properties map for later use during
9326/// scan planning.
9327fn analyze_function_property_requirements(
9328    name: &str,
9329    args: &[Expr],
9330    properties: &mut HashMap<String, HashSet<String>>,
9331) {
9332    use crate::query::function_props::get_function_spec;
9333
9334    /// Helper to mark a variable as needing all properties.
9335    fn mark_wildcard(var: &str, properties: &mut HashMap<String, HashSet<String>>) {
9336        properties
9337            .entry(var.to_string())
9338            .or_default()
9339            .insert("*".to_string());
9340    }
9341
9342    // System-managed timestamp functions: require only the corresponding
9343    // `_created_at` / `_updated_at` column, not full entity materialization.
9344    if name.eq_ignore_ascii_case("created_at") || name.eq_ignore_ascii_case("updated_at") {
9345        if let Some(Expr::Variable(var)) = args.first() {
9346            let col = if name.eq_ignore_ascii_case("created_at") {
9347                "_created_at"
9348            } else {
9349                "_updated_at"
9350            };
9351            properties
9352                .entry(var.clone())
9353                .or_default()
9354                .insert(col.to_string());
9355        }
9356        return;
9357    }
9358
9359    let Some(spec) = get_function_spec(name) else {
9360        // Unknown function: conservatively require all properties for variable args
9361        for arg in args {
9362            if let Expr::Variable(var) = arg {
9363                mark_wildcard(var, properties);
9364            }
9365        }
9366        return;
9367    };
9368
9369    // Extract property names from string literal arguments
9370    for &(prop_arg_idx, entity_arg_idx) in spec.property_name_args {
9371        let entity_arg = args.get(entity_arg_idx);
9372        let prop_arg = args.get(prop_arg_idx);
9373
9374        match (entity_arg, prop_arg) {
9375            (Some(Expr::Variable(var)), Some(Expr::Literal(CypherLiteral::String(prop)))) => {
9376                properties
9377                    .entry(var.clone())
9378                    .or_default()
9379                    .insert(prop.clone());
9380            }
9381            (Some(Expr::Variable(var)), Some(Expr::Parameter(_))) => {
9382                // Parameter property name: need all properties
9383                mark_wildcard(var, properties);
9384            }
9385            _ => {}
9386        }
9387    }
9388
9389    // Handle full entity requirement (keys(), properties())
9390    if spec.needs_full_entity {
9391        for &idx in spec.entity_args {
9392            if let Some(Expr::Variable(var)) = args.get(idx) {
9393                mark_wildcard(var, properties);
9394            }
9395        }
9396    }
9397}
9398
9399// ============================================================================
9400// Phase 5a-impl — fork-aware fusion rewrite
9401// ============================================================================
9402
9403/// Trait that exposes the per-fork "is there a fork-local index for
9404/// `(label, column)`?" lookup. Implemented for `StorageManager` so
9405/// callers don't need to depend on the fork module directly; tests
9406/// can mock by implementing it on a `HashMap`.
9407pub trait ForkIndexLookup {
9408    fn fork_index_for(
9409        &self,
9410        label: &str,
9411        column: &str,
9412    ) -> Option<uni_store::fork::ForkLocalIndexKind>;
9413
9414    /// Phase 5b followup: resolve a label id, then dispatch to
9415    /// `fork_index_for`. Used by the rewrite when wrapping
9416    /// `VectorKnn` and `InvertedIndexLookup` nodes which carry
9417    /// `label_id: u16` rather than the label name. Default returns
9418    /// `None`; the `StorageManager` impl resolves via its
9419    /// `schema_manager`.
9420    fn fork_index_for_label_id(
9421        &self,
9422        _label_id: u16,
9423        _column: &str,
9424    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9425        None
9426    }
9427}
9428
9429impl ForkIndexLookup for uni_store::storage::StorageManager {
9430    fn fork_index_for(
9431        &self,
9432        label: &str,
9433        column: &str,
9434    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9435        self.fork_index_exists(label, column)
9436    }
9437
9438    fn fork_index_for_label_id(
9439        &self,
9440        label_id: u16,
9441        column: &str,
9442    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9443        let schema = self.schema_manager().schema();
9444        let label_name = schema.label_name_by_id(label_id)?;
9445        self.fork_index_exists(label_name, column)
9446    }
9447}
9448
9449/// Fold a trailing `SET var.prop = value` into the freshly-created entity's
9450/// inline property map, eliminating the separate `Set` write pass.
9451///
9452/// Rewrites `CREATE (a)-[r:T]->(b) SET r.x = e.v` into the equivalent of
9453/// `CREATE (a)-[r:T {x: e.v}]->(b)`, so the plan collapses from `Set → Create`
9454/// to a single `Create`. This removes an entire read-modify-write operator
9455/// (`MutationSetExec`) — measured at ~38% of per-edge `UNWIND … CREATE … SET`
9456/// execution — that the bulk write path never pays.
9457///
9458/// # Examples
9459///
9460/// ```ignore
9461/// // CREATE (a)-[r:LINK]->(b) SET r.role = e.role   ==>
9462/// // CREATE (a)-[r:LINK {role: e.role}]->(b)
9463/// let fused = fuse_create_set(plan);
9464/// ```
9465///
9466/// The fold is **all-or-nothing per `SET` clause** and only fires when every
9467/// item is safe:
9468/// - the item is the simple `Variable.property = value` form (not `+=`, label
9469///   set `SET n:L`, or whole-entity map assignment `SET n = {...}`),
9470/// - the target variable is introduced by the immediately-preceding
9471///   `Create`/`CreateBatch` (a MATCHed variable is left untouched),
9472/// - the target element's inline properties are absent or a map literal (a
9473///   parameter-map form such as `CREATE (n $props)` cannot be merged),
9474/// - the value references no variable created in the same statement, so
9475///   evaluating it at create time is observably identical to SET time.
9476///
9477/// When any item fails these checks the whole `Set` node is preserved, keeping
9478/// semantics unchanged. The pass is idempotent: a plan with no fusable
9479/// `Set`/`Create` adjacency passes through untouched.
9480#[must_use]
9481pub fn fuse_create_set(plan: LogicalPlan) -> LogicalPlan {
9482    match plan {
9483        LogicalPlan::Set { input, items } => {
9484            // Fuse any deeper adjacency first so chained
9485            // `CREATE … SET … CREATE … SET` collapses bottom-up.
9486            let input = fuse_create_set(*input);
9487            match input {
9488                LogicalPlan::Create {
9489                    input: child,
9490                    pattern,
9491                } => {
9492                    let bound_vars = crate::query::df_planner::collect_plan_variables(&child);
9493                    match try_fuse_set_items(std::slice::from_ref(&pattern), &items, &bound_vars) {
9494                        Some(mut patterns) => LogicalPlan::Create {
9495                            input: child,
9496                            // try_fuse_set_items returns exactly as many patterns
9497                            // as it was given (one here).
9498                            pattern: patterns
9499                                .pop()
9500                                .expect("one pattern in yields one pattern out"),
9501                        },
9502                        None => LogicalPlan::Set {
9503                            input: Box::new(LogicalPlan::Create {
9504                                input: child,
9505                                pattern,
9506                            }),
9507                            items,
9508                        },
9509                    }
9510                }
9511                LogicalPlan::CreateBatch {
9512                    input: child,
9513                    patterns,
9514                } => {
9515                    let bound_vars = crate::query::df_planner::collect_plan_variables(&child);
9516                    match try_fuse_set_items(&patterns, &items, &bound_vars) {
9517                        Some(fused) => LogicalPlan::CreateBatch {
9518                            input: child,
9519                            patterns: fused,
9520                        },
9521                        None => LogicalPlan::Set {
9522                            input: Box::new(LogicalPlan::CreateBatch {
9523                                input: child,
9524                                patterns,
9525                            }),
9526                            items,
9527                        },
9528                    }
9529                }
9530                other => LogicalPlan::Set {
9531                    input: Box::new(other),
9532                    items,
9533                },
9534            }
9535        }
9536        // Recurse through the operators that can sit above a write clause so a
9537        // `Set` under RETURN/ORDER BY/LIMIT is still reached. This mirrors the
9538        // pragmatic recursion of `rewrite_for_fork_fusion`: variants that never
9539        // sit above a write clause fall through `other => other` unchanged.
9540        LogicalPlan::Project { input, projections } => LogicalPlan::Project {
9541            input: Box::new(fuse_create_set(*input)),
9542            projections,
9543        },
9544        LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
9545            input: Box::new(fuse_create_set(*input)),
9546            skip,
9547            fetch,
9548        },
9549        LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
9550            input: Box::new(fuse_create_set(*input)),
9551            order_by,
9552        },
9553        LogicalPlan::Filter {
9554            input,
9555            predicate,
9556            optional_variables,
9557        } => LogicalPlan::Filter {
9558            input: Box::new(fuse_create_set(*input)),
9559            predicate,
9560            optional_variables,
9561        },
9562        LogicalPlan::Create { input, pattern } => LogicalPlan::Create {
9563            input: Box::new(fuse_create_set(*input)),
9564            pattern,
9565        },
9566        LogicalPlan::CreateBatch { input, patterns } => LogicalPlan::CreateBatch {
9567            input: Box::new(fuse_create_set(*input)),
9568            patterns,
9569        },
9570        other => other,
9571    }
9572}
9573
9574/// Try to fold every `SET` item into the given CREATE patterns.
9575///
9576/// Returns the rewritten patterns when *all* items fuse safely (see
9577/// [`fuse_create_set`] for the conditions); returns `None` the moment any item
9578/// is unfusable, so the caller can keep the original `Set` node untouched.
9579///
9580/// `bound_vars` are the variables produced by the CREATE's input plan (e.g. an
9581/// upstream MATCH). A CREATE pattern may *reuse* such a variable as an endpoint
9582/// (`MATCH (a) CREATE (a)-[r:T]->(b)`), so `pattern_variable_names` alone cannot
9583/// tell a freshly-created variable from a reused one. Reused variables are
9584/// excluded from `owner`: a `SET` on them must not fuse, because the executor
9585/// skips inline properties on already-bound elements (which would silently drop
9586/// the write).
9587fn try_fuse_set_items(
9588    patterns: &[Pattern],
9589    items: &[SetItem],
9590    bound_vars: &HashSet<String>,
9591) -> Option<Vec<Pattern>> {
9592    // Map each freshly-created variable to the index of the pattern that
9593    // introduces it, skipping any variable already bound upstream.
9594    let mut owner: HashMap<String, usize> = HashMap::new();
9595    for (idx, pattern) in patterns.iter().enumerate() {
9596        for var in crate::query::df_graph::mutation_common::pattern_variable_names(pattern) {
9597            if bound_vars.contains(&var) {
9598                continue;
9599            }
9600            owner.entry(var).or_insert(idx);
9601        }
9602    }
9603
9604    let mut out = patterns.to_vec();
9605    for item in items {
9606        let SetItem::Property { expr, value } = item else {
9607            return None; // `+=`, label set, or whole-entity map assignment
9608        };
9609        let Expr::Property(base, prop) = expr else {
9610            return None; // not a property target
9611        };
9612        let Expr::Variable(var) = base.as_ref() else {
9613            return None; // e.g. `n[expr].x` or a deeper path
9614        };
9615        let Some(&idx) = owner.get(var) else {
9616            return None; // target is a MATCHed (not created) variable
9617        };
9618        // Evaluating the value at create time must equal evaluating it at SET
9619        // time: reject any reference to a variable created in this statement
9620        // (its value may not yet exist when the element is constructed).
9621        if collect_expr_variables(value)
9622            .iter()
9623            .any(|referenced| owner.contains_key(referenced))
9624        {
9625            return None;
9626        }
9627        if !merge_pattern_property(&mut out[idx], var, prop, value) {
9628            return None; // element absent or has a non-map property form
9629        }
9630    }
9631    Some(out)
9632}
9633
9634/// Merge `var.prop = value` into the matching element's inline property map.
9635///
9636/// Returns `false` (leaving the pattern unchanged) when the variable's element
9637/// is not found or its existing properties are a non-map expression that cannot
9638/// be merged. Any pre-existing entry for `prop` is replaced so the SET's
9639/// last-write-wins precedence is preserved.
9640fn merge_pattern_property(pattern: &mut Pattern, var: &str, prop: &str, value: &Expr) -> bool {
9641    for path in &mut pattern.paths {
9642        if merge_into_elements(&mut path.elements, var, prop, value) {
9643            return true;
9644        }
9645    }
9646    false
9647}
9648
9649/// Recursive worker for [`merge_pattern_property`] over a list of elements.
9650fn merge_into_elements(
9651    elements: &mut [PatternElement],
9652    var: &str,
9653    prop: &str,
9654    value: &Expr,
9655) -> bool {
9656    for element in elements {
9657        match element {
9658            PatternElement::Node(n) if n.variable.as_deref() == Some(var) => {
9659                return set_map_property(&mut n.properties, prop, value.clone());
9660            }
9661            PatternElement::Relationship(r) if r.variable.as_deref() == Some(var) => {
9662                return set_map_property(&mut r.properties, prop, value.clone());
9663            }
9664            PatternElement::Parenthesized { pattern, .. } => {
9665                if merge_into_elements(&mut pattern.elements, var, prop, value) {
9666                    return true;
9667                }
9668            }
9669            _ => {}
9670        }
9671    }
9672    false
9673}
9674
9675/// Set `prop = value` on an optional inline property map, last-write-wins.
9676///
9677/// Returns `false` without mutating when the properties are present but are not
9678/// a map literal (e.g. `CREATE (n $params)`), which cannot accept a single key.
9679fn set_map_property(props: &mut Option<Expr>, prop: &str, value: Expr) -> bool {
9680    match props {
9681        None => {
9682            *props = Some(Expr::Map(vec![(prop.to_string(), value)]));
9683            true
9684        }
9685        Some(Expr::Map(entries)) => {
9686            entries.retain(|(k, _)| k != prop);
9687            entries.push((prop.to_string(), value));
9688            true
9689        }
9690        Some(_) => false,
9691    }
9692}
9693
9694/// Walk a [`LogicalPlan`] tree and rewrite each `Scan` whose target
9695/// `(label, column)` has a registered fork-local index into the
9696/// matching `FusedIndexScan` variant.
9697///
9698/// Phase 5a-impl Step 4 covers `VidUidForkFirst`; Steps 5 and 6 add
9699/// `BtreeUnion` and `SortedKWayMerge` by extending `kind_for_filter`.
9700///
9701/// Idempotent: a tree that already contains `FusedIndexScan` nodes
9702/// passes through unchanged.
9703#[must_use]
9704pub fn rewrite_for_fork_fusion<L: ForkIndexLookup>(plan: LogicalPlan, lookup: &L) -> LogicalPlan {
9705    rewrite_node(plan, lookup)
9706}
9707
9708fn rewrite_node<L: ForkIndexLookup>(plan: LogicalPlan, lookup: &L) -> LogicalPlan {
9709    match plan {
9710        LogicalPlan::Scan {
9711            label_id,
9712            labels,
9713            variable,
9714            filter,
9715            optional,
9716        } => {
9717            // VidUid fusion only fires on a single-label scan with an
9718            // equality filter on a registered UID column. BTree and
9719            // Sorted will extend this match in Steps 5 and 6.
9720            let kind = if labels.len() == 1
9721                && let Some(col) = filter
9722                    .as_ref()
9723                    .and_then(|f| equality_target_column(f, &variable))
9724                && let Some(idx_kind) = lookup.fork_index_for(&labels[0], &col)
9725            {
9726                into_fusion_kind(idx_kind)
9727            } else {
9728                None
9729            };
9730            match kind {
9731                Some(kind) => LogicalPlan::FusedIndexScan {
9732                    label_id,
9733                    labels,
9734                    variable,
9735                    filter,
9736                    optional,
9737                    kind,
9738                },
9739                None => LogicalPlan::Scan {
9740                    label_id,
9741                    labels,
9742                    variable,
9743                    filter,
9744                    optional,
9745                },
9746            }
9747        }
9748        // Phase 5b followup: wrap lossy leaf operators when a
9749        // matching fork-local index has been registered. The wrap
9750        // preserves the original node's fields (the physical
9751        // planner unwraps and recurses); only the explain-plan
9752        // surface and runtime-stats operator name change. The
9753        // actual fusion still happens at the `BranchedBackend`
9754        // layer via Lance's per-branch reads.
9755        //
9756        // The CALL-style vector/FTS queries land as `ProcedureCall`
9757        // (not the dedicated `VectorKnn`/`InvertedIndexLookup`
9758        // operators); recognize those by procedure name and the
9759        // shape of their first two arguments (`label, column, ...`).
9760        LogicalPlan::ProcedureCall {
9761            procedure_name,
9762            arguments,
9763            yield_items,
9764        } => {
9765            let kind = procedure_call_fusion_kind(&procedure_name, &arguments, lookup);
9766            let inner = LogicalPlan::ProcedureCall {
9767                procedure_name,
9768                arguments,
9769                yield_items,
9770            };
9771            match kind {
9772                Some(kind) => LogicalPlan::FusedIndexScanWrapped {
9773                    inner: Box::new(inner),
9774                    kind,
9775                },
9776                None => inner,
9777            }
9778        }
9779        LogicalPlan::VectorKnn {
9780            label_id,
9781            variable,
9782            property,
9783            query,
9784            k,
9785            threshold,
9786        } => {
9787            if let Some(idx_kind) = lookup.fork_index_for_label_id(label_id, &property)
9788                && let Some(kind) = into_fusion_kind(idx_kind)
9789            {
9790                LogicalPlan::FusedIndexScanWrapped {
9791                    inner: Box::new(LogicalPlan::VectorKnn {
9792                        label_id,
9793                        variable,
9794                        property,
9795                        query,
9796                        k,
9797                        threshold,
9798                    }),
9799                    kind,
9800                }
9801            } else {
9802                LogicalPlan::VectorKnn {
9803                    label_id,
9804                    variable,
9805                    property,
9806                    query,
9807                    k,
9808                    threshold,
9809                }
9810            }
9811        }
9812        LogicalPlan::InvertedIndexLookup {
9813            label_id,
9814            variable,
9815            property,
9816            terms,
9817        } => {
9818            if let Some(idx_kind) = lookup.fork_index_for_label_id(label_id, &property)
9819                && let Some(kind) = into_fusion_kind(idx_kind)
9820            {
9821                LogicalPlan::FusedIndexScanWrapped {
9822                    inner: Box::new(LogicalPlan::InvertedIndexLookup {
9823                        label_id,
9824                        variable,
9825                        property,
9826                        terms,
9827                    }),
9828                    kind,
9829                }
9830            } else {
9831                LogicalPlan::InvertedIndexLookup {
9832                    label_id,
9833                    variable,
9834                    property,
9835                    terms,
9836                }
9837            }
9838        }
9839        // Tree-recursive variants — only the ones that can carry a
9840        // Scan in their subtree need to recurse here. Adding more is
9841        // safe (a missing recursion just means fusion doesn't fire
9842        // for that nested context, not incorrect results).
9843        LogicalPlan::Filter {
9844            input,
9845            predicate,
9846            optional_variables,
9847        } => LogicalPlan::Filter {
9848            input: Box::new(rewrite_node(*input, lookup)),
9849            predicate,
9850            optional_variables,
9851        },
9852        LogicalPlan::Project { input, projections } => LogicalPlan::Project {
9853            input: Box::new(rewrite_node(*input, lookup)),
9854            projections,
9855        },
9856        LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
9857            input: Box::new(rewrite_node(*input, lookup)),
9858            skip,
9859            fetch,
9860        },
9861        LogicalPlan::Sort { input, order_by } => {
9862            // Phase 5a-impl Sorted fusion: when the immediate child
9863            // is a single-label Scan AND the sole sort key is a
9864            // single-column property reference on that scan's
9865            // variable AND the column has a fork-local Sorted index
9866            // registered, rewrite to FusedIndexScan { SortedKWayMerge }.
9867            // Otherwise recurse normally.
9868            let new_input = match (*input, &order_by[..]) {
9869                (
9870                    LogicalPlan::Scan {
9871                        label_id,
9872                        labels,
9873                        variable,
9874                        filter,
9875                        optional,
9876                    },
9877                    [single_sort],
9878                ) if labels.len() == 1
9879                    && let Some(col) = column_of_scan_variable(&single_sort.expr, &variable)
9880                    && let Some(uni_store::fork::ForkLocalIndexKind::Sorted) =
9881                        lookup.fork_index_for(&labels[0], &col) =>
9882                {
9883                    LogicalPlan::FusedIndexScan {
9884                        label_id,
9885                        labels,
9886                        variable,
9887                        filter,
9888                        optional,
9889                        kind: FusionKind::SortedKWayMerge,
9890                    }
9891                }
9892                (other_input, _) => rewrite_node(other_input, lookup),
9893            };
9894            LogicalPlan::Sort {
9895                input: Box::new(new_input),
9896                order_by,
9897            }
9898        }
9899        LogicalPlan::Union { left, right, all } => LogicalPlan::Union {
9900            left: Box::new(rewrite_node(*left, lookup)),
9901            right: Box::new(rewrite_node(*right, lookup)),
9902            all,
9903        },
9904        // Everything else passes through unchanged. Adding more
9905        // arms is purely additive — fusion just doesn't fire inside
9906        // un-recursed-into subtrees.
9907        other => other,
9908    }
9909}
9910
9911/// Phase 5b followup: inspect a CALL-style procedure invocation
9912/// for a `(label, column)` pair and check whether a fork-local
9913/// index has been registered for it.
9914///
9915/// Recognizes:
9916/// - `uni.vector.query(label, column, query_vec, k)` → `AnnRerank`
9917///   when a `Vector` fork-local index exists.
9918/// - `uni.fts.query(label, column, query, k)` → `Bm25Rrf` when a
9919///   `FullText` fork-local index exists.
9920/// - `uni.sparse.query(label, column, query_vec, k)` → `SparseDot`
9921///   when a `Sparse` fork-local index marker exists.
9922///
9923/// Returns `None` for any other procedure (no rewrite) or when the
9924/// registry has no matching entry.
9925fn procedure_call_fusion_kind<L: ForkIndexLookup>(
9926    procedure_name: &str,
9927    arguments: &[Expr],
9928    lookup: &L,
9929) -> Option<FusionKind> {
9930    if arguments.len() < 2 {
9931        return None;
9932    }
9933
9934    // `uni.search` hybrid: a `sparse` key in the inline properties map means the
9935    // call fuses a learned-sparse source via RRF (`run_hybrid_search`). This is
9936    // independent of fork-local indexes, so it is not gated on `lookup`.
9937    // Limitation: a properties map passed as a `$param` (not an inline
9938    // `Expr::Map`) is opaque here and stays unlabeled.
9939    if procedure_name == "uni.search" {
9940        if let Expr::Map(entries) = &arguments[1]
9941            && entries.iter().any(|(key, _)| key.as_str() == "sparse")
9942        {
9943            return Some(FusionKind::SparseRrf);
9944        }
9945        return None;
9946    }
9947
9948    let label = match &arguments[0] {
9949        Expr::Literal(uni_cypher::ast::CypherLiteral::String(s)) => s.as_str(),
9950        _ => return None,
9951    };
9952    let column = match &arguments[1] {
9953        Expr::Literal(uni_cypher::ast::CypherLiteral::String(s)) => s.as_str(),
9954        _ => return None,
9955    };
9956    let expected = match procedure_name {
9957        "uni.vector.query" => uni_store::fork::ForkLocalIndexKind::Vector,
9958        "uni.fts.query" => uni_store::fork::ForkLocalIndexKind::FullText,
9959        // `uni.sparse.query` fork-fusion observability: a registered fork-local
9960        // `Sparse` marker (issue #95 Task #4) switches the call to the `SparseDot`
9961        // fused operator. Retrieval itself is a brute-force branch scan re-scored
9962        // by `sparse_dot` (`StorageManager::sparse_search`); the marker drives the
9963        // planner/EXPLAIN view, the `AnnRerank`/`Bm25Rrf` analogue.
9964        "uni.sparse.query" => uni_store::fork::ForkLocalIndexKind::Sparse,
9965        _ => return None,
9966    };
9967    let registered = lookup.fork_index_for(label, column)?;
9968    if registered != expected {
9969        return None;
9970    }
9971    into_fusion_kind(registered)
9972}
9973
9974/// Map a fork-local index kind to its planner-side fusion variant.
9975/// Returns `None` for any future `ForkLocalIndexKind` we don't yet
9976/// know how to fuse — the caller falls back to a regular Scan.
9977fn into_fusion_kind(kind: uni_store::fork::ForkLocalIndexKind) -> Option<FusionKind> {
9978    use uni_store::fork::ForkLocalIndexKind as K;
9979    match kind {
9980        K::VidUid => Some(FusionKind::VidUidForkFirst),
9981        K::ScalarBtree => Some(FusionKind::BtreeUnion),
9982        K::Sorted => Some(FusionKind::SortedKWayMerge),
9983        K::Vector => Some(FusionKind::AnnRerank),
9984        K::FullText => Some(FusionKind::Bm25Rrf),
9985        K::Sparse => Some(FusionKind::SparseDot),
9986        // `ForkLocalIndexKind` is `#[non_exhaustive]`; future kinds
9987        // we don't yet handle are silently passed through as a
9988        // regular Scan so a forward-incompatible binary doesn't
9989        // panic — just misses the fusion opportunity.
9990        _ => None,
9991    }
9992}
9993
9994/// Inspect a Scan filter `Expr` for a single-column equality predicate
9995/// against the scan's variable. Returns the column name if the
9996/// predicate matches the shape `variable.column = <literal_or_param>`
9997/// (or its commuted form). Returns `None` for any other shape — fusion
9998/// only fires on the simple case in Phase 5a-impl.
9999fn equality_target_column(filter: &Expr, scan_variable: &str) -> Option<String> {
10000    let (lhs, rhs) = match filter {
10001        Expr::BinaryOp {
10002            left,
10003            op: uni_cypher::ast::BinaryOp::Eq,
10004            right,
10005        } => (left.as_ref(), right.as_ref()),
10006        _ => return None,
10007    };
10008    // Try lhs = column-of-scan-var, rhs = literal/param; or commuted.
10009    if let Some(col) = column_of_scan_variable(lhs, scan_variable)
10010        && is_constant_or_param(rhs)
10011    {
10012        return Some(col);
10013    }
10014    if let Some(col) = column_of_scan_variable(rhs, scan_variable)
10015        && is_constant_or_param(lhs)
10016    {
10017        return Some(col);
10018    }
10019    None
10020}
10021
10022fn column_of_scan_variable(expr: &Expr, scan_variable: &str) -> Option<String> {
10023    if let Expr::Property(base, prop) = expr
10024        && let Expr::Variable(v) = base.as_ref()
10025        && v == scan_variable
10026    {
10027        return Some(prop.clone());
10028    }
10029    None
10030}
10031
10032fn is_constant_or_param(expr: &Expr) -> bool {
10033    matches!(expr, Expr::Literal(_) | Expr::Parameter(_))
10034}
10035
#[cfg(test)]
mod pushdown_tests {
    use super::*;

    /// Shorthand for `Expr::Variable(name)`.
    fn var(name: &str) -> Expr {
        Expr::Variable(name.to_string())
    }

    /// Shorthand for a string-literal expression.
    fn str_lit(text: &str) -> Expr {
        Expr::Literal(CypherLiteral::String(text.to_string()))
    }

    /// Shorthand for `Expr::Parameter(name)`.
    fn param(name: &str) -> Expr {
        Expr::Parameter(name.to_string())
    }

    /// Builds the owned property-name set the assertions compare against.
    fn prop_set(names: &[&str]) -> HashSet<String> {
        names.iter().map(|name| (*name).to_owned()).collect()
    }

    #[test]
    fn test_validat_extracts_property_names() {
        // validAt(e, 'start', 'end', ts) → e: {start, end}
        let args = vec![var("e"), str_lit("start"), str_lit("end"), var("ts")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("uni.temporal.validAt", &args, &mut properties);

        assert_eq!(properties.get("e"), Some(&prop_set(&["start", "end"])));
    }

    #[test]
    fn test_keys_requires_wildcard() {
        // keys(n) → n: {*}
        let args = vec![var("n")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("keys", &args, &mut properties);

        assert_eq!(properties.get("n"), Some(&prop_set(&["*"])));
    }

    #[test]
    fn test_properties_requires_wildcard() {
        // properties(n) → n: {*}
        let args = vec![var("n")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("properties", &args, &mut properties);

        assert_eq!(properties.get("n"), Some(&prop_set(&["*"])));
    }

    #[test]
    fn test_unknown_function_conservative() {
        // customUdf(e) → e: {*}
        let args = vec![var("e")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("customUdf", &args, &mut properties);

        assert_eq!(properties.get("e"), Some(&prop_set(&["*"])));
    }

    #[test]
    fn test_parameter_property_name() {
        // validAt(e, $start, $end, ts) → e: {*}
        let args = vec![var("e"), param("start"), param("end"), var("ts")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("uni.temporal.validAt", &args, &mut properties);

        // Indexing panics (and so fails the test) if `e` was never recorded.
        assert!(properties["e"].contains("*"));
    }

    #[test]
    fn test_validat_expr_extracts_properties() {
        // Exercises property extraction for the `Expr::ValidAt` variant.
        let validat_expr = Expr::ValidAt {
            entity: Box::new(var("e")),
            timestamp: Box::new(var("ts")),
            start_prop: Some("valid_from".to_string()),
            end_prop: Some("valid_to".to_string()),
        };
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&validat_expr, &mut properties);

        let e_props = &properties["e"];
        assert!(e_props.contains("valid_from"));
        assert!(e_props.contains("valid_to"));
    }

    #[test]
    fn test_array_index_requires_wildcard() {
        // e[prop] → e: {*}
        let array_index_expr = Expr::ArrayIndex {
            array: Box::new(var("e")),
            index: Box::new(var("prop")),
        };
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&array_index_expr, &mut properties);

        assert!(properties["e"].contains("*"));
    }

    #[test]
    fn test_property_access_extraction() {
        // e.name → e: {name}
        let prop_access = Expr::Property(Box::new(var("e")), "name".to_string());
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&prop_access, &mut properties);

        assert!(properties["e"].contains("name"));
    }
}