Skip to main content

uni_query/query/
planner.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use crate::query::pushdown::{PredicateAnalyzer, try_label_or_to_union, try_type_or_to_union};
5use anyhow::{Result, anyhow};
6use arrow_array::RecordBatch;
7use arrow_schema::{DataType, SchemaRef};
8use parking_lot::RwLock;
9use std::collections::{HashMap, HashSet};
10use std::sync::Arc;
11use uni_common::Value;
12use uni_common::core::schema::{
13    DistanceMetric, EmbeddingConfig, FullTextIndexConfig, IndexDefinition, JsonFtsIndexConfig,
14    ScalarIndexConfig, ScalarIndexType, Schema, TokenizerConfig, VectorIndexConfig,
15    VectorIndexType,
16};
17use uni_cypher::ast::{
18    AlterEdgeType, AlterLabel, BinaryOp, CallKind, Clause, CreateConstraint, CreateEdgeType,
19    CreateLabel, CypherLiteral, Direction, DropConstraint, DropEdgeType, DropLabel, Expr,
20    MatchClause, MergeClause, NodePattern, PathPattern, Pattern, PatternElement, Query,
21    RelationshipPattern, RemoveItem, ReturnClause, ReturnItem, SchemaCommand, SetClause, SetItem,
22    ShortestPathMode, ShowConstraints, SortItem, Statement, WindowSpec, WithClause,
23    WithRecursiveClause,
24};
25
/// Sentinel column name inserted into a variable's property set to request
/// that the planner build the bare struct column (`add_structural_projection`)
/// WITHOUT pulling the full schema.
///
/// Emitted by `mark_set_item_variables` for `SetItem::Property` targets only.
/// Other SET variants (`Labels`, `Variable`, `VariablePlus`) and REMOVE still
/// emit `"*"` because they replace/merge the whole node.
///
/// **Union semantics:** When both `"*"` and the sentinel appear in the same
/// variable's `HashSet` (e.g. `SET n.x = 1 RETURN n` collects both), `"*"`
/// dominates, so schema expansion still happens. The sentinel only changes
/// behavior when it is the sole structural marker present.
///
/// **Reserved-name convention:** the double-underscore prefix marks this name
/// as internal. Schema validation should reject user-declared properties with
/// this name (deferred follow-up).
pub(crate) const STRUCT_ONLY_SENTINEL: &str = "__set_struct__";
43
/// Semantic category of a variable that is in scope during planning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VariableType {
    /// A node, e.g. bound by `MATCH (n)` or `CREATE (n)`.
    Node,
    /// An edge/relationship, e.g. bound by `MATCH ()-[r]->()`.
    Edge,
    /// A path, e.g. bound by `MATCH p = (a)-[*]->(b)`.
    Path,
    /// A scalar introduced by `WITH expr AS x`, `UNWIND list AS item`, etc.
    /// It may hold a map or dynamic value, so property access is allowed.
    Scalar,
    /// A scalar that came from a known non-graph literal (int, float, bool,
    /// string, list). Property access on it is rejected at compile time.
    ScalarLiteral,
    /// A variable imported from an outer scope whose type is unknown
    /// (string vars passed to `plan_with_scope`). It is compatible with any
    /// concrete type, which lets subqueries re-bind the variable.
    Imported,
}

impl VariableType {
    /// Reports whether a variable of this type may be used where `expected`
    /// is required.
    ///
    /// * `Imported` always passes, because its real type is unknown at plan time.
    /// * `ScalarLiteral` passes wherever `Scalar` (or `ScalarLiteral`) is expected.
    /// * Every other type passes only on an exact match.
    fn is_compatible_with(self, expected: VariableType) -> bool {
        match self {
            VariableType::Imported => true,
            VariableType::ScalarLiteral => matches!(
                expected,
                VariableType::ScalarLiteral | VariableType::Scalar
            ),
            exact => exact == expected,
        }
    }
}
75
76/// Information about a variable in scope during planning.
77#[derive(Debug, Clone)]
78pub struct VariableInfo {
79    /// Variable name as written in the query.
80    pub name: String,
81    /// Semantic type of the variable.
82    pub var_type: VariableType,
83    /// True if this is a variable-length path (VLP) step variable.
84    ///
85    /// VLP step variables are typed as Edge but semantically hold edge lists.
86    pub is_vlp: bool,
87}
88
89impl VariableInfo {
90    pub fn new(name: String, var_type: VariableType) -> Self {
91        Self {
92            name,
93            var_type,
94            is_vlp: false,
95        }
96    }
97}
98
99/// Find a variable in scope by name.
100fn find_var_in_scope<'a>(vars: &'a [VariableInfo], name: &str) -> Option<&'a VariableInfo> {
101    vars.iter().find(|v| v.name == name)
102}
103
104/// Check if a variable is in scope.
105fn is_var_in_scope(vars: &[VariableInfo], name: &str) -> bool {
106    find_var_in_scope(vars, name).is_some()
107}
108
109/// Check if an expression contains a pattern predicate.
110fn contains_pattern_predicate(expr: &Expr) -> bool {
111    if matches!(
112        expr,
113        Expr::Exists {
114            from_pattern_predicate: true,
115            ..
116        }
117    ) {
118        return true;
119    }
120    let mut found = false;
121    expr.for_each_child(&mut |child| {
122        if !found {
123            found = contains_pattern_predicate(child);
124        }
125    });
126    found
127}
128
129/// Add a variable to scope with type conflict validation.
130/// Returns an error if the variable already exists with a different type.
131fn add_var_to_scope(
132    vars: &mut Vec<VariableInfo>,
133    name: &str,
134    var_type: VariableType,
135) -> Result<()> {
136    if name.is_empty() {
137        return Ok(());
138    }
139
140    if let Some(existing) = vars.iter_mut().find(|v| v.name == name) {
141        if existing.var_type == VariableType::Imported {
142            // Imported vars upgrade to the concrete type
143            existing.var_type = var_type;
144        } else if var_type == VariableType::Imported || existing.var_type == var_type {
145            // New type is Imported (keep existing) or same type — no conflict
146        } else if matches!(
147            existing.var_type,
148            VariableType::Scalar | VariableType::ScalarLiteral
149        ) && matches!(var_type, VariableType::Node | VariableType::Edge)
150        {
151            // Scalar can be used as Node/Edge in CREATE context — a scalar
152            // holding a node/edge reference is valid for pattern use
153            existing.var_type = var_type;
154        } else {
155            return Err(anyhow!(
156                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as {:?}",
157                name,
158                existing.var_type,
159                var_type
160            ));
161        }
162    } else {
163        vars.push(VariableInfo::new(name.to_string(), var_type));
164    }
165    Ok(())
166}
167
168/// Convert VariableInfo vec to String vec for backward compatibility
169fn vars_to_strings(vars: &[VariableInfo]) -> Vec<String> {
170    vars.iter().map(|v| v.name.clone()).collect()
171}
172
173fn infer_with_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
174    match expr {
175        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
176            .map(|info| info.var_type)
177            .unwrap_or(VariableType::Scalar),
178        Expr::Literal(CypherLiteral::Null) => VariableType::Imported,
179        // Known non-graph literals: property access is NOT valid on these.
180        Expr::Literal(CypherLiteral::Integer(_))
181        | Expr::Literal(CypherLiteral::Float(_))
182        | Expr::Literal(CypherLiteral::String(_))
183        | Expr::Literal(CypherLiteral::Bool(_))
184        | Expr::Literal(CypherLiteral::Bytes(_)) => VariableType::ScalarLiteral,
185        Expr::FunctionCall { name, args, .. } => {
186            let lower = name.to_lowercase();
187            if lower == "coalesce" {
188                infer_coalesce_type(args, vars_in_scope)
189            } else if lower == "collect" && !args.is_empty() {
190                let collected = infer_with_output_type(&args[0], vars_in_scope);
191                if matches!(
192                    collected,
193                    VariableType::Node
194                        | VariableType::Edge
195                        | VariableType::Path
196                        | VariableType::Imported
197                ) {
198                    collected
199                } else {
200                    VariableType::Scalar
201                }
202            } else {
203                VariableType::Scalar
204            }
205        }
206        // WITH list literals/expressions produce scalar list values. Preserving
207        // entity typing here causes invalid node/edge reuse in later MATCH clauses
208        // (e.g. WITH [n] AS users; MATCH (users)-->() should fail at compile time).
209        // Lists are ScalarLiteral since property access is not valid on them.
210        Expr::List(_) => VariableType::ScalarLiteral,
211        _ => VariableType::Scalar,
212    }
213}
214
215fn infer_coalesce_type(args: &[Expr], vars_in_scope: &[VariableInfo]) -> VariableType {
216    let mut resolved: Option<VariableType> = None;
217    let mut saw_imported = false;
218    for arg in args {
219        let t = infer_with_output_type(arg, vars_in_scope);
220        match t {
221            VariableType::Node | VariableType::Edge | VariableType::Path => {
222                if let Some(existing) = resolved {
223                    if existing != t {
224                        return VariableType::Scalar;
225                    }
226                } else {
227                    resolved = Some(t);
228                }
229            }
230            VariableType::Imported => saw_imported = true,
231            VariableType::Scalar | VariableType::ScalarLiteral => {}
232        }
233    }
234    if let Some(t) = resolved {
235        t
236    } else if saw_imported {
237        VariableType::Imported
238    } else {
239        VariableType::Scalar
240    }
241}
242
243fn infer_unwind_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
244    match expr {
245        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
246            .map(|info| info.var_type)
247            .unwrap_or(VariableType::Scalar),
248        Expr::FunctionCall { name, args, .. }
249            if name.eq_ignore_ascii_case("collect") && !args.is_empty() =>
250        {
251            infer_with_output_type(&args[0], vars_in_scope)
252        }
253        Expr::List(items) => {
254            let mut inferred: Option<VariableType> = None;
255            for item in items {
256                let t = infer_with_output_type(item, vars_in_scope);
257                if !matches!(
258                    t,
259                    VariableType::Node
260                        | VariableType::Edge
261                        | VariableType::Path
262                        | VariableType::Imported
263                ) {
264                    return VariableType::Scalar;
265                }
266                if let Some(existing) = inferred {
267                    if existing != t
268                        && t != VariableType::Imported
269                        && existing != VariableType::Imported
270                    {
271                        return VariableType::Scalar;
272                    }
273                    if existing == VariableType::Imported && t != VariableType::Imported {
274                        inferred = Some(t);
275                    }
276                } else {
277                    inferred = Some(t);
278                }
279            }
280            inferred.unwrap_or(VariableType::Scalar)
281        }
282        _ => VariableType::Scalar,
283    }
284}
285
286/// Collect all variable names referenced in an expression
287fn collect_expr_variables(expr: &Expr) -> Vec<String> {
288    let mut vars = Vec::new();
289    collect_expr_variables_inner(expr, &mut vars);
290    vars
291}
292
293fn collect_expr_variables_inner(expr: &Expr, vars: &mut Vec<String>) {
294    let mut add_var = |name: &String| {
295        if !vars.contains(name) {
296            vars.push(name.clone());
297        }
298    };
299
300    match expr {
301        Expr::Variable(name) => add_var(name),
302        Expr::Property(base, _) => collect_expr_variables_inner(base, vars),
303        Expr::BinaryOp { left, right, .. } => {
304            collect_expr_variables_inner(left, vars);
305            collect_expr_variables_inner(right, vars);
306        }
307        Expr::UnaryOp { expr: e, .. }
308        | Expr::IsNull(e)
309        | Expr::IsNotNull(e)
310        | Expr::IsUnique(e) => collect_expr_variables_inner(e, vars),
311        Expr::FunctionCall { args, .. } => {
312            for a in args {
313                collect_expr_variables_inner(a, vars);
314            }
315        }
316        Expr::List(items) => {
317            for item in items {
318                collect_expr_variables_inner(item, vars);
319            }
320        }
321        Expr::In { expr: e, list } => {
322            collect_expr_variables_inner(e, vars);
323            collect_expr_variables_inner(list, vars);
324        }
325        Expr::Case {
326            expr: case_expr,
327            when_then,
328            else_expr,
329        } => {
330            if let Some(e) = case_expr {
331                collect_expr_variables_inner(e, vars);
332            }
333            for (w, t) in when_then {
334                collect_expr_variables_inner(w, vars);
335                collect_expr_variables_inner(t, vars);
336            }
337            if let Some(e) = else_expr {
338                collect_expr_variables_inner(e, vars);
339            }
340        }
341        Expr::Map(entries) => {
342            for (_, v) in entries {
343                collect_expr_variables_inner(v, vars);
344            }
345        }
346        Expr::LabelCheck { expr, .. } => collect_expr_variables_inner(expr, vars),
347        Expr::ArrayIndex { array, index } => {
348            collect_expr_variables_inner(array, vars);
349            collect_expr_variables_inner(index, vars);
350        }
351        Expr::ArraySlice { array, start, end } => {
352            collect_expr_variables_inner(array, vars);
353            if let Some(s) = start {
354                collect_expr_variables_inner(s, vars);
355            }
356            if let Some(e) = end {
357                collect_expr_variables_inner(e, vars);
358            }
359        }
360        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
361        // they introduce local variable bindings not in outer scope.
362        _ => {}
363    }
364}
365
366/// Rewrite ORDER BY expressions to resolve projection aliases back to their source expressions.
367///
368/// Example: `RETURN r AS rel ORDER BY rel.id` becomes `ORDER BY r.id` so Sort can run
369/// before the final RETURN projection without losing alias semantics.
370fn rewrite_order_by_expr_with_aliases(expr: &Expr, aliases: &HashMap<String, Expr>) -> Expr {
371    let repr = expr.to_string_repr();
372    if let Some(rewritten) = aliases.get(&repr) {
373        return rewritten.clone();
374    }
375
376    match expr {
377        Expr::Variable(name) => aliases.get(name).cloned().unwrap_or_else(|| expr.clone()),
378        Expr::Property(base, prop) => Expr::Property(
379            Box::new(rewrite_order_by_expr_with_aliases(base, aliases)),
380            prop.clone(),
381        ),
382        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
383            left: Box::new(rewrite_order_by_expr_with_aliases(left, aliases)),
384            op: *op,
385            right: Box::new(rewrite_order_by_expr_with_aliases(right, aliases)),
386        },
387        Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
388            op: *op,
389            expr: Box::new(rewrite_order_by_expr_with_aliases(inner, aliases)),
390        },
391        Expr::FunctionCall {
392            name,
393            args,
394            distinct,
395            window_spec,
396        } => Expr::FunctionCall {
397            name: name.clone(),
398            args: args
399                .iter()
400                .map(|a| rewrite_order_by_expr_with_aliases(a, aliases))
401                .collect(),
402            distinct: *distinct,
403            window_spec: window_spec.clone(),
404        },
405        Expr::List(items) => Expr::List(
406            items
407                .iter()
408                .map(|item| rewrite_order_by_expr_with_aliases(item, aliases))
409                .collect(),
410        ),
411        Expr::Map(entries) => Expr::Map(
412            entries
413                .iter()
414                .map(|(k, v)| (k.clone(), rewrite_order_by_expr_with_aliases(v, aliases)))
415                .collect(),
416        ),
417        Expr::Case {
418            expr: case_expr,
419            when_then,
420            else_expr,
421        } => Expr::Case {
422            expr: case_expr
423                .as_ref()
424                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
425            when_then: when_then
426                .iter()
427                .map(|(w, t)| {
428                    (
429                        rewrite_order_by_expr_with_aliases(w, aliases),
430                        rewrite_order_by_expr_with_aliases(t, aliases),
431                    )
432                })
433                .collect(),
434            else_expr: else_expr
435                .as_ref()
436                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
437        },
438        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
439        // they introduce local variable bindings that could shadow aliases.
440        _ => expr.clone(),
441    }
442}
443
444/// Validate function call argument types.
445/// Returns error if type constraints are violated.
446fn validate_function_call(name: &str, args: &[Expr], vars_in_scope: &[VariableInfo]) -> Result<()> {
447    let name_lower = name.to_lowercase();
448
449    // labels() requires Node
450    if name_lower == "labels"
451        && let Some(Expr::Variable(var_name)) = args.first()
452        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
453        && !info.var_type.is_compatible_with(VariableType::Node)
454    {
455        return Err(anyhow!(
456            "SyntaxError: InvalidArgumentType - labels() requires a node argument"
457        ));
458    }
459
460    // type() requires Edge
461    if name_lower == "type"
462        && let Some(Expr::Variable(var_name)) = args.first()
463        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
464        && !info.var_type.is_compatible_with(VariableType::Edge)
465    {
466        return Err(anyhow!(
467            "SyntaxError: InvalidArgumentType - type() requires a relationship argument"
468        ));
469    }
470
471    // properties() requires Node/Edge/Map (not scalar literals)
472    if name_lower == "properties"
473        && let Some(arg) = args.first()
474    {
475        match arg {
476            Expr::Literal(CypherLiteral::Integer(_))
477            | Expr::Literal(CypherLiteral::Float(_))
478            | Expr::Literal(CypherLiteral::String(_))
479            | Expr::Literal(CypherLiteral::Bool(_))
480            | Expr::List(_) => {
481                return Err(anyhow!(
482                    "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
483                ));
484            }
485            Expr::Variable(var_name) => {
486                if let Some(info) = find_var_in_scope(vars_in_scope, var_name)
487                    && matches!(
488                        info.var_type,
489                        VariableType::Scalar | VariableType::ScalarLiteral
490                    )
491                {
492                    return Err(anyhow!(
493                        "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
494                    ));
495                }
496            }
497            _ => {}
498        }
499    }
500
501    // nodes()/relationships() require Path
502    if (name_lower == "nodes" || name_lower == "relationships")
503        && let Some(Expr::Variable(var_name)) = args.first()
504        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
505        && !info.var_type.is_compatible_with(VariableType::Path)
506    {
507        return Err(anyhow!(
508            "SyntaxError: InvalidArgumentType - {}() requires a path argument",
509            name_lower
510        ));
511    }
512
513    // size() does NOT accept Path arguments (length() on paths IS valid — returns relationship count)
514    if name_lower == "size"
515        && let Some(Expr::Variable(var_name)) = args.first()
516        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
517        && info.var_type == VariableType::Path
518    {
519        return Err(anyhow!(
520            "SyntaxError: InvalidArgumentType - size() requires a string, list, or map argument"
521        ));
522    }
523
524    // length()/size() do NOT accept Node or single-Edge arguments.
525    // VLP step variables (e.g. `r` in `-[r*1..2]->`) are typed as Edge
526    // but are actually edge lists — size()/length() is valid on those.
527    if (name_lower == "length" || name_lower == "size")
528        && let Some(Expr::Variable(var_name)) = args.first()
529        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
530        && (info.var_type == VariableType::Node
531            || (info.var_type == VariableType::Edge && !info.is_vlp))
532    {
533        return Err(anyhow!(
534            "SyntaxError: InvalidArgumentType - {}() requires a string, list, or path argument",
535            name_lower
536        ));
537    }
538
539    Ok(())
540}
541
542/// Check if an expression is a non-boolean literal.
543fn is_non_boolean_literal(expr: &Expr) -> bool {
544    matches!(
545        expr,
546        Expr::Literal(CypherLiteral::Integer(_))
547            | Expr::Literal(CypherLiteral::Float(_))
548            | Expr::Literal(CypherLiteral::String(_))
549            | Expr::List(_)
550            | Expr::Map(_)
551    )
552}
553
554/// Validate boolean expressions (AND/OR/NOT require boolean arguments).
555fn validate_boolean_expression(expr: &Expr) -> Result<()> {
556    // Check AND/OR/XOR operands and NOT operand for non-boolean literals
557    if let Expr::BinaryOp { left, op, right } = expr
558        && matches!(op, BinaryOp::And | BinaryOp::Or | BinaryOp::Xor)
559    {
560        let op_name = format!("{op:?}").to_uppercase();
561        for operand in [left.as_ref(), right.as_ref()] {
562            if is_non_boolean_literal(operand) {
563                return Err(anyhow!(
564                    "SyntaxError: InvalidArgumentType - {} requires boolean arguments",
565                    op_name
566                ));
567            }
568        }
569    }
570    if let Expr::UnaryOp {
571        op: uni_cypher::ast::UnaryOp::Not,
572        expr: inner,
573    } = expr
574        && is_non_boolean_literal(inner)
575    {
576        return Err(anyhow!(
577            "SyntaxError: InvalidArgumentType - NOT requires a boolean argument"
578        ));
579    }
580    let mut result = Ok(());
581    expr.for_each_child(&mut |child| {
582        if result.is_ok() {
583            result = validate_boolean_expression(child);
584        }
585    });
586    result
587}
588
589/// Validate that all variables used in an expression are in scope.
590fn validate_expression_variables(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
591    let used_vars = collect_expr_variables(expr);
592    for var in used_vars {
593        if !is_var_in_scope(vars_in_scope, &var) {
594            return Err(anyhow!(
595                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
596                var
597            ));
598        }
599    }
600    Ok(())
601}
602
603/// Check if a function name (lowercase) is an aggregate function.
604fn is_aggregate_function_name(name: &str) -> bool {
605    matches!(
606        name.to_lowercase().as_str(),
607        "count"
608            | "sum"
609            | "avg"
610            | "min"
611            | "max"
612            | "collect"
613            | "stdev"
614            | "stdevp"
615            | "percentiledisc"
616            | "percentilecont"
617            | "btic_min"
618            | "btic_max"
619            | "btic_span_agg"
620            | "btic_count_at"
621    ) || uni_cypher::is_known_plugin_aggregate(name)
622}
623
624/// Returns true if the expression is a window function (FunctionCall with window_spec).
625fn is_window_function(expr: &Expr) -> bool {
626    matches!(
627        expr,
628        Expr::FunctionCall {
629            window_spec: Some(_),
630            ..
631        }
632    )
633}
634
635/// Returns true when `expr` reports `is_aggregate()` but is NOT itself a bare
636/// aggregate FunctionCall (or CountSubquery/CollectSubquery). In other words,
637/// the aggregate lives *inside* a wrapper expression (e.g. a ListComprehension,
638/// size() call, BinaryOp, etc.).
639fn is_compound_aggregate(expr: &Expr) -> bool {
640    if !expr.is_aggregate() {
641        return false;
642    }
643    match expr {
644        Expr::FunctionCall {
645            name, window_spec, ..
646        } => {
647            // A bare aggregate FunctionCall is NOT compound
648            if window_spec.is_some() {
649                return true; // window wrapping an aggregate — treat as compound
650            }
651            !is_aggregate_function_name(name)
652        }
653        // Subquery aggregates are "bare" (not compound)
654        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => false,
655        // Everything else (ListComprehension, BinaryOp, etc.) is compound
656        _ => true,
657    }
658}
659
660/// Recursively collect all bare aggregate FunctionCall sub-expressions from
661/// `expr`. Stops recursing into the *arguments* of an aggregate (we only want
662/// the outermost aggregate boundaries).
663///
664/// For `ListComprehension`, `Quantifier`, and `Reduce`, only the `list` field
665/// is searched because the body (`map_expr`, `predicate`, `expr`) references
666/// the loop variable, not outer-scope aggregates.
667fn extract_inner_aggregates(expr: &Expr) -> Vec<Expr> {
668    let mut out = Vec::new();
669    extract_inner_aggregates_rec(expr, &mut out);
670    out
671}
672
673fn extract_inner_aggregates_rec(expr: &Expr, out: &mut Vec<Expr>) {
674    match expr {
675        Expr::FunctionCall {
676            name, window_spec, ..
677        } if window_spec.is_none() && is_aggregate_function_name(name) => {
678            // Found a bare aggregate — collect it and stop recursing
679            out.push(expr.clone());
680        }
681        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
682            out.push(expr.clone());
683        }
684        // For list comprehension, only search the `list` source for aggregates
685        Expr::ListComprehension { list, .. } => {
686            extract_inner_aggregates_rec(list, out);
687        }
688        // For quantifier, only search the `list` source
689        Expr::Quantifier { list, .. } => {
690            extract_inner_aggregates_rec(list, out);
691        }
692        // For reduce, search `init` and `list` (not the body `expr`)
693        Expr::Reduce { init, list, .. } => {
694            extract_inner_aggregates_rec(init, out);
695            extract_inner_aggregates_rec(list, out);
696        }
697        // Standard recursive cases
698        Expr::FunctionCall { args, .. } => {
699            for arg in args {
700                extract_inner_aggregates_rec(arg, out);
701            }
702        }
703        Expr::BinaryOp { left, right, .. } => {
704            extract_inner_aggregates_rec(left, out);
705            extract_inner_aggregates_rec(right, out);
706        }
707        Expr::UnaryOp { expr: e, .. }
708        | Expr::IsNull(e)
709        | Expr::IsNotNull(e)
710        | Expr::IsUnique(e) => extract_inner_aggregates_rec(e, out),
711        Expr::Property(base, _) => extract_inner_aggregates_rec(base, out),
712        Expr::List(items) => {
713            for item in items {
714                extract_inner_aggregates_rec(item, out);
715            }
716        }
717        Expr::Case {
718            expr: case_expr,
719            when_then,
720            else_expr,
721        } => {
722            if let Some(e) = case_expr {
723                extract_inner_aggregates_rec(e, out);
724            }
725            for (w, t) in when_then {
726                extract_inner_aggregates_rec(w, out);
727                extract_inner_aggregates_rec(t, out);
728            }
729            if let Some(e) = else_expr {
730                extract_inner_aggregates_rec(e, out);
731            }
732        }
733        Expr::In {
734            expr: in_expr,
735            list,
736        } => {
737            extract_inner_aggregates_rec(in_expr, out);
738            extract_inner_aggregates_rec(list, out);
739        }
740        Expr::ArrayIndex { array, index } => {
741            extract_inner_aggregates_rec(array, out);
742            extract_inner_aggregates_rec(index, out);
743        }
744        Expr::ArraySlice { array, start, end } => {
745            extract_inner_aggregates_rec(array, out);
746            if let Some(s) = start {
747                extract_inner_aggregates_rec(s, out);
748            }
749            if let Some(e) = end {
750                extract_inner_aggregates_rec(e, out);
751            }
752        }
753        Expr::Map(entries) => {
754            for (_, v) in entries {
755                extract_inner_aggregates_rec(v, out);
756            }
757        }
758        _ => {}
759    }
760}
761
762/// Return a copy of `expr` with every inner aggregate FunctionCall replaced by
763/// `Expr::Variable(aggregate_column_name(agg))`.
764///
765/// For `ListComprehension`/`Quantifier`/`Reduce`, only the `list` field is
766/// rewritten (the body references the loop variable, not outer-scope columns).
767fn replace_aggregates_with_columns(expr: &Expr) -> Expr {
768    match expr {
769        Expr::FunctionCall {
770            name, window_spec, ..
771        } if window_spec.is_none() && is_aggregate_function_name(name) => {
772            // Replace bare aggregate with column reference
773            Expr::Variable(aggregate_column_name(expr))
774        }
775        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
776            Expr::Variable(aggregate_column_name(expr))
777        }
778        Expr::ListComprehension {
779            variable,
780            list,
781            where_clause,
782            map_expr,
783        } => Expr::ListComprehension {
784            variable: variable.clone(),
785            list: Box::new(replace_aggregates_with_columns(list)),
786            where_clause: where_clause.clone(), // don't touch — references loop var
787            map_expr: map_expr.clone(),         // don't touch — references loop var
788        },
789        Expr::Quantifier {
790            quantifier,
791            variable,
792            list,
793            predicate,
794        } => Expr::Quantifier {
795            quantifier: *quantifier,
796            variable: variable.clone(),
797            list: Box::new(replace_aggregates_with_columns(list)),
798            predicate: predicate.clone(), // don't touch — references loop var
799        },
800        Expr::Reduce {
801            accumulator,
802            init,
803            variable,
804            list,
805            expr: body,
806        } => Expr::Reduce {
807            accumulator: accumulator.clone(),
808            init: Box::new(replace_aggregates_with_columns(init)),
809            variable: variable.clone(),
810            list: Box::new(replace_aggregates_with_columns(list)),
811            expr: body.clone(), // don't touch — references loop var
812        },
813        Expr::FunctionCall {
814            name,
815            args,
816            distinct,
817            window_spec,
818        } => Expr::FunctionCall {
819            name: name.clone(),
820            args: args.iter().map(replace_aggregates_with_columns).collect(),
821            distinct: *distinct,
822            window_spec: window_spec.clone(),
823        },
824        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
825            left: Box::new(replace_aggregates_with_columns(left)),
826            op: *op,
827            right: Box::new(replace_aggregates_with_columns(right)),
828        },
829        Expr::UnaryOp { op, expr: e } => Expr::UnaryOp {
830            op: *op,
831            expr: Box::new(replace_aggregates_with_columns(e)),
832        },
833        Expr::IsNull(e) => Expr::IsNull(Box::new(replace_aggregates_with_columns(e))),
834        Expr::IsNotNull(e) => Expr::IsNotNull(Box::new(replace_aggregates_with_columns(e))),
835        Expr::IsUnique(e) => Expr::IsUnique(Box::new(replace_aggregates_with_columns(e))),
836        Expr::Property(base, prop) => Expr::Property(
837            Box::new(replace_aggregates_with_columns(base)),
838            prop.clone(),
839        ),
840        Expr::List(items) => {
841            Expr::List(items.iter().map(replace_aggregates_with_columns).collect())
842        }
843        Expr::Case {
844            expr: case_expr,
845            when_then,
846            else_expr,
847        } => Expr::Case {
848            expr: case_expr
849                .as_ref()
850                .map(|e| Box::new(replace_aggregates_with_columns(e))),
851            when_then: when_then
852                .iter()
853                .map(|(w, t)| {
854                    (
855                        replace_aggregates_with_columns(w),
856                        replace_aggregates_with_columns(t),
857                    )
858                })
859                .collect(),
860            else_expr: else_expr
861                .as_ref()
862                .map(|e| Box::new(replace_aggregates_with_columns(e))),
863        },
864        Expr::In {
865            expr: in_expr,
866            list,
867        } => Expr::In {
868            expr: Box::new(replace_aggregates_with_columns(in_expr)),
869            list: Box::new(replace_aggregates_with_columns(list)),
870        },
871        Expr::ArrayIndex { array, index } => Expr::ArrayIndex {
872            array: Box::new(replace_aggregates_with_columns(array)),
873            index: Box::new(replace_aggregates_with_columns(index)),
874        },
875        Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
876            array: Box::new(replace_aggregates_with_columns(array)),
877            start: start
878                .as_ref()
879                .map(|e| Box::new(replace_aggregates_with_columns(e))),
880            end: end
881                .as_ref()
882                .map(|e| Box::new(replace_aggregates_with_columns(e))),
883        },
884        Expr::Map(entries) => Expr::Map(
885            entries
886                .iter()
887                .map(|(k, v)| (k.clone(), replace_aggregates_with_columns(v)))
888                .collect(),
889        ),
890        // Leaf expressions — return as-is
891        other => other.clone(),
892    }
893}
894
895/// Check if an expression contains any aggregate function (recursively).
896fn contains_aggregate_recursive(expr: &Expr) -> bool {
897    match expr {
898        Expr::FunctionCall { name, args, .. } => {
899            is_aggregate_function_name(name) || args.iter().any(contains_aggregate_recursive)
900        }
901        Expr::BinaryOp { left, right, .. } => {
902            contains_aggregate_recursive(left) || contains_aggregate_recursive(right)
903        }
904        Expr::UnaryOp { expr: e, .. }
905        | Expr::IsNull(e)
906        | Expr::IsNotNull(e)
907        | Expr::IsUnique(e) => contains_aggregate_recursive(e),
908        Expr::List(items) => items.iter().any(contains_aggregate_recursive),
909        Expr::Case {
910            expr,
911            when_then,
912            else_expr,
913        } => {
914            expr.as_deref().is_some_and(contains_aggregate_recursive)
915                || when_then.iter().any(|(w, t)| {
916                    contains_aggregate_recursive(w) || contains_aggregate_recursive(t)
917                })
918                || else_expr
919                    .as_deref()
920                    .is_some_and(contains_aggregate_recursive)
921        }
922        Expr::In { expr, list } => {
923            contains_aggregate_recursive(expr) || contains_aggregate_recursive(list)
924        }
925        Expr::Property(base, _) => contains_aggregate_recursive(base),
926        Expr::ListComprehension { list, .. } => {
927            // Only check the list source — where_clause/map_expr reference the loop variable
928            contains_aggregate_recursive(list)
929        }
930        Expr::Quantifier { list, .. } => contains_aggregate_recursive(list),
931        Expr::Reduce { init, list, .. } => {
932            contains_aggregate_recursive(init) || contains_aggregate_recursive(list)
933        }
934        Expr::ArrayIndex { array, index } => {
935            contains_aggregate_recursive(array) || contains_aggregate_recursive(index)
936        }
937        Expr::ArraySlice { array, start, end } => {
938            contains_aggregate_recursive(array)
939                || start.as_deref().is_some_and(contains_aggregate_recursive)
940                || end.as_deref().is_some_and(contains_aggregate_recursive)
941        }
942        Expr::Map(entries) => entries.iter().any(|(_, v)| contains_aggregate_recursive(v)),
943        _ => false,
944    }
945}
946
947/// Check if an expression contains a non-deterministic function (e.g. rand()).
948fn contains_non_deterministic(expr: &Expr) -> bool {
949    if matches!(expr, Expr::FunctionCall { name, .. } if name.eq_ignore_ascii_case("rand")) {
950        return true;
951    }
952    let mut found = false;
953    expr.for_each_child(&mut |child| {
954        if !found {
955            found = contains_non_deterministic(child);
956        }
957    });
958    found
959}
960
961fn collect_aggregate_reprs(expr: &Expr, out: &mut HashSet<String>) {
962    match expr {
963        Expr::FunctionCall { name, args, .. } => {
964            if is_aggregate_function_name(name) {
965                out.insert(expr.to_string_repr());
966                return;
967            }
968            for arg in args {
969                collect_aggregate_reprs(arg, out);
970            }
971        }
972        Expr::BinaryOp { left, right, .. } => {
973            collect_aggregate_reprs(left, out);
974            collect_aggregate_reprs(right, out);
975        }
976        Expr::UnaryOp { expr, .. }
977        | Expr::IsNull(expr)
978        | Expr::IsNotNull(expr)
979        | Expr::IsUnique(expr) => collect_aggregate_reprs(expr, out),
980        Expr::List(items) => {
981            for item in items {
982                collect_aggregate_reprs(item, out);
983            }
984        }
985        Expr::Case {
986            expr,
987            when_then,
988            else_expr,
989        } => {
990            if let Some(e) = expr {
991                collect_aggregate_reprs(e, out);
992            }
993            for (w, t) in when_then {
994                collect_aggregate_reprs(w, out);
995                collect_aggregate_reprs(t, out);
996            }
997            if let Some(e) = else_expr {
998                collect_aggregate_reprs(e, out);
999            }
1000        }
1001        Expr::In { expr, list } => {
1002            collect_aggregate_reprs(expr, out);
1003            collect_aggregate_reprs(list, out);
1004        }
1005        Expr::Property(base, _) => collect_aggregate_reprs(base, out),
1006        Expr::ListComprehension { list, .. } => {
1007            collect_aggregate_reprs(list, out);
1008        }
1009        Expr::Quantifier { list, .. } => {
1010            collect_aggregate_reprs(list, out);
1011        }
1012        Expr::Reduce { init, list, .. } => {
1013            collect_aggregate_reprs(init, out);
1014            collect_aggregate_reprs(list, out);
1015        }
1016        Expr::ArrayIndex { array, index } => {
1017            collect_aggregate_reprs(array, out);
1018            collect_aggregate_reprs(index, out);
1019        }
1020        Expr::ArraySlice { array, start, end } => {
1021            collect_aggregate_reprs(array, out);
1022            if let Some(s) = start {
1023                collect_aggregate_reprs(s, out);
1024            }
1025            if let Some(e) = end {
1026                collect_aggregate_reprs(e, out);
1027            }
1028        }
1029        _ => {}
1030    }
1031}
1032
/// A reference found outside of any aggregate call while walking an
/// expression (see `collect_non_aggregate_refs`).
#[derive(Debug, Clone)]
enum NonAggregateRef {
    /// A bare variable reference, holding the variable name.
    Var(String),
    /// A property access.
    Property {
        /// String representation of the whole property expression.
        repr: String,
        /// Name of the base variable when the property is accessed directly
        /// on a variable; `None` for any other base expression.
        base_var: Option<String>,
    },
}
1041
1042fn collect_non_aggregate_refs(expr: &Expr, inside_agg: bool, out: &mut Vec<NonAggregateRef>) {
1043    match expr {
1044        Expr::FunctionCall { name, args, .. } => {
1045            if is_aggregate_function_name(name) {
1046                return;
1047            }
1048            for arg in args {
1049                collect_non_aggregate_refs(arg, inside_agg, out);
1050            }
1051        }
1052        Expr::Variable(v) if !inside_agg => out.push(NonAggregateRef::Var(v.clone())),
1053        Expr::Property(base, _) if !inside_agg => {
1054            let base_var = if let Expr::Variable(v) = base.as_ref() {
1055                Some(v.clone())
1056            } else {
1057                None
1058            };
1059            out.push(NonAggregateRef::Property {
1060                repr: expr.to_string_repr(),
1061                base_var,
1062            });
1063        }
1064        Expr::BinaryOp { left, right, .. } => {
1065            collect_non_aggregate_refs(left, inside_agg, out);
1066            collect_non_aggregate_refs(right, inside_agg, out);
1067        }
1068        Expr::UnaryOp { expr, .. }
1069        | Expr::IsNull(expr)
1070        | Expr::IsNotNull(expr)
1071        | Expr::IsUnique(expr) => collect_non_aggregate_refs(expr, inside_agg, out),
1072        Expr::List(items) => {
1073            for item in items {
1074                collect_non_aggregate_refs(item, inside_agg, out);
1075            }
1076        }
1077        Expr::Case {
1078            expr,
1079            when_then,
1080            else_expr,
1081        } => {
1082            if let Some(e) = expr {
1083                collect_non_aggregate_refs(e, inside_agg, out);
1084            }
1085            for (w, t) in when_then {
1086                collect_non_aggregate_refs(w, inside_agg, out);
1087                collect_non_aggregate_refs(t, inside_agg, out);
1088            }
1089            if let Some(e) = else_expr {
1090                collect_non_aggregate_refs(e, inside_agg, out);
1091            }
1092        }
1093        Expr::In { expr, list } => {
1094            collect_non_aggregate_refs(expr, inside_agg, out);
1095            collect_non_aggregate_refs(list, inside_agg, out);
1096        }
1097        // For ListComprehension/Quantifier/Reduce, only recurse into the `list`
1098        // source. The body references the loop variable, not outer-scope vars.
1099        Expr::ListComprehension { list, .. } => {
1100            collect_non_aggregate_refs(list, inside_agg, out);
1101        }
1102        Expr::Quantifier { list, .. } => {
1103            collect_non_aggregate_refs(list, inside_agg, out);
1104        }
1105        Expr::Reduce { init, list, .. } => {
1106            collect_non_aggregate_refs(init, inside_agg, out);
1107            collect_non_aggregate_refs(list, inside_agg, out);
1108        }
1109        _ => {}
1110    }
1111}
1112
1113fn validate_with_order_by_aggregate_item(
1114    expr: &Expr,
1115    projected_aggregate_reprs: &HashSet<String>,
1116    projected_simple_reprs: &HashSet<String>,
1117    projected_aliases: &HashSet<String>,
1118) -> Result<()> {
1119    let mut aggregate_reprs = HashSet::new();
1120    collect_aggregate_reprs(expr, &mut aggregate_reprs);
1121    for agg in aggregate_reprs {
1122        if !projected_aggregate_reprs.contains(&agg) {
1123            return Err(anyhow!(
1124                "SyntaxError: UndefinedVariable - Aggregation expression '{}' is not projected in WITH",
1125                agg
1126            ));
1127        }
1128    }
1129
1130    let mut refs = Vec::new();
1131    collect_non_aggregate_refs(expr, false, &mut refs);
1132    refs.retain(|r| match r {
1133        NonAggregateRef::Var(v) => !projected_aliases.contains(v),
1134        NonAggregateRef::Property { repr, .. } => !projected_simple_reprs.contains(repr),
1135    });
1136
1137    let mut dedup = HashSet::new();
1138    refs.retain(|r| {
1139        let key = match r {
1140            NonAggregateRef::Var(v) => format!("v:{v}"),
1141            NonAggregateRef::Property { repr, .. } => format!("p:{repr}"),
1142        };
1143        dedup.insert(key)
1144    });
1145
1146    if refs.len() > 1 {
1147        return Err(anyhow!(
1148            "SyntaxError: AmbiguousAggregationExpression - ORDER BY item mixes aggregation with multiple non-grouping references"
1149        ));
1150    }
1151
1152    if let Some(r) = refs.first() {
1153        return match r {
1154            NonAggregateRef::Var(v) => Err(anyhow!(
1155                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1156                v
1157            )),
1158            NonAggregateRef::Property { base_var, .. } => Err(anyhow!(
1159                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1160                base_var
1161                    .clone()
1162                    .unwrap_or_else(|| "<property-base>".to_string())
1163            )),
1164        };
1165    }
1166
1167    Ok(())
1168}
1169
1170/// Validate that no aggregation functions appear in WHERE clause.
1171fn validate_no_aggregation_in_where(predicate: &Expr) -> Result<()> {
1172    if contains_aggregate_recursive(predicate) {
1173        return Err(anyhow!(
1174            "SyntaxError: InvalidAggregation - Aggregation functions not allowed in WHERE"
1175        ));
1176    }
1177    Ok(())
1178}
1179
/// Result of evaluating a constant numeric expression: either an integer or
/// a floating-point value.
#[derive(Debug, Clone, Copy)]
enum ConstNumber {
    /// 64-bit signed integer value.
    Int(i64),
    /// 64-bit floating-point value.
    Float(f64),
}

impl ConstNumber {
    /// Convert to `f64`: floats are returned unchanged, integers are cast
    /// with `as`.
    fn to_f64(self) -> f64 {
        match self {
            Self::Float(value) => value,
            Self::Int(value) => value as f64,
        }
    }
}
1194
1195fn eval_const_numeric_expr(
1196    expr: &Expr,
1197    params: &HashMap<String, uni_common::Value>,
1198) -> Result<ConstNumber> {
1199    match expr {
1200        Expr::Literal(CypherLiteral::Integer(n)) => Ok(ConstNumber::Int(*n)),
1201        Expr::Literal(CypherLiteral::Float(f)) => Ok(ConstNumber::Float(*f)),
1202        Expr::Parameter(name) => match params.get(name) {
1203            Some(uni_common::Value::Int(n)) => Ok(ConstNumber::Int(*n)),
1204            Some(uni_common::Value::Float(f)) => Ok(ConstNumber::Float(*f)),
1205            Some(uni_common::Value::Null) => Err(anyhow!(
1206                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got null",
1207                name
1208            )),
1209            Some(other) => Err(anyhow!(
1210                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got {:?}",
1211                name,
1212                other
1213            )),
1214            None => Err(anyhow!(
1215                "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1216            )),
1217        },
1218        Expr::UnaryOp {
1219            op: uni_cypher::ast::UnaryOp::Neg,
1220            expr,
1221        } => match eval_const_numeric_expr(expr, params)? {
1222            ConstNumber::Int(v) => Ok(ConstNumber::Int(-v)),
1223            ConstNumber::Float(v) => Ok(ConstNumber::Float(-v)),
1224        },
1225        Expr::BinaryOp { left, op, right } => {
1226            let l = eval_const_numeric_expr(left, params)?;
1227            let r = eval_const_numeric_expr(right, params)?;
1228            match op {
1229                BinaryOp::Add => match (l, r) {
1230                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a + b)),
1231                    _ => Ok(ConstNumber::Float(l.to_f64() + r.to_f64())),
1232                },
1233                BinaryOp::Sub => match (l, r) {
1234                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a - b)),
1235                    _ => Ok(ConstNumber::Float(l.to_f64() - r.to_f64())),
1236                },
1237                BinaryOp::Mul => match (l, r) {
1238                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a * b)),
1239                    _ => Ok(ConstNumber::Float(l.to_f64() * r.to_f64())),
1240                },
1241                BinaryOp::Div => Ok(ConstNumber::Float(l.to_f64() / r.to_f64())),
1242                BinaryOp::Mod => match (l, r) {
1243                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a % b)),
1244                    _ => Ok(ConstNumber::Float(l.to_f64() % r.to_f64())),
1245                },
1246                BinaryOp::Pow => Ok(ConstNumber::Float(l.to_f64().powf(r.to_f64()))),
1247                _ => Err(anyhow!(
1248                    "SyntaxError: InvalidArgumentType - unsupported operator in constant expression"
1249                )),
1250            }
1251        }
1252        Expr::FunctionCall { name, args, .. } => {
1253            let lower = name.to_lowercase();
1254            match lower.as_str() {
1255                "rand" if args.is_empty() => {
1256                    use rand::RngExt;
1257                    let mut rng = rand::rng();
1258                    Ok(ConstNumber::Float(rng.random::<f64>()))
1259                }
1260                "tointeger" | "toint" if args.len() == 1 => {
1261                    match eval_const_numeric_expr(&args[0], params)? {
1262                        ConstNumber::Int(v) => Ok(ConstNumber::Int(v)),
1263                        ConstNumber::Float(v) => Ok(ConstNumber::Int(v.trunc() as i64)),
1264                    }
1265                }
1266                "ceil" if args.len() == 1 => Ok(ConstNumber::Float(
1267                    eval_const_numeric_expr(&args[0], params)?.to_f64().ceil(),
1268                )),
1269                "floor" if args.len() == 1 => Ok(ConstNumber::Float(
1270                    eval_const_numeric_expr(&args[0], params)?.to_f64().floor(),
1271                )),
1272                "abs" if args.len() == 1 => match eval_const_numeric_expr(&args[0], params)? {
1273                    ConstNumber::Int(v) => Ok(ConstNumber::Int(v.abs())),
1274                    ConstNumber::Float(v) => Ok(ConstNumber::Float(v.abs())),
1275                },
1276                _ => Err(anyhow!(
1277                    "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1278                )),
1279            }
1280        }
1281        _ => Err(anyhow!(
1282            "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1283        )),
1284    }
1285}
1286
1287/// Parse and validate a non-negative integer expression for SKIP or LIMIT.
1288/// Returns `Ok(Some(n))` for valid constants, or an error for negative/float/non-constant values.
1289fn parse_non_negative_integer(
1290    expr: &Expr,
1291    clause_name: &str,
1292    params: &HashMap<String, uni_common::Value>,
1293) -> Result<Option<usize>> {
1294    let referenced_vars = collect_expr_variables(expr);
1295    if !referenced_vars.is_empty() {
1296        return Err(anyhow!(
1297            "SyntaxError: NonConstantExpression - {} requires expression independent of row variables",
1298            clause_name
1299        ));
1300    }
1301
1302    let value = eval_const_numeric_expr(expr, params)?;
1303    let as_int = match value {
1304        ConstNumber::Int(v) => v,
1305        ConstNumber::Float(v) => {
1306            if !v.is_finite() || (v.fract().abs() > f64::EPSILON) {
1307                return Err(anyhow!(
1308                    "SyntaxError: InvalidArgumentType - {} requires integer, got float",
1309                    clause_name
1310                ));
1311            }
1312            v as i64
1313        }
1314    };
1315    if as_int < 0 {
1316        return Err(anyhow!(
1317            "SyntaxError: NegativeIntegerArgument - {} requires non-negative integer",
1318            clause_name
1319        ));
1320    }
1321    Ok(Some(as_int as usize))
1322}
1323
1324/// Validate that aggregation functions are not nested.
1325fn validate_no_nested_aggregation(expr: &Expr) -> Result<()> {
1326    if let Expr::FunctionCall { name, args, .. } = expr
1327        && is_aggregate_function_name(name)
1328    {
1329        for arg in args {
1330            if contains_aggregate_recursive(arg) {
1331                return Err(anyhow!(
1332                    "SyntaxError: NestedAggregation - Cannot nest aggregation functions"
1333                ));
1334            }
1335            if contains_non_deterministic(arg) {
1336                return Err(anyhow!(
1337                    "SyntaxError: NonConstantExpression - Non-deterministic function inside aggregation"
1338                ));
1339            }
1340        }
1341    }
1342    let mut result = Ok(());
1343    expr.for_each_child(&mut |child| {
1344        if result.is_ok() {
1345            result = validate_no_nested_aggregation(child);
1346        }
1347    });
1348    result
1349}
1350
1351/// Validate that an expression does not access properties or labels of
1352/// deleted entities. `type(r)` on a deleted relationship is allowed per
1353/// OpenCypher spec, but `n.prop` and `labels(n)` are not.
1354fn validate_no_deleted_entity_access(expr: &Expr, deleted_vars: &HashSet<String>) -> Result<()> {
1355    // Check n.prop on a deleted variable
1356    if let Expr::Property(inner, _) = expr
1357        && let Expr::Variable(name) = inner.as_ref()
1358        && deleted_vars.contains(name)
1359    {
1360        return Err(anyhow!(
1361            "EntityNotFound: DeletedEntityAccess - Cannot access properties of deleted entity '{}'",
1362            name
1363        ));
1364    }
1365    // Check labels(n) or keys(n) on a deleted variable
1366    if let Expr::FunctionCall { name, args, .. } = expr
1367        && matches!(name.to_lowercase().as_str(), "labels" | "keys")
1368        && args.len() == 1
1369        && let Expr::Variable(var) = &args[0]
1370        && deleted_vars.contains(var)
1371    {
1372        return Err(anyhow!(
1373            "EntityNotFound: DeletedEntityAccess - Cannot access {} of deleted entity '{}'",
1374            name.to_lowercase(),
1375            var
1376        ));
1377    }
1378    let mut result = Ok(());
1379    expr.for_each_child(&mut |child| {
1380        if result.is_ok() {
1381            result = validate_no_deleted_entity_access(child, deleted_vars);
1382        }
1383    });
1384    result
1385}
1386
1387/// Validate that all variables referenced in properties are defined,
1388/// either in scope or in the local CREATE variable list.
1389fn validate_property_variables(
1390    properties: &Option<Expr>,
1391    vars_in_scope: &[VariableInfo],
1392    create_vars: &[&str],
1393) -> Result<()> {
1394    if let Some(props) = properties {
1395        for var in collect_expr_variables(props) {
1396            if !is_var_in_scope(vars_in_scope, &var) && !create_vars.contains(&var.as_str()) {
1397                return Err(anyhow!(
1398                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1399                    var
1400                ));
1401            }
1402        }
1403    }
1404    Ok(())
1405}
1406
1407/// Check that a variable name is not already bound in scope or in the local CREATE list.
1408/// Used to prevent rebinding in CREATE clauses.
1409fn check_not_already_bound(
1410    name: &str,
1411    vars_in_scope: &[VariableInfo],
1412    create_vars: &[&str],
1413) -> Result<()> {
1414    if is_var_in_scope(vars_in_scope, name) {
1415        return Err(anyhow!(
1416            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1417            name
1418        ));
1419    }
1420    if create_vars.contains(&name) {
1421        return Err(anyhow!(
1422            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined in CREATE",
1423            name
1424        ));
1425    }
1426    Ok(())
1427}
1428
1429fn build_merge_scope(pattern: &Pattern, vars_in_scope: &[VariableInfo]) -> Vec<VariableInfo> {
1430    let mut scope = vars_in_scope.to_vec();
1431
1432    for path in &pattern.paths {
1433        if let Some(path_var) = &path.variable
1434            && !path_var.is_empty()
1435            && !is_var_in_scope(&scope, path_var)
1436        {
1437            scope.push(VariableInfo::new(path_var.clone(), VariableType::Path));
1438        }
1439        for element in &path.elements {
1440            match element {
1441                PatternElement::Node(n) => {
1442                    if let Some(v) = &n.variable
1443                        && !v.is_empty()
1444                        && !is_var_in_scope(&scope, v)
1445                    {
1446                        scope.push(VariableInfo::new(v.clone(), VariableType::Node));
1447                    }
1448                }
1449                PatternElement::Relationship(r) => {
1450                    if let Some(v) = &r.variable
1451                        && !v.is_empty()
1452                        && !is_var_in_scope(&scope, v)
1453                    {
1454                        scope.push(VariableInfo::new(v.clone(), VariableType::Edge));
1455                    }
1456                }
1457                PatternElement::Parenthesized { .. } => {}
1458            }
1459        }
1460    }
1461
1462    scope
1463}
1464
1465fn validate_merge_set_item(item: &SetItem, vars_in_scope: &[VariableInfo]) -> Result<()> {
1466    match item {
1467        SetItem::Property { expr, value } => {
1468            validate_expression_variables(expr, vars_in_scope)?;
1469            validate_expression(expr, vars_in_scope)?;
1470            validate_expression_variables(value, vars_in_scope)?;
1471            validate_expression(value, vars_in_scope)?;
1472            if contains_pattern_predicate(expr) || contains_pattern_predicate(value) {
1473                return Err(anyhow!(
1474                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1475                ));
1476            }
1477        }
1478        SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
1479            if !is_var_in_scope(vars_in_scope, variable) {
1480                return Err(anyhow!(
1481                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1482                    variable
1483                ));
1484            }
1485            validate_expression_variables(value, vars_in_scope)?;
1486            validate_expression(value, vars_in_scope)?;
1487            if contains_pattern_predicate(value) {
1488                return Err(anyhow!(
1489                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1490                ));
1491            }
1492        }
1493        SetItem::Labels { variable, .. } => {
1494            if !is_var_in_scope(vars_in_scope, variable) {
1495                return Err(anyhow!(
1496                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1497                    variable
1498                ));
1499            }
1500        }
1501    }
1502
1503    Ok(())
1504}
1505
1506/// Reject MERGE patterns containing null property values (e.g. `MERGE ({k: null})`).
1507/// The OpenCypher spec requires all property values in MERGE to be non-null.
1508fn reject_null_merge_properties(properties: &Option<Expr>) -> Result<()> {
1509    if let Some(Expr::Map(entries)) = properties {
1510        for (key, value) in entries {
1511            if matches!(value, Expr::Literal(CypherLiteral::Null)) {
1512                return Err(anyhow!(
1513                    "SemanticError: MergeReadOwnWrites - MERGE cannot use null property value for '{}'",
1514                    key
1515                ));
1516            }
1517        }
1518    }
1519    Ok(())
1520}
1521
1522/// Flatten every label name appearing in a `Pattern` (across all paths
1523/// and node elements). Used by the M5 follow-up #6 write-rejection
1524/// guard to refuse CREATE/MERGE that names a virtual catalog-resolved
1525/// label.
1526fn collect_pattern_labels(pattern: &uni_cypher::ast::Pattern) -> Vec<String> {
1527    let mut out = Vec::new();
1528    for path in &pattern.paths {
1529        for element in &path.elements {
1530            if let PatternElement::Node(n) = element {
1531                for l in n.labels.names() {
1532                    out.push(l.clone());
1533                }
1534            }
1535        }
1536    }
1537    out
1538}
1539
1540fn validate_merge_clause(merge_clause: &MergeClause, vars_in_scope: &[VariableInfo]) -> Result<()> {
1541    for path in &merge_clause.pattern.paths {
1542        for element in &path.elements {
1543            match element {
1544                PatternElement::Node(n) => {
1545                    if let Some(Expr::Parameter(_)) = &n.properties {
1546                        return Err(anyhow!(
1547                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
1548                        ));
1549                    }
1550                    reject_null_merge_properties(&n.properties)?;
1551                    // VariableAlreadyBound: reject if a bound variable is used
1552                    // as a standalone MERGE node or introduces new labels/properties.
1553                    // Bare endpoint references like (a) in MERGE (a)-[:R]->(b) are valid.
1554                    if let Some(variable) = &n.variable
1555                        && !variable.is_empty()
1556                        && is_var_in_scope(vars_in_scope, variable)
1557                    {
1558                        let is_standalone = path.elements.len() == 1;
1559                        let has_new_labels = !n.labels.is_empty();
1560                        let has_new_properties = n.properties.is_some();
1561                        if is_standalone || has_new_labels || has_new_properties {
1562                            return Err(anyhow!(
1563                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1564                                variable
1565                            ));
1566                        }
1567                    }
1568                }
1569                PatternElement::Relationship(r) => {
1570                    if let Some(variable) = &r.variable
1571                        && !variable.is_empty()
1572                        && is_var_in_scope(vars_in_scope, variable)
1573                    {
1574                        return Err(anyhow!(
1575                            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1576                            variable
1577                        ));
1578                    }
1579                    if r.types.len() != 1 {
1580                        return Err(anyhow!(
1581                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for MERGE"
1582                        ));
1583                    }
1584                    if r.range.is_some() {
1585                        return Err(anyhow!(
1586                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
1587                        ));
1588                    }
1589                    if let Some(Expr::Parameter(_)) = &r.properties {
1590                        return Err(anyhow!(
1591                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
1592                        ));
1593                    }
1594                    reject_null_merge_properties(&r.properties)?;
1595                }
1596                PatternElement::Parenthesized { .. } => {}
1597            }
1598        }
1599    }
1600
1601    let merge_scope = build_merge_scope(&merge_clause.pattern, vars_in_scope);
1602    for item in &merge_clause.on_create {
1603        validate_merge_set_item(item, &merge_scope)?;
1604    }
1605    for item in &merge_clause.on_match {
1606        validate_merge_set_item(item, &merge_scope)?;
1607    }
1608
1609    Ok(())
1610}
1611
1612/// Recursively validate an expression for type errors, undefined variables, etc.
1613fn validate_expression(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
1614    // Validate boolean operators and nested aggregation first
1615    validate_boolean_expression(expr)?;
1616    validate_no_nested_aggregation(expr)?;
1617
1618    // Helper to validate multiple expressions
1619    fn validate_all(exprs: &[Expr], vars: &[VariableInfo]) -> Result<()> {
1620        for e in exprs {
1621            validate_expression(e, vars)?;
1622        }
1623        Ok(())
1624    }
1625
1626    match expr {
1627        Expr::FunctionCall { name, args, .. } => {
1628            validate_function_call(name, args, vars_in_scope)?;
1629            validate_all(args, vars_in_scope)
1630        }
1631        Expr::BinaryOp { left, right, .. } => {
1632            validate_expression(left, vars_in_scope)?;
1633            validate_expression(right, vars_in_scope)
1634        }
1635        Expr::UnaryOp { expr: e, .. }
1636        | Expr::IsNull(e)
1637        | Expr::IsNotNull(e)
1638        | Expr::IsUnique(e) => validate_expression(e, vars_in_scope),
1639        Expr::Property(base, prop) => {
1640            if let Expr::Variable(var_name) = base.as_ref()
1641                && let Some(var_info) = find_var_in_scope(vars_in_scope, var_name)
1642            {
1643                // Paths don't have properties
1644                if var_info.var_type == VariableType::Path {
1645                    return Err(anyhow!(
1646                        "SyntaxError: InvalidArgumentType - Type mismatch: expected Node or Relationship but was Path for property access '{}.{}'",
1647                        var_name,
1648                        prop
1649                    ));
1650                }
1651                // Known non-graph literals (int, float, bool, string, list) don't have properties
1652                if var_info.var_type == VariableType::ScalarLiteral {
1653                    return Err(anyhow!(
1654                        "TypeError: InvalidArgumentType - Property access on a non-graph element is not allowed"
1655                    ));
1656                }
1657            }
1658            validate_expression(base, vars_in_scope)
1659        }
1660        Expr::List(items) => validate_all(items, vars_in_scope),
1661        Expr::Case {
1662            expr: case_expr,
1663            when_then,
1664            else_expr,
1665        } => {
1666            if let Some(e) = case_expr {
1667                validate_expression(e, vars_in_scope)?;
1668            }
1669            for (w, t) in when_then {
1670                validate_expression(w, vars_in_scope)?;
1671                validate_expression(t, vars_in_scope)?;
1672            }
1673            if let Some(e) = else_expr {
1674                validate_expression(e, vars_in_scope)?;
1675            }
1676            Ok(())
1677        }
1678        Expr::In { expr: e, list } => {
1679            validate_expression(e, vars_in_scope)?;
1680            validate_expression(list, vars_in_scope)
1681        }
1682        Expr::Exists {
1683            query,
1684            from_pattern_predicate: true,
1685        } => {
1686            // Pattern predicates cannot introduce new named variables.
1687            // Extract named vars from inner MATCH pattern, check each is in scope.
1688            if let Query::Single(stmt) = query.as_ref() {
1689                for clause in &stmt.clauses {
1690                    if let Clause::Match(m) = clause {
1691                        for path in &m.pattern.paths {
1692                            for elem in &path.elements {
1693                                match elem {
1694                                    PatternElement::Node(n) => {
1695                                        if let Some(var) = &n.variable
1696                                            && !is_var_in_scope(vars_in_scope, var)
1697                                        {
1698                                            return Err(anyhow!(
1699                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1700                                                var
1701                                            ));
1702                                        }
1703                                    }
1704                                    PatternElement::Relationship(r) => {
1705                                        if let Some(var) = &r.variable
1706                                            && !is_var_in_scope(vars_in_scope, var)
1707                                        {
1708                                            return Err(anyhow!(
1709                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1710                                                var
1711                                            ));
1712                                        }
1713                                    }
1714                                    _ => {}
1715                                }
1716                            }
1717                        }
1718                    }
1719                }
1720            }
1721            Ok(())
1722        }
1723        _ => Ok(()),
1724    }
1725}
1726
1727/// One step (hop) in a Quantified Path Pattern sub-pattern.
1728///
1729/// Used by `LogicalPlan::Traverse` when `qpp_steps` is `Some`.
1730#[derive(Debug, Clone)]
1731pub struct QppStepInfo {
1732    /// Edge type IDs that this step can traverse.
1733    pub edge_type_ids: Vec<u32>,
1734    /// Traversal direction for this step.
1735    pub direction: Direction,
1736    /// Optional label constraint on the target node.
1737    pub target_label: Option<String>,
1738}
1739
/// Per-type fusion strategy attached to `LogicalPlan::FusedIndexScan`
/// (introduced in Phase 5a-impl, extended in Phase 5b).
///
/// Marked `#[non_exhaustive]` so that further strategies can be added
/// without breaking exhaustive pattern matches in downstream crates.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum FusionKind {
    /// Parent and fork-local BTree hits are unioned and deduplicated by VID.
    BtreeUnion,
    /// Pre-sorted parent and fork streams are combined with a k-way merge
    /// (ORDER BY).
    SortedKWayMerge,
    /// UID lookup that consults the fork first and falls back to the parent
    /// on a miss. Used when a fork rebinds an external UID, so that queries
    /// observe the fork's binding ahead of the parent's.
    VidUidForkFirst,
    /// Phase 5b — vector ANN rerank: the top-k from the primary's index and
    /// the top-k from the fork-local index are merged, then reranked by
    /// exact distance. Recall ≥ 95% per spec §8.2.
    AnnRerank,
    /// Phase 5b — BM25 reciprocal rank fusion: the ranked lists from the
    /// primary's and the fork-local FTS indexes are combined via standard
    /// RRF (`score = sum 1 / (k_rrf + rank_i)`, k_rrf = 60).
    Bm25Rrf,
}
1764
1765/// Logical query plan produced by [`QueryPlanner`].
1766///
1767/// Each variant represents one step in the Cypher execution pipeline.
1768/// Plans are tree-structured — leaf nodes produce rows, intermediate nodes
1769/// transform or join them, and the root node defines the final output.
1770#[derive(Debug, Clone)]
1771pub enum LogicalPlan {
1772    /// UNION / UNION ALL of two sub-plans.
1773    Union {
1774        left: Box<LogicalPlan>,
1775        right: Box<LogicalPlan>,
1776        /// When `true`, duplicate rows are preserved (UNION ALL semantics).
1777        all: bool,
1778    },
1779    /// Scan vertices of a single labeled dataset.
1780    Scan {
1781        label_id: u16,
1782        labels: Vec<String>,
1783        variable: String,
1784        filter: Option<Expr>,
1785        optional: bool,
1786    },
1787    /// Phase 5a-impl: fused scan over both primary's index and the
1788    /// forked session's fork-local index. Emitted by the planner only
1789    /// when (a) the session is forked AND (b) `StorageManager::fork_index_exists`
1790    /// returns `Some(_)` for the target column. Otherwise the planner
1791    /// keeps emitting `Scan` and Lance's `base_paths` chain transparently
1792    /// covers parent-inherited indexes.
1793    ///
1794    /// `kind` selects the per-type fusion strategy:
1795    /// - `BtreeUnion` — union of parent + fork hits, dedup by VID.
1796    /// - `SortedKWayMerge` — k-way merge of two pre-sorted streams.
1797    /// - `VidUidForkFirst` — probe fork's branch first, fall back to
1798    ///   parent's UID index on miss.
1799    FusedIndexScan {
1800        label_id: u16,
1801        labels: Vec<String>,
1802        variable: String,
1803        filter: Option<Expr>,
1804        optional: bool,
1805        kind: FusionKind,
1806    },
1807    /// Phase 5b followup: planner-side observability marker for the
1808    /// lossy fusion types. Wraps the original `VectorKnn` or
1809    /// `InvertedIndexLookup` (or any future leaf operator whose
1810    /// shape differs from `Scan`) without changing its fields, so
1811    /// the physical planner can decay it to `inner` unchanged.
1812    ///
1813    /// Runtime behavior is identical to running `inner` directly;
1814    /// the wrap is purely for explain-plan and runtime-stats
1815    /// observability. The actual fusion happens at the
1816    /// `BranchedBackend` layer (per-branch Lance reads via
1817    /// `base_paths`), exactly as in Phase 5b's core ship.
1818    FusedIndexScanWrapped {
1819        inner: Box<LogicalPlan>,
1820        kind: FusionKind,
1821    },
1822    /// Lookup vertices by ext_id using the main vertices table.
1823    /// Used when a query references ext_id without specifying a label.
1824    ExtIdLookup {
1825        variable: String,
1826        ext_id: String,
1827        filter: Option<Expr>,
1828        optional: bool,
1829    },
1830    /// Scan all vertices from main table (MATCH (n) without label).
1831    /// Used for schemaless queries that don't specify any label.
1832    ScanAll {
1833        variable: String,
1834        filter: Option<Expr>,
1835        optional: bool,
1836    },
1837    /// Scan main table filtering by label name (MATCH (n:Unknown)).
1838    /// Used for labels not defined in schema (schemaless support).
1839    /// Scan main vertices table by label name(s) for schemaless support.
1840    /// When labels has multiple entries, uses intersection semantics (must have ALL labels).
1841    ScanMainByLabels {
1842        labels: Vec<String>,
1843        variable: String,
1844        filter: Option<Expr>,
1845        optional: bool,
1846    },
1847    /// Produces exactly one empty row (used to bootstrap pipelines with no source).
1848    Empty,
1849    /// UNWIND: expand a list expression into one row per element.
1850    Unwind {
1851        input: Box<LogicalPlan>,
1852        expr: Expr,
1853        variable: String,
1854    },
1855    Traverse {
1856        input: Box<LogicalPlan>,
1857        edge_type_ids: Vec<u32>,
1858        direction: Direction,
1859        source_variable: String,
1860        target_variable: String,
1861        target_label_id: u16,
1862        step_variable: Option<String>,
1863        min_hops: usize,
1864        max_hops: usize,
1865        optional: bool,
1866        target_filter: Option<Expr>,
1867        path_variable: Option<String>,
1868        edge_properties: HashSet<String>,
1869        /// Whether this is a variable-length pattern (has `*` range specifier).
1870        /// When true, step_variable holds a list of edges (even for *1..1).
1871        is_variable_length: bool,
1872        /// All variables from this OPTIONAL MATCH pattern.
1873        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
1874        /// This ensures proper multi-hop OPTIONAL MATCH semantics.
1875        optional_pattern_vars: HashSet<String>,
1876        /// Variable names (node + edge) from the current MATCH clause scope.
1877        /// Used for relationship uniqueness scoping: only edge ID columns whose
1878        /// associated variable is in this set participate in uniqueness filtering.
1879        /// Variables from previous disconnected MATCH clauses are excluded.
1880        scope_match_variables: HashSet<String>,
1881        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
1882        edge_filter_expr: Option<Expr>,
1883        /// Path traversal semantics (Trail by default for OpenCypher).
1884        path_mode: crate::query::df_graph::nfa::PathMode,
1885        /// QPP steps for multi-hop quantified path patterns.
1886        /// `None` for simple VLP patterns; `Some` for QPP with per-step edge types/constraints.
1887        /// When present, `min_hops`/`max_hops` are derived from iterations × steps.len().
1888        qpp_steps: Option<Vec<QppStepInfo>>,
1889    },
1890    /// Traverse main edges table filtering by type name(s) (`MATCH (a)-[:Unknown]->(b)`).
1891    /// Used for edge types not defined in schema (schemaless support).
1892    /// Supports OR relationship types like `[:KNOWS|HATES]` via multiple type_names.
1893    TraverseMainByType {
1894        type_names: Vec<String>,
1895        input: Box<LogicalPlan>,
1896        direction: Direction,
1897        source_variable: String,
1898        target_variable: String,
1899        step_variable: Option<String>,
1900        min_hops: usize,
1901        max_hops: usize,
1902        optional: bool,
1903        target_filter: Option<Expr>,
1904        path_variable: Option<String>,
1905        /// Whether this is a variable-length pattern (has `*` range specifier).
1906        /// When true, step_variable holds a list of edges (even for *1..1).
1907        is_variable_length: bool,
1908        /// All variables from this OPTIONAL MATCH pattern.
1909        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
1910        optional_pattern_vars: HashSet<String>,
1911        /// Variables belonging to the current MATCH clause scope.
1912        /// Used for relationship uniqueness scoping: only edge columns whose
1913        /// associated variable is in this set participate in uniqueness filtering.
1914        scope_match_variables: HashSet<String>,
1915        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
1916        edge_filter_expr: Option<Expr>,
1917        /// Path traversal semantics (Trail by default for OpenCypher).
1918        path_mode: crate::query::df_graph::nfa::PathMode,
1919    },
1920    Filter {
1921        input: Box<LogicalPlan>,
1922        predicate: Expr,
1923        /// Variables from OPTIONAL MATCH that should preserve NULL rows.
1924        /// When evaluating the filter, if any of these variables are NULL,
1925        /// the row is preserved regardless of the predicate result.
1926        optional_variables: HashSet<String>,
1927    },
1928    Create {
1929        input: Box<LogicalPlan>,
1930        pattern: Pattern,
1931    },
1932    /// Batched CREATE operations for multiple consecutive CREATE clauses.
1933    ///
1934    /// This variant combines multiple CREATE patterns into a single plan node
1935    /// to avoid deep recursion when executing many CREATEs sequentially.
1936    CreateBatch {
1937        input: Box<LogicalPlan>,
1938        patterns: Vec<Pattern>,
1939    },
1940    Merge {
1941        input: Box<LogicalPlan>,
1942        pattern: Pattern,
1943        on_match: Option<SetClause>,
1944        on_create: Option<SetClause>,
1945    },
1946    Set {
1947        input: Box<LogicalPlan>,
1948        items: Vec<SetItem>,
1949    },
1950    Remove {
1951        input: Box<LogicalPlan>,
1952        items: Vec<RemoveItem>,
1953    },
1954    Delete {
1955        input: Box<LogicalPlan>,
1956        items: Vec<Expr>,
1957        detach: bool,
1958    },
1959    /// FOREACH (variable IN list | clauses)
1960    Foreach {
1961        input: Box<LogicalPlan>,
1962        variable: String,
1963        list: Expr,
1964        body: Vec<LogicalPlan>,
1965    },
1966    Sort {
1967        input: Box<LogicalPlan>,
1968        order_by: Vec<SortItem>,
1969    },
1970    Limit {
1971        input: Box<LogicalPlan>,
1972        skip: Option<usize>,
1973        fetch: Option<usize>,
1974    },
1975    Aggregate {
1976        input: Box<LogicalPlan>,
1977        group_by: Vec<Expr>,
1978        aggregates: Vec<Expr>,
1979    },
1980    Distinct {
1981        input: Box<LogicalPlan>,
1982    },
1983    Window {
1984        input: Box<LogicalPlan>,
1985        window_exprs: Vec<Expr>,
1986    },
1987    Project {
1988        input: Box<LogicalPlan>,
1989        projections: Vec<(Expr, Option<String>)>,
1990    },
1991    CrossJoin {
1992        left: Box<LogicalPlan>,
1993        right: Box<LogicalPlan>,
1994    },
1995    Apply {
1996        input: Box<LogicalPlan>,
1997        subquery: Box<LogicalPlan>,
1998        input_filter: Option<Expr>,
1999    },
2000    RecursiveCTE {
2001        cte_name: String,
2002        initial: Box<LogicalPlan>,
2003        recursive: Box<LogicalPlan>,
2004    },
2005    ProcedureCall {
2006        procedure_name: String,
2007        arguments: Vec<Expr>,
2008        yield_items: Vec<(String, Option<String>)>,
2009    },
2010    SubqueryCall {
2011        input: Box<LogicalPlan>,
2012        subquery: Box<LogicalPlan>,
2013    },
2014    VectorKnn {
2015        label_id: u16,
2016        variable: String,
2017        property: String,
2018        query: Expr,
2019        k: usize,
2020        threshold: Option<f32>,
2021    },
2022    InvertedIndexLookup {
2023        label_id: u16,
2024        variable: String,
2025        property: String,
2026        terms: Expr,
2027    },
2028    ShortestPath {
2029        input: Box<LogicalPlan>,
2030        edge_type_ids: Vec<u32>,
2031        direction: Direction,
2032        source_variable: String,
2033        target_variable: String,
2034        target_label_id: u16,
2035        path_variable: String,
2036        /// Minimum number of hops (edges) in the path. Default is 1.
2037        min_hops: u32,
2038        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
2039        max_hops: u32,
2040    },
2041    /// allShortestPaths() - Returns all paths with minimum length
2042    AllShortestPaths {
2043        input: Box<LogicalPlan>,
2044        edge_type_ids: Vec<u32>,
2045        direction: Direction,
2046        source_variable: String,
2047        target_variable: String,
2048        target_label_id: u16,
2049        path_variable: String,
2050        /// Minimum number of hops (edges) in the path. Default is 1.
2051        min_hops: u32,
2052        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
2053        max_hops: u32,
2054    },
2055    QuantifiedPattern {
2056        input: Box<LogicalPlan>,
2057        pattern_plan: Box<LogicalPlan>, // Plan for one iteration
2058        min_iterations: u32,
2059        max_iterations: u32,
2060        path_variable: Option<String>,
2061        start_variable: String, // Input variable for iteration (e.g. 'a' in (a)-[:R]->(b))
2062        binding_variable: String, // Output variable of iteration (e.g. 'b')
2063    },
2064    // DDL Plans
2065    CreateVectorIndex {
2066        config: VectorIndexConfig,
2067        if_not_exists: bool,
2068    },
2069    CreateFullTextIndex {
2070        config: FullTextIndexConfig,
2071        if_not_exists: bool,
2072    },
2073    CreateScalarIndex {
2074        config: ScalarIndexConfig,
2075        if_not_exists: bool,
2076    },
2077    CreateJsonFtsIndex {
2078        config: JsonFtsIndexConfig,
2079        if_not_exists: bool,
2080    },
2081    DropIndex {
2082        name: String,
2083        if_exists: bool,
2084    },
2085    ShowIndexes {
2086        filter: Option<String>,
2087    },
2088    Copy {
2089        target: String,
2090        source: String,
2091        is_export: bool,
2092        options: HashMap<String, Value>,
2093    },
2094    Backup {
2095        destination: String,
2096        options: HashMap<String, Value>,
2097    },
2098    Explain {
2099        plan: Box<LogicalPlan>,
2100    },
2101    // Admin Plans
2102    ShowDatabase,
2103    ShowConfig,
2104    ShowStatistics,
2105    Vacuum,
2106    Checkpoint,
2107    CopyTo {
2108        label: String,
2109        path: String,
2110        format: String,
2111        options: HashMap<String, Value>,
2112    },
2113    CopyFrom {
2114        label: String,
2115        path: String,
2116        format: String,
2117        options: HashMap<String, Value>,
2118    },
2119    // Schema DDL
2120    CreateLabel(CreateLabel),
2121    CreateEdgeType(CreateEdgeType),
2122    AlterLabel(AlterLabel),
2123    AlterEdgeType(AlterEdgeType),
2124    DropLabel(DropLabel),
2125    DropEdgeType(DropEdgeType),
2126    // Constraints
2127    CreateConstraint(CreateConstraint),
2128    DropConstraint(DropConstraint),
2129    ShowConstraints(ShowConstraints),
2130    /// Bind a zero-length path (single node pattern with path variable).
2131    /// E.g., `p = (a)` creates a Path with one node and zero edges.
2132    BindZeroLengthPath {
2133        input: Box<LogicalPlan>,
2134        node_variable: String,
2135        path_variable: String,
2136    },
2137    /// Bind a fixed-length path from already-computed node and edge columns.
2138    /// E.g., `p = (a)-[r]->(b)` or `p = (a)-[r1]->(b)-[r2]->(c)`.
2139    BindPath {
2140        input: Box<LogicalPlan>,
2141        node_variables: Vec<String>,
2142        edge_variables: Vec<String>,
2143        path_variable: String,
2144    },
2145
2146    // ── Locy variants ──────────────────────────────────────────
2147    /// Top-level Locy program: stratified rules + commands.
2148    LocyProgram {
2149        strata: Vec<super::planner_locy_types::LocyStratum>,
2150        commands: Vec<super::planner_locy_types::LocyCommand>,
2151        derived_scan_registry: Arc<super::df_graph::locy_fixpoint::DerivedScanRegistry>,
2152        max_iterations: usize,
2153        timeout: std::time::Duration,
2154        max_derived_bytes: usize,
2155        deterministic_best_by: bool,
2156        strict_probability_domain: bool,
2157        probability_epsilon: f64,
2158        exact_probability: bool,
2159        max_bdd_variables: usize,
2160        top_k_proofs: usize,
2161        /// Active probability semiring (rollout D-7). Defaults to
2162        /// `AddMultProb` (Phase 1/2 byte-identical behavior). `BddExact`
2163        /// is selected by `LocyConfig::resolve()` when `exact_probability`
2164        /// is true.
2165        semiring_kind: uni_locy::SemiringKind,
2166        /// Phase B Slice 3: per-evaluation registry of neural classifiers
2167        /// keyed by model name. Empty for programs without `CREATE MODEL`.
2168        classifier_registry: Arc<uni_locy::ClassifierRegistry>,
2169        /// Phase B follow-up: optional memoization cache. `None` →
2170        /// runtime creates a fresh per-query cache; `Some` → shared
2171        /// across queries (caller-managed).
2172        classifier_cache: Option<Arc<uni_locy::ModelInvocationCache>>,
2173        /// Phase C B1-B3 follow-up: per-query side-channel store
2174        /// for per-invocation (raw, calibrated, confidence_band)
2175        /// records. Flows alongside `classifier_cache` into
2176        /// `LocyProgramExec`.
2177        classifier_provenance_store: Option<Arc<uni_locy::NeuralProvenanceStore>>,
2178    },
2179    /// FOLD operator: lattice-join non-key columns per KEY group.
2180    LocyFold {
2181        input: Box<LogicalPlan>,
2182        key_columns: Vec<String>,
2183        fold_bindings: Vec<(String, Expr)>,
2184        strict_probability_domain: bool,
2185        probability_epsilon: f64,
2186    },
2187    /// BEST BY operator: select best row per KEY group by ordered criteria.
2188    LocyBestBy {
2189        input: Box<LogicalPlan>,
2190        key_columns: Vec<String>,
2191        /// (expression, ascending) pairs.
2192        criteria: Vec<(Expr, bool)>,
2193    },
2194    /// PRIORITY operator: keep only highest-priority clause's rows per KEY group.
2195    LocyPriority {
2196        input: Box<LogicalPlan>,
2197        key_columns: Vec<String>,
2198    },
2199    /// Scan a derived relation's in-memory buffer during fixpoint iteration.
2200    LocyDerivedScan {
2201        scan_index: usize,
2202        data: Arc<RwLock<Vec<RecordBatch>>>,
2203        schema: SchemaRef,
2204    },
2205    /// Compact projection for Locy YIELD — emits ONLY the listed expressions,
2206    /// without carrying through helper/property columns like the regular Project.
2207    LocyProject {
2208        input: Box<LogicalPlan>,
2209        projections: Vec<(Expr, Option<String>)>,
2210        /// Expected output Arrow type per projection (for CAST support).
2211        target_types: Vec<DataType>,
2212    },
2213    /// Phase B A4: invoke registered neural classifiers against the
2214    /// input batches and overwrite the per-invocation placeholder
2215    /// column with each row's predicted probability. Wraps a Locy
2216    /// clause body plan when `CompiledClause.model_invocations` is
2217    /// non-empty; transparent (passes batches through unchanged) when
2218    /// the list is empty.
2219    ///
2220    /// Registry and cache are carried on the node so that
2221    /// `execute_subplan` — which spins up a fresh
2222    /// `HybridPhysicalPlanner` per call — can lower it to a physical
2223    /// `LocyModelInvokeExec` without depending on planner-side
2224    /// runtime state.
2225    LocyModelInvoke {
2226        input: Box<LogicalPlan>,
2227        invocations: Vec<uni_locy::ModelInvocation>,
2228        classifier_registry: Arc<uni_locy::ClassifierRegistry>,
2229        classifier_cache: Option<Arc<uni_locy::ModelInvocationCache>>,
2230        /// Phase C B1-B3 follow-up: per-query side-channel store
2231        /// for per-invocation (raw, calibrated, confidence_band)
2232        /// records. `LocyModelInvokeExec` writes here after each
2233        /// classifier call; EXPLAIN reads via collect_neural_calls
2234        /// to surface NeuralProvenance for ALONG/FOLD-position
2235        /// invocations and Mode B re-execution paths.
2236        classifier_provenance_store: Option<Arc<uni_locy::NeuralProvenanceStore>>,
2237        /// Phase D D3 runtime: one handle per `path_context.source_rule`
2238        /// referenced by any invocation on this node. The handle's
2239        /// `data: Arc<RwLock<Vec<RecordBatch>>>` is shared with the
2240        /// `DerivedScanRegistry`; the source rule's derived facts are
2241        /// already converged by the time this node executes (the
2242        /// dependency-graph builder ensures source rules sit in
2243        /// earlier strata).
2244        path_context_handles: std::collections::HashMap<
2245            String,
2246            super::df_graph::locy_model_invoke::PathContextHandle,
2247        >,
2248    },
2249}
2250
2251/// Extracted vector similarity predicate info for optimization
2252struct VectorSimilarityPredicate {
2253    variable: String,
2254    property: String,
2255    query: Expr,
2256    threshold: Option<f32>,
2257}
2258
2259/// Result of extracting vector_similarity from a predicate
2260struct VectorSimilarityExtraction {
2261    /// The extracted vector similarity predicate
2262    predicate: VectorSimilarityPredicate,
2263    /// Remaining predicates that couldn't be optimized (if any)
2264    residual: Option<Expr>,
2265}
2266
2267/// Try to extract a vector_similarity predicate from an expression.
2268/// Matches patterns like:
2269/// - vector_similarity(n.embedding, [1,2,3]) > 0.8
2270/// - n.embedding ~= $query
2271///
2272/// Also handles AND predicates.
2273fn extract_vector_similarity(expr: &Expr) -> Option<VectorSimilarityExtraction> {
2274    match expr {
2275        Expr::BinaryOp { left, op, right } => {
2276            // Handle AND: check both sides for vector_similarity
2277            if matches!(op, BinaryOp::And) {
2278                // Try left side first
2279                if let Some(vs) = extract_simple_vector_similarity(left) {
2280                    return Some(VectorSimilarityExtraction {
2281                        predicate: vs,
2282                        residual: Some(right.as_ref().clone()),
2283                    });
2284                }
2285                // Try right side
2286                if let Some(vs) = extract_simple_vector_similarity(right) {
2287                    return Some(VectorSimilarityExtraction {
2288                        predicate: vs,
2289                        residual: Some(left.as_ref().clone()),
2290                    });
2291                }
2292                // Recursively check within left/right for nested ANDs
2293                if let Some(mut extraction) = extract_vector_similarity(left) {
2294                    extraction.residual = Some(combine_with_and(
2295                        extraction.residual,
2296                        right.as_ref().clone(),
2297                    ));
2298                    return Some(extraction);
2299                }
2300                if let Some(mut extraction) = extract_vector_similarity(right) {
2301                    extraction.residual =
2302                        Some(combine_with_and(extraction.residual, left.as_ref().clone()));
2303                    return Some(extraction);
2304                }
2305                return None;
2306            }
2307
2308            // Simple case: direct vector_similarity comparison
2309            if let Some(vs) = extract_simple_vector_similarity(expr) {
2310                return Some(VectorSimilarityExtraction {
2311                    predicate: vs,
2312                    residual: None,
2313                });
2314            }
2315            None
2316        }
2317        _ => None,
2318    }
2319}
2320
2321/// Helper to combine an optional expression with another using AND
2322fn combine_with_and(opt_expr: Option<Expr>, other: Expr) -> Expr {
2323    match opt_expr {
2324        Some(e) => Expr::BinaryOp {
2325            left: Box::new(e),
2326            op: BinaryOp::And,
2327            right: Box::new(other),
2328        },
2329        None => other,
2330    }
2331}
2332
2333/// Extract a simple vector_similarity comparison (no AND)
2334fn extract_simple_vector_similarity(expr: &Expr) -> Option<VectorSimilarityPredicate> {
2335    match expr {
2336        Expr::BinaryOp { left, op, right } => {
2337            // Pattern: vector_similarity(...) > threshold or vector_similarity(...) >= threshold
2338            if matches!(op, BinaryOp::Gt | BinaryOp::GtEq)
2339                && let (Some(vs), Some(thresh)) = (
2340                    extract_vector_similarity_call(left),
2341                    extract_float_literal(right),
2342                )
2343            {
2344                return Some(VectorSimilarityPredicate {
2345                    variable: vs.0,
2346                    property: vs.1,
2347                    query: vs.2,
2348                    threshold: Some(thresh),
2349                });
2350            }
2351            // Pattern: threshold < vector_similarity(...) or threshold <= vector_similarity(...)
2352            if matches!(op, BinaryOp::Lt | BinaryOp::LtEq)
2353                && let (Some(thresh), Some(vs)) = (
2354                    extract_float_literal(left),
2355                    extract_vector_similarity_call(right),
2356                )
2357            {
2358                return Some(VectorSimilarityPredicate {
2359                    variable: vs.0,
2360                    property: vs.1,
2361                    query: vs.2,
2362                    threshold: Some(thresh),
2363                });
2364            }
2365            // Pattern: n.embedding ~= query
2366            if matches!(op, BinaryOp::ApproxEq)
2367                && let Expr::Property(var_expr, prop) = left.as_ref()
2368                && let Expr::Variable(var) = var_expr.as_ref()
2369            {
2370                return Some(VectorSimilarityPredicate {
2371                    variable: var.clone(),
2372                    property: prop.clone(),
2373                    query: right.as_ref().clone(),
2374                    threshold: None,
2375                });
2376            }
2377            None
2378        }
2379        _ => None,
2380    }
2381}
2382
2383/// Extract (variable, property, query_expr) from vector_similarity(n.prop, query)
2384fn extract_vector_similarity_call(expr: &Expr) -> Option<(String, String, Expr)> {
2385    if let Expr::FunctionCall { name, args, .. } = expr
2386        && name.eq_ignore_ascii_case("vector_similarity")
2387        && args.len() == 2
2388    {
2389        // First arg should be Property(Identifier(var), prop)
2390        if let Expr::Property(var_expr, prop) = &args[0]
2391            && let Expr::Variable(var) = var_expr.as_ref()
2392        {
2393            // Second arg is query
2394            return Some((var.clone(), prop.clone(), args[1].clone()));
2395        }
2396    }
2397    None
2398}
2399
2400/// Extract a float value from a literal expression
2401fn extract_float_literal(expr: &Expr) -> Option<f32> {
2402    match expr {
2403        Expr::Literal(CypherLiteral::Integer(i)) => Some(*i as f32),
2404        Expr::Literal(CypherLiteral::Float(f)) => Some(*f as f32),
2405        _ => None,
2406    }
2407}
2408
2409/// Translates a parsed Cypher AST into a [`LogicalPlan`].
2410///
2411/// `QueryPlanner` applies semantic validation (variable scoping, label
2412/// resolution, type checking) and produces a plan tree that the executor
2413/// can run against storage.
2414#[derive(Debug)]
2415pub struct QueryPlanner {
2416    schema: Arc<Schema>,
2417    /// Cache of parsed generation expressions, keyed by (label_name, gen_col_name).
2418    gen_expr_cache: HashMap<(String, String), Expr>,
2419    /// Counter for generating unique anonymous variable names.
2420    anon_counter: std::sync::atomic::AtomicUsize,
2421    /// Optional query parameters for resolving $param in SKIP/LIMIT.
2422    params: HashMap<String, uni_common::Value>,
2423    /// Optional plugin registry consulted when label / edge-type / identifier
2424    /// resolution misses the local schema (M5b — Catalog / ReplacementScan).
2425    plugin_registry: Option<Arc<uni_plugin::PluginRegistry>>,
2426    /// Gate for replacement-scan dispatch on unknown identifiers (M5b).
2427    replacement_scans_enabled: bool,
2428}
2429
2430struct TraverseParams<'a> {
2431    rel: &'a RelationshipPattern,
2432    target_node: &'a NodePattern,
2433    optional: bool,
2434    path_variable: Option<String>,
2435    /// All variables from this OPTIONAL MATCH pattern.
2436    /// Used to ensure multi-hop patterns correctly NULL all vars when any hop fails.
2437    optional_pattern_vars: HashSet<String>,
2438}
2439
2440impl QueryPlanner {
2441    /// Create a new planner for the given schema.
2442    ///
2443    /// Pre-parses all generation expressions defined in the schema so that
2444    /// repeated plan calls avoid redundant parsing.
2445    pub fn new(schema: Arc<Schema>) -> Self {
2446        // Pre-parse all generation expressions for caching
2447        let mut gen_expr_cache = HashMap::new();
2448        for (label, props) in &schema.properties {
2449            for (gen_col, meta) in props {
2450                if let Some(expr_str) = &meta.generation_expression
2451                    && let Ok(parsed_expr) = uni_cypher::parse_expression(expr_str)
2452                {
2453                    gen_expr_cache.insert((label.clone(), gen_col.clone()), parsed_expr);
2454                }
2455            }
2456        }
2457        Self {
2458            schema,
2459            gen_expr_cache,
2460            anon_counter: std::sync::atomic::AtomicUsize::new(0),
2461            params: HashMap::new(),
2462            plugin_registry: None,
2463            replacement_scans_enabled: false,
2464        }
2465    }
2466
2467    /// Set query parameters for resolving `$param` references in SKIP/LIMIT.
2468    pub fn with_params(mut self, params: HashMap<String, uni_common::Value>) -> Self {
2469        self.params = params;
2470        self
2471    }
2472
2473    /// Attach a plugin registry for catalog / replacement-scan fallbacks
2474    /// (M5b). When absent, label / edge-type resolution behaves exactly as
2475    /// before; when present, an unknown label is offered to each
2476    /// `CatalogProvider` before erroring.
2477    #[must_use]
2478    pub fn with_plugin_registry(mut self, registry: Arc<uni_plugin::PluginRegistry>) -> Self {
2479        self.plugin_registry = Some(registry);
2480        self
2481    }
2482
2483    /// Enable replacement-scan dispatch on unknown identifiers (M5b §4.23).
2484    /// Default off; opt-in only.
2485    #[must_use]
2486    pub fn with_replacement_scans(mut self, enabled: bool) -> Self {
2487        self.replacement_scans_enabled = enabled;
2488        self
2489    }
2490
2491    /// Allocate (or look up) a virtual label ID for `name` by consulting
2492    /// every registered `CatalogProvider` and then every registered
2493    /// `ReplacementScanProvider` (only the latter when the replacement-
2494    /// scan gate is on). On a first claim the catalog table is stashed
2495    /// on the host's [`uni_plugin::PluginRegistry`] under a freshly
2496    /// allocated virtual ID; subsequent calls with the same name return
2497    /// the cached ID and refresh the stashed table.
2498    ///
2499    /// Returns `None` if no provider claims the label or no plugin
2500    /// registry is attached. Returns `Some((id, table))` on a hit; the
2501    /// `id` lies in `[VIRTUAL_LABEL_ID_START, VIRTUAL_LABEL_ID_SENTINEL)`.
2502    /// Errors are surfaced as `Some(Err(_))`-equivalent via `Result`.
2503    fn allocate_virtual_label(
2504        &self,
2505        name: &str,
2506    ) -> Result<Option<(u16, Arc<dyn uni_plugin::traits::catalog::CatalogTable>)>> {
2507        let Some(registry) = self.plugin_registry.as_ref() else {
2508            return Ok(None);
2509        };
2510        // 1. CatalogProvider (always consulted, no gate — Batch 2 semantics).
2511        let mut claimed: Option<Arc<dyn uni_plugin::traits::catalog::CatalogTable>> = None;
2512        for cat in registry.catalogs() {
2513            if let Some(t) = cat.resolve_label(name) {
2514                claimed = Some(t);
2515                break;
2516            }
2517        }
2518        // 2. ReplacementScanProvider (gated). Only consult if no
2519        //    CatalogProvider already claimed.
2520        if claimed.is_none() {
2521            use uni_plugin::traits::catalog::{Replacement, ReplacementRequest};
2522            if let Some(Replacement::CatalogTable(t)) =
2523                self.consult_replacement_scan(ReplacementRequest::Label(name))
2524            {
2525                claimed = Some(t);
2526            }
2527        }
2528        let Some(table) = claimed else {
2529            return Ok(None);
2530        };
2531        let id = registry
2532            .register_virtual_label(name, Arc::clone(&table))
2533            .map_err(|e| anyhow!("virtual label registration failed for `{name}`: {e}"))?;
2534        Ok(Some((id, table)))
2535    }
2536
2537    /// Reject any write operation that names a label currently allocated
2538    /// as a virtual (catalog-backed) label. Catalog tables are read-only
2539    /// in this milestone — there is no write-back path through
2540    /// `CatalogTable::scan` to the originating provider, so silently
2541    /// allowing the write would produce ghosted state on the host side
2542    /// without affecting the external catalog. Errors with a clear,
2543    /// actionable message.
2544    fn reject_virtual_label_writes(&self, labels: &[String], op: &str) -> Result<()> {
2545        let Some(registry) = self.plugin_registry.as_ref() else {
2546            return Ok(());
2547        };
2548        for label in labels {
2549            if registry.virtual_label_by_name(label).is_some() {
2550                return Err(anyhow!(
2551                    "Cannot {op} on virtual (catalog-resolved) label `{label}` — virtual \
2552                     labels are read-only; write back via the originating catalog \
2553                     instead"
2554                ));
2555            }
2556        }
2557        Ok(())
2558    }
2559
2560    /// Edge-type analog of [`Self::allocate_virtual_label`].
2561    fn allocate_virtual_edge_type(
2562        &self,
2563        name: &str,
2564    ) -> Result<Option<(u32, Arc<dyn uni_plugin::traits::catalog::CatalogTable>)>> {
2565        let Some(registry) = self.plugin_registry.as_ref() else {
2566            return Ok(None);
2567        };
2568        let mut claimed: Option<Arc<dyn uni_plugin::traits::catalog::CatalogTable>> = None;
2569        for cat in registry.catalogs() {
2570            if let Some(t) = cat.resolve_edge_type(name) {
2571                claimed = Some(t);
2572                break;
2573            }
2574        }
2575        let Some(table) = claimed else {
2576            return Ok(None);
2577        };
2578        let id = registry
2579            .register_virtual_edge_type(name, Arc::clone(&table))
2580            .map_err(|e| anyhow!("virtual edge-type registration failed for `{name}`: {e}"))?;
2581        Ok(Some((id, table)))
2582    }
2583
2584    /// Try to resolve an unknown identifier through replacement-scan providers
2585    /// (gated by [`Self::with_replacement_scans`]). Returns the first
2586    /// [`Replacement`] any registered provider produces, or `None` if the
2587    /// gate is off, no registry is attached, or no provider claims the
2588    /// identifier. First-match wins (mirrors DuckDB).
2589    pub(crate) fn consult_replacement_scan(
2590        &self,
2591        request: uni_plugin::traits::catalog::ReplacementRequest<'_>,
2592    ) -> Option<uni_plugin::traits::catalog::Replacement> {
2593        if !self.replacement_scans_enabled {
2594            return None;
2595        }
2596        let registry = self.plugin_registry.as_ref()?;
2597        for r in registry.replacement_scans().iter() {
2598            if let Some(replacement) = r.replace(&request) {
2599                tracing::debug!(
2600                    target: "uni.plugin.registry",
2601                    ?request,
2602                    ?replacement,
2603                    "identifier resolved via ReplacementScanProvider"
2604                );
2605                return Some(replacement);
2606            }
2607        }
2608        None
2609    }
2610
2611    /// Resolve a user-typed procedure name against the attached plugin
2612    /// registry, applying the same namespace-prefix rules as
2613    /// `ProcedureRegistry::resolve_user_procedure` (host-coupled
2614    /// procedure dispatch). Returns `true` if any namespace claims the
2615    /// name. Used by the procedure-call replacement-scan gate to decide
2616    /// whether to consult before substituting.
2617    fn procedure_resolves(&self, user_name: &str) -> bool {
2618        let Some(registry) = self.plugin_registry.as_ref() else {
2619            return false;
2620        };
2621        if let Some((ns, local)) = user_name.split_once('.')
2622            && registry
2623                .procedure(&uni_plugin::QName::new(ns, local))
2624                .is_some()
2625        {
2626            return true;
2627        }
2628        let stripped = user_name.strip_prefix("uni.").unwrap_or(user_name);
2629        for plugin_id in ["uni", "builtin", "apoc-core", "custom"] {
2630            if registry
2631                .procedure(&uni_plugin::QName::new(plugin_id, stripped))
2632                .is_some()
2633            {
2634                return true;
2635            }
2636        }
2637        false
2638    }
2639
2640    /// Construct a [`uni_plugin::QName`] from a user-typed identifier for
2641    /// passing to [`Replacement`]-scan providers. If the name is dotted,
2642    /// the last segment is the local and the rest is the namespace
2643    /// (mirroring `QName::parse`). Bare names — which Cypher allows for
2644    /// procedures (`CALL foo()`) and functions (`RETURN foo(x)`) — are
2645    /// encoded with the conventional `"user"` namespace; providers that
2646    /// want to match a bare-typed name should inspect `.local()`.
2647    fn qname_from_user(name: &str) -> uni_plugin::QName {
2648        uni_plugin::QName::parse(name).unwrap_or_else(|_| uni_plugin::QName::new("user", name))
2649    }
2650
2651    /// Apply `ReplacementScanProvider`-driven function rewrites to the
2652    /// query's AST. When the gate is off or no registry is attached, the
2653    /// walker is short-circuited and the query is returned unchanged.
2654    /// Otherwise, every [`uni_cypher::ast::Expr::FunctionCall`] is offered
2655    /// to registered providers (first-match wins); a returned
2656    /// `Replacement::Function(new_qname)` substitutes the name in place.
2657    /// Rewrite depth is capped at 1 — the rewritten name is NOT re-
2658    /// consulted (a chained `A→B→A` provider therefore stops after the
2659    /// first hop). Wrong-variant returns (`CatalogTable`, `Procedure`)
2660    /// error immediately.
2661    fn rewrite_function_calls_in_query(
2662        &self,
2663        query: uni_cypher::ast::Query,
2664    ) -> Result<uni_cypher::ast::Query> {
2665        if !self.replacement_scans_enabled || self.plugin_registry.is_none() {
2666            return Ok(query);
2667        }
2668        let mut rename = |name: &str| -> Result<Option<String>> {
2669            let qname = Self::qname_from_user(name);
2670            use uni_plugin::traits::catalog::{Replacement, ReplacementRequest};
2671            match self.consult_replacement_scan(ReplacementRequest::Function(&qname)) {
2672                Some(Replacement::Function(new_qname)) => {
2673                    // Cypher function-call dispatch is bare-name-keyed
2674                    // (the per-category translators in `df_expr` match on
2675                    // `name.to_uppercase()` against bare local strings —
2676                    // "UPPER", "ABS", etc.). When the provider returns a
2677                    // synthetic-namespace target (`builtin.*` or `user.*`),
2678                    // strip the namespace so the AST name is what those
2679                    // dispatchers expect; for plugin-namespaced targets,
2680                    // preserve the full dotted form (matches how users
2681                    // type them).
2682                    let rewritten = match new_qname.namespace() {
2683                        "builtin" | "user" => new_qname.local().to_string(),
2684                        _ => new_qname.to_string(),
2685                    };
2686                    tracing::debug!(
2687                        target: "uni.plugin.registry",
2688                        from = %name,
2689                        to = %rewritten,
2690                        "function call rerouted via ReplacementScanProvider"
2691                    );
2692                    Ok(Some(rewritten))
2693                }
2694                Some(other) => Err(anyhow!(
2695                    "ReplacementScanProvider returned wrong variant for Function \
2696                     request `{}`: expected `Function`, got {:?}",
2697                    name,
2698                    other
2699                )),
2700                None => Ok(None),
2701            }
2702        };
2703        crate::query::rewrite::function_rename::rewrite_function_calls_in_query(query, &mut rename)
2704    }
2705
2706    /// Plan a Cypher query with no pre-bound variables.
2707    pub fn plan(&self, query: Query) -> Result<LogicalPlan> {
2708        self.plan_with_scope(query, Vec::new())
2709    }
2710
2711    /// Plan a Cypher query with a set of externally pre-bound variable names.
2712    ///
2713    /// `vars` lists variable names already in scope before this query executes
2714    /// (e.g., from an enclosing Locy rule body).
2715    pub fn plan_with_scope(&self, query: Query, vars: Vec<String>) -> Result<LogicalPlan> {
2716        // Apply query rewrites before planning
2717        let rewritten_query = crate::query::rewrite::rewrite_query(query)?;
2718        // M5 follow-up #5: function-call rewrite via ReplacementScanProvider.
2719        // Done as an AST pass *before* planning so the rewritten name flows
2720        // through every downstream stage (translation, UDF resolution,
2721        // execution) as if the user had typed it. No-op when the gate is
2722        // off or no provider claims the call. First-match wins; hard-cap
2723        // at one rewrite per call site (the rewritten name is NOT re-
2724        // consulted) — see `rewrite_function_calls_in_query`.
2725        let rewritten_query = self.rewrite_function_calls_in_query(rewritten_query)?;
2726        if Self::has_mixed_union_modes(&rewritten_query) {
2727            return Err(anyhow!(
2728                "SyntaxError: InvalidClauseComposition - Cannot mix UNION and UNION ALL in the same query"
2729            ));
2730        }
2731
2732        match rewritten_query {
2733            Query::Single(stmt) => self.plan_single(stmt, vars),
2734            Query::Union { left, right, all } => {
2735                let l = self.plan_with_scope(*left, vars.clone())?;
2736                let r = self.plan_with_scope(*right, vars)?;
2737
2738                // Validate that both sides have the same column names
2739                let left_cols = Self::extract_projection_columns(&l);
2740                let right_cols = Self::extract_projection_columns(&r);
2741
2742                if left_cols != right_cols {
2743                    return Err(anyhow!(
2744                        "SyntaxError: DifferentColumnsInUnion - UNION queries must have same column names"
2745                    ));
2746                }
2747
2748                Ok(LogicalPlan::Union {
2749                    left: Box::new(l),
2750                    right: Box::new(r),
2751                    all,
2752                })
2753            }
2754            Query::Schema(cmd) => self.plan_schema_command(*cmd),
2755            Query::Explain(inner) => {
2756                let inner_plan = self.plan_with_scope(*inner, vars)?;
2757                Ok(LogicalPlan::Explain {
2758                    plan: Box::new(inner_plan),
2759                })
2760            }
2761            Query::TimeTravel { .. } => {
2762                unreachable!("TimeTravel should be resolved at API layer before planning")
2763            }
2764        }
2765    }
2766
2767    fn collect_union_modes(query: &Query, out: &mut HashSet<bool>) {
2768        match query {
2769            Query::Union { left, right, all } => {
2770                out.insert(*all);
2771                Self::collect_union_modes(left, out);
2772                Self::collect_union_modes(right, out);
2773            }
2774            Query::Explain(inner) => Self::collect_union_modes(inner, out),
2775            Query::TimeTravel { query, .. } => Self::collect_union_modes(query, out),
2776            Query::Single(_) | Query::Schema(_) => {}
2777        }
2778    }
2779
2780    fn has_mixed_union_modes(query: &Query) -> bool {
2781        let mut modes = HashSet::new();
2782        Self::collect_union_modes(query, &mut modes);
2783        modes.len() > 1
2784    }
2785
2786    fn next_anon_var(&self) -> String {
2787        let id = self
2788            .anon_counter
2789            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
2790        format!("_anon_{}", id)
2791    }
2792
2793    /// Extract projection column names from a logical plan.
2794    /// Used for UNION column validation.
2795    fn extract_projection_columns(plan: &LogicalPlan) -> Vec<String> {
2796        match plan {
2797            LogicalPlan::Project { projections, .. } => projections
2798                .iter()
2799                .map(|(expr, alias)| alias.clone().unwrap_or_else(|| expr.to_string_repr()))
2800                .collect(),
2801            LogicalPlan::Limit { input, .. }
2802            | LogicalPlan::Sort { input, .. }
2803            | LogicalPlan::Distinct { input, .. }
2804            | LogicalPlan::Filter { input, .. } => Self::extract_projection_columns(input),
2805            LogicalPlan::Union { left, right, .. } => {
2806                let left_cols = Self::extract_projection_columns(left);
2807                if left_cols.is_empty() {
2808                    Self::extract_projection_columns(right)
2809                } else {
2810                    left_cols
2811                }
2812            }
2813            LogicalPlan::Aggregate {
2814                group_by,
2815                aggregates,
2816                ..
2817            } => {
2818                let mut cols: Vec<String> = group_by.iter().map(|e| e.to_string_repr()).collect();
2819                cols.extend(aggregates.iter().map(|e| e.to_string_repr()));
2820                cols
2821            }
2822            _ => Vec::new(),
2823        }
2824    }
2825
2826    fn plan_return_clause(
2827        &self,
2828        return_clause: &ReturnClause,
2829        plan: LogicalPlan,
2830        vars_in_scope: &[VariableInfo],
2831    ) -> Result<LogicalPlan> {
2832        let mut plan = plan;
2833        let mut group_by = Vec::new();
2834        let mut aggregates = Vec::new();
2835        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
2836        let mut has_agg = false;
2837        let mut projections = Vec::new();
2838        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
2839        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
2840        let mut projected_aliases: HashSet<String> = HashSet::new();
2841
2842        for item in &return_clause.items {
2843            match item {
2844                ReturnItem::All => {
2845                    // RETURN * - add all user-named variables in scope
2846                    // (anonymous variables like _anon_0 are excluded)
2847                    let user_vars: Vec<_> = vars_in_scope
2848                        .iter()
2849                        .filter(|v| !v.name.starts_with("_anon_"))
2850                        .collect();
2851                    if user_vars.is_empty() {
2852                        return Err(anyhow!(
2853                            "SyntaxError: NoVariablesInScope - RETURN * is not allowed when there are no variables in scope"
2854                        ));
2855                    }
2856                    for v in user_vars {
2857                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2858                        if !group_by.contains(&Expr::Variable(v.name.clone())) {
2859                            group_by.push(Expr::Variable(v.name.clone()));
2860                        }
2861                        projected_aliases.insert(v.name.clone());
2862                        projected_simple_reprs.insert(v.name.clone());
2863                    }
2864                }
2865                ReturnItem::Expr {
2866                    expr,
2867                    alias,
2868                    source_text,
2869                } => {
2870                    if matches!(expr, Expr::Wildcard) {
2871                        for v in vars_in_scope {
2872                            projections
2873                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2874                            if !group_by.contains(&Expr::Variable(v.name.clone())) {
2875                                group_by.push(Expr::Variable(v.name.clone()));
2876                            }
2877                            projected_aliases.insert(v.name.clone());
2878                            projected_simple_reprs.insert(v.name.clone());
2879                        }
2880                    } else {
2881                        // Validate expression variables are defined
2882                        validate_expression_variables(expr, vars_in_scope)?;
2883                        // Validate function argument types and boolean operators
2884                        validate_expression(expr, vars_in_scope)?;
2885                        // Pattern predicates are not allowed in RETURN
2886                        if contains_pattern_predicate(expr) {
2887                            return Err(anyhow!(
2888                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in RETURN"
2889                            ));
2890                        }
2891
2892                        // Use source text as column name when no explicit alias
2893                        let effective_alias = alias.clone().or_else(|| source_text.clone());
2894                        projections.push((expr.clone(), effective_alias));
2895                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
2896                            // Bare aggregate — push directly
2897                            has_agg = true;
2898                            aggregates.push(expr.clone());
2899                            projected_aggregate_reprs.insert(expr.to_string_repr());
2900                        } else if !is_window_function(expr)
2901                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
2902                        {
2903                            // Compound aggregate or expression containing aggregates —
2904                            // extract the inner bare aggregates for the Aggregate node
2905                            has_agg = true;
2906                            compound_agg_exprs.push(expr.clone());
2907                            for inner in extract_inner_aggregates(expr) {
2908                                let repr = inner.to_string_repr();
2909                                if !projected_aggregate_reprs.contains(&repr) {
2910                                    aggregates.push(inner);
2911                                    projected_aggregate_reprs.insert(repr);
2912                                }
2913                            }
2914                        } else if !group_by.contains(expr) {
2915                            group_by.push(expr.clone());
2916                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
2917                                projected_simple_reprs.insert(expr.to_string_repr());
2918                            }
2919                        }
2920
2921                        if let Some(a) = alias {
2922                            if projected_aliases.contains(a) {
2923                                return Err(anyhow!(
2924                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2925                                    a
2926                                ));
2927                            }
2928                            projected_aliases.insert(a.clone());
2929                        } else if let Expr::Variable(v) = expr {
2930                            if projected_aliases.contains(v) {
2931                                return Err(anyhow!(
2932                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2933                                    v
2934                                ));
2935                            }
2936                            projected_aliases.insert(v.clone());
2937                        }
2938                    }
2939                }
2940            }
2941        }
2942
2943        // Validate compound aggregate expressions: non-aggregate refs must be
2944        // individually present in the group_by as simple variables or properties.
2945        if has_agg {
2946            let group_by_reprs: HashSet<String> =
2947                group_by.iter().map(|e| e.to_string_repr()).collect();
2948            for expr in &compound_agg_exprs {
2949                let mut refs = Vec::new();
2950                collect_non_aggregate_refs(expr, false, &mut refs);
2951                for r in &refs {
2952                    let is_covered = match r {
2953                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
2954                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
2955                    };
2956                    if !is_covered {
2957                        return Err(anyhow!(
2958                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
2959                        ));
2960                    }
2961                }
2962            }
2963        }
2964
2965        if has_agg {
2966            plan = LogicalPlan::Aggregate {
2967                input: Box::new(plan),
2968                group_by,
2969                aggregates,
2970            };
2971        }
2972
2973        let mut window_exprs = Vec::new();
2974        for (expr, _) in &projections {
2975            Self::collect_window_functions(expr, &mut window_exprs);
2976        }
2977
2978        if let Some(order_by) = &return_clause.order_by {
2979            for item in order_by {
2980                Self::collect_window_functions(&item.expr, &mut window_exprs);
2981            }
2982        }
2983
2984        let has_window_exprs = !window_exprs.is_empty();
2985
2986        if has_window_exprs {
2987            // Before creating the Window node, we need to ensure all properties
2988            // referenced by window functions are available. Create a Project node
2989            // that loads these properties.
2990            let mut props_needed_for_window: Vec<Expr> = Vec::new();
2991            for window_expr in &window_exprs {
2992                Self::collect_properties_from_expr(window_expr, &mut props_needed_for_window);
2993            }
2994
2995            // Also include non-window expressions from projections that might be needed
2996            // Preserve qualified names (e.g., "e.salary") as aliases for properties
2997            let non_window_projections: Vec<_> = projections
2998                .iter()
2999                .filter_map(|(expr, alias)| {
3000                    // Keep expressions that don't have window_spec
3001                    let keep = if let Expr::FunctionCall { window_spec, .. } = expr {
3002                        window_spec.is_none()
3003                    } else {
3004                        true
3005                    };
3006
3007                    if keep {
3008                        // For property references, use the qualified name as alias
3009                        let new_alias = if matches!(expr, Expr::Property(..)) {
3010                            Some(expr.to_string_repr())
3011                        } else {
3012                            alias.clone()
3013                        };
3014                        Some((expr.clone(), new_alias))
3015                    } else {
3016                        None
3017                    }
3018                })
3019                .collect();
3020
3021            if !non_window_projections.is_empty() || !props_needed_for_window.is_empty() {
3022                let mut intermediate_projections = non_window_projections;
3023                // Add any additional property references needed by window functions
3024                // IMPORTANT: Preserve qualified names (e.g., "e.salary") as aliases so window functions can reference them
3025                for prop in &props_needed_for_window {
3026                    if !intermediate_projections
3027                        .iter()
3028                        .any(|(e, _)| e.to_string_repr() == prop.to_string_repr())
3029                    {
3030                        let qualified_name = prop.to_string_repr();
3031                        intermediate_projections.push((prop.clone(), Some(qualified_name)));
3032                    }
3033                }
3034
3035                if !intermediate_projections.is_empty() {
3036                    plan = LogicalPlan::Project {
3037                        input: Box::new(plan),
3038                        projections: intermediate_projections,
3039                    };
3040                }
3041            }
3042
3043            // Transform property expressions in window functions to use qualified variable names
3044            // so that e.dept becomes "e.dept" variable that can be looked up from the row HashMap
3045            let transformed_window_exprs: Vec<Expr> = window_exprs
3046                .into_iter()
3047                .map(Self::transform_window_expr_properties)
3048                .collect();
3049
3050            plan = LogicalPlan::Window {
3051                input: Box::new(plan),
3052                window_exprs: transformed_window_exprs,
3053            };
3054        }
3055
3056        if let Some(order_by) = &return_clause.order_by {
3057            let alias_exprs: HashMap<String, Expr> = projections
3058                .iter()
3059                .filter_map(|(expr, alias)| {
3060                    alias.as_ref().map(|a| {
3061                        // ORDER BY is planned before the final RETURN projection.
3062                        // In aggregate contexts, aliases must resolve to the
3063                        // post-aggregate output columns, not raw aggregate calls.
3064                        let rewritten = if has_agg && !has_window_exprs {
3065                            if expr.is_aggregate() && !is_compound_aggregate(expr) {
3066                                Expr::Variable(aggregate_column_name(expr))
3067                            } else if is_compound_aggregate(expr)
3068                                || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
3069                            {
3070                                replace_aggregates_with_columns(expr)
3071                            } else {
3072                                Expr::Variable(expr.to_string_repr())
3073                            }
3074                        } else {
3075                            expr.clone()
3076                        };
3077                        (a.clone(), rewritten)
3078                    })
3079                })
3080                .collect();
3081
3082            // Build an extended scope that includes RETURN aliases so ORDER BY
3083            // can reference them (e.g. RETURN n.age AS age ORDER BY age).
3084            let order_by_scope: Vec<VariableInfo> = if return_clause.distinct {
3085                // DISTINCT in RETURN narrows ORDER BY visibility to returned columns.
3086                // Keep aliases and directly returned variables in scope.
3087                let mut scope = Vec::new();
3088                for (expr, alias) in &projections {
3089                    if let Some(a) = alias
3090                        && !is_var_in_scope(&scope, a)
3091                    {
3092                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
3093                    }
3094                    if let Expr::Variable(v) = expr
3095                        && !is_var_in_scope(&scope, v)
3096                    {
3097                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
3098                    }
3099                }
3100                scope
3101            } else {
3102                let mut scope = vars_in_scope.to_vec();
3103                for (expr, alias) in &projections {
3104                    if let Some(a) = alias
3105                        && !is_var_in_scope(&scope, a)
3106                    {
3107                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
3108                    } else if let Expr::Variable(v) = expr
3109                        && !is_var_in_scope(&scope, v)
3110                    {
3111                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
3112                    }
3113                }
3114                scope
3115            };
3116            // Validate ORDER BY expressions against the extended scope
3117            for item in order_by {
3118                // DISTINCT allows ORDER BY on the same projected expression
3119                // even when underlying variables are not otherwise visible.
3120                let matches_projected_expr = return_clause.distinct
3121                    && projections
3122                        .iter()
3123                        .any(|(expr, _)| expr.to_string_repr() == item.expr.to_string_repr());
3124                if !matches_projected_expr {
3125                    validate_expression_variables(&item.expr, &order_by_scope)?;
3126                    validate_expression(&item.expr, &order_by_scope)?;
3127                }
3128                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
3129                if has_aggregate_in_item && !has_agg {
3130                    return Err(anyhow!(
3131                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY after RETURN"
3132                    ));
3133                }
3134                if has_agg && has_aggregate_in_item {
3135                    validate_with_order_by_aggregate_item(
3136                        &item.expr,
3137                        &projected_aggregate_reprs,
3138                        &projected_simple_reprs,
3139                        &projected_aliases,
3140                    )?;
3141                }
3142            }
3143            let rewritten_order_by: Vec<SortItem> = order_by
3144                .iter()
3145                .map(|item| SortItem {
3146                    expr: {
3147                        let mut rewritten =
3148                            rewrite_order_by_expr_with_aliases(&item.expr, &alias_exprs);
3149                        if has_agg && !has_window_exprs {
3150                            rewritten = replace_aggregates_with_columns(&rewritten);
3151                        }
3152                        rewritten
3153                    },
3154                    ascending: item.ascending,
3155                })
3156                .collect();
3157            plan = LogicalPlan::Sort {
3158                input: Box::new(plan),
3159                order_by: rewritten_order_by,
3160            };
3161        }
3162
3163        if return_clause.skip.is_some() || return_clause.limit.is_some() {
3164            let skip = return_clause
3165                .skip
3166                .as_ref()
3167                .map(|e| parse_non_negative_integer(e, "SKIP", &self.params))
3168                .transpose()?
3169                .flatten();
3170            let fetch = return_clause
3171                .limit
3172                .as_ref()
3173                .map(|e| parse_non_negative_integer(e, "LIMIT", &self.params))
3174                .transpose()?
3175                .flatten();
3176
3177            plan = LogicalPlan::Limit {
3178                input: Box::new(plan),
3179                skip,
3180                fetch,
3181            };
3182        }
3183
3184        if !projections.is_empty() {
3185            // If we created an Aggregate or Window node, we need to adjust the final projections
3186            // to reference aggregate/window function results as columns instead of re-evaluating them
3187            let final_projections = if has_agg || has_window_exprs {
3188                projections
3189                    .into_iter()
3190                    .map(|(expr, alias)| {
3191                        // Check if this expression is an aggregate function
3192                        if expr.is_aggregate() && !is_compound_aggregate(&expr) && !has_window_exprs
3193                        {
3194                            // Bare aggregate — replace with column reference
3195                            let col_name = aggregate_column_name(&expr);
3196                            (Expr::Variable(col_name), alias)
3197                        } else if !has_window_exprs
3198                            && (is_compound_aggregate(&expr)
3199                                || (!expr.is_aggregate() && contains_aggregate_recursive(&expr)))
3200                        {
3201                            // Compound aggregate — replace inner aggregates with
3202                            // column references, keep outer expression for Project
3203                            (replace_aggregates_with_columns(&expr), alias)
3204                        }
3205                        // For grouped RETURN projections, reference the pre-computed
3206                        // group-by output column instead of re-evaluating the expression
3207                        // against the aggregate schema (which no longer has original vars).
3208                        else if has_agg
3209                            && !has_window_exprs
3210                            && !matches!(expr, Expr::Variable(_) | Expr::Property(_, _))
3211                        {
3212                            (Expr::Variable(expr.to_string_repr()), alias)
3213                        }
3214                        // Check if this expression is a window function
3215                        else if let Expr::FunctionCall {
3216                            window_spec: Some(_),
3217                            ..
3218                        } = &expr
3219                        {
3220                            // Replace window function with a column reference to its result
3221                            // The column name in the Window output is the full expression string
3222                            let window_col_name = expr.to_string_repr();
3223                            // Keep the original alias for the final output
3224                            (Expr::Variable(window_col_name), alias)
3225                        } else {
3226                            (expr, alias)
3227                        }
3228                    })
3229                    .collect()
3230            } else {
3231                projections
3232            };
3233
3234            plan = LogicalPlan::Project {
3235                input: Box::new(plan),
3236                projections: final_projections,
3237            };
3238        }
3239
3240        if return_clause.distinct {
3241            plan = LogicalPlan::Distinct {
3242                input: Box::new(plan),
3243            };
3244        }
3245
3246        Ok(plan)
3247    }
3248
3249    fn plan_single(&self, query: Statement, initial_vars: Vec<String>) -> Result<LogicalPlan> {
3250        let typed_vars: Vec<VariableInfo> = initial_vars
3251            .into_iter()
3252            .map(|name| VariableInfo::new(name, VariableType::Imported))
3253            .collect();
3254        self.plan_single_typed(query, typed_vars)
3255    }
3256
3257    /// Rewrite a query then plan it, preserving typed variable scope when possible.
3258    ///
3259    /// For `Query::Single` statements, uses `plan_single_typed` to carry typed
3260    /// variable info through and avoid false type-conflict errors in subqueries.
3261    /// For unions and other compound queries, falls back to `plan_with_scope`.
3262    fn rewrite_and_plan_typed(
3263        &self,
3264        query: Query,
3265        typed_vars: &[VariableInfo],
3266    ) -> Result<LogicalPlan> {
3267        let rewritten = crate::query::rewrite::rewrite_query(query)?;
3268        match rewritten {
3269            Query::Single(stmt) => self.plan_single_typed(stmt, typed_vars.to_vec()),
3270            other => self.plan_with_scope(other, vars_to_strings(typed_vars)),
3271        }
3272    }
3273
3274    fn plan_single_typed(
3275        &self,
3276        query: Statement,
3277        initial_vars: Vec<VariableInfo>,
3278    ) -> Result<LogicalPlan> {
3279        let mut plan = LogicalPlan::Empty;
3280
3281        if !initial_vars.is_empty() {
3282            // Project bound variables from outer scope as parameters.
3283            // These come from the enclosing query's row (passed as sub_params in EXISTS evaluation).
3284            // Use Parameter expressions to read from params, not Variable which would read from input row.
3285            let projections = initial_vars
3286                .iter()
3287                .map(|v| (Expr::Parameter(v.name.clone()), Some(v.name.clone())))
3288                .collect();
3289            plan = LogicalPlan::Project {
3290                input: Box::new(plan),
3291                projections,
3292            };
3293        }
3294
3295        let mut vars_in_scope: Vec<VariableInfo> = initial_vars;
3296        // Track variables introduced by CREATE clauses so we can distinguish
3297        // MATCH-introduced variables (which cannot be re-created as bare nodes)
3298        // from CREATE-introduced variables (which can be referenced as bare nodes).
3299        let mut create_introduced_vars: HashSet<String> = HashSet::new();
3300        // Track variables targeted by DELETE so we can reject property/label
3301        // access on deleted entities in subsequent RETURN clauses.
3302        let mut deleted_vars: HashSet<String> = HashSet::new();
3303
3304        let clause_count = query.clauses.len();
3305        for (clause_idx, clause) in query.clauses.into_iter().enumerate() {
3306            match clause {
3307                Clause::Match(match_clause) => {
3308                    plan = self.plan_match_clause(&match_clause, plan, &mut vars_in_scope)?;
3309                }
3310                Clause::Unwind(unwind) => {
3311                    plan = LogicalPlan::Unwind {
3312                        input: Box::new(plan),
3313                        expr: unwind.expr.clone(),
3314                        variable: unwind.variable.clone(),
3315                    };
3316                    let unwind_out_type = infer_unwind_output_type(&unwind.expr, &vars_in_scope);
3317                    add_var_to_scope(&mut vars_in_scope, &unwind.variable, unwind_out_type)?;
3318                }
3319                Clause::Call(call_clause) => {
3320                    match &call_clause.kind {
3321                        CallKind::Procedure {
3322                            procedure,
3323                            arguments,
3324                        } => {
3325                            // Validate that procedure arguments don't contain aggregation functions
3326                            for arg in arguments {
3327                                if contains_aggregate_recursive(arg) {
3328                                    return Err(anyhow!(
3329                                        "SyntaxError: InvalidAggregation - Aggregation expressions are not allowed as arguments to procedure calls"
3330                                    ));
3331                                }
3332                            }
3333
3334                            let has_yield_star = call_clause.yield_items.len() == 1
3335                                && call_clause.yield_items[0].name == "*"
3336                                && call_clause.yield_items[0].alias.is_none();
3337                            if has_yield_star && clause_idx + 1 < clause_count {
3338                                return Err(anyhow!(
3339                                    "SyntaxError: UnexpectedSyntax - YIELD * is only allowed in standalone procedure calls"
3340                                ));
3341                            }
3342
3343                            // Validate for duplicate yield names (VariableAlreadyBound)
3344                            let mut yield_names = Vec::new();
3345                            for item in &call_clause.yield_items {
3346                                if item.name == "*" {
3347                                    continue;
3348                                }
3349                                let output_name = item.alias.as_ref().unwrap_or(&item.name);
3350                                if yield_names.contains(output_name) {
3351                                    return Err(anyhow!(
3352                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already appears in YIELD clause",
3353                                        output_name
3354                                    ));
3355                                }
3356                                // Check against existing scope (in-query CALL must not shadow)
3357                                if clause_idx > 0
3358                                    && vars_in_scope.iter().any(|v| v.name == *output_name)
3359                                {
3360                                    return Err(anyhow!(
3361                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already declared in outer scope",
3362                                        output_name
3363                                    ));
3364                                }
3365                                yield_names.push(output_name.clone());
3366                            }
3367
3368                            let mut yields = Vec::new();
3369                            for item in &call_clause.yield_items {
3370                                if item.name == "*" {
3371                                    continue;
3372                                }
3373                                yields.push((item.name.clone(), item.alias.clone()));
3374                                let var_name = item.alias.as_ref().unwrap_or(&item.name);
3375                                // Use Imported because procedure return types are unknown
3376                                // at plan time (could be nodes, edges, or scalars)
3377                                add_var_to_scope(
3378                                    &mut vars_in_scope,
3379                                    var_name,
3380                                    VariableType::Imported,
3381                                )?;
3382                            }
3383                            // M5 follow-up #5: if replacement-scan dispatch is
3384                            // enabled and the procedure name does not resolve
3385                            // against the plugin registry, consult registered
3386                            // `ReplacementScanProvider`s. A `Replacement::Procedure`
3387                            // substitutes the call's target name in the logical
3388                            // plan; the rewritten name must itself resolve or
3389                            // we error immediately (no second-tier consult — caps
3390                            // rewrite depth at one).
3391                            let procedure_name = if self.replacement_scans_enabled
3392                                && !self.procedure_resolves(procedure)
3393                            {
3394                                use uni_plugin::traits::catalog::{
3395                                    Replacement, ReplacementRequest,
3396                                };
3397                                let qname = Self::qname_from_user(procedure);
3398                                match self
3399                                    .consult_replacement_scan(ReplacementRequest::Procedure(&qname))
3400                                {
3401                                    Some(Replacement::Procedure(new_qname)) => {
3402                                        let rewritten = new_qname.to_string();
3403                                        if !self.procedure_resolves(&rewritten) {
3404                                            return Err(anyhow!(
3405                                                "ReplacementScanProvider rerouted procedure \
3406                                                 `{}` to `{}`, which also did not resolve",
3407                                                procedure,
3408                                                rewritten
3409                                            ));
3410                                        }
3411                                        tracing::debug!(
3412                                            target: "uni.plugin.registry",
3413                                            from = %procedure,
3414                                            to = %rewritten,
3415                                            "procedure rerouted via ReplacementScanProvider"
3416                                        );
3417                                        rewritten
3418                                    }
3419                                    Some(other) => {
3420                                        return Err(anyhow!(
3421                                            "ReplacementScanProvider returned wrong variant \
3422                                             for Procedure request `{}`: expected \
3423                                             `Procedure`, got {:?}",
3424                                            procedure,
3425                                            other
3426                                        ));
3427                                    }
3428                                    None => procedure.clone(),
3429                                }
3430                            } else {
3431                                procedure.clone()
3432                            };
3433                            let proc_plan = LogicalPlan::ProcedureCall {
3434                                procedure_name,
3435                                arguments: arguments.clone(),
3436                                yield_items: yields.clone(),
3437                            };
3438
3439                            if matches!(plan, LogicalPlan::Empty) {
3440                                // Standalone CALL (first clause) — use directly
3441                                plan = proc_plan;
3442                            } else if yields.is_empty() {
3443                                // In-query CALL with no YIELD (void procedure):
3444                                // preserve the input rows unchanged
3445                            } else {
3446                                // In-query CALL with YIELD: cross-join input × procedure output
3447                                plan = LogicalPlan::Apply {
3448                                    input: Box::new(plan),
3449                                    subquery: Box::new(proc_plan),
3450                                    input_filter: None,
3451                                };
3452                            }
3453                        }
3454                        CallKind::Subquery(query) => {
3455                            let subquery_plan =
3456                                self.rewrite_and_plan_typed(*query.clone(), &vars_in_scope)?;
3457
3458                            // Extract variables from subquery RETURN clause
3459                            let subquery_vars = Self::collect_plan_variables(&subquery_plan);
3460
3461                            // Add new variables to scope (as Scalar since they come from subquery projection)
3462                            for var in subquery_vars {
3463                                if !is_var_in_scope(&vars_in_scope, &var) {
3464                                    add_var_to_scope(
3465                                        &mut vars_in_scope,
3466                                        &var,
3467                                        VariableType::Scalar,
3468                                    )?;
3469                                }
3470                            }
3471
3472                            plan = LogicalPlan::SubqueryCall {
3473                                input: Box::new(plan),
3474                                subquery: Box::new(subquery_plan),
3475                            };
3476                        }
3477                    }
3478                }
3479                Clause::Merge(merge_clause) => {
3480                    validate_merge_clause(&merge_clause, &vars_in_scope)?;
3481                    // M5 follow-up #6: virtual (catalog-resolved) labels are
3482                    // read-only — reject MERGE that names one.
3483                    let merge_labels = collect_pattern_labels(&merge_clause.pattern);
3484                    self.reject_virtual_label_writes(&merge_labels, "MERGE")?;
3485
3486                    plan = LogicalPlan::Merge {
3487                        input: Box::new(plan),
3488                        pattern: merge_clause.pattern.clone(),
3489                        on_match: Some(SetClause {
3490                            items: merge_clause.on_match.clone(),
3491                        }),
3492                        on_create: Some(SetClause {
3493                            items: merge_clause.on_create.clone(),
3494                        }),
3495                    };
3496
3497                    for path in &merge_clause.pattern.paths {
3498                        if let Some(path_var) = &path.variable
3499                            && !path_var.is_empty()
3500                            && !is_var_in_scope(&vars_in_scope, path_var)
3501                        {
3502                            add_var_to_scope(&mut vars_in_scope, path_var, VariableType::Path)?;
3503                        }
3504                        for element in &path.elements {
3505                            if let PatternElement::Node(n) = element {
3506                                if let Some(v) = &n.variable
3507                                    && !is_var_in_scope(&vars_in_scope, v)
3508                                {
3509                                    add_var_to_scope(&mut vars_in_scope, v, VariableType::Node)?;
3510                                }
3511                            } else if let PatternElement::Relationship(r) = element
3512                                && let Some(v) = &r.variable
3513                                && !is_var_in_scope(&vars_in_scope, v)
3514                            {
3515                                add_var_to_scope(&mut vars_in_scope, v, VariableType::Edge)?;
3516                            }
3517                        }
3518                    }
3519                }
3520                Clause::Create(create_clause) => {
3521                    // M5 follow-up #6: virtual (catalog-resolved) labels are
3522                    // read-only — reject CREATE that names one.
3523                    let create_labels = collect_pattern_labels(&create_clause.pattern);
3524                    self.reject_virtual_label_writes(&create_labels, "CREATE")?;
3525                    // Validate CREATE patterns:
3526                    // - Nodes with labels/properties are "creations" - can't rebind existing variables
3527                    // - Bare nodes (v) are "references" if bound, "creations" if not
3528                    // - Relationships are always creations - can't rebind
3529                    // - Within CREATE, each new variable can only be defined once
3530                    // - Variables used in properties must be defined
3531                    let mut create_vars: Vec<&str> = Vec::new();
3532                    for path in &create_clause.pattern.paths {
3533                        let is_standalone_node = path.elements.len() == 1;
3534                        for element in &path.elements {
3535                            match element {
3536                                PatternElement::Node(n) => {
3537                                    validate_property_variables(
3538                                        &n.properties,
3539                                        &vars_in_scope,
3540                                        &create_vars,
3541                                    )?;
3542
3543                                    if let Some(v) = n.variable.as_deref()
3544                                        && !v.is_empty()
3545                                    {
3546                                        // A node is a "creation" if it has labels or properties
3547                                        let is_creation =
3548                                            !n.labels.is_empty() || n.properties.is_some();
3549
3550                                        if is_creation {
3551                                            check_not_already_bound(
3552                                                v,
3553                                                &vars_in_scope,
3554                                                &create_vars,
3555                                            )?;
3556                                            create_vars.push(v);
3557                                        } else if is_standalone_node
3558                                            && is_var_in_scope(&vars_in_scope, v)
3559                                            && !create_introduced_vars.contains(v)
3560                                        {
3561                                            // Standalone bare node referencing a variable from a
3562                                            // non-CREATE clause (e.g. MATCH (a) CREATE (a)) — invalid.
3563                                            // Bare nodes used as relationship endpoints
3564                                            // (e.g. CREATE (a)-[:R]->(b)) are valid references.
3565                                            return Err(anyhow!(
3566                                                "SyntaxError: VariableAlreadyBound - '{}'",
3567                                                v
3568                                            ));
3569                                        } else if !create_vars.contains(&v) {
3570                                            // New bare variable — register it
3571                                            create_vars.push(v);
3572                                        }
3573                                        // else: bare reference to same-CREATE or previous-CREATE variable — OK
3574                                    }
3575                                }
3576                                PatternElement::Relationship(r) => {
3577                                    validate_property_variables(
3578                                        &r.properties,
3579                                        &vars_in_scope,
3580                                        &create_vars,
3581                                    )?;
3582
3583                                    if let Some(v) = r.variable.as_deref()
3584                                        && !v.is_empty()
3585                                    {
3586                                        check_not_already_bound(v, &vars_in_scope, &create_vars)?;
3587                                        create_vars.push(v);
3588                                    }
3589
3590                                    // Validate relationship constraints for CREATE
3591                                    if r.types.len() != 1 {
3592                                        return Err(anyhow!(
3593                                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for CREATE"
3594                                        ));
3595                                    }
3596                                    if r.direction == Direction::Both {
3597                                        return Err(anyhow!(
3598                                            "SyntaxError: RequiresDirectedRelationship - Only directed relationships are supported in CREATE"
3599                                        ));
3600                                    }
3601                                    if r.range.is_some() {
3602                                        return Err(anyhow!(
3603                                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
3604                                        ));
3605                                    }
3606                                }
3607                                PatternElement::Parenthesized { .. } => {}
3608                            }
3609                        }
3610                    }
3611
3612                    // Batch consecutive CREATEs to avoid deep recursion
3613                    match &mut plan {
3614                        LogicalPlan::CreateBatch { patterns, .. } => {
3615                            // Append to existing batch
3616                            patterns.push(create_clause.pattern.clone());
3617                        }
3618                        LogicalPlan::Create { input, pattern } => {
3619                            // Convert single Create to CreateBatch with both patterns
3620                            let first_pattern = pattern.clone();
3621                            plan = LogicalPlan::CreateBatch {
3622                                input: input.clone(),
3623                                patterns: vec![first_pattern, create_clause.pattern.clone()],
3624                            };
3625                        }
3626                        _ => {
3627                            // Start new Create (may become batch if more CREATEs follow)
3628                            plan = LogicalPlan::Create {
3629                                input: Box::new(plan),
3630                                pattern: create_clause.pattern.clone(),
3631                            };
3632                        }
3633                    }
3634                    // Add variables from created nodes and relationships to scope
3635                    for path in &create_clause.pattern.paths {
3636                        for element in &path.elements {
3637                            match element {
3638                                PatternElement::Node(n) => {
3639                                    if let Some(var) = &n.variable
3640                                        && !var.is_empty()
3641                                    {
3642                                        create_introduced_vars.insert(var.clone());
3643                                        add_var_to_scope(
3644                                            &mut vars_in_scope,
3645                                            var,
3646                                            VariableType::Node,
3647                                        )?;
3648                                    }
3649                                }
3650                                PatternElement::Relationship(r) => {
3651                                    if let Some(var) = &r.variable
3652                                        && !var.is_empty()
3653                                    {
3654                                        create_introduced_vars.insert(var.clone());
3655                                        add_var_to_scope(
3656                                            &mut vars_in_scope,
3657                                            var,
3658                                            VariableType::Edge,
3659                                        )?;
3660                                    }
3661                                }
3662                                PatternElement::Parenthesized { .. } => {
3663                                    // Skip for now - not commonly used in CREATE
3664                                }
3665                            }
3666                        }
3667                    }
3668                }
3669                Clause::Set(set_clause) => {
3670                    // Validate SET value expressions
3671                    for item in &set_clause.items {
3672                        match item {
3673                            SetItem::Property { value, .. }
3674                            | SetItem::Variable { value, .. }
3675                            | SetItem::VariablePlus { value, .. } => {
3676                                validate_expression_variables(value, &vars_in_scope)?;
3677                                validate_expression(value, &vars_in_scope)?;
3678                                if contains_pattern_predicate(value) {
3679                                    return Err(anyhow!(
3680                                        "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
3681                                    ));
3682                                }
3683                            }
3684                            SetItem::Labels { .. } => {}
3685                        }
3686                    }
3687                    plan = LogicalPlan::Set {
3688                        input: Box::new(plan),
3689                        items: set_clause.items.clone(),
3690                    };
3691                }
3692                Clause::Remove(remove_clause) => {
3693                    plan = LogicalPlan::Remove {
3694                        input: Box::new(plan),
3695                        items: remove_clause.items.clone(),
3696                    };
3697                }
3698                Clause::Delete(delete_clause) => {
3699                    // Validate DELETE targets
3700                    for item in &delete_clause.items {
3701                        // DELETE n:Label is invalid syntax (label expressions not allowed)
3702                        if matches!(item, Expr::LabelCheck { .. }) {
3703                            return Err(anyhow!(
3704                                "SyntaxError: InvalidDelete - DELETE requires a simple variable reference, not a label expression"
3705                            ));
3706                        }
3707                        let vars_used = collect_expr_variables(item);
3708                        // Reject expressions with no variable references (e.g. DELETE 1+1)
3709                        if vars_used.is_empty() {
3710                            return Err(anyhow!(
3711                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, not a literal expression"
3712                            ));
3713                        }
3714                        for var in &vars_used {
3715                            // Check if variable is defined
3716                            if find_var_in_scope(&vars_in_scope, var).is_none() {
3717                                return Err(anyhow!(
3718                                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
3719                                    var
3720                                ));
3721                            }
3722                        }
3723                        // Strict type check only for simple variable references —
3724                        // complex expressions (property access, array index, etc.)
3725                        // may resolve to a node/edge at runtime even if the base
3726                        // variable is typed as Scalar (e.g. nodes(p)[0]).
3727                        if let Expr::Variable(name) = item
3728                            && let Some(info) = find_var_in_scope(&vars_in_scope, name)
3729                            && matches!(
3730                                info.var_type,
3731                                VariableType::Scalar | VariableType::ScalarLiteral
3732                            )
3733                        {
3734                            return Err(anyhow!(
3735                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, '{}' is a scalar value",
3736                                name
3737                            ));
3738                        }
3739                    }
3740                    // Track deleted variables for later validation
3741                    for item in &delete_clause.items {
3742                        if let Expr::Variable(name) = item {
3743                            deleted_vars.insert(name.clone());
3744                        }
3745                    }
3746                    plan = LogicalPlan::Delete {
3747                        input: Box::new(plan),
3748                        items: delete_clause.items.clone(),
3749                        detach: delete_clause.detach,
3750                    };
3751                }
3752                Clause::With(with_clause) => {
3753                    let (new_plan, new_vars) =
3754                        self.plan_with_clause(&with_clause, plan, &vars_in_scope)?;
3755                    plan = new_plan;
3756                    vars_in_scope = new_vars;
3757                }
3758                Clause::WithRecursive(with_recursive) => {
3759                    // Plan the recursive CTE
3760                    plan = self.plan_with_recursive(&with_recursive, plan, &vars_in_scope)?;
3761                    // Add the CTE name to the scope (as Scalar since it's a table reference)
3762                    add_var_to_scope(
3763                        &mut vars_in_scope,
3764                        &with_recursive.name,
3765                        VariableType::Scalar,
3766                    )?;
3767                }
3768                Clause::Return(return_clause) => {
3769                    // Check for property/label access on deleted entities
3770                    if !deleted_vars.is_empty() {
3771                        for item in &return_clause.items {
3772                            if let ReturnItem::Expr { expr, .. } = item {
3773                                validate_no_deleted_entity_access(expr, &deleted_vars)?;
3774                            }
3775                        }
3776                    }
3777                    plan = self.plan_return_clause(&return_clause, plan, &vars_in_scope)?;
3778                } // All Clause variants are handled above - no catch-all needed
3779            }
3780        }
3781
3782        // Wrap write operations without RETURN in Limit(0) per OpenCypher spec.
3783        // CREATE (n) should return 0 rows, but CREATE (n) RETURN n should return 1 row.
3784        // If RETURN was used, the plan will have been wrapped in Project, so we only
3785        // wrap terminal Create/CreateBatch/Delete/Set/Remove nodes.
3786        let plan = match &plan {
3787            LogicalPlan::Create { .. }
3788            | LogicalPlan::CreateBatch { .. }
3789            | LogicalPlan::Delete { .. }
3790            | LogicalPlan::Set { .. }
3791            | LogicalPlan::Remove { .. }
3792            | LogicalPlan::Merge { .. } => LogicalPlan::Limit {
3793                input: Box::new(plan),
3794                skip: None,
3795                fetch: Some(0),
3796            },
3797            _ => plan,
3798        };
3799
3800        Ok(plan)
3801    }
3802
3803    fn collect_properties_from_expr(expr: &Expr, collected: &mut Vec<Expr>) {
3804        match expr {
3805            Expr::Property(_, _)
3806                if !collected
3807                    .iter()
3808                    .any(|e| e.to_string_repr() == expr.to_string_repr()) =>
3809            {
3810                collected.push(expr.clone());
3811            }
3812            Expr::Property(_, _) => {}
3813            Expr::Variable(_) => {
3814                // Variables are already available, don't need to project them
3815            }
3816            Expr::BinaryOp { left, right, .. } => {
3817                Self::collect_properties_from_expr(left, collected);
3818                Self::collect_properties_from_expr(right, collected);
3819            }
3820            Expr::FunctionCall {
3821                args, window_spec, ..
3822            } => {
3823                for arg in args {
3824                    Self::collect_properties_from_expr(arg, collected);
3825                }
3826                if let Some(spec) = window_spec {
3827                    for partition_expr in &spec.partition_by {
3828                        Self::collect_properties_from_expr(partition_expr, collected);
3829                    }
3830                    for sort_item in &spec.order_by {
3831                        Self::collect_properties_from_expr(&sort_item.expr, collected);
3832                    }
3833                }
3834            }
3835            Expr::List(items) => {
3836                for item in items {
3837                    Self::collect_properties_from_expr(item, collected);
3838                }
3839            }
3840            Expr::UnaryOp { expr: e, .. }
3841            | Expr::IsNull(e)
3842            | Expr::IsNotNull(e)
3843            | Expr::IsUnique(e) => {
3844                Self::collect_properties_from_expr(e, collected);
3845            }
3846            Expr::Case {
3847                expr,
3848                when_then,
3849                else_expr,
3850            } => {
3851                if let Some(e) = expr {
3852                    Self::collect_properties_from_expr(e, collected);
3853                }
3854                for (w, t) in when_then {
3855                    Self::collect_properties_from_expr(w, collected);
3856                    Self::collect_properties_from_expr(t, collected);
3857                }
3858                if let Some(e) = else_expr {
3859                    Self::collect_properties_from_expr(e, collected);
3860                }
3861            }
3862            Expr::In { expr, list } => {
3863                Self::collect_properties_from_expr(expr, collected);
3864                Self::collect_properties_from_expr(list, collected);
3865            }
3866            Expr::ArrayIndex { array, index } => {
3867                Self::collect_properties_from_expr(array, collected);
3868                Self::collect_properties_from_expr(index, collected);
3869            }
3870            Expr::ArraySlice { array, start, end } => {
3871                Self::collect_properties_from_expr(array, collected);
3872                if let Some(s) = start {
3873                    Self::collect_properties_from_expr(s, collected);
3874                }
3875                if let Some(e) = end {
3876                    Self::collect_properties_from_expr(e, collected);
3877                }
3878            }
3879            _ => {}
3880        }
3881    }
3882
3883    fn collect_window_functions(expr: &Expr, collected: &mut Vec<Expr>) {
3884        if let Expr::FunctionCall { window_spec, .. } = expr {
3885            // Collect any function with a window spec (OVER clause)
3886            if window_spec.is_some() {
3887                if !collected
3888                    .iter()
3889                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3890                {
3891                    collected.push(expr.clone());
3892                }
3893                return;
3894            }
3895        }
3896
3897        match expr {
3898            Expr::BinaryOp { left, right, .. } => {
3899                Self::collect_window_functions(left, collected);
3900                Self::collect_window_functions(right, collected);
3901            }
3902            Expr::FunctionCall { args, .. } => {
3903                for arg in args {
3904                    Self::collect_window_functions(arg, collected);
3905                }
3906            }
3907            Expr::List(items) => {
3908                for i in items {
3909                    Self::collect_window_functions(i, collected);
3910                }
3911            }
3912            Expr::Map(items) => {
3913                for (_, i) in items {
3914                    Self::collect_window_functions(i, collected);
3915                }
3916            }
3917            Expr::IsNull(e) | Expr::IsNotNull(e) | Expr::UnaryOp { expr: e, .. } => {
3918                Self::collect_window_functions(e, collected);
3919            }
3920            Expr::Case {
3921                expr,
3922                when_then,
3923                else_expr,
3924            } => {
3925                if let Some(e) = expr {
3926                    Self::collect_window_functions(e, collected);
3927                }
3928                for (w, t) in when_then {
3929                    Self::collect_window_functions(w, collected);
3930                    Self::collect_window_functions(t, collected);
3931                }
3932                if let Some(e) = else_expr {
3933                    Self::collect_window_functions(e, collected);
3934                }
3935            }
3936            Expr::Reduce {
3937                init, list, expr, ..
3938            } => {
3939                Self::collect_window_functions(init, collected);
3940                Self::collect_window_functions(list, collected);
3941                Self::collect_window_functions(expr, collected);
3942            }
3943            Expr::Quantifier {
3944                list, predicate, ..
3945            } => {
3946                Self::collect_window_functions(list, collected);
3947                Self::collect_window_functions(predicate, collected);
3948            }
3949            Expr::In { expr, list } => {
3950                Self::collect_window_functions(expr, collected);
3951                Self::collect_window_functions(list, collected);
3952            }
3953            Expr::ArrayIndex { array, index } => {
3954                Self::collect_window_functions(array, collected);
3955                Self::collect_window_functions(index, collected);
3956            }
3957            Expr::ArraySlice { array, start, end } => {
3958                Self::collect_window_functions(array, collected);
3959                if let Some(s) = start {
3960                    Self::collect_window_functions(s, collected);
3961                }
3962                if let Some(e) = end {
3963                    Self::collect_window_functions(e, collected);
3964                }
3965            }
3966            Expr::Property(e, _) => Self::collect_window_functions(e, collected),
3967            Expr::CountSubquery(_) | Expr::Exists { .. } => {}
3968            _ => {}
3969        }
3970    }
3971
3972    /// Transform property expressions in manual window functions to use qualified variable names.
3973    ///
3974    /// Converts `Expr::Property(Expr::Variable("e"), "dept")` to `Expr::Variable("e.dept")`
3975    /// so the executor can look up values directly from the row HashMap after the
3976    /// intermediate projection has materialized these properties with qualified names.
3977    ///
3978    /// Transforms ALL window functions (both manual and aggregate).
3979    /// Properties like `e.dept` become variables like `Expr::Variable("e.dept")`.
3980    fn transform_window_expr_properties(expr: Expr) -> Expr {
3981        let Expr::FunctionCall {
3982            name,
3983            args,
3984            window_spec: Some(spec),
3985            distinct,
3986        } = expr
3987        else {
3988            return expr;
3989        };
3990
3991        // Transform arguments for ALL window functions
3992        // Both manual (ROW_NUMBER, etc.) and aggregate (SUM, AVG, etc.) need this
3993        let transformed_args = args
3994            .into_iter()
3995            .map(Self::transform_property_to_variable)
3996            .collect();
3997
3998        // CRITICAL: ALL window functions (manual and aggregate) need partition_by/order_by transformed
3999        let transformed_partition_by = spec
4000            .partition_by
4001            .into_iter()
4002            .map(Self::transform_property_to_variable)
4003            .collect();
4004
4005        let transformed_order_by = spec
4006            .order_by
4007            .into_iter()
4008            .map(|item| SortItem {
4009                expr: Self::transform_property_to_variable(item.expr),
4010                ascending: item.ascending,
4011            })
4012            .collect();
4013
4014        Expr::FunctionCall {
4015            name,
4016            args: transformed_args,
4017            window_spec: Some(WindowSpec {
4018                partition_by: transformed_partition_by,
4019                order_by: transformed_order_by,
4020            }),
4021            distinct,
4022        }
4023    }
4024
4025    /// Transform a property expression to a variable expression with qualified name.
4026    ///
4027    /// `Expr::Property(Expr::Variable("e"), "dept")` becomes `Expr::Variable("e.dept")`
4028    fn transform_property_to_variable(expr: Expr) -> Expr {
4029        let Expr::Property(base, prop) = expr else {
4030            return expr;
4031        };
4032
4033        match *base {
4034            Expr::Variable(var) => Expr::Variable(format!("{}.{}", var, prop)),
4035            other => Expr::Property(Box::new(Self::transform_property_to_variable(other)), prop),
4036        }
4037    }
4038
4039    /// Transform VALID_AT macro into function call
4040    ///
4041    /// `e VALID_AT timestamp` becomes `uni.temporal.validAt(e, 'valid_from', 'valid_to', timestamp)`
4042    /// `e VALID_AT(timestamp, 'start', 'end')` becomes `uni.temporal.validAt(e, 'start', 'end', timestamp)`
4043    fn transform_valid_at_to_function(expr: Expr) -> Expr {
4044        match expr {
4045            Expr::ValidAt {
4046                entity,
4047                timestamp,
4048                start_prop,
4049                end_prop,
4050            } => {
4051                let start = start_prop.unwrap_or_else(|| "valid_from".to_string());
4052                let end = end_prop.unwrap_or_else(|| "valid_to".to_string());
4053
4054                Expr::FunctionCall {
4055                    name: "uni.temporal.validAt".to_string(),
4056                    args: vec![
4057                        Self::transform_valid_at_to_function(*entity),
4058                        Expr::Literal(CypherLiteral::String(start)),
4059                        Expr::Literal(CypherLiteral::String(end)),
4060                        Self::transform_valid_at_to_function(*timestamp),
4061                    ],
4062                    distinct: false,
4063                    window_spec: None,
4064                }
4065            }
4066            // Recursively transform nested expressions
4067            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
4068                left: Box::new(Self::transform_valid_at_to_function(*left)),
4069                op,
4070                right: Box::new(Self::transform_valid_at_to_function(*right)),
4071            },
4072            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
4073                op,
4074                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
4075            },
4076            Expr::FunctionCall {
4077                name,
4078                args,
4079                distinct,
4080                window_spec,
4081            } => Expr::FunctionCall {
4082                name,
4083                args: args
4084                    .into_iter()
4085                    .map(Self::transform_valid_at_to_function)
4086                    .collect(),
4087                distinct,
4088                window_spec,
4089            },
4090            Expr::Property(base, prop) => {
4091                Expr::Property(Box::new(Self::transform_valid_at_to_function(*base)), prop)
4092            }
4093            Expr::List(items) => Expr::List(
4094                items
4095                    .into_iter()
4096                    .map(Self::transform_valid_at_to_function)
4097                    .collect(),
4098            ),
4099            Expr::In { expr, list } => Expr::In {
4100                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
4101                list: Box::new(Self::transform_valid_at_to_function(*list)),
4102            },
4103            Expr::IsNull(e) => Expr::IsNull(Box::new(Self::transform_valid_at_to_function(*e))),
4104            Expr::IsNotNull(e) => {
4105                Expr::IsNotNull(Box::new(Self::transform_valid_at_to_function(*e)))
4106            }
4107            Expr::IsUnique(e) => Expr::IsUnique(Box::new(Self::transform_valid_at_to_function(*e))),
4108            // Other cases: return as-is
4109            other => other,
4110        }
4111    }
4112
4113    /// Rewrite system-metadata function calls (`id(v)`, `created_at(v)`,
4114    /// `updated_at(v)`) to direct property access on the corresponding
4115    /// internal column (`v._vid`, `v._created_at`, `v._updated_at`). This
4116    /// normalization enables predicate pushdown via the Property pattern
4117    /// recognized by `PredicateAnalyzer`.
4118    ///
4119    /// All three functions share the same shape: single-arg, argument
4120    /// must be a node/edge variable, returns the column value directly.
4121    fn rewrite_id_to_vid(expr: Expr) -> Expr {
4122        match expr {
4123            Expr::FunctionCall {
4124                name,
4125                args,
4126                distinct,
4127                window_spec,
4128            } if args.len() == 1 && Self::metadata_function_column(&name).is_some() => {
4129                if let Expr::Variable(ref var) = args[0] {
4130                    let column = Self::metadata_function_column(&name).unwrap().to_string();
4131                    Expr::Property(Box::new(Expr::Variable(var.clone())), column)
4132                } else {
4133                    Expr::FunctionCall {
4134                        name,
4135                        args,
4136                        distinct,
4137                        window_spec,
4138                    }
4139                }
4140            }
4141            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
4142                left: Box::new(Self::rewrite_id_to_vid(*left)),
4143                op,
4144                right: Box::new(Self::rewrite_id_to_vid(*right)),
4145            },
4146            Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
4147                op,
4148                expr: Box::new(Self::rewrite_id_to_vid(*inner)),
4149            },
4150            other => other,
4151        }
4152    }
4153
4154    /// Return the internal column name for a system-metadata function, or
4155    /// `None` if the name is not one of the recognised metadata functions.
4156    fn metadata_function_column(name: &str) -> Option<&'static str> {
4157        if name.eq_ignore_ascii_case("id") {
4158            Some("_vid")
4159        } else if name.eq_ignore_ascii_case("created_at") {
4160            Some("_created_at")
4161        } else if name.eq_ignore_ascii_case("updated_at") {
4162            Some("_updated_at")
4163        } else {
4164            None
4165        }
4166    }
4167
4168    /// Plan a MATCH clause, handling both shortestPath and regular patterns.
4169    fn plan_match_clause(
4170        &self,
4171        match_clause: &MatchClause,
4172        plan: LogicalPlan,
4173        vars_in_scope: &mut Vec<VariableInfo>,
4174    ) -> Result<LogicalPlan> {
4175        let mut plan = plan;
4176
4177        if match_clause.pattern.paths.is_empty() {
4178            return Err(anyhow!("Empty pattern"));
4179        }
4180
4181        // Track variables introduced by this OPTIONAL MATCH
4182        let vars_before_pattern = vars_in_scope.len();
4183
4184        for path in &match_clause.pattern.paths {
4185            if let Some(mode) = &path.shortest_path_mode {
4186                plan =
4187                    self.plan_shortest_path(path, plan, vars_in_scope, mode, vars_before_pattern)?;
4188            } else {
4189                plan = self.plan_path(
4190                    path,
4191                    plan,
4192                    vars_in_scope,
4193                    match_clause.optional,
4194                    vars_before_pattern,
4195                )?;
4196            }
4197        }
4198
4199        // Collect variables introduced by this OPTIONAL MATCH pattern
4200        let optional_vars: HashSet<String> = if match_clause.optional {
4201            vars_in_scope[vars_before_pattern..]
4202                .iter()
4203                .map(|v| v.name.clone())
4204                .collect()
4205        } else {
4206            HashSet::new()
4207        };
4208
4209        // Handle WHERE clause with vector_similarity and predicate pushdown
4210        if let Some(predicate) = &match_clause.where_clause {
4211            plan = self.plan_where_clause(predicate, plan, vars_in_scope, optional_vars)?;
4212        }
4213
4214        Ok(plan)
4215    }
4216
4217    /// Plan a shortestPath pattern.
4218    fn plan_shortest_path(
4219        &self,
4220        path: &PathPattern,
4221        plan: LogicalPlan,
4222        vars_in_scope: &mut Vec<VariableInfo>,
4223        mode: &ShortestPathMode,
4224        _vars_before_pattern: usize,
4225    ) -> Result<LogicalPlan> {
4226        let mut plan = plan;
4227        let elements = &path.elements;
4228
4229        // Pattern must be: node-rel-node-rel-...-node (odd number of elements >= 3)
4230        if elements.len() < 3 || elements.len().is_multiple_of(2) {
4231            return Err(anyhow!(
4232                "shortestPath requires at least one relationship: (a)-[*]->(b)"
4233            ));
4234        }
4235
4236        let source_node = match &elements[0] {
4237            PatternElement::Node(n) => n,
4238            _ => return Err(anyhow!("ShortestPath must start with a node")),
4239        };
4240        let rel = match &elements[1] {
4241            PatternElement::Relationship(r) => r,
4242            _ => {
4243                return Err(anyhow!(
4244                    "ShortestPath middle element must be a relationship"
4245                ));
4246            }
4247        };
4248        let target_node = match &elements[2] {
4249            PatternElement::Node(n) => n,
4250            _ => return Err(anyhow!("ShortestPath must end with a node")),
4251        };
4252
4253        let source_var = source_node
4254            .variable
4255            .clone()
4256            .ok_or_else(|| anyhow!("Source node must have variable in shortestPath"))?;
4257        let target_var = target_node
4258            .variable
4259            .clone()
4260            .ok_or_else(|| anyhow!("Target node must have variable in shortestPath"))?;
4261        let path_var = path
4262            .variable
4263            .clone()
4264            .ok_or_else(|| anyhow!("shortestPath must be assigned to a variable"))?;
4265
4266        let source_bound = is_var_in_scope(vars_in_scope, &source_var);
4267        let target_bound = is_var_in_scope(vars_in_scope, &target_var);
4268
4269        // Plan source node if not bound
4270        if !source_bound {
4271            plan = self.plan_unbound_node(source_node, &source_var, plan, false)?;
4272        } else if let Some(prop_filter) =
4273            self.properties_to_expr(&source_var, &source_node.properties)
4274        {
4275            plan = LogicalPlan::Filter {
4276                input: Box::new(plan),
4277                predicate: prop_filter,
4278                optional_variables: HashSet::new(),
4279            };
4280        }
4281
4282        // Plan target node if not bound
4283        let target_label_id = if !target_bound {
4284            // Use first label for target_label_id
4285            let target_label_name = target_node
4286                .labels
4287                .first()
4288                .ok_or_else(|| anyhow!("Target node must have label if not already bound"))?;
4289            // Native lookup first; then consult `CatalogProvider` /
4290            // `ReplacementScanProvider` and allocate a virtual label-id
4291            // (M5b follow-up #6). Virtual ids dispatch to
4292            // `CatalogVertexScanExec` at physical-plan time.
4293            let target_label_id =
4294                if let Some(meta) = self.schema.get_label_case_insensitive(target_label_name) {
4295                    meta.id
4296                } else if let Some((vid, _)) = self.allocate_virtual_label(target_label_name)? {
4297                    vid
4298                } else {
4299                    return Err(anyhow!("Label {} not found", target_label_name));
4300                };
4301
4302            let target_scan = LogicalPlan::Scan {
4303                label_id: target_label_id,
4304                labels: target_node.labels.names().to_vec(),
4305                variable: target_var.clone(),
4306                filter: self.properties_to_expr(&target_var, &target_node.properties),
4307                optional: false,
4308            };
4309
4310            plan = Self::join_with_plan(plan, target_scan);
4311            target_label_id
4312        } else {
4313            if let Some(prop_filter) = self.properties_to_expr(&target_var, &target_node.properties)
4314            {
4315                plan = LogicalPlan::Filter {
4316                    input: Box::new(plan),
4317                    predicate: prop_filter,
4318                    optional_variables: HashSet::new(),
4319                };
4320            }
4321            0 // Wildcard for already-bound target
4322        };
4323
4324        // Add ShortestPath operator
4325        let edge_type_ids = if rel.types.is_empty() {
4326            // If no type specified, fetch all edge types (both schema and schemaless)
4327            self.schema.all_edge_type_ids()
4328        } else {
4329            let mut ids = Vec::new();
4330            for type_name in &rel.types {
4331                let id = if let Some(meta) = self.schema.edge_types.get(type_name) {
4332                    meta.id
4333                } else if let Some((vid, _)) = self.allocate_virtual_edge_type(type_name)? {
4334                    vid
4335                } else {
4336                    return Err(anyhow!("Edge type {} not found", type_name));
4337                };
4338                ids.push(id);
4339            }
4340            ids
4341        };
4342
4343        // Extract hop constraints from relationship pattern
4344        let min_hops = rel.range.as_ref().and_then(|r| r.min).unwrap_or(1);
4345        let max_hops = rel.range.as_ref().and_then(|r| r.max).unwrap_or(u32::MAX);
4346
4347        let sp_plan = match mode {
4348            ShortestPathMode::Shortest => LogicalPlan::ShortestPath {
4349                input: Box::new(plan),
4350                edge_type_ids,
4351                direction: rel.direction.clone(),
4352                source_variable: source_var.clone(),
4353                target_variable: target_var.clone(),
4354                target_label_id,
4355                path_variable: path_var.clone(),
4356                min_hops,
4357                max_hops,
4358            },
4359            ShortestPathMode::AllShortest => LogicalPlan::AllShortestPaths {
4360                input: Box::new(plan),
4361                edge_type_ids,
4362                direction: rel.direction.clone(),
4363                source_variable: source_var.clone(),
4364                target_variable: target_var.clone(),
4365                target_label_id,
4366                path_variable: path_var.clone(),
4367                min_hops,
4368                max_hops,
4369            },
4370        };
4371
4372        if !source_bound {
4373            add_var_to_scope(vars_in_scope, &source_var, VariableType::Node)?;
4374        }
4375        if !target_bound {
4376            add_var_to_scope(vars_in_scope, &target_var, VariableType::Node)?;
4377        }
4378        add_var_to_scope(vars_in_scope, &path_var, VariableType::Path)?;
4379
4380        Ok(sp_plan)
4381    }
4382    /// Plan a MATCH pattern into a LogicalPlan (Scan → Traverse chains).
4383    ///
4384    /// This is a public entry point for the Locy plan builder to reuse the
4385    /// existing pattern-planning logic for clause bodies.
4386    pub fn plan_pattern(
4387        &self,
4388        pattern: &Pattern,
4389        initial_vars: &[VariableInfo],
4390    ) -> Result<LogicalPlan> {
4391        let mut vars_in_scope: Vec<VariableInfo> = initial_vars.to_vec();
4392        let vars_before_pattern = vars_in_scope.len();
4393        let mut plan = LogicalPlan::Empty;
4394        for path in &pattern.paths {
4395            plan = self.plan_path(path, plan, &mut vars_in_scope, false, vars_before_pattern)?;
4396        }
4397        Ok(plan)
4398    }
4399
4400    /// Plan a regular MATCH path (not shortestPath).
4401    fn plan_path(
4402        &self,
4403        path: &PathPattern,
4404        plan: LogicalPlan,
4405        vars_in_scope: &mut Vec<VariableInfo>,
4406        optional: bool,
4407        vars_before_pattern: usize,
4408    ) -> Result<LogicalPlan> {
4409        let mut plan = plan;
4410        let elements = &path.elements;
4411        let mut i = 0;
4412
4413        let path_variable = path.variable.clone();
4414
4415        // Check for VariableAlreadyBound: path variable already in scope
4416        if let Some(pv) = &path_variable
4417            && !pv.is_empty()
4418            && is_var_in_scope(vars_in_scope, pv)
4419        {
4420            return Err(anyhow!(
4421                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4422                pv
4423            ));
4424        }
4425
4426        // Check for VariableAlreadyBound: path variable conflicts with element variables
4427        if let Some(pv) = &path_variable
4428            && !pv.is_empty()
4429        {
4430            for element in elements {
4431                match element {
4432                    PatternElement::Node(n) => {
4433                        if let Some(v) = &n.variable
4434                            && v == pv
4435                        {
4436                            return Err(anyhow!(
4437                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4438                                pv
4439                            ));
4440                        }
4441                    }
4442                    PatternElement::Relationship(r) => {
4443                        if let Some(v) = &r.variable
4444                            && v == pv
4445                        {
4446                            return Err(anyhow!(
4447                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4448                                pv
4449                            ));
4450                        }
4451                    }
4452                    PatternElement::Parenthesized { .. } => {}
4453                }
4454            }
4455        }
4456
4457        // For OPTIONAL MATCH, extract all variables from this pattern upfront.
4458        // When any hop fails in a multi-hop pattern, ALL these variables should be NULL.
4459        let mut optional_pattern_vars: HashSet<String> = if optional {
4460            let mut vars = HashSet::new();
4461            for element in elements {
4462                match element {
4463                    PatternElement::Node(n) => {
4464                        if let Some(v) = &n.variable
4465                            && !v.is_empty()
4466                            && !is_var_in_scope(vars_in_scope, v)
4467                        {
4468                            vars.insert(v.clone());
4469                        }
4470                    }
4471                    PatternElement::Relationship(r) => {
4472                        if let Some(v) = &r.variable
4473                            && !v.is_empty()
4474                            && !is_var_in_scope(vars_in_scope, v)
4475                        {
4476                            vars.insert(v.clone());
4477                        }
4478                    }
4479                    PatternElement::Parenthesized { pattern, .. } => {
4480                        // Also check nested patterns
4481                        for nested_elem in &pattern.elements {
4482                            match nested_elem {
4483                                PatternElement::Node(n) => {
4484                                    if let Some(v) = &n.variable
4485                                        && !v.is_empty()
4486                                        && !is_var_in_scope(vars_in_scope, v)
4487                                    {
4488                                        vars.insert(v.clone());
4489                                    }
4490                                }
4491                                PatternElement::Relationship(r) => {
4492                                    if let Some(v) = &r.variable
4493                                        && !v.is_empty()
4494                                        && !is_var_in_scope(vars_in_scope, v)
4495                                    {
4496                                        vars.insert(v.clone());
4497                                    }
4498                                }
4499                                _ => {}
4500                            }
4501                        }
4502                    }
4503                }
4504            }
4505            // Include path variable if present
4506            if let Some(pv) = &path_variable
4507                && !pv.is_empty()
4508            {
4509                vars.insert(pv.clone());
4510            }
4511            vars
4512        } else {
4513            HashSet::new()
4514        };
4515
4516        // Pre-scan path elements for bound edge variables from previous MATCH clauses.
4517        // These must participate in Trail mode (relationship uniqueness) enforcement
4518        // across ALL segments in this path, so that VLP segments like [*0..1] don't
4519        // traverse through edges already claimed by a bound relationship [r].
4520        let path_bound_edge_vars: HashSet<String> = {
4521            let mut bound = HashSet::new();
4522            for element in elements {
4523                if let PatternElement::Relationship(rel) = element
4524                    && let Some(ref var_name) = rel.variable
4525                    && !var_name.is_empty()
4526                    && vars_in_scope[..vars_before_pattern]
4527                        .iter()
4528                        .any(|v| v.name == *var_name)
4529                {
4530                    bound.insert(var_name.clone());
4531                }
4532            }
4533            bound
4534        };
4535
4536        // Track if any traverses were added (for zero-length path detection)
4537        let mut had_traverses = false;
4538        // Track the node variable for zero-length path binding
4539        let mut single_node_variable: Option<String> = None;
4540        // Collect node/edge variables for BindPath (fixed-length path binding)
4541        let mut path_node_vars: Vec<String> = Vec::new();
4542        let mut path_edge_vars: Vec<String> = Vec::new();
4543        // Track the last processed outer node variable for QPP source binding.
4544        // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is `a`, not `x`.
4545        let mut last_outer_node_var: Option<String> = None;
4546
4547        // Multi-hop path variables are now supported - path is accumulated across hops
4548        while i < elements.len() {
4549            let element = &elements[i];
4550            match element {
4551                PatternElement::Node(n) => {
4552                    let mut variable = n.variable.clone().unwrap_or_default();
4553                    if variable.is_empty() {
4554                        variable = self.next_anon_var();
4555                    }
4556                    // Track first node variable for zero-length path
4557                    if single_node_variable.is_none() {
4558                        single_node_variable = Some(variable.clone());
4559                    }
4560                    let is_bound =
4561                        !variable.is_empty() && is_var_in_scope(vars_in_scope, &variable);
4562                    if optional && !is_bound {
4563                        optional_pattern_vars.insert(variable.clone());
4564                    }
4565
4566                    if is_bound {
4567                        // Check for type conflict - can't use an Edge/Path as a Node
4568                        if let Some(info) = find_var_in_scope(vars_in_scope, &variable)
4569                            && !info.var_type.is_compatible_with(VariableType::Node)
4570                        {
4571                            return Err(anyhow!(
4572                                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4573                                variable,
4574                                info.var_type
4575                            ));
4576                        }
4577                        if let Some(node_filter) =
4578                            self.node_filter_expr(&variable, &n.labels, &n.properties)
4579                        {
4580                            plan = LogicalPlan::Filter {
4581                                input: Box::new(plan),
4582                                predicate: node_filter,
4583                                optional_variables: HashSet::new(),
4584                            };
4585                        }
4586                    } else {
4587                        plan = self.plan_unbound_node(n, &variable, plan, optional)?;
4588                        if !variable.is_empty() {
4589                            add_var_to_scope(vars_in_scope, &variable, VariableType::Node)?;
4590                        }
4591                    }
4592
4593                    // Track source node for BindPath
4594                    if path_variable.is_some() && path_node_vars.is_empty() {
4595                        path_node_vars.push(variable.clone());
4596                    }
4597
4598                    // Look ahead for relationships
4599                    let mut current_source_var = variable;
4600                    last_outer_node_var = Some(current_source_var.clone());
4601                    i += 1;
4602                    while i < elements.len() {
4603                        if let PatternElement::Relationship(r) = &elements[i] {
4604                            if i + 1 < elements.len() {
4605                                let target_node_part = &elements[i + 1];
4606                                if let PatternElement::Node(n_target) = target_node_part {
4607                                    // For VLP traversals, pass path_variable through
4608                                    // For fixed-length, we use BindPath instead
4609                                    let is_vlp = r.range.is_some();
4610                                    let traverse_path_var =
4611                                        if is_vlp { path_variable.clone() } else { None };
4612
4613                                    // If we're about to start a VLP segment and there are
4614                                    // collected fixed-hop path vars, create an intermediate
4615                                    // BindPath for the fixed prefix first. The VLP will then
4616                                    // extend this existing path.
4617                                    if is_vlp
4618                                        && let Some(pv) = path_variable.as_ref()
4619                                        && !path_node_vars.is_empty()
4620                                    {
4621                                        plan = LogicalPlan::BindPath {
4622                                            input: Box::new(plan),
4623                                            node_variables: std::mem::take(&mut path_node_vars),
4624                                            edge_variables: std::mem::take(&mut path_edge_vars),
4625                                            path_variable: pv.clone(),
4626                                        };
4627                                        if !is_var_in_scope(vars_in_scope, pv) {
4628                                            add_var_to_scope(
4629                                                vars_in_scope,
4630                                                pv,
4631                                                VariableType::Path,
4632                                            )?;
4633                                        }
4634                                    }
4635
4636                                    // Plan the traverse from the current source node
4637                                    let target_was_bound =
4638                                        n_target.variable.as_ref().is_some_and(|v| {
4639                                            !v.is_empty() && is_var_in_scope(vars_in_scope, v)
4640                                        });
4641                                    let (new_plan, target_var, effective_target) = self
4642                                        .plan_traverse_with_source(
4643                                            plan,
4644                                            vars_in_scope,
4645                                            TraverseParams {
4646                                                rel: r,
4647                                                target_node: n_target,
4648                                                optional,
4649                                                path_variable: traverse_path_var,
4650                                                optional_pattern_vars: optional_pattern_vars
4651                                                    .clone(),
4652                                            },
4653                                            &current_source_var,
4654                                            vars_before_pattern,
4655                                            &path_bound_edge_vars,
4656                                        )?;
4657                                    plan = new_plan;
4658                                    if optional && !target_was_bound {
4659                                        optional_pattern_vars.insert(target_var.clone());
4660                                    }
4661
4662                                    // Track edge/target node for BindPath
4663                                    if path_variable.is_some() && !is_vlp {
4664                                        // Use the edge variable if given, otherwise use
4665                                        // the internal tracking column pattern.
4666                                        // Use effective_target (which may be __rebound_x
4667                                        // for bound-target traversals) to match the actual
4668                                        // column name produced by GraphTraverseExec.
4669                                        if let Some(ev) = &r.variable {
4670                                            path_edge_vars.push(ev.clone());
4671                                        } else {
4672                                            path_edge_vars
4673                                                .push(format!("__eid_to_{}", effective_target));
4674                                        }
4675                                        path_node_vars.push(target_var.clone());
4676                                    }
4677
4678                                    current_source_var = target_var;
4679                                    last_outer_node_var = Some(current_source_var.clone());
4680                                    had_traverses = true;
4681                                    i += 2;
4682                                } else {
4683                                    return Err(anyhow!("Relationship must be followed by a node"));
4684                                }
4685                            } else {
4686                                return Err(anyhow!("Relationship cannot be the last element"));
4687                            }
4688                        } else {
4689                            break;
4690                        }
4691                    }
4692                }
4693                PatternElement::Relationship(_) => {
4694                    return Err(anyhow!("Pattern must start with a node"));
4695                }
4696                PatternElement::Parenthesized { pattern, range } => {
4697                    // Quantified pattern: ((a)-[:REL]->(b)){n,m}
4698                    // Validate: odd number of elements (node-rel-node[-rel-node]*)
4699                    if pattern.elements.len() < 3 || pattern.elements.len() % 2 == 0 {
4700                        return Err(anyhow!(
4701                            "Quantified pattern must have node-relationship-node structure (odd number >= 3 elements)"
4702                        ));
4703                    }
4704
4705                    let source_node = match &pattern.elements[0] {
4706                        PatternElement::Node(n) => n,
4707                        _ => return Err(anyhow!("Quantified pattern must start with a node")),
4708                    };
4709
4710                    // Extract all relationship-node pairs (QPP steps)
4711                    let mut qpp_rels: Vec<(&RelationshipPattern, &NodePattern)> = Vec::new();
4712                    for pair_idx in (1..pattern.elements.len()).step_by(2) {
4713                        let rel = match &pattern.elements[pair_idx] {
4714                            PatternElement::Relationship(r) => r,
4715                            _ => {
4716                                return Err(anyhow!(
4717                                    "Quantified pattern element at position {} must be a relationship",
4718                                    pair_idx
4719                                ));
4720                            }
4721                        };
4722                        let node = match &pattern.elements[pair_idx + 1] {
4723                            PatternElement::Node(n) => n,
4724                            _ => {
4725                                return Err(anyhow!(
4726                                    "Quantified pattern element at position {} must be a node",
4727                                    pair_idx + 1
4728                                ));
4729                            }
4730                        };
4731                        // Reject nested quantifiers
4732                        if rel.range.is_some() {
4733                            return Err(anyhow!(
4734                                "Nested quantifiers not supported: ((a)-[:REL*n]->(b)){{m}}"
4735                            ));
4736                        }
4737                        qpp_rels.push((rel, node));
4738                    }
4739
4740                    // Check if there's an outer target node after the Parenthesized element.
4741                    // In syntax like `(a)((x)-[:LINK]->(y)){2,4}(b)`, the `(b)` is the outer
4742                    // target that should receive the traversal result.
4743                    let inner_target_node = qpp_rels.last().unwrap().1;
4744                    let outer_target_node = if i + 1 < elements.len() {
4745                        match &elements[i + 1] {
4746                            PatternElement::Node(n) => Some(n),
4747                            _ => None,
4748                        }
4749                    } else {
4750                        None
4751                    };
4752                    // Use the outer target for variable binding and filters; inner target
4753                    // labels are used for state constraints within the NFA.
4754                    let target_node = outer_target_node.unwrap_or(inner_target_node);
4755
4756                    // For simple 3-element single-hop QPP without intermediate label constraints,
4757                    // fall back to existing VLP behavior (copy range to relationship).
4758                    let use_simple_vlp = qpp_rels.len() == 1
4759                        && inner_target_node
4760                            .labels
4761                            .first()
4762                            .and_then(|l| self.schema.get_label_case_insensitive(l))
4763                            .is_none();
4764
4765                    // Plan source node.
4766                    // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is the preceding
4767                    // outer node `a`, NOT the inner `x`. If there's a preceding outer
4768                    // node variable, use it; otherwise fall back to the inner source.
4769                    let source_variable = if let Some(ref outer_src) = last_outer_node_var {
4770                        // The preceding outer node is already bound and in scope
4771                        // Apply any property filters from the inner source node
4772                        if let Some(prop_filter) =
4773                            self.properties_to_expr(outer_src, &source_node.properties)
4774                        {
4775                            plan = LogicalPlan::Filter {
4776                                input: Box::new(plan),
4777                                predicate: prop_filter,
4778                                optional_variables: HashSet::new(),
4779                            };
4780                        }
4781                        outer_src.clone()
4782                    } else {
4783                        let sv = source_node
4784                            .variable
4785                            .clone()
4786                            .filter(|v| !v.is_empty())
4787                            .unwrap_or_else(|| self.next_anon_var());
4788
4789                        if is_var_in_scope(vars_in_scope, &sv) {
4790                            // Source is already bound, apply property filter if needed
4791                            if let Some(prop_filter) =
4792                                self.properties_to_expr(&sv, &source_node.properties)
4793                            {
4794                                plan = LogicalPlan::Filter {
4795                                    input: Box::new(plan),
4796                                    predicate: prop_filter,
4797                                    optional_variables: HashSet::new(),
4798                                };
4799                            }
4800                        } else {
4801                            // Source is unbound, scan it
4802                            plan = self.plan_unbound_node(source_node, &sv, plan, optional)?;
4803                            add_var_to_scope(vars_in_scope, &sv, VariableType::Node)?;
4804                            if optional {
4805                                optional_pattern_vars.insert(sv.clone());
4806                            }
4807                        }
4808                        sv
4809                    };
4810
4811                    if use_simple_vlp {
4812                        // Simple single-hop QPP: apply range to relationship and use VLP path
4813                        let mut relationship = qpp_rels[0].0.clone();
4814                        relationship.range = range.clone();
4815
4816                        let target_was_bound = target_node
4817                            .variable
4818                            .as_ref()
4819                            .is_some_and(|v| !v.is_empty() && is_var_in_scope(vars_in_scope, v));
4820                        let (new_plan, target_var, _effective_target) = self
4821                            .plan_traverse_with_source(
4822                                plan,
4823                                vars_in_scope,
4824                                TraverseParams {
4825                                    rel: &relationship,
4826                                    target_node,
4827                                    optional,
4828                                    path_variable: path_variable.clone(),
4829                                    optional_pattern_vars: optional_pattern_vars.clone(),
4830                                },
4831                                &source_variable,
4832                                vars_before_pattern,
4833                                &path_bound_edge_vars,
4834                            )?;
4835                        plan = new_plan;
4836                        if optional && !target_was_bound {
4837                            optional_pattern_vars.insert(target_var);
4838                        }
4839                    } else {
4840                        // Multi-hop QPP: build QppStepInfo list and create Traverse with qpp_steps
4841                        let mut qpp_step_infos = Vec::new();
4842                        let mut all_edge_type_ids = Vec::new();
4843
4844                        for (rel, node) in &qpp_rels {
4845                            let mut step_edge_type_ids = Vec::new();
4846                            if rel.types.is_empty() {
4847                                step_edge_type_ids = self.schema.all_edge_type_ids();
4848                            } else {
4849                                for type_name in &rel.types {
4850                                    if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4851                                        step_edge_type_ids.push(edge_meta.id);
4852                                    }
4853                                }
4854                            }
4855                            all_edge_type_ids.extend_from_slice(&step_edge_type_ids);
4856
4857                            let target_label = node.labels.first().and_then(|l| {
4858                                self.schema.get_label_case_insensitive(l).map(|_| l.clone())
4859                            });
4860
4861                            qpp_step_infos.push(QppStepInfo {
4862                                edge_type_ids: step_edge_type_ids,
4863                                direction: rel.direction.clone(),
4864                                target_label,
4865                            });
4866                        }
4867
4868                        // Deduplicate edge type IDs for adjacency warming
4869                        all_edge_type_ids.sort_unstable();
4870                        all_edge_type_ids.dedup();
4871
4872                        // Compute iteration bounds from range
4873                        let hops_per_iter = qpp_step_infos.len();
4874                        const QPP_DEFAULT_MAX_HOPS: usize = 100;
4875                        let (min_iter, max_iter) = if let Some(range) = range {
4876                            let min = range.min.unwrap_or(1) as usize;
4877                            let max = range
4878                                .max
4879                                .map(|m| m as usize)
4880                                .unwrap_or(QPP_DEFAULT_MAX_HOPS / hops_per_iter);
4881                            (min, max)
4882                        } else {
4883                            (1, 1)
4884                        };
4885                        let min_hops = min_iter * hops_per_iter;
4886                        let max_hops = max_iter * hops_per_iter;
4887
4888                        // Target variable from the last node in the QPP sub-pattern
4889                        let target_variable = target_node
4890                            .variable
4891                            .clone()
4892                            .filter(|v| !v.is_empty())
4893                            .unwrap_or_else(|| self.next_anon_var());
4894
4895                        let target_is_bound = is_var_in_scope(vars_in_scope, &target_variable);
4896
4897                        // Determine target label for the final node
4898                        let target_label_meta = target_node
4899                            .labels
4900                            .first()
4901                            .and_then(|l| self.schema.get_label_case_insensitive(l));
4902
4903                        // Collect scope match variables
4904                        let mut scope_match_variables: HashSet<String> = vars_in_scope
4905                            [vars_before_pattern..]
4906                            .iter()
4907                            .map(|v| v.name.clone())
4908                            .collect();
4909                        scope_match_variables.insert(target_variable.clone());
4910
4911                        // Handle bound target: use rebound variable for traverse
4912                        let rebound_target_var = if target_is_bound {
4913                            Some(target_variable.clone())
4914                        } else {
4915                            None
4916                        };
4917                        let effective_target_var = if let Some(ref bv) = rebound_target_var {
4918                            format!("__rebound_{}", bv)
4919                        } else {
4920                            target_variable.clone()
4921                        };
4922
4923                        plan = LogicalPlan::Traverse {
4924                            input: Box::new(plan),
4925                            edge_type_ids: all_edge_type_ids,
4926                            direction: qpp_rels[0].0.direction.clone(),
4927                            source_variable: source_variable.to_string(),
4928                            target_variable: effective_target_var.clone(),
4929                            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
4930                            step_variable: None, // QPP doesn't expose intermediate edges
4931                            min_hops,
4932                            max_hops,
4933                            optional,
4934                            target_filter: self.node_filter_expr(
4935                                &target_variable,
4936                                &target_node.labels,
4937                                &target_node.properties,
4938                            ),
4939                            path_variable: path_variable.clone(),
4940                            edge_properties: HashSet::new(),
4941                            is_variable_length: true,
4942                            optional_pattern_vars: optional_pattern_vars.clone(),
4943                            scope_match_variables,
4944                            edge_filter_expr: None,
4945                            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4946                            qpp_steps: Some(qpp_step_infos),
4947                        };
4948
4949                        // Handle bound target: filter rebound results against original variable
4950                        if let Some(ref btv) = rebound_target_var {
4951                            // Filter: __rebound_x._vid = x._vid
4952                            let filter_pred = Expr::BinaryOp {
4953                                left: Box::new(Expr::Property(
4954                                    Box::new(Expr::Variable(effective_target_var.clone())),
4955                                    "_vid".to_string(),
4956                                )),
4957                                op: BinaryOp::Eq,
4958                                right: Box::new(Expr::Property(
4959                                    Box::new(Expr::Variable(btv.clone())),
4960                                    "_vid".to_string(),
4961                                )),
4962                            };
4963                            plan = LogicalPlan::Filter {
4964                                input: Box::new(plan),
4965                                predicate: filter_pred,
4966                                optional_variables: if optional {
4967                                    optional_pattern_vars.clone()
4968                                } else {
4969                                    HashSet::new()
4970                                },
4971                            };
4972                        }
4973
4974                        // Add target variable to scope
4975                        if !target_is_bound {
4976                            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
4977                        }
4978
4979                        // Add path variable to scope
4980                        if let Some(ref pv) = path_variable
4981                            && !pv.is_empty()
4982                            && !is_var_in_scope(vars_in_scope, pv)
4983                        {
4984                            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
4985                        }
4986                    }
4987                    had_traverses = true;
4988
4989                    // Skip the outer target node if we consumed it
4990                    if outer_target_node.is_some() {
4991                        i += 2; // skip both Parenthesized and the following Node
4992                    } else {
4993                        i += 1;
4994                    }
4995                }
4996            }
4997        }
4998
4999        // If this is a single-node pattern with a path variable, bind the zero-length path
5000        // E.g., `p = (a)` should create a Path with one node and zero edges
5001        if let Some(ref path_var) = path_variable
5002            && !path_var.is_empty()
5003            && !had_traverses
5004            && let Some(node_var) = single_node_variable
5005        {
5006            plan = LogicalPlan::BindZeroLengthPath {
5007                input: Box::new(plan),
5008                node_variable: node_var,
5009                path_variable: path_var.clone(),
5010            };
5011            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
5012        }
5013
5014        // Bind fixed-length path from collected node/edge variables
5015        if let Some(ref path_var) = path_variable
5016            && !path_var.is_empty()
5017            && had_traverses
5018            && !path_node_vars.is_empty()
5019            && !is_var_in_scope(vars_in_scope, path_var)
5020        {
5021            plan = LogicalPlan::BindPath {
5022                input: Box::new(plan),
5023                node_variables: path_node_vars,
5024                edge_variables: path_edge_vars,
5025                path_variable: path_var.clone(),
5026            };
5027            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
5028        }
5029
5030        Ok(plan)
5031    }
5032
5033    /// Plan a traverse with an explicit source variable name.
5034    ///
5035    /// Returns `(plan, target_variable, effective_target_variable)` where:
5036    /// - `target_variable` is the semantic variable name for downstream scope
5037    /// - `effective_target_variable` is the actual column-name prefix used by
5038    ///   the traverse (may be `__rebound_x` for bound-target patterns)
5039    fn plan_traverse_with_source(
5040        &self,
5041        plan: LogicalPlan,
5042        vars_in_scope: &mut Vec<VariableInfo>,
5043        params: TraverseParams<'_>,
5044        source_variable: &str,
5045        vars_before_pattern: usize,
5046        path_bound_edge_vars: &HashSet<String>,
5047    ) -> Result<(LogicalPlan, String, String)> {
5048        // Check for parameter used as relationship predicate
5049        if let Some(Expr::Parameter(_)) = &params.rel.properties {
5050            return Err(anyhow!(
5051                "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
5052            ));
5053        }
5054
5055        let mut edge_type_ids = Vec::new();
5056        let mut dst_labels = Vec::new();
5057        let mut unknown_types = Vec::new();
5058
5059        if params.rel.types.is_empty() {
5060            // All types - include both schema and schemaless edge types
5061            // This ensures MATCH (a)-[r]->(b) finds edges even when no schema is defined
5062            edge_type_ids = self.schema.all_edge_type_ids();
5063            for meta in self.schema.edge_types.values() {
5064                dst_labels.extend(meta.dst_labels.iter().cloned());
5065            }
5066        } else {
5067            for type_name in &params.rel.types {
5068                if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
5069                    // Known type - use standard Traverse with type_id
5070                    edge_type_ids.push(edge_meta.id);
5071                    dst_labels.extend(edge_meta.dst_labels.iter().cloned());
5072                } else if let Some((vid, _)) = self.allocate_virtual_edge_type(type_name)? {
5073                    // M5b.3: virtual edge type (plugin-registered CatalogTable).
5074                    // Resolving it into `edge_type_ids` (not `unknown_types`)
5075                    // lets the regular `Traverse` planner build a structured
5076                    // plan that the physical planner can dispatch to a
5077                    // `CatalogEdgeScanExec` mid-pattern.
5078                    edge_type_ids.push(vid);
5079                } else {
5080                    // Unknown type - will use TraverseMainByType
5081                    unknown_types.push(type_name.clone());
5082                }
5083            }
5084        }
5085
5086        // Deduplicate edge type IDs and unknown types ([:T|:T] → [:T])
5087        edge_type_ids.sort_unstable();
5088        edge_type_ids.dedup();
5089        unknown_types.sort_unstable();
5090        unknown_types.dedup();
5091
5092        let mut target_variable = params.target_node.variable.clone().unwrap_or_default();
5093        if target_variable.is_empty() {
5094            target_variable = self.next_anon_var();
5095        }
5096        let target_is_bound =
5097            !target_variable.is_empty() && is_var_in_scope(vars_in_scope, &target_variable);
5098
5099        // Check for VariableTypeConflict: relationship variable used as node
5100        // e.g., ()-[r]-(r) where r is both the edge and a node endpoint
5101        if let Some(rel_var) = &params.rel.variable
5102            && !rel_var.is_empty()
5103            && rel_var == &target_variable
5104        {
5105            return Err(anyhow!(
5106                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as relationship, cannot use as node",
5107                rel_var
5108            ));
5109        }
5110
5111        // Check for VariableTypeConflict/RelationshipUniquenessViolation
5112        // e.g., (r)-[r]-() or r = ()-[]-(), ()-[r]-()
5113        // Also: (a)-[r]->()-[r]->(a) where r is reused as relationship in same pattern
5114        // BUT: MATCH (a)-[r]->() WITH r MATCH ()-[r]->() is ALLOWED (r is bound from previous clause)
5115        let mut bound_edge_var: Option<String> = None;
5116        let mut bound_edge_list_var: Option<String> = None;
5117        if let Some(rel_var) = &params.rel.variable
5118            && !rel_var.is_empty()
5119            && let Some(info) = find_var_in_scope(vars_in_scope, rel_var)
5120        {
5121            let is_from_previous_clause = vars_in_scope[..vars_before_pattern]
5122                .iter()
5123                .any(|v| v.name == *rel_var);
5124
5125            if info.var_type == VariableType::Edge {
5126                // Check if this edge variable comes from a previous clause (before this MATCH)
5127                if is_from_previous_clause {
5128                    // Edge variable bound from previous clause - this is allowed
5129                    // We'll filter the traversal to match this specific edge
5130                    bound_edge_var = Some(rel_var.clone());
5131                } else {
5132                    // Same relationship variable used twice in the same MATCH clause
5133                    return Err(anyhow!(
5134                        "SyntaxError: RelationshipUniquenessViolation - Relationship variable '{}' is already used in this pattern",
5135                        rel_var
5136                    ));
5137                }
5138            } else if params.rel.range.is_some()
5139                && is_from_previous_clause
5140                && matches!(
5141                    info.var_type,
5142                    VariableType::Scalar | VariableType::ScalarLiteral
5143                )
5144            {
5145                // Allow VLP rebound against a previously bound relationship list
5146                // (e.g. WITH [r1, r2] AS rs ... MATCH ()-[rs*]->()).
5147                bound_edge_list_var = Some(rel_var.clone());
5148            } else if !info.var_type.is_compatible_with(VariableType::Edge) {
5149                return Err(anyhow!(
5150                    "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as relationship",
5151                    rel_var,
5152                    info.var_type
5153                ));
5154            }
5155        }
5156
5157        // Check for VariableTypeConflict: target node variable already bound as non-Node
5158        // e.g., ()-[r]-()-[]-(r) where r was added as Edge, now used as target node
5159        if target_is_bound
5160            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5161            && !info.var_type.is_compatible_with(VariableType::Node)
5162        {
5163            return Err(anyhow!(
5164                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
5165                target_variable,
5166                info.var_type
5167            ));
5168        }
5169
5170        // If all requested types are unknown (schemaless), use TraverseMainByType
5171        // This allows queries like MATCH (a)-[:UnknownType]->(b) to work
5172        // Also supports OR relationship types like MATCH (a)-[:KNOWS|HATES]->(b)
5173        if !unknown_types.is_empty() && edge_type_ids.is_empty() {
5174            // All types are unknown - use schemaless traversal
5175
5176            let is_variable_length = params.rel.range.is_some();
5177
5178            const DEFAULT_MAX_HOPS: usize = 100;
5179            let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
5180                let min = range.min.unwrap_or(1) as usize;
5181                let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
5182                (min, max)
5183            } else {
5184                (1, 1)
5185            };
5186
5187            // For both single-hop and variable-length paths:
5188            // - step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
5189            //   Single-hop: step_var holds a single edge object
5190            //   VLP: step_var holds a list of edge objects
5191            // - path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
5192            let step_var = params.rel.variable.clone();
5193            let path_var = params.path_variable.clone();
5194
5195            // Compute scope_match_variables for relationship uniqueness scoping.
5196            let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
5197                .iter()
5198                .map(|v| v.name.clone())
5199                .collect();
5200            if let Some(ref sv) = step_var {
5201                // Only add the step variable to scope if it's NOT rebound from a previous clause.
5202                // Rebound edges (bound_edge_var is set) should not participate in uniqueness
5203                // filtering because the second MATCH intentionally reuses the same edge.
5204                if bound_edge_var.is_none() {
5205                    scope_match_variables.insert(sv.clone());
5206                }
5207            }
5208            scope_match_variables.insert(target_variable.clone());
5209            // Include bound edge variables from this path for cross-segment Trail mode
5210            // enforcement. This ensures VLP segments like [*0..1] don't traverse through
5211            // edges already claimed by a bound relationship [r] in the same path.
5212            // Exclude the CURRENT segment's bound edge: the schemaless path doesn't use
5213            // __rebound_ renaming, so the BFS must be free to match the bound edge itself.
5214            scope_match_variables.extend(
5215                path_bound_edge_vars
5216                    .iter()
5217                    .filter(|v| bound_edge_var.as_ref() != Some(*v))
5218                    .cloned(),
5219            );
5220
5221            let mut plan = LogicalPlan::TraverseMainByType {
5222                type_names: unknown_types,
5223                input: Box::new(plan),
5224                direction: params.rel.direction.clone(),
5225                source_variable: source_variable.to_string(),
5226                target_variable: target_variable.clone(),
5227                step_variable: step_var.clone(),
5228                min_hops,
5229                max_hops,
5230                optional: params.optional,
5231                target_filter: self.node_filter_expr(
5232                    &target_variable,
5233                    &params.target_node.labels,
5234                    &params.target_node.properties,
5235                ),
5236                path_variable: path_var.clone(),
5237                is_variable_length,
5238                optional_pattern_vars: params.optional_pattern_vars.clone(),
5239                scope_match_variables,
5240                edge_filter_expr: if is_variable_length {
5241                    let filter_var = step_var
5242                        .clone()
5243                        .unwrap_or_else(|| "__anon_edge".to_string());
5244                    self.properties_to_expr(&filter_var, &params.rel.properties)
5245                } else {
5246                    None
5247                },
5248                path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5249            };
5250
5251            // Only apply bound target filter for Imported variables (from outer scope/subquery).
5252            // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
5253            // uses Parameter which requires the value to be in params (subquery context).
5254            if target_is_bound
5255                && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5256                && info.var_type == VariableType::Imported
5257            {
5258                plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
5259            }
5260
5261            // Apply relationship property predicates for fixed-length schemaless
5262            // traversals (e.g., [r:KNOWS {name: 'monkey'}]).
5263            // For VLP, predicates are stored inline in edge_filter_expr (above).
5264            // For fixed-length, wrap as a Filter node for post-traverse evaluation.
5265            if !is_variable_length
5266                && let Some(edge_var_name) = step_var.as_ref()
5267                && let Some(edge_prop_filter) =
5268                    self.properties_to_expr(edge_var_name, &params.rel.properties)
5269            {
5270                let filter_optional_vars = if params.optional {
5271                    params.optional_pattern_vars.clone()
5272                } else {
5273                    HashSet::new()
5274                };
5275                plan = LogicalPlan::Filter {
5276                    input: Box::new(plan),
5277                    predicate: edge_prop_filter,
5278                    optional_variables: filter_optional_vars,
5279                };
5280            }
5281
5282            // Add the bound variables to scope
5283            if let Some(sv) = &step_var {
5284                add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5285                if is_variable_length
5286                    && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5287                {
5288                    info.is_vlp = true;
5289                }
5290            }
5291            if let Some(pv) = &path_var
5292                && !is_var_in_scope(vars_in_scope, pv)
5293            {
5294                add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5295            }
5296            if !is_var_in_scope(vars_in_scope, &target_variable) {
5297                add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5298            }
5299
5300            return Ok((plan, target_variable.clone(), target_variable));
5301        }
5302
5303        // If we have a mix of known and unknown types, error for now
5304        // (could be extended to Union of Traverse + TraverseMainByType)
5305        if !unknown_types.is_empty() {
5306            return Err(anyhow!(
5307                "Mixed known and unknown edge types not yet supported. Unknown: {:?}",
5308                unknown_types
5309            ));
5310        }
5311
5312        // Resolve target label to either a schema id or a virtual id from the
5313        // plugin registry. Mid-pattern virtual-label dispatch (M5b.3) requires
5314        // the virtual id to flow into `Traverse.target_label_id` so the
5315        // physical planner can layer a `CatalogVertexScanExec` join on the
5316        // traverse output. Mirrors the schema-then-virtual fallthrough used
5317        // by single-vertex `Scan` planning (~`plan_node_pattern` below).
5318        let mut virtual_target_label_id: Option<u16> = None;
5319        let target_label_meta = if let Some(label_name) = params.target_node.labels.first() {
5320            // Use first label for target_label_id
5321            // For schemaless support, allow unknown target labels
5322            match self.schema.get_label_case_insensitive(label_name) {
5323                Some(meta) => Some(meta),
5324                None => {
5325                    if let Some((vid, _)) = self.allocate_virtual_label(label_name)? {
5326                        virtual_target_label_id = Some(vid);
5327                    }
5328                    None
5329                }
5330            }
5331        } else if !target_is_bound {
5332            // Infer from edge type(s)
5333            let unique_dsts: Vec<_> = dst_labels
5334                .into_iter()
5335                .collect::<HashSet<_>>()
5336                .into_iter()
5337                .collect();
5338            if unique_dsts.len() == 1 {
5339                let label_name = &unique_dsts[0];
5340                self.schema.get_label_case_insensitive(label_name)
5341            } else {
5342                // Multiple or no destination labels inferred - allow any target
5343                // This supports patterns like MATCH (a)-[:EDGE_TYPE]-(b) WHERE b:Label
5344                // where the edge type can connect to multiple labels
5345                None
5346            }
5347        } else {
5348            None
5349        };
5350
5351        // Check if this is a variable-length pattern (has range specifier like *1..3)
5352        let is_variable_length = params.rel.range.is_some();
5353
5354        // For VLP patterns, default min to 1 and max to a reasonable limit.
5355        // For single-hop patterns (no range), both are 1.
5356        const DEFAULT_MAX_HOPS: usize = 100;
5357        let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
5358            let min = range.min.unwrap_or(1) as usize;
5359            let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
5360            (min, max)
5361        } else {
5362            (1, 1)
5363        };
5364
5365        // step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
5366        //   Single-hop: step_var holds a single edge object
5367        //   VLP: step_var holds a list of edge objects
5368        // path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
5369        let step_var = params.rel.variable.clone();
5370        let path_var = params.path_variable.clone();
5371
5372        // If we have a bound edge variable from a previous clause, use a temp variable
5373        // for the Traverse step, then filter to match the bound edge
5374        let rebound_var = bound_edge_var
5375            .as_ref()
5376            .or(bound_edge_list_var.as_ref())
5377            .cloned();
5378        let effective_step_var = if let Some(ref bv) = rebound_var {
5379            Some(format!("__rebound_{}", bv))
5380        } else {
5381            step_var.clone()
5382        };
5383
5384        // If we have a bound target variable from a previous clause (e.g. WITH),
5385        // use a temp variable for the Traverse step, then filter to match the bound
5386        // target — mirroring the bound edge pattern above.
5387        let rebound_target_var = if target_is_bound && !target_variable.is_empty() {
5388            let is_imported = find_var_in_scope(vars_in_scope, &target_variable)
5389                .map(|info| info.var_type == VariableType::Imported)
5390                .unwrap_or(false);
5391            if !is_imported {
5392                Some(target_variable.clone())
5393            } else {
5394                None
5395            }
5396        } else {
5397            None
5398        };
5399
5400        let effective_target_var = if let Some(ref bv) = rebound_target_var {
5401            format!("__rebound_{}", bv)
5402        } else {
5403            target_variable.clone()
5404        };
5405
5406        // Collect all variables (node + edge) from the current MATCH clause scope
5407        // for relationship uniqueness scoping. Edge ID columns (both named `r._eid`
5408        // and anonymous `__eid_to_target`) are only included in uniqueness filtering
5409        // if their associated variable is in this set. This prevents relationship
5410        // uniqueness from being enforced across disconnected MATCH clauses.
5411        let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
5412            .iter()
5413            .map(|v| v.name.clone())
5414            .collect();
5415        // Include the current traverse's edge variable (not yet added to vars_in_scope)
5416        if let Some(ref sv) = effective_step_var {
5417            scope_match_variables.insert(sv.clone());
5418        }
5419        // Include the target variable (not yet added to vars_in_scope)
5420        scope_match_variables.insert(effective_target_var.clone());
5421        // Include bound edge variables from this path for cross-segment Trail mode
5422        // enforcement (same as the schemaless path above).
5423        scope_match_variables.extend(path_bound_edge_vars.iter().cloned());
5424
5425        let mut plan = LogicalPlan::Traverse {
5426            input: Box::new(plan),
5427            edge_type_ids,
5428            direction: params.rel.direction.clone(),
5429            source_variable: source_variable.to_string(),
5430            target_variable: effective_target_var.clone(),
5431            target_label_id: target_label_meta
5432                .map(|m| m.id)
5433                .or(virtual_target_label_id)
5434                .unwrap_or(0),
5435            step_variable: effective_step_var.clone(),
5436            min_hops,
5437            max_hops,
5438            optional: params.optional,
5439            target_filter: self.node_filter_expr(
5440                &target_variable,
5441                &params.target_node.labels,
5442                &params.target_node.properties,
5443            ),
5444            path_variable: path_var.clone(),
5445            edge_properties: HashSet::new(),
5446            is_variable_length,
5447            optional_pattern_vars: params.optional_pattern_vars.clone(),
5448            scope_match_variables,
5449            edge_filter_expr: if is_variable_length {
5450                // Use the step variable name, or a fallback for anonymous edges.
5451                // The variable name is used by properties_to_expr to build
5452                // `var.prop = value` expressions. For BFS property checking,
5453                // only the property name and value matter (the variable name
5454                // is stripped during extraction).
5455                let filter_var = effective_step_var
5456                    .clone()
5457                    .unwrap_or_else(|| "__anon_edge".to_string());
5458                self.properties_to_expr(&filter_var, &params.rel.properties)
5459            } else {
5460                None
5461            },
5462            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5463            qpp_steps: None,
5464        };
5465
5466        // Pre-compute optional variables set for filter nodes in this traverse.
5467        // Used by relationship property filters and bound-edge filters below.
5468        let filter_optional_vars = if params.optional {
5469            params.optional_pattern_vars.clone()
5470        } else {
5471            HashSet::new()
5472        };
5473
5474        // Apply relationship property predicates (e.g. [r {k: v}]).
5475        // For VLP, predicates are stored inline in edge_filter_expr (above).
5476        // For fixed-length, wrap as a Filter node for post-traverse evaluation.
5477        if !is_variable_length
5478            && let Some(edge_var_name) = effective_step_var.as_ref()
5479            && let Some(edge_prop_filter) =
5480                self.properties_to_expr(edge_var_name, &params.rel.properties)
5481        {
5482            plan = LogicalPlan::Filter {
5483                input: Box::new(plan),
5484                predicate: edge_prop_filter,
5485                optional_variables: filter_optional_vars.clone(),
5486            };
5487        }
5488
5489        // Only apply bound target filter for Imported variables (from outer scope/subquery).
5490        // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
5491        // uses Parameter which requires the value to be in params (subquery context).
5492        if target_is_bound
5493            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5494            && info.var_type == VariableType::Imported
5495        {
5496            plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
5497        }
5498
5499        // If we have a bound edge variable, add a filter to match it
5500        if let Some(ref bv) = bound_edge_var {
5501            let temp_var = format!("__rebound_{}", bv);
5502            let bound_check = Expr::BinaryOp {
5503                left: Box::new(Expr::Property(
5504                    Box::new(Expr::Variable(temp_var)),
5505                    "_eid".to_string(),
5506                )),
5507                op: BinaryOp::Eq,
5508                right: Box::new(Expr::Property(
5509                    Box::new(Expr::Variable(bv.clone())),
5510                    "_eid".to_string(),
5511                )),
5512            };
5513            plan = LogicalPlan::Filter {
5514                input: Box::new(plan),
5515                predicate: bound_check,
5516                optional_variables: filter_optional_vars.clone(),
5517            };
5518        }
5519
5520        // If we have a bound relationship list variable for a VLP pattern,
5521        // add a filter to match the traversed relationship list exactly.
5522        if let Some(ref bv) = bound_edge_list_var {
5523            let temp_var = format!("__rebound_{}", bv);
5524            let temp_eids = Expr::ListComprehension {
5525                variable: "__rebound_edge".to_string(),
5526                list: Box::new(Expr::Variable(temp_var)),
5527                where_clause: None,
5528                map_expr: Box::new(Expr::FunctionCall {
5529                    name: "toInteger".to_string(),
5530                    args: vec![Expr::Property(
5531                        Box::new(Expr::Variable("__rebound_edge".to_string())),
5532                        "_eid".to_string(),
5533                    )],
5534                    distinct: false,
5535                    window_spec: None,
5536                }),
5537            };
5538            let bound_eids = Expr::ListComprehension {
5539                variable: "__bound_edge".to_string(),
5540                list: Box::new(Expr::Variable(bv.clone())),
5541                where_clause: None,
5542                map_expr: Box::new(Expr::FunctionCall {
5543                    name: "toInteger".to_string(),
5544                    args: vec![Expr::Property(
5545                        Box::new(Expr::Variable("__bound_edge".to_string())),
5546                        "_eid".to_string(),
5547                    )],
5548                    distinct: false,
5549                    window_spec: None,
5550                }),
5551            };
5552            let bound_list_check = Expr::BinaryOp {
5553                left: Box::new(temp_eids),
5554                op: BinaryOp::Eq,
5555                right: Box::new(bound_eids),
5556            };
5557            plan = LogicalPlan::Filter {
5558                input: Box::new(plan),
5559                predicate: bound_list_check,
5560                optional_variables: filter_optional_vars.clone(),
5561            };
5562        }
5563
5564        // If we have a bound target variable (non-imported), add a filter to constrain
5565        // the traversal output to match the previously bound target node.
5566        if let Some(ref bv) = rebound_target_var {
5567            let temp_var = format!("__rebound_{}", bv);
5568            let bound_check = Expr::BinaryOp {
5569                left: Box::new(Expr::Property(
5570                    Box::new(Expr::Variable(temp_var.clone())),
5571                    "_vid".to_string(),
5572                )),
5573                op: BinaryOp::Eq,
5574                right: Box::new(Expr::Property(
5575                    Box::new(Expr::Variable(bv.clone())),
5576                    "_vid".to_string(),
5577                )),
5578            };
5579            // For OPTIONAL MATCH, include the rebound variable in optional_variables
5580            // so that OptionalFilterExec excludes it from the grouping key and
5581            // properly nullifies it in recovery rows when all matches are filtered out.
5582            // Without this, each traverse result creates its own group (keyed by
5583            // __rebound_c._vid), and null-row recovery emits a spurious null row
5584            // for every non-matching target instead of one per source group.
5585            let mut rebound_filter_vars = filter_optional_vars;
5586            if params.optional {
5587                rebound_filter_vars.insert(temp_var);
5588            }
5589            plan = LogicalPlan::Filter {
5590                input: Box::new(plan),
5591                predicate: bound_check,
5592                optional_variables: rebound_filter_vars,
5593            };
5594        }
5595
5596        // Add the bound variables to scope
5597        // Skip adding the edge variable if it's already bound from a previous clause
5598        if let Some(sv) = &step_var
5599            && bound_edge_var.is_none()
5600            && bound_edge_list_var.is_none()
5601        {
5602            add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5603            if is_variable_length
5604                && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5605            {
5606                info.is_vlp = true;
5607            }
5608        }
5609        if let Some(pv) = &path_var
5610            && !is_var_in_scope(vars_in_scope, pv)
5611        {
5612            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5613        }
5614        if !is_var_in_scope(vars_in_scope, &target_variable) {
5615            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5616        }
5617
5618        Ok((plan, target_variable, effective_target_var))
5619    }
5620
5621    /// Combine a new scan plan with an existing plan.
5622    ///
5623    /// If the existing plan is `Empty`, returns the new plan directly.
5624    /// Otherwise, wraps them in a `CrossJoin`.
5625    fn join_with_plan(existing: LogicalPlan, new: LogicalPlan) -> LogicalPlan {
5626        if matches!(existing, LogicalPlan::Empty) {
5627            new
5628        } else {
5629            LogicalPlan::CrossJoin {
5630                left: Box::new(existing),
5631                right: Box::new(new),
5632            }
5633        }
5634    }
5635
5636    /// Split node map predicates into scan-pushable and residual filters.
5637    ///
5638    /// A predicate is scan-pushable when its value expression references only
5639    /// the node variable itself (or no variables). Predicates referencing other
5640    /// in-scope variables (correlated predicates) are returned as residual so
5641    /// they can be applied after joining with the existing plan.
5642    fn split_node_property_filters_for_scan(
5643        &self,
5644        variable: &str,
5645        properties: &Option<Expr>,
5646    ) -> (Option<Expr>, Option<Expr>) {
5647        let entries = match properties {
5648            Some(Expr::Map(entries)) => entries,
5649            _ => return (None, None),
5650        };
5651
5652        if entries.is_empty() {
5653            return (None, None);
5654        }
5655
5656        let mut pushdown_entries = Vec::new();
5657        let mut residual_entries = Vec::new();
5658
5659        for (prop, val_expr) in entries {
5660            let vars = collect_expr_variables(val_expr);
5661            if vars.iter().all(|v| v == variable) {
5662                pushdown_entries.push((prop.clone(), val_expr.clone()));
5663            } else {
5664                residual_entries.push((prop.clone(), val_expr.clone()));
5665            }
5666        }
5667
5668        let pushdown_map = if pushdown_entries.is_empty() {
5669            None
5670        } else {
5671            Some(Expr::Map(pushdown_entries))
5672        };
5673        let residual_map = if residual_entries.is_empty() {
5674            None
5675        } else {
5676            Some(Expr::Map(residual_entries))
5677        };
5678
5679        (
5680            self.properties_to_expr(variable, &pushdown_map),
5681            self.properties_to_expr(variable, &residual_map),
5682        )
5683    }
5684
5685    /// Decide whether per-label `Scan` branches for a label disjunction can
5686    /// safely be combined under `LogicalPlan::Union`. Returns `true` iff every
5687    /// label in `labels` is registered in the schema AND every pair shares an
5688    /// identical property name+type set.
5689    ///
5690    /// When this returns `false`, the disjunction must fall back to a single
5691    /// `ScanMainByLabels` over all labels — otherwise DataFusion's
5692    /// `UnionExec::try_new` panics in `union_schema` because the per-label
5693    /// `GraphScanExec` outputs (`_vid` + `_labels` + per-label projected
5694    /// properties) have different field counts. Issue rustic-ai/uni-db#62.
5695    ///
5696    /// We deliberately compare full schema property sets rather than only the
5697    /// properties referenced by the current query: at this logical-planning
5698    /// stage we have not yet collected `all_properties`, and `*` wildcards
5699    /// (e.g. from unknown function calls) would expand per-label downstream
5700    /// in `df_planner::resolve_properties` even when the query text only
5701    /// touches common columns.
5702    fn label_branches_share_property_schema(&self, labels: &[String]) -> bool {
5703        if labels.len() < 2 {
5704            return true;
5705        }
5706        let mut iter = labels.iter();
5707        let first = iter.next().expect("len >= 2");
5708        let Some(first_props) = self.schema.properties.get(first) else {
5709            return false;
5710        };
5711        for label in iter {
5712            let Some(props) = self.schema.properties.get(label) else {
5713                return false;
5714            };
5715            if props.len() != first_props.len() {
5716                return false;
5717            }
5718            for (name, meta) in first_props {
5719                let Some(other_meta) = props.get(name) else {
5720                    return false;
5721                };
5722                if meta.r#type != other_meta.r#type {
5723                    return false;
5724                }
5725            }
5726        }
5727        true
5728    }
5729
5730    /// Plan an unbound node (creates a Scan, ScanAll, ScanMainByLabel, ExtIdLookup, or CrossJoin).
5731    fn plan_unbound_node(
5732        &self,
5733        node: &NodePattern,
5734        variable: &str,
5735        plan: LogicalPlan,
5736        optional: bool,
5737    ) -> Result<LogicalPlan> {
5738        // Properties handling
5739        let properties = match &node.properties {
5740            Some(Expr::Map(entries)) => entries.as_slice(),
5741            Some(Expr::Parameter(_)) => {
5742                return Err(anyhow!(
5743                    "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
5744                ));
5745            }
5746            Some(_) => return Err(anyhow!("Node properties must be a Map")),
5747            None => &[],
5748        };
5749
5750        let has_existing_scope = !matches!(plan, LogicalPlan::Empty);
5751
5752        let apply_residual_filter = |input: LogicalPlan, residual: Option<Expr>| -> LogicalPlan {
5753            if let Some(predicate) = residual {
5754                LogicalPlan::Filter {
5755                    input: Box::new(input),
5756                    predicate,
5757                    optional_variables: HashSet::new(),
5758                }
5759            } else {
5760                input
5761            }
5762        };
5763
5764        let (node_scan_filter, node_residual_filter) = if has_existing_scope {
5765            self.split_node_property_filters_for_scan(variable, &node.properties)
5766        } else {
5767            (self.properties_to_expr(variable, &node.properties), None)
5768        };
5769
5770        // Check for ext_id in properties when no label is specified
5771        if node.labels.is_empty() {
5772            // Try to find ext_id property for main table lookup
5773            if let Some((_, ext_id_value)) = properties.iter().find(|(k, _)| k == "ext_id") {
5774                // Extract the ext_id value as a string
5775                let ext_id = match ext_id_value {
5776                    Expr::Literal(CypherLiteral::String(s)) => s.clone(),
5777                    _ => {
5778                        return Err(anyhow!("ext_id must be a string literal for direct lookup"));
5779                    }
5780                };
5781
5782                // Build filter for remaining properties (excluding ext_id)
5783                let remaining_props: Vec<_> = properties
5784                    .iter()
5785                    .filter(|(k, _)| k != "ext_id")
5786                    .cloned()
5787                    .collect();
5788
5789                let remaining_expr = if remaining_props.is_empty() {
5790                    None
5791                } else {
5792                    Some(Expr::Map(remaining_props))
5793                };
5794
5795                let (prop_filter, residual_filter) = if has_existing_scope {
5796                    self.split_node_property_filters_for_scan(variable, &remaining_expr)
5797                } else {
5798                    (self.properties_to_expr(variable, &remaining_expr), None)
5799                };
5800
5801                let ext_id_lookup = LogicalPlan::ExtIdLookup {
5802                    variable: variable.to_string(),
5803                    ext_id,
5804                    filter: prop_filter,
5805                    optional,
5806                };
5807
5808                let joined = Self::join_with_plan(plan, ext_id_lookup);
5809                return Ok(apply_residual_filter(joined, residual_filter));
5810            }
5811
5812            // No ext_id: create ScanAll for unlabeled node pattern
5813            let scan_all = LogicalPlan::ScanAll {
5814                variable: variable.to_string(),
5815                filter: node_scan_filter,
5816                optional,
5817            };
5818
5819            let joined = Self::join_with_plan(plan, scan_all);
5820            return Ok(apply_residual_filter(joined, node_residual_filter));
5821        }
5822
5823        // Label disjunction `(n:A|B|C)` — emit Union of label-scoped Scans.
5824        //
5825        // Storage fact: a multi-labeled vertex is fanned out into every
5826        // per-label table it carries (uni-store/src/runtime/writer.rs's
5827        // `push_vertex_to_labels`), so the same vid can appear in both the
5828        // `A` scan and the `B` scan of a disjunctive query. Use
5829        // `Union { all: false }` so the combined result deduplicates by row
5830        // contents (which include the vid) rather than emitting the same
5831        // vertex twice. The single-label-disjunction case (`Disjunction(["A"])`)
5832        // is encoded the same way the parser already encodes single edge
5833        // types, and reduces to one Scan with no Union wrapping.
5834        if node.labels.is_proper_disjunction() {
5835            let label_names: Vec<String> = node.labels.names().to_vec();
5836
5837            // Per-label branches under a `Union` only line up when every
5838            // branch produces the same Arrow schema. The narrow-scan
5839            // `Scan` path resolves columns *per label*, so heterogeneous
5840            // property sets (or any schemaless label in the mix) yield
5841            // mismatched widths and DataFusion's `UnionExec::try_new`
5842            // panics inside `union_schema` (issue rustic-ai/uni-db#62).
5843            //
5844            // For those cases, lower every branch to a *single-label*
5845            // `ScanMainByLabels` instead. The schemaless main-table scan
5846            // resolves columns from `all_properties` directly (no per-label
5847            // expansion), so all branches emit a uniform schema and the
5848            // outer `Union { all: false }` deduplicates correctly. We
5849            // keep the per-branch Union shape (rather than collapsing to
5850            // a single multi-label scan) because multi-label
5851            // `ScanMainByLabels` has AND/intersection semantics — wrong
5852            // for a disjunction.
5853            let use_main_table_branches = !self.label_branches_share_property_schema(&label_names);
5854
5855            let mut branches: Vec<LogicalPlan> = Vec::with_capacity(label_names.len());
5856            for label_name in &label_names {
5857                let branch = if use_main_table_branches {
5858                    LogicalPlan::ScanMainByLabels {
5859                        labels: vec![label_name.clone()],
5860                        variable: variable.to_string(),
5861                        filter: node_scan_filter.clone(),
5862                        optional,
5863                    }
5864                } else {
5865                    let meta = self
5866                        .schema
5867                        .get_label_case_insensitive(label_name)
5868                        .expect("share_property_schema true implies all labels in schema");
5869                    LogicalPlan::Scan {
5870                        label_id: meta.id,
5871                        labels: vec![label_name.clone()],
5872                        variable: variable.to_string(),
5873                        filter: node_scan_filter.clone(),
5874                        optional,
5875                    }
5876                };
5877                branches.push(branch);
5878            }
5879            // Left-leaning Union: Union(Union(A, B), C). All inner
5880            // unions dedupe by row, so the outer one does too.
5881            let mut iter = branches.into_iter();
5882            let mut union_plan = iter
5883                .next()
5884                .expect("is_proper_disjunction implies at least 2 labels");
5885            for next in iter {
5886                union_plan = LogicalPlan::Union {
5887                    left: Box::new(union_plan),
5888                    right: Box::new(next),
5889                    all: false,
5890                };
5891            }
5892            let joined = Self::join_with_plan(plan, union_plan);
5893            return Ok(apply_residual_filter(joined, node_residual_filter));
5894        }
5895
5896        // Use first label for label_id (primary label for dataset selection)
5897        let label_name = &node.labels[0];
5898
5899        // Check if label exists in schema
5900        if let Some(label_meta) = self.schema.get_label_case_insensitive(label_name) {
5901            // Known label: use standard Scan
5902            let scan = LogicalPlan::Scan {
5903                label_id: label_meta.id,
5904                labels: node.labels.names().to_vec(),
5905                variable: variable.to_string(),
5906                filter: node_scan_filter,
5907                optional,
5908            };
5909
5910            let joined = Self::join_with_plan(plan, scan);
5911            Ok(apply_residual_filter(joined, node_residual_filter))
5912        } else {
5913            // Unknown label. Try a CatalogProvider / ReplacementScanProvider
5914            // claim first: on success allocate a virtual label-ID and emit a
5915            // regular `Scan` against the virtual id (`df_planner` dispatches
5916            // to `CatalogVertexScanExec`). When no provider claims and the
5917            // replacement-scan gate is on, strict-mode errors. When the gate
5918            // is off and no provider claims, preserve today's silent-empty
5919            // schemaless `ScanMainByLabels` behavior bit-for-bit.
5920            if let Some((virtual_id, _)) = self.allocate_virtual_label(label_name)? {
5921                let scan = LogicalPlan::Scan {
5922                    label_id: virtual_id,
5923                    labels: node.labels.names().to_vec(),
5924                    variable: variable.to_string(),
5925                    filter: node_scan_filter,
5926                    optional,
5927                };
5928                let joined = Self::join_with_plan(plan, scan);
5929                return Ok(apply_residual_filter(joined, node_residual_filter));
5930            }
5931            if self.replacement_scans_enabled {
5932                return Err(anyhow!(
5933                    "Label `{}` is not defined in schema and no \
5934                     CatalogProvider or ReplacementScanProvider claimed it; \
5935                     strict-mode (replacement_scans=true) requires the label \
5936                     to resolve",
5937                    label_name
5938                ));
5939            }
5940
5941            let scan_main = LogicalPlan::ScanMainByLabels {
5942                labels: node.labels.names().to_vec(),
5943                variable: variable.to_string(),
5944                filter: node_scan_filter,
5945                optional,
5946            };
5947
5948            let joined = Self::join_with_plan(plan, scan_main);
5949            Ok(apply_residual_filter(joined, node_residual_filter))
5950        }
5951    }
5952
5953    /// Plan a WHERE clause with vector_similarity extraction and predicate pushdown.
5954    ///
5955    /// When `optional_vars` is non-empty, the Filter will preserve rows where
5956    /// any of those variables are NULL (for OPTIONAL MATCH semantics).
5957    fn plan_where_clause(
5958        &self,
5959        predicate: &Expr,
5960        plan: LogicalPlan,
5961        vars_in_scope: &[VariableInfo],
5962        optional_vars: HashSet<String>,
5963    ) -> Result<LogicalPlan> {
5964        // Validate no aggregation functions in WHERE clause
5965        validate_no_aggregation_in_where(predicate)?;
5966
5967        // Validate all variables used are in scope
5968        validate_expression_variables(predicate, vars_in_scope)?;
5969
5970        // Validate expression types (function args, boolean operators)
5971        validate_expression(predicate, vars_in_scope)?;
5972
5973        // Check that WHERE predicate isn't a bare node/edge/path variable
5974        if let Expr::Variable(var_name) = predicate
5975            && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
5976            && matches!(
5977                info.var_type,
5978                VariableType::Node | VariableType::Edge | VariableType::Path
5979            )
5980        {
5981            return Err(anyhow!(
5982                "SyntaxError: InvalidArgumentType - Type mismatch: expected Boolean but was {:?}",
5983                info.var_type
5984            ));
5985        }
5986
5987        let mut plan = plan;
5988
5989        // Transform VALID_AT macro to function call
5990        let transformed_predicate = Self::transform_valid_at_to_function(predicate.clone());
5991
5992        // Rewrite id(var) to var._vid so PredicateAnalyzer can push it down
5993        let transformed_predicate = Self::rewrite_id_to_vid(transformed_predicate);
5994
5995        let mut current_predicate =
5996            self.rewrite_predicates_using_indexes(&transformed_predicate, &plan, vars_in_scope)?;
5997
5998        // 1. Try to extract vector_similarity predicate for optimization
5999        if let Some(extraction) = extract_vector_similarity(&current_predicate) {
6000            let vs = &extraction.predicate;
6001            if Self::find_scan_label_id(&plan, &vs.variable).is_some() {
6002                plan = Self::replace_scan_with_knn(
6003                    plan,
6004                    &vs.variable,
6005                    &vs.property,
6006                    vs.query.clone(),
6007                    vs.threshold,
6008                );
6009                if let Some(residual) = extraction.residual {
6010                    current_predicate = residual;
6011                } else {
6012                    current_predicate = Expr::TRUE;
6013                }
6014            }
6015        }
6016
6017        // 2. Label/type disjunction → narrow-scan rewrite.
6018        //
6019        // `WHERE n:A OR n:B` and `WHERE type(r) = 'A' OR type(r) = 'B'`
6020        // are functionally identical to the inline forms `(n:A|B)` and
6021        // `[r:A|B]`, but a literal pattern lowering would route them
6022        // through `Filter(LabelCheck OR LabelCheck)` over `ScanAll` —
6023        // a full vertex/edge scan plus residual filter, missing the
6024        // narrow-scan fast-path that the inline forms get for free.
6025        // Detect those OR-chains here and rewrite the upstream
6026        // `ScanAll` / `Traverse` accordingly.
6027        let conjuncts = Self::split_and_conjuncts(&current_predicate);
6028        let mut keep: Vec<Expr> = Vec::with_capacity(conjuncts.len());
6029        for conj in conjuncts {
6030            let mut consumed = false;
6031            for var in vars_in_scope {
6032                if optional_vars.contains(&var.name) {
6033                    continue;
6034                }
6035                // Node label disjunction → Union of label-scoped Scans.
6036                if Self::is_scan_all_for(&plan, &var.name)
6037                    && let Some(labels) = try_label_or_to_union(&conj, &var.name)
6038                {
6039                    plan = self.replace_scan_all_with_label_union(plan, &var.name, &labels, false);
6040                    consumed = true;
6041                    break;
6042                }
6043                // Edge type disjunction → merge into Traverse.edge_type_ids.
6044                if let Some(types) = try_type_or_to_union(&conj, &var.name)
6045                    && Self::merge_traverse_types_for(&plan, &var.name, &types).is_some()
6046                {
6047                    let mut ids: Vec<u32> = Vec::with_capacity(types.len());
6048                    let mut all_known = true;
6049                    for t in &types {
6050                        match self.schema.edge_types.get(t) {
6051                            Some(meta) => ids.push(meta.id),
6052                            None => {
6053                                all_known = false;
6054                                break;
6055                            }
6056                        }
6057                    }
6058                    if all_known {
6059                        plan = Self::set_traverse_edge_type_ids(plan, &var.name, ids);
6060                        consumed = true;
6061                        break;
6062                    }
6063                }
6064            }
6065            if !consumed {
6066                keep.push(conj);
6067            }
6068        }
6069        current_predicate = Self::combine_predicates(keep).unwrap_or(Expr::TRUE);
6070
6071        // 3. Push eligible predicates to Scan OR Traverse filters
6072        // Note: Do NOT push predicates on optional variables (from OPTIONAL MATCH) to
6073        // Traverse's target_filter, because target_filter filtering doesn't preserve NULL
6074        // rows. Let them stay in the Filter operator which handles NULL preservation.
6075        for var in vars_in_scope {
6076            // Skip pushdown for optional variables - they need NULL preservation in Filter
6077            if optional_vars.contains(&var.name) {
6078                continue;
6079            }
6080
6081            // Check if var is produced by a Scan
6082            if Self::find_scan_label_id(&plan, &var.name).is_some() {
6083                let (pushable, residual) =
6084                    Self::extract_variable_predicates(&current_predicate, &var.name);
6085
6086                for pred in pushable {
6087                    plan = Self::push_predicate_to_scan(plan, &var.name, pred);
6088                }
6089
6090                if let Some(r) = residual {
6091                    current_predicate = r;
6092                } else {
6093                    current_predicate = Expr::TRUE;
6094                }
6095            } else if Self::is_traverse_target(&plan, &var.name) {
6096                // Push to Traverse
6097                let (pushable, residual) =
6098                    Self::extract_variable_predicates(&current_predicate, &var.name);
6099
6100                for pred in pushable {
6101                    plan = Self::push_predicate_to_traverse(plan, &var.name, pred);
6102                }
6103
6104                if let Some(r) = residual {
6105                    current_predicate = r;
6106                } else {
6107                    current_predicate = Expr::TRUE;
6108                }
6109            }
6110        }
6111
6112        // 4. Push predicates to Apply.input_filter
6113        // This filters input rows BEFORE executing correlated subqueries.
6114        plan = Self::push_predicates_to_apply(plan, &mut current_predicate);
6115
6116        // 5. Add Filter node for any remaining predicates
6117        if !current_predicate.is_true_literal() {
6118            plan = LogicalPlan::Filter {
6119                input: Box::new(plan),
6120                predicate: current_predicate,
6121                optional_variables: optional_vars,
6122            };
6123        }
6124
6125        Ok(plan)
6126    }
6127
6128    fn rewrite_predicates_using_indexes(
6129        &self,
6130        predicate: &Expr,
6131        plan: &LogicalPlan,
6132        vars_in_scope: &[VariableInfo],
6133    ) -> Result<Expr> {
6134        let mut rewritten = predicate.clone();
6135
6136        for var in vars_in_scope {
6137            if let Some(label_id) = Self::find_scan_label_id(plan, &var.name) {
6138                // Find label name
6139                let label_name = self.schema.label_name_by_id(label_id).map(str::to_owned);
6140
6141                if let Some(label) = label_name
6142                    && let Some(props) = self.schema.properties.get(&label)
6143                {
6144                    for (gen_col, meta) in props {
6145                        if meta.generation_expression.is_some() {
6146                            // Use cached parsed expression
6147                            if let Some(schema_expr) =
6148                                self.gen_expr_cache.get(&(label.clone(), gen_col.clone()))
6149                            {
6150                                // Rewrite 'rewritten' replacing occurrences of schema_expr with gen_col
6151                                rewritten = Self::replace_expression(
6152                                    rewritten,
6153                                    schema_expr,
6154                                    &var.name,
6155                                    gen_col,
6156                                );
6157                            }
6158                        }
6159                    }
6160                }
6161            }
6162        }
6163        Ok(rewritten)
6164    }
6165
6166    fn replace_expression(expr: Expr, schema_expr: &Expr, query_var: &str, gen_col: &str) -> Expr {
6167        // First, normalize schema_expr to use query_var
6168        let schema_var = schema_expr.extract_variable();
6169
6170        if let Some(s_var) = schema_var {
6171            let target_expr = schema_expr.substitute_variable(&s_var, query_var);
6172
6173            if expr == target_expr {
6174                return Expr::Property(
6175                    Box::new(Expr::Variable(query_var.to_string())),
6176                    gen_col.to_string(),
6177                );
6178            }
6179        }
6180
6181        // Recurse
6182        match expr {
6183            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
6184                left: Box::new(Self::replace_expression(
6185                    *left,
6186                    schema_expr,
6187                    query_var,
6188                    gen_col,
6189                )),
6190                op,
6191                right: Box::new(Self::replace_expression(
6192                    *right,
6193                    schema_expr,
6194                    query_var,
6195                    gen_col,
6196                )),
6197            },
6198            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
6199                op,
6200                expr: Box::new(Self::replace_expression(
6201                    *expr,
6202                    schema_expr,
6203                    query_var,
6204                    gen_col,
6205                )),
6206            },
6207            Expr::FunctionCall {
6208                name,
6209                args,
6210                distinct,
6211                window_spec,
6212            } => Expr::FunctionCall {
6213                name,
6214                args: args
6215                    .into_iter()
6216                    .map(|a| Self::replace_expression(a, schema_expr, query_var, gen_col))
6217                    .collect(),
6218                distinct,
6219                window_spec,
6220            },
6221            Expr::IsNull(expr) => Expr::IsNull(Box::new(Self::replace_expression(
6222                *expr,
6223                schema_expr,
6224                query_var,
6225                gen_col,
6226            ))),
6227            Expr::IsNotNull(expr) => Expr::IsNotNull(Box::new(Self::replace_expression(
6228                *expr,
6229                schema_expr,
6230                query_var,
6231                gen_col,
6232            ))),
6233            Expr::IsUnique(expr) => Expr::IsUnique(Box::new(Self::replace_expression(
6234                *expr,
6235                schema_expr,
6236                query_var,
6237                gen_col,
6238            ))),
6239            Expr::ArrayIndex {
6240                array: e,
6241                index: idx,
6242            } => Expr::ArrayIndex {
6243                array: Box::new(Self::replace_expression(
6244                    *e,
6245                    schema_expr,
6246                    query_var,
6247                    gen_col,
6248                )),
6249                index: Box::new(Self::replace_expression(
6250                    *idx,
6251                    schema_expr,
6252                    query_var,
6253                    gen_col,
6254                )),
6255            },
6256            Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
6257                array: Box::new(Self::replace_expression(
6258                    *array,
6259                    schema_expr,
6260                    query_var,
6261                    gen_col,
6262                )),
6263                start: start.map(|s| {
6264                    Box::new(Self::replace_expression(
6265                        *s,
6266                        schema_expr,
6267                        query_var,
6268                        gen_col,
6269                    ))
6270                }),
6271                end: end.map(|e| {
6272                    Box::new(Self::replace_expression(
6273                        *e,
6274                        schema_expr,
6275                        query_var,
6276                        gen_col,
6277                    ))
6278                }),
6279            },
6280            Expr::List(exprs) => Expr::List(
6281                exprs
6282                    .into_iter()
6283                    .map(|e| Self::replace_expression(e, schema_expr, query_var, gen_col))
6284                    .collect(),
6285            ),
6286            Expr::Map(entries) => Expr::Map(
6287                entries
6288                    .into_iter()
6289                    .map(|(k, v)| {
6290                        (
6291                            k,
6292                            Self::replace_expression(v, schema_expr, query_var, gen_col),
6293                        )
6294                    })
6295                    .collect(),
6296            ),
6297            Expr::Property(e, prop) => Expr::Property(
6298                Box::new(Self::replace_expression(
6299                    *e,
6300                    schema_expr,
6301                    query_var,
6302                    gen_col,
6303                )),
6304                prop,
6305            ),
6306            Expr::Case {
6307                expr: case_expr,
6308                when_then,
6309                else_expr,
6310            } => Expr::Case {
6311                expr: case_expr.map(|e| {
6312                    Box::new(Self::replace_expression(
6313                        *e,
6314                        schema_expr,
6315                        query_var,
6316                        gen_col,
6317                    ))
6318                }),
6319                when_then: when_then
6320                    .into_iter()
6321                    .map(|(w, t)| {
6322                        (
6323                            Self::replace_expression(w, schema_expr, query_var, gen_col),
6324                            Self::replace_expression(t, schema_expr, query_var, gen_col),
6325                        )
6326                    })
6327                    .collect(),
6328                else_expr: else_expr.map(|e| {
6329                    Box::new(Self::replace_expression(
6330                        *e,
6331                        schema_expr,
6332                        query_var,
6333                        gen_col,
6334                    ))
6335                }),
6336            },
6337            Expr::Reduce {
6338                accumulator,
6339                init,
6340                variable: reduce_var,
6341                list,
6342                expr: reduce_expr,
6343            } => Expr::Reduce {
6344                accumulator,
6345                init: Box::new(Self::replace_expression(
6346                    *init,
6347                    schema_expr,
6348                    query_var,
6349                    gen_col,
6350                )),
6351                variable: reduce_var,
6352                list: Box::new(Self::replace_expression(
6353                    *list,
6354                    schema_expr,
6355                    query_var,
6356                    gen_col,
6357                )),
6358                expr: Box::new(Self::replace_expression(
6359                    *reduce_expr,
6360                    schema_expr,
6361                    query_var,
6362                    gen_col,
6363                )),
6364            },
6365
6366            // Leaf nodes (Identifier, Literal, Parameter, etc.) need no recursion
6367            _ => expr,
6368        }
6369    }
6370
6371    /// Returns `true` iff `variable` is bound to a `ScanAll` operator
6372    /// (somewhere under `plan`). Used to gate the
6373    /// `WHERE n:A OR n:B` → `Union(Scan{A}, Scan{B})` rewrite — we only
6374    /// fire it when the variable is currently doing a full vertex scan,
6375    /// not when it's already bound to a labeled `Scan`.
6376    fn is_scan_all_for(plan: &LogicalPlan, variable: &str) -> bool {
6377        match plan {
6378            LogicalPlan::ScanAll { variable: var, .. } => var == variable,
6379            LogicalPlan::Filter { input, .. }
6380            | LogicalPlan::Project { input, .. }
6381            | LogicalPlan::Sort { input, .. }
6382            | LogicalPlan::Limit { input, .. }
6383            | LogicalPlan::Aggregate { input, .. }
6384            | LogicalPlan::Apply { input, .. }
6385            | LogicalPlan::Traverse { input, .. } => Self::is_scan_all_for(input, variable),
6386            LogicalPlan::CrossJoin { left, right } => {
6387                Self::is_scan_all_for(left, variable) || Self::is_scan_all_for(right, variable)
6388            }
6389            LogicalPlan::Union { left, right, .. } => {
6390                Self::is_scan_all_for(left, variable) || Self::is_scan_all_for(right, variable)
6391            }
6392            _ => false,
6393        }
6394    }
6395
6396    /// Replace the `ScanAll` for `variable` in `plan` with a left-leaning
6397    /// `Union` of label-scoped `Scan` (or `ScanMainByLabels` for unknown
6398    /// labels) operators built from `labels`. Used by the
6399    /// `WHERE n:A OR n:B` rewrite.
6400    fn replace_scan_all_with_label_union(
6401        &self,
6402        plan: LogicalPlan,
6403        variable: &str,
6404        labels: &[String],
6405        optional: bool,
6406    ) -> LogicalPlan {
6407        match plan {
6408            LogicalPlan::ScanAll {
6409                variable: var,
6410                filter,
6411                optional: scan_optional,
6412            } if var == variable => {
6413                // Heterogeneous (or any-schemaless) disjunction: route every
6414                // branch through a single-label `ScanMainByLabels` so all
6415                // branches emit a uniform schemaless schema. Avoids the
6416                // DataFusion `union_schema` panic. See `plan_unbound_node`
6417                // and issue rustic-ai/uni-db#62.
6418                let use_main_table_branches = !self.label_branches_share_property_schema(labels);
6419
6420                let mut branches: Vec<LogicalPlan> = Vec::with_capacity(labels.len());
6421                for label in labels {
6422                    let branch = if use_main_table_branches {
6423                        LogicalPlan::ScanMainByLabels {
6424                            labels: vec![label.clone()],
6425                            variable: variable.to_string(),
6426                            filter: filter.clone(),
6427                            optional: scan_optional || optional,
6428                        }
6429                    } else {
6430                        let meta = self
6431                            .schema
6432                            .get_label_case_insensitive(label)
6433                            .expect("share_property_schema true implies all labels in schema");
6434                        LogicalPlan::Scan {
6435                            label_id: meta.id,
6436                            labels: vec![label.clone()],
6437                            variable: variable.to_string(),
6438                            filter: filter.clone(),
6439                            optional: scan_optional || optional,
6440                        }
6441                    };
6442                    branches.push(branch);
6443                }
6444                let mut iter = branches.into_iter();
6445                let mut union_plan = iter.next().expect("at least one label");
6446                for next in iter {
6447                    union_plan = LogicalPlan::Union {
6448                        left: Box::new(union_plan),
6449                        right: Box::new(next),
6450                        all: false,
6451                    };
6452                }
6453                union_plan
6454            }
6455            LogicalPlan::Filter {
6456                input,
6457                predicate,
6458                optional_variables,
6459            } => LogicalPlan::Filter {
6460                input: Box::new(
6461                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6462                ),
6463                predicate,
6464                optional_variables,
6465            },
6466            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6467                input: Box::new(
6468                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6469                ),
6470                projections,
6471            },
6472            LogicalPlan::CrossJoin { left, right } => {
6473                if Self::is_scan_all_for(&left, variable) {
6474                    LogicalPlan::CrossJoin {
6475                        left: Box::new(
6476                            self.replace_scan_all_with_label_union(
6477                                *left, variable, labels, optional,
6478                            ),
6479                        ),
6480                        right,
6481                    }
6482                } else {
6483                    LogicalPlan::CrossJoin {
6484                        left,
6485                        right: Box::new(
6486                            self.replace_scan_all_with_label_union(
6487                                *right, variable, labels, optional,
6488                            ),
6489                        ),
6490                    }
6491                }
6492            }
6493            LogicalPlan::Traverse {
6494                input,
6495                edge_type_ids,
6496                direction,
6497                source_variable,
6498                target_variable,
6499                target_label_id,
6500                step_variable,
6501                min_hops,
6502                max_hops,
6503                optional: trav_optional,
6504                target_filter,
6505                path_variable,
6506                edge_properties,
6507                is_variable_length,
6508                optional_pattern_vars,
6509                scope_match_variables,
6510                edge_filter_expr,
6511                path_mode,
6512                qpp_steps,
6513            } => LogicalPlan::Traverse {
6514                input: Box::new(
6515                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6516                ),
6517                edge_type_ids,
6518                direction,
6519                source_variable,
6520                target_variable,
6521                target_label_id,
6522                step_variable,
6523                min_hops,
6524                max_hops,
6525                optional: trav_optional,
6526                target_filter,
6527                path_variable,
6528                edge_properties,
6529                is_variable_length,
6530                optional_pattern_vars,
6531                scope_match_variables,
6532                edge_filter_expr,
6533                path_mode,
6534                qpp_steps,
6535            },
6536            other => other,
6537        }
6538    }
6539
6540    /// Returns `Some(())` iff `variable` is the `step_variable` (i.e. the
6541    /// edge variable) of some `Traverse` operator in `plan`. Used to gate
6542    /// the `WHERE type(r) = 'A' OR type(r) = 'B'` rewrite — we need a
6543    /// Traverse whose types we can merge into.
6544    fn merge_traverse_types_for(
6545        plan: &LogicalPlan,
6546        edge_var: &str,
6547        _types: &[String],
6548    ) -> Option<()> {
6549        match plan {
6550            LogicalPlan::Traverse {
6551                step_variable,
6552                input,
6553                ..
6554            } => {
6555                if step_variable.as_deref() == Some(edge_var) {
6556                    Some(())
6557                } else {
6558                    Self::merge_traverse_types_for(input, edge_var, _types)
6559                }
6560            }
6561            LogicalPlan::Filter { input, .. }
6562            | LogicalPlan::Project { input, .. }
6563            | LogicalPlan::Sort { input, .. }
6564            | LogicalPlan::Limit { input, .. }
6565            | LogicalPlan::Aggregate { input, .. }
6566            | LogicalPlan::Apply { input, .. } => {
6567                Self::merge_traverse_types_for(input, edge_var, _types)
6568            }
6569            LogicalPlan::CrossJoin { left, right } | LogicalPlan::Union { left, right, .. } => {
6570                Self::merge_traverse_types_for(left, edge_var, _types)
6571                    .or_else(|| Self::merge_traverse_types_for(right, edge_var, _types))
6572            }
6573            _ => None,
6574        }
6575    }
6576
6577    /// Replace `edge_type_ids` on the Traverse whose `step_variable`
6578    /// equals `edge_var`. Used by the type-OR rewrite.
6579    fn set_traverse_edge_type_ids(
6580        plan: LogicalPlan,
6581        edge_var: &str,
6582        new_ids: Vec<u32>,
6583    ) -> LogicalPlan {
6584        match plan {
6585            LogicalPlan::Traverse {
6586                input,
6587                edge_type_ids,
6588                direction,
6589                source_variable,
6590                target_variable,
6591                target_label_id,
6592                step_variable,
6593                min_hops,
6594                max_hops,
6595                optional,
6596                target_filter,
6597                path_variable,
6598                edge_properties,
6599                is_variable_length,
6600                optional_pattern_vars,
6601                scope_match_variables,
6602                edge_filter_expr,
6603                path_mode,
6604                qpp_steps,
6605            } => {
6606                let matches_var = step_variable.as_deref() == Some(edge_var);
6607                let recursed_input = if matches_var {
6608                    input
6609                } else {
6610                    Box::new(Self::set_traverse_edge_type_ids(
6611                        *input,
6612                        edge_var,
6613                        new_ids.clone(),
6614                    ))
6615                };
6616                LogicalPlan::Traverse {
6617                    input: recursed_input,
6618                    edge_type_ids: if matches_var { new_ids } else { edge_type_ids },
6619                    direction,
6620                    source_variable,
6621                    target_variable,
6622                    target_label_id,
6623                    step_variable,
6624                    min_hops,
6625                    max_hops,
6626                    optional,
6627                    target_filter,
6628                    path_variable,
6629                    edge_properties,
6630                    is_variable_length,
6631                    optional_pattern_vars,
6632                    scope_match_variables,
6633                    edge_filter_expr,
6634                    path_mode,
6635                    qpp_steps,
6636                }
6637            }
6638            LogicalPlan::Filter {
6639                input,
6640                predicate,
6641                optional_variables,
6642            } => LogicalPlan::Filter {
6643                input: Box::new(Self::set_traverse_edge_type_ids(*input, edge_var, new_ids)),
6644                predicate,
6645                optional_variables,
6646            },
6647            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6648                input: Box::new(Self::set_traverse_edge_type_ids(*input, edge_var, new_ids)),
6649                projections,
6650            },
6651            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6652                left: Box::new(Self::set_traverse_edge_type_ids(
6653                    *left,
6654                    edge_var,
6655                    new_ids.clone(),
6656                )),
6657                right: Box::new(Self::set_traverse_edge_type_ids(*right, edge_var, new_ids)),
6658            },
6659            other => other,
6660        }
6661    }
6662
6663    /// Check if the variable is the target of a Traverse node
6664    fn is_traverse_target(plan: &LogicalPlan, variable: &str) -> bool {
6665        match plan {
6666            LogicalPlan::Traverse {
6667                target_variable,
6668                input,
6669                ..
6670            } => target_variable == variable || Self::is_traverse_target(input, variable),
6671            LogicalPlan::Filter { input, .. }
6672            | LogicalPlan::Project { input, .. }
6673            | LogicalPlan::Sort { input, .. }
6674            | LogicalPlan::Limit { input, .. }
6675            | LogicalPlan::Aggregate { input, .. }
6676            | LogicalPlan::Apply { input, .. } => Self::is_traverse_target(input, variable),
6677            LogicalPlan::CrossJoin { left, right } => {
6678                Self::is_traverse_target(left, variable)
6679                    || Self::is_traverse_target(right, variable)
6680            }
6681            _ => false,
6682        }
6683    }
6684
6685    /// Push a predicate into a Traverse's target_filter for the specified variable
6686    fn push_predicate_to_traverse(
6687        plan: LogicalPlan,
6688        variable: &str,
6689        predicate: Expr,
6690    ) -> LogicalPlan {
6691        match plan {
6692            LogicalPlan::Traverse {
6693                input,
6694                edge_type_ids,
6695                direction,
6696                source_variable,
6697                target_variable,
6698                target_label_id,
6699                step_variable,
6700                min_hops,
6701                max_hops,
6702                optional,
6703                target_filter,
6704                path_variable,
6705                edge_properties,
6706                is_variable_length,
6707                optional_pattern_vars,
6708                scope_match_variables,
6709                edge_filter_expr,
6710                path_mode,
6711                qpp_steps,
6712            } => {
6713                if target_variable == variable {
6714                    // Found the traverse producing this variable
6715                    let new_filter = match target_filter {
6716                        Some(existing) => Some(Expr::BinaryOp {
6717                            left: Box::new(existing),
6718                            op: BinaryOp::And,
6719                            right: Box::new(predicate),
6720                        }),
6721                        None => Some(predicate),
6722                    };
6723                    LogicalPlan::Traverse {
6724                        input,
6725                        edge_type_ids,
6726                        direction,
6727                        source_variable,
6728                        target_variable,
6729                        target_label_id,
6730                        step_variable,
6731                        min_hops,
6732                        max_hops,
6733                        optional,
6734                        target_filter: new_filter,
6735                        path_variable,
6736                        edge_properties,
6737                        is_variable_length,
6738                        optional_pattern_vars,
6739                        scope_match_variables,
6740                        edge_filter_expr,
6741                        path_mode,
6742                        qpp_steps,
6743                    }
6744                } else {
6745                    // Recurse into input
6746                    LogicalPlan::Traverse {
6747                        input: Box::new(Self::push_predicate_to_traverse(
6748                            *input, variable, predicate,
6749                        )),
6750                        edge_type_ids,
6751                        direction,
6752                        source_variable,
6753                        target_variable,
6754                        target_label_id,
6755                        step_variable,
6756                        min_hops,
6757                        max_hops,
6758                        optional,
6759                        target_filter,
6760                        path_variable,
6761                        edge_properties,
6762                        is_variable_length,
6763                        optional_pattern_vars,
6764                        scope_match_variables,
6765                        edge_filter_expr,
6766                        path_mode,
6767                        qpp_steps,
6768                    }
6769                }
6770            }
6771            LogicalPlan::Filter {
6772                input,
6773                predicate: p,
6774                optional_variables: opt_vars,
6775            } => LogicalPlan::Filter {
6776                input: Box::new(Self::push_predicate_to_traverse(
6777                    *input, variable, predicate,
6778                )),
6779                predicate: p,
6780                optional_variables: opt_vars,
6781            },
6782            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6783                input: Box::new(Self::push_predicate_to_traverse(
6784                    *input, variable, predicate,
6785                )),
6786                projections,
6787            },
6788            LogicalPlan::CrossJoin { left, right } => {
6789                // Check which side has the variable
6790                if Self::is_traverse_target(&left, variable) {
6791                    LogicalPlan::CrossJoin {
6792                        left: Box::new(Self::push_predicate_to_traverse(
6793                            *left, variable, predicate,
6794                        )),
6795                        right,
6796                    }
6797                } else {
6798                    LogicalPlan::CrossJoin {
6799                        left,
6800                        right: Box::new(Self::push_predicate_to_traverse(
6801                            *right, variable, predicate,
6802                        )),
6803                    }
6804                }
6805            }
6806            other => other,
6807        }
6808    }
6809
6810    /// Plan a WITH clause, handling aggregations and projections.
6811    fn plan_with_clause(
6812        &self,
6813        with_clause: &WithClause,
6814        plan: LogicalPlan,
6815        vars_in_scope: &[VariableInfo],
6816    ) -> Result<(LogicalPlan, Vec<VariableInfo>)> {
6817        let mut plan = plan;
6818        let mut group_by: Vec<Expr> = Vec::new();
6819        let mut aggregates: Vec<Expr> = Vec::new();
6820        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
6821        let mut has_agg = false;
6822        let mut projections = Vec::new();
6823        let mut new_vars: Vec<VariableInfo> = Vec::new();
6824        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
6825        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
6826        let mut projected_aliases: HashSet<String> = HashSet::new();
6827        let mut has_unaliased_non_variable_expr = false;
6828
6829        for item in &with_clause.items {
6830            match item {
6831                ReturnItem::All => {
6832                    // WITH * - add all variables in scope
6833                    for v in vars_in_scope {
6834                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
6835                        projected_aliases.insert(v.name.clone());
6836                        projected_simple_reprs.insert(v.name.clone());
6837                    }
6838                    new_vars.extend(vars_in_scope.iter().cloned());
6839                }
6840                ReturnItem::Expr { expr, alias, .. } => {
6841                    if matches!(expr, Expr::Wildcard) {
6842                        for v in vars_in_scope {
6843                            projections
6844                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
6845                            projected_aliases.insert(v.name.clone());
6846                            projected_simple_reprs.insert(v.name.clone());
6847                        }
6848                        new_vars.extend(vars_in_scope.iter().cloned());
6849                    } else {
6850                        // Validate expression variables and syntax
6851                        validate_expression_variables(expr, vars_in_scope)?;
6852                        validate_expression(expr, vars_in_scope)?;
6853                        // Pattern predicates are not allowed in WITH
6854                        if contains_pattern_predicate(expr) {
6855                            return Err(anyhow!(
6856                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in WITH"
6857                            ));
6858                        }
6859
6860                        projections.push((expr.clone(), alias.clone()));
6861                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
6862                            // Bare aggregate — push directly
6863                            has_agg = true;
6864                            aggregates.push(expr.clone());
6865                            projected_aggregate_reprs.insert(expr.to_string_repr());
6866                        } else if !is_window_function(expr)
6867                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
6868                        {
6869                            // Compound aggregate or expression containing aggregates
6870                            has_agg = true;
6871                            compound_agg_exprs.push(expr.clone());
6872                            for inner in extract_inner_aggregates(expr) {
6873                                let repr = inner.to_string_repr();
6874                                if !projected_aggregate_reprs.contains(&repr) {
6875                                    aggregates.push(inner);
6876                                    projected_aggregate_reprs.insert(repr);
6877                                }
6878                            }
6879                        } else if !group_by.contains(expr) {
6880                            group_by.push(expr.clone());
6881                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
6882                                projected_simple_reprs.insert(expr.to_string_repr());
6883                            }
6884                        }
6885
6886                        // Preserve non-scalar type information when WITH aliases
6887                        // entity/path-capable expressions.
6888                        if let Some(a) = alias {
6889                            if projected_aliases.contains(a) {
6890                                return Err(anyhow!(
6891                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
6892                                    a
6893                                ));
6894                            }
6895                            let inferred = infer_with_output_type(expr, vars_in_scope);
6896                            new_vars.push(VariableInfo::new(a.clone(), inferred));
6897                            projected_aliases.insert(a.clone());
6898                        } else if let Expr::Variable(v) = expr {
6899                            if projected_aliases.contains(v) {
6900                                return Err(anyhow!(
6901                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
6902                                    v
6903                                ));
6904                            }
6905                            // Preserve the original type if the variable is just passed through
6906                            if let Some(existing) = find_var_in_scope(vars_in_scope, v) {
6907                                new_vars.push(existing.clone());
6908                            } else {
6909                                new_vars.push(VariableInfo::new(v.clone(), VariableType::Scalar));
6910                            }
6911                            projected_aliases.insert(v.clone());
6912                        } else {
6913                            has_unaliased_non_variable_expr = true;
6914                        }
6915                    }
6916                }
6917            }
6918        }
6919
6920        // Collect extra variables that need to survive the projection stage
6921        // for later WHERE / ORDER BY evaluation, then strip them afterwards.
6922        let projected_names: HashSet<&str> = new_vars.iter().map(|v| v.name.as_str()).collect();
6923        let mut passthrough_extras: Vec<String> = Vec::new();
6924        let mut seen_passthrough: HashSet<String> = HashSet::new();
6925
6926        if let Some(predicate) = &with_clause.where_clause {
6927            for name in collect_expr_variables(predicate) {
6928                if !projected_names.contains(name.as_str())
6929                    && find_var_in_scope(vars_in_scope, &name).is_some()
6930                    && seen_passthrough.insert(name.clone())
6931                {
6932                    passthrough_extras.push(name);
6933                }
6934            }
6935        }
6936
6937        // Non-aggregating WITH allows ORDER BY to reference incoming variables.
6938        // Carry those variables through the projection so Sort can resolve them.
6939        if !has_agg && let Some(order_by) = &with_clause.order_by {
6940            for item in order_by {
6941                for name in collect_expr_variables(&item.expr) {
6942                    if !projected_names.contains(name.as_str())
6943                        && find_var_in_scope(vars_in_scope, &name).is_some()
6944                        && seen_passthrough.insert(name.clone())
6945                    {
6946                        passthrough_extras.push(name);
6947                    }
6948                }
6949            }
6950        }
6951
6952        let needs_cleanup = !passthrough_extras.is_empty();
6953        for extra in &passthrough_extras {
6954            projections.push((Expr::Variable(extra.clone()), Some(extra.clone())));
6955        }
6956
6957        // Validate compound aggregate expressions: non-aggregate refs must be
6958        // individually present in the group_by as simple variables or properties.
6959        if has_agg {
6960            let group_by_reprs: HashSet<String> =
6961                group_by.iter().map(|e| e.to_string_repr()).collect();
6962            for expr in &compound_agg_exprs {
6963                let mut refs = Vec::new();
6964                collect_non_aggregate_refs(expr, false, &mut refs);
6965                for r in &refs {
6966                    let is_covered = match r {
6967                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
6968                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
6969                    };
6970                    if !is_covered {
6971                        return Err(anyhow!(
6972                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
6973                        ));
6974                    }
6975                }
6976            }
6977        }
6978
6979        if has_agg {
6980            plan = LogicalPlan::Aggregate {
6981                input: Box::new(plan),
6982                group_by,
6983                aggregates,
6984            };
6985
6986            // Insert a renaming Project so downstream clauses (WHERE, RETURN)
6987            // can reference the WITH aliases instead of raw column names.
6988            let rename_projections: Vec<(Expr, Option<String>)> = projections
6989                .iter()
6990                .map(|(expr, alias)| {
6991                    if expr.is_aggregate() && !is_compound_aggregate(expr) {
6992                        // Bare aggregate — reference by column name
6993                        (Expr::Variable(aggregate_column_name(expr)), alias.clone())
6994                    } else if is_compound_aggregate(expr)
6995                        || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
6996                    {
6997                        // Compound aggregate — replace inner aggregates with
6998                        // column references, keep outer expression
6999                        (replace_aggregates_with_columns(expr), alias.clone())
7000                    } else {
7001                        (Expr::Variable(expr.to_string_repr()), alias.clone())
7002                    }
7003                })
7004                .collect();
7005            plan = LogicalPlan::Project {
7006                input: Box::new(plan),
7007                projections: rename_projections,
7008            };
7009        } else if !projections.is_empty() {
7010            plan = LogicalPlan::Project {
7011                input: Box::new(plan),
7012                projections: projections.clone(),
7013            };
7014        }
7015
7016        // Apply the WHERE filter (post-projection, with extras still visible).
7017        if let Some(predicate) = &with_clause.where_clause {
7018            plan = LogicalPlan::Filter {
7019                input: Box::new(plan),
7020                predicate: predicate.clone(),
7021                optional_variables: HashSet::new(),
7022            };
7023        }
7024
7025        // Validate and apply ORDER BY for WITH clause.
7026        // Keep pre-WITH vars in scope for parser compatibility, then apply
7027        // stricter checks for aggregate-containing ORDER BY items.
7028        if let Some(order_by) = &with_clause.order_by {
7029            // Build a mapping from aliases and projected expression reprs to
7030            // output columns of the preceding Project/Aggregate pipeline.
7031            let with_order_aliases: HashMap<String, Expr> = projections
7032                .iter()
7033                .flat_map(|(expr, alias)| {
7034                    let output_col = if let Some(a) = alias {
7035                        a.clone()
7036                    } else if expr.is_aggregate() && !is_compound_aggregate(expr) {
7037                        aggregate_column_name(expr)
7038                    } else {
7039                        expr.to_string_repr()
7040                    };
7041
7042                    let mut entries = Vec::new();
7043                    // ORDER BY alias
7044                    if let Some(a) = alias {
7045                        entries.push((a.clone(), Expr::Variable(output_col.clone())));
7046                    }
7047                    // ORDER BY projected expression (e.g. me.age)
7048                    entries.push((expr.to_string_repr(), Expr::Variable(output_col)));
7049                    entries
7050                })
7051                .collect();
7052
7053            let order_by_scope: Vec<VariableInfo> = {
7054                let mut scope = new_vars.clone();
7055                for v in vars_in_scope {
7056                    if !is_var_in_scope(&scope, &v.name) {
7057                        scope.push(v.clone());
7058                    }
7059                }
7060                scope
7061            };
7062            for item in order_by {
7063                validate_expression_variables(&item.expr, &order_by_scope)?;
7064                validate_expression(&item.expr, &order_by_scope)?;
7065                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
7066                if has_aggregate_in_item && !has_agg {
7067                    return Err(anyhow!(
7068                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY of WITH"
7069                    ));
7070                }
7071                if has_agg && has_aggregate_in_item {
7072                    validate_with_order_by_aggregate_item(
7073                        &item.expr,
7074                        &projected_aggregate_reprs,
7075                        &projected_simple_reprs,
7076                        &projected_aliases,
7077                    )?;
7078                }
7079            }
7080            let rewritten_order_by: Vec<SortItem> = order_by
7081                .iter()
7082                .map(|item| {
7083                    let mut expr =
7084                        rewrite_order_by_expr_with_aliases(&item.expr, &with_order_aliases);
7085                    if has_agg {
7086                        // Rewrite any aggregate calls to the aggregate output
7087                        // columns produced by Aggregate.
7088                        expr = replace_aggregates_with_columns(&expr);
7089                        // Then re-map projected property expressions to aliases
7090                        // from the WITH projection.
7091                        expr = rewrite_order_by_expr_with_aliases(&expr, &with_order_aliases);
7092                    }
7093                    SortItem {
7094                        expr,
7095                        ascending: item.ascending,
7096                    }
7097                })
7098                .collect();
7099            plan = LogicalPlan::Sort {
7100                input: Box::new(plan),
7101                order_by: rewritten_order_by,
7102            };
7103        }
7104
7105        // Non-variable expressions in WITH must be aliased.
7106        // This check is intentionally placed after ORDER BY validation so
7107        // higher-priority semantic errors (e.g., ambiguous aggregation in
7108        // ORDER BY) can surface first.
7109        if has_unaliased_non_variable_expr {
7110            return Err(anyhow!(
7111                "SyntaxError: NoExpressionAlias - All non-variable expressions in WITH must be aliased"
7112            ));
7113        }
7114
7115        // Validate and apply SKIP/LIMIT for WITH clause
7116        let skip = with_clause
7117            .skip
7118            .as_ref()
7119            .map(|e| parse_non_negative_integer(e, "SKIP", &self.params))
7120            .transpose()?
7121            .flatten();
7122        let fetch = with_clause
7123            .limit
7124            .as_ref()
7125            .map(|e| parse_non_negative_integer(e, "LIMIT", &self.params))
7126            .transpose()?
7127            .flatten();
7128
7129        if skip.is_some() || fetch.is_some() {
7130            plan = LogicalPlan::Limit {
7131                input: Box::new(plan),
7132                skip,
7133                fetch,
7134            };
7135        }
7136
7137        // Strip passthrough columns that were only needed by WHERE / ORDER BY.
7138        if needs_cleanup {
7139            let cleanup_projections: Vec<(Expr, Option<String>)> = new_vars
7140                .iter()
7141                .map(|v| (Expr::Variable(v.name.clone()), Some(v.name.clone())))
7142                .collect();
7143            plan = LogicalPlan::Project {
7144                input: Box::new(plan),
7145                projections: cleanup_projections,
7146            };
7147        }
7148
7149        if with_clause.distinct {
7150            plan = LogicalPlan::Distinct {
7151                input: Box::new(plan),
7152            };
7153        }
7154
7155        Ok((plan, new_vars))
7156    }
7157
7158    fn plan_with_recursive(
7159        &self,
7160        with_recursive: &WithRecursiveClause,
7161        _prev_plan: LogicalPlan,
7162        vars_in_scope: &[VariableInfo],
7163    ) -> Result<LogicalPlan> {
7164        // WITH RECURSIVE requires a UNION query with anchor and recursive parts
7165        match &*with_recursive.query {
7166            Query::Union { left, right, .. } => {
7167                // Plan the anchor (initial) query with current scope
7168                let initial_plan = self.rewrite_and_plan_typed(*left.clone(), vars_in_scope)?;
7169
7170                // Plan the recursive query with the CTE name added to scope
7171                // so it can reference itself
7172                let mut recursive_scope = vars_in_scope.to_vec();
7173                recursive_scope.push(VariableInfo::new(
7174                    with_recursive.name.clone(),
7175                    VariableType::Scalar,
7176                ));
7177                let recursive_plan =
7178                    self.rewrite_and_plan_typed(*right.clone(), &recursive_scope)?;
7179
7180                Ok(LogicalPlan::RecursiveCTE {
7181                    cte_name: with_recursive.name.clone(),
7182                    initial: Box::new(initial_plan),
7183                    recursive: Box::new(recursive_plan),
7184                })
7185            }
7186            _ => Err(anyhow::anyhow!(
7187                "WITH RECURSIVE requires a UNION query with anchor and recursive parts"
7188            )),
7189        }
7190    }
7191
7192    pub fn properties_to_expr(&self, variable: &str, properties: &Option<Expr>) -> Option<Expr> {
7193        let entries = match properties {
7194            Some(Expr::Map(entries)) => entries,
7195            _ => return None,
7196        };
7197
7198        if entries.is_empty() {
7199            return None;
7200        }
7201        let mut final_expr = None;
7202        for (prop, val_expr) in entries {
7203            let eq_expr = Expr::BinaryOp {
7204                left: Box::new(Expr::Property(
7205                    Box::new(Expr::Variable(variable.to_string())),
7206                    prop.clone(),
7207                )),
7208                op: BinaryOp::Eq,
7209                right: Box::new(val_expr.clone()),
7210            };
7211
7212            if let Some(e) = final_expr {
7213                final_expr = Some(Expr::BinaryOp {
7214                    left: Box::new(e),
7215                    op: BinaryOp::And,
7216                    right: Box::new(eq_expr),
7217                });
7218            } else {
7219                final_expr = Some(eq_expr);
7220            }
7221        }
7222        final_expr
7223    }
7224
7225    /// Build a filter expression from node properties and labels.
7226    ///
7227    /// This is used for TraverseMainByType where we need to filter target nodes
7228    /// by both labels and properties. Label checks use hasLabel(variable, 'label').
7229    pub fn node_filter_expr(
7230        &self,
7231        variable: &str,
7232        labels: &[String],
7233        properties: &Option<Expr>,
7234    ) -> Option<Expr> {
7235        let mut final_expr = None;
7236
7237        // Add label checks using hasLabel(variable, 'label')
7238        for label in labels {
7239            let label_check = Expr::FunctionCall {
7240                name: "hasLabel".to_string(),
7241                args: vec![
7242                    Expr::Variable(variable.to_string()),
7243                    Expr::Literal(CypherLiteral::String(label.clone())),
7244                ],
7245                distinct: false,
7246                window_spec: None,
7247            };
7248
7249            final_expr = match final_expr {
7250                Some(e) => Some(Expr::BinaryOp {
7251                    left: Box::new(e),
7252                    op: BinaryOp::And,
7253                    right: Box::new(label_check),
7254                }),
7255                None => Some(label_check),
7256            };
7257        }
7258
7259        // Add property checks
7260        if let Some(prop_expr) = self.properties_to_expr(variable, properties) {
7261            final_expr = match final_expr {
7262                Some(e) => Some(Expr::BinaryOp {
7263                    left: Box::new(e),
7264                    op: BinaryOp::And,
7265                    right: Box::new(prop_expr),
7266                }),
7267                None => Some(prop_expr),
7268            };
7269        }
7270
7271        final_expr
7272    }
7273
7274    /// Create a filter plan that ensures traversed target matches a bound variable.
7275    ///
7276    /// Used in EXISTS subquery patterns where the target is already bound.
7277    /// Compares the target's VID against the bound variable's VID.
7278    fn wrap_with_bound_target_filter(plan: LogicalPlan, target_variable: &str) -> LogicalPlan {
7279        // Compare the traverse-discovered target's VID against the bound variable's VID.
7280        // Left side: Property access on the variable from current scope.
7281        // Right side: Variable column "{var}._vid" from traverse output (outer scope).
7282        // We use Variable("{var}._vid") to access the VID column from the traverse output,
7283        // not Property(Variable("{var}"), "_vid") because the column is already flattened.
7284        let bound_check = Expr::BinaryOp {
7285            left: Box::new(Expr::Property(
7286                Box::new(Expr::Variable(target_variable.to_string())),
7287                "_vid".to_string(),
7288            )),
7289            op: BinaryOp::Eq,
7290            right: Box::new(Expr::Variable(format!("{}._vid", target_variable))),
7291        };
7292        LogicalPlan::Filter {
7293            input: Box::new(plan),
7294            predicate: bound_check,
7295            optional_variables: HashSet::new(),
7296        }
7297    }
7298
7299    /// Replace a Scan node matching the variable with a VectorKnn node
7300    fn replace_scan_with_knn(
7301        plan: LogicalPlan,
7302        variable: &str,
7303        property: &str,
7304        query: Expr,
7305        threshold: Option<f32>,
7306    ) -> LogicalPlan {
7307        match plan {
7308            LogicalPlan::Scan {
7309                label_id,
7310                labels,
7311                variable: scan_var,
7312                filter,
7313                optional,
7314            } => {
7315                if scan_var == variable {
7316                    // Inject any existing scan filter into VectorKnn?
7317                    // VectorKnn doesn't support pre-filtering natively in logical plan yet (except threshold).
7318                    // Typically filter is applied post-Knn or during Knn if supported.
7319                    // For now, we assume filter is residual or handled by `extract_vector_similarity` which separates residual.
7320                    // If `filter` is present on Scan, it must be preserved.
7321                    // We can wrap VectorKnn in Filter if Scan had filter.
7322
7323                    let knn = LogicalPlan::VectorKnn {
7324                        label_id,
7325                        variable: variable.to_string(),
7326                        property: property.to_string(),
7327                        query,
7328                        k: 100, // Default K, should push down LIMIT
7329                        threshold,
7330                    };
7331
7332                    if let Some(f) = filter {
7333                        LogicalPlan::Filter {
7334                            input: Box::new(knn),
7335                            predicate: f,
7336                            optional_variables: HashSet::new(),
7337                        }
7338                    } else {
7339                        knn
7340                    }
7341                } else {
7342                    LogicalPlan::Scan {
7343                        label_id,
7344                        labels,
7345                        variable: scan_var,
7346                        filter,
7347                        optional,
7348                    }
7349                }
7350            }
7351            LogicalPlan::Filter {
7352                input,
7353                predicate,
7354                optional_variables,
7355            } => LogicalPlan::Filter {
7356                input: Box::new(Self::replace_scan_with_knn(
7357                    *input, variable, property, query, threshold,
7358                )),
7359                predicate,
7360                optional_variables,
7361            },
7362            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7363                input: Box::new(Self::replace_scan_with_knn(
7364                    *input, variable, property, query, threshold,
7365                )),
7366                projections,
7367            },
7368            LogicalPlan::Limit { input, skip, fetch } => {
7369                // If we encounter Limit, we should ideally push K down to VectorKnn
7370                // But replace_scan_with_knn is called from plan_where_clause which is inside plan_match.
7371                // Limit comes later.
7372                // To support Limit pushdown, we need a separate optimizer pass or do it in plan_single.
7373                LogicalPlan::Limit {
7374                    input: Box::new(Self::replace_scan_with_knn(
7375                        *input, variable, property, query, threshold,
7376                    )),
7377                    skip,
7378                    fetch,
7379                }
7380            }
7381            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
7382                left: Box::new(Self::replace_scan_with_knn(
7383                    *left,
7384                    variable,
7385                    property,
7386                    query.clone(),
7387                    threshold,
7388                )),
7389                right: Box::new(Self::replace_scan_with_knn(
7390                    *right, variable, property, query, threshold,
7391                )),
7392            },
7393            other => other,
7394        }
7395    }
7396
7397    /// Find the label_id for a Scan node matching the given variable
7398    fn find_scan_label_id(plan: &LogicalPlan, variable: &str) -> Option<u16> {
7399        match plan {
7400            LogicalPlan::Scan {
7401                label_id,
7402                variable: var,
7403                ..
7404            } if var == variable => Some(*label_id),
7405            LogicalPlan::ScanAll { variable: var, .. } if var == variable => Some(0),
7406            LogicalPlan::Filter { input, .. }
7407            | LogicalPlan::Project { input, .. }
7408            | LogicalPlan::Sort { input, .. }
7409            | LogicalPlan::Limit { input, .. }
7410            | LogicalPlan::Aggregate { input, .. }
7411            | LogicalPlan::Apply { input, .. } => Self::find_scan_label_id(input, variable),
7412            LogicalPlan::CrossJoin { left, right } => Self::find_scan_label_id(left, variable)
7413                .or_else(|| Self::find_scan_label_id(right, variable)),
7414            LogicalPlan::Traverse { input, .. } => Self::find_scan_label_id(input, variable),
7415            _ => None,
7416        }
7417    }
7418
7419    /// Push a predicate into a Scan's filter for the specified variable
7420    fn push_predicate_to_scan(plan: LogicalPlan, variable: &str, predicate: Expr) -> LogicalPlan {
7421        match plan {
7422            LogicalPlan::Scan {
7423                label_id,
7424                labels,
7425                variable: var,
7426                filter,
7427                optional,
7428            } if var == variable => {
7429                // Merge the predicate with existing filter
7430                let new_filter = match filter {
7431                    Some(existing) => Some(Expr::BinaryOp {
7432                        left: Box::new(existing),
7433                        op: BinaryOp::And,
7434                        right: Box::new(predicate),
7435                    }),
7436                    None => Some(predicate),
7437                };
7438                LogicalPlan::Scan {
7439                    label_id,
7440                    labels,
7441                    variable: var,
7442                    filter: new_filter,
7443                    optional,
7444                }
7445            }
7446            LogicalPlan::ScanAll {
7447                variable: var,
7448                filter,
7449                optional,
7450            } if var == variable => {
7451                let new_filter = match filter {
7452                    Some(existing) => Some(Expr::BinaryOp {
7453                        left: Box::new(existing),
7454                        op: BinaryOp::And,
7455                        right: Box::new(predicate),
7456                    }),
7457                    None => Some(predicate),
7458                };
7459                LogicalPlan::ScanAll {
7460                    variable: var,
7461                    filter: new_filter,
7462                    optional,
7463                }
7464            }
7465            LogicalPlan::Filter {
7466                input,
7467                predicate: p,
7468                optional_variables: opt_vars,
7469            } => LogicalPlan::Filter {
7470                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7471                predicate: p,
7472                optional_variables: opt_vars,
7473            },
7474            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7475                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7476                projections,
7477            },
7478            LogicalPlan::CrossJoin { left, right } => {
7479                // Check which side has the variable
7480                if Self::find_scan_label_id(&left, variable).is_some() {
7481                    LogicalPlan::CrossJoin {
7482                        left: Box::new(Self::push_predicate_to_scan(*left, variable, predicate)),
7483                        right,
7484                    }
7485                } else {
7486                    LogicalPlan::CrossJoin {
7487                        left,
7488                        right: Box::new(Self::push_predicate_to_scan(*right, variable, predicate)),
7489                    }
7490                }
7491            }
7492            LogicalPlan::Traverse {
7493                input,
7494                edge_type_ids,
7495                direction,
7496                source_variable,
7497                target_variable,
7498                target_label_id,
7499                step_variable,
7500                min_hops,
7501                max_hops,
7502                optional,
7503                target_filter,
7504                path_variable,
7505                edge_properties,
7506                is_variable_length,
7507                optional_pattern_vars,
7508                scope_match_variables,
7509                edge_filter_expr,
7510                path_mode,
7511                qpp_steps,
7512            } => LogicalPlan::Traverse {
7513                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7514                edge_type_ids,
7515                direction,
7516                source_variable,
7517                target_variable,
7518                target_label_id,
7519                step_variable,
7520                min_hops,
7521                max_hops,
7522                optional,
7523                target_filter,
7524                path_variable,
7525                edge_properties,
7526                is_variable_length,
7527                optional_pattern_vars,
7528                scope_match_variables,
7529                edge_filter_expr,
7530                path_mode,
7531                qpp_steps,
7532            },
7533            other => other,
7534        }
7535    }
7536
7537    /// Extract predicates that reference only the specified variable
7538    fn extract_variable_predicates(predicate: &Expr, variable: &str) -> (Vec<Expr>, Option<Expr>) {
7539        let analyzer = PredicateAnalyzer::new();
7540        let analysis = analyzer.analyze(predicate, variable);
7541
7542        // Return pushable predicates and combined residual
7543        let residual = if analysis.residual.is_empty() {
7544            None
7545        } else {
7546            let mut iter = analysis.residual.into_iter();
7547            let first = iter.next().unwrap();
7548            Some(iter.fold(first, |acc, e| Expr::BinaryOp {
7549                left: Box::new(acc),
7550                op: BinaryOp::And,
7551                right: Box::new(e),
7552            }))
7553        };
7554
7555        (analysis.pushable, residual)
7556    }
7557
7558    // =====================================================================
7559    // Apply Predicate Pushdown - Helper Functions
7560    // =====================================================================
7561
7562    /// Split AND-connected predicates into a list.
7563    fn split_and_conjuncts(expr: &Expr) -> Vec<Expr> {
7564        match expr {
7565            Expr::BinaryOp {
7566                left,
7567                op: BinaryOp::And,
7568                right,
7569            } => {
7570                let mut result = Self::split_and_conjuncts(left);
7571                result.extend(Self::split_and_conjuncts(right));
7572                result
7573            }
7574            _ => vec![expr.clone()],
7575        }
7576    }
7577
7578    /// Combine predicates with AND.
7579    fn combine_predicates(predicates: Vec<Expr>) -> Option<Expr> {
7580        if predicates.is_empty() {
7581            return None;
7582        }
7583        let mut result = predicates[0].clone();
7584        for pred in predicates.iter().skip(1) {
7585            result = Expr::BinaryOp {
7586                left: Box::new(result),
7587                op: BinaryOp::And,
7588                right: Box::new(pred.clone()),
7589            };
7590        }
7591        Some(result)
7592    }
7593
7594    /// Collect all variable names referenced in an expression.
7595    fn collect_expr_variables(expr: &Expr) -> HashSet<String> {
7596        let mut vars = HashSet::new();
7597        Self::collect_expr_variables_impl(expr, &mut vars);
7598        vars
7599    }
7600
7601    fn collect_expr_variables_impl(expr: &Expr, vars: &mut HashSet<String>) {
7602        match expr {
7603            Expr::Variable(name) => {
7604                vars.insert(name.clone());
7605            }
7606            Expr::Property(inner, _) => {
7607                if let Expr::Variable(name) = inner.as_ref() {
7608                    vars.insert(name.clone());
7609                } else {
7610                    Self::collect_expr_variables_impl(inner, vars);
7611                }
7612            }
7613            Expr::BinaryOp { left, right, .. } => {
7614                Self::collect_expr_variables_impl(left, vars);
7615                Self::collect_expr_variables_impl(right, vars);
7616            }
7617            Expr::UnaryOp { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
7618            Expr::IsNull(e) | Expr::IsNotNull(e) => Self::collect_expr_variables_impl(e, vars),
7619            Expr::FunctionCall { args, .. } => {
7620                for arg in args {
7621                    Self::collect_expr_variables_impl(arg, vars);
7622                }
7623            }
7624            Expr::List(items) => {
7625                for item in items {
7626                    Self::collect_expr_variables_impl(item, vars);
7627                }
7628            }
7629            Expr::Case {
7630                expr,
7631                when_then,
7632                else_expr,
7633            } => {
7634                if let Some(e) = expr {
7635                    Self::collect_expr_variables_impl(e, vars);
7636                }
7637                for (w, t) in when_then {
7638                    Self::collect_expr_variables_impl(w, vars);
7639                    Self::collect_expr_variables_impl(t, vars);
7640                }
7641                if let Some(e) = else_expr {
7642                    Self::collect_expr_variables_impl(e, vars);
7643                }
7644            }
7645            Expr::LabelCheck { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
7646            // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
7647            // they introduce local variable bindings not in outer scope.
7648            _ => {}
7649        }
7650    }
7651
7652    /// Collect all variables produced by a logical plan.
7653    fn collect_plan_variables(plan: &LogicalPlan) -> HashSet<String> {
7654        let mut vars = HashSet::new();
7655        Self::collect_plan_variables_impl(plan, &mut vars);
7656        vars
7657    }
7658
7659    fn collect_plan_variables_impl(plan: &LogicalPlan, vars: &mut HashSet<String>) {
7660        match plan {
7661            LogicalPlan::Scan { variable, .. } => {
7662                vars.insert(variable.clone());
7663            }
7664            LogicalPlan::Traverse {
7665                target_variable,
7666                step_variable,
7667                input,
7668                path_variable,
7669                ..
7670            } => {
7671                vars.insert(target_variable.clone());
7672                if let Some(sv) = step_variable {
7673                    vars.insert(sv.clone());
7674                }
7675                if let Some(pv) = path_variable {
7676                    vars.insert(pv.clone());
7677                }
7678                Self::collect_plan_variables_impl(input, vars);
7679            }
7680            LogicalPlan::Filter { input, .. } => Self::collect_plan_variables_impl(input, vars),
7681            LogicalPlan::Project { input, projections } => {
7682                for (expr, alias) in projections {
7683                    if let Some(a) = alias {
7684                        vars.insert(a.clone());
7685                    } else if let Expr::Variable(v) = expr {
7686                        vars.insert(v.clone());
7687                    }
7688                }
7689                Self::collect_plan_variables_impl(input, vars);
7690            }
7691            LogicalPlan::Apply {
7692                input, subquery, ..
7693            } => {
7694                Self::collect_plan_variables_impl(input, vars);
7695                Self::collect_plan_variables_impl(subquery, vars);
7696            }
7697            LogicalPlan::CrossJoin { left, right } => {
7698                Self::collect_plan_variables_impl(left, vars);
7699                Self::collect_plan_variables_impl(right, vars);
7700            }
7701            LogicalPlan::Unwind {
7702                input, variable, ..
7703            } => {
7704                vars.insert(variable.clone());
7705                Self::collect_plan_variables_impl(input, vars);
7706            }
7707            LogicalPlan::Aggregate { input, .. } => {
7708                Self::collect_plan_variables_impl(input, vars);
7709            }
7710            LogicalPlan::Distinct { input } => {
7711                Self::collect_plan_variables_impl(input, vars);
7712            }
7713            LogicalPlan::Sort { input, .. } => {
7714                Self::collect_plan_variables_impl(input, vars);
7715            }
7716            LogicalPlan::Limit { input, .. } => {
7717                Self::collect_plan_variables_impl(input, vars);
7718            }
7719            LogicalPlan::VectorKnn { variable, .. } => {
7720                vars.insert(variable.clone());
7721            }
7722            LogicalPlan::ProcedureCall { yield_items, .. } => {
7723                for (name, alias) in yield_items {
7724                    vars.insert(alias.clone().unwrap_or_else(|| name.clone()));
7725                }
7726            }
7727            LogicalPlan::ShortestPath {
7728                input,
7729                path_variable,
7730                ..
7731            } => {
7732                vars.insert(path_variable.clone());
7733                Self::collect_plan_variables_impl(input, vars);
7734            }
7735            LogicalPlan::AllShortestPaths {
7736                input,
7737                path_variable,
7738                ..
7739            } => {
7740                vars.insert(path_variable.clone());
7741                Self::collect_plan_variables_impl(input, vars);
7742            }
7743            LogicalPlan::RecursiveCTE {
7744                initial, recursive, ..
7745            } => {
7746                Self::collect_plan_variables_impl(initial, vars);
7747                Self::collect_plan_variables_impl(recursive, vars);
7748            }
7749            LogicalPlan::SubqueryCall {
7750                input, subquery, ..
7751            } => {
7752                Self::collect_plan_variables_impl(input, vars);
7753                Self::collect_plan_variables_impl(subquery, vars);
7754            }
7755            _ => {}
7756        }
7757    }
7758
7759    /// Extract predicates that only reference variables from Apply's input.
7760    /// Returns (input_only_predicates, remaining_predicates).
7761    fn extract_apply_input_predicates(
7762        predicate: &Expr,
7763        input_variables: &HashSet<String>,
7764        subquery_new_variables: &HashSet<String>,
7765    ) -> (Vec<Expr>, Vec<Expr>) {
7766        let conjuncts = Self::split_and_conjuncts(predicate);
7767        let mut input_preds = Vec::new();
7768        let mut remaining = Vec::new();
7769
7770        for conj in conjuncts {
7771            let vars = Self::collect_expr_variables(&conj);
7772
7773            // Predicate only references input variables (none from subquery)
7774            let refs_input_only = vars.iter().all(|v| input_variables.contains(v));
7775            let refs_any_subquery = vars.iter().any(|v| subquery_new_variables.contains(v));
7776
7777            if refs_input_only && !refs_any_subquery && !vars.is_empty() {
7778                input_preds.push(conj);
7779            } else {
7780                remaining.push(conj);
7781            }
7782        }
7783
7784        (input_preds, remaining)
7785    }
7786
7787    /// Push eligible predicates into Apply.input_filter.
7788    /// This filters input rows BEFORE executing the correlated subquery.
7789    fn push_predicates_to_apply(plan: LogicalPlan, current_predicate: &mut Expr) -> LogicalPlan {
7790        match plan {
7791            LogicalPlan::Apply {
7792                input,
7793                subquery,
7794                input_filter,
7795            } => {
7796                // Collect variables from input plan
7797                let input_vars = Self::collect_plan_variables(&input);
7798
7799                // Collect NEW variables introduced by subquery (not in input)
7800                let subquery_vars = Self::collect_plan_variables(&subquery);
7801                let new_subquery_vars: HashSet<String> =
7802                    subquery_vars.difference(&input_vars).cloned().collect();
7803
7804                // Extract predicates that only reference input variables
7805                let (input_preds, remaining) = Self::extract_apply_input_predicates(
7806                    current_predicate,
7807                    &input_vars,
7808                    &new_subquery_vars,
7809                );
7810
7811                // Update current_predicate to only remaining predicates
7812                *current_predicate = if remaining.is_empty() {
7813                    Expr::TRUE
7814                } else {
7815                    Self::combine_predicates(remaining).unwrap()
7816                };
7817
7818                // Combine extracted predicates with existing input_filter
7819                let new_input_filter = if input_preds.is_empty() {
7820                    input_filter
7821                } else {
7822                    let extracted = Self::combine_predicates(input_preds).unwrap();
7823                    match input_filter {
7824                        Some(existing) => Some(Expr::BinaryOp {
7825                            left: Box::new(existing),
7826                            op: BinaryOp::And,
7827                            right: Box::new(extracted),
7828                        }),
7829                        None => Some(extracted),
7830                    }
7831                };
7832
7833                // Recurse into input plan
7834                let new_input = Self::push_predicates_to_apply(*input, current_predicate);
7835
7836                LogicalPlan::Apply {
7837                    input: Box::new(new_input),
7838                    subquery,
7839                    input_filter: new_input_filter,
7840                }
7841            }
7842            // Recurse into other plan nodes
7843            LogicalPlan::Filter {
7844                input,
7845                predicate,
7846                optional_variables,
7847            } => LogicalPlan::Filter {
7848                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7849                predicate,
7850                optional_variables,
7851            },
7852            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7853                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7854                projections,
7855            },
7856            LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
7857                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7858                order_by,
7859            },
7860            LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
7861                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7862                skip,
7863                fetch,
7864            },
7865            LogicalPlan::Aggregate {
7866                input,
7867                group_by,
7868                aggregates,
7869            } => LogicalPlan::Aggregate {
7870                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7871                group_by,
7872                aggregates,
7873            },
7874            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
7875                left: Box::new(Self::push_predicates_to_apply(*left, current_predicate)),
7876                right: Box::new(Self::push_predicates_to_apply(*right, current_predicate)),
7877            },
7878            LogicalPlan::Traverse {
7879                input,
7880                edge_type_ids,
7881                direction,
7882                source_variable,
7883                target_variable,
7884                target_label_id,
7885                step_variable,
7886                min_hops,
7887                max_hops,
7888                optional,
7889                target_filter,
7890                path_variable,
7891                edge_properties,
7892                is_variable_length,
7893                optional_pattern_vars,
7894                scope_match_variables,
7895                edge_filter_expr,
7896                path_mode,
7897                qpp_steps,
7898            } => LogicalPlan::Traverse {
7899                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7900                edge_type_ids,
7901                direction,
7902                source_variable,
7903                target_variable,
7904                target_label_id,
7905                step_variable,
7906                min_hops,
7907                max_hops,
7908                optional,
7909                target_filter,
7910                path_variable,
7911                edge_properties,
7912                is_variable_length,
7913                optional_pattern_vars,
7914                scope_match_variables,
7915                edge_filter_expr,
7916                path_mode,
7917                qpp_steps,
7918            },
7919            other => other,
7920        }
7921    }
7922}
7923
7924/// Get the expected column name for an aggregate expression.
7925///
7926/// This is the single source of truth for aggregate column naming, used by:
7927/// - Logical planner (to create column references)
7928/// - Physical planner (to rename DataFusion's auto-generated column names)
7929/// - Fallback executor (to name result columns)
7930pub fn aggregate_column_name(expr: &Expr) -> String {
7931    expr.to_string_repr()
7932}
7933
7934/// Output produced by `EXPLAIN` — a human-readable plan with index and cost info.
7935#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
7936pub struct ExplainOutput {
7937    /// Debug-formatted logical plan tree.
7938    pub plan_text: String,
7939    /// Index availability report for each scan in the plan.
7940    pub index_usage: Vec<IndexUsage>,
7941    /// Rough row and cost estimates for the full plan.
7942    pub cost_estimates: CostEstimates,
7943    /// Planner warnings (e.g., missing index, forced full scan).
7944    pub warnings: Vec<String>,
7945    /// Suggested indexes that would improve this query.
7946    pub suggestions: Vec<IndexSuggestion>,
7947}
7948
7949/// Suggestion for creating an index to improve query performance.
7950#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
7951pub struct IndexSuggestion {
7952    /// Label or edge type that would benefit from the index.
7953    pub label_or_type: String,
7954    /// Property to index.
7955    pub property: String,
7956    /// Recommended index type (e.g., `"SCALAR"`, `"VECTOR"`).
7957    pub index_type: String,
7958    /// Human-readable explanation of the performance benefit.
7959    pub reason: String,
7960    /// Ready-to-execute Cypher statement to create the index.
7961    pub create_statement: String,
7962}
7963
7964/// Index availability report for a single scan operator.
7965#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
7966pub struct IndexUsage {
7967    pub label_or_type: String,
7968    pub property: String,
7969    pub index_type: String,
7970    /// Whether the index was actually used for this scan.
7971    pub used: bool,
7972    /// Human-readable explanation of why the index was or was not used.
7973    pub reason: Option<String>,
7974}
7975
7976/// Rough cost and row count estimates for a complete logical plan.
7977#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
7978pub struct CostEstimates {
7979    /// Estimated number of rows the plan will produce.
7980    pub estimated_rows: f64,
7981    /// Abstract cost units (lower is cheaper).
7982    pub estimated_cost: f64,
7983}
7984
7985impl QueryPlanner {
7986    /// Plan a query and produce an EXPLAIN report (plan text, index usage, costs).
7987    pub fn explain_plan(&self, ast: Query) -> Result<ExplainOutput> {
7988        let plan = self.plan(ast)?;
7989        self.explain_logical_plan(&plan)
7990    }
7991
7992    /// Produce an EXPLAIN report for an already-planned logical plan.
7993    pub fn explain_logical_plan(&self, plan: &LogicalPlan) -> Result<ExplainOutput> {
7994        let index_usage = self.analyze_index_usage(plan)?;
7995        let cost_estimates = self.estimate_costs(plan)?;
7996        let suggestions = self.collect_index_suggestions(plan);
7997        let warnings = Vec::new();
7998        let plan_text = format!("{:#?}", plan);
7999
8000        Ok(ExplainOutput {
8001            plan_text,
8002            index_usage,
8003            cost_estimates,
8004            warnings,
8005            suggestions,
8006        })
8007    }
8008
8009    fn analyze_index_usage(&self, plan: &LogicalPlan) -> Result<Vec<IndexUsage>> {
8010        let mut usage = Vec::new();
8011        self.collect_index_usage(plan, &mut usage);
8012        Ok(usage)
8013    }
8014
8015    fn collect_index_usage(&self, plan: &LogicalPlan, usage: &mut Vec<IndexUsage>) {
8016        match plan {
8017            LogicalPlan::Scan {
8018                label_id,
8019                filter: Some(filter),
8020                ..
8021            } => {
8022                // Detect indexed-property pushdown — issue #57. Run the same
8023                // analyzer the physical planner uses; if it reports a
8024                // hash-index hit, surface it in EXPLAIN.
8025                if let Some(label_name) = self.schema.label_name_by_id(*label_id) {
8026                    let analyzer = crate::query::pushdown::IndexAwareAnalyzer::new(&self.schema);
8027                    // The variable name is the scan's binding variable; we
8028                    // reach for it via the Scan node directly.
8029                    if let LogicalPlan::Scan { variable, .. } = plan {
8030                        let strategy = analyzer.analyze(filter, variable, *label_id);
8031                        for prop in strategy.hash_index_columns {
8032                            usage.push(IndexUsage {
8033                                label_or_type: label_name.to_string(),
8034                                property: prop,
8035                                index_type: "HASH".to_string(),
8036                                used: true,
8037                                reason: Some(
8038                                    "Hash index point lookup pushed into Lance scan".to_string(),
8039                                ),
8040                            });
8041                        }
8042                    }
8043                }
8044            }
8045            LogicalPlan::Scan { .. } => {}
8046            LogicalPlan::VectorKnn {
8047                label_id, property, ..
8048            } => {
8049                let label_name = self.schema.label_name_by_id(*label_id).unwrap_or("?");
8050                usage.push(IndexUsage {
8051                    label_or_type: label_name.to_string(),
8052                    property: property.clone(),
8053                    index_type: "VECTOR".to_string(),
8054                    used: true,
8055                    reason: None,
8056                });
8057            }
8058            LogicalPlan::Explain { plan } => self.collect_index_usage(plan, usage),
8059            LogicalPlan::Filter { input, .. } => self.collect_index_usage(input, usage),
8060            LogicalPlan::Project { input, .. } => self.collect_index_usage(input, usage),
8061            LogicalPlan::Limit { input, .. } => self.collect_index_usage(input, usage),
8062            LogicalPlan::Sort { input, .. } => self.collect_index_usage(input, usage),
8063            LogicalPlan::Aggregate { input, .. } => self.collect_index_usage(input, usage),
8064            LogicalPlan::Traverse { input, .. } => self.collect_index_usage(input, usage),
8065            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
8066                self.collect_index_usage(left, usage);
8067                self.collect_index_usage(right, usage);
8068            }
8069            _ => {}
8070        }
8071    }
8072
8073    fn estimate_costs(&self, _plan: &LogicalPlan) -> Result<CostEstimates> {
8074        Ok(CostEstimates {
8075            estimated_rows: 100.0,
8076            estimated_cost: 10.0,
8077        })
8078    }
8079
8080    /// Collect index suggestions based on query patterns.
8081    ///
8082    /// Currently detects:
8083    /// - Temporal predicates from `uni.validAt()` function calls
8084    /// - Temporal predicates from `VALID_AT` macro expansion
8085    fn collect_index_suggestions(&self, plan: &LogicalPlan) -> Vec<IndexSuggestion> {
8086        let mut suggestions = Vec::new();
8087        self.collect_temporal_suggestions(plan, &mut suggestions);
8088        suggestions
8089    }
8090
8091    /// Recursively collect temporal index suggestions from the plan.
8092    fn collect_temporal_suggestions(
8093        &self,
8094        plan: &LogicalPlan,
8095        suggestions: &mut Vec<IndexSuggestion>,
8096    ) {
8097        match plan {
8098            LogicalPlan::Filter {
8099                input, predicate, ..
8100            } => {
8101                // Check for temporal patterns in the predicate
8102                self.detect_temporal_pattern(predicate, suggestions);
8103                // Recurse into input
8104                self.collect_temporal_suggestions(input, suggestions);
8105            }
8106            LogicalPlan::Explain { plan } => self.collect_temporal_suggestions(plan, suggestions),
8107            LogicalPlan::Project { input, .. } => {
8108                self.collect_temporal_suggestions(input, suggestions)
8109            }
8110            LogicalPlan::Limit { input, .. } => {
8111                self.collect_temporal_suggestions(input, suggestions)
8112            }
8113            LogicalPlan::Sort { input, .. } => {
8114                self.collect_temporal_suggestions(input, suggestions)
8115            }
8116            LogicalPlan::Aggregate { input, .. } => {
8117                self.collect_temporal_suggestions(input, suggestions)
8118            }
8119            LogicalPlan::Traverse { input, .. } => {
8120                self.collect_temporal_suggestions(input, suggestions)
8121            }
8122            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
8123                self.collect_temporal_suggestions(left, suggestions);
8124                self.collect_temporal_suggestions(right, suggestions);
8125            }
8126            _ => {}
8127        }
8128    }
8129
8130    /// Detect temporal predicate patterns and suggest indexes.
8131    ///
8132    /// Detects two patterns:
8133    /// 1. `uni.validAt(node, 'start_prop', 'end_prop', time)` function call
8134    /// 2. `node.valid_from <= time AND (node.valid_to IS NULL OR node.valid_to > time)` from VALID_AT macro
8135    fn detect_temporal_pattern(&self, expr: &Expr, suggestions: &mut Vec<IndexSuggestion>) {
8136        match expr {
8137            // Pattern 1: uni.temporal.validAt() function call
8138            Expr::FunctionCall { name, args, .. }
8139                if (name.eq_ignore_ascii_case("uni.temporal.validAt")
8140                    || name.eq_ignore_ascii_case("validAt"))
8141                    && args.len() >= 2 =>
8142            {
8143                // args[0] = node, args[1] = start_prop, args[2] = end_prop, args[3] = time
8144                let start_prop = if let Some(Expr::Literal(CypherLiteral::String(s))) = args.get(1)
8145                {
8146                    s.clone()
8147                } else {
8148                    "valid_from".to_string()
8149                };
8150
8151                // Try to extract label from the node expression
8152                if let Some(var) = args.first().and_then(|e| e.extract_variable()) {
8153                    self.suggest_temporal_index(&var, &start_prop, suggestions);
8154                }
8155            }
8156
8157            // Pattern 2: VALID_AT macro expansion - look for property <= time pattern
8158            Expr::BinaryOp {
8159                left,
8160                op: BinaryOp::And,
8161                right,
8162            } => {
8163                // Check left side for `prop <= time` pattern (temporal start condition)
8164                if let Expr::BinaryOp {
8165                    left: prop_expr,
8166                    op: BinaryOp::LtEq,
8167                    ..
8168                } = left.as_ref()
8169                    && let Expr::Property(base, prop_name) = prop_expr.as_ref()
8170                    && (prop_name == "valid_from"
8171                        || prop_name.contains("start")
8172                        || prop_name.contains("from")
8173                        || prop_name.contains("begin"))
8174                    && let Some(var) = base.extract_variable()
8175                {
8176                    self.suggest_temporal_index(&var, prop_name, suggestions);
8177                }
8178
8179                // Recurse into both sides of AND
8180                self.detect_temporal_pattern(left.as_ref(), suggestions);
8181                self.detect_temporal_pattern(right.as_ref(), suggestions);
8182            }
8183
8184            // Recurse into other binary ops
8185            Expr::BinaryOp { left, right, .. } => {
8186                self.detect_temporal_pattern(left.as_ref(), suggestions);
8187                self.detect_temporal_pattern(right.as_ref(), suggestions);
8188            }
8189
8190            _ => {}
8191        }
8192    }
8193
8194    /// Suggest a scalar index for a temporal property if one doesn't already exist.
8195    fn suggest_temporal_index(
8196        &self,
8197        _variable: &str,
8198        property: &str,
8199        suggestions: &mut Vec<IndexSuggestion>,
8200    ) {
8201        // Check if a scalar index already exists for this property
8202        // We need to check all labels since we may not know the exact label from the variable
8203        let mut has_index = false;
8204
8205        for index in &self.schema.indexes {
8206            if let IndexDefinition::Scalar(config) = index
8207                && config.properties.contains(&property.to_string())
8208            {
8209                has_index = true;
8210                break;
8211            }
8212        }
8213
8214        if !has_index {
8215            // Avoid duplicate suggestions
8216            let already_suggested = suggestions.iter().any(|s| s.property == property);
8217            if !already_suggested {
8218                suggestions.push(IndexSuggestion {
8219                    label_or_type: "(detected from temporal query)".to_string(),
8220                    property: property.to_string(),
8221                    index_type: "SCALAR (BTree)".to_string(),
8222                    reason: format!(
8223                        "Temporal queries using '{}' can benefit from a scalar index for range scans",
8224                        property
8225                    ),
8226                    create_statement: format!(
8227                        "CREATE INDEX idx_{} FOR (n:YourLabel) ON (n.{})",
8228                        property, property
8229                    ),
8230                });
8231            }
8232        }
8233    }
8234
8235    /// Helper functions for expression normalization
8236    /// Normalize an expression for storage: strip variable prefixes
8237    /// For simple property: u.email -> "email"
8238    /// For expressions: lower(u.email) -> "lower(email)"
8239    fn normalize_expression_for_storage(expr: &Expr) -> String {
8240        match expr {
8241            Expr::Property(base, prop) if matches!(**base, Expr::Variable(_)) => prop.clone(),
8242            _ => {
8243                // Serialize expression and strip variable prefix
8244                let expr_str = expr.to_string_repr();
8245                Self::strip_variable_prefix(&expr_str)
8246            }
8247        }
8248    }
8249
8250    /// Strip variable references like "u.prop" from expression strings
8251    /// Converts "lower(u.email)" to "lower(email)"
8252    fn strip_variable_prefix(expr_str: &str) -> String {
8253        use regex::Regex;
8254        // Match patterns like "word.property" and replace with just "property"
8255        let re = Regex::new(r"\b\w+\.(\w+)").unwrap();
8256        re.replace_all(expr_str, "$1").to_string()
8257    }
8258
8259    /// Plan a schema command from the new AST
8260    fn plan_schema_command(&self, cmd: SchemaCommand) -> Result<LogicalPlan> {
8261        match cmd {
8262            SchemaCommand::CreateVectorIndex(c) => {
8263                // Parse index type from options (default: IvfPq)
8264                let opt = |key: &str| {
8265                    c.options
8266                        .get(key)
8267                        .and_then(|v| v.as_str())
8268                        .and_then(|s| s.parse::<u32>().ok())
8269                };
8270                let opt_u8 = |key: &str| -> Option<u8> {
8271                    c.options
8272                        .get(key)
8273                        .and_then(|v| v.as_str())
8274                        .and_then(|s| s.parse::<u8>().ok())
8275                };
8276                let index_type = match c.options.get("type").and_then(|v| v.as_str()) {
8277                    Some("flat") => VectorIndexType::Flat,
8278                    Some("ivf_flat") => VectorIndexType::IvfFlat {
8279                        num_partitions: opt("partitions").unwrap_or(256),
8280                    },
8281                    Some("ivf_sq") => VectorIndexType::IvfSq {
8282                        num_partitions: opt("partitions").unwrap_or(256),
8283                    },
8284                    Some("ivf_rq") => VectorIndexType::IvfRq {
8285                        num_partitions: opt("partitions").unwrap_or(256),
8286                        num_bits: opt_u8("num_bits"),
8287                    },
8288                    Some("hnsw_flat") => VectorIndexType::HnswFlat {
8289                        m: opt("m").unwrap_or(16),
8290                        ef_construction: opt("ef_construction").unwrap_or(200),
8291                        num_partitions: opt("partitions"),
8292                    },
8293                    Some("hnsw") | Some("hnsw_sq") => VectorIndexType::HnswSq {
8294                        m: opt("m").unwrap_or(16),
8295                        ef_construction: opt("ef_construction").unwrap_or(200),
8296                        num_partitions: opt("partitions"),
8297                    },
8298                    Some("hnsw_pq") => VectorIndexType::HnswPq {
8299                        m: opt("m").unwrap_or(16),
8300                        ef_construction: opt("ef_construction").unwrap_or(200),
8301                        num_sub_vectors: opt("sub_vectors").unwrap_or(16),
8302                        num_partitions: opt("partitions"),
8303                    },
8304                    _ => VectorIndexType::IvfPq {
8305                        num_partitions: opt("partitions").unwrap_or(256),
8306                        num_sub_vectors: opt("sub_vectors").unwrap_or(16),
8307                        bits_per_subvector: opt_u8("num_bits").unwrap_or(8),
8308                    },
8309                };
8310
8311                // Parse embedding config from options
8312                let embedding_config = if let Some(emb_val) = c.options.get("embedding") {
8313                    Self::parse_embedding_config(emb_val)?
8314                } else {
8315                    None
8316                };
8317
8318                let config = VectorIndexConfig {
8319                    name: c.name,
8320                    label: c.label,
8321                    property: c.property,
8322                    metric: DistanceMetric::Cosine,
8323                    index_type,
8324                    embedding_config,
8325                    metadata: Default::default(),
8326                };
8327                Ok(LogicalPlan::CreateVectorIndex {
8328                    config,
8329                    if_not_exists: c.if_not_exists,
8330                })
8331            }
8332            SchemaCommand::CreateFullTextIndex(cfg) => Ok(LogicalPlan::CreateFullTextIndex {
8333                config: FullTextIndexConfig {
8334                    name: cfg.name,
8335                    label: cfg.label,
8336                    properties: cfg.properties,
8337                    tokenizer: TokenizerConfig::Standard,
8338                    with_positions: true,
8339                    metadata: Default::default(),
8340                },
8341                if_not_exists: cfg.if_not_exists,
8342            }),
8343            SchemaCommand::CreateScalarIndex(cfg) => {
8344                // Convert expressions to storage strings (strip variable prefix)
8345                let properties: Vec<String> = cfg
8346                    .expressions
8347                    .iter()
8348                    .map(Self::normalize_expression_for_storage)
8349                    .collect();
8350
8351                Ok(LogicalPlan::CreateScalarIndex {
8352                    config: ScalarIndexConfig {
8353                        name: cfg.name,
8354                        label: cfg.label,
8355                        properties,
8356                        index_type: ScalarIndexType::BTree,
8357                        where_clause: cfg.where_clause.map(|e| e.to_string_repr()),
8358                        metadata: Default::default(),
8359                    },
8360                    if_not_exists: cfg.if_not_exists,
8361                })
8362            }
8363            SchemaCommand::CreateJsonFtsIndex(cfg) => {
8364                let with_positions = cfg
8365                    .options
8366                    .get("with_positions")
8367                    .and_then(|v| v.as_bool())
8368                    .unwrap_or(false);
8369                Ok(LogicalPlan::CreateJsonFtsIndex {
8370                    config: JsonFtsIndexConfig {
8371                        name: cfg.name,
8372                        label: cfg.label,
8373                        column: cfg.column,
8374                        paths: Vec::new(),
8375                        with_positions,
8376                        metadata: Default::default(),
8377                    },
8378                    if_not_exists: cfg.if_not_exists,
8379                })
8380            }
8381            SchemaCommand::DropIndex(drop) => Ok(LogicalPlan::DropIndex {
8382                name: drop.name,
8383                if_exists: false, // new AST doesn't have if_exists for DROP INDEX yet
8384            }),
8385            SchemaCommand::CreateConstraint(c) => Ok(LogicalPlan::CreateConstraint(c)),
8386            SchemaCommand::DropConstraint(c) => Ok(LogicalPlan::DropConstraint(c)),
8387            SchemaCommand::CreateLabel(c) => Ok(LogicalPlan::CreateLabel(c)),
8388            SchemaCommand::CreateEdgeType(c) => Ok(LogicalPlan::CreateEdgeType(c)),
8389            SchemaCommand::AlterLabel(c) => Ok(LogicalPlan::AlterLabel(c)),
8390            SchemaCommand::AlterEdgeType(c) => Ok(LogicalPlan::AlterEdgeType(c)),
8391            SchemaCommand::DropLabel(c) => Ok(LogicalPlan::DropLabel(c)),
8392            SchemaCommand::DropEdgeType(c) => Ok(LogicalPlan::DropEdgeType(c)),
8393            SchemaCommand::ShowConstraints(c) => Ok(LogicalPlan::ShowConstraints(c)),
8394            SchemaCommand::ShowIndexes(c) => Ok(LogicalPlan::ShowIndexes { filter: c.filter }),
8395            SchemaCommand::ShowDatabase => Ok(LogicalPlan::ShowDatabase),
8396            SchemaCommand::ShowConfig => Ok(LogicalPlan::ShowConfig),
8397            SchemaCommand::ShowStatistics => Ok(LogicalPlan::ShowStatistics),
8398            SchemaCommand::Vacuum => Ok(LogicalPlan::Vacuum),
8399            SchemaCommand::Checkpoint => Ok(LogicalPlan::Checkpoint),
8400            SchemaCommand::Backup { path } => Ok(LogicalPlan::Backup {
8401                destination: path,
8402                options: HashMap::new(),
8403            }),
8404            SchemaCommand::CopyTo(cmd) => Ok(LogicalPlan::CopyTo {
8405                label: cmd.label,
8406                path: cmd.path,
8407                format: cmd.format,
8408                options: cmd.options,
8409            }),
8410            SchemaCommand::CopyFrom(cmd) => Ok(LogicalPlan::CopyFrom {
8411                label: cmd.label,
8412                path: cmd.path,
8413                format: cmd.format,
8414                options: cmd.options,
8415            }),
8416        }
8417    }
8418
8419    fn parse_embedding_config(emb_val: &Value) -> Result<Option<EmbeddingConfig>> {
8420        let obj = emb_val
8421            .as_object()
8422            .ok_or_else(|| anyhow!("embedding option must be an object"))?;
8423
8424        // Parse alias (required)
8425        let alias = obj
8426            .get("alias")
8427            .and_then(|v| v.as_str())
8428            .ok_or_else(|| anyhow!("embedding.alias is required"))?;
8429
8430        // Parse source properties (required)
8431        let source_properties = obj
8432            .get("source")
8433            .and_then(|v| v.as_array())
8434            .ok_or_else(|| anyhow!("embedding.source is required and must be an array"))?
8435            .iter()
8436            .filter_map(|v| v.as_str().map(|s| s.to_string()))
8437            .collect::<Vec<_>>();
8438
8439        if source_properties.is_empty() {
8440            return Err(anyhow!(
8441                "embedding.source must contain at least one property"
8442            ));
8443        }
8444
8445        let batch_size = obj
8446            .get("batch_size")
8447            .and_then(|v| v.as_u64())
8448            .map(|v| v as usize)
8449            .unwrap_or(32);
8450
8451        let document_prefix = obj
8452            .get("document_prefix")
8453            .and_then(|v| v.as_str())
8454            .map(|s| s.to_string());
8455
8456        let query_prefix = obj
8457            .get("query_prefix")
8458            .and_then(|v| v.as_str())
8459            .map(|s| s.to_string());
8460
8461        Ok(Some(EmbeddingConfig {
8462            alias: alias.to_string(),
8463            source_properties,
8464            batch_size,
8465            document_prefix,
8466            query_prefix,
8467        }))
8468    }
8469}
8470
8471/// Collect all properties referenced anywhere in the LogicalPlan tree.
8472///
8473/// This is critical for window functions: properties must be materialized
8474/// at the Scan node so they're available for window operations later.
8475///
8476/// Returns a mapping of variable name → property names (e.g., "e" → {"dept", "salary"}).
8477pub fn collect_properties_from_plan(plan: &LogicalPlan) -> HashMap<String, HashSet<String>> {
8478    let mut properties: HashMap<String, HashSet<String>> = HashMap::new();
8479    collect_properties_recursive(plan, &mut properties);
8480    properties
8481}
8482
8483/// Recursively walk the LogicalPlan tree and collect all property references.
8484fn collect_properties_recursive(
8485    plan: &LogicalPlan,
8486    properties: &mut HashMap<String, HashSet<String>>,
8487) {
8488    match plan {
8489        LogicalPlan::Window {
8490            input,
8491            window_exprs,
8492        } => {
8493            // Collect from window expressions
8494            for expr in window_exprs {
8495                collect_properties_from_expr_into(expr, properties);
8496            }
8497            collect_properties_recursive(input, properties);
8498        }
8499        LogicalPlan::Project { input, projections } => {
8500            for (expr, _alias) in projections {
8501                collect_properties_from_expr_into(expr, properties);
8502            }
8503            collect_properties_recursive(input, properties);
8504        }
8505        LogicalPlan::Sort { input, order_by } => {
8506            for sort_item in order_by {
8507                collect_properties_from_expr_into(&sort_item.expr, properties);
8508            }
8509            collect_properties_recursive(input, properties);
8510        }
8511        LogicalPlan::Filter {
8512            input, predicate, ..
8513        } => {
8514            collect_properties_from_expr_into(predicate, properties);
8515            collect_properties_recursive(input, properties);
8516        }
8517        LogicalPlan::Aggregate {
8518            input,
8519            group_by,
8520            aggregates,
8521        } => {
8522            for expr in group_by {
8523                collect_properties_from_expr_into(expr, properties);
8524            }
8525            for expr in aggregates {
8526                collect_properties_from_expr_into(expr, properties);
8527            }
8528            collect_properties_recursive(input, properties);
8529        }
8530        LogicalPlan::Scan {
8531            filter: Some(expr), ..
8532        } => {
8533            collect_properties_from_expr_into(expr, properties);
8534        }
8535        LogicalPlan::Scan { filter: None, .. } => {}
8536        LogicalPlan::ExtIdLookup {
8537            filter: Some(expr), ..
8538        } => {
8539            collect_properties_from_expr_into(expr, properties);
8540        }
8541        LogicalPlan::ExtIdLookup { filter: None, .. } => {}
8542        LogicalPlan::ScanAll {
8543            filter: Some(expr), ..
8544        } => {
8545            collect_properties_from_expr_into(expr, properties);
8546        }
8547        LogicalPlan::ScanAll { filter: None, .. } => {}
8548        LogicalPlan::ScanMainByLabels {
8549            filter: Some(expr), ..
8550        } => {
8551            collect_properties_from_expr_into(expr, properties);
8552        }
8553        LogicalPlan::ScanMainByLabels { filter: None, .. } => {}
8554        LogicalPlan::TraverseMainByType {
8555            input,
8556            target_filter,
8557            ..
8558        } => {
8559            if let Some(expr) = target_filter {
8560                collect_properties_from_expr_into(expr, properties);
8561            }
8562            collect_properties_recursive(input, properties);
8563        }
8564        LogicalPlan::Traverse {
8565            input,
8566            target_filter,
8567            step_variable: _,
8568            ..
8569        } => {
8570            if let Some(expr) = target_filter {
8571                collect_properties_from_expr_into(expr, properties);
8572            }
8573            // Note: Edge properties (step_variable) will be collected from expressions
8574            // that reference them. The edge_properties field in LogicalPlan is populated
8575            // later during physical planning based on this collected map.
8576            collect_properties_recursive(input, properties);
8577        }
8578        LogicalPlan::Unwind { input, expr, .. } => {
8579            collect_properties_from_expr_into(expr, properties);
8580            collect_properties_recursive(input, properties);
8581        }
8582        LogicalPlan::Create { input, pattern } => {
8583            // Mark variables referenced in CREATE patterns with "*" so plan_scan
8584            // adds structural projections (bare entity columns). Without this,
8585            // execute_create_pattern() can't find bound variables and creates
8586            // spurious new nodes instead of using existing MATCH'd ones.
8587            mark_pattern_variables(pattern, properties);
8588            collect_properties_recursive(input, properties);
8589        }
8590        LogicalPlan::CreateBatch { input, patterns } => {
8591            for pattern in patterns {
8592                mark_pattern_variables(pattern, properties);
8593            }
8594            collect_properties_recursive(input, properties);
8595        }
8596        LogicalPlan::Merge {
8597            input,
8598            pattern,
8599            on_match,
8600            on_create,
8601        } => {
8602            mark_pattern_variables(pattern, properties);
8603            if let Some(set_clause) = on_match {
8604                mark_set_item_variables(&set_clause.items, properties);
8605            }
8606            if let Some(set_clause) = on_create {
8607                mark_set_item_variables(&set_clause.items, properties);
8608            }
8609            collect_properties_recursive(input, properties);
8610        }
8611        LogicalPlan::Set { input, items } => {
8612            mark_set_item_variables(items, properties);
8613            collect_properties_recursive(input, properties);
8614        }
8615        LogicalPlan::Remove { input, items } => {
8616            for item in items {
8617                match item {
8618                    RemoveItem::Property(expr) => {
8619                        // REMOVE n.prop — collect the property and mark the variable
8620                        // with "*" so full structural projection is applied.
8621                        collect_properties_from_expr_into(expr, properties);
8622                        if let Expr::Property(base, _) = expr
8623                            && let Expr::Variable(var) = base.as_ref()
8624                        {
8625                            properties
8626                                .entry(var.clone())
8627                                .or_default()
8628                                .insert("*".to_string());
8629                        }
8630                    }
8631                    RemoveItem::Labels { variable, .. } => {
8632                        // REMOVE n:Label — mark n with "*"
8633                        properties
8634                            .entry(variable.clone())
8635                            .or_default()
8636                            .insert("*".to_string());
8637                    }
8638                }
8639            }
8640            collect_properties_recursive(input, properties);
8641        }
8642        LogicalPlan::Delete { input, items, .. } => {
8643            for expr in items {
8644                collect_properties_from_expr_into(expr, properties);
8645            }
8646            collect_properties_recursive(input, properties);
8647        }
8648        LogicalPlan::Foreach {
8649            input, list, body, ..
8650        } => {
8651            collect_properties_from_expr_into(list, properties);
8652            for plan in body {
8653                collect_properties_recursive(plan, properties);
8654            }
8655            collect_properties_recursive(input, properties);
8656        }
8657        LogicalPlan::Limit { input, .. } => {
8658            collect_properties_recursive(input, properties);
8659        }
8660        LogicalPlan::CrossJoin { left, right } => {
8661            collect_properties_recursive(left, properties);
8662            collect_properties_recursive(right, properties);
8663        }
8664        LogicalPlan::Apply {
8665            input,
8666            subquery,
8667            input_filter,
8668        } => {
8669            if let Some(expr) = input_filter {
8670                collect_properties_from_expr_into(expr, properties);
8671            }
8672            collect_properties_recursive(input, properties);
8673            collect_properties_recursive(subquery, properties);
8674        }
8675        LogicalPlan::Union { left, right, .. } => {
8676            collect_properties_recursive(left, properties);
8677            collect_properties_recursive(right, properties);
8678        }
8679        LogicalPlan::RecursiveCTE {
8680            initial, recursive, ..
8681        } => {
8682            collect_properties_recursive(initial, properties);
8683            collect_properties_recursive(recursive, properties);
8684        }
8685        LogicalPlan::ProcedureCall { arguments, .. } => {
8686            for arg in arguments {
8687                collect_properties_from_expr_into(arg, properties);
8688            }
8689        }
8690        LogicalPlan::VectorKnn { query, .. } => {
8691            collect_properties_from_expr_into(query, properties);
8692        }
8693        LogicalPlan::InvertedIndexLookup { terms, .. } => {
8694            collect_properties_from_expr_into(terms, properties);
8695        }
8696        LogicalPlan::ShortestPath { input, .. } => {
8697            collect_properties_recursive(input, properties);
8698        }
8699        LogicalPlan::AllShortestPaths { input, .. } => {
8700            collect_properties_recursive(input, properties);
8701        }
8702        LogicalPlan::Distinct { input } => {
8703            collect_properties_recursive(input, properties);
8704        }
8705        LogicalPlan::QuantifiedPattern {
8706            input,
8707            pattern_plan,
8708            ..
8709        } => {
8710            collect_properties_recursive(input, properties);
8711            collect_properties_recursive(pattern_plan, properties);
8712        }
8713        LogicalPlan::BindZeroLengthPath { input, .. } => {
8714            collect_properties_recursive(input, properties);
8715        }
8716        LogicalPlan::BindPath { input, .. } => {
8717            collect_properties_recursive(input, properties);
8718        }
8719        LogicalPlan::SubqueryCall { input, subquery } => {
8720            collect_properties_recursive(input, properties);
8721            collect_properties_recursive(subquery, properties);
8722        }
8723        LogicalPlan::LocyProject {
8724            input, projections, ..
8725        } => {
8726            for (expr, _alias) in projections {
8727                match expr {
8728                    // Bare variable in LocyProject: only need _vid for node variables
8729                    // (plan_locy_project extracts VID directly). Adding "*" would create
8730                    // a structural Struct column that conflicts with derived scan columns.
8731                    Expr::Variable(name) if !name.contains('.') => {
8732                        properties
8733                            .entry(name.clone())
8734                            .or_default()
8735                            .insert("_vid".to_string());
8736                    }
8737                    _ => collect_properties_from_expr_into(expr, properties),
8738                }
8739            }
8740            collect_properties_recursive(input, properties);
8741        }
8742        LogicalPlan::LocyFold {
8743            input,
8744            fold_bindings,
8745            ..
8746        } => {
8747            for (_name, expr) in fold_bindings {
8748                collect_properties_from_expr_into(expr, properties);
8749            }
8750            collect_properties_recursive(input, properties);
8751        }
8752        LogicalPlan::LocyBestBy {
8753            input, criteria, ..
8754        } => {
8755            for (expr, _asc) in criteria {
8756                collect_properties_from_expr_into(expr, properties);
8757            }
8758            collect_properties_recursive(input, properties);
8759        }
8760        LogicalPlan::LocyPriority { input, .. } => {
8761            collect_properties_recursive(input, properties);
8762        }
8763        LogicalPlan::LocyModelInvoke { input, .. } => {
8764            // Model invocations don't introduce new property accesses
8765            // — feature expressions are lifted to hidden YIELD items
8766            // by `extract_model_invocations` (uni-locy typecheck) and
8767            // their property refs are already collected via the
8768            // wrapped LocyProject's projection walk.
8769            collect_properties_recursive(input, properties);
8770        }
8771        // DDL and other plans don't reference properties
8772        _ => {}
8773    }
8774}
8775
8776/// Mark target variables from SET items with "*" and collect value expressions.
8777fn mark_set_item_variables(items: &[SetItem], properties: &mut HashMap<String, HashSet<String>>) {
8778    for item in items {
8779        match item {
8780            SetItem::Property { expr, value } => {
8781                // SET n.prop = val — mark n with STRUCT_ONLY_SENTINEL so the
8782                // scan builds the bare `n` struct column (needed for executor
8783                // `row.get(var_name)`) WITHOUT pulling the full schema. The
8784                // explicit `prop` is collected via `collect_properties_from_expr_into`
8785                // below and joins the variable's HashSet alongside the sentinel.
8786                //
8787                // If the same variable is also referenced bare elsewhere
8788                // (e.g. `SET n.x = 1 RETURN n`), `collect_properties_from_expr_into`
8789                // inserts "*" through the bare-Variable path; "*" dominates
8790                // the sentinel in `resolve_properties`, so the full schema
8791                // is still pulled when actually required.
8792                collect_properties_from_expr_into(expr, properties);
8793                collect_properties_from_expr_into(value, properties);
8794                if let Expr::Property(base, _) = expr
8795                    && let Expr::Variable(var) = base.as_ref()
8796                {
8797                    properties
8798                        .entry(var.clone())
8799                        .or_default()
8800                        .insert(STRUCT_ONLY_SENTINEL.to_string());
8801                }
8802            }
8803            SetItem::Labels { variable, .. } => {
8804                // SET n:Label — need full access to n
8805                properties
8806                    .entry(variable.clone())
8807                    .or_default()
8808                    .insert("*".to_string());
8809            }
8810            SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
8811                // SET n = {props} or SET n += {props}
8812                properties
8813                    .entry(variable.clone())
8814                    .or_default()
8815                    .insert("*".to_string());
8816                collect_properties_from_expr_into(value, properties);
8817            }
8818        }
8819    }
8820}
8821
8822/// Mark all variables in a CREATE/MERGE pattern with "*" so that plan_scan
8823/// adds structural projections (bare entity Struct columns) for them.
8824/// This is needed so that execute_create_pattern() can find bound variables
8825/// in the row HashMap and reuse existing nodes instead of creating new ones.
8826fn mark_pattern_variables(pattern: &Pattern, properties: &mut HashMap<String, HashSet<String>>) {
8827    for path in &pattern.paths {
8828        if let Some(ref v) = path.variable {
8829            properties
8830                .entry(v.clone())
8831                .or_default()
8832                .insert("*".to_string());
8833        }
8834        for element in &path.elements {
8835            match element {
8836                PatternElement::Node(n) => {
8837                    if let Some(ref v) = n.variable {
8838                        properties
8839                            .entry(v.clone())
8840                            .or_default()
8841                            .insert("*".to_string());
8842                    }
8843                    // Also collect properties from inline property expressions
8844                    if let Some(ref props) = n.properties {
8845                        collect_properties_from_expr_into(props, properties);
8846                    }
8847                }
8848                PatternElement::Relationship(r) => {
8849                    if let Some(ref v) = r.variable {
8850                        properties
8851                            .entry(v.clone())
8852                            .or_default()
8853                            .insert("*".to_string());
8854                    }
8855                    if let Some(ref props) = r.properties {
8856                        collect_properties_from_expr_into(props, properties);
8857                    }
8858                }
8859                PatternElement::Parenthesized { pattern, .. } => {
8860                    let sub = Pattern {
8861                        paths: vec![pattern.as_ref().clone()],
8862                    };
8863                    mark_pattern_variables(&sub, properties);
8864                }
8865            }
8866        }
8867    }
8868}
8869
8870/// Collect properties from an expression into a HashMap.
8871fn collect_properties_from_expr_into(
8872    expr: &Expr,
8873    properties: &mut HashMap<String, HashSet<String>>,
8874) {
8875    match expr {
8876        Expr::PatternComprehension {
8877            where_clause,
8878            map_expr,
8879            ..
8880        } => {
8881            // Collect properties from the WHERE clause and map expression.
8882            // The pattern itself creates local bindings that don't need
8883            // property collection from the outer scope.
8884            if let Some(where_expr) = where_clause {
8885                collect_properties_from_expr_into(where_expr, properties);
8886            }
8887            collect_properties_from_expr_into(map_expr, properties);
8888        }
8889        Expr::Variable(name) => {
8890            // Handle transformed property expressions like "e.dept" (after transform_window_expr_properties)
8891            if let Some((var, prop)) = name.split_once('.') {
8892                properties
8893                    .entry(var.to_string())
8894                    .or_default()
8895                    .insert(prop.to_string());
8896            } else {
8897                // Bare variable (e.g., RETURN n) — needs all properties materialized
8898                properties
8899                    .entry(name.clone())
8900                    .or_default()
8901                    .insert("*".to_string());
8902            }
8903        }
8904        Expr::Property(base, name) => {
8905            // Extract variable name from the base expression
8906            if let Expr::Variable(var) = base.as_ref() {
8907                properties
8908                    .entry(var.clone())
8909                    .or_default()
8910                    .insert(name.clone());
8911                // Don't recurse into Variable — that would mark it as a bare
8912                // variable reference (adding "*") when it's just a property base.
8913            } else {
8914                // Recurse for complex base expressions (nested property, function call, etc.)
8915                collect_properties_from_expr_into(base, properties);
8916            }
8917        }
8918        Expr::BinaryOp { left, right, .. } => {
8919            collect_properties_from_expr_into(left, properties);
8920            collect_properties_from_expr_into(right, properties);
8921        }
8922        Expr::FunctionCall {
8923            name,
8924            args,
8925            window_spec,
8926            ..
8927        } => {
8928            // Analyze function for property requirements (pushdown hydration)
8929            analyze_function_property_requirements(name, args, properties);
8930
8931            // Collect from arguments
8932            for arg in args {
8933                collect_properties_from_expr_into(arg, properties);
8934            }
8935
8936            // Collect from window spec (PARTITION BY, ORDER BY)
8937            if let Some(spec) = window_spec {
8938                for part_expr in &spec.partition_by {
8939                    collect_properties_from_expr_into(part_expr, properties);
8940                }
8941                for sort_item in &spec.order_by {
8942                    collect_properties_from_expr_into(&sort_item.expr, properties);
8943                }
8944            }
8945        }
8946        Expr::UnaryOp { expr, .. } => {
8947            collect_properties_from_expr_into(expr, properties);
8948        }
8949        Expr::List(items) => {
8950            for item in items {
8951                collect_properties_from_expr_into(item, properties);
8952            }
8953        }
8954        Expr::Map(entries) => {
8955            for (_key, value) in entries {
8956                collect_properties_from_expr_into(value, properties);
8957            }
8958        }
8959        Expr::ListComprehension {
8960            list,
8961            where_clause,
8962            map_expr,
8963            ..
8964        } => {
8965            collect_properties_from_expr_into(list, properties);
8966            if let Some(where_expr) = where_clause {
8967                collect_properties_from_expr_into(where_expr, properties);
8968            }
8969            collect_properties_from_expr_into(map_expr, properties);
8970        }
8971        Expr::Case {
8972            expr,
8973            when_then,
8974            else_expr,
8975        } => {
8976            if let Some(scrutinee_expr) = expr {
8977                collect_properties_from_expr_into(scrutinee_expr, properties);
8978            }
8979            for (when, then) in when_then {
8980                collect_properties_from_expr_into(when, properties);
8981                collect_properties_from_expr_into(then, properties);
8982            }
8983            if let Some(default_expr) = else_expr {
8984                collect_properties_from_expr_into(default_expr, properties);
8985            }
8986        }
8987        Expr::Quantifier {
8988            list, predicate, ..
8989        } => {
8990            collect_properties_from_expr_into(list, properties);
8991            collect_properties_from_expr_into(predicate, properties);
8992        }
8993        Expr::Reduce {
8994            init, list, expr, ..
8995        } => {
8996            collect_properties_from_expr_into(init, properties);
8997            collect_properties_from_expr_into(list, properties);
8998            collect_properties_from_expr_into(expr, properties);
8999        }
9000        Expr::Exists { query, .. } => {
9001            // Walk into EXISTS body to collect property references for outer-scope variables.
9002            // This ensures correlated properties (e.g., a.city inside EXISTS where a is outer)
9003            // are included in the outer scan's property list. Extra properties collected for
9004            // inner-only variables are harmless — the outer scan ignores unknown variable names.
9005            collect_properties_from_subquery(query, properties);
9006        }
9007        Expr::CountSubquery(query) | Expr::CollectSubquery(query) => {
9008            collect_properties_from_subquery(query, properties);
9009        }
9010        Expr::IsNull(expr) | Expr::IsNotNull(expr) | Expr::IsUnique(expr) => {
9011            collect_properties_from_expr_into(expr, properties);
9012        }
9013        Expr::In { expr, list } => {
9014            collect_properties_from_expr_into(expr, properties);
9015            collect_properties_from_expr_into(list, properties);
9016        }
9017        Expr::ArrayIndex { array, index } => {
9018            if let Expr::Variable(var) = array.as_ref() {
9019                if let Expr::Literal(CypherLiteral::String(prop_name)) = index.as_ref() {
9020                    // Static string key: e['name'] → only need that specific property
9021                    properties
9022                        .entry(var.clone())
9023                        .or_default()
9024                        .insert(prop_name.clone());
9025                } else {
9026                    // Dynamic property access: e[prop] → need all properties
9027                    properties
9028                        .entry(var.clone())
9029                        .or_default()
9030                        .insert("*".to_string());
9031                }
9032            }
9033            collect_properties_from_expr_into(array, properties);
9034            collect_properties_from_expr_into(index, properties);
9035        }
9036        Expr::ArraySlice { array, start, end } => {
9037            collect_properties_from_expr_into(array, properties);
9038            if let Some(start_expr) = start {
9039                collect_properties_from_expr_into(start_expr, properties);
9040            }
9041            if let Some(end_expr) = end {
9042                collect_properties_from_expr_into(end_expr, properties);
9043            }
9044        }
9045        Expr::ValidAt {
9046            entity,
9047            timestamp,
9048            start_prop,
9049            end_prop,
9050        } => {
9051            // Extract property requirements from ValidAt expression
9052            if let Expr::Variable(var) = entity.as_ref() {
9053                if let Some(prop) = start_prop {
9054                    properties
9055                        .entry(var.clone())
9056                        .or_default()
9057                        .insert(prop.clone());
9058                }
9059                if let Some(prop) = end_prop {
9060                    properties
9061                        .entry(var.clone())
9062                        .or_default()
9063                        .insert(prop.clone());
9064                }
9065            }
9066            collect_properties_from_expr_into(entity, properties);
9067            collect_properties_from_expr_into(timestamp, properties);
9068        }
9069        Expr::MapProjection { base, items } => {
9070            collect_properties_from_expr_into(base, properties);
9071            for item in items {
9072                match item {
9073                    uni_cypher::ast::MapProjectionItem::Property(prop) => {
9074                        if let Expr::Variable(var) = base.as_ref() {
9075                            properties
9076                                .entry(var.clone())
9077                                .or_default()
9078                                .insert(prop.clone());
9079                        }
9080                    }
9081                    uni_cypher::ast::MapProjectionItem::AllProperties => {
9082                        if let Expr::Variable(var) = base.as_ref() {
9083                            properties
9084                                .entry(var.clone())
9085                                .or_default()
9086                                .insert("*".to_string());
9087                        }
9088                    }
9089                    uni_cypher::ast::MapProjectionItem::LiteralEntry(_, expr) => {
9090                        collect_properties_from_expr_into(expr, properties);
9091                    }
9092                    uni_cypher::ast::MapProjectionItem::Variable(_) => {}
9093                }
9094            }
9095        }
9096        Expr::LabelCheck { expr, .. } => {
9097            collect_properties_from_expr_into(expr, properties);
9098        }
9099        // Parameters reference outer-scope variables (e.g., $p in correlated subqueries).
9100        // Mark them with "*" so the outer scan produces structural projections that
9101        // extract_row_params can resolve.
9102        Expr::Parameter(name) => {
9103            properties
9104                .entry(name.clone())
9105                .or_default()
9106                .insert("*".to_string());
9107        }
9108        // Literals and wildcard don't reference properties
9109        Expr::Literal(_) | Expr::Wildcard => {}
9110    }
9111}
9112
9113/// Walk a subquery (EXISTS/COUNT/COLLECT body) and collect property references.
9114///
9115/// This is needed so that correlated property accesses like `a.city` inside
9116/// `WHERE EXISTS { (a)-[:KNOWS]->(b) WHERE b.city = a.city }` cause the outer
9117/// scan to include `a.city` in its projected columns.
9118fn collect_properties_from_subquery(
9119    query: &Query,
9120    properties: &mut HashMap<String, HashSet<String>>,
9121) {
9122    match query {
9123        Query::Single(stmt) => {
9124            for clause in &stmt.clauses {
9125                match clause {
9126                    Clause::Match(m) => {
9127                        if let Some(ref wc) = m.where_clause {
9128                            collect_properties_from_expr_into(wc, properties);
9129                        }
9130                    }
9131                    Clause::With(w) => {
9132                        for item in &w.items {
9133                            if let ReturnItem::Expr { expr, .. } = item {
9134                                collect_properties_from_expr_into(expr, properties);
9135                            }
9136                        }
9137                        if let Some(ref wc) = w.where_clause {
9138                            collect_properties_from_expr_into(wc, properties);
9139                        }
9140                    }
9141                    Clause::Return(r) => {
9142                        for item in &r.items {
9143                            if let ReturnItem::Expr { expr, .. } = item {
9144                                collect_properties_from_expr_into(expr, properties);
9145                            }
9146                        }
9147                    }
9148                    _ => {}
9149                }
9150            }
9151        }
9152        Query::Union { left, right, .. } => {
9153            collect_properties_from_subquery(left, properties);
9154            collect_properties_from_subquery(right, properties);
9155        }
9156        _ => {}
9157    }
9158}
9159
9160/// Analyze function calls to extract property requirements for pushdown hydration
9161///
9162/// This function examines function calls and their arguments to determine which properties
9163/// need to be loaded for entity arguments. For example:
9164/// - validAt(e, 'start', 'end', ts) -> e needs {start, end}
9165/// - keys(n) -> n needs all properties (*)
9166///
9167/// The extracted requirements are added to the properties map for later use during
9168/// scan planning.
9169fn analyze_function_property_requirements(
9170    name: &str,
9171    args: &[Expr],
9172    properties: &mut HashMap<String, HashSet<String>>,
9173) {
9174    use crate::query::function_props::get_function_spec;
9175
9176    /// Helper to mark a variable as needing all properties.
9177    fn mark_wildcard(var: &str, properties: &mut HashMap<String, HashSet<String>>) {
9178        properties
9179            .entry(var.to_string())
9180            .or_default()
9181            .insert("*".to_string());
9182    }
9183
9184    // System-managed timestamp functions: require only the corresponding
9185    // `_created_at` / `_updated_at` column, not full entity materialization.
9186    if name.eq_ignore_ascii_case("created_at") || name.eq_ignore_ascii_case("updated_at") {
9187        if let Some(Expr::Variable(var)) = args.first() {
9188            let col = if name.eq_ignore_ascii_case("created_at") {
9189                "_created_at"
9190            } else {
9191                "_updated_at"
9192            };
9193            properties
9194                .entry(var.clone())
9195                .or_default()
9196                .insert(col.to_string());
9197        }
9198        return;
9199    }
9200
9201    let Some(spec) = get_function_spec(name) else {
9202        // Unknown function: conservatively require all properties for variable args
9203        for arg in args {
9204            if let Expr::Variable(var) = arg {
9205                mark_wildcard(var, properties);
9206            }
9207        }
9208        return;
9209    };
9210
9211    // Extract property names from string literal arguments
9212    for &(prop_arg_idx, entity_arg_idx) in spec.property_name_args {
9213        let entity_arg = args.get(entity_arg_idx);
9214        let prop_arg = args.get(prop_arg_idx);
9215
9216        match (entity_arg, prop_arg) {
9217            (Some(Expr::Variable(var)), Some(Expr::Literal(CypherLiteral::String(prop)))) => {
9218                properties
9219                    .entry(var.clone())
9220                    .or_default()
9221                    .insert(prop.clone());
9222            }
9223            (Some(Expr::Variable(var)), Some(Expr::Parameter(_))) => {
9224                // Parameter property name: need all properties
9225                mark_wildcard(var, properties);
9226            }
9227            _ => {}
9228        }
9229    }
9230
9231    // Handle full entity requirement (keys(), properties())
9232    if spec.needs_full_entity {
9233        for &idx in spec.entity_args {
9234            if let Some(Expr::Variable(var)) = args.get(idx) {
9235                mark_wildcard(var, properties);
9236            }
9237        }
9238    }
9239}
9240
9241// ============================================================================
9242// Phase 5a-impl — fork-aware fusion rewrite
9243// ============================================================================
9244
/// Trait that exposes the per-fork "is there a fork-local index for
/// `(label, column)`?" lookup. Implemented for `StorageManager` so
/// callers don't need to depend on the fork module directly; tests
/// can mock by implementing it on a `HashMap`.
pub trait ForkIndexLookup {
    /// Look up the fork-local index registered for `column` on the
    /// label named `label`.
    ///
    /// Returns the kind of the registered index, or `None` when no
    /// fork-local index is registered for that pair.
    fn fork_index_for(
        &self,
        label: &str,
        column: &str,
    ) -> Option<uni_store::fork::ForkLocalIndexKind>;

    /// Phase 5b followup: resolve a label id, then dispatch to
    /// `fork_index_for`. Used by the rewrite when wrapping
    /// `VectorKnn` and `InvertedIndexLookup` nodes which carry
    /// `label_id: u16` rather than the label name. Default returns
    /// `None`; the `StorageManager` impl resolves via its
    /// `schema_manager`.
    fn fork_index_for_label_id(
        &self,
        _label_id: u16,
        _column: &str,
    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
        // Implementors that cannot map ids to names never report an index.
        None
    }
}
9270
9271impl ForkIndexLookup for uni_store::storage::StorageManager {
9272    fn fork_index_for(
9273        &self,
9274        label: &str,
9275        column: &str,
9276    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9277        self.fork_index_exists(label, column)
9278    }
9279
9280    fn fork_index_for_label_id(
9281        &self,
9282        label_id: u16,
9283        column: &str,
9284    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9285        let schema = self.schema_manager().schema();
9286        let label_name = schema.label_name_by_id(label_id)?;
9287        self.fork_index_exists(label_name, column)
9288    }
9289}
9290
/// Walk a [`LogicalPlan`] tree and rewrite each `Scan` whose target
/// `(label, column)` has a registered fork-local index into the
/// matching `FusedIndexScan` variant.
///
/// Phase 5a-impl Step 4 covers `VidUidForkFirst`; Steps 5 and 6 add
/// `BtreeUnion` and `SortedKWayMerge` by extending `kind_for_filter`.
///
/// NOTE(review): no `kind_for_filter` is visible next to this function;
/// the index-kind mapping used by the rewrite here is `into_fusion_kind`.
/// Confirm whether the reference above is still current.
///
/// Idempotent: a tree that already contains `FusedIndexScan` nodes
/// passes through unchanged.
///
/// `plan` is consumed and the rewritten tree is returned. `lookup`
/// answers whether a fork-local index is registered for a given
/// `(label, column)`. All of the work is delegated to `rewrite_node`.
#[must_use]
pub fn rewrite_for_fork_fusion<L: ForkIndexLookup>(plan: LogicalPlan, lookup: &L) -> LogicalPlan {
    rewrite_node(plan, lookup)
}
9304
9305fn rewrite_node<L: ForkIndexLookup>(plan: LogicalPlan, lookup: &L) -> LogicalPlan {
9306    match plan {
9307        LogicalPlan::Scan {
9308            label_id,
9309            labels,
9310            variable,
9311            filter,
9312            optional,
9313        } => {
9314            // VidUid fusion only fires on a single-label scan with an
9315            // equality filter on a registered UID column. BTree and
9316            // Sorted will extend this match in Steps 5 and 6.
9317            let kind = if labels.len() == 1
9318                && let Some(col) = filter
9319                    .as_ref()
9320                    .and_then(|f| equality_target_column(f, &variable))
9321                && let Some(idx_kind) = lookup.fork_index_for(&labels[0], &col)
9322            {
9323                into_fusion_kind(idx_kind)
9324            } else {
9325                None
9326            };
9327            match kind {
9328                Some(kind) => LogicalPlan::FusedIndexScan {
9329                    label_id,
9330                    labels,
9331                    variable,
9332                    filter,
9333                    optional,
9334                    kind,
9335                },
9336                None => LogicalPlan::Scan {
9337                    label_id,
9338                    labels,
9339                    variable,
9340                    filter,
9341                    optional,
9342                },
9343            }
9344        }
9345        // Phase 5b followup: wrap lossy leaf operators when a
9346        // matching fork-local index has been registered. The wrap
9347        // preserves the original node's fields (the physical
9348        // planner unwraps and recurses); only the explain-plan
9349        // surface and runtime-stats operator name change. The
9350        // actual fusion still happens at the `BranchedBackend`
9351        // layer via Lance's per-branch reads.
9352        //
9353        // The CALL-style vector/FTS queries land as `ProcedureCall`
9354        // (not the dedicated `VectorKnn`/`InvertedIndexLookup`
9355        // operators); recognize those by procedure name and the
9356        // shape of their first two arguments (`label, column, ...`).
9357        LogicalPlan::ProcedureCall {
9358            procedure_name,
9359            arguments,
9360            yield_items,
9361        } => {
9362            let kind = procedure_call_fusion_kind(&procedure_name, &arguments, lookup);
9363            let inner = LogicalPlan::ProcedureCall {
9364                procedure_name,
9365                arguments,
9366                yield_items,
9367            };
9368            match kind {
9369                Some(kind) => LogicalPlan::FusedIndexScanWrapped {
9370                    inner: Box::new(inner),
9371                    kind,
9372                },
9373                None => inner,
9374            }
9375        }
9376        LogicalPlan::VectorKnn {
9377            label_id,
9378            variable,
9379            property,
9380            query,
9381            k,
9382            threshold,
9383        } => {
9384            if let Some(idx_kind) = lookup.fork_index_for_label_id(label_id, &property)
9385                && let Some(kind) = into_fusion_kind(idx_kind)
9386            {
9387                LogicalPlan::FusedIndexScanWrapped {
9388                    inner: Box::new(LogicalPlan::VectorKnn {
9389                        label_id,
9390                        variable,
9391                        property,
9392                        query,
9393                        k,
9394                        threshold,
9395                    }),
9396                    kind,
9397                }
9398            } else {
9399                LogicalPlan::VectorKnn {
9400                    label_id,
9401                    variable,
9402                    property,
9403                    query,
9404                    k,
9405                    threshold,
9406                }
9407            }
9408        }
9409        LogicalPlan::InvertedIndexLookup {
9410            label_id,
9411            variable,
9412            property,
9413            terms,
9414        } => {
9415            if let Some(idx_kind) = lookup.fork_index_for_label_id(label_id, &property)
9416                && let Some(kind) = into_fusion_kind(idx_kind)
9417            {
9418                LogicalPlan::FusedIndexScanWrapped {
9419                    inner: Box::new(LogicalPlan::InvertedIndexLookup {
9420                        label_id,
9421                        variable,
9422                        property,
9423                        terms,
9424                    }),
9425                    kind,
9426                }
9427            } else {
9428                LogicalPlan::InvertedIndexLookup {
9429                    label_id,
9430                    variable,
9431                    property,
9432                    terms,
9433                }
9434            }
9435        }
9436        // Tree-recursive variants — only the ones that can carry a
9437        // Scan in their subtree need to recurse here. Adding more is
9438        // safe (a missing recursion just means fusion doesn't fire
9439        // for that nested context, not incorrect results).
9440        LogicalPlan::Filter {
9441            input,
9442            predicate,
9443            optional_variables,
9444        } => LogicalPlan::Filter {
9445            input: Box::new(rewrite_node(*input, lookup)),
9446            predicate,
9447            optional_variables,
9448        },
9449        LogicalPlan::Project { input, projections } => LogicalPlan::Project {
9450            input: Box::new(rewrite_node(*input, lookup)),
9451            projections,
9452        },
9453        LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
9454            input: Box::new(rewrite_node(*input, lookup)),
9455            skip,
9456            fetch,
9457        },
9458        LogicalPlan::Sort { input, order_by } => {
9459            // Phase 5a-impl Sorted fusion: when the immediate child
9460            // is a single-label Scan AND the sole sort key is a
9461            // single-column property reference on that scan's
9462            // variable AND the column has a fork-local Sorted index
9463            // registered, rewrite to FusedIndexScan { SortedKWayMerge }.
9464            // Otherwise recurse normally.
9465            let new_input = match (*input, &order_by[..]) {
9466                (
9467                    LogicalPlan::Scan {
9468                        label_id,
9469                        labels,
9470                        variable,
9471                        filter,
9472                        optional,
9473                    },
9474                    [single_sort],
9475                ) if labels.len() == 1
9476                    && let Some(col) = column_of_scan_variable(&single_sort.expr, &variable)
9477                    && let Some(uni_store::fork::ForkLocalIndexKind::Sorted) =
9478                        lookup.fork_index_for(&labels[0], &col) =>
9479                {
9480                    LogicalPlan::FusedIndexScan {
9481                        label_id,
9482                        labels,
9483                        variable,
9484                        filter,
9485                        optional,
9486                        kind: FusionKind::SortedKWayMerge,
9487                    }
9488                }
9489                (other_input, _) => rewrite_node(other_input, lookup),
9490            };
9491            LogicalPlan::Sort {
9492                input: Box::new(new_input),
9493                order_by,
9494            }
9495        }
9496        LogicalPlan::Union { left, right, all } => LogicalPlan::Union {
9497            left: Box::new(rewrite_node(*left, lookup)),
9498            right: Box::new(rewrite_node(*right, lookup)),
9499            all,
9500        },
9501        // Everything else passes through unchanged. Adding more
9502        // arms is purely additive — fusion just doesn't fire inside
9503        // un-recursed-into subtrees.
9504        other => other,
9505    }
9506}
9507
9508/// Phase 5b followup: inspect a CALL-style procedure invocation
9509/// for a `(label, column)` pair and check whether a fork-local
9510/// index has been registered for it.
9511///
9512/// Recognizes:
9513/// - `uni.vector.query(label, column, query_vec, k)` → `AnnRerank`
9514///   when a `Vector` fork-local index exists.
9515/// - `uni.fts.query(label, column, query, k)` → `Bm25Rrf` when a
9516///   `FullText` fork-local index exists.
9517///
9518/// Returns `None` for any other procedure (no rewrite) or when the
9519/// registry has no matching entry.
9520fn procedure_call_fusion_kind<L: ForkIndexLookup>(
9521    procedure_name: &str,
9522    arguments: &[Expr],
9523    lookup: &L,
9524) -> Option<FusionKind> {
9525    if arguments.len() < 2 {
9526        return None;
9527    }
9528    let label = match &arguments[0] {
9529        Expr::Literal(uni_cypher::ast::CypherLiteral::String(s)) => s.as_str(),
9530        _ => return None,
9531    };
9532    let column = match &arguments[1] {
9533        Expr::Literal(uni_cypher::ast::CypherLiteral::String(s)) => s.as_str(),
9534        _ => return None,
9535    };
9536    let expected = match procedure_name {
9537        "uni.vector.query" => uni_store::fork::ForkLocalIndexKind::Vector,
9538        "uni.fts.query" => uni_store::fork::ForkLocalIndexKind::FullText,
9539        _ => return None,
9540    };
9541    let registered = lookup.fork_index_for(label, column)?;
9542    if registered != expected {
9543        return None;
9544    }
9545    into_fusion_kind(registered)
9546}
9547
9548/// Map a fork-local index kind to its planner-side fusion variant.
9549/// Returns `None` for any future `ForkLocalIndexKind` we don't yet
9550/// know how to fuse — the caller falls back to a regular Scan.
9551fn into_fusion_kind(kind: uni_store::fork::ForkLocalIndexKind) -> Option<FusionKind> {
9552    use uni_store::fork::ForkLocalIndexKind as K;
9553    match kind {
9554        K::VidUid => Some(FusionKind::VidUidForkFirst),
9555        K::ScalarBtree => Some(FusionKind::BtreeUnion),
9556        K::Sorted => Some(FusionKind::SortedKWayMerge),
9557        K::Vector => Some(FusionKind::AnnRerank),
9558        K::FullText => Some(FusionKind::Bm25Rrf),
9559        // `ForkLocalIndexKind` is `#[non_exhaustive]`; future kinds
9560        // we don't yet handle are silently passed through as a
9561        // regular Scan so a forward-incompatible binary doesn't
9562        // panic — just misses the fusion opportunity.
9563        _ => None,
9564    }
9565}
9566
9567/// Inspect a Scan filter `Expr` for a single-column equality predicate
9568/// against the scan's variable. Returns the column name if the
9569/// predicate matches the shape `variable.column = <literal_or_param>`
9570/// (or its commuted form). Returns `None` for any other shape — fusion
9571/// only fires on the simple case in Phase 5a-impl.
9572fn equality_target_column(filter: &Expr, scan_variable: &str) -> Option<String> {
9573    let (lhs, rhs) = match filter {
9574        Expr::BinaryOp {
9575            left,
9576            op: uni_cypher::ast::BinaryOp::Eq,
9577            right,
9578        } => (left.as_ref(), right.as_ref()),
9579        _ => return None,
9580    };
9581    // Try lhs = column-of-scan-var, rhs = literal/param; or commuted.
9582    if let Some(col) = column_of_scan_variable(lhs, scan_variable)
9583        && is_constant_or_param(rhs)
9584    {
9585        return Some(col);
9586    }
9587    if let Some(col) = column_of_scan_variable(rhs, scan_variable)
9588        && is_constant_or_param(lhs)
9589    {
9590        return Some(col);
9591    }
9592    None
9593}
9594
9595fn column_of_scan_variable(expr: &Expr, scan_variable: &str) -> Option<String> {
9596    if let Expr::Property(base, prop) = expr
9597        && let Expr::Variable(v) = base.as_ref()
9598        && v == scan_variable
9599    {
9600        return Some(prop.clone());
9601    }
9602    None
9603}
9604
/// Returns `true` when `expr` is a literal or a query parameter. These are
/// the only operand shapes `equality_target_column` accepts opposite the
/// column reference.
fn is_constant_or_param(expr: &Expr) -> bool {
    matches!(expr, Expr::Literal(_) | Expr::Parameter(_))
}
9608
#[cfg(test)]
mod pushdown_tests {
    use super::*;

    /// Variable name → property names requested for that variable.
    type Requirements = HashMap<String, HashSet<String>>;

    /// Build a bare variable reference.
    fn var(name: &str) -> Expr {
        Expr::Variable(name.to_string())
    }

    /// Build a string-literal expression.
    fn text(value: &str) -> Expr {
        Expr::Literal(CypherLiteral::String(value.to_string()))
    }

    /// Build a parameter reference.
    fn param(name: &str) -> Expr {
        Expr::Parameter(name.to_string())
    }

    /// Build the expected property set from plain names.
    fn prop_set(names: &[&str]) -> HashSet<String> {
        names.iter().map(|name| (*name).to_string()).collect()
    }

    #[test]
    fn test_validat_extracts_property_names() {
        // validAt(e, 'start', 'end', ts) → e: {start, end}
        let mut required = Requirements::new();
        let args = [var("e"), text("start"), text("end"), var("ts")];

        analyze_function_property_requirements("uni.temporal.validAt", &args, &mut required);

        assert!(required.contains_key("e"));
        assert_eq!(required.get("e").unwrap(), &prop_set(&["start", "end"]));
    }

    #[test]
    fn test_keys_requires_wildcard() {
        // keys(n) → n: {*}
        let mut required = Requirements::new();
        let args = [var("n")];

        analyze_function_property_requirements("keys", &args, &mut required);

        assert!(required.contains_key("n"));
        assert_eq!(required.get("n").unwrap(), &prop_set(&["*"]));
    }

    #[test]
    fn test_properties_requires_wildcard() {
        // properties(n) → n: {*}
        let mut required = Requirements::new();
        let args = [var("n")];

        analyze_function_property_requirements("properties", &args, &mut required);

        assert!(required.contains_key("n"));
        assert_eq!(required.get("n").unwrap(), &prop_set(&["*"]));
    }

    #[test]
    fn test_unknown_function_conservative() {
        // customUdf(e) → e: {*}
        let mut required = Requirements::new();
        let args = [var("e")];

        analyze_function_property_requirements("customUdf", &args, &mut required);

        assert!(required.contains_key("e"));
        assert_eq!(required.get("e").unwrap(), &prop_set(&["*"]));
    }

    #[test]
    fn test_parameter_property_name() {
        // validAt(e, $start, $end, ts) → e: {*}
        let mut required = Requirements::new();
        let args = [var("e"), param("start"), param("end"), var("ts")];

        analyze_function_property_requirements("uni.temporal.validAt", &args, &mut required);

        assert!(required.contains_key("e"));
        assert!(required.get("e").unwrap().contains("*"));
    }

    #[test]
    fn test_validat_expr_extracts_properties() {
        // The dedicated Expr::ValidAt variant reports both bound properties.
        let mut required = Requirements::new();
        let validat_expr = Expr::ValidAt {
            entity: Box::new(var("e")),
            timestamp: Box::new(var("ts")),
            start_prop: Some("valid_from".to_string()),
            end_prop: Some("valid_to".to_string()),
        };

        collect_properties_from_expr_into(&validat_expr, &mut required);

        assert!(required.contains_key("e"));
        assert!(required.get("e").unwrap().contains("valid_from"));
        assert!(required.get("e").unwrap().contains("valid_to"));
    }

    #[test]
    fn test_array_index_requires_wildcard() {
        // e[prop] → e: {*}
        let mut required = Requirements::new();
        let array_index_expr = Expr::ArrayIndex {
            array: Box::new(var("e")),
            index: Box::new(var("prop")),
        };

        collect_properties_from_expr_into(&array_index_expr, &mut required);

        assert!(required.contains_key("e"));
        assert!(required.get("e").unwrap().contains("*"));
    }

    #[test]
    fn test_property_access_extraction() {
        // e.name → e: {name}
        let mut required = Requirements::new();
        let prop_access = Expr::Property(Box::new(var("e")), "name".to_string());

        collect_properties_from_expr_into(&prop_access, &mut required);

        assert!(required.contains_key("e"));
        assert!(required.get("e").unwrap().contains("name"));
    }
}