Skip to main content

grafeo_engine/query/
plan.rs

1//! Logical query plan representation.
2//!
3//! The logical plan is the intermediate representation between parsed queries
4//! and physical execution. Both GQL and Cypher queries are translated to this
5//! common representation.
6
7use grafeo_common::types::Value;
8
9/// A logical query plan.
10#[derive(Debug, Clone)]
11pub struct LogicalPlan {
12    /// The root operator of the plan.
13    pub root: LogicalOperator,
14    /// When true, return the plan tree as text instead of executing.
15    pub explain: bool,
16}
17
18impl LogicalPlan {
19    /// Creates a new logical plan with the given root operator.
20    pub fn new(root: LogicalOperator) -> Self {
21        Self {
22            root,
23            explain: false,
24        }
25    }
26
27    /// Creates an EXPLAIN plan that returns the plan tree without executing.
28    pub fn explain(root: LogicalOperator) -> Self {
29        Self {
30            root,
31            explain: true,
32        }
33    }
34}
35
36/// A logical operator in the query plan.
37#[derive(Debug, Clone)]
38pub enum LogicalOperator {
39    /// Scan all nodes, optionally filtered by label.
40    NodeScan(NodeScanOp),
41
42    /// Scan all edges, optionally filtered by type.
43    EdgeScan(EdgeScanOp),
44
45    /// Expand from nodes to neighbors via edges.
46    Expand(ExpandOp),
47
48    /// Filter rows based on a predicate.
49    Filter(FilterOp),
50
51    /// Project specific columns.
52    Project(ProjectOp),
53
54    /// Join two inputs.
55    Join(JoinOp),
56
57    /// Aggregate with grouping.
58    Aggregate(AggregateOp),
59
60    /// Limit the number of results.
61    Limit(LimitOp),
62
63    /// Skip a number of results.
64    Skip(SkipOp),
65
66    /// Sort results.
67    Sort(SortOp),
68
69    /// Remove duplicate results.
70    Distinct(DistinctOp),
71
72    /// Create a new node.
73    CreateNode(CreateNodeOp),
74
75    /// Create a new edge.
76    CreateEdge(CreateEdgeOp),
77
78    /// Delete a node.
79    DeleteNode(DeleteNodeOp),
80
81    /// Delete an edge.
82    DeleteEdge(DeleteEdgeOp),
83
84    /// Set properties on a node or edge.
85    SetProperty(SetPropertyOp),
86
87    /// Add labels to a node.
88    AddLabel(AddLabelOp),
89
90    /// Remove labels from a node.
91    RemoveLabel(RemoveLabelOp),
92
93    /// Return results (terminal operator).
94    Return(ReturnOp),
95
96    /// Empty result set.
97    Empty,
98
99    // ==================== RDF/SPARQL Operators ====================
100    /// Scan RDF triples matching a pattern.
101    TripleScan(TripleScanOp),
102
103    /// Union of multiple result sets.
104    Union(UnionOp),
105
106    /// Left outer join for OPTIONAL patterns.
107    LeftJoin(LeftJoinOp),
108
109    /// Anti-join for MINUS patterns.
110    AntiJoin(AntiJoinOp),
111
112    /// Bind a variable to an expression.
113    Bind(BindOp),
114
115    /// Unwind a list into individual rows.
116    Unwind(UnwindOp),
117
118    /// Collect grouped key-value rows into a single Map value.
119    /// Used for Gremlin `groupCount()` semantics.
120    MapCollect(MapCollectOp),
121
122    /// Merge a node pattern (match or create).
123    Merge(MergeOp),
124
125    /// Merge a relationship pattern (match or create).
126    MergeRelationship(MergeRelationshipOp),
127
128    /// Find shortest path between nodes.
129    ShortestPath(ShortestPathOp),
130
131    // ==================== SPARQL Update Operators ====================
132    /// Insert RDF triples.
133    InsertTriple(InsertTripleOp),
134
135    /// Delete RDF triples.
136    DeleteTriple(DeleteTripleOp),
137
138    /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
139    /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
140    Modify(ModifyOp),
141
142    /// Clear a graph (remove all triples).
143    ClearGraph(ClearGraphOp),
144
145    /// Create a new named graph.
146    CreateGraph(CreateGraphOp),
147
148    /// Drop (remove) a named graph.
149    DropGraph(DropGraphOp),
150
151    /// Load data from a URL into a graph.
152    LoadGraph(LoadGraphOp),
153
154    /// Copy triples from one graph to another.
155    CopyGraph(CopyGraphOp),
156
157    /// Move triples from one graph to another.
158    MoveGraph(MoveGraphOp),
159
160    /// Add (merge) triples from one graph to another.
161    AddGraph(AddGraphOp),
162
163    /// Per-row aggregation over a list-valued column (horizontal aggregation, GE09).
164    HorizontalAggregate(HorizontalAggregateOp),
165
166    // ==================== Vector Search Operators ====================
167    /// Scan using vector similarity search.
168    VectorScan(VectorScanOp),
169
170    /// Join graph patterns with vector similarity search.
171    ///
172    /// Computes vector distances between entities from the left input and
173    /// a query vector, then joins with similarity scores. Useful for:
174    /// - Filtering graph traversal results by vector similarity
175    /// - Computing aggregated embeddings and finding similar entities
176    /// - Combining multiple vector sources with graph structure
177    VectorJoin(VectorJoinOp),
178
179    // ==================== Set Operations ====================
180    /// Set difference: rows in left that are not in right.
181    Except(ExceptOp),
182
183    /// Set intersection: rows common to all inputs.
184    Intersect(IntersectOp),
185
186    /// Fallback: use left result if non-empty, otherwise right.
187    Otherwise(OtherwiseOp),
188
189    // ==================== Correlated Subquery ====================
190    /// Apply (lateral join): evaluate a subplan per input row.
191    Apply(ApplyOp),
192
193    /// Parameter scan: leaf of a correlated inner plan that receives values
194    /// from the outer Apply operator. The column names match `ApplyOp.shared_variables`.
195    ParameterScan(ParameterScanOp),
196
197    // ==================== DDL Operators ====================
198    /// Define a property graph schema (SQL/PGQ DDL).
199    CreatePropertyGraph(CreatePropertyGraphOp),
200
201    // ==================== Multi-Way Join ====================
202    /// Multi-way join using worst-case optimal join (leapfrog).
203    /// Used for cyclic patterns (triangles, cliques) with 3+ relations.
204    MultiWayJoin(MultiWayJoinOp),
205
206    // ==================== Procedure Call Operators ====================
207    /// Invoke a stored procedure (CALL ... YIELD).
208    CallProcedure(CallProcedureOp),
209}
210
211impl LogicalOperator {
212    /// Returns `true` if this operator or any of its children perform mutations.
213    #[must_use]
214    pub fn has_mutations(&self) -> bool {
215        match self {
216            // Direct mutation operators
217            Self::CreateNode(_)
218            | Self::CreateEdge(_)
219            | Self::DeleteNode(_)
220            | Self::DeleteEdge(_)
221            | Self::SetProperty(_)
222            | Self::AddLabel(_)
223            | Self::RemoveLabel(_)
224            | Self::Merge(_)
225            | Self::MergeRelationship(_)
226            | Self::InsertTriple(_)
227            | Self::DeleteTriple(_)
228            | Self::Modify(_)
229            | Self::ClearGraph(_)
230            | Self::CreateGraph(_)
231            | Self::DropGraph(_)
232            | Self::LoadGraph(_)
233            | Self::CopyGraph(_)
234            | Self::MoveGraph(_)
235            | Self::AddGraph(_)
236            | Self::CreatePropertyGraph(_) => true,
237
238            // Operators with an `input` child
239            Self::Filter(op) => op.input.has_mutations(),
240            Self::Project(op) => op.input.has_mutations(),
241            Self::Aggregate(op) => op.input.has_mutations(),
242            Self::Limit(op) => op.input.has_mutations(),
243            Self::Skip(op) => op.input.has_mutations(),
244            Self::Sort(op) => op.input.has_mutations(),
245            Self::Distinct(op) => op.input.has_mutations(),
246            Self::Unwind(op) => op.input.has_mutations(),
247            Self::Bind(op) => op.input.has_mutations(),
248            Self::MapCollect(op) => op.input.has_mutations(),
249            Self::Return(op) => op.input.has_mutations(),
250            Self::HorizontalAggregate(op) => op.input.has_mutations(),
251            Self::VectorScan(_) | Self::VectorJoin(_) => false,
252
253            // Operators with two children
254            Self::Join(op) => op.left.has_mutations() || op.right.has_mutations(),
255            Self::LeftJoin(op) => op.left.has_mutations() || op.right.has_mutations(),
256            Self::AntiJoin(op) => op.left.has_mutations() || op.right.has_mutations(),
257            Self::Except(op) => op.left.has_mutations() || op.right.has_mutations(),
258            Self::Intersect(op) => op.left.has_mutations() || op.right.has_mutations(),
259            Self::Otherwise(op) => op.left.has_mutations() || op.right.has_mutations(),
260            Self::Union(op) => op.inputs.iter().any(|i| i.has_mutations()),
261            Self::MultiWayJoin(op) => op.inputs.iter().any(|i| i.has_mutations()),
262            Self::Apply(op) => op.input.has_mutations() || op.subplan.has_mutations(),
263
264            // Leaf operators (read-only)
265            Self::NodeScan(_)
266            | Self::EdgeScan(_)
267            | Self::Expand(_)
268            | Self::TripleScan(_)
269            | Self::ShortestPath(_)
270            | Self::Empty
271            | Self::ParameterScan(_)
272            | Self::CallProcedure(_) => false,
273        }
274    }
275}
276
277impl LogicalOperator {
278    /// Formats this operator tree as a human-readable plan for EXPLAIN output.
279    pub fn explain_tree(&self) -> String {
280        let mut output = String::new();
281        self.fmt_tree(&mut output, 0);
282        output
283    }
284
285    fn fmt_tree(&self, out: &mut String, depth: usize) {
286        use std::fmt::Write;
287
288        let indent = "  ".repeat(depth);
289        match self {
290            Self::NodeScan(op) => {
291                let label = op.label.as_deref().unwrap_or("*");
292                let _ = writeln!(out, "{indent}NodeScan ({var}:{label})", var = op.variable);
293                if let Some(input) = &op.input {
294                    input.fmt_tree(out, depth + 1);
295                }
296            }
297            Self::EdgeScan(op) => {
298                let types = if op.edge_types.is_empty() {
299                    "*".to_string()
300                } else {
301                    op.edge_types.join("|")
302                };
303                let _ = writeln!(out, "{indent}EdgeScan ({var}:{types})", var = op.variable);
304            }
305            Self::Expand(op) => {
306                let types = if op.edge_types.is_empty() {
307                    "*".to_string()
308                } else {
309                    op.edge_types.join("|")
310                };
311                let dir = match op.direction {
312                    ExpandDirection::Outgoing => "->",
313                    ExpandDirection::Incoming => "<-",
314                    ExpandDirection::Both => "--",
315                };
316                let hops = match (op.min_hops, op.max_hops) {
317                    (1, Some(1)) => String::new(),
318                    (min, Some(max)) if min == max => format!("*{min}"),
319                    (min, Some(max)) => format!("*{min}..{max}"),
320                    (min, None) => format!("*{min}.."),
321                };
322                let _ = writeln!(
323                    out,
324                    "{indent}Expand ({from}){dir}[:{types}{hops}]{dir}({to})",
325                    from = op.from_variable,
326                    to = op.to_variable,
327                );
328                op.input.fmt_tree(out, depth + 1);
329            }
330            Self::Filter(op) => {
331                let hint = match &op.pushdown_hint {
332                    Some(PushdownHint::IndexLookup { property }) => {
333                        format!(" [index: {property}]")
334                    }
335                    Some(PushdownHint::RangeScan { property }) => {
336                        format!(" [range: {property}]")
337                    }
338                    Some(PushdownHint::LabelFirst) => " [label-first]".to_string(),
339                    None => String::new(),
340                };
341                let _ = writeln!(
342                    out,
343                    "{indent}Filter ({expr}){hint}",
344                    expr = fmt_expr(&op.predicate)
345                );
346                op.input.fmt_tree(out, depth + 1);
347            }
348            Self::Project(op) => {
349                let cols: Vec<String> = op
350                    .projections
351                    .iter()
352                    .map(|p| {
353                        let expr = fmt_expr(&p.expression);
354                        match &p.alias {
355                            Some(alias) => format!("{expr} AS {alias}"),
356                            None => expr,
357                        }
358                    })
359                    .collect();
360                let _ = writeln!(out, "{indent}Project ({cols})", cols = cols.join(", "));
361                op.input.fmt_tree(out, depth + 1);
362            }
363            Self::Join(op) => {
364                let _ = writeln!(out, "{indent}Join ({ty:?})", ty = op.join_type);
365                op.left.fmt_tree(out, depth + 1);
366                op.right.fmt_tree(out, depth + 1);
367            }
368            Self::Aggregate(op) => {
369                let groups: Vec<String> = op.group_by.iter().map(fmt_expr).collect();
370                let aggs: Vec<String> = op
371                    .aggregates
372                    .iter()
373                    .map(|a| {
374                        let func = format!("{:?}", a.function).to_lowercase();
375                        match &a.alias {
376                            Some(alias) => format!("{func}(...) AS {alias}"),
377                            None => format!("{func}(...)"),
378                        }
379                    })
380                    .collect();
381                let _ = writeln!(
382                    out,
383                    "{indent}Aggregate (group: [{groups}], aggs: [{aggs}])",
384                    groups = groups.join(", "),
385                    aggs = aggs.join(", "),
386                );
387                op.input.fmt_tree(out, depth + 1);
388            }
389            Self::Limit(op) => {
390                let _ = writeln!(out, "{indent}Limit ({count})", count = op.count);
391                op.input.fmt_tree(out, depth + 1);
392            }
393            Self::Skip(op) => {
394                let _ = writeln!(out, "{indent}Skip ({count})", count = op.count);
395                op.input.fmt_tree(out, depth + 1);
396            }
397            Self::Sort(op) => {
398                let keys: Vec<String> = op
399                    .keys
400                    .iter()
401                    .map(|k| {
402                        let dir = match k.order {
403                            SortOrder::Ascending => "ASC",
404                            SortOrder::Descending => "DESC",
405                        };
406                        format!("{} {dir}", fmt_expr(&k.expression))
407                    })
408                    .collect();
409                let _ = writeln!(out, "{indent}Sort ({keys})", keys = keys.join(", "));
410                op.input.fmt_tree(out, depth + 1);
411            }
412            Self::Distinct(op) => {
413                let _ = writeln!(out, "{indent}Distinct");
414                op.input.fmt_tree(out, depth + 1);
415            }
416            Self::Return(op) => {
417                let items: Vec<String> = op
418                    .items
419                    .iter()
420                    .map(|item| {
421                        let expr = fmt_expr(&item.expression);
422                        match &item.alias {
423                            Some(alias) => format!("{expr} AS {alias}"),
424                            None => expr,
425                        }
426                    })
427                    .collect();
428                let distinct = if op.distinct { " DISTINCT" } else { "" };
429                let _ = writeln!(
430                    out,
431                    "{indent}Return{distinct} ({items})",
432                    items = items.join(", ")
433                );
434                op.input.fmt_tree(out, depth + 1);
435            }
436            Self::Union(op) => {
437                let _ = writeln!(out, "{indent}Union ({n} branches)", n = op.inputs.len());
438                for input in &op.inputs {
439                    input.fmt_tree(out, depth + 1);
440                }
441            }
442            Self::MultiWayJoin(op) => {
443                let vars = op.shared_variables.join(", ");
444                let _ = writeln!(
445                    out,
446                    "{indent}MultiWayJoin ({n} inputs, shared: [{vars}])",
447                    n = op.inputs.len()
448                );
449                for input in &op.inputs {
450                    input.fmt_tree(out, depth + 1);
451                }
452            }
453            Self::LeftJoin(op) => {
454                let _ = writeln!(out, "{indent}LeftJoin");
455                op.left.fmt_tree(out, depth + 1);
456                op.right.fmt_tree(out, depth + 1);
457            }
458            Self::AntiJoin(op) => {
459                let _ = writeln!(out, "{indent}AntiJoin");
460                op.left.fmt_tree(out, depth + 1);
461                op.right.fmt_tree(out, depth + 1);
462            }
463            Self::Unwind(op) => {
464                let _ = writeln!(out, "{indent}Unwind ({var})", var = op.variable);
465                op.input.fmt_tree(out, depth + 1);
466            }
467            Self::Bind(op) => {
468                let _ = writeln!(out, "{indent}Bind ({var})", var = op.variable);
469                op.input.fmt_tree(out, depth + 1);
470            }
471            Self::MapCollect(op) => {
472                let _ = writeln!(
473                    out,
474                    "{indent}MapCollect ({key} -> {val} AS {alias})",
475                    key = op.key_var,
476                    val = op.value_var,
477                    alias = op.alias
478                );
479                op.input.fmt_tree(out, depth + 1);
480            }
481            Self::Apply(op) => {
482                let _ = writeln!(out, "{indent}Apply");
483                op.input.fmt_tree(out, depth + 1);
484                op.subplan.fmt_tree(out, depth + 1);
485            }
486            Self::Except(op) => {
487                let all = if op.all { " ALL" } else { "" };
488                let _ = writeln!(out, "{indent}Except{all}");
489                op.left.fmt_tree(out, depth + 1);
490                op.right.fmt_tree(out, depth + 1);
491            }
492            Self::Intersect(op) => {
493                let all = if op.all { " ALL" } else { "" };
494                let _ = writeln!(out, "{indent}Intersect{all}");
495                op.left.fmt_tree(out, depth + 1);
496                op.right.fmt_tree(out, depth + 1);
497            }
498            Self::Otherwise(op) => {
499                let _ = writeln!(out, "{indent}Otherwise");
500                op.left.fmt_tree(out, depth + 1);
501                op.right.fmt_tree(out, depth + 1);
502            }
503            Self::ShortestPath(op) => {
504                let _ = writeln!(
505                    out,
506                    "{indent}ShortestPath ({from} -> {to})",
507                    from = op.source_var,
508                    to = op.target_var
509                );
510                op.input.fmt_tree(out, depth + 1);
511            }
512            Self::Merge(op) => {
513                let _ = writeln!(out, "{indent}Merge ({var})", var = op.variable);
514                op.input.fmt_tree(out, depth + 1);
515            }
516            Self::MergeRelationship(op) => {
517                let _ = writeln!(out, "{indent}MergeRelationship ({var})", var = op.variable);
518                op.input.fmt_tree(out, depth + 1);
519            }
520            Self::CreateNode(op) => {
521                let labels = op.labels.join(":");
522                let _ = writeln!(
523                    out,
524                    "{indent}CreateNode ({var}:{labels})",
525                    var = op.variable
526                );
527                if let Some(input) = &op.input {
528                    input.fmt_tree(out, depth + 1);
529                }
530            }
531            Self::CreateEdge(op) => {
532                let var = op.variable.as_deref().unwrap_or("?");
533                let _ = writeln!(
534                    out,
535                    "{indent}CreateEdge ({from})-[{var}:{ty}]->({to})",
536                    from = op.from_variable,
537                    ty = op.edge_type,
538                    to = op.to_variable
539                );
540                op.input.fmt_tree(out, depth + 1);
541            }
542            Self::DeleteNode(op) => {
543                let _ = writeln!(out, "{indent}DeleteNode ({var})", var = op.variable);
544                op.input.fmt_tree(out, depth + 1);
545            }
546            Self::DeleteEdge(op) => {
547                let _ = writeln!(out, "{indent}DeleteEdge ({var})", var = op.variable);
548                op.input.fmt_tree(out, depth + 1);
549            }
550            Self::SetProperty(op) => {
551                let props: Vec<String> = op
552                    .properties
553                    .iter()
554                    .map(|(k, _)| format!("{}.{k}", op.variable))
555                    .collect();
556                let _ = writeln!(
557                    out,
558                    "{indent}SetProperty ({props})",
559                    props = props.join(", ")
560                );
561                op.input.fmt_tree(out, depth + 1);
562            }
563            Self::AddLabel(op) => {
564                let labels = op.labels.join(":");
565                let _ = writeln!(out, "{indent}AddLabel ({var}:{labels})", var = op.variable);
566                op.input.fmt_tree(out, depth + 1);
567            }
568            Self::RemoveLabel(op) => {
569                let labels = op.labels.join(":");
570                let _ = writeln!(
571                    out,
572                    "{indent}RemoveLabel ({var}:{labels})",
573                    var = op.variable
574                );
575                op.input.fmt_tree(out, depth + 1);
576            }
577            Self::CallProcedure(op) => {
578                let _ = writeln!(
579                    out,
580                    "{indent}CallProcedure ({name})",
581                    name = op.name.join(".")
582                );
583            }
584            Self::TripleScan(op) => {
585                let _ = writeln!(
586                    out,
587                    "{indent}TripleScan ({s} {p} {o})",
588                    s = fmt_triple_component(&op.subject),
589                    p = fmt_triple_component(&op.predicate),
590                    o = fmt_triple_component(&op.object)
591                );
592                if let Some(input) = &op.input {
593                    input.fmt_tree(out, depth + 1);
594                }
595            }
596            Self::Empty => {
597                let _ = writeln!(out, "{indent}Empty");
598            }
599            // Remaining operators: show a simple name
600            _ => {
601                let _ = writeln!(out, "{indent}{:?}", std::mem::discriminant(self));
602            }
603        }
604    }
605}
606
607/// Format a logical expression compactly for EXPLAIN output.
608fn fmt_expr(expr: &LogicalExpression) -> String {
609    match expr {
610        LogicalExpression::Variable(name) => name.clone(),
611        LogicalExpression::Property { variable, property } => format!("{variable}.{property}"),
612        LogicalExpression::Literal(val) => format!("{val}"),
613        LogicalExpression::Binary { left, op, right } => {
614            format!("{} {op:?} {}", fmt_expr(left), fmt_expr(right))
615        }
616        LogicalExpression::Unary { op, operand } => {
617            format!("{op:?} {}", fmt_expr(operand))
618        }
619        LogicalExpression::FunctionCall { name, args, .. } => {
620            let arg_strs: Vec<String> = args.iter().map(fmt_expr).collect();
621            format!("{name}({})", arg_strs.join(", "))
622        }
623        _ => format!("{expr:?}"),
624    }
625}
626
627/// Format a triple component for EXPLAIN output.
628fn fmt_triple_component(comp: &TripleComponent) -> String {
629    match comp {
630        TripleComponent::Variable(name) => format!("?{name}"),
631        TripleComponent::Iri(iri) => format!("<{iri}>"),
632        TripleComponent::Literal(val) => format!("{val}"),
633    }
634}
635
636/// Scan nodes from the graph.
637#[derive(Debug, Clone)]
638pub struct NodeScanOp {
639    /// Variable name to bind the node to.
640    pub variable: String,
641    /// Optional label filter.
642    pub label: Option<String>,
643    /// Child operator (if any, for chained patterns).
644    pub input: Option<Box<LogicalOperator>>,
645}
646
647/// Scan edges from the graph.
648#[derive(Debug, Clone)]
649pub struct EdgeScanOp {
650    /// Variable name to bind the edge to.
651    pub variable: String,
652    /// Edge type filter (empty = match all types).
653    pub edge_types: Vec<String>,
654    /// Child operator (if any).
655    pub input: Option<Box<LogicalOperator>>,
656}
657
658/// Path traversal mode for variable-length expansion.
659#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
660pub enum PathMode {
661    /// Allows repeated nodes and edges (default).
662    #[default]
663    Walk,
664    /// No repeated edges.
665    Trail,
666    /// No repeated nodes except endpoints.
667    Simple,
668    /// No repeated nodes at all.
669    Acyclic,
670}
671
672/// Expand from nodes to their neighbors.
673#[derive(Debug, Clone)]
674pub struct ExpandOp {
675    /// Source node variable.
676    pub from_variable: String,
677    /// Target node variable to bind.
678    pub to_variable: String,
679    /// Edge variable to bind (optional).
680    pub edge_variable: Option<String>,
681    /// Direction of expansion.
682    pub direction: ExpandDirection,
683    /// Edge type filter (empty = match all types, multiple = match any).
684    pub edge_types: Vec<String>,
685    /// Minimum hops (for variable-length patterns).
686    pub min_hops: u32,
687    /// Maximum hops (for variable-length patterns).
688    pub max_hops: Option<u32>,
689    /// Input operator.
690    pub input: Box<LogicalOperator>,
691    /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
692    /// When set, a path length column will be output under this name.
693    pub path_alias: Option<String>,
694    /// Path traversal mode (WALK, TRAIL, SIMPLE, ACYCLIC).
695    pub path_mode: PathMode,
696}
697
698/// Direction for edge expansion.
699#[derive(Debug, Clone, Copy, PartialEq, Eq)]
700pub enum ExpandDirection {
701    /// Follow outgoing edges.
702    Outgoing,
703    /// Follow incoming edges.
704    Incoming,
705    /// Follow edges in either direction.
706    Both,
707}
708
709/// Join two inputs.
710#[derive(Debug, Clone)]
711pub struct JoinOp {
712    /// Left input.
713    pub left: Box<LogicalOperator>,
714    /// Right input.
715    pub right: Box<LogicalOperator>,
716    /// Join type.
717    pub join_type: JoinType,
718    /// Join conditions.
719    pub conditions: Vec<JoinCondition>,
720}
721
722/// Join type.
723#[derive(Debug, Clone, Copy, PartialEq, Eq)]
724pub enum JoinType {
725    /// Inner join.
726    Inner,
727    /// Left outer join.
728    Left,
729    /// Right outer join.
730    Right,
731    /// Full outer join.
732    Full,
733    /// Cross join (Cartesian product).
734    Cross,
735    /// Semi join (returns left rows with matching right rows).
736    Semi,
737    /// Anti join (returns left rows without matching right rows).
738    Anti,
739}
740
741/// A join condition.
742#[derive(Debug, Clone)]
743pub struct JoinCondition {
744    /// Left expression.
745    pub left: LogicalExpression,
746    /// Right expression.
747    pub right: LogicalExpression,
748}
749
750/// Multi-way join for worst-case optimal joins (leapfrog).
751///
752/// Unlike binary `JoinOp`, this joins 3+ relations simultaneously
753/// using the leapfrog trie join algorithm. Preferred for cyclic patterns
754/// (triangles, cliques) where cascading binary joins hit O(N^2).
755#[derive(Debug, Clone)]
756pub struct MultiWayJoinOp {
757    /// Input relations (one per relation in the join).
758    pub inputs: Vec<LogicalOperator>,
759    /// All pairwise join conditions.
760    pub conditions: Vec<JoinCondition>,
761    /// Variables shared across multiple inputs (intersection keys).
762    pub shared_variables: Vec<String>,
763}
764
765/// Aggregate with grouping.
766#[derive(Debug, Clone)]
767pub struct AggregateOp {
768    /// Group by expressions.
769    pub group_by: Vec<LogicalExpression>,
770    /// Aggregate functions.
771    pub aggregates: Vec<AggregateExpr>,
772    /// Input operator.
773    pub input: Box<LogicalOperator>,
774    /// HAVING clause filter (applied after aggregation).
775    pub having: Option<LogicalExpression>,
776}
777
778/// Whether a horizontal aggregate operates on edges or nodes.
779#[derive(Debug, Clone, Copy, PartialEq, Eq)]
780pub enum EntityKind {
781    /// Aggregate over edges in a path.
782    Edge,
783    /// Aggregate over nodes in a path.
784    Node,
785}
786
787/// Per-row aggregation over a list-valued column (horizontal aggregation, GE09).
788///
789/// For each input row, reads a list of entity IDs from `list_column`, accesses
790/// `property` on each entity, computes the aggregate, and emits the scalar result.
791#[derive(Debug, Clone)]
792pub struct HorizontalAggregateOp {
793    /// The list column name (e.g., `_path_edges_p`).
794    pub list_column: String,
795    /// Whether the list contains edge IDs or node IDs.
796    pub entity_kind: EntityKind,
797    /// The aggregate function to apply.
798    pub function: AggregateFunction,
799    /// The property to access on each entity.
800    pub property: String,
801    /// Output alias for the result column.
802    pub alias: String,
803    /// Input operator.
804    pub input: Box<LogicalOperator>,
805}
806
807/// An aggregate expression.
808#[derive(Debug, Clone)]
809pub struct AggregateExpr {
810    /// Aggregate function.
811    pub function: AggregateFunction,
812    /// Expression to aggregate (first/only argument, y for binary set functions).
813    pub expression: Option<LogicalExpression>,
814    /// Second expression for binary set functions (x for COVAR, CORR, REGR_*).
815    pub expression2: Option<LogicalExpression>,
816    /// Whether to use DISTINCT.
817    pub distinct: bool,
818    /// Alias for the result.
819    pub alias: Option<String>,
820    /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
821    pub percentile: Option<f64>,
822}
823
824/// Aggregate function.
825#[derive(Debug, Clone, Copy, PartialEq, Eq)]
826pub enum AggregateFunction {
827    /// Count all rows (COUNT(*)).
828    Count,
829    /// Count non-null values (COUNT(expr)).
830    CountNonNull,
831    /// Sum values.
832    Sum,
833    /// Average values.
834    Avg,
835    /// Minimum value.
836    Min,
837    /// Maximum value.
838    Max,
839    /// Collect into list.
840    Collect,
841    /// Sample standard deviation (STDEV).
842    StdDev,
843    /// Population standard deviation (STDEVP).
844    StdDevPop,
845    /// Sample variance (VAR_SAMP / VARIANCE).
846    Variance,
847    /// Population variance (VAR_POP).
848    VariancePop,
849    /// Discrete percentile (PERCENTILE_DISC).
850    PercentileDisc,
851    /// Continuous percentile (PERCENTILE_CONT).
852    PercentileCont,
853    /// Concatenate values with separator (GROUP_CONCAT).
854    GroupConcat,
855    /// Return an arbitrary value from the group (SAMPLE).
856    Sample,
857    /// Sample covariance (COVAR_SAMP(y, x)).
858    CovarSamp,
859    /// Population covariance (COVAR_POP(y, x)).
860    CovarPop,
861    /// Pearson correlation coefficient (CORR(y, x)).
862    Corr,
863    /// Regression slope (REGR_SLOPE(y, x)).
864    RegrSlope,
865    /// Regression intercept (REGR_INTERCEPT(y, x)).
866    RegrIntercept,
867    /// Coefficient of determination (REGR_R2(y, x)).
868    RegrR2,
869    /// Regression count of non-null pairs (REGR_COUNT(y, x)).
870    RegrCount,
871    /// Regression sum of squares for x (REGR_SXX(y, x)).
872    RegrSxx,
873    /// Regression sum of squares for y (REGR_SYY(y, x)).
874    RegrSyy,
875    /// Regression sum of cross-products (REGR_SXY(y, x)).
876    RegrSxy,
877    /// Regression average of x (REGR_AVGX(y, x)).
878    RegrAvgx,
879    /// Regression average of y (REGR_AVGY(y, x)).
880    RegrAvgy,
881}
882
883/// Hint about how a filter will be executed at the physical level.
884///
885/// Set during EXPLAIN annotation to communicate pushdown decisions.
886#[derive(Debug, Clone)]
887pub enum PushdownHint {
888    /// Equality predicate resolved via a property index.
889    IndexLookup {
890        /// The indexed property name.
891        property: String,
892    },
893    /// Range predicate resolved via a range/btree index.
894    RangeScan {
895        /// The indexed property name.
896        property: String,
897    },
898    /// No index available, but label narrows the scan before filtering.
899    LabelFirst,
900}
901
902/// Filter rows based on a predicate.
903#[derive(Debug, Clone)]
904pub struct FilterOp {
905    /// The filter predicate.
906    pub predicate: LogicalExpression,
907    /// Input operator.
908    pub input: Box<LogicalOperator>,
909    /// Optional hint about pushdown strategy (populated by EXPLAIN).
910    pub pushdown_hint: Option<PushdownHint>,
911}
912
913/// Project specific columns.
914#[derive(Debug, Clone)]
915pub struct ProjectOp {
916    /// Columns to project.
917    pub projections: Vec<Projection>,
918    /// Input operator.
919    pub input: Box<LogicalOperator>,
920}
921
922/// A single projection (column selection or computation).
923#[derive(Debug, Clone)]
924pub struct Projection {
925    /// Expression to compute.
926    pub expression: LogicalExpression,
927    /// Alias for the result.
928    pub alias: Option<String>,
929}
930
931/// Limit the number of results.
932#[derive(Debug, Clone)]
933pub struct LimitOp {
934    /// Maximum number of rows to return.
935    pub count: usize,
936    /// Input operator.
937    pub input: Box<LogicalOperator>,
938}
939
940/// Skip a number of results.
941#[derive(Debug, Clone)]
942pub struct SkipOp {
943    /// Number of rows to skip.
944    pub count: usize,
945    /// Input operator.
946    pub input: Box<LogicalOperator>,
947}
948
949/// Sort results.
950#[derive(Debug, Clone)]
951pub struct SortOp {
952    /// Sort keys.
953    pub keys: Vec<SortKey>,
954    /// Input operator.
955    pub input: Box<LogicalOperator>,
956}
957
958/// A sort key.
959#[derive(Debug, Clone)]
960pub struct SortKey {
961    /// Expression to sort by.
962    pub expression: LogicalExpression,
963    /// Sort order.
964    pub order: SortOrder,
965    /// Optional null ordering (NULLS FIRST / NULLS LAST).
966    pub nulls: Option<NullsOrdering>,
967}
968
969/// Sort order.
970#[derive(Debug, Clone, Copy, PartialEq, Eq)]
971pub enum SortOrder {
972    /// Ascending order.
973    Ascending,
974    /// Descending order.
975    Descending,
976}
977
978/// Null ordering for sort operations.
979#[derive(Debug, Clone, Copy, PartialEq, Eq)]
980pub enum NullsOrdering {
981    /// Nulls sort before all non-null values.
982    First,
983    /// Nulls sort after all non-null values.
984    Last,
985}
986
987/// Remove duplicate results.
988#[derive(Debug, Clone)]
989pub struct DistinctOp {
990    /// Input operator.
991    pub input: Box<LogicalOperator>,
992    /// Optional columns to use for deduplication.
993    /// If None, all columns are used.
994    pub columns: Option<Vec<String>>,
995}
996
997/// Create a new node.
998#[derive(Debug, Clone)]
999pub struct CreateNodeOp {
1000    /// Variable name to bind the created node to.
1001    pub variable: String,
1002    /// Labels for the new node.
1003    pub labels: Vec<String>,
1004    /// Properties for the new node.
1005    pub properties: Vec<(String, LogicalExpression)>,
1006    /// Input operator (for chained creates).
1007    pub input: Option<Box<LogicalOperator>>,
1008}
1009
1010/// Create a new edge.
1011#[derive(Debug, Clone)]
1012pub struct CreateEdgeOp {
1013    /// Variable name to bind the created edge to.
1014    pub variable: Option<String>,
1015    /// Source node variable.
1016    pub from_variable: String,
1017    /// Target node variable.
1018    pub to_variable: String,
1019    /// Edge type.
1020    pub edge_type: String,
1021    /// Properties for the new edge.
1022    pub properties: Vec<(String, LogicalExpression)>,
1023    /// Input operator.
1024    pub input: Box<LogicalOperator>,
1025}
1026
1027/// Delete a node.
1028#[derive(Debug, Clone)]
1029pub struct DeleteNodeOp {
1030    /// Variable of the node to delete.
1031    pub variable: String,
1032    /// Whether to detach (delete connected edges) before deleting.
1033    pub detach: bool,
1034    /// Input operator.
1035    pub input: Box<LogicalOperator>,
1036}
1037
1038/// Delete an edge.
1039#[derive(Debug, Clone)]
1040pub struct DeleteEdgeOp {
1041    /// Variable of the edge to delete.
1042    pub variable: String,
1043    /// Input operator.
1044    pub input: Box<LogicalOperator>,
1045}
1046
1047/// Set properties on a node or edge.
1048#[derive(Debug, Clone)]
1049pub struct SetPropertyOp {
1050    /// Variable of the entity to update.
1051    pub variable: String,
1052    /// Properties to set (name -> expression).
1053    pub properties: Vec<(String, LogicalExpression)>,
1054    /// Whether to replace all properties (vs. merge).
1055    pub replace: bool,
1056    /// Whether the target variable is an edge (vs. node).
1057    pub is_edge: bool,
1058    /// Input operator.
1059    pub input: Box<LogicalOperator>,
1060}
1061
1062/// Add labels to a node.
1063#[derive(Debug, Clone)]
1064pub struct AddLabelOp {
1065    /// Variable of the node to update.
1066    pub variable: String,
1067    /// Labels to add.
1068    pub labels: Vec<String>,
1069    /// Input operator.
1070    pub input: Box<LogicalOperator>,
1071}
1072
1073/// Remove labels from a node.
1074#[derive(Debug, Clone)]
1075pub struct RemoveLabelOp {
1076    /// Variable of the node to update.
1077    pub variable: String,
1078    /// Labels to remove.
1079    pub labels: Vec<String>,
1080    /// Input operator.
1081    pub input: Box<LogicalOperator>,
1082}
1083
1084// ==================== RDF/SPARQL Operators ====================
1085
1086/// Scan RDF triples matching a pattern.
1087#[derive(Debug, Clone)]
1088pub struct TripleScanOp {
1089    /// Subject pattern (variable name or IRI).
1090    pub subject: TripleComponent,
1091    /// Predicate pattern (variable name or IRI).
1092    pub predicate: TripleComponent,
1093    /// Object pattern (variable name, IRI, or literal).
1094    pub object: TripleComponent,
1095    /// Named graph (optional).
1096    pub graph: Option<TripleComponent>,
1097    /// Input operator (for chained patterns).
1098    pub input: Option<Box<LogicalOperator>>,
1099}
1100
1101/// A component of a triple pattern.
1102#[derive(Debug, Clone)]
1103pub enum TripleComponent {
1104    /// A variable to bind.
1105    Variable(String),
1106    /// A constant IRI.
1107    Iri(String),
1108    /// A constant literal value.
1109    Literal(Value),
1110}
1111
1112/// Union of multiple result sets.
1113#[derive(Debug, Clone)]
1114pub struct UnionOp {
1115    /// Inputs to union together.
1116    pub inputs: Vec<LogicalOperator>,
1117}
1118
1119/// Set difference: rows in left that are not in right.
1120#[derive(Debug, Clone)]
1121pub struct ExceptOp {
1122    /// Left input.
1123    pub left: Box<LogicalOperator>,
1124    /// Right input (rows to exclude).
1125    pub right: Box<LogicalOperator>,
1126    /// If true, preserve duplicates (EXCEPT ALL); if false, deduplicate (EXCEPT DISTINCT).
1127    pub all: bool,
1128}
1129
1130/// Set intersection: rows common to both inputs.
1131#[derive(Debug, Clone)]
1132pub struct IntersectOp {
1133    /// Left input.
1134    pub left: Box<LogicalOperator>,
1135    /// Right input.
1136    pub right: Box<LogicalOperator>,
1137    /// If true, preserve duplicates (INTERSECT ALL); if false, deduplicate (INTERSECT DISTINCT).
1138    pub all: bool,
1139}
1140
1141/// Fallback operator: use left result if non-empty, otherwise use right.
1142#[derive(Debug, Clone)]
1143pub struct OtherwiseOp {
1144    /// Primary input (preferred).
1145    pub left: Box<LogicalOperator>,
1146    /// Fallback input (used only if left produces zero rows).
1147    pub right: Box<LogicalOperator>,
1148}
1149
1150/// Apply (lateral join): evaluate a subplan for each row of the outer input.
1151///
1152/// The subplan can reference variables bound by the outer input. Results are
1153/// concatenated (cross-product per row).
1154#[derive(Debug, Clone)]
1155pub struct ApplyOp {
1156    /// Outer input providing rows.
1157    pub input: Box<LogicalOperator>,
1158    /// Subplan to evaluate per outer row.
1159    pub subplan: Box<LogicalOperator>,
1160    /// Variables imported from the outer scope into the inner plan.
1161    /// When non-empty, the planner injects these via `ParameterState`.
1162    pub shared_variables: Vec<String>,
1163}
1164
1165/// Parameter scan: leaf operator for correlated subquery inner plans.
1166///
1167/// Emits a single row containing the values injected from the outer Apply.
1168/// Column names correspond to the outer variables imported via WITH.
1169#[derive(Debug, Clone)]
1170pub struct ParameterScanOp {
1171    /// Column names for the injected parameters.
1172    pub columns: Vec<String>,
1173}
1174
1175/// Left outer join for OPTIONAL patterns.
1176#[derive(Debug, Clone)]
1177pub struct LeftJoinOp {
1178    /// Left (required) input.
1179    pub left: Box<LogicalOperator>,
1180    /// Right (optional) input.
1181    pub right: Box<LogicalOperator>,
1182    /// Optional filter condition.
1183    pub condition: Option<LogicalExpression>,
1184}
1185
1186/// Anti-join for MINUS patterns.
1187#[derive(Debug, Clone)]
1188pub struct AntiJoinOp {
1189    /// Left input (results to keep if no match on right).
1190    pub left: Box<LogicalOperator>,
1191    /// Right input (patterns to exclude).
1192    pub right: Box<LogicalOperator>,
1193}
1194
1195/// Bind a variable to an expression.
1196#[derive(Debug, Clone)]
1197pub struct BindOp {
1198    /// Expression to compute.
1199    pub expression: LogicalExpression,
1200    /// Variable to bind the result to.
1201    pub variable: String,
1202    /// Input operator.
1203    pub input: Box<LogicalOperator>,
1204}
1205
1206/// Unwind a list into individual rows.
1207///
1208/// For each input row, evaluates the expression (which should return a list)
1209/// and emits one row for each element in the list.
1210#[derive(Debug, Clone)]
1211pub struct UnwindOp {
1212    /// The list expression to unwind.
1213    pub expression: LogicalExpression,
1214    /// The variable name for each element.
1215    pub variable: String,
1216    /// Optional variable for 1-based element position (ORDINALITY).
1217    pub ordinality_var: Option<String>,
1218    /// Optional variable for 0-based element position (OFFSET).
1219    pub offset_var: Option<String>,
1220    /// Input operator.
1221    pub input: Box<LogicalOperator>,
1222}
1223
1224/// Collect grouped key-value rows into a single Map value.
1225/// Used for Gremlin `groupCount()` semantics.
1226#[derive(Debug, Clone)]
1227pub struct MapCollectOp {
1228    /// Variable holding the map key.
1229    pub key_var: String,
1230    /// Variable holding the map value.
1231    pub value_var: String,
1232    /// Output variable alias.
1233    pub alias: String,
1234    /// Input operator (typically a grouped aggregate).
1235    pub input: Box<LogicalOperator>,
1236}
1237
1238/// Merge a pattern (match or create).
1239///
1240/// MERGE tries to match a pattern in the graph. If found, returns the existing
1241/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
1242/// (optionally applying ON CREATE SET).
1243#[derive(Debug, Clone)]
1244pub struct MergeOp {
1245    /// The node to merge.
1246    pub variable: String,
1247    /// Labels to match/create.
1248    pub labels: Vec<String>,
1249    /// Properties that must match (used for both matching and creation).
1250    pub match_properties: Vec<(String, LogicalExpression)>,
1251    /// Properties to set on CREATE.
1252    pub on_create: Vec<(String, LogicalExpression)>,
1253    /// Properties to set on MATCH.
1254    pub on_match: Vec<(String, LogicalExpression)>,
1255    /// Input operator.
1256    pub input: Box<LogicalOperator>,
1257}
1258
1259/// Merge a relationship pattern (match or create between two bound nodes).
1260///
1261/// MERGE on a relationship tries to find an existing relationship of the given type
1262/// between the source and target nodes. If found, returns the existing relationship
1263/// (optionally applying ON MATCH SET). If not found, creates it (optionally applying
1264/// ON CREATE SET).
1265#[derive(Debug, Clone)]
1266pub struct MergeRelationshipOp {
1267    /// Variable to bind the relationship to.
1268    pub variable: String,
1269    /// Source node variable (must already be bound).
1270    pub source_variable: String,
1271    /// Target node variable (must already be bound).
1272    pub target_variable: String,
1273    /// Relationship type.
1274    pub edge_type: String,
1275    /// Properties that must match (used for both matching and creation).
1276    pub match_properties: Vec<(String, LogicalExpression)>,
1277    /// Properties to set on CREATE.
1278    pub on_create: Vec<(String, LogicalExpression)>,
1279    /// Properties to set on MATCH.
1280    pub on_match: Vec<(String, LogicalExpression)>,
1281    /// Input operator.
1282    pub input: Box<LogicalOperator>,
1283}
1284
1285/// Find shortest path between two nodes.
1286///
1287/// This operator uses Dijkstra's algorithm to find the shortest path(s)
1288/// between a source node and a target node, optionally filtered by edge type.
1289#[derive(Debug, Clone)]
1290pub struct ShortestPathOp {
1291    /// Input operator providing source/target nodes.
1292    pub input: Box<LogicalOperator>,
1293    /// Variable name for the source node.
1294    pub source_var: String,
1295    /// Variable name for the target node.
1296    pub target_var: String,
1297    /// Edge type filter (empty = match all types, multiple = match any).
1298    pub edge_types: Vec<String>,
1299    /// Direction of edge traversal.
1300    pub direction: ExpandDirection,
1301    /// Variable name to bind the path result.
1302    pub path_alias: String,
1303    /// Whether to find all shortest paths (vs. just one).
1304    pub all_paths: bool,
1305}
1306
1307// ==================== SPARQL Update Operators ====================
1308
1309/// Insert RDF triples.
1310#[derive(Debug, Clone)]
1311pub struct InsertTripleOp {
1312    /// Subject of the triple.
1313    pub subject: TripleComponent,
1314    /// Predicate of the triple.
1315    pub predicate: TripleComponent,
1316    /// Object of the triple.
1317    pub object: TripleComponent,
1318    /// Named graph (optional).
1319    pub graph: Option<String>,
1320    /// Input operator (provides variable bindings).
1321    pub input: Option<Box<LogicalOperator>>,
1322}
1323
1324/// Delete RDF triples.
1325#[derive(Debug, Clone)]
1326pub struct DeleteTripleOp {
1327    /// Subject pattern.
1328    pub subject: TripleComponent,
1329    /// Predicate pattern.
1330    pub predicate: TripleComponent,
1331    /// Object pattern.
1332    pub object: TripleComponent,
1333    /// Named graph (optional).
1334    pub graph: Option<String>,
1335    /// Input operator (provides variable bindings).
1336    pub input: Option<Box<LogicalOperator>>,
1337}
1338
1339/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
1340///
1341/// Per SPARQL 1.1 Update spec, this operator:
1342/// 1. Evaluates the WHERE clause once to get bindings
1343/// 2. Applies DELETE templates using those bindings
1344/// 3. Applies INSERT templates using the SAME bindings
1345///
1346/// This ensures DELETE and INSERT see consistent data.
1347#[derive(Debug, Clone)]
1348pub struct ModifyOp {
1349    /// DELETE triple templates (patterns with variables).
1350    pub delete_templates: Vec<TripleTemplate>,
1351    /// INSERT triple templates (patterns with variables).
1352    pub insert_templates: Vec<TripleTemplate>,
1353    /// WHERE clause that provides variable bindings.
1354    pub where_clause: Box<LogicalOperator>,
1355    /// Named graph context (for WITH clause).
1356    pub graph: Option<String>,
1357}
1358
1359/// A triple template for DELETE/INSERT operations.
1360#[derive(Debug, Clone)]
1361pub struct TripleTemplate {
1362    /// Subject (may be a variable).
1363    pub subject: TripleComponent,
1364    /// Predicate (may be a variable).
1365    pub predicate: TripleComponent,
1366    /// Object (may be a variable or literal).
1367    pub object: TripleComponent,
1368    /// Named graph (optional).
1369    pub graph: Option<String>,
1370}
1371
1372/// Clear all triples from a graph.
1373#[derive(Debug, Clone)]
1374pub struct ClearGraphOp {
1375    /// Target graph (None = default graph, Some("") = all named, Some(iri) = specific graph).
1376    pub graph: Option<String>,
1377    /// Whether to silently ignore errors.
1378    pub silent: bool,
1379}
1380
1381/// Create a new named graph.
1382#[derive(Debug, Clone)]
1383pub struct CreateGraphOp {
1384    /// IRI of the graph to create.
1385    pub graph: String,
1386    /// Whether to silently ignore if graph already exists.
1387    pub silent: bool,
1388}
1389
1390/// Drop (remove) a named graph.
1391#[derive(Debug, Clone)]
1392pub struct DropGraphOp {
1393    /// Target graph (None = default graph).
1394    pub graph: Option<String>,
1395    /// Whether to silently ignore errors.
1396    pub silent: bool,
1397}
1398
1399/// Load data from a URL into a graph.
1400#[derive(Debug, Clone)]
1401pub struct LoadGraphOp {
1402    /// Source URL to load data from.
1403    pub source: String,
1404    /// Destination graph (None = default graph).
1405    pub destination: Option<String>,
1406    /// Whether to silently ignore errors.
1407    pub silent: bool,
1408}
1409
1410/// Copy triples from one graph to another.
1411#[derive(Debug, Clone)]
1412pub struct CopyGraphOp {
1413    /// Source graph.
1414    pub source: Option<String>,
1415    /// Destination graph.
1416    pub destination: Option<String>,
1417    /// Whether to silently ignore errors.
1418    pub silent: bool,
1419}
1420
1421/// Move triples from one graph to another.
1422#[derive(Debug, Clone)]
1423pub struct MoveGraphOp {
1424    /// Source graph.
1425    pub source: Option<String>,
1426    /// Destination graph.
1427    pub destination: Option<String>,
1428    /// Whether to silently ignore errors.
1429    pub silent: bool,
1430}
1431
1432/// Add (merge) triples from one graph to another.
1433#[derive(Debug, Clone)]
1434pub struct AddGraphOp {
1435    /// Source graph.
1436    pub source: Option<String>,
1437    /// Destination graph.
1438    pub destination: Option<String>,
1439    /// Whether to silently ignore errors.
1440    pub silent: bool,
1441}
1442
1443// ==================== Vector Search Operators ====================
1444
1445/// Vector similarity scan operation.
1446///
1447/// Performs approximate nearest neighbor search using a vector index (HNSW)
1448/// or brute-force search for small datasets. Returns nodes/edges whose
1449/// embeddings are similar to the query vector.
1450///
1451/// # Example GQL
1452///
1453/// ```gql
1454/// MATCH (m:Movie)
1455/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
1456/// RETURN m.title
1457/// ```
1458#[derive(Debug, Clone)]
1459pub struct VectorScanOp {
1460    /// Variable name to bind matching entities to.
1461    pub variable: String,
1462    /// Name of the vector index to use (None = brute-force).
1463    pub index_name: Option<String>,
1464    /// Property containing the vector embedding.
1465    pub property: String,
1466    /// Optional label filter (scan only nodes with this label).
1467    pub label: Option<String>,
1468    /// The query vector expression.
1469    pub query_vector: LogicalExpression,
1470    /// Number of nearest neighbors to return.
1471    pub k: usize,
1472    /// Distance metric (None = use index default, typically cosine).
1473    pub metric: Option<VectorMetric>,
1474    /// Minimum similarity threshold (filters results below this).
1475    pub min_similarity: Option<f32>,
1476    /// Maximum distance threshold (filters results above this).
1477    pub max_distance: Option<f32>,
1478    /// Input operator (for hybrid queries combining graph + vector).
1479    pub input: Option<Box<LogicalOperator>>,
1480}
1481
1482/// Vector distance/similarity metric for vector scan operations.
1483#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1484pub enum VectorMetric {
1485    /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
1486    Cosine,
1487    /// Euclidean (L2) distance. Best when magnitude matters.
1488    Euclidean,
1489    /// Dot product. Best for maximum inner product search.
1490    DotProduct,
1491    /// Manhattan (L1) distance. Less sensitive to outliers.
1492    Manhattan,
1493}
1494
1495/// Join graph patterns with vector similarity search.
1496///
1497/// This operator takes entities from the left input and computes vector
1498/// similarity against a query vector, outputting (entity, distance) pairs.
1499///
1500/// # Use Cases
1501///
1502/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
1503/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
1504/// 3. **Filtering by similarity**: Join with threshold-based filtering
1505///
1506/// # Example
1507///
1508/// ```gql
1509/// // Find movies similar to what the user liked
1510/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
1511/// WITH avg(liked.embedding) AS user_taste
1512/// VECTOR JOIN (m:Movie) ON m.embedding
1513/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
1514/// RETURN m.title
1515/// ```
1516#[derive(Debug, Clone)]
1517pub struct VectorJoinOp {
1518    /// Input operator providing entities to match against.
1519    pub input: Box<LogicalOperator>,
1520    /// Variable from input to extract vectors from (for entity-to-entity similarity).
1521    /// If None, uses `query_vector` directly.
1522    pub left_vector_variable: Option<String>,
1523    /// Property containing the left vector (used with `left_vector_variable`).
1524    pub left_property: Option<String>,
1525    /// The query vector expression (constant or computed).
1526    pub query_vector: LogicalExpression,
1527    /// Variable name to bind the right-side matching entities.
1528    pub right_variable: String,
1529    /// Property containing the right-side vector embeddings.
1530    pub right_property: String,
1531    /// Optional label filter for right-side entities.
1532    pub right_label: Option<String>,
1533    /// Name of vector index on right side (None = brute-force).
1534    pub index_name: Option<String>,
1535    /// Number of nearest neighbors per left-side entity.
1536    pub k: usize,
1537    /// Distance metric.
1538    pub metric: Option<VectorMetric>,
1539    /// Minimum similarity threshold.
1540    pub min_similarity: Option<f32>,
1541    /// Maximum distance threshold.
1542    pub max_distance: Option<f32>,
1543    /// Variable to bind the distance/similarity score.
1544    pub score_variable: Option<String>,
1545}
1546
1547/// Return results (terminal operator).
1548#[derive(Debug, Clone)]
1549pub struct ReturnOp {
1550    /// Items to return.
1551    pub items: Vec<ReturnItem>,
1552    /// Whether to return distinct results.
1553    pub distinct: bool,
1554    /// Input operator.
1555    pub input: Box<LogicalOperator>,
1556}
1557
1558/// A single return item.
1559#[derive(Debug, Clone)]
1560pub struct ReturnItem {
1561    /// Expression to return.
1562    pub expression: LogicalExpression,
1563    /// Alias for the result column.
1564    pub alias: Option<String>,
1565}
1566
1567/// Define a property graph schema (SQL/PGQ DDL).
1568#[derive(Debug, Clone)]
1569pub struct CreatePropertyGraphOp {
1570    /// Graph name.
1571    pub name: String,
1572    /// Node table schemas (label name + column definitions).
1573    pub node_tables: Vec<PropertyGraphNodeTable>,
1574    /// Edge table schemas (type name + column definitions + references).
1575    pub edge_tables: Vec<PropertyGraphEdgeTable>,
1576}
1577
1578/// A node table in a property graph definition.
1579#[derive(Debug, Clone)]
1580pub struct PropertyGraphNodeTable {
1581    /// Table name (maps to a node label).
1582    pub name: String,
1583    /// Column definitions as (name, type_name) pairs.
1584    pub columns: Vec<(String, String)>,
1585}
1586
1587/// An edge table in a property graph definition.
1588#[derive(Debug, Clone)]
1589pub struct PropertyGraphEdgeTable {
1590    /// Table name (maps to an edge type).
1591    pub name: String,
1592    /// Column definitions as (name, type_name) pairs.
1593    pub columns: Vec<(String, String)>,
1594    /// Source node table name.
1595    pub source_table: String,
1596    /// Target node table name.
1597    pub target_table: String,
1598}
1599
1600// ==================== Procedure Call Types ====================
1601
1602/// A CALL procedure operation.
1603///
1604/// ```text
1605/// CALL grafeo.pagerank({damping: 0.85}) YIELD nodeId, score
1606/// ```
1607#[derive(Debug, Clone)]
1608pub struct CallProcedureOp {
1609    /// Dotted procedure name, e.g. `["grafeo", "pagerank"]`.
1610    pub name: Vec<String>,
1611    /// Argument expressions (constants in Phase 1).
1612    pub arguments: Vec<LogicalExpression>,
1613    /// Optional YIELD clause: which columns to expose + aliases.
1614    pub yield_items: Option<Vec<ProcedureYield>>,
1615}
1616
1617/// A single YIELD item in a procedure call.
1618#[derive(Debug, Clone)]
1619pub struct ProcedureYield {
1620    /// Column name from the procedure result.
1621    pub field_name: String,
1622    /// Optional alias (YIELD score AS rank).
1623    pub alias: Option<String>,
1624}
1625
1626/// A logical expression.
1627#[derive(Debug, Clone)]
1628pub enum LogicalExpression {
1629    /// A literal value.
1630    Literal(Value),
1631
1632    /// A variable reference.
1633    Variable(String),
1634
1635    /// Property access (e.g., n.name).
1636    Property {
1637        /// The variable to access.
1638        variable: String,
1639        /// The property name.
1640        property: String,
1641    },
1642
1643    /// Binary operation.
1644    Binary {
1645        /// Left operand.
1646        left: Box<LogicalExpression>,
1647        /// Operator.
1648        op: BinaryOp,
1649        /// Right operand.
1650        right: Box<LogicalExpression>,
1651    },
1652
1653    /// Unary operation.
1654    Unary {
1655        /// Operator.
1656        op: UnaryOp,
1657        /// Operand.
1658        operand: Box<LogicalExpression>,
1659    },
1660
1661    /// Function call.
1662    FunctionCall {
1663        /// Function name.
1664        name: String,
1665        /// Arguments.
1666        args: Vec<LogicalExpression>,
1667        /// Whether DISTINCT is applied (e.g., COUNT(DISTINCT x)).
1668        distinct: bool,
1669    },
1670
1671    /// List literal.
1672    List(Vec<LogicalExpression>),
1673
1674    /// Map literal (e.g., {name: 'Alix', age: 30}).
1675    Map(Vec<(String, LogicalExpression)>),
1676
1677    /// Index access (e.g., `list[0]`).
1678    IndexAccess {
1679        /// The base expression (typically a list or string).
1680        base: Box<LogicalExpression>,
1681        /// The index expression.
1682        index: Box<LogicalExpression>,
1683    },
1684
1685    /// Slice access (e.g., list[1..3]).
1686    SliceAccess {
1687        /// The base expression (typically a list or string).
1688        base: Box<LogicalExpression>,
1689        /// Start index (None means from beginning).
1690        start: Option<Box<LogicalExpression>>,
1691        /// End index (None means to end).
1692        end: Option<Box<LogicalExpression>>,
1693    },
1694
1695    /// CASE expression.
1696    Case {
1697        /// Test expression (for simple CASE).
1698        operand: Option<Box<LogicalExpression>>,
1699        /// WHEN clauses.
1700        when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
1701        /// ELSE clause.
1702        else_clause: Option<Box<LogicalExpression>>,
1703    },
1704
1705    /// Parameter reference.
1706    Parameter(String),
1707
1708    /// Labels of a node.
1709    Labels(String),
1710
1711    /// Type of an edge.
1712    Type(String),
1713
1714    /// ID of a node or edge.
1715    Id(String),
1716
1717    /// List comprehension: [x IN list WHERE predicate | expression]
1718    ListComprehension {
1719        /// Variable name for each element.
1720        variable: String,
1721        /// The source list expression.
1722        list_expr: Box<LogicalExpression>,
1723        /// Optional filter predicate.
1724        filter_expr: Option<Box<LogicalExpression>>,
1725        /// The mapping expression for each element.
1726        map_expr: Box<LogicalExpression>,
1727    },
1728
1729    /// List predicate: all/any/none/single(x IN list WHERE pred).
1730    ListPredicate {
1731        /// The kind of list predicate.
1732        kind: ListPredicateKind,
1733        /// The iteration variable name.
1734        variable: String,
1735        /// The source list expression.
1736        list_expr: Box<LogicalExpression>,
1737        /// The predicate to test for each element.
1738        predicate: Box<LogicalExpression>,
1739    },
1740
1741    /// EXISTS subquery.
1742    ExistsSubquery(Box<LogicalOperator>),
1743
1744    /// COUNT subquery.
1745    CountSubquery(Box<LogicalOperator>),
1746
1747    /// Map projection: `node { .prop1, .prop2, key: expr, .* }`.
1748    MapProjection {
1749        /// The base variable name.
1750        base: String,
1751        /// Projection entries (property selectors, literal entries, all-properties).
1752        entries: Vec<MapProjectionEntry>,
1753    },
1754
1755    /// reduce() accumulator: `reduce(acc = init, x IN list | expr)`.
1756    Reduce {
1757        /// Accumulator variable name.
1758        accumulator: String,
1759        /// Initial value for the accumulator.
1760        initial: Box<LogicalExpression>,
1761        /// Iteration variable name.
1762        variable: String,
1763        /// List to iterate over.
1764        list: Box<LogicalExpression>,
1765        /// Body expression evaluated per iteration (references both accumulator and variable).
1766        expression: Box<LogicalExpression>,
1767    },
1768
1769    /// Pattern comprehension: `[(pattern) WHERE pred | expr]`.
1770    ///
1771    /// Executes the inner subplan, evaluates the projection for each row,
1772    /// and collects the results into a list.
1773    PatternComprehension {
1774        /// The subplan produced by translating the pattern (+optional WHERE).
1775        subplan: Box<LogicalOperator>,
1776        /// The projection expression evaluated for each match.
1777        projection: Box<LogicalExpression>,
1778    },
1779}
1780
1781/// An entry in a map projection.
1782#[derive(Debug, Clone)]
1783pub enum MapProjectionEntry {
1784    /// `.propertyName`: shorthand for `propertyName: base.propertyName`.
1785    PropertySelector(String),
1786    /// `key: expression`: explicit key-value pair.
1787    LiteralEntry(String, LogicalExpression),
1788    /// `.*`: include all properties of the base entity.
1789    AllProperties,
1790}
1791
1792/// The kind of list predicate function.
1793#[derive(Debug, Clone, PartialEq, Eq)]
1794pub enum ListPredicateKind {
1795    /// all(x IN list WHERE pred): true if pred holds for every element.
1796    All,
1797    /// any(x IN list WHERE pred): true if pred holds for at least one element.
1798    Any,
1799    /// none(x IN list WHERE pred): true if pred holds for no element.
1800    None,
1801    /// single(x IN list WHERE pred): true if pred holds for exactly one element.
1802    Single,
1803}
1804
1805/// Binary operator.
1806#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1807pub enum BinaryOp {
1808    /// Equality comparison (=).
1809    Eq,
1810    /// Inequality comparison (<>).
1811    Ne,
1812    /// Less than (<).
1813    Lt,
1814    /// Less than or equal (<=).
1815    Le,
1816    /// Greater than (>).
1817    Gt,
1818    /// Greater than or equal (>=).
1819    Ge,
1820
1821    /// Logical AND.
1822    And,
1823    /// Logical OR.
1824    Or,
1825    /// Logical XOR.
1826    Xor,
1827
1828    /// Addition (+).
1829    Add,
1830    /// Subtraction (-).
1831    Sub,
1832    /// Multiplication (*).
1833    Mul,
1834    /// Division (/).
1835    Div,
1836    /// Modulo (%).
1837    Mod,
1838
1839    /// String concatenation.
1840    Concat,
1841    /// String starts with.
1842    StartsWith,
1843    /// String ends with.
1844    EndsWith,
1845    /// String contains.
1846    Contains,
1847
1848    /// Collection membership (IN).
1849    In,
1850    /// Pattern matching (LIKE).
1851    Like,
1852    /// Regex matching (=~).
1853    Regex,
1854    /// Power/exponentiation (^).
1855    Pow,
1856}
1857
1858/// Unary operator.
1859#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1860pub enum UnaryOp {
1861    /// Logical NOT.
1862    Not,
1863    /// Numeric negation.
1864    Neg,
1865    /// IS NULL check.
1866    IsNull,
1867    /// IS NOT NULL check.
1868    IsNotNull,
1869}
1870
1871#[cfg(test)]
1872mod tests {
1873    use super::*;
1874
1875    #[test]
1876    fn test_simple_node_scan_plan() {
1877        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1878            items: vec![ReturnItem {
1879                expression: LogicalExpression::Variable("n".into()),
1880                alias: None,
1881            }],
1882            distinct: false,
1883            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1884                variable: "n".into(),
1885                label: Some("Person".into()),
1886                input: None,
1887            })),
1888        }));
1889
1890        // Verify structure
1891        if let LogicalOperator::Return(ret) = &plan.root {
1892            assert_eq!(ret.items.len(), 1);
1893            assert!(!ret.distinct);
1894            if let LogicalOperator::NodeScan(scan) = ret.input.as_ref() {
1895                assert_eq!(scan.variable, "n");
1896                assert_eq!(scan.label, Some("Person".into()));
1897            } else {
1898                panic!("Expected NodeScan");
1899            }
1900        } else {
1901            panic!("Expected Return");
1902        }
1903    }
1904
1905    #[test]
1906    fn test_filter_plan() {
1907        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1908            items: vec![ReturnItem {
1909                expression: LogicalExpression::Property {
1910                    variable: "n".into(),
1911                    property: "name".into(),
1912                },
1913                alias: Some("name".into()),
1914            }],
1915            distinct: false,
1916            input: Box::new(LogicalOperator::Filter(FilterOp {
1917                predicate: LogicalExpression::Binary {
1918                    left: Box::new(LogicalExpression::Property {
1919                        variable: "n".into(),
1920                        property: "age".into(),
1921                    }),
1922                    op: BinaryOp::Gt,
1923                    right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1924                },
1925                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1926                    variable: "n".into(),
1927                    label: Some("Person".into()),
1928                    input: None,
1929                })),
1930                pushdown_hint: None,
1931            })),
1932        }));
1933
1934        if let LogicalOperator::Return(ret) = &plan.root {
1935            if let LogicalOperator::Filter(filter) = ret.input.as_ref() {
1936                if let LogicalExpression::Binary { op, .. } = &filter.predicate {
1937                    assert_eq!(*op, BinaryOp::Gt);
1938                } else {
1939                    panic!("Expected Binary expression");
1940                }
1941            } else {
1942                panic!("Expected Filter");
1943            }
1944        } else {
1945            panic!("Expected Return");
1946        }
1947    }
1948}