grafeo_engine/query/plan.rs
1//! Logical query plan representation.
2//!
3//! The logical plan is the intermediate representation between parsed queries
4//! and physical execution. Both GQL and Cypher queries are translated to this
5//! common representation.
6
7use std::collections::HashMap;
8use std::fmt;
9
10use grafeo_common::types::Value;
11
12/// A count expression for SKIP/LIMIT: either a resolved literal or an unresolved parameter.
13#[derive(Debug, Clone, PartialEq, Eq)]
14pub enum CountExpr {
15 /// A resolved integer count.
16 Literal(usize),
17 /// An unresolved parameter reference (e.g., `$limit`).
18 Parameter(String),
19}
20
21impl CountExpr {
22 /// Returns the resolved count, or panics if still a parameter reference.
23 ///
24 /// Call this only after parameter substitution has run.
25 pub fn value(&self) -> usize {
26 match self {
27 Self::Literal(n) => *n,
28 Self::Parameter(name) => panic!("Unresolved parameter: ${name}"),
29 }
30 }
31
32 /// Returns the resolved count, or an error if still a parameter reference.
33 pub fn try_value(&self) -> Result<usize, String> {
34 match self {
35 Self::Literal(n) => Ok(*n),
36 Self::Parameter(name) => Err(format!("Unresolved SKIP/LIMIT parameter: ${name}")),
37 }
38 }
39
40 /// Returns the count as f64 for cardinality estimation (defaults to 10 for unresolved params).
41 pub fn estimate(&self) -> f64 {
42 match self {
43 Self::Literal(n) => *n as f64,
44 Self::Parameter(_) => 10.0, // reasonable default for unresolved params
45 }
46 }
47}
48
49impl fmt::Display for CountExpr {
50 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51 match self {
52 Self::Literal(n) => write!(f, "{n}"),
53 Self::Parameter(name) => write!(f, "${name}"),
54 }
55 }
56}
57
58impl From<usize> for CountExpr {
59 fn from(n: usize) -> Self {
60 Self::Literal(n)
61 }
62}
63
64impl PartialEq<usize> for CountExpr {
65 fn eq(&self, other: &usize) -> bool {
66 matches!(self, Self::Literal(n) if n == other)
67 }
68}
69
70/// A logical query plan.
71#[derive(Debug, Clone)]
72pub struct LogicalPlan {
73 /// The root operator of the plan.
74 pub root: LogicalOperator,
75 /// When true, return the plan tree as text instead of executing.
76 pub explain: bool,
77 /// When true, execute the query and return per-operator runtime metrics.
78 pub profile: bool,
79 /// Default parameter values from variable declarations (e.g., GraphQL
80 /// `query($limit: Int = 2)`). The processor merges these with caller-supplied
81 /// params, giving caller values higher precedence.
82 pub default_params: HashMap<String, Value>,
83}
84
85impl LogicalPlan {
86 /// Creates a new logical plan with the given root operator.
87 pub fn new(root: LogicalOperator) -> Self {
88 Self {
89 root,
90 explain: false,
91 profile: false,
92 default_params: HashMap::new(),
93 }
94 }
95
96 /// Creates an EXPLAIN plan that returns the plan tree without executing.
97 pub fn explain(root: LogicalOperator) -> Self {
98 Self {
99 root,
100 explain: true,
101 profile: false,
102 default_params: HashMap::new(),
103 }
104 }
105
106 /// Creates a PROFILE plan that executes and returns per-operator metrics.
107 pub fn profile(root: LogicalOperator) -> Self {
108 Self {
109 root,
110 explain: false,
111 profile: true,
112 default_params: HashMap::new(),
113 }
114 }
115}
116
117/// A logical operator in the query plan.
118#[derive(Debug, Clone)]
119pub enum LogicalOperator {
120 /// Scan all nodes, optionally filtered by label.
121 NodeScan(NodeScanOp),
122
123 /// Scan all edges, optionally filtered by type.
124 EdgeScan(EdgeScanOp),
125
126 /// Expand from nodes to neighbors via edges.
127 Expand(ExpandOp),
128
129 /// Filter rows based on a predicate.
130 Filter(FilterOp),
131
132 /// Project specific columns.
133 Project(ProjectOp),
134
135 /// Join two inputs.
136 Join(JoinOp),
137
138 /// Aggregate with grouping.
139 Aggregate(AggregateOp),
140
141 /// Limit the number of results.
142 Limit(LimitOp),
143
144 /// Skip a number of results.
145 Skip(SkipOp),
146
147 /// Sort results.
148 Sort(SortOp),
149
150 /// Remove duplicate results.
151 Distinct(DistinctOp),
152
153 /// Create a new node.
154 CreateNode(CreateNodeOp),
155
156 /// Create a new edge.
157 CreateEdge(CreateEdgeOp),
158
159 /// Delete a node.
160 DeleteNode(DeleteNodeOp),
161
162 /// Delete an edge.
163 DeleteEdge(DeleteEdgeOp),
164
165 /// Set properties on a node or edge.
166 SetProperty(SetPropertyOp),
167
168 /// Add labels to a node.
169 AddLabel(AddLabelOp),
170
171 /// Remove labels from a node.
172 RemoveLabel(RemoveLabelOp),
173
174 /// Return results (terminal operator).
175 Return(ReturnOp),
176
177 /// Empty result set.
178 Empty,
179
180 // ==================== RDF/SPARQL Operators ====================
181 /// Scan RDF triples matching a pattern.
182 TripleScan(TripleScanOp),
183
184 /// Union of multiple result sets.
185 Union(UnionOp),
186
187 /// Left outer join for OPTIONAL patterns.
188 LeftJoin(LeftJoinOp),
189
190 /// Anti-join for MINUS patterns.
191 AntiJoin(AntiJoinOp),
192
193 /// Bind a variable to an expression.
194 Bind(BindOp),
195
196 /// Unwind a list into individual rows.
197 Unwind(UnwindOp),
198
199 /// Collect grouped key-value rows into a single Map value.
200 /// Used for Gremlin `groupCount()` semantics.
201 MapCollect(MapCollectOp),
202
203 /// Merge a node pattern (match or create).
204 Merge(MergeOp),
205
206 /// Merge a relationship pattern (match or create).
207 MergeRelationship(MergeRelationshipOp),
208
209 /// Find shortest path between nodes.
210 ShortestPath(ShortestPathOp),
211
212 // ==================== SPARQL Update Operators ====================
213 /// Insert RDF triples.
214 InsertTriple(InsertTripleOp),
215
216 /// Delete RDF triples.
217 DeleteTriple(DeleteTripleOp),
218
219 /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
220 /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
221 Modify(ModifyOp),
222
223 /// Clear a graph (remove all triples).
224 ClearGraph(ClearGraphOp),
225
226 /// Create a new named graph.
227 CreateGraph(CreateGraphOp),
228
229 /// Drop (remove) a named graph.
230 DropGraph(DropGraphOp),
231
232 /// Load data from a URL into a graph.
233 LoadGraph(LoadGraphOp),
234
235 /// Copy triples from one graph to another.
236 CopyGraph(CopyGraphOp),
237
238 /// Move triples from one graph to another.
239 MoveGraph(MoveGraphOp),
240
241 /// Add (merge) triples from one graph to another.
242 AddGraph(AddGraphOp),
243
244 /// Per-row aggregation over a list-valued column (horizontal aggregation, GE09).
245 HorizontalAggregate(HorizontalAggregateOp),
246
247 // ==================== Vector Search Operators ====================
248 /// Scan using vector similarity search.
249 VectorScan(VectorScanOp),
250
251 /// Join graph patterns with vector similarity search.
252 ///
253 /// Computes vector distances between entities from the left input and
254 /// a query vector, then joins with similarity scores. Useful for:
255 /// - Filtering graph traversal results by vector similarity
256 /// - Computing aggregated embeddings and finding similar entities
257 /// - Combining multiple vector sources with graph structure
258 VectorJoin(VectorJoinOp),
259
260 // ==================== Set Operations ====================
261 /// Set difference: rows in left that are not in right.
262 Except(ExceptOp),
263
264 /// Set intersection: rows common to all inputs.
265 Intersect(IntersectOp),
266
267 /// Fallback: use left result if non-empty, otherwise right.
268 Otherwise(OtherwiseOp),
269
270 // ==================== Correlated Subquery ====================
271 /// Apply (lateral join): evaluate a subplan per input row.
272 Apply(ApplyOp),
273
274 /// Parameter scan: leaf of a correlated inner plan that receives values
275 /// from the outer Apply operator. The column names match `ApplyOp.shared_variables`.
276 ParameterScan(ParameterScanOp),
277
278 // ==================== DDL Operators ====================
279 /// Define a property graph schema (SQL/PGQ DDL).
280 CreatePropertyGraph(CreatePropertyGraphOp),
281
282 // ==================== Multi-Way Join ====================
283 /// Multi-way join using worst-case optimal join (leapfrog).
284 /// Used for cyclic patterns (triangles, cliques) with 3+ relations.
285 MultiWayJoin(MultiWayJoinOp),
286
287 // ==================== Procedure Call Operators ====================
288 /// Invoke a stored procedure (CALL ... YIELD).
289 CallProcedure(CallProcedureOp),
290
291 // ==================== Data Import Operators ====================
292 /// Load data from a file (CSV, JSONL, or Parquet), producing one row per record.
293 LoadData(LoadDataOp),
294}
295
296impl LogicalOperator {
297 /// Returns `true` if this operator or any of its children perform mutations.
298 #[must_use]
299 pub fn has_mutations(&self) -> bool {
300 match self {
301 // Direct mutation operators
302 Self::CreateNode(_)
303 | Self::CreateEdge(_)
304 | Self::DeleteNode(_)
305 | Self::DeleteEdge(_)
306 | Self::SetProperty(_)
307 | Self::AddLabel(_)
308 | Self::RemoveLabel(_)
309 | Self::Merge(_)
310 | Self::MergeRelationship(_)
311 | Self::InsertTriple(_)
312 | Self::DeleteTriple(_)
313 | Self::Modify(_)
314 | Self::ClearGraph(_)
315 | Self::CreateGraph(_)
316 | Self::DropGraph(_)
317 | Self::LoadGraph(_)
318 | Self::CopyGraph(_)
319 | Self::MoveGraph(_)
320 | Self::AddGraph(_)
321 | Self::CreatePropertyGraph(_) => true,
322
323 // Operators with an `input` child
324 Self::Filter(op) => op.input.has_mutations(),
325 Self::Project(op) => op.input.has_mutations(),
326 Self::Aggregate(op) => op.input.has_mutations(),
327 Self::Limit(op) => op.input.has_mutations(),
328 Self::Skip(op) => op.input.has_mutations(),
329 Self::Sort(op) => op.input.has_mutations(),
330 Self::Distinct(op) => op.input.has_mutations(),
331 Self::Unwind(op) => op.input.has_mutations(),
332 Self::Bind(op) => op.input.has_mutations(),
333 Self::MapCollect(op) => op.input.has_mutations(),
334 Self::Return(op) => op.input.has_mutations(),
335 Self::HorizontalAggregate(op) => op.input.has_mutations(),
336 Self::VectorScan(_) | Self::VectorJoin(_) => false,
337
338 // Operators with two children
339 Self::Join(op) => op.left.has_mutations() || op.right.has_mutations(),
340 Self::LeftJoin(op) => op.left.has_mutations() || op.right.has_mutations(),
341 Self::AntiJoin(op) => op.left.has_mutations() || op.right.has_mutations(),
342 Self::Except(op) => op.left.has_mutations() || op.right.has_mutations(),
343 Self::Intersect(op) => op.left.has_mutations() || op.right.has_mutations(),
344 Self::Otherwise(op) => op.left.has_mutations() || op.right.has_mutations(),
345 Self::Union(op) => op.inputs.iter().any(|i| i.has_mutations()),
346 Self::MultiWayJoin(op) => op.inputs.iter().any(|i| i.has_mutations()),
347 Self::Apply(op) => op.input.has_mutations() || op.subplan.has_mutations(),
348
349 // Leaf operators (read-only)
350 Self::NodeScan(_)
351 | Self::EdgeScan(_)
352 | Self::Expand(_)
353 | Self::TripleScan(_)
354 | Self::ShortestPath(_)
355 | Self::Empty
356 | Self::ParameterScan(_)
357 | Self::CallProcedure(_)
358 | Self::LoadData(_) => false,
359 }
360 }
361
362 /// Returns references to the child operators.
363 ///
364 /// Used by [`crate::query::profile::build_profile_tree`] to walk the logical
365 /// plan tree in post-order, matching operators to profiling entries.
366 #[must_use]
367 pub fn children(&self) -> Vec<&LogicalOperator> {
368 match self {
369 // Optional single input
370 Self::NodeScan(op) => op.input.as_deref().into_iter().collect(),
371 Self::EdgeScan(op) => op.input.as_deref().into_iter().collect(),
372 Self::TripleScan(op) => op.input.as_deref().into_iter().collect(),
373 Self::VectorScan(op) => op.input.as_deref().into_iter().collect(),
374 Self::CreateNode(op) => op.input.as_deref().into_iter().collect(),
375 Self::InsertTriple(op) => op.input.as_deref().into_iter().collect(),
376 Self::DeleteTriple(op) => op.input.as_deref().into_iter().collect(),
377
378 // Single required input
379 Self::Expand(op) => vec![&*op.input],
380 Self::Filter(op) => vec![&*op.input],
381 Self::Project(op) => vec![&*op.input],
382 Self::Aggregate(op) => vec![&*op.input],
383 Self::Limit(op) => vec![&*op.input],
384 Self::Skip(op) => vec![&*op.input],
385 Self::Sort(op) => vec![&*op.input],
386 Self::Distinct(op) => vec![&*op.input],
387 Self::Return(op) => vec![&*op.input],
388 Self::Unwind(op) => vec![&*op.input],
389 Self::Bind(op) => vec![&*op.input],
390 Self::MapCollect(op) => vec![&*op.input],
391 Self::ShortestPath(op) => vec![&*op.input],
392 Self::Merge(op) => vec![&*op.input],
393 Self::MergeRelationship(op) => vec![&*op.input],
394 Self::CreateEdge(op) => vec![&*op.input],
395 Self::DeleteNode(op) => vec![&*op.input],
396 Self::DeleteEdge(op) => vec![&*op.input],
397 Self::SetProperty(op) => vec![&*op.input],
398 Self::AddLabel(op) => vec![&*op.input],
399 Self::RemoveLabel(op) => vec![&*op.input],
400 Self::HorizontalAggregate(op) => vec![&*op.input],
401 Self::VectorJoin(op) => vec![&*op.input],
402 Self::Modify(op) => vec![&*op.where_clause],
403
404 // Two children (left + right)
405 Self::Join(op) => vec![&*op.left, &*op.right],
406 Self::LeftJoin(op) => vec![&*op.left, &*op.right],
407 Self::AntiJoin(op) => vec![&*op.left, &*op.right],
408 Self::Except(op) => vec![&*op.left, &*op.right],
409 Self::Intersect(op) => vec![&*op.left, &*op.right],
410 Self::Otherwise(op) => vec![&*op.left, &*op.right],
411
412 // Two children (input + subplan)
413 Self::Apply(op) => vec![&*op.input, &*op.subplan],
414
415 // Vec children
416 Self::Union(op) => op.inputs.iter().collect(),
417 Self::MultiWayJoin(op) => op.inputs.iter().collect(),
418
419 // Leaf operators
420 Self::Empty
421 | Self::ParameterScan(_)
422 | Self::CallProcedure(_)
423 | Self::ClearGraph(_)
424 | Self::CreateGraph(_)
425 | Self::DropGraph(_)
426 | Self::LoadGraph(_)
427 | Self::CopyGraph(_)
428 | Self::MoveGraph(_)
429 | Self::AddGraph(_)
430 | Self::CreatePropertyGraph(_)
431 | Self::LoadData(_) => vec![],
432 }
433 }
434
435 /// Returns a compact display label for this operator, used in PROFILE output.
436 #[must_use]
437 pub fn display_label(&self) -> String {
438 match self {
439 Self::NodeScan(op) => {
440 let label = op.label.as_deref().unwrap_or("*");
441 format!("{}:{}", op.variable, label)
442 }
443 Self::EdgeScan(op) => {
444 let types = if op.edge_types.is_empty() {
445 "*".to_string()
446 } else {
447 op.edge_types.join("|")
448 };
449 format!("{}:{}", op.variable, types)
450 }
451 Self::Expand(op) => {
452 let types = if op.edge_types.is_empty() {
453 "*".to_string()
454 } else {
455 op.edge_types.join("|")
456 };
457 let dir = match op.direction {
458 ExpandDirection::Outgoing => "->",
459 ExpandDirection::Incoming => "<-",
460 ExpandDirection::Both => "--",
461 };
462 format!(
463 "({from}){dir}[:{types}]{dir}({to})",
464 from = op.from_variable,
465 to = op.to_variable,
466 )
467 }
468 Self::Filter(op) => {
469 let hint = match &op.pushdown_hint {
470 Some(PushdownHint::IndexLookup { property }) => {
471 format!(" [index: {property}]")
472 }
473 Some(PushdownHint::RangeScan { property }) => {
474 format!(" [range: {property}]")
475 }
476 Some(PushdownHint::LabelFirst) => " [label-first]".to_string(),
477 None => String::new(),
478 };
479 format!("{}{hint}", fmt_expr(&op.predicate))
480 }
481 Self::Project(op) => {
482 let cols: Vec<String> = op
483 .projections
484 .iter()
485 .map(|p| match &p.alias {
486 Some(alias) => alias.clone(),
487 None => fmt_expr(&p.expression),
488 })
489 .collect();
490 cols.join(", ")
491 }
492 Self::Join(op) => format!("{:?}", op.join_type),
493 Self::Aggregate(op) => {
494 let groups: Vec<String> = op.group_by.iter().map(fmt_expr).collect();
495 format!("group: [{}]", groups.join(", "))
496 }
497 Self::Limit(op) => format!("{}", op.count),
498 Self::Skip(op) => format!("{}", op.count),
499 Self::Sort(op) => {
500 let keys: Vec<String> = op
501 .keys
502 .iter()
503 .map(|k| {
504 let dir = match k.order {
505 SortOrder::Ascending => "ASC",
506 SortOrder::Descending => "DESC",
507 };
508 format!("{} {dir}", fmt_expr(&k.expression))
509 })
510 .collect();
511 keys.join(", ")
512 }
513 Self::Distinct(_) => String::new(),
514 Self::Return(op) => {
515 let items: Vec<String> = op
516 .items
517 .iter()
518 .map(|item| match &item.alias {
519 Some(alias) => alias.clone(),
520 None => fmt_expr(&item.expression),
521 })
522 .collect();
523 items.join(", ")
524 }
525 Self::Union(op) => format!("{} branches", op.inputs.len()),
526 Self::MultiWayJoin(op) => {
527 format!("{} inputs", op.inputs.len())
528 }
529 Self::LeftJoin(_) => String::new(),
530 Self::AntiJoin(_) => String::new(),
531 Self::Unwind(op) => op.variable.clone(),
532 Self::Bind(op) => op.variable.clone(),
533 Self::MapCollect(op) => op.alias.clone(),
534 Self::ShortestPath(op) => {
535 format!("{} -> {}", op.source_var, op.target_var)
536 }
537 Self::Merge(op) => op.variable.clone(),
538 Self::MergeRelationship(op) => op.variable.clone(),
539 Self::CreateNode(op) => {
540 let labels = op.labels.join(":");
541 format!("{}:{labels}", op.variable)
542 }
543 Self::CreateEdge(op) => {
544 format!(
545 "[{}:{}]",
546 op.variable.as_deref().unwrap_or("?"),
547 op.edge_type
548 )
549 }
550 Self::DeleteNode(op) => op.variable.clone(),
551 Self::DeleteEdge(op) => op.variable.clone(),
552 Self::SetProperty(op) => op.variable.clone(),
553 Self::AddLabel(op) => {
554 let labels = op.labels.join(":");
555 format!("{}:{labels}", op.variable)
556 }
557 Self::RemoveLabel(op) => {
558 let labels = op.labels.join(":");
559 format!("{}:{labels}", op.variable)
560 }
561 Self::CallProcedure(op) => op.name.join("."),
562 Self::LoadData(op) => format!("{} AS {}", op.path, op.variable),
563 Self::Apply(_) => String::new(),
564 Self::VectorScan(op) => op.variable.clone(),
565 Self::VectorJoin(op) => op.right_variable.clone(),
566 _ => String::new(),
567 }
568 }
569}
570
571impl LogicalOperator {
572 /// Formats this operator tree as a human-readable plan for EXPLAIN output.
573 pub fn explain_tree(&self) -> String {
574 let mut output = String::new();
575 self.fmt_tree(&mut output, 0);
576 output
577 }
578
579 fn fmt_tree(&self, out: &mut String, depth: usize) {
580 use std::fmt::Write;
581
582 let indent = " ".repeat(depth);
583 match self {
584 Self::NodeScan(op) => {
585 let label = op.label.as_deref().unwrap_or("*");
586 let _ = writeln!(out, "{indent}NodeScan ({var}:{label})", var = op.variable);
587 if let Some(input) = &op.input {
588 input.fmt_tree(out, depth + 1);
589 }
590 }
591 Self::EdgeScan(op) => {
592 let types = if op.edge_types.is_empty() {
593 "*".to_string()
594 } else {
595 op.edge_types.join("|")
596 };
597 let _ = writeln!(out, "{indent}EdgeScan ({var}:{types})", var = op.variable);
598 }
599 Self::Expand(op) => {
600 let types = if op.edge_types.is_empty() {
601 "*".to_string()
602 } else {
603 op.edge_types.join("|")
604 };
605 let dir = match op.direction {
606 ExpandDirection::Outgoing => "->",
607 ExpandDirection::Incoming => "<-",
608 ExpandDirection::Both => "--",
609 };
610 let hops = match (op.min_hops, op.max_hops) {
611 (1, Some(1)) => String::new(),
612 (min, Some(max)) if min == max => format!("*{min}"),
613 (min, Some(max)) => format!("*{min}..{max}"),
614 (min, None) => format!("*{min}.."),
615 };
616 let _ = writeln!(
617 out,
618 "{indent}Expand ({from}){dir}[:{types}{hops}]{dir}({to})",
619 from = op.from_variable,
620 to = op.to_variable,
621 );
622 op.input.fmt_tree(out, depth + 1);
623 }
624 Self::Filter(op) => {
625 let hint = match &op.pushdown_hint {
626 Some(PushdownHint::IndexLookup { property }) => {
627 format!(" [index: {property}]")
628 }
629 Some(PushdownHint::RangeScan { property }) => {
630 format!(" [range: {property}]")
631 }
632 Some(PushdownHint::LabelFirst) => " [label-first]".to_string(),
633 None => String::new(),
634 };
635 let _ = writeln!(
636 out,
637 "{indent}Filter ({expr}){hint}",
638 expr = fmt_expr(&op.predicate)
639 );
640 op.input.fmt_tree(out, depth + 1);
641 }
642 Self::Project(op) => {
643 let cols: Vec<String> = op
644 .projections
645 .iter()
646 .map(|p| {
647 let expr = fmt_expr(&p.expression);
648 match &p.alias {
649 Some(alias) => format!("{expr} AS {alias}"),
650 None => expr,
651 }
652 })
653 .collect();
654 let _ = writeln!(out, "{indent}Project ({cols})", cols = cols.join(", "));
655 op.input.fmt_tree(out, depth + 1);
656 }
657 Self::Join(op) => {
658 let _ = writeln!(out, "{indent}Join ({ty:?})", ty = op.join_type);
659 op.left.fmt_tree(out, depth + 1);
660 op.right.fmt_tree(out, depth + 1);
661 }
662 Self::Aggregate(op) => {
663 let groups: Vec<String> = op.group_by.iter().map(fmt_expr).collect();
664 let aggs: Vec<String> = op
665 .aggregates
666 .iter()
667 .map(|a| {
668 let func = format!("{:?}", a.function).to_lowercase();
669 match &a.alias {
670 Some(alias) => format!("{func}(...) AS {alias}"),
671 None => format!("{func}(...)"),
672 }
673 })
674 .collect();
675 let _ = writeln!(
676 out,
677 "{indent}Aggregate (group: [{groups}], aggs: [{aggs}])",
678 groups = groups.join(", "),
679 aggs = aggs.join(", "),
680 );
681 op.input.fmt_tree(out, depth + 1);
682 }
683 Self::Limit(op) => {
684 let _ = writeln!(out, "{indent}Limit ({})", op.count);
685 op.input.fmt_tree(out, depth + 1);
686 }
687 Self::Skip(op) => {
688 let _ = writeln!(out, "{indent}Skip ({})", op.count);
689 op.input.fmt_tree(out, depth + 1);
690 }
691 Self::Sort(op) => {
692 let keys: Vec<String> = op
693 .keys
694 .iter()
695 .map(|k| {
696 let dir = match k.order {
697 SortOrder::Ascending => "ASC",
698 SortOrder::Descending => "DESC",
699 };
700 format!("{} {dir}", fmt_expr(&k.expression))
701 })
702 .collect();
703 let _ = writeln!(out, "{indent}Sort ({keys})", keys = keys.join(", "));
704 op.input.fmt_tree(out, depth + 1);
705 }
706 Self::Distinct(op) => {
707 let _ = writeln!(out, "{indent}Distinct");
708 op.input.fmt_tree(out, depth + 1);
709 }
710 Self::Return(op) => {
711 let items: Vec<String> = op
712 .items
713 .iter()
714 .map(|item| {
715 let expr = fmt_expr(&item.expression);
716 match &item.alias {
717 Some(alias) => format!("{expr} AS {alias}"),
718 None => expr,
719 }
720 })
721 .collect();
722 let distinct = if op.distinct { " DISTINCT" } else { "" };
723 let _ = writeln!(
724 out,
725 "{indent}Return{distinct} ({items})",
726 items = items.join(", ")
727 );
728 op.input.fmt_tree(out, depth + 1);
729 }
730 Self::Union(op) => {
731 let _ = writeln!(out, "{indent}Union ({n} branches)", n = op.inputs.len());
732 for input in &op.inputs {
733 input.fmt_tree(out, depth + 1);
734 }
735 }
736 Self::MultiWayJoin(op) => {
737 let vars = op.shared_variables.join(", ");
738 let _ = writeln!(
739 out,
740 "{indent}MultiWayJoin ({n} inputs, shared: [{vars}])",
741 n = op.inputs.len()
742 );
743 for input in &op.inputs {
744 input.fmt_tree(out, depth + 1);
745 }
746 }
747 Self::LeftJoin(op) => {
748 if let Some(cond) = &op.condition {
749 let _ = writeln!(out, "{indent}LeftJoin (condition: {cond:?})");
750 } else {
751 let _ = writeln!(out, "{indent}LeftJoin");
752 }
753 op.left.fmt_tree(out, depth + 1);
754 op.right.fmt_tree(out, depth + 1);
755 }
756 Self::AntiJoin(op) => {
757 let _ = writeln!(out, "{indent}AntiJoin");
758 op.left.fmt_tree(out, depth + 1);
759 op.right.fmt_tree(out, depth + 1);
760 }
761 Self::Unwind(op) => {
762 let _ = writeln!(out, "{indent}Unwind ({var})", var = op.variable);
763 op.input.fmt_tree(out, depth + 1);
764 }
765 Self::Bind(op) => {
766 let _ = writeln!(out, "{indent}Bind ({var})", var = op.variable);
767 op.input.fmt_tree(out, depth + 1);
768 }
769 Self::MapCollect(op) => {
770 let _ = writeln!(
771 out,
772 "{indent}MapCollect ({key} -> {val} AS {alias})",
773 key = op.key_var,
774 val = op.value_var,
775 alias = op.alias
776 );
777 op.input.fmt_tree(out, depth + 1);
778 }
779 Self::Apply(op) => {
780 let _ = writeln!(out, "{indent}Apply");
781 op.input.fmt_tree(out, depth + 1);
782 op.subplan.fmt_tree(out, depth + 1);
783 }
784 Self::Except(op) => {
785 let all = if op.all { " ALL" } else { "" };
786 let _ = writeln!(out, "{indent}Except{all}");
787 op.left.fmt_tree(out, depth + 1);
788 op.right.fmt_tree(out, depth + 1);
789 }
790 Self::Intersect(op) => {
791 let all = if op.all { " ALL" } else { "" };
792 let _ = writeln!(out, "{indent}Intersect{all}");
793 op.left.fmt_tree(out, depth + 1);
794 op.right.fmt_tree(out, depth + 1);
795 }
796 Self::Otherwise(op) => {
797 let _ = writeln!(out, "{indent}Otherwise");
798 op.left.fmt_tree(out, depth + 1);
799 op.right.fmt_tree(out, depth + 1);
800 }
801 Self::ShortestPath(op) => {
802 let _ = writeln!(
803 out,
804 "{indent}ShortestPath ({from} -> {to})",
805 from = op.source_var,
806 to = op.target_var
807 );
808 op.input.fmt_tree(out, depth + 1);
809 }
810 Self::Merge(op) => {
811 let _ = writeln!(out, "{indent}Merge ({var})", var = op.variable);
812 op.input.fmt_tree(out, depth + 1);
813 }
814 Self::MergeRelationship(op) => {
815 let _ = writeln!(out, "{indent}MergeRelationship ({var})", var = op.variable);
816 op.input.fmt_tree(out, depth + 1);
817 }
818 Self::CreateNode(op) => {
819 let labels = op.labels.join(":");
820 let _ = writeln!(
821 out,
822 "{indent}CreateNode ({var}:{labels})",
823 var = op.variable
824 );
825 if let Some(input) = &op.input {
826 input.fmt_tree(out, depth + 1);
827 }
828 }
829 Self::CreateEdge(op) => {
830 let var = op.variable.as_deref().unwrap_or("?");
831 let _ = writeln!(
832 out,
833 "{indent}CreateEdge ({from})-[{var}:{ty}]->({to})",
834 from = op.from_variable,
835 ty = op.edge_type,
836 to = op.to_variable
837 );
838 op.input.fmt_tree(out, depth + 1);
839 }
840 Self::DeleteNode(op) => {
841 let _ = writeln!(out, "{indent}DeleteNode ({var})", var = op.variable);
842 op.input.fmt_tree(out, depth + 1);
843 }
844 Self::DeleteEdge(op) => {
845 let _ = writeln!(out, "{indent}DeleteEdge ({var})", var = op.variable);
846 op.input.fmt_tree(out, depth + 1);
847 }
848 Self::SetProperty(op) => {
849 let props: Vec<String> = op
850 .properties
851 .iter()
852 .map(|(k, _)| format!("{}.{k}", op.variable))
853 .collect();
854 let _ = writeln!(
855 out,
856 "{indent}SetProperty ({props})",
857 props = props.join(", ")
858 );
859 op.input.fmt_tree(out, depth + 1);
860 }
861 Self::AddLabel(op) => {
862 let labels = op.labels.join(":");
863 let _ = writeln!(out, "{indent}AddLabel ({var}:{labels})", var = op.variable);
864 op.input.fmt_tree(out, depth + 1);
865 }
866 Self::RemoveLabel(op) => {
867 let labels = op.labels.join(":");
868 let _ = writeln!(
869 out,
870 "{indent}RemoveLabel ({var}:{labels})",
871 var = op.variable
872 );
873 op.input.fmt_tree(out, depth + 1);
874 }
875 Self::CallProcedure(op) => {
876 let _ = writeln!(
877 out,
878 "{indent}CallProcedure ({name})",
879 name = op.name.join(".")
880 );
881 }
882 Self::LoadData(op) => {
883 let format_name = match op.format {
884 LoadDataFormat::Csv => "LoadCsv",
885 LoadDataFormat::Jsonl => "LoadJsonl",
886 LoadDataFormat::Parquet => "LoadParquet",
887 };
888 let headers = if op.with_headers && op.format == LoadDataFormat::Csv {
889 " WITH HEADERS"
890 } else {
891 ""
892 };
893 let _ = writeln!(
894 out,
895 "{indent}{format_name}{headers} ('{path}' AS {var})",
896 path = op.path,
897 var = op.variable,
898 );
899 }
900 Self::TripleScan(op) => {
901 let _ = writeln!(
902 out,
903 "{indent}TripleScan ({s} {p} {o})",
904 s = fmt_triple_component(&op.subject),
905 p = fmt_triple_component(&op.predicate),
906 o = fmt_triple_component(&op.object)
907 );
908 if let Some(input) = &op.input {
909 input.fmt_tree(out, depth + 1);
910 }
911 }
912 Self::Empty => {
913 let _ = writeln!(out, "{indent}Empty");
914 }
915 // Remaining operators: show a simple name
916 _ => {
917 let _ = writeln!(out, "{indent}{:?}", std::mem::discriminant(self));
918 }
919 }
920 }
921}
922
923/// Format a logical expression compactly for EXPLAIN output.
924fn fmt_expr(expr: &LogicalExpression) -> String {
925 match expr {
926 LogicalExpression::Variable(name) => name.clone(),
927 LogicalExpression::Property { variable, property } => format!("{variable}.{property}"),
928 LogicalExpression::Literal(val) => format!("{val}"),
929 LogicalExpression::Binary { left, op, right } => {
930 format!("{} {op:?} {}", fmt_expr(left), fmt_expr(right))
931 }
932 LogicalExpression::Unary { op, operand } => {
933 format!("{op:?} {}", fmt_expr(operand))
934 }
935 LogicalExpression::FunctionCall { name, args, .. } => {
936 let arg_strs: Vec<String> = args.iter().map(fmt_expr).collect();
937 format!("{name}({})", arg_strs.join(", "))
938 }
939 _ => format!("{expr:?}"),
940 }
941}
942
943/// Format a triple component for EXPLAIN output.
944fn fmt_triple_component(comp: &TripleComponent) -> String {
945 match comp {
946 TripleComponent::Variable(name) => format!("?{name}"),
947 TripleComponent::Iri(iri) => format!("<{iri}>"),
948 TripleComponent::Literal(val) => format!("{val}"),
949 TripleComponent::LangLiteral { value, lang } => format!("\"{value}\"@{lang}"),
950 TripleComponent::BlankNode(label) => format!("_:{label}"),
951 }
952}
953
954/// Scan nodes from the graph.
955#[derive(Debug, Clone)]
956pub struct NodeScanOp {
957 /// Variable name to bind the node to.
958 pub variable: String,
959 /// Optional label filter.
960 pub label: Option<String>,
961 /// Child operator (if any, for chained patterns).
962 pub input: Option<Box<LogicalOperator>>,
963}
964
965/// Scan edges from the graph.
966#[derive(Debug, Clone)]
967pub struct EdgeScanOp {
968 /// Variable name to bind the edge to.
969 pub variable: String,
970 /// Edge type filter (empty = match all types).
971 pub edge_types: Vec<String>,
972 /// Child operator (if any).
973 pub input: Option<Box<LogicalOperator>>,
974}
975
976/// Path traversal mode for variable-length expansion.
977#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
978pub enum PathMode {
979 /// Allows repeated nodes and edges (default).
980 #[default]
981 Walk,
982 /// No repeated edges.
983 Trail,
984 /// No repeated nodes except endpoints.
985 Simple,
986 /// No repeated nodes at all.
987 Acyclic,
988}
989
990/// Expand from nodes to their neighbors.
991#[derive(Debug, Clone)]
992pub struct ExpandOp {
993 /// Source node variable.
994 pub from_variable: String,
995 /// Target node variable to bind.
996 pub to_variable: String,
997 /// Edge variable to bind (optional).
998 pub edge_variable: Option<String>,
999 /// Direction of expansion.
1000 pub direction: ExpandDirection,
1001 /// Edge type filter (empty = match all types, multiple = match any).
1002 pub edge_types: Vec<String>,
1003 /// Minimum hops (for variable-length patterns).
1004 pub min_hops: u32,
1005 /// Maximum hops (for variable-length patterns).
1006 pub max_hops: Option<u32>,
1007 /// Input operator.
1008 pub input: Box<LogicalOperator>,
1009 /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
1010 /// When set, a path length column will be output under this name.
1011 pub path_alias: Option<String>,
1012 /// Path traversal mode (WALK, TRAIL, SIMPLE, ACYCLIC).
1013 pub path_mode: PathMode,
1014}
1015
1016/// Direction for edge expansion.
1017#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1018pub enum ExpandDirection {
1019 /// Follow outgoing edges.
1020 Outgoing,
1021 /// Follow incoming edges.
1022 Incoming,
1023 /// Follow edges in either direction.
1024 Both,
1025}
1026
1027/// Join two inputs.
1028#[derive(Debug, Clone)]
1029pub struct JoinOp {
1030 /// Left input.
1031 pub left: Box<LogicalOperator>,
1032 /// Right input.
1033 pub right: Box<LogicalOperator>,
1034 /// Join type.
1035 pub join_type: JoinType,
1036 /// Join conditions.
1037 pub conditions: Vec<JoinCondition>,
1038}
1039
1040/// Join type.
1041#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1042pub enum JoinType {
1043 /// Inner join.
1044 Inner,
1045 /// Left outer join.
1046 Left,
1047 /// Right outer join.
1048 Right,
1049 /// Full outer join.
1050 Full,
1051 /// Cross join (Cartesian product).
1052 Cross,
1053 /// Semi join (returns left rows with matching right rows).
1054 Semi,
1055 /// Anti join (returns left rows without matching right rows).
1056 Anti,
1057}
1058
1059/// A join condition.
1060#[derive(Debug, Clone)]
1061pub struct JoinCondition {
1062 /// Left expression.
1063 pub left: LogicalExpression,
1064 /// Right expression.
1065 pub right: LogicalExpression,
1066}
1067
1068/// Multi-way join for worst-case optimal joins (leapfrog).
1069///
1070/// Unlike binary `JoinOp`, this joins 3+ relations simultaneously
1071/// using the leapfrog trie join algorithm. Preferred for cyclic patterns
1072/// (triangles, cliques) where cascading binary joins hit O(N^2).
1073#[derive(Debug, Clone)]
1074pub struct MultiWayJoinOp {
1075 /// Input relations (one per relation in the join).
1076 pub inputs: Vec<LogicalOperator>,
1077 /// All pairwise join conditions.
1078 pub conditions: Vec<JoinCondition>,
1079 /// Variables shared across multiple inputs (intersection keys).
1080 pub shared_variables: Vec<String>,
1081}
1082
1083/// Aggregate with grouping.
1084#[derive(Debug, Clone)]
1085pub struct AggregateOp {
1086 /// Group by expressions.
1087 pub group_by: Vec<LogicalExpression>,
1088 /// Aggregate functions.
1089 pub aggregates: Vec<AggregateExpr>,
1090 /// Input operator.
1091 pub input: Box<LogicalOperator>,
1092 /// HAVING clause filter (applied after aggregation).
1093 pub having: Option<LogicalExpression>,
1094}
1095
1096/// Whether a horizontal aggregate operates on edges or nodes.
1097#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1098pub enum EntityKind {
1099 /// Aggregate over edges in a path.
1100 Edge,
1101 /// Aggregate over nodes in a path.
1102 Node,
1103}
1104
1105/// Per-row aggregation over a list-valued column (horizontal aggregation, GE09).
1106///
1107/// For each input row, reads a list of entity IDs from `list_column`, accesses
1108/// `property` on each entity, computes the aggregate, and emits the scalar result.
1109#[derive(Debug, Clone)]
1110pub struct HorizontalAggregateOp {
1111 /// The list column name (e.g., `_path_edges_p`).
1112 pub list_column: String,
1113 /// Whether the list contains edge IDs or node IDs.
1114 pub entity_kind: EntityKind,
1115 /// The aggregate function to apply.
1116 pub function: AggregateFunction,
1117 /// The property to access on each entity.
1118 pub property: String,
1119 /// Output alias for the result column.
1120 pub alias: String,
1121 /// Input operator.
1122 pub input: Box<LogicalOperator>,
1123}
1124
1125/// An aggregate expression.
1126#[derive(Debug, Clone)]
1127pub struct AggregateExpr {
1128 /// Aggregate function.
1129 pub function: AggregateFunction,
1130 /// Expression to aggregate (first/only argument, y for binary set functions).
1131 pub expression: Option<LogicalExpression>,
1132 /// Second expression for binary set functions (x for COVAR, CORR, REGR_*).
1133 pub expression2: Option<LogicalExpression>,
1134 /// Whether to use DISTINCT.
1135 pub distinct: bool,
1136 /// Alias for the result.
1137 pub alias: Option<String>,
1138 /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
1139 pub percentile: Option<f64>,
1140 /// Separator string for GROUP_CONCAT / LISTAGG (defaults to space for GROUP_CONCAT, comma for LISTAGG).
1141 pub separator: Option<String>,
1142}
1143
1144/// Aggregate function.
1145#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1146pub enum AggregateFunction {
1147 /// Count all rows (COUNT(*)).
1148 Count,
1149 /// Count non-null values (COUNT(expr)).
1150 CountNonNull,
1151 /// Sum values.
1152 Sum,
1153 /// Average values.
1154 Avg,
1155 /// Minimum value.
1156 Min,
1157 /// Maximum value.
1158 Max,
1159 /// Collect into list.
1160 Collect,
1161 /// Sample standard deviation (STDEV).
1162 StdDev,
1163 /// Population standard deviation (STDEVP).
1164 StdDevPop,
1165 /// Sample variance (VAR_SAMP / VARIANCE).
1166 Variance,
1167 /// Population variance (VAR_POP).
1168 VariancePop,
1169 /// Discrete percentile (PERCENTILE_DISC).
1170 PercentileDisc,
1171 /// Continuous percentile (PERCENTILE_CONT).
1172 PercentileCont,
1173 /// Concatenate values with separator (GROUP_CONCAT).
1174 GroupConcat,
1175 /// Return an arbitrary value from the group (SAMPLE).
1176 Sample,
1177 /// Sample covariance (COVAR_SAMP(y, x)).
1178 CovarSamp,
1179 /// Population covariance (COVAR_POP(y, x)).
1180 CovarPop,
1181 /// Pearson correlation coefficient (CORR(y, x)).
1182 Corr,
1183 /// Regression slope (REGR_SLOPE(y, x)).
1184 RegrSlope,
1185 /// Regression intercept (REGR_INTERCEPT(y, x)).
1186 RegrIntercept,
1187 /// Coefficient of determination (REGR_R2(y, x)).
1188 RegrR2,
1189 /// Regression count of non-null pairs (REGR_COUNT(y, x)).
1190 RegrCount,
1191 /// Regression sum of squares for x (REGR_SXX(y, x)).
1192 RegrSxx,
1193 /// Regression sum of squares for y (REGR_SYY(y, x)).
1194 RegrSyy,
1195 /// Regression sum of cross-products (REGR_SXY(y, x)).
1196 RegrSxy,
1197 /// Regression average of x (REGR_AVGX(y, x)).
1198 RegrAvgx,
1199 /// Regression average of y (REGR_AVGY(y, x)).
1200 RegrAvgy,
1201}
1202
1203/// Hint about how a filter will be executed at the physical level.
1204///
1205/// Set during EXPLAIN annotation to communicate pushdown decisions.
1206#[derive(Debug, Clone)]
1207pub enum PushdownHint {
1208 /// Equality predicate resolved via a property index.
1209 IndexLookup {
1210 /// The indexed property name.
1211 property: String,
1212 },
1213 /// Range predicate resolved via a range/btree index.
1214 RangeScan {
1215 /// The indexed property name.
1216 property: String,
1217 },
1218 /// No index available, but label narrows the scan before filtering.
1219 LabelFirst,
1220}
1221
1222/// Filter rows based on a predicate.
1223#[derive(Debug, Clone)]
1224pub struct FilterOp {
1225 /// The filter predicate.
1226 pub predicate: LogicalExpression,
1227 /// Input operator.
1228 pub input: Box<LogicalOperator>,
1229 /// Optional hint about pushdown strategy (populated by EXPLAIN).
1230 pub pushdown_hint: Option<PushdownHint>,
1231}
1232
1233/// Project specific columns.
1234#[derive(Debug, Clone)]
1235pub struct ProjectOp {
1236 /// Columns to project.
1237 pub projections: Vec<Projection>,
1238 /// Input operator.
1239 pub input: Box<LogicalOperator>,
1240 /// When true, all input columns are passed through and the explicit
1241 /// projections are appended as additional output columns. Used by GQL
1242 /// LET clauses which add bindings without replacing the existing scope.
1243 pub pass_through_input: bool,
1244}
1245
1246/// A single projection (column selection or computation).
1247#[derive(Debug, Clone)]
1248pub struct Projection {
1249 /// Expression to compute.
1250 pub expression: LogicalExpression,
1251 /// Alias for the result.
1252 pub alias: Option<String>,
1253}
1254
1255/// Limit the number of results.
1256#[derive(Debug, Clone)]
1257pub struct LimitOp {
1258 /// Maximum number of rows to return (literal or parameter reference).
1259 pub count: CountExpr,
1260 /// Input operator.
1261 pub input: Box<LogicalOperator>,
1262}
1263
1264/// Skip a number of results.
1265#[derive(Debug, Clone)]
1266pub struct SkipOp {
1267 /// Number of rows to skip (literal or parameter reference).
1268 pub count: CountExpr,
1269 /// Input operator.
1270 pub input: Box<LogicalOperator>,
1271}
1272
1273/// Sort results.
1274#[derive(Debug, Clone)]
1275pub struct SortOp {
1276 /// Sort keys.
1277 pub keys: Vec<SortKey>,
1278 /// Input operator.
1279 pub input: Box<LogicalOperator>,
1280}
1281
1282/// A sort key.
1283#[derive(Debug, Clone)]
1284pub struct SortKey {
1285 /// Expression to sort by.
1286 pub expression: LogicalExpression,
1287 /// Sort order.
1288 pub order: SortOrder,
1289 /// Optional null ordering (NULLS FIRST / NULLS LAST).
1290 pub nulls: Option<NullsOrdering>,
1291}
1292
1293/// Sort order.
1294#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1295pub enum SortOrder {
1296 /// Ascending order.
1297 Ascending,
1298 /// Descending order.
1299 Descending,
1300}
1301
1302/// Null ordering for sort operations.
1303#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1304pub enum NullsOrdering {
1305 /// Nulls sort before all non-null values.
1306 First,
1307 /// Nulls sort after all non-null values.
1308 Last,
1309}
1310
1311/// Remove duplicate results.
1312#[derive(Debug, Clone)]
1313pub struct DistinctOp {
1314 /// Input operator.
1315 pub input: Box<LogicalOperator>,
1316 /// Optional columns to use for deduplication.
1317 /// If None, all columns are used.
1318 pub columns: Option<Vec<String>>,
1319}
1320
1321/// Create a new node.
1322#[derive(Debug, Clone)]
1323pub struct CreateNodeOp {
1324 /// Variable name to bind the created node to.
1325 pub variable: String,
1326 /// Labels for the new node.
1327 pub labels: Vec<String>,
1328 /// Properties for the new node.
1329 pub properties: Vec<(String, LogicalExpression)>,
1330 /// Input operator (for chained creates).
1331 pub input: Option<Box<LogicalOperator>>,
1332}
1333
1334/// Create a new edge.
1335#[derive(Debug, Clone)]
1336pub struct CreateEdgeOp {
1337 /// Variable name to bind the created edge to.
1338 pub variable: Option<String>,
1339 /// Source node variable.
1340 pub from_variable: String,
1341 /// Target node variable.
1342 pub to_variable: String,
1343 /// Edge type.
1344 pub edge_type: String,
1345 /// Properties for the new edge.
1346 pub properties: Vec<(String, LogicalExpression)>,
1347 /// Input operator.
1348 pub input: Box<LogicalOperator>,
1349}
1350
1351/// Delete a node.
1352#[derive(Debug, Clone)]
1353pub struct DeleteNodeOp {
1354 /// Variable of the node to delete.
1355 pub variable: String,
1356 /// Whether to detach (delete connected edges) before deleting.
1357 pub detach: bool,
1358 /// Input operator.
1359 pub input: Box<LogicalOperator>,
1360}
1361
1362/// Delete an edge.
1363#[derive(Debug, Clone)]
1364pub struct DeleteEdgeOp {
1365 /// Variable of the edge to delete.
1366 pub variable: String,
1367 /// Input operator.
1368 pub input: Box<LogicalOperator>,
1369}
1370
1371/// Set properties on a node or edge.
1372#[derive(Debug, Clone)]
1373pub struct SetPropertyOp {
1374 /// Variable of the entity to update.
1375 pub variable: String,
1376 /// Properties to set (name -> expression).
1377 pub properties: Vec<(String, LogicalExpression)>,
1378 /// Whether to replace all properties (vs. merge).
1379 pub replace: bool,
1380 /// Whether the target variable is an edge (vs. node).
1381 pub is_edge: bool,
1382 /// Input operator.
1383 pub input: Box<LogicalOperator>,
1384}
1385
1386/// Add labels to a node.
1387#[derive(Debug, Clone)]
1388pub struct AddLabelOp {
1389 /// Variable of the node to update.
1390 pub variable: String,
1391 /// Labels to add.
1392 pub labels: Vec<String>,
1393 /// Input operator.
1394 pub input: Box<LogicalOperator>,
1395}
1396
1397/// Remove labels from a node.
1398#[derive(Debug, Clone)]
1399pub struct RemoveLabelOp {
1400 /// Variable of the node to update.
1401 pub variable: String,
1402 /// Labels to remove.
1403 pub labels: Vec<String>,
1404 /// Input operator.
1405 pub input: Box<LogicalOperator>,
1406}
1407
1408// ==================== RDF/SPARQL Operators ====================
1409
1410/// SPARQL dataset restriction from FROM / FROM NAMED clauses.
1411///
1412/// When present, restricts which graphs are visible to a triple scan:
1413/// - `default_graphs`: IRIs whose union forms the default graph (basic patterns).
1414/// - `named_graphs`: IRIs that enumerate the available named graphs (GRAPH patterns).
1415#[derive(Debug, Clone, Default)]
1416pub struct DatasetRestriction {
1417 /// FROM IRIs: the default graph is the union of these named graphs.
1418 /// Empty means no FROM clause was specified (unrestricted default graph).
1419 pub default_graphs: Vec<String>,
1420 /// FROM NAMED IRIs: only these named graphs are available to GRAPH patterns.
1421 /// Empty means no FROM NAMED clause was specified (all named graphs visible).
1422 pub named_graphs: Vec<String>,
1423}
1424
1425/// Scan RDF triples matching a pattern.
1426#[derive(Debug, Clone)]
1427pub struct TripleScanOp {
1428 /// Subject pattern (variable name or IRI).
1429 pub subject: TripleComponent,
1430 /// Predicate pattern (variable name or IRI).
1431 pub predicate: TripleComponent,
1432 /// Object pattern (variable name, IRI, or literal).
1433 pub object: TripleComponent,
1434 /// Named graph (optional).
1435 pub graph: Option<TripleComponent>,
1436 /// Input operator (for chained patterns).
1437 pub input: Option<Box<LogicalOperator>>,
1438 /// Dataset restriction from SPARQL FROM / FROM NAMED clauses.
1439 pub dataset: Option<DatasetRestriction>,
1440}
1441
1442/// A component of a triple pattern.
1443#[derive(Debug, Clone)]
1444pub enum TripleComponent {
1445 /// A variable to bind.
1446 Variable(String),
1447 /// A constant IRI.
1448 Iri(String),
1449 /// A constant literal value.
1450 Literal(Value),
1451 /// A language-tagged string literal (RDF `rdf:langString`).
1452 ///
1453 /// Carries the lexical value and the BCP47 language tag separately so that
1454 /// the tag survives the translator to planner to RDF store round-trip.
1455 LangLiteral {
1456 /// The lexical string value.
1457 value: String,
1458 /// BCP47 language tag, e.g. `"fr"`, `"en-GB"`.
1459 lang: String,
1460 },
1461 /// A blank node with a scoped label (used in INSERT DATA).
1462 BlankNode(String),
1463}
1464
1465/// Union of multiple result sets.
1466#[derive(Debug, Clone)]
1467pub struct UnionOp {
1468 /// Inputs to union together.
1469 pub inputs: Vec<LogicalOperator>,
1470}
1471
1472/// Set difference: rows in left that are not in right.
1473#[derive(Debug, Clone)]
1474pub struct ExceptOp {
1475 /// Left input.
1476 pub left: Box<LogicalOperator>,
1477 /// Right input (rows to exclude).
1478 pub right: Box<LogicalOperator>,
1479 /// If true, preserve duplicates (EXCEPT ALL); if false, deduplicate (EXCEPT DISTINCT).
1480 pub all: bool,
1481}
1482
1483/// Set intersection: rows common to both inputs.
1484#[derive(Debug, Clone)]
1485pub struct IntersectOp {
1486 /// Left input.
1487 pub left: Box<LogicalOperator>,
1488 /// Right input.
1489 pub right: Box<LogicalOperator>,
1490 /// If true, preserve duplicates (INTERSECT ALL); if false, deduplicate (INTERSECT DISTINCT).
1491 pub all: bool,
1492}
1493
1494/// Fallback operator: use left result if non-empty, otherwise use right.
1495#[derive(Debug, Clone)]
1496pub struct OtherwiseOp {
1497 /// Primary input (preferred).
1498 pub left: Box<LogicalOperator>,
1499 /// Fallback input (used only if left produces zero rows).
1500 pub right: Box<LogicalOperator>,
1501}
1502
1503/// Apply (lateral join): evaluate a subplan for each row of the outer input.
1504///
1505/// The subplan can reference variables bound by the outer input. Results are
1506/// concatenated (cross-product per row).
1507#[derive(Debug, Clone)]
1508pub struct ApplyOp {
1509 /// Outer input providing rows.
1510 pub input: Box<LogicalOperator>,
1511 /// Subplan to evaluate per outer row.
1512 pub subplan: Box<LogicalOperator>,
1513 /// Variables imported from the outer scope into the inner plan.
1514 /// When non-empty, the planner injects these via `ParameterState`.
1515 pub shared_variables: Vec<String>,
1516 /// When true, uses left-join semantics: outer rows with no matching inner
1517 /// rows are emitted with NULLs for the inner columns (OPTIONAL CALL).
1518 pub optional: bool,
1519}
1520
1521/// Parameter scan: leaf operator for correlated subquery inner plans.
1522///
1523/// Emits a single row containing the values injected from the outer Apply.
1524/// Column names correspond to the outer variables imported via WITH.
1525#[derive(Debug, Clone)]
1526pub struct ParameterScanOp {
1527 /// Column names for the injected parameters.
1528 pub columns: Vec<String>,
1529}
1530
1531/// Left outer join for OPTIONAL patterns.
1532#[derive(Debug, Clone)]
1533pub struct LeftJoinOp {
1534 /// Left (required) input.
1535 pub left: Box<LogicalOperator>,
1536 /// Right (optional) input.
1537 pub right: Box<LogicalOperator>,
1538 /// Optional filter condition.
1539 pub condition: Option<LogicalExpression>,
1540}
1541
1542/// Anti-join for MINUS patterns.
1543#[derive(Debug, Clone)]
1544pub struct AntiJoinOp {
1545 /// Left input (results to keep if no match on right).
1546 pub left: Box<LogicalOperator>,
1547 /// Right input (patterns to exclude).
1548 pub right: Box<LogicalOperator>,
1549}
1550
1551/// Bind a variable to an expression.
1552#[derive(Debug, Clone)]
1553pub struct BindOp {
1554 /// Expression to compute.
1555 pub expression: LogicalExpression,
1556 /// Variable to bind the result to.
1557 pub variable: String,
1558 /// Input operator.
1559 pub input: Box<LogicalOperator>,
1560}
1561
1562/// Unwind a list into individual rows.
1563///
1564/// For each input row, evaluates the expression (which should return a list)
1565/// and emits one row for each element in the list.
1566#[derive(Debug, Clone)]
1567pub struct UnwindOp {
1568 /// The list expression to unwind.
1569 pub expression: LogicalExpression,
1570 /// The variable name for each element.
1571 pub variable: String,
1572 /// Optional variable for 1-based element position (ORDINALITY).
1573 pub ordinality_var: Option<String>,
1574 /// Optional variable for 0-based element position (OFFSET).
1575 pub offset_var: Option<String>,
1576 /// Input operator.
1577 pub input: Box<LogicalOperator>,
1578}
1579
1580/// Collect grouped key-value rows into a single Map value.
1581/// Used for Gremlin `groupCount()` semantics.
1582#[derive(Debug, Clone)]
1583pub struct MapCollectOp {
1584 /// Variable holding the map key.
1585 pub key_var: String,
1586 /// Variable holding the map value.
1587 pub value_var: String,
1588 /// Output variable alias.
1589 pub alias: String,
1590 /// Input operator (typically a grouped aggregate).
1591 pub input: Box<LogicalOperator>,
1592}
1593
1594/// Merge a pattern (match or create).
1595///
1596/// MERGE tries to match a pattern in the graph. If found, returns the existing
1597/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
1598/// (optionally applying ON CREATE SET).
1599#[derive(Debug, Clone)]
1600pub struct MergeOp {
1601 /// The node to merge.
1602 pub variable: String,
1603 /// Labels to match/create.
1604 pub labels: Vec<String>,
1605 /// Properties that must match (used for both matching and creation).
1606 pub match_properties: Vec<(String, LogicalExpression)>,
1607 /// Properties to set on CREATE.
1608 pub on_create: Vec<(String, LogicalExpression)>,
1609 /// Properties to set on MATCH.
1610 pub on_match: Vec<(String, LogicalExpression)>,
1611 /// Input operator.
1612 pub input: Box<LogicalOperator>,
1613}
1614
1615/// Merge a relationship pattern (match or create between two bound nodes).
1616///
1617/// MERGE on a relationship tries to find an existing relationship of the given type
1618/// between the source and target nodes. If found, returns the existing relationship
1619/// (optionally applying ON MATCH SET). If not found, creates it (optionally applying
1620/// ON CREATE SET).
1621#[derive(Debug, Clone)]
1622pub struct MergeRelationshipOp {
1623 /// Variable to bind the relationship to.
1624 pub variable: String,
1625 /// Source node variable (must already be bound).
1626 pub source_variable: String,
1627 /// Target node variable (must already be bound).
1628 pub target_variable: String,
1629 /// Relationship type.
1630 pub edge_type: String,
1631 /// Properties that must match (used for both matching and creation).
1632 pub match_properties: Vec<(String, LogicalExpression)>,
1633 /// Properties to set on CREATE.
1634 pub on_create: Vec<(String, LogicalExpression)>,
1635 /// Properties to set on MATCH.
1636 pub on_match: Vec<(String, LogicalExpression)>,
1637 /// Input operator.
1638 pub input: Box<LogicalOperator>,
1639}
1640
1641/// Find shortest path between two nodes.
1642///
1643/// This operator uses Dijkstra's algorithm to find the shortest path(s)
1644/// between a source node and a target node, optionally filtered by edge type.
1645#[derive(Debug, Clone)]
1646pub struct ShortestPathOp {
1647 /// Input operator providing source/target nodes.
1648 pub input: Box<LogicalOperator>,
1649 /// Variable name for the source node.
1650 pub source_var: String,
1651 /// Variable name for the target node.
1652 pub target_var: String,
1653 /// Edge type filter (empty = match all types, multiple = match any).
1654 pub edge_types: Vec<String>,
1655 /// Direction of edge traversal.
1656 pub direction: ExpandDirection,
1657 /// Variable name to bind the path result.
1658 pub path_alias: String,
1659 /// Whether to find all shortest paths (vs. just one).
1660 pub all_paths: bool,
1661}
1662
1663// ==================== SPARQL Update Operators ====================
1664
1665/// Insert RDF triples.
1666#[derive(Debug, Clone)]
1667pub struct InsertTripleOp {
1668 /// Subject of the triple.
1669 pub subject: TripleComponent,
1670 /// Predicate of the triple.
1671 pub predicate: TripleComponent,
1672 /// Object of the triple.
1673 pub object: TripleComponent,
1674 /// Named graph (optional).
1675 pub graph: Option<String>,
1676 /// Input operator (provides variable bindings).
1677 pub input: Option<Box<LogicalOperator>>,
1678}
1679
1680/// Delete RDF triples.
1681#[derive(Debug, Clone)]
1682pub struct DeleteTripleOp {
1683 /// Subject pattern.
1684 pub subject: TripleComponent,
1685 /// Predicate pattern.
1686 pub predicate: TripleComponent,
1687 /// Object pattern.
1688 pub object: TripleComponent,
1689 /// Named graph (optional).
1690 pub graph: Option<String>,
1691 /// Input operator (provides variable bindings).
1692 pub input: Option<Box<LogicalOperator>>,
1693}
1694
1695/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
1696///
1697/// Per SPARQL 1.1 Update spec, this operator:
1698/// 1. Evaluates the WHERE clause once to get bindings
1699/// 2. Applies DELETE templates using those bindings
1700/// 3. Applies INSERT templates using the SAME bindings
1701///
1702/// This ensures DELETE and INSERT see consistent data.
1703#[derive(Debug, Clone)]
1704pub struct ModifyOp {
1705 /// DELETE triple templates (patterns with variables).
1706 pub delete_templates: Vec<TripleTemplate>,
1707 /// INSERT triple templates (patterns with variables).
1708 pub insert_templates: Vec<TripleTemplate>,
1709 /// WHERE clause that provides variable bindings.
1710 pub where_clause: Box<LogicalOperator>,
1711 /// Named graph context (for WITH clause).
1712 pub graph: Option<String>,
1713}
1714
1715/// A triple template for DELETE/INSERT operations.
1716#[derive(Debug, Clone)]
1717pub struct TripleTemplate {
1718 /// Subject (may be a variable).
1719 pub subject: TripleComponent,
1720 /// Predicate (may be a variable).
1721 pub predicate: TripleComponent,
1722 /// Object (may be a variable or literal).
1723 pub object: TripleComponent,
1724 /// Named graph (optional).
1725 pub graph: Option<String>,
1726}
1727
1728/// Clear all triples from a graph.
1729#[derive(Debug, Clone)]
1730pub struct ClearGraphOp {
1731 /// Target graph (None = default graph, Some("") = all named, Some(iri) = specific graph).
1732 pub graph: Option<String>,
1733 /// Whether to silently ignore errors.
1734 pub silent: bool,
1735}
1736
1737/// Create a new named graph.
1738#[derive(Debug, Clone)]
1739pub struct CreateGraphOp {
1740 /// IRI of the graph to create.
1741 pub graph: String,
1742 /// Whether to silently ignore if graph already exists.
1743 pub silent: bool,
1744}
1745
1746/// Drop (remove) a named graph.
1747#[derive(Debug, Clone)]
1748pub struct DropGraphOp {
1749 /// Target graph (None = default graph).
1750 pub graph: Option<String>,
1751 /// Whether to silently ignore errors.
1752 pub silent: bool,
1753}
1754
1755/// Load data from a URL into a graph.
1756#[derive(Debug, Clone)]
1757pub struct LoadGraphOp {
1758 /// Source URL to load data from.
1759 pub source: String,
1760 /// Destination graph (None = default graph).
1761 pub destination: Option<String>,
1762 /// Whether to silently ignore errors.
1763 pub silent: bool,
1764}
1765
1766/// Copy triples from one graph to another.
1767#[derive(Debug, Clone)]
1768pub struct CopyGraphOp {
1769 /// Source graph.
1770 pub source: Option<String>,
1771 /// Destination graph.
1772 pub destination: Option<String>,
1773 /// Whether to silently ignore errors.
1774 pub silent: bool,
1775}
1776
1777/// Move triples from one graph to another.
1778#[derive(Debug, Clone)]
1779pub struct MoveGraphOp {
1780 /// Source graph.
1781 pub source: Option<String>,
1782 /// Destination graph.
1783 pub destination: Option<String>,
1784 /// Whether to silently ignore errors.
1785 pub silent: bool,
1786}
1787
1788/// Add (merge) triples from one graph to another.
1789#[derive(Debug, Clone)]
1790pub struct AddGraphOp {
1791 /// Source graph.
1792 pub source: Option<String>,
1793 /// Destination graph.
1794 pub destination: Option<String>,
1795 /// Whether to silently ignore errors.
1796 pub silent: bool,
1797}
1798
1799// ==================== Vector Search Operators ====================
1800
1801/// Vector similarity scan operation.
1802///
1803/// Performs approximate nearest neighbor search using a vector index (HNSW)
1804/// or brute-force search for small datasets. Returns nodes/edges whose
1805/// embeddings are similar to the query vector.
1806///
1807/// # Example GQL
1808///
1809/// ```gql
1810/// MATCH (m:Movie)
1811/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
1812/// RETURN m.title
1813/// ```
1814#[derive(Debug, Clone)]
1815pub struct VectorScanOp {
1816 /// Variable name to bind matching entities to.
1817 pub variable: String,
1818 /// Name of the vector index to use (None = brute-force).
1819 pub index_name: Option<String>,
1820 /// Property containing the vector embedding.
1821 pub property: String,
1822 /// Optional label filter (scan only nodes with this label).
1823 pub label: Option<String>,
1824 /// The query vector expression.
1825 pub query_vector: LogicalExpression,
1826 /// Number of nearest neighbors to return.
1827 pub k: usize,
1828 /// Distance metric (None = use index default, typically cosine).
1829 pub metric: Option<VectorMetric>,
1830 /// Minimum similarity threshold (filters results below this).
1831 pub min_similarity: Option<f32>,
1832 /// Maximum distance threshold (filters results above this).
1833 pub max_distance: Option<f32>,
1834 /// Input operator (for hybrid queries combining graph + vector).
1835 pub input: Option<Box<LogicalOperator>>,
1836}
1837
1838/// Vector distance/similarity metric for vector scan operations.
1839#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1840pub enum VectorMetric {
1841 /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
1842 Cosine,
1843 /// Euclidean (L2) distance. Best when magnitude matters.
1844 Euclidean,
1845 /// Dot product. Best for maximum inner product search.
1846 DotProduct,
1847 /// Manhattan (L1) distance. Less sensitive to outliers.
1848 Manhattan,
1849}
1850
1851/// Join graph patterns with vector similarity search.
1852///
1853/// This operator takes entities from the left input and computes vector
1854/// similarity against a query vector, outputting (entity, distance) pairs.
1855///
1856/// # Use Cases
1857///
1858/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
1859/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
1860/// 3. **Filtering by similarity**: Join with threshold-based filtering
1861///
1862/// # Example
1863///
1864/// ```gql
1865/// // Find movies similar to what the user liked
1866/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
1867/// WITH avg(liked.embedding) AS user_taste
1868/// VECTOR JOIN (m:Movie) ON m.embedding
1869/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
1870/// RETURN m.title
1871/// ```
1872#[derive(Debug, Clone)]
1873pub struct VectorJoinOp {
1874 /// Input operator providing entities to match against.
1875 pub input: Box<LogicalOperator>,
1876 /// Variable from input to extract vectors from (for entity-to-entity similarity).
1877 /// If None, uses `query_vector` directly.
1878 pub left_vector_variable: Option<String>,
1879 /// Property containing the left vector (used with `left_vector_variable`).
1880 pub left_property: Option<String>,
1881 /// The query vector expression (constant or computed).
1882 pub query_vector: LogicalExpression,
1883 /// Variable name to bind the right-side matching entities.
1884 pub right_variable: String,
1885 /// Property containing the right-side vector embeddings.
1886 pub right_property: String,
1887 /// Optional label filter for right-side entities.
1888 pub right_label: Option<String>,
1889 /// Name of vector index on right side (None = brute-force).
1890 pub index_name: Option<String>,
1891 /// Number of nearest neighbors per left-side entity.
1892 pub k: usize,
1893 /// Distance metric.
1894 pub metric: Option<VectorMetric>,
1895 /// Minimum similarity threshold.
1896 pub min_similarity: Option<f32>,
1897 /// Maximum distance threshold.
1898 pub max_distance: Option<f32>,
1899 /// Variable to bind the distance/similarity score.
1900 pub score_variable: Option<String>,
1901}
1902
1903/// Return results (terminal operator).
1904#[derive(Debug, Clone)]
1905pub struct ReturnOp {
1906 /// Items to return.
1907 pub items: Vec<ReturnItem>,
1908 /// Whether to return distinct results.
1909 pub distinct: bool,
1910 /// Input operator.
1911 pub input: Box<LogicalOperator>,
1912}
1913
1914/// A single return item.
1915#[derive(Debug, Clone)]
1916pub struct ReturnItem {
1917 /// Expression to return.
1918 pub expression: LogicalExpression,
1919 /// Alias for the result column.
1920 pub alias: Option<String>,
1921}
1922
1923/// Define a property graph schema (SQL/PGQ DDL).
1924#[derive(Debug, Clone)]
1925pub struct CreatePropertyGraphOp {
1926 /// Graph name.
1927 pub name: String,
1928 /// Node table schemas (label name + column definitions).
1929 pub node_tables: Vec<PropertyGraphNodeTable>,
1930 /// Edge table schemas (type name + column definitions + references).
1931 pub edge_tables: Vec<PropertyGraphEdgeTable>,
1932}
1933
1934/// A node table in a property graph definition.
1935#[derive(Debug, Clone)]
1936pub struct PropertyGraphNodeTable {
1937 /// Table name (maps to a node label).
1938 pub name: String,
1939 /// Column definitions as (name, type_name) pairs.
1940 pub columns: Vec<(String, String)>,
1941}
1942
1943/// An edge table in a property graph definition.
1944#[derive(Debug, Clone)]
1945pub struct PropertyGraphEdgeTable {
1946 /// Table name (maps to an edge type).
1947 pub name: String,
1948 /// Column definitions as (name, type_name) pairs.
1949 pub columns: Vec<(String, String)>,
1950 /// Source node table name.
1951 pub source_table: String,
1952 /// Target node table name.
1953 pub target_table: String,
1954}
1955
1956// ==================== Procedure Call Types ====================
1957
1958/// A CALL procedure operation.
1959///
1960/// ```text
1961/// CALL grafeo.pagerank({damping: 0.85}) YIELD nodeId, score
1962/// ```
1963#[derive(Debug, Clone)]
1964pub struct CallProcedureOp {
1965 /// Dotted procedure name, e.g. `["grafeo", "pagerank"]`.
1966 pub name: Vec<String>,
1967 /// Argument expressions (constants in Phase 1).
1968 pub arguments: Vec<LogicalExpression>,
1969 /// Optional YIELD clause: which columns to expose + aliases.
1970 pub yield_items: Option<Vec<ProcedureYield>>,
1971}
1972
1973/// A single YIELD item in a procedure call.
1974#[derive(Debug, Clone)]
1975pub struct ProcedureYield {
1976 /// Column name from the procedure result.
1977 pub field_name: String,
1978 /// Optional alias (YIELD score AS rank).
1979 pub alias: Option<String>,
1980}
1981
1982/// Re-export format enum from the physical operator.
1983pub use grafeo_core::execution::operators::LoadDataFormat;
1984
1985/// LOAD DATA operator: reads a file and produces rows.
1986///
1987/// With headers (CSV), each row is bound as a `Value::Map` with column names as keys.
1988/// Without headers (CSV), each row is bound as a `Value::List` of string values.
1989/// JSONL always produces `Value::Map`. Parquet always produces `Value::Map`.
1990#[derive(Debug, Clone)]
1991pub struct LoadDataOp {
1992 /// File format.
1993 pub format: LoadDataFormat,
1994 /// Whether the file has a header row (CSV only, ignored for JSONL/Parquet).
1995 pub with_headers: bool,
1996 /// File path (local filesystem).
1997 pub path: String,
1998 /// Variable name to bind each row to.
1999 pub variable: String,
2000 /// Field separator character (CSV only, default: comma).
2001 pub field_terminator: Option<char>,
2002}
2003
2004/// A logical expression.
2005#[derive(Debug, Clone)]
2006pub enum LogicalExpression {
2007 /// A literal value.
2008 Literal(Value),
2009
2010 /// A variable reference.
2011 Variable(String),
2012
2013 /// Property access (e.g., n.name).
2014 Property {
2015 /// The variable to access.
2016 variable: String,
2017 /// The property name.
2018 property: String,
2019 },
2020
2021 /// Binary operation.
2022 Binary {
2023 /// Left operand.
2024 left: Box<LogicalExpression>,
2025 /// Operator.
2026 op: BinaryOp,
2027 /// Right operand.
2028 right: Box<LogicalExpression>,
2029 },
2030
2031 /// Unary operation.
2032 Unary {
2033 /// Operator.
2034 op: UnaryOp,
2035 /// Operand.
2036 operand: Box<LogicalExpression>,
2037 },
2038
2039 /// Function call.
2040 FunctionCall {
2041 /// Function name.
2042 name: String,
2043 /// Arguments.
2044 args: Vec<LogicalExpression>,
2045 /// Whether DISTINCT is applied (e.g., COUNT(DISTINCT x)).
2046 distinct: bool,
2047 },
2048
2049 /// List literal.
2050 List(Vec<LogicalExpression>),
2051
2052 /// Map literal (e.g., {name: 'Alix', age: 30}).
2053 Map(Vec<(String, LogicalExpression)>),
2054
2055 /// Index access (e.g., `list[0]`).
2056 IndexAccess {
2057 /// The base expression (typically a list or string).
2058 base: Box<LogicalExpression>,
2059 /// The index expression.
2060 index: Box<LogicalExpression>,
2061 },
2062
2063 /// Slice access (e.g., list[1..3]).
2064 SliceAccess {
2065 /// The base expression (typically a list or string).
2066 base: Box<LogicalExpression>,
2067 /// Start index (None means from beginning).
2068 start: Option<Box<LogicalExpression>>,
2069 /// End index (None means to end).
2070 end: Option<Box<LogicalExpression>>,
2071 },
2072
2073 /// CASE expression.
2074 Case {
2075 /// Test expression (for simple CASE).
2076 operand: Option<Box<LogicalExpression>>,
2077 /// WHEN clauses.
2078 when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
2079 /// ELSE clause.
2080 else_clause: Option<Box<LogicalExpression>>,
2081 },
2082
2083 /// Parameter reference.
2084 Parameter(String),
2085
2086 /// Labels of a node.
2087 Labels(String),
2088
2089 /// Type of an edge.
2090 Type(String),
2091
2092 /// ID of a node or edge.
2093 Id(String),
2094
2095 /// List comprehension: [x IN list WHERE predicate | expression]
2096 ListComprehension {
2097 /// Variable name for each element.
2098 variable: String,
2099 /// The source list expression.
2100 list_expr: Box<LogicalExpression>,
2101 /// Optional filter predicate.
2102 filter_expr: Option<Box<LogicalExpression>>,
2103 /// The mapping expression for each element.
2104 map_expr: Box<LogicalExpression>,
2105 },
2106
2107 /// List predicate: all/any/none/single(x IN list WHERE pred).
2108 ListPredicate {
2109 /// The kind of list predicate.
2110 kind: ListPredicateKind,
2111 /// The iteration variable name.
2112 variable: String,
2113 /// The source list expression.
2114 list_expr: Box<LogicalExpression>,
2115 /// The predicate to test for each element.
2116 predicate: Box<LogicalExpression>,
2117 },
2118
2119 /// EXISTS subquery.
2120 ExistsSubquery(Box<LogicalOperator>),
2121
2122 /// COUNT subquery.
2123 CountSubquery(Box<LogicalOperator>),
2124
2125 /// VALUE subquery: returns scalar value from first row of inner query.
2126 ValueSubquery(Box<LogicalOperator>),
2127
2128 /// Map projection: `node { .prop1, .prop2, key: expr, .* }`.
2129 MapProjection {
2130 /// The base variable name.
2131 base: String,
2132 /// Projection entries (property selectors, literal entries, all-properties).
2133 entries: Vec<MapProjectionEntry>,
2134 },
2135
2136 /// reduce() accumulator: `reduce(acc = init, x IN list | expr)`.
2137 Reduce {
2138 /// Accumulator variable name.
2139 accumulator: String,
2140 /// Initial value for the accumulator.
2141 initial: Box<LogicalExpression>,
2142 /// Iteration variable name.
2143 variable: String,
2144 /// List to iterate over.
2145 list: Box<LogicalExpression>,
2146 /// Body expression evaluated per iteration (references both accumulator and variable).
2147 expression: Box<LogicalExpression>,
2148 },
2149
2150 /// Pattern comprehension: `[(pattern) WHERE pred | expr]`.
2151 ///
2152 /// Executes the inner subplan, evaluates the projection for each row,
2153 /// and collects the results into a list.
2154 PatternComprehension {
2155 /// The subplan produced by translating the pattern (+optional WHERE).
2156 subplan: Box<LogicalOperator>,
2157 /// The projection expression evaluated for each match.
2158 projection: Box<LogicalExpression>,
2159 },
2160}
2161
2162/// An entry in a map projection.
2163#[derive(Debug, Clone)]
2164pub enum MapProjectionEntry {
2165 /// `.propertyName`: shorthand for `propertyName: base.propertyName`.
2166 PropertySelector(String),
2167 /// `key: expression`: explicit key-value pair.
2168 LiteralEntry(String, LogicalExpression),
2169 /// `.*`: include all properties of the base entity.
2170 AllProperties,
2171}
2172
2173/// The kind of list predicate function.
2174#[derive(Debug, Clone, PartialEq, Eq)]
2175pub enum ListPredicateKind {
2176 /// all(x IN list WHERE pred): true if pred holds for every element.
2177 All,
2178 /// any(x IN list WHERE pred): true if pred holds for at least one element.
2179 Any,
2180 /// none(x IN list WHERE pred): true if pred holds for no element.
2181 None,
2182 /// single(x IN list WHERE pred): true if pred holds for exactly one element.
2183 Single,
2184}
2185
2186/// Binary operator.
2187#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2188pub enum BinaryOp {
2189 /// Equality comparison (=).
2190 Eq,
2191 /// Inequality comparison (<>).
2192 Ne,
2193 /// Less than (<).
2194 Lt,
2195 /// Less than or equal (<=).
2196 Le,
2197 /// Greater than (>).
2198 Gt,
2199 /// Greater than or equal (>=).
2200 Ge,
2201
2202 /// Logical AND.
2203 And,
2204 /// Logical OR.
2205 Or,
2206 /// Logical XOR.
2207 Xor,
2208
2209 /// Addition (+).
2210 Add,
2211 /// Subtraction (-).
2212 Sub,
2213 /// Multiplication (*).
2214 Mul,
2215 /// Division (/).
2216 Div,
2217 /// Modulo (%).
2218 Mod,
2219
2220 /// String concatenation.
2221 Concat,
2222 /// String starts with.
2223 StartsWith,
2224 /// String ends with.
2225 EndsWith,
2226 /// String contains.
2227 Contains,
2228
2229 /// Collection membership (IN).
2230 In,
2231 /// Pattern matching (LIKE).
2232 Like,
2233 /// Regex matching (=~).
2234 Regex,
2235 /// Power/exponentiation (^).
2236 Pow,
2237}
2238
2239/// Unary operator.
2240#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2241pub enum UnaryOp {
2242 /// Logical NOT.
2243 Not,
2244 /// Numeric negation.
2245 Neg,
2246 /// IS NULL check.
2247 IsNull,
2248 /// IS NOT NULL check.
2249 IsNotNull,
2250}
2251
2252#[cfg(test)]
2253mod tests {
2254 use super::*;
2255
2256 #[test]
2257 fn test_simple_node_scan_plan() {
2258 let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2259 items: vec![ReturnItem {
2260 expression: LogicalExpression::Variable("n".into()),
2261 alias: None,
2262 }],
2263 distinct: false,
2264 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2265 variable: "n".into(),
2266 label: Some("Person".into()),
2267 input: None,
2268 })),
2269 }));
2270
2271 // Verify structure
2272 if let LogicalOperator::Return(ret) = &plan.root {
2273 assert_eq!(ret.items.len(), 1);
2274 assert!(!ret.distinct);
2275 if let LogicalOperator::NodeScan(scan) = ret.input.as_ref() {
2276 assert_eq!(scan.variable, "n");
2277 assert_eq!(scan.label, Some("Person".into()));
2278 } else {
2279 panic!("Expected NodeScan");
2280 }
2281 } else {
2282 panic!("Expected Return");
2283 }
2284 }
2285
2286 #[test]
2287 fn test_filter_plan() {
2288 let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2289 items: vec![ReturnItem {
2290 expression: LogicalExpression::Property {
2291 variable: "n".into(),
2292 property: "name".into(),
2293 },
2294 alias: Some("name".into()),
2295 }],
2296 distinct: false,
2297 input: Box::new(LogicalOperator::Filter(FilterOp {
2298 predicate: LogicalExpression::Binary {
2299 left: Box::new(LogicalExpression::Property {
2300 variable: "n".into(),
2301 property: "age".into(),
2302 }),
2303 op: BinaryOp::Gt,
2304 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
2305 },
2306 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2307 variable: "n".into(),
2308 label: Some("Person".into()),
2309 input: None,
2310 })),
2311 pushdown_hint: None,
2312 })),
2313 }));
2314
2315 if let LogicalOperator::Return(ret) = &plan.root {
2316 if let LogicalOperator::Filter(filter) = ret.input.as_ref() {
2317 if let LogicalExpression::Binary { op, .. } = &filter.predicate {
2318 assert_eq!(*op, BinaryOp::Gt);
2319 } else {
2320 panic!("Expected Binary expression");
2321 }
2322 } else {
2323 panic!("Expected Filter");
2324 }
2325 } else {
2326 panic!("Expected Return");
2327 }
2328 }
2329}