grafeo_engine/query/plan.rs
1//! Logical query plan representation.
2//!
3//! The logical plan is the intermediate representation between parsed queries
4//! and physical execution. Both GQL and Cypher queries are translated to this
5//! common representation.
6
7use std::collections::HashMap;
8use std::fmt;
9
10use grafeo_common::types::Value;
11
/// A row count used by SKIP/LIMIT: either a concrete number or a parameter
/// reference that has not been substituted yet.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CountExpr {
    /// A resolved integer count.
    Literal(usize),
    /// An unresolved parameter reference (e.g., `$limit`).
    Parameter(String),
}

impl CountExpr {
    /// Unwraps the resolved count.
    ///
    /// Meant to be called once parameter substitution has already run; use
    /// [`CountExpr::try_value`] when that is not guaranteed.
    ///
    /// # Panics
    ///
    /// Panics if the expression is still an unresolved `Parameter` reference.
    pub fn value(&self) -> usize {
        match *self {
            Self::Literal(count) => count,
            Self::Parameter(ref param) => panic!("Unresolved parameter: ${param}"),
        }
    }

    /// Non-panicking counterpart of [`CountExpr::value`].
    ///
    /// # Errors
    ///
    /// Returns a message naming the parameter if the expression is still an
    /// unresolved `Parameter`.
    pub fn try_value(&self) -> Result<usize, String> {
        if let Self::Parameter(param) = self {
            return Err(format!("Unresolved SKIP/LIMIT parameter: ${param}"));
        }
        // Only the `Literal` case is left, so `value` cannot panic here.
        Ok(self.value())
    }

    /// Returns the count as `f64` for cardinality estimation.
    ///
    /// An unresolved parameter has no known size, so a fixed guess of 10 rows
    /// is used instead.
    pub fn estimate(&self) -> f64 {
        const UNRESOLVED_ESTIMATE: f64 = 10.0;

        if let Self::Literal(count) = self {
            *count as f64
        } else {
            UNRESOLVED_ESTIMATE
        }
    }
}

impl fmt::Display for CountExpr {
    /// Writes a literal as its number and a parameter as `$name`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Parameter(param) => write!(f, "${param}"),
            Self::Literal(count) => write!(f, "{count}"),
        }
    }
}

impl From<usize> for CountExpr {
    /// Wraps a plain number as a resolved [`CountExpr::Literal`].
    fn from(n: usize) -> Self {
        CountExpr::Literal(n)
    }
}

impl PartialEq<usize> for CountExpr {
    /// A count equals a plain number only when it is a literal with that
    /// value; an unresolved parameter never compares equal.
    fn eq(&self, other: &usize) -> bool {
        match self {
            Self::Literal(count) => count == other,
            Self::Parameter(_) => false,
        }
    }
}
77
/// A logical query plan.
///
/// Bundles the root of the operator tree with the flags that select EXPLAIN or
/// PROFILE handling and with any default parameter values declared by the
/// query itself.
#[derive(Debug, Clone)]
pub struct LogicalPlan {
    /// The root operator of the plan.
    pub root: LogicalOperator,
    /// When true, return the plan tree as text instead of executing.
    pub explain: bool,
    /// When true, execute the query and return per-operator runtime metrics.
    pub profile: bool,
    /// Default parameter values from variable declarations (e.g., GraphQL
    /// `query($limit: Int = 2)`). The processor merges these with caller-supplied
    /// params, giving caller values higher precedence.
    pub default_params: HashMap<String, Value>,
}
92
93impl LogicalPlan {
94 /// Creates a new logical plan with the given root operator.
95 pub fn new(root: LogicalOperator) -> Self {
96 Self {
97 root,
98 explain: false,
99 profile: false,
100 default_params: HashMap::new(),
101 }
102 }
103
104 /// Creates an EXPLAIN plan that returns the plan tree without executing.
105 pub fn explain(root: LogicalOperator) -> Self {
106 Self {
107 root,
108 explain: true,
109 profile: false,
110 default_params: HashMap::new(),
111 }
112 }
113
114 /// Creates a PROFILE plan that executes and returns per-operator metrics.
115 pub fn profile(root: LogicalOperator) -> Self {
116 Self {
117 root,
118 explain: false,
119 profile: true,
120 default_params: HashMap::new(),
121 }
122 }
123}
124
/// A logical operator in the query plan.
///
/// Apart from [`LogicalOperator::Empty`], every variant wraps an
/// operator-specific payload struct. Operators that consume other operators
/// hold them inside that payload, which is what makes the plan a tree; see
/// `children()` for the per-variant child list.
#[derive(Debug, Clone)]
pub enum LogicalOperator {
    /// Scan all nodes, optionally filtered by label.
    NodeScan(NodeScanOp),

    /// Scan all edges, optionally filtered by type.
    EdgeScan(EdgeScanOp),

    /// Expand from nodes to neighbors via edges.
    Expand(ExpandOp),

    /// Filter rows based on a predicate.
    Filter(FilterOp),

    /// Project specific columns.
    Project(ProjectOp),

    /// Join two inputs.
    Join(JoinOp),

    /// Aggregate with grouping.
    Aggregate(AggregateOp),

    /// Limit the number of results.
    Limit(LimitOp),

    /// Skip a number of results.
    Skip(SkipOp),

    /// Sort results.
    Sort(SortOp),

    /// Remove duplicate results.
    Distinct(DistinctOp),

    /// Create a new node.
    CreateNode(CreateNodeOp),

    /// Create a new edge.
    CreateEdge(CreateEdgeOp),

    /// Delete a node.
    DeleteNode(DeleteNodeOp),

    /// Delete an edge.
    DeleteEdge(DeleteEdgeOp),

    /// Set properties on a node or edge.
    SetProperty(SetPropertyOp),

    /// Add labels to a node.
    AddLabel(AddLabelOp),

    /// Remove labels from a node.
    RemoveLabel(RemoveLabelOp),

    /// Return results (terminal operator).
    Return(ReturnOp),

    /// Empty result set. The only variant without a payload.
    Empty,

    // ==================== RDF/SPARQL Operators ====================
    /// Scan RDF triples matching a pattern.
    TripleScan(TripleScanOp),

    /// Union of multiple result sets.
    Union(UnionOp),

    /// Left outer join for OPTIONAL patterns.
    LeftJoin(LeftJoinOp),

    /// Anti-join for MINUS patterns.
    AntiJoin(AntiJoinOp),

    /// Bind a variable to an expression.
    Bind(BindOp),

    /// Unwind a list into individual rows.
    Unwind(UnwindOp),

    /// Collect grouped key-value rows into a single Map value.
    /// Used for Gremlin `groupCount()` semantics.
    MapCollect(MapCollectOp),

    /// Merge a node pattern (match or create).
    Merge(MergeOp),

    /// Merge a relationship pattern (match or create).
    MergeRelationship(MergeRelationshipOp),

    /// Find shortest path between nodes.
    ShortestPath(ShortestPathOp),

    // ==================== SPARQL Update Operators ====================
    /// Insert RDF triples.
    InsertTriple(InsertTripleOp),

    /// Delete RDF triples.
    DeleteTriple(DeleteTripleOp),

    /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
    /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
    Modify(ModifyOp),

    /// Clear a graph (remove all triples).
    ClearGraph(ClearGraphOp),

    /// Create a new named graph.
    CreateGraph(CreateGraphOp),

    /// Drop (remove) a named graph.
    DropGraph(DropGraphOp),

    /// Load data from a URL into a graph.
    LoadGraph(LoadGraphOp),

    /// Copy triples from one graph to another.
    CopyGraph(CopyGraphOp),

    /// Move triples from one graph to another.
    MoveGraph(MoveGraphOp),

    /// Add (merge) triples from one graph to another.
    AddGraph(AddGraphOp),

    /// Per-row aggregation over a list-valued column (horizontal aggregation, GE09).
    HorizontalAggregate(HorizontalAggregateOp),

    // ==================== Vector Search Operators ====================
    /// Scan using vector similarity search.
    VectorScan(VectorScanOp),

    /// Join graph patterns with vector similarity search.
    ///
    /// Computes vector distances between entities from the left input and
    /// a query vector, then joins with similarity scores. Useful for:
    /// - Filtering graph traversal results by vector similarity
    /// - Computing aggregated embeddings and finding similar entities
    /// - Combining multiple vector sources with graph structure
    VectorJoin(VectorJoinOp),

    // ==================== Set Operations ====================
    /// Set difference: rows in left that are not in right.
    Except(ExceptOp),

    /// Set intersection: rows common to all inputs.
    Intersect(IntersectOp),

    /// Fallback: use left result if non-empty, otherwise right.
    Otherwise(OtherwiseOp),

    // ==================== Correlated Subquery ====================
    /// Apply (lateral join): evaluate a subplan per input row.
    Apply(ApplyOp),

    /// Parameter scan: leaf of a correlated inner plan that receives values
    /// from the outer Apply operator. The column names match `ApplyOp.shared_variables`.
    ParameterScan(ParameterScanOp),

    // ==================== DDL Operators ====================
    /// Define a property graph schema (SQL/PGQ DDL).
    CreatePropertyGraph(CreatePropertyGraphOp),

    // ==================== Multi-Way Join ====================
    /// Multi-way join using worst-case optimal join (leapfrog).
    /// Used for cyclic patterns (triangles, cliques) with 3+ relations.
    MultiWayJoin(MultiWayJoinOp),

    // ==================== Procedure Call Operators ====================
    /// Invoke a stored procedure (CALL ... YIELD).
    CallProcedure(CallProcedureOp),

    // ==================== Data Import Operators ====================
    /// Load data from a file (CSV, JSONL, or Parquet), producing one row per record.
    LoadData(LoadDataOp),
}
303
304impl LogicalOperator {
305 /// Returns `true` if this operator or any of its children perform mutations.
306 #[must_use]
307 pub fn has_mutations(&self) -> bool {
308 match self {
309 // Direct mutation operators
310 Self::CreateNode(_)
311 | Self::CreateEdge(_)
312 | Self::DeleteNode(_)
313 | Self::DeleteEdge(_)
314 | Self::SetProperty(_)
315 | Self::AddLabel(_)
316 | Self::RemoveLabel(_)
317 | Self::Merge(_)
318 | Self::MergeRelationship(_)
319 | Self::InsertTriple(_)
320 | Self::DeleteTriple(_)
321 | Self::Modify(_)
322 | Self::ClearGraph(_)
323 | Self::CreateGraph(_)
324 | Self::DropGraph(_)
325 | Self::LoadGraph(_)
326 | Self::CopyGraph(_)
327 | Self::MoveGraph(_)
328 | Self::AddGraph(_)
329 | Self::CreatePropertyGraph(_) => true,
330
331 // Operators with an `input` child
332 Self::Filter(op) => op.input.has_mutations(),
333 Self::Project(op) => op.input.has_mutations(),
334 Self::Aggregate(op) => op.input.has_mutations(),
335 Self::Limit(op) => op.input.has_mutations(),
336 Self::Skip(op) => op.input.has_mutations(),
337 Self::Sort(op) => op.input.has_mutations(),
338 Self::Distinct(op) => op.input.has_mutations(),
339 Self::Unwind(op) => op.input.has_mutations(),
340 Self::Bind(op) => op.input.has_mutations(),
341 Self::MapCollect(op) => op.input.has_mutations(),
342 Self::Return(op) => op.input.has_mutations(),
343 Self::HorizontalAggregate(op) => op.input.has_mutations(),
344 Self::VectorScan(_) | Self::VectorJoin(_) => false,
345
346 // Operators with two children
347 Self::Join(op) => op.left.has_mutations() || op.right.has_mutations(),
348 Self::LeftJoin(op) => op.left.has_mutations() || op.right.has_mutations(),
349 Self::AntiJoin(op) => op.left.has_mutations() || op.right.has_mutations(),
350 Self::Except(op) => op.left.has_mutations() || op.right.has_mutations(),
351 Self::Intersect(op) => op.left.has_mutations() || op.right.has_mutations(),
352 Self::Otherwise(op) => op.left.has_mutations() || op.right.has_mutations(),
353 Self::Union(op) => op.inputs.iter().any(|i| i.has_mutations()),
354 Self::MultiWayJoin(op) => op.inputs.iter().any(|i| i.has_mutations()),
355 Self::Apply(op) => op.input.has_mutations() || op.subplan.has_mutations(),
356
357 // Leaf operators (read-only)
358 Self::NodeScan(_)
359 | Self::EdgeScan(_)
360 | Self::Expand(_)
361 | Self::TripleScan(_)
362 | Self::ShortestPath(_)
363 | Self::Empty
364 | Self::ParameterScan(_)
365 | Self::CallProcedure(_)
366 | Self::LoadData(_) => false,
367 }
368 }
369
370 /// Returns references to the child operators.
371 ///
372 /// Used by [`crate::query::profile::build_profile_tree`] to walk the logical
373 /// plan tree in post-order, matching operators to profiling entries.
374 #[must_use]
375 pub fn children(&self) -> Vec<&LogicalOperator> {
376 match self {
377 // Optional single input
378 Self::NodeScan(op) => op.input.as_deref().into_iter().collect(),
379 Self::EdgeScan(op) => op.input.as_deref().into_iter().collect(),
380 Self::TripleScan(op) => op.input.as_deref().into_iter().collect(),
381 Self::VectorScan(op) => op.input.as_deref().into_iter().collect(),
382 Self::CreateNode(op) => op.input.as_deref().into_iter().collect(),
383 Self::InsertTriple(op) => op.input.as_deref().into_iter().collect(),
384 Self::DeleteTriple(op) => op.input.as_deref().into_iter().collect(),
385
386 // Single required input
387 Self::Expand(op) => vec![&*op.input],
388 Self::Filter(op) => vec![&*op.input],
389 Self::Project(op) => vec![&*op.input],
390 Self::Aggregate(op) => vec![&*op.input],
391 Self::Limit(op) => vec![&*op.input],
392 Self::Skip(op) => vec![&*op.input],
393 Self::Sort(op) => vec![&*op.input],
394 Self::Distinct(op) => vec![&*op.input],
395 Self::Return(op) => vec![&*op.input],
396 Self::Unwind(op) => vec![&*op.input],
397 Self::Bind(op) => vec![&*op.input],
398 Self::MapCollect(op) => vec![&*op.input],
399 Self::ShortestPath(op) => vec![&*op.input],
400 Self::Merge(op) => vec![&*op.input],
401 Self::MergeRelationship(op) => vec![&*op.input],
402 Self::CreateEdge(op) => vec![&*op.input],
403 Self::DeleteNode(op) => vec![&*op.input],
404 Self::DeleteEdge(op) => vec![&*op.input],
405 Self::SetProperty(op) => vec![&*op.input],
406 Self::AddLabel(op) => vec![&*op.input],
407 Self::RemoveLabel(op) => vec![&*op.input],
408 Self::HorizontalAggregate(op) => vec![&*op.input],
409 Self::VectorJoin(op) => vec![&*op.input],
410 Self::Modify(op) => vec![&*op.where_clause],
411
412 // Two children (left + right)
413 Self::Join(op) => vec![&*op.left, &*op.right],
414 Self::LeftJoin(op) => vec![&*op.left, &*op.right],
415 Self::AntiJoin(op) => vec![&*op.left, &*op.right],
416 Self::Except(op) => vec![&*op.left, &*op.right],
417 Self::Intersect(op) => vec![&*op.left, &*op.right],
418 Self::Otherwise(op) => vec![&*op.left, &*op.right],
419
420 // Two children (input + subplan)
421 Self::Apply(op) => vec![&*op.input, &*op.subplan],
422
423 // Vec children
424 Self::Union(op) => op.inputs.iter().collect(),
425 Self::MultiWayJoin(op) => op.inputs.iter().collect(),
426
427 // Leaf operators
428 Self::Empty
429 | Self::ParameterScan(_)
430 | Self::CallProcedure(_)
431 | Self::ClearGraph(_)
432 | Self::CreateGraph(_)
433 | Self::DropGraph(_)
434 | Self::LoadGraph(_)
435 | Self::CopyGraph(_)
436 | Self::MoveGraph(_)
437 | Self::AddGraph(_)
438 | Self::CreatePropertyGraph(_)
439 | Self::LoadData(_) => vec![],
440 }
441 }
442
443 /// Returns a compact display label for this operator, used in PROFILE output.
444 #[must_use]
445 pub fn display_label(&self) -> String {
446 match self {
447 Self::NodeScan(op) => {
448 let label = op.label.as_deref().unwrap_or("*");
449 format!("{}:{}", op.variable, label)
450 }
451 Self::EdgeScan(op) => {
452 let types = if op.edge_types.is_empty() {
453 "*".to_string()
454 } else {
455 op.edge_types.join("|")
456 };
457 format!("{}:{}", op.variable, types)
458 }
459 Self::Expand(op) => {
460 let types = if op.edge_types.is_empty() {
461 "*".to_string()
462 } else {
463 op.edge_types.join("|")
464 };
465 let dir = match op.direction {
466 ExpandDirection::Outgoing => "->",
467 ExpandDirection::Incoming => "<-",
468 ExpandDirection::Both => "--",
469 };
470 format!(
471 "({from}){dir}[:{types}]{dir}({to})",
472 from = op.from_variable,
473 to = op.to_variable,
474 )
475 }
476 Self::Filter(op) => {
477 let hint = match &op.pushdown_hint {
478 Some(PushdownHint::IndexLookup { property }) => {
479 format!(" [index: {property}]")
480 }
481 Some(PushdownHint::RangeScan { property }) => {
482 format!(" [range: {property}]")
483 }
484 Some(PushdownHint::LabelFirst) => " [label-first]".to_string(),
485 None => String::new(),
486 };
487 format!("{}{hint}", fmt_expr(&op.predicate))
488 }
489 Self::Project(op) => {
490 let cols: Vec<String> = op
491 .projections
492 .iter()
493 .map(|p| match &p.alias {
494 Some(alias) => alias.clone(),
495 None => fmt_expr(&p.expression),
496 })
497 .collect();
498 cols.join(", ")
499 }
500 Self::Join(op) => format!("{:?}", op.join_type),
501 Self::Aggregate(op) => {
502 let groups: Vec<String> = op.group_by.iter().map(fmt_expr).collect();
503 format!("group: [{}]", groups.join(", "))
504 }
505 Self::Limit(op) => format!("{}", op.count),
506 Self::Skip(op) => format!("{}", op.count),
507 Self::Sort(op) => {
508 let keys: Vec<String> = op
509 .keys
510 .iter()
511 .map(|k| {
512 let dir = match k.order {
513 SortOrder::Ascending => "ASC",
514 SortOrder::Descending => "DESC",
515 };
516 format!("{} {dir}", fmt_expr(&k.expression))
517 })
518 .collect();
519 keys.join(", ")
520 }
521 Self::Distinct(_) => String::new(),
522 Self::Return(op) => {
523 let items: Vec<String> = op
524 .items
525 .iter()
526 .map(|item| match &item.alias {
527 Some(alias) => alias.clone(),
528 None => fmt_expr(&item.expression),
529 })
530 .collect();
531 items.join(", ")
532 }
533 Self::Union(op) => format!("{} branches", op.inputs.len()),
534 Self::MultiWayJoin(op) => {
535 format!("{} inputs", op.inputs.len())
536 }
537 Self::LeftJoin(_) => String::new(),
538 Self::AntiJoin(_) => String::new(),
539 Self::Unwind(op) => op.variable.clone(),
540 Self::Bind(op) => op.variable.clone(),
541 Self::MapCollect(op) => op.alias.clone(),
542 Self::ShortestPath(op) => {
543 format!("{} -> {}", op.source_var, op.target_var)
544 }
545 Self::Merge(op) => op.variable.clone(),
546 Self::MergeRelationship(op) => op.variable.clone(),
547 Self::CreateNode(op) => {
548 let labels = op.labels.join(":");
549 format!("{}:{labels}", op.variable)
550 }
551 Self::CreateEdge(op) => {
552 format!(
553 "[{}:{}]",
554 op.variable.as_deref().unwrap_or("?"),
555 op.edge_type
556 )
557 }
558 Self::DeleteNode(op) => op.variable.clone(),
559 Self::DeleteEdge(op) => op.variable.clone(),
560 Self::SetProperty(op) => op.variable.clone(),
561 Self::AddLabel(op) => {
562 let labels = op.labels.join(":");
563 format!("{}:{labels}", op.variable)
564 }
565 Self::RemoveLabel(op) => {
566 let labels = op.labels.join(":");
567 format!("{}:{labels}", op.variable)
568 }
569 Self::CallProcedure(op) => op.name.join("."),
570 Self::LoadData(op) => format!("{} AS {}", op.path, op.variable),
571 Self::Apply(_) => String::new(),
572 Self::VectorScan(op) => op.variable.clone(),
573 Self::VectorJoin(op) => op.right_variable.clone(),
574 _ => String::new(),
575 }
576 }
577}
578
579impl LogicalOperator {
580 /// Formats this operator tree as a human-readable plan for EXPLAIN output.
581 pub fn explain_tree(&self) -> String {
582 let mut output = String::new();
583 self.fmt_tree(&mut output, 0);
584 output
585 }
586
587 fn fmt_tree(&self, out: &mut String, depth: usize) {
588 use std::fmt::Write;
589
590 let indent = " ".repeat(depth);
591 match self {
592 Self::NodeScan(op) => {
593 let label = op.label.as_deref().unwrap_or("*");
594 let _ = writeln!(out, "{indent}NodeScan ({var}:{label})", var = op.variable);
595 if let Some(input) = &op.input {
596 input.fmt_tree(out, depth + 1);
597 }
598 }
599 Self::EdgeScan(op) => {
600 let types = if op.edge_types.is_empty() {
601 "*".to_string()
602 } else {
603 op.edge_types.join("|")
604 };
605 let _ = writeln!(out, "{indent}EdgeScan ({var}:{types})", var = op.variable);
606 }
607 Self::Expand(op) => {
608 let types = if op.edge_types.is_empty() {
609 "*".to_string()
610 } else {
611 op.edge_types.join("|")
612 };
613 let dir = match op.direction {
614 ExpandDirection::Outgoing => "->",
615 ExpandDirection::Incoming => "<-",
616 ExpandDirection::Both => "--",
617 };
618 let hops = match (op.min_hops, op.max_hops) {
619 (1, Some(1)) => String::new(),
620 (min, Some(max)) if min == max => format!("*{min}"),
621 (min, Some(max)) => format!("*{min}..{max}"),
622 (min, None) => format!("*{min}.."),
623 };
624 let _ = writeln!(
625 out,
626 "{indent}Expand ({from}){dir}[:{types}{hops}]{dir}({to})",
627 from = op.from_variable,
628 to = op.to_variable,
629 );
630 op.input.fmt_tree(out, depth + 1);
631 }
632 Self::Filter(op) => {
633 let hint = match &op.pushdown_hint {
634 Some(PushdownHint::IndexLookup { property }) => {
635 format!(" [index: {property}]")
636 }
637 Some(PushdownHint::RangeScan { property }) => {
638 format!(" [range: {property}]")
639 }
640 Some(PushdownHint::LabelFirst) => " [label-first]".to_string(),
641 None => String::new(),
642 };
643 let _ = writeln!(
644 out,
645 "{indent}Filter ({expr}){hint}",
646 expr = fmt_expr(&op.predicate)
647 );
648 op.input.fmt_tree(out, depth + 1);
649 }
650 Self::Project(op) => {
651 let cols: Vec<String> = op
652 .projections
653 .iter()
654 .map(|p| {
655 let expr = fmt_expr(&p.expression);
656 match &p.alias {
657 Some(alias) => format!("{expr} AS {alias}"),
658 None => expr,
659 }
660 })
661 .collect();
662 let _ = writeln!(out, "{indent}Project ({cols})", cols = cols.join(", "));
663 op.input.fmt_tree(out, depth + 1);
664 }
665 Self::Join(op) => {
666 let _ = writeln!(out, "{indent}Join ({ty:?})", ty = op.join_type);
667 op.left.fmt_tree(out, depth + 1);
668 op.right.fmt_tree(out, depth + 1);
669 }
670 Self::Aggregate(op) => {
671 let groups: Vec<String> = op.group_by.iter().map(fmt_expr).collect();
672 let aggs: Vec<String> = op
673 .aggregates
674 .iter()
675 .map(|a| {
676 let func = format!("{:?}", a.function).to_lowercase();
677 match &a.alias {
678 Some(alias) => format!("{func}(...) AS {alias}"),
679 None => format!("{func}(...)"),
680 }
681 })
682 .collect();
683 let _ = writeln!(
684 out,
685 "{indent}Aggregate (group: [{groups}], aggs: [{aggs}])",
686 groups = groups.join(", "),
687 aggs = aggs.join(", "),
688 );
689 op.input.fmt_tree(out, depth + 1);
690 }
691 Self::Limit(op) => {
692 let _ = writeln!(out, "{indent}Limit ({})", op.count);
693 op.input.fmt_tree(out, depth + 1);
694 }
695 Self::Skip(op) => {
696 let _ = writeln!(out, "{indent}Skip ({})", op.count);
697 op.input.fmt_tree(out, depth + 1);
698 }
699 Self::Sort(op) => {
700 let keys: Vec<String> = op
701 .keys
702 .iter()
703 .map(|k| {
704 let dir = match k.order {
705 SortOrder::Ascending => "ASC",
706 SortOrder::Descending => "DESC",
707 };
708 format!("{} {dir}", fmt_expr(&k.expression))
709 })
710 .collect();
711 let _ = writeln!(out, "{indent}Sort ({keys})", keys = keys.join(", "));
712 op.input.fmt_tree(out, depth + 1);
713 }
714 Self::Distinct(op) => {
715 let _ = writeln!(out, "{indent}Distinct");
716 op.input.fmt_tree(out, depth + 1);
717 }
718 Self::Return(op) => {
719 let items: Vec<String> = op
720 .items
721 .iter()
722 .map(|item| {
723 let expr = fmt_expr(&item.expression);
724 match &item.alias {
725 Some(alias) => format!("{expr} AS {alias}"),
726 None => expr,
727 }
728 })
729 .collect();
730 let distinct = if op.distinct { " DISTINCT" } else { "" };
731 let _ = writeln!(
732 out,
733 "{indent}Return{distinct} ({items})",
734 items = items.join(", ")
735 );
736 op.input.fmt_tree(out, depth + 1);
737 }
738 Self::Union(op) => {
739 let _ = writeln!(out, "{indent}Union ({n} branches)", n = op.inputs.len());
740 for input in &op.inputs {
741 input.fmt_tree(out, depth + 1);
742 }
743 }
744 Self::MultiWayJoin(op) => {
745 let vars = op.shared_variables.join(", ");
746 let _ = writeln!(
747 out,
748 "{indent}MultiWayJoin ({n} inputs, shared: [{vars}])",
749 n = op.inputs.len()
750 );
751 for input in &op.inputs {
752 input.fmt_tree(out, depth + 1);
753 }
754 }
755 Self::LeftJoin(op) => {
756 if let Some(cond) = &op.condition {
757 let _ = writeln!(out, "{indent}LeftJoin (condition: {cond:?})");
758 } else {
759 let _ = writeln!(out, "{indent}LeftJoin");
760 }
761 op.left.fmt_tree(out, depth + 1);
762 op.right.fmt_tree(out, depth + 1);
763 }
764 Self::AntiJoin(op) => {
765 let _ = writeln!(out, "{indent}AntiJoin");
766 op.left.fmt_tree(out, depth + 1);
767 op.right.fmt_tree(out, depth + 1);
768 }
769 Self::Unwind(op) => {
770 let _ = writeln!(out, "{indent}Unwind ({var})", var = op.variable);
771 op.input.fmt_tree(out, depth + 1);
772 }
773 Self::Bind(op) => {
774 let _ = writeln!(out, "{indent}Bind ({var})", var = op.variable);
775 op.input.fmt_tree(out, depth + 1);
776 }
777 Self::MapCollect(op) => {
778 let _ = writeln!(
779 out,
780 "{indent}MapCollect ({key} -> {val} AS {alias})",
781 key = op.key_var,
782 val = op.value_var,
783 alias = op.alias
784 );
785 op.input.fmt_tree(out, depth + 1);
786 }
787 Self::Apply(op) => {
788 let _ = writeln!(out, "{indent}Apply");
789 op.input.fmt_tree(out, depth + 1);
790 op.subplan.fmt_tree(out, depth + 1);
791 }
792 Self::Except(op) => {
793 let all = if op.all { " ALL" } else { "" };
794 let _ = writeln!(out, "{indent}Except{all}");
795 op.left.fmt_tree(out, depth + 1);
796 op.right.fmt_tree(out, depth + 1);
797 }
798 Self::Intersect(op) => {
799 let all = if op.all { " ALL" } else { "" };
800 let _ = writeln!(out, "{indent}Intersect{all}");
801 op.left.fmt_tree(out, depth + 1);
802 op.right.fmt_tree(out, depth + 1);
803 }
804 Self::Otherwise(op) => {
805 let _ = writeln!(out, "{indent}Otherwise");
806 op.left.fmt_tree(out, depth + 1);
807 op.right.fmt_tree(out, depth + 1);
808 }
809 Self::ShortestPath(op) => {
810 let _ = writeln!(
811 out,
812 "{indent}ShortestPath ({from} -> {to})",
813 from = op.source_var,
814 to = op.target_var
815 );
816 op.input.fmt_tree(out, depth + 1);
817 }
818 Self::Merge(op) => {
819 let _ = writeln!(out, "{indent}Merge ({var})", var = op.variable);
820 op.input.fmt_tree(out, depth + 1);
821 }
822 Self::MergeRelationship(op) => {
823 let _ = writeln!(out, "{indent}MergeRelationship ({var})", var = op.variable);
824 op.input.fmt_tree(out, depth + 1);
825 }
826 Self::CreateNode(op) => {
827 let labels = op.labels.join(":");
828 let _ = writeln!(
829 out,
830 "{indent}CreateNode ({var}:{labels})",
831 var = op.variable
832 );
833 if let Some(input) = &op.input {
834 input.fmt_tree(out, depth + 1);
835 }
836 }
837 Self::CreateEdge(op) => {
838 let var = op.variable.as_deref().unwrap_or("?");
839 let _ = writeln!(
840 out,
841 "{indent}CreateEdge ({from})-[{var}:{ty}]->({to})",
842 from = op.from_variable,
843 ty = op.edge_type,
844 to = op.to_variable
845 );
846 op.input.fmt_tree(out, depth + 1);
847 }
848 Self::DeleteNode(op) => {
849 let _ = writeln!(out, "{indent}DeleteNode ({var})", var = op.variable);
850 op.input.fmt_tree(out, depth + 1);
851 }
852 Self::DeleteEdge(op) => {
853 let _ = writeln!(out, "{indent}DeleteEdge ({var})", var = op.variable);
854 op.input.fmt_tree(out, depth + 1);
855 }
856 Self::SetProperty(op) => {
857 let props: Vec<String> = op
858 .properties
859 .iter()
860 .map(|(k, _)| format!("{}.{k}", op.variable))
861 .collect();
862 let _ = writeln!(
863 out,
864 "{indent}SetProperty ({props})",
865 props = props.join(", ")
866 );
867 op.input.fmt_tree(out, depth + 1);
868 }
869 Self::AddLabel(op) => {
870 let labels = op.labels.join(":");
871 let _ = writeln!(out, "{indent}AddLabel ({var}:{labels})", var = op.variable);
872 op.input.fmt_tree(out, depth + 1);
873 }
874 Self::RemoveLabel(op) => {
875 let labels = op.labels.join(":");
876 let _ = writeln!(
877 out,
878 "{indent}RemoveLabel ({var}:{labels})",
879 var = op.variable
880 );
881 op.input.fmt_tree(out, depth + 1);
882 }
883 Self::CallProcedure(op) => {
884 let _ = writeln!(
885 out,
886 "{indent}CallProcedure ({name})",
887 name = op.name.join(".")
888 );
889 }
890 Self::LoadData(op) => {
891 let format_name = match op.format {
892 LoadDataFormat::Csv => "LoadCsv",
893 LoadDataFormat::Jsonl => "LoadJsonl",
894 LoadDataFormat::Parquet => "LoadParquet",
895 };
896 let headers = if op.with_headers && op.format == LoadDataFormat::Csv {
897 " WITH HEADERS"
898 } else {
899 ""
900 };
901 let _ = writeln!(
902 out,
903 "{indent}{format_name}{headers} ('{path}' AS {var})",
904 path = op.path,
905 var = op.variable,
906 );
907 }
908 Self::TripleScan(op) => {
909 let _ = writeln!(
910 out,
911 "{indent}TripleScan ({s} {p} {o})",
912 s = fmt_triple_component(&op.subject),
913 p = fmt_triple_component(&op.predicate),
914 o = fmt_triple_component(&op.object)
915 );
916 if let Some(input) = &op.input {
917 input.fmt_tree(out, depth + 1);
918 }
919 }
920 Self::Empty => {
921 let _ = writeln!(out, "{indent}Empty");
922 }
923 // Remaining operators: show a simple name
924 _ => {
925 let _ = writeln!(out, "{indent}{:?}", std::mem::discriminant(self));
926 }
927 }
928 }
929}
930
931/// Format a logical expression compactly for EXPLAIN output.
932fn fmt_expr(expr: &LogicalExpression) -> String {
933 match expr {
934 LogicalExpression::Variable(name) => name.clone(),
935 LogicalExpression::Property { variable, property } => format!("{variable}.{property}"),
936 LogicalExpression::Literal(val) => format!("{val}"),
937 LogicalExpression::Binary { left, op, right } => {
938 format!("{} {op:?} {}", fmt_expr(left), fmt_expr(right))
939 }
940 LogicalExpression::Unary { op, operand } => {
941 format!("{op:?} {}", fmt_expr(operand))
942 }
943 LogicalExpression::FunctionCall { name, args, .. } => {
944 let arg_strs: Vec<String> = args.iter().map(fmt_expr).collect();
945 format!("{name}({})", arg_strs.join(", "))
946 }
947 _ => format!("{expr:?}"),
948 }
949}
950
951/// Format a triple component for EXPLAIN output.
952fn fmt_triple_component(comp: &TripleComponent) -> String {
953 match comp {
954 TripleComponent::Variable(name) => format!("?{name}"),
955 TripleComponent::Iri(iri) => format!("<{iri}>"),
956 TripleComponent::Literal(val) => format!("{val}"),
957 TripleComponent::LangLiteral { value, lang } => format!("\"{value}\"@{lang}"),
958 TripleComponent::BlankNode(label) => format!("_:{label}"),
959 }
960}
961
/// Scan nodes from the graph.
///
/// Shown in EXPLAIN/PROFILE output as `variable:label`, with `*` standing in
/// for a missing label.
#[derive(Debug, Clone)]
pub struct NodeScanOp {
    /// Variable name to bind the node to.
    pub variable: String,
    /// Optional label filter.
    pub label: Option<String>,
    /// Child operator (if any, for chained patterns).
    pub input: Option<Box<LogicalOperator>>,
}
972
/// Scan edges from the graph.
///
/// Shown in EXPLAIN/PROFILE output as `variable:types`, where several types
/// are joined with `|` and an empty type list is shown as `*`.
#[derive(Debug, Clone)]
pub struct EdgeScanOp {
    /// Variable name to bind the edge to.
    pub variable: String,
    /// Edge type filter (empty = match all types).
    pub edge_types: Vec<String>,
    /// Child operator (if any).
    pub input: Option<Box<LogicalOperator>>,
}
983
/// Path traversal mode for variable-length expansion.
///
/// The `Default` implementation yields [`PathMode::Walk`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PathMode {
    /// Allows repeated nodes and edges (default).
    #[default]
    Walk,
    /// No repeated edges.
    Trail,
    /// No repeated nodes except endpoints.
    Simple,
    /// No repeated nodes at all.
    Acyclic,
}
997
/// Expand from nodes to their neighbors.
///
/// Unlike the scan operators, an expansion always has an input operator.
#[derive(Debug, Clone)]
pub struct ExpandOp {
    /// Source node variable.
    pub from_variable: String,
    /// Target node variable to bind.
    pub to_variable: String,
    /// Edge variable to bind (optional).
    pub edge_variable: Option<String>,
    /// Direction of expansion.
    pub direction: ExpandDirection,
    /// Edge type filter (empty = match all types, multiple = match any).
    pub edge_types: Vec<String>,
    /// Minimum hops (for variable-length patterns).
    pub min_hops: u32,
    /// Maximum hops (for variable-length patterns).
    /// `None` is rendered in EXPLAIN output as an open-ended range (`*min..`).
    pub max_hops: Option<u32>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
    /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
    /// When set, a path length column will be output under this name.
    pub path_alias: Option<String>,
    /// Path traversal mode (WALK, TRAIL, SIMPLE, ACYCLIC).
    pub path_mode: PathMode,
}
1023
/// Direction for edge expansion.
///
/// Plan output renders the three directions as `->`, `<-` and `--`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExpandDirection {
    /// Follow outgoing edges.
    Outgoing,
    /// Follow incoming edges.
    Incoming,
    /// Follow edges in either direction.
    Both,
}
1034
/// Join two inputs.
///
/// A binary join: exactly one left and one right input.
#[derive(Debug, Clone)]
pub struct JoinOp {
    /// Left input.
    pub left: Box<LogicalOperator>,
    /// Right input.
    pub right: Box<LogicalOperator>,
    /// Join type.
    pub join_type: JoinType,
    /// Join conditions.
    pub conditions: Vec<JoinCondition>,
}
1047
/// Join type.
///
/// Plan output prints the variant through its `Debug` name (e.g. `Inner`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JoinType {
    /// Inner join.
    Inner,
    /// Left outer join.
    Left,
    /// Right outer join.
    Right,
    /// Full outer join.
    Full,
    /// Cross join (Cartesian product).
    Cross,
    /// Semi join (returns left rows with matching right rows).
    Semi,
    /// Anti join (returns left rows without matching right rows).
    Anti,
}
1066
/// A join condition.
///
/// Pairs one expression from the left side of a join with one expression from
/// the right side. Used by [`JoinOp`] and [`MultiWayJoinOp`].
// NOTE(review): no comparison operator is stored, so this presumably denotes
// equality between `left` and `right` - confirm against the join planner.
#[derive(Debug, Clone)]
pub struct JoinCondition {
    /// Left expression.
    pub left: LogicalExpression,
    /// Right expression.
    pub right: LogicalExpression,
}
1075
/// Multi-way join for worst-case optimal joins (leapfrog).
///
/// Unlike binary `JoinOp`, this joins 3+ relations simultaneously
/// using the leapfrog trie join algorithm. Preferred for cyclic patterns
/// (triangles, cliques) where cascading binary joins hit O(N^2).
///
/// Inputs are held directly in a `Vec` rather than as boxed `left`/`right`
/// children, so the operator has an arbitrary number of child plans.
#[derive(Debug, Clone)]
pub struct MultiWayJoinOp {
    /// Input relations (one per relation in the join).
    pub inputs: Vec<LogicalOperator>,
    /// All pairwise join conditions.
    pub conditions: Vec<JoinCondition>,
    /// Variables shared across multiple inputs (intersection keys).
    pub shared_variables: Vec<String>,
}
1090
/// Aggregate with grouping.
///
/// Groups the rows of `input` by the `group_by` expressions and evaluates
/// each entry of `aggregates` per group, optionally filtering the grouped
/// result with `having`.
#[derive(Debug, Clone)]
pub struct AggregateOp {
    /// Group by expressions.
    pub group_by: Vec<LogicalExpression>,
    /// Aggregate functions.
    pub aggregates: Vec<AggregateExpr>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
    /// HAVING clause filter (applied after aggregation).
    pub having: Option<LogicalExpression>,
}
1103
/// Whether a horizontal aggregate operates on edges or nodes.
///
/// Carried by [`HorizontalAggregateOp::entity_kind`] to say how the IDs in the
/// list column are to be interpreted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EntityKind {
    /// Aggregate over edges in a path.
    Edge,
    /// Aggregate over nodes in a path.
    Node,
}
1112
/// Per-row aggregation over a list-valued column (horizontal aggregation, GE09).
///
/// For each input row, reads a list of entity IDs from `list_column`, accesses
/// `property` on each entity, computes the aggregate, and emits the scalar result.
///
/// In contrast to [`AggregateOp`], there is no grouping: the aggregate is
/// described by a single `function` and the result is written to `alias`.
#[derive(Debug, Clone)]
pub struct HorizontalAggregateOp {
    /// The list column name (e.g., `_path_edges_p`).
    pub list_column: String,
    /// Whether the list contains edge IDs or node IDs.
    pub entity_kind: EntityKind,
    /// The aggregate function to apply.
    pub function: AggregateFunction,
    /// The property to access on each entity.
    pub property: String,
    /// Output alias for the result column.
    pub alias: String,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1132
/// An aggregate expression.
///
/// One aggregate call inside an [`AggregateOp`]: the function, its argument(s)
/// and the function-specific options (`distinct`, `percentile`, `separator`).
/// Options that do not apply to the chosen `function` are left as `None`.
#[derive(Debug, Clone)]
pub struct AggregateExpr {
    /// Aggregate function.
    pub function: AggregateFunction,
    /// Expression to aggregate (first/only argument, y for binary set functions).
    // NOTE(review): presumably `None` for `COUNT(*)`, which has no argument - confirm.
    pub expression: Option<LogicalExpression>,
    /// Second expression for binary set functions (x for COVAR, CORR, REGR_*).
    pub expression2: Option<LogicalExpression>,
    /// Whether to use DISTINCT.
    pub distinct: bool,
    /// Alias for the result.
    pub alias: Option<String>,
    /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
    pub percentile: Option<f64>,
    /// Separator string for GROUP_CONCAT / LISTAGG (defaults to space for GROUP_CONCAT, comma for LISTAGG).
    pub separator: Option<String>,
}
1151
/// Aggregate function.
///
/// Identifies which aggregate an [`AggregateExpr`] or a
/// [`HorizontalAggregateOp`] computes. The variants fall into three groups:
/// single-argument aggregates (`Count` through `Sample`), percentile functions
/// that additionally read `AggregateExpr::percentile`, and binary set functions
/// written `F(y, x)` (`CovarSamp` through `RegrAvgy`) that read both
/// `AggregateExpr::expression` (y) and `AggregateExpr::expression2` (x).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AggregateFunction {
    /// Count all rows (COUNT(*)).
    Count,
    /// Count non-null values (COUNT(expr)).
    CountNonNull,
    /// Sum values.
    Sum,
    /// Average values.
    Avg,
    /// Minimum value.
    Min,
    /// Maximum value.
    Max,
    /// Collect into list.
    Collect,
    /// Sample standard deviation (STDEV).
    StdDev,
    /// Population standard deviation (STDEVP).
    StdDevPop,
    /// Sample variance (VAR_SAMP / VARIANCE).
    Variance,
    /// Population variance (VAR_POP).
    VariancePop,
    /// Discrete percentile (PERCENTILE_DISC).
    PercentileDisc,
    /// Continuous percentile (PERCENTILE_CONT).
    PercentileCont,
    /// Concatenate values with separator (GROUP_CONCAT).
    GroupConcat,
    /// Return an arbitrary value from the group (SAMPLE).
    Sample,
    /// Sample covariance (COVAR_SAMP(y, x)).
    CovarSamp,
    /// Population covariance (COVAR_POP(y, x)).
    CovarPop,
    /// Pearson correlation coefficient (CORR(y, x)).
    Corr,
    /// Regression slope (REGR_SLOPE(y, x)).
    RegrSlope,
    /// Regression intercept (REGR_INTERCEPT(y, x)).
    RegrIntercept,
    /// Coefficient of determination (REGR_R2(y, x)).
    RegrR2,
    /// Regression count of non-null pairs (REGR_COUNT(y, x)).
    RegrCount,
    /// Regression sum of squares for x (REGR_SXX(y, x)).
    RegrSxx,
    /// Regression sum of squares for y (REGR_SYY(y, x)).
    RegrSyy,
    /// Regression sum of cross-products (REGR_SXY(y, x)).
    RegrSxy,
    /// Regression average of x (REGR_AVGX(y, x)).
    RegrAvgx,
    /// Regression average of y (REGR_AVGY(y, x)).
    RegrAvgy,
}
1210
/// Hint about how a filter will be executed at the physical level.
///
/// Set during EXPLAIN annotation to communicate pushdown decisions. The hint
/// is purely descriptive: it is attached to a [`FilterOp`] through its
/// `pushdown_hint` field.
///
/// Implements `PartialEq`/`Eq` (like the other small enums in this module) so
/// that hints can be compared directly, e.g. in assertions on annotated plans.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PushdownHint {
    /// Equality predicate resolved via a property index.
    IndexLookup {
        /// The indexed property name.
        property: String,
    },
    /// Range predicate resolved via a range/btree index.
    RangeScan {
        /// The indexed property name.
        property: String,
    },
    /// No index available, but label narrows the scan before filtering.
    LabelFirst,
}
1229
/// Filter rows based on a predicate.
///
/// Wraps `input` with `predicate`. `pushdown_hint` carries no filtering
/// semantics of its own; it only records how the filter is expected to run.
#[derive(Debug, Clone)]
pub struct FilterOp {
    /// The filter predicate.
    pub predicate: LogicalExpression,
    /// Input operator.
    pub input: Box<LogicalOperator>,
    /// Optional hint about pushdown strategy (populated by EXPLAIN).
    pub pushdown_hint: Option<PushdownHint>,
}
1240
/// Project specific columns.
///
/// Evaluates each entry of `projections` against the rows of `input`.
/// Whether the input columns survive is controlled by `pass_through_input`.
#[derive(Debug, Clone)]
pub struct ProjectOp {
    /// Columns to project.
    pub projections: Vec<Projection>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
    /// When true, all input columns are passed through and the explicit
    /// projections are appended as additional output columns. Used by GQL
    /// LET clauses which add bindings without replacing the existing scope.
    pub pass_through_input: bool,
}
1253
/// A single projection (column selection or computation).
///
/// One output column of a [`ProjectOp`].
#[derive(Debug, Clone)]
pub struct Projection {
    /// Expression to compute.
    pub expression: LogicalExpression,
    /// Alias for the result.
    // NOTE(review): how the column is named when this is `None` is decided elsewhere - confirm.
    pub alias: Option<String>,
}
1262
/// Limit the number of results.
///
/// The count is a [`CountExpr`], so it may still be an unresolved parameter
/// reference (e.g. `$limit`) until parameter substitution has run.
#[derive(Debug, Clone)]
pub struct LimitOp {
    /// Maximum number of rows to return (literal or parameter reference).
    pub count: CountExpr,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1271
/// Skip a number of results.
///
/// The count is a [`CountExpr`], so it may still be an unresolved parameter
/// reference until parameter substitution has run.
#[derive(Debug, Clone)]
pub struct SkipOp {
    /// Number of rows to skip (literal or parameter reference).
    pub count: CountExpr,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1280
/// Sort results.
///
/// Orders the rows of `input` by `keys`.
// NOTE(review): keys are presumably applied in vector order (first key is the
// primary sort key) - confirm against the physical sort operator.
#[derive(Debug, Clone)]
pub struct SortOp {
    /// Sort keys.
    pub keys: Vec<SortKey>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1289
/// A sort key.
///
/// One entry of a [`SortOp`]: the expression to order by, the direction, and
/// an optional explicit placement of NULL values.
#[derive(Debug, Clone)]
pub struct SortKey {
    /// Expression to sort by.
    pub expression: LogicalExpression,
    /// Sort order.
    pub order: SortOrder,
    /// Optional null ordering (NULLS FIRST / NULLS LAST).
    // NOTE(review): the placement used when this is `None` is chosen downstream - confirm.
    pub nulls: Option<NullsOrdering>,
}
1300
/// Sort order.
///
/// Direction of a single [`SortKey`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SortOrder {
    /// Ascending order.
    Ascending,
    /// Descending order.
    Descending,
}
1309
/// Null ordering for sort operations.
///
/// Explicit NULLS FIRST / NULLS LAST choice carried by [`SortKey::nulls`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NullsOrdering {
    /// Nulls sort before all non-null values.
    First,
    /// Nulls sort after all non-null values.
    Last,
}
1318
/// Remove duplicate results.
///
/// Deduplicates the rows of `input`, either on every column or on the
/// explicit subset named in `columns`.
#[derive(Debug, Clone)]
pub struct DistinctOp {
    /// Input operator.
    pub input: Box<LogicalOperator>,
    /// Optional columns to use for deduplication.
    /// If None, all columns are used.
    pub columns: Option<Vec<String>>,
}
1328
/// Create a new node.
///
/// Unlike most mutation operators, `input` is optional, so this operator can
/// act as a leaf of the plan as well as a link in a chain of creates.
#[derive(Debug, Clone)]
pub struct CreateNodeOp {
    /// Variable name to bind the created node to.
    pub variable: String,
    /// Labels for the new node.
    pub labels: Vec<String>,
    /// Properties for the new node.
    /// Each entry is a `(property name, value expression)` pair.
    pub properties: Vec<(String, LogicalExpression)>,
    /// Input operator (for chained creates).
    pub input: Option<Box<LogicalOperator>>,
}
1341
/// Create a new edge.
///
/// Connects the nodes bound to `from_variable` and `to_variable` with an edge
/// of type `edge_type`. An `input` is always required, since the endpoint
/// variables have to be bound by an upstream operator.
#[derive(Debug, Clone)]
pub struct CreateEdgeOp {
    /// Variable name to bind the created edge to.
    /// `None` when the edge is not bound to a variable.
    pub variable: Option<String>,
    /// Source node variable.
    pub from_variable: String,
    /// Target node variable.
    pub to_variable: String,
    /// Edge type.
    pub edge_type: String,
    /// Properties for the new edge.
    /// Each entry is a `(property name, value expression)` pair.
    pub properties: Vec<(String, LogicalExpression)>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1358
/// Delete a node.
///
/// Deletes the node bound to `variable` for each row of `input`.
#[derive(Debug, Clone)]
pub struct DeleteNodeOp {
    /// Variable of the node to delete.
    pub variable: String,
    /// Whether to detach (delete connected edges) before deleting.
    // NOTE(review): behavior when `false` and the node still has edges is
    // decided by the executor - confirm whether that is an error.
    pub detach: bool,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1369
/// Delete an edge.
///
/// Deletes the edge bound to `variable` for each row of `input`.
#[derive(Debug, Clone)]
pub struct DeleteEdgeOp {
    /// Variable of the edge to delete.
    pub variable: String,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1378
/// Set properties on a node or edge.
///
/// A single operator serves both entity kinds; `is_edge` records which one
/// `variable` refers to.
#[derive(Debug, Clone)]
pub struct SetPropertyOp {
    /// Variable of the entity to update.
    pub variable: String,
    /// Properties to set (name -> expression).
    pub properties: Vec<(String, LogicalExpression)>,
    /// Whether to replace all properties (vs. merge).
    pub replace: bool,
    /// Whether the target variable is an edge (vs. node).
    pub is_edge: bool,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1393
/// Add labels to a node.
///
/// Counterpart of [`RemoveLabelOp`]; both share the same field layout.
#[derive(Debug, Clone)]
pub struct AddLabelOp {
    /// Variable of the node to update.
    pub variable: String,
    /// Labels to add.
    pub labels: Vec<String>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1404
/// Remove labels from a node.
///
/// Counterpart of [`AddLabelOp`]; both share the same field layout.
#[derive(Debug, Clone)]
pub struct RemoveLabelOp {
    /// Variable of the node to update.
    pub variable: String,
    /// Labels to remove.
    pub labels: Vec<String>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1415
1416// ==================== RDF/SPARQL Operators ====================
1417
/// SPARQL dataset restriction from FROM / FROM NAMED clauses.
///
/// When present, restricts which graphs are visible to a triple scan:
/// - `default_graphs`: IRIs whose union forms the default graph (basic patterns).
/// - `named_graphs`: IRIs that enumerate the available named graphs (GRAPH patterns).
///
/// The `Default` value has both lists empty, i.e. neither clause was given.
/// `PartialEq`/`Eq` are derived so a restriction can be compared against
/// `DatasetRestriction::default()` or against an expected value in tests.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DatasetRestriction {
    /// FROM IRIs: the default graph is the union of these named graphs.
    /// Empty means no FROM clause was specified (unrestricted default graph).
    pub default_graphs: Vec<String>,
    /// FROM NAMED IRIs: only these named graphs are available to GRAPH patterns.
    /// Empty means no FROM NAMED clause was specified (all named graphs visible).
    pub named_graphs: Vec<String>,
}
1432
/// Scan RDF triples matching a pattern.
///
/// Each of `subject`, `predicate` and `object` is a [`TripleComponent`], so a
/// position can be either a variable to bind or a constant to match. `input`
/// is optional, which lets the scan be a leaf or be chained after another
/// pattern.
#[derive(Debug, Clone)]
pub struct TripleScanOp {
    /// Subject pattern (variable name or IRI).
    pub subject: TripleComponent,
    /// Predicate pattern (variable name or IRI).
    pub predicate: TripleComponent,
    /// Object pattern (variable name, IRI, or literal).
    pub object: TripleComponent,
    /// Named graph (optional).
    /// Being a [`TripleComponent`], the graph may itself be a variable.
    pub graph: Option<TripleComponent>,
    /// Input operator (for chained patterns).
    pub input: Option<Box<LogicalOperator>>,
    /// Dataset restriction from SPARQL FROM / FROM NAMED clauses.
    pub dataset: Option<DatasetRestriction>,
}
1449
/// A component of a triple pattern.
///
/// Used for the subject, predicate, object and (in scans) graph positions of
/// triple patterns and templates. `Variable` is the only binding form; the
/// remaining variants are constants.
#[derive(Debug, Clone)]
pub enum TripleComponent {
    /// A variable to bind.
    Variable(String),
    /// A constant IRI.
    Iri(String),
    /// A constant literal value.
    Literal(Value),
    /// A language-tagged string literal (RDF `rdf:langString`).
    ///
    /// Carries the lexical value and the BCP47 language tag separately so that
    /// the tag survives the translator to planner to RDF store round-trip.
    LangLiteral {
        /// The lexical string value.
        value: String,
        /// BCP47 language tag, e.g. `"fr"`, `"en-GB"`.
        lang: String,
    },
    /// A blank node with a scoped label (used in INSERT DATA).
    BlankNode(String),
}
1472
/// Union of multiple result sets.
///
/// N-ary: any number of child plans can be combined in one operator.
// NOTE(review): nothing here says whether duplicates are kept (UNION ALL) or
// removed - confirm against the translator/executor.
#[derive(Debug, Clone)]
pub struct UnionOp {
    /// Inputs to union together.
    pub inputs: Vec<LogicalOperator>,
}
1479
/// Set difference: rows in left that are not in right.
///
/// Shares its field layout with [`IntersectOp`]; `all` selects between the
/// ALL and DISTINCT flavors.
#[derive(Debug, Clone)]
pub struct ExceptOp {
    /// Left input.
    pub left: Box<LogicalOperator>,
    /// Right input (rows to exclude).
    pub right: Box<LogicalOperator>,
    /// If true, preserve duplicates (EXCEPT ALL); if false, deduplicate (EXCEPT DISTINCT).
    pub all: bool,
}
1490
/// Set intersection: rows common to both inputs.
///
/// Shares its field layout with [`ExceptOp`]; `all` selects between the
/// ALL and DISTINCT flavors.
#[derive(Debug, Clone)]
pub struct IntersectOp {
    /// Left input.
    pub left: Box<LogicalOperator>,
    /// Right input.
    pub right: Box<LogicalOperator>,
    /// If true, preserve duplicates (INTERSECT ALL); if false, deduplicate (INTERSECT DISTINCT).
    pub all: bool,
}
1501
/// Fallback operator: use left result if non-empty, otherwise use right.
///
/// The two inputs are alternatives, not combined: the output comes from
/// exactly one of them.
#[derive(Debug, Clone)]
pub struct OtherwiseOp {
    /// Primary input (preferred).
    pub left: Box<LogicalOperator>,
    /// Fallback input (used only if left produces zero rows).
    pub right: Box<LogicalOperator>,
}
1510
/// Apply (lateral join): evaluate a subplan for each row of the outer input.
///
/// The subplan can reference variables bound by the outer input. Results are
/// concatenated (cross-product per row).
///
/// With `optional` set, an outer row that yields no inner rows is still
/// emitted (inner columns NULL) instead of being dropped.
#[derive(Debug, Clone)]
pub struct ApplyOp {
    /// Outer input providing rows.
    pub input: Box<LogicalOperator>,
    /// Subplan to evaluate per outer row.
    pub subplan: Box<LogicalOperator>,
    /// Variables imported from the outer scope into the inner plan.
    /// When non-empty, the planner injects these via `ParameterState`.
    pub shared_variables: Vec<String>,
    /// When true, uses left-join semantics: outer rows with no matching inner
    /// rows are emitted with NULLs for the inner columns (OPTIONAL CALL).
    pub optional: bool,
}
1528
/// Parameter scan: leaf operator for correlated subquery inner plans.
///
/// Emits a single row containing the values injected from the outer Apply.
/// Column names correspond to the outer variables imported via WITH.
///
/// The operator has no `input` field: it is a leaf of the inner plan.
#[derive(Debug, Clone)]
pub struct ParameterScanOp {
    /// Column names for the injected parameters.
    pub columns: Vec<String>,
}
1538
/// Left outer join for OPTIONAL patterns.
///
/// A dedicated operator, separate from [`JoinOp`] with `JoinType::Left`: it
/// carries a single optional filter expression instead of a list of
/// [`JoinCondition`]s.
#[derive(Debug, Clone)]
pub struct LeftJoinOp {
    /// Left (required) input.
    pub left: Box<LogicalOperator>,
    /// Right (optional) input.
    pub right: Box<LogicalOperator>,
    /// Optional filter condition.
    pub condition: Option<LogicalExpression>,
}
1549
/// Anti-join for MINUS patterns.
///
/// Carries no explicit join condition.
// NOTE(review): matching is presumably on the variables the two sides have in
// common - confirm against the executor.
#[derive(Debug, Clone)]
pub struct AntiJoinOp {
    /// Left input (results to keep if no match on right).
    pub left: Box<LogicalOperator>,
    /// Right input (patterns to exclude).
    pub right: Box<LogicalOperator>,
}
1558
/// Bind a variable to an expression.
///
/// Evaluates `expression` against the rows of `input` and exposes the result
/// under the name `variable`.
#[derive(Debug, Clone)]
pub struct BindOp {
    /// Expression to compute.
    pub expression: LogicalExpression,
    /// Variable to bind the result to.
    pub variable: String,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1569
/// Unwind a list into individual rows.
///
/// For each input row, evaluates the expression (which should return a list)
/// and emits one row for each element in the list.
///
/// The element's position can additionally be exposed through
/// `ordinality_var` and/or `offset_var`, which count from different bases.
#[derive(Debug, Clone)]
pub struct UnwindOp {
    /// The list expression to unwind.
    pub expression: LogicalExpression,
    /// The variable name for each element.
    pub variable: String,
    /// Optional variable for 1-based element position (ORDINALITY).
    pub ordinality_var: Option<String>,
    /// Optional variable for 0-based element position (OFFSET).
    pub offset_var: Option<String>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1587
/// Collect grouped key-value rows into a single Map value.
/// Used for Gremlin `groupCount()` semantics.
///
/// Reads the key from `key_var` and the value from `value_var` of each input
/// row and exposes the resulting map under `alias`.
#[derive(Debug, Clone)]
pub struct MapCollectOp {
    /// Variable holding the map key.
    pub key_var: String,
    /// Variable holding the map value.
    pub value_var: String,
    /// Output variable alias.
    pub alias: String,
    /// Input operator (typically a grouped aggregate).
    pub input: Box<LogicalOperator>,
}
1601
/// Merge a pattern (match or create).
///
/// MERGE tries to match a pattern in the graph. If found, returns the existing
/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
/// (optionally applying ON CREATE SET).
///
/// This operator describes a node merge; relationship merges use
/// [`MergeRelationshipOp`]. All three property lists hold
/// `(property name, value expression)` pairs.
#[derive(Debug, Clone)]
pub struct MergeOp {
    /// The node to merge.
    pub variable: String,
    /// Labels to match/create.
    pub labels: Vec<String>,
    /// Properties that must match (used for both matching and creation).
    pub match_properties: Vec<(String, LogicalExpression)>,
    /// Properties to set on CREATE.
    pub on_create: Vec<(String, LogicalExpression)>,
    /// Properties to set on MATCH.
    pub on_match: Vec<(String, LogicalExpression)>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1622
/// Merge a relationship pattern (match or create between two bound nodes).
///
/// MERGE on a relationship tries to find an existing relationship of the given type
/// between the source and target nodes. If found, returns the existing relationship
/// (optionally applying ON MATCH SET). If not found, creates it (optionally applying
/// ON CREATE SET).
///
/// Relationship counterpart of [`MergeOp`]. All three property lists hold
/// `(property name, value expression)` pairs.
#[derive(Debug, Clone)]
pub struct MergeRelationshipOp {
    /// Variable to bind the relationship to.
    pub variable: String,
    /// Source node variable (must already be bound).
    pub source_variable: String,
    /// Target node variable (must already be bound).
    pub target_variable: String,
    /// Relationship type.
    pub edge_type: String,
    /// Properties that must match (used for both matching and creation).
    pub match_properties: Vec<(String, LogicalExpression)>,
    /// Properties to set on CREATE.
    pub on_create: Vec<(String, LogicalExpression)>,
    /// Properties to set on MATCH.
    pub on_match: Vec<(String, LogicalExpression)>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1648
/// Find shortest path between two nodes.
///
/// This operator uses Dijkstra's algorithm to find the shortest path(s)
/// between a source node and a target node, optionally filtered by edge type.
///
/// Both endpoints are read from variables bound by `input`; the result is
/// bound to `path_alias`.
// NOTE(review): no weight property is carried here, so paths are presumably
// ranked by hop count - confirm against the physical operator.
#[derive(Debug, Clone)]
pub struct ShortestPathOp {
    /// Input operator providing source/target nodes.
    pub input: Box<LogicalOperator>,
    /// Variable name for the source node.
    pub source_var: String,
    /// Variable name for the target node.
    pub target_var: String,
    /// Edge type filter (empty = match all types, multiple = match any).
    pub edge_types: Vec<String>,
    /// Direction of edge traversal.
    pub direction: ExpandDirection,
    /// Variable name to bind the path result.
    pub path_alias: String,
    /// Whether to find all shortest paths (vs. just one).
    pub all_paths: bool,
}
1670
1671// ==================== SPARQL Update Operators ====================
1672
/// Insert RDF triples.
///
/// The triple positions are [`TripleComponent`]s, so they may contain
/// variables that are resolved from the bindings produced by `input`.
/// Unlike [`TripleScanOp`], the graph is a plain `String` here and therefore
/// cannot be a variable.
#[derive(Debug, Clone)]
pub struct InsertTripleOp {
    /// Subject of the triple.
    pub subject: TripleComponent,
    /// Predicate of the triple.
    pub predicate: TripleComponent,
    /// Object of the triple.
    pub object: TripleComponent,
    /// Named graph (optional).
    pub graph: Option<String>,
    /// Input operator (provides variable bindings).
    pub input: Option<Box<LogicalOperator>>,
}
1687
/// Delete RDF triples.
///
/// Mirrors [`InsertTripleOp`] field for field. The triple positions are
/// patterns, so they may contain variables resolved from the bindings
/// produced by `input`.
#[derive(Debug, Clone)]
pub struct DeleteTripleOp {
    /// Subject pattern.
    pub subject: TripleComponent,
    /// Predicate pattern.
    pub predicate: TripleComponent,
    /// Object pattern.
    pub object: TripleComponent,
    /// Named graph (optional).
    pub graph: Option<String>,
    /// Input operator (provides variable bindings).
    pub input: Option<Box<LogicalOperator>>,
}
1702
/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
///
/// Per SPARQL 1.1 Update spec, this operator:
/// 1. Evaluates the WHERE clause once to get bindings
/// 2. Applies DELETE templates using those bindings
/// 3. Applies INSERT templates using the SAME bindings
///
/// This ensures DELETE and INSERT see consistent data.
///
/// The WHERE clause is the operator's only child plan; the templates are
/// plain data ([`TripleTemplate`]), not operators.
#[derive(Debug, Clone)]
pub struct ModifyOp {
    /// DELETE triple templates (patterns with variables).
    pub delete_templates: Vec<TripleTemplate>,
    /// INSERT triple templates (patterns with variables).
    pub insert_templates: Vec<TripleTemplate>,
    /// WHERE clause that provides variable bindings.
    pub where_clause: Box<LogicalOperator>,
    /// Named graph context (for WITH clause).
    pub graph: Option<String>,
}
1722
/// A triple template for DELETE/INSERT operations.
///
/// Used by [`ModifyOp`]. A template has no input operator of its own; its
/// variables are filled in from the bindings of the enclosing operation.
#[derive(Debug, Clone)]
pub struct TripleTemplate {
    /// Subject (may be a variable).
    pub subject: TripleComponent,
    /// Predicate (may be a variable).
    pub predicate: TripleComponent,
    /// Object (may be a variable or literal).
    pub object: TripleComponent,
    /// Named graph (optional).
    pub graph: Option<String>,
}
1735
/// Clear all triples from a graph.
///
/// Note that the target is encoded with a sentinel: the empty string inside
/// `Some` does not name a graph but selects all named graphs.
#[derive(Debug, Clone)]
pub struct ClearGraphOp {
    /// Target graph (None = default graph, Some("") = all named, Some(iri) = specific graph).
    pub graph: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
1744
/// Create a new named graph.
///
/// The graph IRI is mandatory here (a `String`, not an `Option`), unlike the
/// other graph-management operators that can also address the default graph.
#[derive(Debug, Clone)]
pub struct CreateGraphOp {
    /// IRI of the graph to create.
    pub graph: String,
    /// Whether to silently ignore if graph already exists.
    pub silent: bool,
}
1753
/// Drop (remove) a named graph.
// NOTE(review): `ClearGraphOp` documents `Some("")` as "all named graphs";
// whether the same sentinel applies here is not stated - confirm.
#[derive(Debug, Clone)]
pub struct DropGraphOp {
    /// Target graph (None = default graph).
    pub graph: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
1762
/// Load data from a URL into a graph.
///
/// Distinct from [`LoadDataOp`], which reads a local file and produces rows.
#[derive(Debug, Clone)]
pub struct LoadGraphOp {
    /// Source URL to load data from.
    pub source: String,
    /// Destination graph (None = default graph).
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
1773
/// Copy triples from one graph to another.
///
/// Shares its field layout with [`MoveGraphOp`] and [`AddGraphOp`].
// NOTE(review): `None` presumably denotes the default graph for both fields,
// as in `LoadGraphOp::destination` - confirm.
#[derive(Debug, Clone)]
pub struct CopyGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
1784
/// Move triples from one graph to another.
///
/// Shares its field layout with [`CopyGraphOp`] and [`AddGraphOp`].
// NOTE(review): `None` presumably denotes the default graph for both fields,
// as in `LoadGraphOp::destination` - confirm.
#[derive(Debug, Clone)]
pub struct MoveGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
1795
/// Add (merge) triples from one graph to another.
///
/// Shares its field layout with [`CopyGraphOp`] and [`MoveGraphOp`].
// NOTE(review): `None` presumably denotes the default graph for both fields,
// as in `LoadGraphOp::destination` - confirm.
#[derive(Debug, Clone)]
pub struct AddGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
1806
1807// ==================== Vector Search Operators ====================
1808
/// Vector similarity scan operation.
///
/// Performs approximate nearest neighbor search using a vector index (HNSW)
/// or brute-force search for small datasets. Returns nodes/edges whose
/// embeddings are similar to the query vector.
///
/// `k` bounds the number of neighbors; `min_similarity` and `max_distance`
/// are independent optional thresholds applied on top of that.
///
/// # Example GQL
///
/// ```gql
/// MATCH (m:Movie)
/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
/// RETURN m.title
/// ```
#[derive(Debug, Clone)]
pub struct VectorScanOp {
    /// Variable name to bind matching entities to.
    pub variable: String,
    /// Name of the vector index to use (None = brute-force).
    pub index_name: Option<String>,
    /// Property containing the vector embedding.
    pub property: String,
    /// Optional label filter (scan only nodes with this label).
    pub label: Option<String>,
    /// The query vector expression.
    pub query_vector: LogicalExpression,
    /// Number of nearest neighbors to return.
    pub k: usize,
    /// Distance metric (None = use index default, typically cosine).
    pub metric: Option<VectorMetric>,
    /// Minimum similarity threshold (filters results below this).
    pub min_similarity: Option<f32>,
    /// Maximum distance threshold (filters results above this).
    pub max_distance: Option<f32>,
    /// Input operator (for hybrid queries combining graph + vector).
    pub input: Option<Box<LogicalOperator>>,
}
1845
/// Vector distance/similarity metric for vector scan operations.
///
/// Referenced by the `metric` field of [`VectorScanOp`] and [`VectorJoinOp`],
/// where `None` defers the choice instead of naming one of these variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VectorMetric {
    /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
    Cosine,
    /// Euclidean (L2) distance. Best when magnitude matters.
    Euclidean,
    /// Dot product. Best for maximum inner product search.
    DotProduct,
    /// Manhattan (L1) distance. Less sensitive to outliers.
    Manhattan,
}
1858
/// Join graph patterns with vector similarity search.
///
/// This operator takes entities from the left input and computes vector
/// similarity against a query vector, outputting (entity, distance) pairs.
///
/// The "left" side is the `input` operator; the "right" side is described by
/// the `right_*` fields and is not a child plan of its own.
///
/// # Use Cases
///
/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
/// 3. **Filtering by similarity**: Join with threshold-based filtering
///
/// # Example
///
/// ```gql
/// // Find movies similar to what the user liked
/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
/// WITH avg(liked.embedding) AS user_taste
/// VECTOR JOIN (m:Movie) ON m.embedding
/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
/// RETURN m.title
/// ```
#[derive(Debug, Clone)]
pub struct VectorJoinOp {
    /// Input operator providing entities to match against.
    pub input: Box<LogicalOperator>,
    /// Variable from input to extract vectors from (for entity-to-entity similarity).
    /// If None, uses `query_vector` directly.
    pub left_vector_variable: Option<String>,
    /// Property containing the left vector (used with `left_vector_variable`).
    pub left_property: Option<String>,
    /// The query vector expression (constant or computed).
    pub query_vector: LogicalExpression,
    /// Variable name to bind the right-side matching entities.
    pub right_variable: String,
    /// Property containing the right-side vector embeddings.
    pub right_property: String,
    /// Optional label filter for right-side entities.
    pub right_label: Option<String>,
    /// Name of vector index on right side (None = brute-force).
    pub index_name: Option<String>,
    /// Number of nearest neighbors per left-side entity.
    pub k: usize,
    /// Distance metric.
    pub metric: Option<VectorMetric>,
    /// Minimum similarity threshold.
    pub min_similarity: Option<f32>,
    /// Maximum distance threshold.
    pub max_distance: Option<f32>,
    /// Variable to bind the distance/similarity score.
    /// `None` when the score is not exposed as a variable.
    pub score_variable: Option<String>,
}
1910
/// Return results (terminal operator).
///
/// Lists the output columns of the query as [`ReturnItem`]s evaluated over
/// `input`.
#[derive(Debug, Clone)]
pub struct ReturnOp {
    /// Items to return.
    pub items: Vec<ReturnItem>,
    /// Whether to return distinct results.
    pub distinct: bool,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1921
/// A single return item.
///
/// One output column of a [`ReturnOp`]; structurally identical to
/// [`Projection`].
#[derive(Debug, Clone)]
pub struct ReturnItem {
    /// Expression to return.
    pub expression: LogicalExpression,
    /// Alias for the result column.
    pub alias: Option<String>,
}
1930
/// Define a property graph schema (SQL/PGQ DDL).
///
/// A pure schema description: the operator has no input plan and no
/// expressions, only names and table definitions.
#[derive(Debug, Clone)]
pub struct CreatePropertyGraphOp {
    /// Graph name.
    pub name: String,
    /// Node table schemas (label name + column definitions).
    pub node_tables: Vec<PropertyGraphNodeTable>,
    /// Edge table schemas (type name + column definitions + references).
    pub edge_tables: Vec<PropertyGraphEdgeTable>,
}
1941
/// A node table in a property graph definition.
///
/// Part of a [`CreatePropertyGraphOp`]. Column types are kept as their
/// textual type names rather than as a typed enum.
#[derive(Debug, Clone)]
pub struct PropertyGraphNodeTable {
    /// Table name (maps to a node label).
    pub name: String,
    /// Column definitions as (name, type_name) pairs.
    pub columns: Vec<(String, String)>,
}
1950
/// An edge table in a property graph definition.
///
/// Part of a [`CreatePropertyGraphOp`]. In addition to its own columns, an
/// edge table names the node tables its endpoints refer to.
#[derive(Debug, Clone)]
pub struct PropertyGraphEdgeTable {
    /// Table name (maps to an edge type).
    pub name: String,
    /// Column definitions as (name, type_name) pairs.
    pub columns: Vec<(String, String)>,
    /// Source node table name.
    pub source_table: String,
    /// Target node table name.
    pub target_table: String,
}
1963
1964// ==================== Procedure Call Types ====================
1965
/// A CALL procedure operation.
///
/// ```text
/// CALL grafeo.pagerank({damping: 0.85}) YIELD nodeId, score
/// ```
///
/// The procedure name is stored pre-split on its dots, one path segment per
/// vector element.
#[derive(Debug, Clone)]
pub struct CallProcedureOp {
    /// Dotted procedure name, e.g. `["grafeo", "pagerank"]`.
    pub name: Vec<String>,
    /// Argument expressions (constants in Phase 1).
    pub arguments: Vec<LogicalExpression>,
    /// Optional YIELD clause: which columns to expose + aliases.
    // NOTE(review): `None` presumably means no YIELD clause was written, as
    // opposed to an empty list - confirm which columns are exposed then.
    pub yield_items: Option<Vec<ProcedureYield>>,
}
1980
/// A single YIELD item in a procedure call.
///
/// One entry of [`CallProcedureOp::yield_items`]: a result column of the
/// procedure and the name it is optionally re-exposed under.
#[derive(Debug, Clone)]
pub struct ProcedureYield {
    /// Column name from the procedure result.
    pub field_name: String,
    /// Optional alias (YIELD score AS rank).
    pub alias: Option<String>,
}
1989
1990/// Re-export format enum from the physical operator.
1991pub use grafeo_core::execution::operators::LoadDataFormat;
1992
1993/// LOAD DATA operator: reads a file and produces rows.
1994///
1995/// With headers (CSV), each row is bound as a `Value::Map` with column names as keys.
1996/// Without headers (CSV), each row is bound as a `Value::List` of string values.
1997/// JSONL always produces `Value::Map`. Parquet always produces `Value::Map`.
1998#[derive(Debug, Clone)]
1999pub struct LoadDataOp {
2000 /// File format.
2001 pub format: LoadDataFormat,
2002 /// Whether the file has a header row (CSV only, ignored for JSONL/Parquet).
2003 pub with_headers: bool,
2004 /// File path (local filesystem).
2005 pub path: String,
2006 /// Variable name to bind each row to.
2007 pub variable: String,
2008 /// Field separator character (CSV only, default: comma).
2009 pub field_terminator: Option<char>,
2010}
2011
2012/// A logical expression.
2013#[derive(Debug, Clone)]
2014pub enum LogicalExpression {
2015 /// A literal value.
2016 Literal(Value),
2017
2018 /// A variable reference.
2019 Variable(String),
2020
2021 /// Property access (e.g., n.name).
2022 Property {
2023 /// The variable to access.
2024 variable: String,
2025 /// The property name.
2026 property: String,
2027 },
2028
2029 /// Binary operation.
2030 Binary {
2031 /// Left operand.
2032 left: Box<LogicalExpression>,
2033 /// Operator.
2034 op: BinaryOp,
2035 /// Right operand.
2036 right: Box<LogicalExpression>,
2037 },
2038
2039 /// Unary operation.
2040 Unary {
2041 /// Operator.
2042 op: UnaryOp,
2043 /// Operand.
2044 operand: Box<LogicalExpression>,
2045 },
2046
2047 /// Function call.
2048 FunctionCall {
2049 /// Function name.
2050 name: String,
2051 /// Arguments.
2052 args: Vec<LogicalExpression>,
2053 /// Whether DISTINCT is applied (e.g., COUNT(DISTINCT x)).
2054 distinct: bool,
2055 },
2056
2057 /// List literal.
2058 List(Vec<LogicalExpression>),
2059
2060 /// Map literal (e.g., {name: 'Alix', age: 30}).
2061 Map(Vec<(String, LogicalExpression)>),
2062
2063 /// Index access (e.g., `list[0]`).
2064 IndexAccess {
2065 /// The base expression (typically a list or string).
2066 base: Box<LogicalExpression>,
2067 /// The index expression.
2068 index: Box<LogicalExpression>,
2069 },
2070
2071 /// Slice access (e.g., list[1..3]).
2072 SliceAccess {
2073 /// The base expression (typically a list or string).
2074 base: Box<LogicalExpression>,
2075 /// Start index (None means from beginning).
2076 start: Option<Box<LogicalExpression>>,
2077 /// End index (None means to end).
2078 end: Option<Box<LogicalExpression>>,
2079 },
2080
2081 /// CASE expression.
2082 Case {
2083 /// Test expression (for simple CASE).
2084 operand: Option<Box<LogicalExpression>>,
2085 /// WHEN clauses.
2086 when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
2087 /// ELSE clause.
2088 else_clause: Option<Box<LogicalExpression>>,
2089 },
2090
2091 /// Parameter reference.
2092 Parameter(String),
2093
2094 /// Labels of a node.
2095 Labels(String),
2096
2097 /// Type of an edge.
2098 Type(String),
2099
2100 /// ID of a node or edge.
2101 Id(String),
2102
2103 /// List comprehension: [x IN list WHERE predicate | expression]
2104 ListComprehension {
2105 /// Variable name for each element.
2106 variable: String,
2107 /// The source list expression.
2108 list_expr: Box<LogicalExpression>,
2109 /// Optional filter predicate.
2110 filter_expr: Option<Box<LogicalExpression>>,
2111 /// The mapping expression for each element.
2112 map_expr: Box<LogicalExpression>,
2113 },
2114
2115 /// List predicate: all/any/none/single(x IN list WHERE pred).
2116 ListPredicate {
2117 /// The kind of list predicate.
2118 kind: ListPredicateKind,
2119 /// The iteration variable name.
2120 variable: String,
2121 /// The source list expression.
2122 list_expr: Box<LogicalExpression>,
2123 /// The predicate to test for each element.
2124 predicate: Box<LogicalExpression>,
2125 },
2126
2127 /// EXISTS subquery.
2128 ExistsSubquery(Box<LogicalOperator>),
2129
2130 /// COUNT subquery.
2131 CountSubquery(Box<LogicalOperator>),
2132
2133 /// VALUE subquery: returns scalar value from first row of inner query.
2134 ValueSubquery(Box<LogicalOperator>),
2135
2136 /// Map projection: `node { .prop1, .prop2, key: expr, .* }`.
2137 MapProjection {
2138 /// The base variable name.
2139 base: String,
2140 /// Projection entries (property selectors, literal entries, all-properties).
2141 entries: Vec<MapProjectionEntry>,
2142 },
2143
2144 /// reduce() accumulator: `reduce(acc = init, x IN list | expr)`.
2145 Reduce {
2146 /// Accumulator variable name.
2147 accumulator: String,
2148 /// Initial value for the accumulator.
2149 initial: Box<LogicalExpression>,
2150 /// Iteration variable name.
2151 variable: String,
2152 /// List to iterate over.
2153 list: Box<LogicalExpression>,
2154 /// Body expression evaluated per iteration (references both accumulator and variable).
2155 expression: Box<LogicalExpression>,
2156 },
2157
2158 /// Pattern comprehension: `[(pattern) WHERE pred | expr]`.
2159 ///
2160 /// Executes the inner subplan, evaluates the projection for each row,
2161 /// and collects the results into a list.
2162 PatternComprehension {
2163 /// The subplan produced by translating the pattern (+optional WHERE).
2164 subplan: Box<LogicalOperator>,
2165 /// The projection expression evaluated for each match.
2166 projection: Box<LogicalExpression>,
2167 },
2168}
2169
2170/// An entry in a map projection.
2171#[derive(Debug, Clone)]
2172pub enum MapProjectionEntry {
2173 /// `.propertyName`: shorthand for `propertyName: base.propertyName`.
2174 PropertySelector(String),
2175 /// `key: expression`: explicit key-value pair.
2176 LiteralEntry(String, LogicalExpression),
2177 /// `.*`: include all properties of the base entity.
2178 AllProperties,
2179}
2180
/// The kind of list predicate function.
///
/// This is a fieldless enum, so it derives `Copy` like the sibling operator
/// enums `BinaryOp` and `UnaryOp`; values can be passed and matched by value
/// without an explicit `.clone()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListPredicateKind {
    /// `all(x IN list WHERE pred)`: true if pred holds for every element.
    All,
    /// `any(x IN list WHERE pred)`: true if pred holds for at least one element.
    Any,
    /// `none(x IN list WHERE pred)`: true if pred holds for no element.
    None,
    /// `single(x IN list WHERE pred)`: true if pred holds for exactly one element.
    Single,
}
2193
/// Operator taking two operands.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp {
    /// `=`: equality comparison.
    Eq,
    /// `<>`: inequality comparison.
    Ne,
    /// `<`: less than.
    Lt,
    /// `<=`: less than or equal.
    Le,
    /// `>`: greater than.
    Gt,
    /// `>=`: greater than or equal.
    Ge,

    /// Logical AND.
    And,
    /// Logical OR.
    Or,
    /// Logical XOR.
    Xor,

    /// `+`: addition.
    Add,
    /// `-`: subtraction.
    Sub,
    /// `*`: multiplication.
    Mul,
    /// `/`: division.
    Div,
    /// `%`: modulo.
    Mod,

    /// Concatenation of strings.
    Concat,
    /// String "starts with" test.
    StartsWith,
    /// String "ends with" test.
    EndsWith,
    /// String "contains" test.
    Contains,

    /// `IN`: collection membership.
    In,
    /// `LIKE`: pattern matching.
    Like,
    /// `=~`: regex matching.
    Regex,
    /// `^`: power/exponentiation.
    Pow,
}
2246
/// Operator taking a single operand.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
    /// Logical negation (NOT).
    Not,
    /// Negation of a number.
    Neg,
    /// Check for IS NULL.
    IsNull,
    /// Check for IS NOT NULL.
    IsNotNull,
}
2259
#[cfg(test)]
mod tests {
    use super::*;

    /// A `Return` over a labelled `NodeScan` keeps the structure it was built with.
    #[test]
    fn test_simple_node_scan_plan() {
        let person_scan = LogicalOperator::NodeScan(NodeScanOp {
            variable: "n".into(),
            label: Some("Person".into()),
            input: None,
        });
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![ReturnItem {
                expression: LogicalExpression::Variable("n".into()),
                alias: None,
            }],
            distinct: false,
            input: Box::new(person_scan),
        }));

        // Walk the plan from the root down, bailing out on any unexpected operator.
        let LogicalOperator::Return(ret) = &plan.root else {
            panic!("Expected Return");
        };
        assert_eq!(ret.items.len(), 1);
        assert!(!ret.distinct);

        let LogicalOperator::NodeScan(scan) = ret.input.as_ref() else {
            panic!("Expected NodeScan");
        };
        assert_eq!(scan.variable, "n");
        assert_eq!(scan.label, Some("Person".into()));
    }

    /// A `Return` over a `Filter` over a `NodeScan` exposes the filter's binary predicate.
    #[test]
    fn test_filter_plan() {
        let age_above_30 = LogicalExpression::Binary {
            left: Box::new(LogicalExpression::Property {
                variable: "n".into(),
                property: "age".into(),
            }),
            op: BinaryOp::Gt,
            right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
        };
        let person_scan = LogicalOperator::NodeScan(NodeScanOp {
            variable: "n".into(),
            label: Some("Person".into()),
            input: None,
        });
        let filtered = LogicalOperator::Filter(FilterOp {
            predicate: age_above_30,
            input: Box::new(person_scan),
            pushdown_hint: None,
        });
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![ReturnItem {
                expression: LogicalExpression::Property {
                    variable: "n".into(),
                    property: "name".into(),
                },
                alias: Some("name".into()),
            }],
            distinct: false,
            input: Box::new(filtered),
        }));

        let LogicalOperator::Return(ret) = &plan.root else {
            panic!("Expected Return");
        };
        let LogicalOperator::Filter(filter) = ret.input.as_ref() else {
            panic!("Expected Filter");
        };
        let LogicalExpression::Binary { op, .. } = &filter.predicate else {
            panic!("Expected Binary expression");
        };
        assert_eq!(*op, BinaryOp::Gt);
    }
}
2337}