grafeo_engine/query/plan.rs
1//! Logical query plan representation.
2//!
3//! The logical plan is the intermediate representation between parsed queries
4//! and physical execution. Both GQL and Cypher queries are translated to this
5//! common representation.
6
7use std::collections::HashMap;
8use std::fmt;
9
10use grafeo_common::types::Value;
11
/// Row-count operand of a SKIP or LIMIT clause.
///
/// Holds either a concrete count or the name of a query parameter that has
/// not been substituted yet.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum CountExpr {
    /// A concrete, already-known count.
    Literal(usize),
    /// A parameter reference that has not been resolved yet (e.g., `$limit`).
    Parameter(String),
}

impl CountExpr {
    /// Unwraps the literal count.
    ///
    /// Meant to be called once parameter substitution has run; use
    /// [`CountExpr::try_value`] when that is not guaranteed.
    ///
    /// # Panics
    ///
    /// Panics when `self` is still an unresolved `Parameter`.
    pub fn value(&self) -> usize {
        match *self {
            Self::Literal(count) => count,
            Self::Parameter(ref name) => panic!("Unresolved parameter: ${name}"),
        }
    }

    /// Non-panicking counterpart of [`CountExpr::value`].
    ///
    /// # Errors
    ///
    /// Returns a message naming the parameter when `self` is an unresolved
    /// `Parameter`.
    pub fn try_value(&self) -> Result<usize, String> {
        match self {
            Self::Parameter(name) => Err(format!("Unresolved SKIP/LIMIT parameter: ${name}")),
            Self::Literal(count) => Ok(*count),
        }
    }

    /// Returns the count as an `f64` for cardinality estimation.
    ///
    /// Unresolved parameters fall back to a fixed guess of 10 rows.
    pub fn estimate(&self) -> f64 {
        // Placeholder estimate used while the real parameter value is unknown.
        const UNRESOLVED_ESTIMATE: f64 = 10.0;

        if let Self::Literal(count) = self {
            *count as f64
        } else {
            UNRESOLVED_ESTIMATE
        }
    }
}

impl fmt::Display for CountExpr {
    /// Renders literals as the bare number and parameters as `$name`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Parameter(name) => write!(f, "${name}"),
            Self::Literal(count) => write!(f, "{count}"),
        }
    }
}

impl From<usize> for CountExpr {
    /// Wraps a plain count as [`CountExpr::Literal`].
    fn from(count: usize) -> Self {
        CountExpr::Literal(count)
    }
}

impl PartialEq<usize> for CountExpr {
    /// A `CountExpr` equals a `usize` only when it is a literal with that
    /// exact value; parameters never compare equal.
    fn eq(&self, rhs: &usize) -> bool {
        match self {
            Self::Literal(count) => count == rhs,
            Self::Parameter(_) => false,
        }
    }
}
78
79/// A logical query plan.
80#[derive(Debug, Clone)]
81pub struct LogicalPlan {
82 /// The root operator of the plan.
83 pub root: LogicalOperator,
84 /// When true, return the plan tree as text instead of executing.
85 pub explain: bool,
86 /// When true, execute the query and return per-operator runtime metrics.
87 pub profile: bool,
88 /// Default parameter values from variable declarations (e.g., GraphQL
89 /// `query($limit: Int = 2)`). The processor merges these with caller-supplied
90 /// params, giving caller values higher precedence.
91 pub default_params: HashMap<String, Value>,
92}
93
94impl LogicalPlan {
95 /// Creates a new logical plan with the given root operator.
96 pub fn new(root: LogicalOperator) -> Self {
97 Self {
98 root,
99 explain: false,
100 profile: false,
101 default_params: HashMap::new(),
102 }
103 }
104
105 /// Creates an EXPLAIN plan that returns the plan tree without executing.
106 pub fn explain(root: LogicalOperator) -> Self {
107 Self {
108 root,
109 explain: true,
110 profile: false,
111 default_params: HashMap::new(),
112 }
113 }
114
115 /// Creates a PROFILE plan that executes and returns per-operator metrics.
116 pub fn profile(root: LogicalOperator) -> Self {
117 Self {
118 root,
119 explain: false,
120 profile: true,
121 default_params: HashMap::new(),
122 }
123 }
124}
125
126/// A logical operator in the query plan.
127#[derive(Debug, Clone)]
128#[non_exhaustive]
129pub enum LogicalOperator {
130 /// Scan all nodes, optionally filtered by label.
131 NodeScan(NodeScanOp),
132
133 /// Scan all edges, optionally filtered by type.
134 EdgeScan(EdgeScanOp),
135
136 /// Expand from nodes to neighbors via edges.
137 Expand(ExpandOp),
138
139 /// Filter rows based on a predicate.
140 Filter(FilterOp),
141
142 /// Project specific columns.
143 Project(ProjectOp),
144
145 /// Join two inputs.
146 Join(JoinOp),
147
148 /// Aggregate with grouping.
149 Aggregate(AggregateOp),
150
151 /// Limit the number of results.
152 Limit(LimitOp),
153
154 /// Skip a number of results.
155 Skip(SkipOp),
156
157 /// Sort results.
158 Sort(SortOp),
159
160 /// Remove duplicate results.
161 Distinct(DistinctOp),
162
163 /// Create a new node.
164 CreateNode(CreateNodeOp),
165
166 /// Create a new edge.
167 CreateEdge(CreateEdgeOp),
168
169 /// Delete a node.
170 DeleteNode(DeleteNodeOp),
171
172 /// Delete an edge.
173 DeleteEdge(DeleteEdgeOp),
174
175 /// Set properties on a node or edge.
176 SetProperty(SetPropertyOp),
177
178 /// Add labels to a node.
179 AddLabel(AddLabelOp),
180
181 /// Remove labels from a node.
182 RemoveLabel(RemoveLabelOp),
183
184 /// Return results (terminal operator).
185 Return(ReturnOp),
186
187 /// Empty result set.
188 Empty,
189
190 // ==================== RDF/SPARQL Operators ====================
191 /// Scan RDF triples matching a pattern.
192 TripleScan(TripleScanOp),
193
194 /// Union of multiple result sets.
195 Union(UnionOp),
196
197 /// Left outer join for OPTIONAL patterns.
198 LeftJoin(LeftJoinOp),
199
200 /// Anti-join for MINUS patterns.
201 AntiJoin(AntiJoinOp),
202
203 /// SPARQL CONSTRUCT: evaluate WHERE, substitute bindings into template,
204 /// output (subject, predicate, object) columns.
205 Construct(ConstructOp),
206
207 /// Bind a variable to an expression.
208 Bind(BindOp),
209
210 /// Unwind a list into individual rows.
211 Unwind(UnwindOp),
212
213 /// Collect grouped key-value rows into a single Map value.
214 /// Used for Gremlin `groupCount()` semantics.
215 MapCollect(MapCollectOp),
216
217 /// Merge a node pattern (match or create).
218 Merge(MergeOp),
219
220 /// Merge a relationship pattern (match or create).
221 MergeRelationship(MergeRelationshipOp),
222
223 /// Find shortest path between nodes.
224 ShortestPath(ShortestPathOp),
225
226 // ==================== SPARQL Update Operators ====================
227 /// Insert RDF triples.
228 InsertTriple(InsertTripleOp),
229
230 /// Delete RDF triples.
231 DeleteTriple(DeleteTripleOp),
232
233 /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
234 /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
235 Modify(ModifyOp),
236
237 /// Clear a graph (remove all triples).
238 ClearGraph(ClearGraphOp),
239
240 /// Create a new named graph.
241 CreateGraph(CreateGraphOp),
242
243 /// Drop (remove) a named graph.
244 DropGraph(DropGraphOp),
245
246 /// Load data from a URL into a graph.
247 LoadGraph(LoadGraphOp),
248
249 /// Copy triples from one graph to another.
250 CopyGraph(CopyGraphOp),
251
252 /// Move triples from one graph to another.
253 MoveGraph(MoveGraphOp),
254
255 /// Add (merge) triples from one graph to another.
256 AddGraph(AddGraphOp),
257
258 /// Per-row aggregation over a list-valued column (horizontal aggregation, GE09).
259 HorizontalAggregate(HorizontalAggregateOp),
260
261 // ==================== Vector Search Operators ====================
262 /// Scan using vector similarity search.
263 VectorScan(VectorScanOp),
264
265 /// Join graph patterns with vector similarity search.
266 ///
267 /// Computes vector distances between entities from the left input and
268 /// a query vector, then joins with similarity scores. Useful for:
269 /// - Filtering graph traversal results by vector similarity
270 /// - Computing aggregated embeddings and finding similar entities
271 /// - Combining multiple vector sources with graph structure
272 VectorJoin(VectorJoinOp),
273
274 // ==================== Set Operations ====================
275 /// Set difference: rows in left that are not in right.
276 Except(ExceptOp),
277
278 /// Set intersection: rows common to all inputs.
279 Intersect(IntersectOp),
280
281 /// Fallback: use left result if non-empty, otherwise right.
282 Otherwise(OtherwiseOp),
283
284 // ==================== Correlated Subquery ====================
285 /// Apply (lateral join): evaluate a subplan per input row.
286 Apply(ApplyOp),
287
288 /// Parameter scan: leaf of a correlated inner plan that receives values
289 /// from the outer Apply operator. The column names match `ApplyOp.shared_variables`.
290 ParameterScan(ParameterScanOp),
291
292 // ==================== DDL Operators ====================
293 /// Define a property graph schema (SQL/PGQ DDL).
294 CreatePropertyGraph(CreatePropertyGraphOp),
295
296 // ==================== Multi-Way Join ====================
297 /// Multi-way join using worst-case optimal join (leapfrog).
298 /// Used for cyclic patterns (triangles, cliques) with 3+ relations.
299 MultiWayJoin(MultiWayJoinOp),
300
301 // ==================== Procedure Call Operators ====================
302 /// Invoke a stored procedure (CALL ... YIELD).
303 CallProcedure(CallProcedureOp),
304
305 // ==================== Data Import Operators ====================
306 /// Load data from a file (CSV, JSONL, or Parquet), producing one row per record.
307 LoadData(LoadDataOp),
308}
309
310impl LogicalOperator {
311 /// Returns `true` if this operator or any of its children perform mutations.
312 #[must_use]
313 pub fn has_mutations(&self) -> bool {
314 match self {
315 // Direct mutation operators
316 Self::CreateNode(_)
317 | Self::CreateEdge(_)
318 | Self::DeleteNode(_)
319 | Self::DeleteEdge(_)
320 | Self::SetProperty(_)
321 | Self::AddLabel(_)
322 | Self::RemoveLabel(_)
323 | Self::Merge(_)
324 | Self::MergeRelationship(_)
325 | Self::InsertTriple(_)
326 | Self::DeleteTriple(_)
327 | Self::Modify(_)
328 | Self::ClearGraph(_)
329 | Self::CreateGraph(_)
330 | Self::DropGraph(_)
331 | Self::LoadGraph(_)
332 | Self::CopyGraph(_)
333 | Self::MoveGraph(_)
334 | Self::AddGraph(_)
335 | Self::CreatePropertyGraph(_) => true,
336
337 // Operators with an `input` child
338 Self::Filter(op) => op.input.has_mutations(),
339 Self::Project(op) => op.input.has_mutations(),
340 Self::Aggregate(op) => op.input.has_mutations(),
341 Self::Limit(op) => op.input.has_mutations(),
342 Self::Skip(op) => op.input.has_mutations(),
343 Self::Sort(op) => op.input.has_mutations(),
344 Self::Distinct(op) => op.input.has_mutations(),
345 Self::Unwind(op) => op.input.has_mutations(),
346 Self::Bind(op) => op.input.has_mutations(),
347 Self::MapCollect(op) => op.input.has_mutations(),
348 Self::Return(op) => op.input.has_mutations(),
349 Self::HorizontalAggregate(op) => op.input.has_mutations(),
350 Self::VectorScan(_) | Self::VectorJoin(_) => false,
351
352 // Operators with two children
353 Self::Join(op) => op.left.has_mutations() || op.right.has_mutations(),
354 Self::LeftJoin(op) => op.left.has_mutations() || op.right.has_mutations(),
355 Self::AntiJoin(op) => op.left.has_mutations() || op.right.has_mutations(),
356 Self::Except(op) => op.left.has_mutations() || op.right.has_mutations(),
357 Self::Intersect(op) => op.left.has_mutations() || op.right.has_mutations(),
358 Self::Otherwise(op) => op.left.has_mutations() || op.right.has_mutations(),
359 Self::Union(op) => op.inputs.iter().any(|i| i.has_mutations()),
360 Self::MultiWayJoin(op) => op.inputs.iter().any(|i| i.has_mutations()),
361 Self::Apply(op) => op.input.has_mutations() || op.subplan.has_mutations(),
362
363 // Leaf operators (read-only)
364 Self::NodeScan(_)
365 | Self::EdgeScan(_)
366 | Self::Expand(_)
367 | Self::TripleScan(_)
368 | Self::ShortestPath(_)
369 | Self::Empty
370 | Self::ParameterScan(_)
371 | Self::CallProcedure(_)
372 | Self::LoadData(_) => false,
373 Self::Construct(op) => op.input.has_mutations(),
374 }
375 }
376
377 /// Returns references to the child operators.
378 ///
379 /// Used by [`crate::query::profile::build_profile_tree`] to walk the logical
380 /// plan tree in post-order, matching operators to profiling entries.
381 #[must_use]
382 pub fn children(&self) -> Vec<&LogicalOperator> {
383 match self {
384 // Optional single input
385 Self::NodeScan(op) => op.input.as_deref().into_iter().collect(),
386 Self::EdgeScan(op) => op.input.as_deref().into_iter().collect(),
387 Self::TripleScan(op) => op.input.as_deref().into_iter().collect(),
388 Self::VectorScan(op) => op.input.as_deref().into_iter().collect(),
389 Self::CreateNode(op) => op.input.as_deref().into_iter().collect(),
390 Self::InsertTriple(op) => op.input.as_deref().into_iter().collect(),
391 Self::DeleteTriple(op) => op.input.as_deref().into_iter().collect(),
392
393 // Single required input
394 Self::Expand(op) => vec![&*op.input],
395 Self::Filter(op) => vec![&*op.input],
396 Self::Project(op) => vec![&*op.input],
397 Self::Aggregate(op) => vec![&*op.input],
398 Self::Limit(op) => vec![&*op.input],
399 Self::Skip(op) => vec![&*op.input],
400 Self::Sort(op) => vec![&*op.input],
401 Self::Distinct(op) => vec![&*op.input],
402 Self::Return(op) => vec![&*op.input],
403 Self::Unwind(op) => vec![&*op.input],
404 Self::Bind(op) => vec![&*op.input],
405 Self::Construct(op) => vec![&*op.input],
406 Self::MapCollect(op) => vec![&*op.input],
407 Self::ShortestPath(op) => vec![&*op.input],
408 Self::Merge(op) => vec![&*op.input],
409 Self::MergeRelationship(op) => vec![&*op.input],
410 Self::CreateEdge(op) => vec![&*op.input],
411 Self::DeleteNode(op) => vec![&*op.input],
412 Self::DeleteEdge(op) => vec![&*op.input],
413 Self::SetProperty(op) => vec![&*op.input],
414 Self::AddLabel(op) => vec![&*op.input],
415 Self::RemoveLabel(op) => vec![&*op.input],
416 Self::HorizontalAggregate(op) => vec![&*op.input],
417 Self::VectorJoin(op) => vec![&*op.input],
418 Self::Modify(op) => vec![&*op.where_clause],
419
420 // Two children (left + right)
421 Self::Join(op) => vec![&*op.left, &*op.right],
422 Self::LeftJoin(op) => vec![&*op.left, &*op.right],
423 Self::AntiJoin(op) => vec![&*op.left, &*op.right],
424 Self::Except(op) => vec![&*op.left, &*op.right],
425 Self::Intersect(op) => vec![&*op.left, &*op.right],
426 Self::Otherwise(op) => vec![&*op.left, &*op.right],
427
428 // Two children (input + subplan)
429 Self::Apply(op) => vec![&*op.input, &*op.subplan],
430
431 // Vec children
432 Self::Union(op) => op.inputs.iter().collect(),
433 Self::MultiWayJoin(op) => op.inputs.iter().collect(),
434
435 // Leaf operators
436 Self::Empty
437 | Self::ParameterScan(_)
438 | Self::CallProcedure(_)
439 | Self::ClearGraph(_)
440 | Self::CreateGraph(_)
441 | Self::DropGraph(_)
442 | Self::LoadGraph(_)
443 | Self::CopyGraph(_)
444 | Self::MoveGraph(_)
445 | Self::AddGraph(_)
446 | Self::CreatePropertyGraph(_)
447 | Self::LoadData(_) => vec![],
448 }
449 }
450
451 /// Returns a compact display label for this operator, used in PROFILE output.
452 #[must_use]
453 pub fn display_label(&self) -> String {
454 match self {
455 Self::NodeScan(op) => {
456 let label = op.label.as_deref().unwrap_or("*");
457 format!("{}:{}", op.variable, label)
458 }
459 Self::EdgeScan(op) => {
460 let types = if op.edge_types.is_empty() {
461 "*".to_string()
462 } else {
463 op.edge_types.join("|")
464 };
465 format!("{}:{}", op.variable, types)
466 }
467 Self::Expand(op) => {
468 let types = if op.edge_types.is_empty() {
469 "*".to_string()
470 } else {
471 op.edge_types.join("|")
472 };
473 let dir = match op.direction {
474 ExpandDirection::Outgoing => "->",
475 ExpandDirection::Incoming => "<-",
476 ExpandDirection::Both => "--",
477 };
478 format!(
479 "({from}){dir}[:{types}]{dir}({to})",
480 from = op.from_variable,
481 to = op.to_variable,
482 )
483 }
484 Self::Filter(op) => {
485 let hint = match &op.pushdown_hint {
486 Some(PushdownHint::IndexLookup { property }) => {
487 format!(" [index: {property}]")
488 }
489 Some(PushdownHint::RangeScan { property }) => {
490 format!(" [range: {property}]")
491 }
492 Some(PushdownHint::LabelFirst) => " [label-first]".to_string(),
493 None => String::new(),
494 };
495 format!("{}{hint}", fmt_expr(&op.predicate))
496 }
497 Self::Project(op) => {
498 let cols: Vec<String> = op
499 .projections
500 .iter()
501 .map(|p| match &p.alias {
502 Some(alias) => alias.clone(),
503 None => fmt_expr(&p.expression),
504 })
505 .collect();
506 cols.join(", ")
507 }
508 Self::Join(op) => format!("{:?}", op.join_type),
509 Self::Aggregate(op) => {
510 let groups: Vec<String> = op.group_by.iter().map(fmt_expr).collect();
511 format!("group: [{}]", groups.join(", "))
512 }
513 Self::Limit(op) => format!("{}", op.count),
514 Self::Skip(op) => format!("{}", op.count),
515 Self::Sort(op) => {
516 let keys: Vec<String> = op
517 .keys
518 .iter()
519 .map(|k| {
520 let dir = match k.order {
521 SortOrder::Ascending => "ASC",
522 SortOrder::Descending => "DESC",
523 };
524 format!("{} {dir}", fmt_expr(&k.expression))
525 })
526 .collect();
527 keys.join(", ")
528 }
529 Self::Distinct(_) => String::new(),
530 Self::Return(op) => {
531 let items: Vec<String> = op
532 .items
533 .iter()
534 .map(|item| match &item.alias {
535 Some(alias) => alias.clone(),
536 None => fmt_expr(&item.expression),
537 })
538 .collect();
539 items.join(", ")
540 }
541 Self::Union(op) => format!("{} branches", op.inputs.len()),
542 Self::MultiWayJoin(op) => {
543 format!("{} inputs", op.inputs.len())
544 }
545 Self::LeftJoin(_) => String::new(),
546 Self::AntiJoin(_) => String::new(),
547 Self::Unwind(op) => op.variable.clone(),
548 Self::Bind(op) => op.variable.clone(),
549 Self::MapCollect(op) => op.alias.clone(),
550 Self::ShortestPath(op) => {
551 format!("{} -> {}", op.source_var, op.target_var)
552 }
553 Self::Merge(op) => op.variable.clone(),
554 Self::MergeRelationship(op) => op.variable.clone(),
555 Self::CreateNode(op) => {
556 let labels = op.labels.join(":");
557 format!("{}:{labels}", op.variable)
558 }
559 Self::CreateEdge(op) => {
560 format!(
561 "[{}:{}]",
562 op.variable.as_deref().unwrap_or("?"),
563 op.edge_type
564 )
565 }
566 Self::DeleteNode(op) => op.variable.clone(),
567 Self::DeleteEdge(op) => op.variable.clone(),
568 Self::SetProperty(op) => op.variable.clone(),
569 Self::AddLabel(op) => {
570 let labels = op.labels.join(":");
571 format!("{}:{labels}", op.variable)
572 }
573 Self::RemoveLabel(op) => {
574 let labels = op.labels.join(":");
575 format!("{}:{labels}", op.variable)
576 }
577 Self::CallProcedure(op) => op.name.join("."),
578 Self::LoadData(op) => format!("{} AS {}", op.path, op.variable),
579 Self::Apply(_) => String::new(),
580 Self::VectorScan(op) => op.variable.clone(),
581 Self::VectorJoin(op) => op.right_variable.clone(),
582 _ => String::new(),
583 }
584 }
585}
586
587impl LogicalOperator {
588 /// Formats this operator tree as a human-readable plan for EXPLAIN output.
589 pub fn explain_tree(&self) -> String {
590 let mut output = String::new();
591 self.fmt_tree(&mut output, 0);
592 output
593 }
594
595 fn fmt_tree(&self, out: &mut String, depth: usize) {
596 use std::fmt::Write;
597
598 let indent = " ".repeat(depth);
599 match self {
600 Self::NodeScan(op) => {
601 let label = op.label.as_deref().unwrap_or("*");
602 let _ = writeln!(out, "{indent}NodeScan ({var}:{label})", var = op.variable);
603 if let Some(input) = &op.input {
604 input.fmt_tree(out, depth + 1);
605 }
606 }
607 Self::EdgeScan(op) => {
608 let types = if op.edge_types.is_empty() {
609 "*".to_string()
610 } else {
611 op.edge_types.join("|")
612 };
613 let _ = writeln!(out, "{indent}EdgeScan ({var}:{types})", var = op.variable);
614 }
615 Self::Expand(op) => {
616 let types = if op.edge_types.is_empty() {
617 "*".to_string()
618 } else {
619 op.edge_types.join("|")
620 };
621 let dir = match op.direction {
622 ExpandDirection::Outgoing => "->",
623 ExpandDirection::Incoming => "<-",
624 ExpandDirection::Both => "--",
625 };
626 let hops = match (op.min_hops, op.max_hops) {
627 (1, Some(1)) => String::new(),
628 (min, Some(max)) if min == max => format!("*{min}"),
629 (min, Some(max)) => format!("*{min}..{max}"),
630 (min, None) => format!("*{min}.."),
631 };
632 let _ = writeln!(
633 out,
634 "{indent}Expand ({from}){dir}[:{types}{hops}]{dir}({to})",
635 from = op.from_variable,
636 to = op.to_variable,
637 );
638 op.input.fmt_tree(out, depth + 1);
639 }
640 Self::Filter(op) => {
641 let hint = match &op.pushdown_hint {
642 Some(PushdownHint::IndexLookup { property }) => {
643 format!(" [index: {property}]")
644 }
645 Some(PushdownHint::RangeScan { property }) => {
646 format!(" [range: {property}]")
647 }
648 Some(PushdownHint::LabelFirst) => " [label-first]".to_string(),
649 None => String::new(),
650 };
651 let _ = writeln!(
652 out,
653 "{indent}Filter ({expr}){hint}",
654 expr = fmt_expr(&op.predicate)
655 );
656 op.input.fmt_tree(out, depth + 1);
657 }
658 Self::Project(op) => {
659 let cols: Vec<String> = op
660 .projections
661 .iter()
662 .map(|p| {
663 let expr = fmt_expr(&p.expression);
664 match &p.alias {
665 Some(alias) => format!("{expr} AS {alias}"),
666 None => expr,
667 }
668 })
669 .collect();
670 let _ = writeln!(out, "{indent}Project ({cols})", cols = cols.join(", "));
671 op.input.fmt_tree(out, depth + 1);
672 }
673 Self::Join(op) => {
674 let _ = writeln!(out, "{indent}Join ({ty:?})", ty = op.join_type);
675 op.left.fmt_tree(out, depth + 1);
676 op.right.fmt_tree(out, depth + 1);
677 }
678 Self::Aggregate(op) => {
679 let groups: Vec<String> = op.group_by.iter().map(fmt_expr).collect();
680 let aggs: Vec<String> = op
681 .aggregates
682 .iter()
683 .map(|a| {
684 let func = format!("{:?}", a.function).to_lowercase();
685 match &a.alias {
686 Some(alias) => format!("{func}(...) AS {alias}"),
687 None => format!("{func}(...)"),
688 }
689 })
690 .collect();
691 let _ = writeln!(
692 out,
693 "{indent}Aggregate (group: [{groups}], aggs: [{aggs}])",
694 groups = groups.join(", "),
695 aggs = aggs.join(", "),
696 );
697 op.input.fmt_tree(out, depth + 1);
698 }
699 Self::Limit(op) => {
700 let _ = writeln!(out, "{indent}Limit ({})", op.count);
701 op.input.fmt_tree(out, depth + 1);
702 }
703 Self::Skip(op) => {
704 let _ = writeln!(out, "{indent}Skip ({})", op.count);
705 op.input.fmt_tree(out, depth + 1);
706 }
707 Self::Sort(op) => {
708 let keys: Vec<String> = op
709 .keys
710 .iter()
711 .map(|k| {
712 let dir = match k.order {
713 SortOrder::Ascending => "ASC",
714 SortOrder::Descending => "DESC",
715 };
716 format!("{} {dir}", fmt_expr(&k.expression))
717 })
718 .collect();
719 let _ = writeln!(out, "{indent}Sort ({keys})", keys = keys.join(", "));
720 op.input.fmt_tree(out, depth + 1);
721 }
722 Self::Distinct(op) => {
723 let _ = writeln!(out, "{indent}Distinct");
724 op.input.fmt_tree(out, depth + 1);
725 }
726 Self::Return(op) => {
727 let items: Vec<String> = op
728 .items
729 .iter()
730 .map(|item| {
731 let expr = fmt_expr(&item.expression);
732 match &item.alias {
733 Some(alias) => format!("{expr} AS {alias}"),
734 None => expr,
735 }
736 })
737 .collect();
738 let distinct = if op.distinct { " DISTINCT" } else { "" };
739 let _ = writeln!(
740 out,
741 "{indent}Return{distinct} ({items})",
742 items = items.join(", ")
743 );
744 op.input.fmt_tree(out, depth + 1);
745 }
746 Self::Union(op) => {
747 let _ = writeln!(out, "{indent}Union ({n} branches)", n = op.inputs.len());
748 for input in &op.inputs {
749 input.fmt_tree(out, depth + 1);
750 }
751 }
752 Self::MultiWayJoin(op) => {
753 let vars = op.shared_variables.join(", ");
754 let _ = writeln!(
755 out,
756 "{indent}MultiWayJoin ({n} inputs, shared: [{vars}])",
757 n = op.inputs.len()
758 );
759 for input in &op.inputs {
760 input.fmt_tree(out, depth + 1);
761 }
762 }
763 Self::LeftJoin(op) => {
764 if let Some(cond) = &op.condition {
765 let _ = writeln!(out, "{indent}LeftJoin (condition: {cond:?})");
766 } else {
767 let _ = writeln!(out, "{indent}LeftJoin");
768 }
769 op.left.fmt_tree(out, depth + 1);
770 op.right.fmt_tree(out, depth + 1);
771 }
772 Self::AntiJoin(op) => {
773 let _ = writeln!(out, "{indent}AntiJoin");
774 op.left.fmt_tree(out, depth + 1);
775 op.right.fmt_tree(out, depth + 1);
776 }
777 Self::Unwind(op) => {
778 let _ = writeln!(out, "{indent}Unwind ({var})", var = op.variable);
779 op.input.fmt_tree(out, depth + 1);
780 }
781 Self::Bind(op) => {
782 let _ = writeln!(out, "{indent}Bind ({var})", var = op.variable);
783 op.input.fmt_tree(out, depth + 1);
784 }
785 Self::MapCollect(op) => {
786 let _ = writeln!(
787 out,
788 "{indent}MapCollect ({key} -> {val} AS {alias})",
789 key = op.key_var,
790 val = op.value_var,
791 alias = op.alias
792 );
793 op.input.fmt_tree(out, depth + 1);
794 }
795 Self::Apply(op) => {
796 let _ = writeln!(out, "{indent}Apply");
797 op.input.fmt_tree(out, depth + 1);
798 op.subplan.fmt_tree(out, depth + 1);
799 }
800 Self::Except(op) => {
801 let all = if op.all { " ALL" } else { "" };
802 let _ = writeln!(out, "{indent}Except{all}");
803 op.left.fmt_tree(out, depth + 1);
804 op.right.fmt_tree(out, depth + 1);
805 }
806 Self::Intersect(op) => {
807 let all = if op.all { " ALL" } else { "" };
808 let _ = writeln!(out, "{indent}Intersect{all}");
809 op.left.fmt_tree(out, depth + 1);
810 op.right.fmt_tree(out, depth + 1);
811 }
812 Self::Otherwise(op) => {
813 let _ = writeln!(out, "{indent}Otherwise");
814 op.left.fmt_tree(out, depth + 1);
815 op.right.fmt_tree(out, depth + 1);
816 }
817 Self::ShortestPath(op) => {
818 let _ = writeln!(
819 out,
820 "{indent}ShortestPath ({from} -> {to})",
821 from = op.source_var,
822 to = op.target_var
823 );
824 op.input.fmt_tree(out, depth + 1);
825 }
826 Self::Merge(op) => {
827 let _ = writeln!(out, "{indent}Merge ({var})", var = op.variable);
828 op.input.fmt_tree(out, depth + 1);
829 }
830 Self::MergeRelationship(op) => {
831 let _ = writeln!(out, "{indent}MergeRelationship ({var})", var = op.variable);
832 op.input.fmt_tree(out, depth + 1);
833 }
834 Self::CreateNode(op) => {
835 let labels = op.labels.join(":");
836 let _ = writeln!(
837 out,
838 "{indent}CreateNode ({var}:{labels})",
839 var = op.variable
840 );
841 if let Some(input) = &op.input {
842 input.fmt_tree(out, depth + 1);
843 }
844 }
845 Self::CreateEdge(op) => {
846 let var = op.variable.as_deref().unwrap_or("?");
847 let _ = writeln!(
848 out,
849 "{indent}CreateEdge ({from})-[{var}:{ty}]->({to})",
850 from = op.from_variable,
851 ty = op.edge_type,
852 to = op.to_variable
853 );
854 op.input.fmt_tree(out, depth + 1);
855 }
856 Self::DeleteNode(op) => {
857 let _ = writeln!(out, "{indent}DeleteNode ({var})", var = op.variable);
858 op.input.fmt_tree(out, depth + 1);
859 }
860 Self::DeleteEdge(op) => {
861 let _ = writeln!(out, "{indent}DeleteEdge ({var})", var = op.variable);
862 op.input.fmt_tree(out, depth + 1);
863 }
864 Self::SetProperty(op) => {
865 let props: Vec<String> = op
866 .properties
867 .iter()
868 .map(|(k, _)| format!("{}.{k}", op.variable))
869 .collect();
870 let _ = writeln!(
871 out,
872 "{indent}SetProperty ({props})",
873 props = props.join(", ")
874 );
875 op.input.fmt_tree(out, depth + 1);
876 }
877 Self::AddLabel(op) => {
878 let labels = op.labels.join(":");
879 let _ = writeln!(out, "{indent}AddLabel ({var}:{labels})", var = op.variable);
880 op.input.fmt_tree(out, depth + 1);
881 }
882 Self::RemoveLabel(op) => {
883 let labels = op.labels.join(":");
884 let _ = writeln!(
885 out,
886 "{indent}RemoveLabel ({var}:{labels})",
887 var = op.variable
888 );
889 op.input.fmt_tree(out, depth + 1);
890 }
891 Self::CallProcedure(op) => {
892 let _ = writeln!(
893 out,
894 "{indent}CallProcedure ({name})",
895 name = op.name.join(".")
896 );
897 }
898 Self::LoadData(op) => {
899 let format_name = match op.format {
900 LoadDataFormat::Csv => "LoadCsv",
901 LoadDataFormat::Jsonl => "LoadJsonl",
902 LoadDataFormat::Parquet => "LoadParquet",
903 _ => "LoadData",
904 };
905 let headers = if op.with_headers && op.format == LoadDataFormat::Csv {
906 " WITH HEADERS"
907 } else {
908 ""
909 };
910 let _ = writeln!(
911 out,
912 "{indent}{format_name}{headers} ('{path}' AS {var})",
913 path = op.path,
914 var = op.variable,
915 );
916 }
917 Self::TripleScan(op) => {
918 let _ = writeln!(
919 out,
920 "{indent}TripleScan ({s} {p} {o})",
921 s = fmt_triple_component(&op.subject),
922 p = fmt_triple_component(&op.predicate),
923 o = fmt_triple_component(&op.object)
924 );
925 if let Some(input) = &op.input {
926 input.fmt_tree(out, depth + 1);
927 }
928 }
929 Self::Empty => {
930 let _ = writeln!(out, "{indent}Empty");
931 }
932 // Remaining operators: show a simple name
933 _ => {
934 let _ = writeln!(out, "{indent}{:?}", std::mem::discriminant(self));
935 }
936 }
937 }
938}
939
940/// Format a logical expression compactly for EXPLAIN output.
941fn fmt_expr(expr: &LogicalExpression) -> String {
942 match expr {
943 LogicalExpression::Variable(name) => name.clone(),
944 LogicalExpression::Property { variable, property } => format!("{variable}.{property}"),
945 LogicalExpression::Literal(val) => format!("{val}"),
946 LogicalExpression::Binary { left, op, right } => {
947 format!("{} {op:?} {}", fmt_expr(left), fmt_expr(right))
948 }
949 LogicalExpression::Unary { op, operand } => {
950 format!("{op:?} {}", fmt_expr(operand))
951 }
952 LogicalExpression::FunctionCall { name, args, .. } => {
953 let arg_strs: Vec<String> = args.iter().map(fmt_expr).collect();
954 format!("{name}({})", arg_strs.join(", "))
955 }
956 _ => format!("{expr:?}"),
957 }
958}
959
960/// Format a triple component for EXPLAIN output.
961fn fmt_triple_component(comp: &TripleComponent) -> String {
962 match comp {
963 TripleComponent::Variable(name) => format!("?{name}"),
964 TripleComponent::Iri(iri) => format!("<{iri}>"),
965 TripleComponent::Literal(val) => format!("{val}"),
966 TripleComponent::LangLiteral { value, lang } => format!("\"{value}\"@{lang}"),
967 TripleComponent::BlankNode(label) => format!("_:{label}"),
968 }
969}
970
971/// Scan nodes from the graph.
972#[derive(Debug, Clone)]
973pub struct NodeScanOp {
974 /// Variable name to bind the node to.
975 pub variable: String,
976 /// Optional label filter.
977 pub label: Option<String>,
978 /// Child operator (if any, for chained patterns).
979 pub input: Option<Box<LogicalOperator>>,
980}
981
982/// Scan edges from the graph.
983#[derive(Debug, Clone)]
984pub struct EdgeScanOp {
985 /// Variable name to bind the edge to.
986 pub variable: String,
987 /// Edge type filter (empty = match all types).
988 pub edge_types: Vec<String>,
989 /// Child operator (if any).
990 pub input: Option<Box<LogicalOperator>>,
991}
992
/// How a variable-length expansion may revisit nodes and edges.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[non_exhaustive]
pub enum PathMode {
    /// Nodes and edges may repeat freely (the default mode).
    #[default]
    Walk,
    /// Edges must not repeat.
    Trail,
    /// Nodes must not repeat, endpoints excepted.
    Simple,
    /// Nodes must not repeat at all.
    Acyclic,
}
1007
1008/// Expand from nodes to their neighbors.
1009#[derive(Debug, Clone)]
1010pub struct ExpandOp {
1011 /// Source node variable.
1012 pub from_variable: String,
1013 /// Target node variable to bind.
1014 pub to_variable: String,
1015 /// Edge variable to bind (optional).
1016 pub edge_variable: Option<String>,
1017 /// Direction of expansion.
1018 pub direction: ExpandDirection,
1019 /// Edge type filter (empty = match all types, multiple = match any).
1020 pub edge_types: Vec<String>,
1021 /// Minimum hops (for variable-length patterns).
1022 pub min_hops: u32,
1023 /// Maximum hops (for variable-length patterns).
1024 pub max_hops: Option<u32>,
1025 /// Input operator.
1026 pub input: Box<LogicalOperator>,
1027 /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
1028 /// When set, a path length column will be output under this name.
1029 pub path_alias: Option<String>,
1030 /// Path traversal mode (WALK, TRAIL, SIMPLE, ACYCLIC).
1031 pub path_mode: PathMode,
1032}
1033
/// Which way edges are followed during an expansion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ExpandDirection {
    /// Outgoing edges are followed.
    Outgoing,
    /// Incoming edges are followed.
    Incoming,
    /// Edges are followed in both directions.
    Both,
}
1045
/// Join two inputs.
///
/// A binary join of `left` and `right`; `join_type` selects the join
/// semantics and `conditions` lists the expression pairs to join on.
#[derive(Debug, Clone)]
pub struct JoinOp {
    /// Left input.
    pub left: Box<LogicalOperator>,
    /// Right input.
    pub right: Box<LogicalOperator>,
    /// Join type.
    pub join_type: JoinType,
    /// Join conditions.
    pub conditions: Vec<JoinCondition>,
}
1058
/// Join type.
///
/// Selects the semantics of a [`JoinOp`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum JoinType {
    /// Inner join.
    Inner,
    /// Left outer join.
    Left,
    /// Right outer join.
    Right,
    /// Full outer join.
    Full,
    /// Cross join (Cartesian product).
    Cross,
    /// Semi join (returns left rows with matching right rows).
    Semi,
    /// Anti join (returns left rows without matching right rows).
    Anti,
}
1078
/// A join condition.
///
/// Pairs one expression from the left side with one from the right side.
// NOTE(review): presumably the pair is compared for equality (`left = right`) — confirm against the planner.
#[derive(Debug, Clone)]
pub struct JoinCondition {
    /// Left expression.
    pub left: LogicalExpression,
    /// Right expression.
    pub right: LogicalExpression,
}
1087
/// Multi-way join for worst-case optimal joins (leapfrog).
///
/// Unlike binary `JoinOp`, this joins 3+ relations simultaneously
/// using the leapfrog trie join algorithm. Preferred for cyclic patterns
/// (triangles, cliques) where cascading binary joins hit O(N^2).
///
/// Inputs are held by value in a `Vec` rather than boxed individually.
#[derive(Debug, Clone)]
pub struct MultiWayJoinOp {
    /// Input relations (one per relation in the join).
    pub inputs: Vec<LogicalOperator>,
    /// All pairwise join conditions.
    pub conditions: Vec<JoinCondition>,
    /// Variables shared across multiple inputs (intersection keys).
    pub shared_variables: Vec<String>,
}
1102
/// Aggregate with grouping.
///
/// Groups the rows of `input` by `group_by`, computes `aggregates` per group,
/// and optionally filters the aggregated rows with `having`.
#[derive(Debug, Clone)]
pub struct AggregateOp {
    /// Group by expressions.
    pub group_by: Vec<LogicalExpression>,
    /// Aggregate functions.
    pub aggregates: Vec<AggregateExpr>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
    /// HAVING clause filter (applied after aggregation); `None` when absent.
    pub having: Option<LogicalExpression>,
}
1115
/// Whether a horizontal aggregate operates on edges or nodes.
///
/// Stored in [`HorizontalAggregateOp::entity_kind`] to tell how the IDs in
/// the list column should be interpreted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum EntityKind {
    /// Aggregate over edges in a path.
    Edge,
    /// Aggregate over nodes in a path.
    Node,
}
1125
/// Per-row aggregation over a list-valued column (horizontal aggregation, GE09).
///
/// For each input row, reads a list of entity IDs from `list_column`, accesses
/// `property` on each entity, computes the aggregate, and emits the scalar result.
///
/// Unlike [`AggregateOp`], no grouping is involved: one result is produced
/// per input row and exposed under `alias`.
#[derive(Debug, Clone)]
pub struct HorizontalAggregateOp {
    /// The list column name (e.g., `_path_edges_p`).
    pub list_column: String,
    /// Whether the list contains edge IDs or node IDs.
    pub entity_kind: EntityKind,
    /// The aggregate function to apply.
    pub function: AggregateFunction,
    /// The property to access on each entity.
    pub property: String,
    /// Output alias for the result column.
    pub alias: String,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1145
/// An aggregate expression.
///
/// Describes one aggregate call: the function, its argument(s), and the
/// modifiers (`DISTINCT`, percentile, separator) that only some functions use.
#[derive(Debug, Clone)]
pub struct AggregateExpr {
    /// Aggregate function.
    pub function: AggregateFunction,
    /// Expression to aggregate (first/only argument, y for binary set functions).
    // NOTE(review): presumably `None` for argument-less forms such as `COUNT(*)` — confirm.
    pub expression: Option<LogicalExpression>,
    /// Second expression for binary set functions (x for COVAR, CORR, REGR_*).
    pub expression2: Option<LogicalExpression>,
    /// Whether to use DISTINCT.
    pub distinct: bool,
    /// Alias for the result.
    pub alias: Option<String>,
    /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
    pub percentile: Option<f64>,
    /// Separator string for GROUP_CONCAT / LISTAGG (defaults to space for GROUP_CONCAT, comma for LISTAGG).
    pub separator: Option<String>,
}
1164
/// Aggregate function.
///
/// The first group of variants takes at most one argument. The covariance,
/// correlation and `Regr*` variants are binary set functions written as
/// `F(y, x)`; their arguments are carried by `AggregateExpr::expression` (y)
/// and `AggregateExpr::expression2` (x).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum AggregateFunction {
    /// Count all rows (COUNT(*)).
    Count,
    /// Count non-null values (COUNT(expr)).
    CountNonNull,
    /// Sum values.
    Sum,
    /// Average values.
    Avg,
    /// Minimum value.
    Min,
    /// Maximum value.
    Max,
    /// Collect into list.
    Collect,
    /// Sample standard deviation (STDEV).
    StdDev,
    /// Population standard deviation (STDEVP).
    StdDevPop,
    /// Sample variance (VAR_SAMP / VARIANCE).
    Variance,
    /// Population variance (VAR_POP).
    VariancePop,
    /// Discrete percentile (PERCENTILE_DISC).
    PercentileDisc,
    /// Continuous percentile (PERCENTILE_CONT).
    PercentileCont,
    /// Concatenate values with separator (GROUP_CONCAT).
    GroupConcat,
    /// Return an arbitrary value from the group (SAMPLE).
    Sample,
    /// Sample covariance (COVAR_SAMP(y, x)).
    CovarSamp,
    /// Population covariance (COVAR_POP(y, x)).
    CovarPop,
    /// Pearson correlation coefficient (CORR(y, x)).
    Corr,
    /// Regression slope (REGR_SLOPE(y, x)).
    RegrSlope,
    /// Regression intercept (REGR_INTERCEPT(y, x)).
    RegrIntercept,
    /// Coefficient of determination (REGR_R2(y, x)).
    RegrR2,
    /// Regression count of non-null pairs (REGR_COUNT(y, x)).
    RegrCount,
    /// Regression sum of squares for x (REGR_SXX(y, x)).
    RegrSxx,
    /// Regression sum of squares for y (REGR_SYY(y, x)).
    RegrSyy,
    /// Regression sum of cross-products (REGR_SXY(y, x)).
    RegrSxy,
    /// Regression average of x (REGR_AVGX(y, x)).
    RegrAvgx,
    /// Regression average of y (REGR_AVGY(y, x)).
    RegrAvgy,
}
1224
/// Hint about how a filter will be executed at the physical level.
///
/// Set during EXPLAIN annotation to communicate pushdown decisions.
///
/// Hints compare by value (`PartialEq`/`Eq`), matching the other plan enums,
/// so an annotated plan can be checked with `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum PushdownHint {
    /// Equality predicate resolved via a property index.
    IndexLookup {
        /// The indexed property name.
        property: String,
    },
    /// Range predicate resolved via a range/btree index.
    RangeScan {
        /// The indexed property name.
        property: String,
    },
    /// No index available, but label narrows the scan before filtering.
    LabelFirst,
}
1244
/// Filter rows based on a predicate.
///
/// Keeps the rows of `input` for which `predicate` holds.
#[derive(Debug, Clone)]
pub struct FilterOp {
    /// The filter predicate.
    pub predicate: LogicalExpression,
    /// Input operator.
    pub input: Box<LogicalOperator>,
    /// Optional hint about pushdown strategy (populated by EXPLAIN).
    pub pushdown_hint: Option<PushdownHint>,
}
1255
/// Project specific columns.
///
/// Evaluates `projections` over each row of `input`. Whether the input
/// columns survive alongside them is controlled by `pass_through_input`.
#[derive(Debug, Clone)]
pub struct ProjectOp {
    /// Columns to project.
    pub projections: Vec<Projection>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
    /// When true, all input columns are passed through and the explicit
    /// projections are appended as additional output columns. Used by GQL
    /// LET clauses which add bindings without replacing the existing scope.
    pub pass_through_input: bool,
}
1268
/// A single projection (column selection or computation).
///
/// One entry of [`ProjectOp::projections`].
#[derive(Debug, Clone)]
pub struct Projection {
    /// Expression to compute.
    pub expression: LogicalExpression,
    /// Alias for the result, or `None` when no alias was given.
    pub alias: Option<String>,
}
1277
/// Limit the number of results.
///
/// The bound is a [`CountExpr`], so it may still be an unresolved parameter
/// reference until parameter substitution has run.
#[derive(Debug, Clone)]
pub struct LimitOp {
    /// Maximum number of rows to return (literal or parameter reference).
    pub count: CountExpr,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1286
/// Skip a number of results.
///
/// The offset is a [`CountExpr`], so it may still be an unresolved parameter
/// reference until parameter substitution has run.
#[derive(Debug, Clone)]
pub struct SkipOp {
    /// Number of rows to skip (literal or parameter reference).
    pub count: CountExpr,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1295
/// Sort results.
///
/// Orders the rows of `input` according to `keys`.
// NOTE(review): keys are presumably applied in list order (first key is most significant) — confirm.
#[derive(Debug, Clone)]
pub struct SortOp {
    /// Sort keys.
    pub keys: Vec<SortKey>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1304
/// A sort key.
///
/// One entry of [`SortOp::keys`]: the expression to order by, its direction,
/// and an optional explicit placement of nulls.
#[derive(Debug, Clone)]
pub struct SortKey {
    /// Expression to sort by.
    pub expression: LogicalExpression,
    /// Sort order.
    pub order: SortOrder,
    /// Optional null ordering (NULLS FIRST / NULLS LAST); `None` when the
    /// query did not specify one.
    pub nulls: Option<NullsOrdering>,
}
1315
/// Sort order.
///
/// Direction of a single [`SortKey`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum SortOrder {
    /// Ascending order.
    Ascending,
    /// Descending order.
    Descending,
}
1325
/// Null ordering for sort operations.
///
/// Carried by [`SortKey::nulls`] when a key states NULLS FIRST or NULLS LAST.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum NullsOrdering {
    /// Nulls sort before all non-null values.
    First,
    /// Nulls sort after all non-null values.
    Last,
}
1335
/// Remove duplicate results.
///
/// Deduplicates the rows of `input`, either on every column or on the
/// subset named in `columns`.
#[derive(Debug, Clone)]
pub struct DistinctOp {
    /// Input operator.
    pub input: Box<LogicalOperator>,
    /// Optional columns to use for deduplication.
    /// If None, all columns are used.
    pub columns: Option<Vec<String>>,
}
1345
/// Create a new node.
///
/// The node receives `labels` and the evaluated `properties`, and is bound
/// to `variable`.
#[derive(Debug, Clone)]
pub struct CreateNodeOp {
    /// Variable name to bind the created node to.
    pub variable: String,
    /// Labels for the new node.
    pub labels: Vec<String>,
    /// Properties for the new node, as (name, value expression) pairs.
    pub properties: Vec<(String, LogicalExpression)>,
    /// Input operator (for chained creates); `None` when there is none.
    pub input: Option<Box<LogicalOperator>>,
}
1358
/// Create a new edge.
///
/// The edge has type `edge_type` and runs from the node bound to
/// `from_variable` to the node bound to `to_variable`. Unlike
/// [`CreateNodeOp`], an input operator is always required.
#[derive(Debug, Clone)]
pub struct CreateEdgeOp {
    /// Variable name to bind the created edge to (`None` = edge is not bound).
    pub variable: Option<String>,
    /// Source node variable.
    pub from_variable: String,
    /// Target node variable.
    pub to_variable: String,
    /// Edge type.
    pub edge_type: String,
    /// Properties for the new edge, as (name, value expression) pairs.
    pub properties: Vec<(String, LogicalExpression)>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1375
/// Delete a node.
///
/// Deletes the node bound to `variable` in each row of `input`.
#[derive(Debug, Clone)]
pub struct DeleteNodeOp {
    /// Variable of the node to delete.
    pub variable: String,
    /// Whether to detach (delete connected edges) before deleting.
    pub detach: bool,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1386
/// Delete an edge.
///
/// Deletes the edge bound to `variable` in each row of `input`.
#[derive(Debug, Clone)]
pub struct DeleteEdgeOp {
    /// Variable of the edge to delete.
    pub variable: String,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1395
/// Set properties on a node or edge.
///
/// Applies `properties` to the entity bound to `variable`; `is_edge` records
/// which kind of entity that is, and `replace` selects replace-vs-merge.
#[derive(Debug, Clone)]
pub struct SetPropertyOp {
    /// Variable of the entity to update.
    pub variable: String,
    /// Properties to set (name -> expression).
    pub properties: Vec<(String, LogicalExpression)>,
    /// Whether to replace all properties (vs. merge).
    pub replace: bool,
    /// Whether the target variable is an edge (vs. node).
    pub is_edge: bool,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1410
/// Add labels to a node.
///
/// Counterpart of [`RemoveLabelOp`]; both share the same field layout.
#[derive(Debug, Clone)]
pub struct AddLabelOp {
    /// Variable of the node to update.
    pub variable: String,
    /// Labels to add.
    pub labels: Vec<String>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1421
/// Remove labels from a node.
///
/// Counterpart of [`AddLabelOp`]; both share the same field layout.
#[derive(Debug, Clone)]
pub struct RemoveLabelOp {
    /// Variable of the node to update.
    pub variable: String,
    /// Labels to remove.
    pub labels: Vec<String>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1432
1433// ==================== RDF/SPARQL Operators ====================
1434
/// SPARQL dataset restriction from FROM / FROM NAMED clauses.
///
/// When present, restricts which graphs are visible to a triple scan:
/// - `default_graphs`: IRIs whose union forms the default graph (basic patterns).
/// - `named_graphs`: IRIs that enumerate the available named graphs (GRAPH patterns).
///
/// The `Default` value has both lists empty, i.e. neither clause specified.
#[derive(Debug, Clone, Default)]
pub struct DatasetRestriction {
    /// FROM IRIs: the default graph is the union of these named graphs.
    /// Empty means no FROM clause was specified (unrestricted default graph).
    pub default_graphs: Vec<String>,
    /// FROM NAMED IRIs: only these named graphs are available to GRAPH patterns.
    /// Empty means no FROM NAMED clause was specified (all named graphs visible).
    pub named_graphs: Vec<String>,
}
1449
/// Scan RDF triples matching a pattern.
///
/// Each position of the pattern is a [`TripleComponent`]: a variable to bind
/// or a constant to match.
#[derive(Debug, Clone)]
pub struct TripleScanOp {
    /// Subject pattern (variable name or IRI).
    pub subject: TripleComponent,
    /// Predicate pattern (variable name or IRI).
    pub predicate: TripleComponent,
    /// Object pattern (variable name, IRI, or literal).
    pub object: TripleComponent,
    /// Named graph (optional).
    pub graph: Option<TripleComponent>,
    /// Input operator (for chained patterns); `None` when there is none.
    pub input: Option<Box<LogicalOperator>>,
    /// Dataset restriction from SPARQL FROM / FROM NAMED clauses.
    pub dataset: Option<DatasetRestriction>,
}
1466
/// A component of a triple pattern.
///
/// Used for the subject, predicate and object positions of triple scans,
/// inserts, deletes and templates. Only [`TripleComponent::Variable`] binds
/// a value; the other variants are constants.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum TripleComponent {
    /// A variable to bind.
    Variable(String),
    /// A constant IRI.
    Iri(String),
    /// A constant literal value.
    Literal(Value),
    /// A language-tagged string literal (RDF `rdf:langString`).
    ///
    /// Carries the lexical value and the BCP47 language tag separately so that
    /// the tag survives the translator to planner to RDF store round-trip.
    LangLiteral {
        /// The lexical string value.
        value: String,
        /// BCP47 language tag, e.g. `"fr"`, `"en-GB"`.
        lang: String,
    },
    /// A blank node with a scoped label (used in INSERT DATA).
    BlankNode(String),
}
1490
1491impl TripleComponent {
1492 /// Returns the variable name if this component is a `Variable`, or `None`.
1493 #[must_use]
1494 pub fn as_variable(&self) -> Option<&str> {
1495 match self {
1496 Self::Variable(v) => Some(v),
1497 _ => None,
1498 }
1499 }
1500}
1501
/// Union of multiple result sets.
///
/// Accepts any number of inputs, held by value in a `Vec`.
// NOTE(review): whether duplicates are preserved (UNION ALL) is not stated here — confirm.
#[derive(Debug, Clone)]
pub struct UnionOp {
    /// Inputs to union together.
    pub inputs: Vec<LogicalOperator>,
}
1508
/// Set difference: rows in left that are not in right.
///
/// `all` distinguishes the EXCEPT ALL and EXCEPT DISTINCT forms.
#[derive(Debug, Clone)]
pub struct ExceptOp {
    /// Left input.
    pub left: Box<LogicalOperator>,
    /// Right input (rows to exclude).
    pub right: Box<LogicalOperator>,
    /// If true, preserve duplicates (EXCEPT ALL); if false, deduplicate (EXCEPT DISTINCT).
    pub all: bool,
}
1519
/// Set intersection: rows common to both inputs.
///
/// `all` distinguishes the INTERSECT ALL and INTERSECT DISTINCT forms.
#[derive(Debug, Clone)]
pub struct IntersectOp {
    /// Left input.
    pub left: Box<LogicalOperator>,
    /// Right input.
    pub right: Box<LogicalOperator>,
    /// If true, preserve duplicates (INTERSECT ALL); if false, deduplicate (INTERSECT DISTINCT).
    pub all: bool,
}
1530
/// Fallback operator: use left result if non-empty, otherwise use right.
///
/// The two inputs are alternatives: per the field docs, `right` contributes
/// only when `left` yields no rows at all.
#[derive(Debug, Clone)]
pub struct OtherwiseOp {
    /// Primary input (preferred).
    pub left: Box<LogicalOperator>,
    /// Fallback input (used only if left produces zero rows).
    pub right: Box<LogicalOperator>,
}
1539
/// Apply (lateral join): evaluate a subplan for each row of the outer input.
///
/// The subplan can reference variables bound by the outer input. Results are
/// concatenated (cross-product per row).
///
/// With `optional` set, outer rows that produce no inner rows are kept
/// instead of dropped.
#[derive(Debug, Clone)]
pub struct ApplyOp {
    /// Outer input providing rows.
    pub input: Box<LogicalOperator>,
    /// Subplan to evaluate per outer row.
    pub subplan: Box<LogicalOperator>,
    /// Variables imported from the outer scope into the inner plan.
    /// When non-empty, the planner injects these via `ParameterState`.
    pub shared_variables: Vec<String>,
    /// When true, uses left-join semantics: outer rows with no matching inner
    /// rows are emitted with NULLs for the inner columns (OPTIONAL CALL).
    pub optional: bool,
}
1557
/// Parameter scan: leaf operator for correlated subquery inner plans.
///
/// Emits a single row containing the values injected from the outer Apply.
/// Column names correspond to the outer variables imported via WITH.
///
/// As a leaf, it has no input operator of its own.
#[derive(Debug, Clone)]
pub struct ParameterScanOp {
    /// Column names for the injected parameters.
    pub columns: Vec<String>,
}
1567
/// Left outer join for OPTIONAL patterns.
///
/// `left` is the required side and `right` the optional side.
#[derive(Debug, Clone)]
pub struct LeftJoinOp {
    /// Left (required) input.
    pub left: Box<LogicalOperator>,
    /// Right (optional) input.
    pub right: Box<LogicalOperator>,
    /// Optional filter condition; `None` when the join has none.
    pub condition: Option<LogicalExpression>,
}
1578
/// Anti-join for MINUS patterns.
///
/// Keeps rows of `left` that have no match in `right`. No explicit join
/// condition is carried by this operator.
#[derive(Debug, Clone)]
pub struct AntiJoinOp {
    /// Left input (results to keep if no match on right).
    pub left: Box<LogicalOperator>,
    /// Right input (patterns to exclude).
    pub right: Box<LogicalOperator>,
}
1587
/// Bind a variable to an expression.
///
/// For the rows of `input`, computes `expression` and binds the result to
/// `variable`.
#[derive(Debug, Clone)]
pub struct BindOp {
    /// Expression to compute.
    pub expression: LogicalExpression,
    /// Variable to bind the result to.
    pub variable: String,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1598
/// Unwind a list into individual rows.
///
/// For each input row, evaluates the expression (which should return a list)
/// and emits one row for each element in the list.
///
/// The element position can additionally be exposed 1-based
/// (`ordinality_var`) and/or 0-based (`offset_var`).
#[derive(Debug, Clone)]
pub struct UnwindOp {
    /// The list expression to unwind.
    pub expression: LogicalExpression,
    /// The variable name for each element.
    pub variable: String,
    /// Optional variable for 1-based element position (ORDINALITY).
    pub ordinality_var: Option<String>,
    /// Optional variable for 0-based element position (OFFSET).
    pub offset_var: Option<String>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1616
/// Collect grouped key-value rows into a single Map value.
/// Used for Gremlin `groupCount()` semantics.
///
/// Each input row supplies one entry through `key_var` and `value_var`; the
/// resulting map is exposed under `alias`.
#[derive(Debug, Clone)]
pub struct MapCollectOp {
    /// Variable holding the map key.
    pub key_var: String,
    /// Variable holding the map value.
    pub value_var: String,
    /// Output variable alias.
    pub alias: String,
    /// Input operator (typically a grouped aggregate).
    pub input: Box<LogicalOperator>,
}
1630
/// Merge a pattern (match or create).
///
/// MERGE tries to match a pattern in the graph. If found, returns the existing
/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
/// (optionally applying ON CREATE SET).
///
/// This operator covers the node case; relationships are handled by
/// [`MergeRelationshipOp`].
#[derive(Debug, Clone)]
pub struct MergeOp {
    /// The node to merge.
    pub variable: String,
    /// Labels to match/create.
    pub labels: Vec<String>,
    /// Properties that must match (used for both matching and creation).
    pub match_properties: Vec<(String, LogicalExpression)>,
    /// Properties to set on CREATE.
    pub on_create: Vec<(String, LogicalExpression)>,
    /// Properties to set on MATCH.
    pub on_match: Vec<(String, LogicalExpression)>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1651
/// Merge a relationship pattern (match or create between two bound nodes).
///
/// MERGE on a relationship tries to find an existing relationship of the given type
/// between the source and target nodes. If found, returns the existing relationship
/// (optionally applying ON MATCH SET). If not found, creates it (optionally applying
/// ON CREATE SET).
///
/// Node patterns are handled by [`MergeOp`].
#[derive(Debug, Clone)]
pub struct MergeRelationshipOp {
    /// Variable to bind the relationship to.
    pub variable: String,
    /// Source node variable (must already be bound).
    pub source_variable: String,
    /// Target node variable (must already be bound).
    pub target_variable: String,
    /// Relationship type.
    pub edge_type: String,
    /// Properties that must match (used for both matching and creation).
    pub match_properties: Vec<(String, LogicalExpression)>,
    /// Properties to set on CREATE.
    pub on_create: Vec<(String, LogicalExpression)>,
    /// Properties to set on MATCH.
    pub on_match: Vec<(String, LogicalExpression)>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1677
/// Find shortest path between two nodes.
///
/// This operator uses Dijkstra's algorithm to find the shortest path(s)
/// between a source node and a target node, optionally filtered by edge type.
///
/// The endpoints are read from `source_var` and `target_var` in `input`, and
/// the result is bound to `path_alias`.
#[derive(Debug, Clone)]
pub struct ShortestPathOp {
    /// Input operator providing source/target nodes.
    pub input: Box<LogicalOperator>,
    /// Variable name for the source node.
    pub source_var: String,
    /// Variable name for the target node.
    pub target_var: String,
    /// Edge type filter (empty = match all types, multiple = match any).
    pub edge_types: Vec<String>,
    /// Direction of edge traversal.
    pub direction: ExpandDirection,
    /// Variable name to bind the path result.
    pub path_alias: String,
    /// Whether to find all shortest paths (vs. just one).
    pub all_paths: bool,
}
1699
1700// ==================== SPARQL Update Operators ====================
1701
/// Insert RDF triples.
///
/// Describes one triple to insert. Unlike [`TripleScanOp`], the target graph
/// is a plain `String` rather than a [`TripleComponent`].
#[derive(Debug, Clone)]
pub struct InsertTripleOp {
    /// Subject of the triple.
    pub subject: TripleComponent,
    /// Predicate of the triple.
    pub predicate: TripleComponent,
    /// Object of the triple.
    pub object: TripleComponent,
    /// Named graph (optional).
    pub graph: Option<String>,
    /// Input operator (provides variable bindings); `None` when there is none.
    pub input: Option<Box<LogicalOperator>>,
}
1716
/// Delete RDF triples.
///
/// Mirrors [`InsertTripleOp`]: the same fields, with subject/predicate/object
/// acting as patterns for the triples to remove.
#[derive(Debug, Clone)]
pub struct DeleteTripleOp {
    /// Subject pattern.
    pub subject: TripleComponent,
    /// Predicate pattern.
    pub predicate: TripleComponent,
    /// Object pattern.
    pub object: TripleComponent,
    /// Named graph (optional).
    pub graph: Option<String>,
    /// Input operator (provides variable bindings); `None` when there is none.
    pub input: Option<Box<LogicalOperator>>,
}
1731
/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
///
/// Per SPARQL 1.1 Update spec, this operator:
/// 1. Evaluates the WHERE clause once to get bindings
/// 2. Applies DELETE templates using those bindings
/// 3. Applies INSERT templates using the SAME bindings
///
/// This ensures DELETE and INSERT see consistent data.
///
/// Unlike the single-triple insert/delete operators, the WHERE clause is
/// mandatory here and both template lists travel in one operator.
#[derive(Debug, Clone)]
pub struct ModifyOp {
    /// DELETE triple templates (patterns with variables).
    pub delete_templates: Vec<TripleTemplate>,
    /// INSERT triple templates (patterns with variables).
    pub insert_templates: Vec<TripleTemplate>,
    /// WHERE clause that provides variable bindings.
    pub where_clause: Box<LogicalOperator>,
    /// Named graph context (for WITH clause).
    pub graph: Option<String>,
}
1751
/// A triple template for DELETE/INSERT operations.
///
/// Used by [`ModifyOp`] and [`ConstructOp`]. A template is a triple whose
/// positions may be variables to be filled in from bindings.
#[derive(Debug, Clone)]
pub struct TripleTemplate {
    /// Subject (may be a variable).
    pub subject: TripleComponent,
    /// Predicate (may be a variable).
    pub predicate: TripleComponent,
    /// Object (may be a variable or literal).
    pub object: TripleComponent,
    /// Named graph (optional).
    pub graph: Option<String>,
}
1764
/// SPARQL CONSTRUCT: evaluate WHERE, substitute bindings into template.
///
/// Produces rows with columns `subject`, `predicate`, `object` by instantiating
/// the template once per binding from the WHERE clause.
///
/// The WHERE clause evaluation is supplied as `input`.
#[derive(Debug, Clone)]
pub struct ConstructOp {
    /// Triple templates to instantiate.
    pub templates: Vec<TripleTemplate>,
    /// Input operator (WHERE clause evaluation).
    pub input: Box<LogicalOperator>,
}
1776
/// Clear all triples from a graph.
///
/// Note that `graph` uses the empty string as a sentinel: `Some("")` means
/// "all named graphs", not a graph whose IRI is empty.
#[derive(Debug, Clone)]
pub struct ClearGraphOp {
    /// Target graph (None = default graph, Some("") = all named, Some(iri) = specific graph).
    pub graph: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
1785
/// Create a new named graph.
///
/// The graph IRI is mandatory here, unlike the optional target of the
/// clear/drop operators.
#[derive(Debug, Clone)]
pub struct CreateGraphOp {
    /// IRI of the graph to create.
    pub graph: String,
    /// Whether to silently ignore if graph already exists.
    pub silent: bool,
}
1794
/// Drop (remove) a named graph.
///
/// `graph` may also be `None`, which targets the default graph.
// NOTE(review): `ClearGraphOp` documents `Some("")` as "all named graphs"; whether the
// same sentinel applies here is not stated — confirm against the translator.
#[derive(Debug, Clone)]
pub struct DropGraphOp {
    /// Target graph (None = default graph).
    pub graph: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
1803
/// Load data from a URL into a graph.
///
/// Reads from `source` and stores the result in `destination`.
#[derive(Debug, Clone)]
pub struct LoadGraphOp {
    /// Source URL to load data from.
    pub source: String,
    /// Destination graph (None = default graph).
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
1814
/// Copy triples from one graph to another.
///
/// Shares its field layout with [`MoveGraphOp`] and [`AddGraphOp`].
// NOTE(review): `None` presumably denotes the default graph for both fields, as it does
// on `LoadGraphOp::destination` — confirm.
#[derive(Debug, Clone)]
pub struct CopyGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
1825
/// Move triples from one graph to another.
///
/// Shares its field layout with [`CopyGraphOp`] and [`AddGraphOp`].
// NOTE(review): `None` presumably denotes the default graph for both fields, as it does
// on `LoadGraphOp::destination` — confirm.
#[derive(Debug, Clone)]
pub struct MoveGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
1836
/// Add (merge) triples from one graph to another.
///
/// Shares its field layout with [`CopyGraphOp`] and [`MoveGraphOp`].
// NOTE(review): `None` presumably denotes the default graph for both fields, as it does
// on `LoadGraphOp::destination` — confirm.
#[derive(Debug, Clone)]
pub struct AddGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
1847
1848// ==================== Vector Search Operators ====================
1849
/// Vector similarity scan operation.
///
/// Performs approximate nearest neighbor search using a vector index (HNSW)
/// or brute-force search for small datasets. Returns nodes/edges whose
/// embeddings are similar to the query vector.
///
/// `min_similarity` and `max_distance` are independent optional thresholds;
/// either, both or neither may be set.
///
/// # Example GQL
///
/// ```gql
/// MATCH (m:Movie)
/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
/// RETURN m.title
/// ```
#[derive(Debug, Clone)]
pub struct VectorScanOp {
    /// Variable name to bind matching entities to.
    pub variable: String,
    /// Name of the vector index to use (None = brute-force).
    pub index_name: Option<String>,
    /// Property containing the vector embedding.
    pub property: String,
    /// Optional label filter (scan only nodes with this label).
    pub label: Option<String>,
    /// The query vector expression.
    pub query_vector: LogicalExpression,
    /// Number of nearest neighbors to return.
    pub k: usize,
    /// Distance metric (None = use index default, typically cosine).
    pub metric: Option<VectorMetric>,
    /// Minimum similarity threshold (filters results below this).
    pub min_similarity: Option<f32>,
    /// Maximum distance threshold (filters results above this).
    pub max_distance: Option<f32>,
    /// Input operator (for hybrid queries combining graph + vector).
    pub input: Option<Box<LogicalOperator>>,
}
1886
/// Vector distance/similarity metric for vector scan operations.
///
/// Selected through the optional `metric` field of [`VectorScanOp`] and
/// [`VectorJoinOp`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum VectorMetric {
    /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
    Cosine,
    /// Euclidean (L2) distance. Best when magnitude matters.
    Euclidean,
    /// Dot product. Best for maximum inner product search.
    DotProduct,
    /// Manhattan (L1) distance. Less sensitive to outliers.
    Manhattan,
}
1900
/// Join graph patterns with vector similarity search.
///
/// This operator takes entities from the left input and computes vector
/// similarity against a query vector, outputting (entity, distance) pairs.
///
/// The left side is the `input` operator; the `right_*` fields describe the
/// entities being searched.
///
/// # Use Cases
///
/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
/// 3. **Filtering by similarity**: Join with threshold-based filtering
///
/// # Example
///
/// ```gql
/// // Find movies similar to what the user liked
/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
/// WITH avg(liked.embedding) AS user_taste
/// VECTOR JOIN (m:Movie) ON m.embedding
/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
/// RETURN m.title
/// ```
#[derive(Debug, Clone)]
pub struct VectorJoinOp {
    /// Input operator providing entities to match against.
    pub input: Box<LogicalOperator>,
    /// Variable from input to extract vectors from (for entity-to-entity similarity).
    /// If None, uses `query_vector` directly.
    pub left_vector_variable: Option<String>,
    /// Property containing the left vector (used with `left_vector_variable`).
    pub left_property: Option<String>,
    /// The query vector expression (constant or computed).
    pub query_vector: LogicalExpression,
    /// Variable name to bind the right-side matching entities.
    pub right_variable: String,
    /// Property containing the right-side vector embeddings.
    pub right_property: String,
    /// Optional label filter for right-side entities.
    pub right_label: Option<String>,
    /// Name of vector index on right side (None = brute-force).
    pub index_name: Option<String>,
    /// Number of nearest neighbors per left-side entity.
    pub k: usize,
    /// Distance metric.
    // NOTE(review): `None` presumably falls back to the index default, as documented on
    // `VectorScanOp::metric` — confirm.
    pub metric: Option<VectorMetric>,
    /// Minimum similarity threshold.
    pub min_similarity: Option<f32>,
    /// Maximum distance threshold.
    pub max_distance: Option<f32>,
    /// Variable to bind the distance/similarity score (`None` = score is not bound).
    pub score_variable: Option<String>,
}
1952
/// Return results (terminal operator).
///
/// Lists the expressions returned to the caller, computed over `input`.
#[derive(Debug, Clone)]
pub struct ReturnOp {
    /// Items to return.
    pub items: Vec<ReturnItem>,
    /// Whether to return distinct results.
    pub distinct: bool,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
1963
/// A single return item.
///
/// One entry of [`ReturnOp::items`].
#[derive(Debug, Clone)]
pub struct ReturnItem {
    /// Expression to return.
    pub expression: LogicalExpression,
    /// Alias for the result column, or `None` when no alias was given.
    pub alias: Option<String>,
}
1972
/// Define a property graph schema (SQL/PGQ DDL).
///
/// A named graph definition made of node tables and edge tables.
#[derive(Debug, Clone)]
pub struct CreatePropertyGraphOp {
    /// Graph name.
    pub name: String,
    /// Node table schemas (label name + column definitions).
    pub node_tables: Vec<PropertyGraphNodeTable>,
    /// Edge table schemas (type name + column definitions + references).
    pub edge_tables: Vec<PropertyGraphEdgeTable>,
}
1983
/// A node table in a property graph definition.
///
/// One entry of [`CreatePropertyGraphOp::node_tables`]. Column types are kept
/// as type-name strings.
#[derive(Debug, Clone)]
pub struct PropertyGraphNodeTable {
    /// Table name (maps to a node label).
    pub name: String,
    /// Column definitions as (name, type_name) pairs.
    pub columns: Vec<(String, String)>,
}
1992
/// Edge table declared inside a property graph definition.
#[derive(Debug, Clone)]
pub struct PropertyGraphEdgeTable {
    /// Name of the table; it maps to an edge type.
    pub name: String,
    /// Columns of the table, each given as a `(name, type_name)` pair.
    pub columns: Vec<(String, String)>,
    /// Name of the node table the edges start from.
    pub source_table: String,
    /// Name of the node table the edges point to.
    pub target_table: String,
}
2005
2006// ==================== Procedure Call Types ====================
2007
2008/// A CALL procedure operation.
2009///
2010/// ```text
2011/// CALL grafeo.pagerank({damping: 0.85}) YIELD nodeId, score
2012/// ```
2013#[derive(Debug, Clone)]
2014pub struct CallProcedureOp {
2015 /// Dotted procedure name, e.g. `["grafeo", "pagerank"]`.
2016 pub name: Vec<String>,
2017 /// Argument expressions (constants in Phase 1).
2018 pub arguments: Vec<LogicalExpression>,
2019 /// Optional YIELD clause: which columns to expose + aliases.
2020 pub yield_items: Option<Vec<ProcedureYield>>,
2021}
2022
/// One item of a procedure call's YIELD clause.
#[derive(Debug, Clone)]
pub struct ProcedureYield {
    /// Name of the column in the procedure result.
    pub field_name: String,
    /// Alias for the column, if any (as in `YIELD score AS rank`).
    pub alias: Option<String>,
}
2031
2032/// Re-export format enum from the physical operator.
2033pub use grafeo_core::execution::operators::LoadDataFormat;
2034
2035/// LOAD DATA operator: reads a file and produces rows.
2036///
2037/// With headers (CSV), each row is bound as a `Value::Map` with column names as keys.
2038/// Without headers (CSV), each row is bound as a `Value::List` of string values.
2039/// JSONL always produces `Value::Map`. Parquet always produces `Value::Map`.
2040#[derive(Debug, Clone)]
2041pub struct LoadDataOp {
2042 /// File format.
2043 pub format: LoadDataFormat,
2044 /// Whether the file has a header row (CSV only, ignored for JSONL/Parquet).
2045 pub with_headers: bool,
2046 /// File path (local filesystem).
2047 pub path: String,
2048 /// Variable name to bind each row to.
2049 pub variable: String,
2050 /// Field separator character (CSV only, default: comma).
2051 pub field_terminator: Option<char>,
2052}
2053
2054/// A logical expression.
2055#[derive(Debug, Clone)]
2056#[non_exhaustive]
2057pub enum LogicalExpression {
2058 /// A literal value.
2059 Literal(Value),
2060
2061 /// A variable reference.
2062 Variable(String),
2063
2064 /// Property access (e.g., n.name).
2065 Property {
2066 /// The variable to access.
2067 variable: String,
2068 /// The property name.
2069 property: String,
2070 },
2071
2072 /// Binary operation.
2073 Binary {
2074 /// Left operand.
2075 left: Box<LogicalExpression>,
2076 /// Operator.
2077 op: BinaryOp,
2078 /// Right operand.
2079 right: Box<LogicalExpression>,
2080 },
2081
2082 /// Unary operation.
2083 Unary {
2084 /// Operator.
2085 op: UnaryOp,
2086 /// Operand.
2087 operand: Box<LogicalExpression>,
2088 },
2089
2090 /// Function call.
2091 FunctionCall {
2092 /// Function name.
2093 name: String,
2094 /// Arguments.
2095 args: Vec<LogicalExpression>,
2096 /// Whether DISTINCT is applied (e.g., COUNT(DISTINCT x)).
2097 distinct: bool,
2098 },
2099
2100 /// List literal.
2101 List(Vec<LogicalExpression>),
2102
2103 /// Map literal (e.g., {name: 'Alix', age: 30}).
2104 Map(Vec<(String, LogicalExpression)>),
2105
2106 /// Index access (e.g., `list[0]`).
2107 IndexAccess {
2108 /// The base expression (typically a list or string).
2109 base: Box<LogicalExpression>,
2110 /// The index expression.
2111 index: Box<LogicalExpression>,
2112 },
2113
2114 /// Slice access (e.g., list[1..3]).
2115 SliceAccess {
2116 /// The base expression (typically a list or string).
2117 base: Box<LogicalExpression>,
2118 /// Start index (None means from beginning).
2119 start: Option<Box<LogicalExpression>>,
2120 /// End index (None means to end).
2121 end: Option<Box<LogicalExpression>>,
2122 },
2123
2124 /// CASE expression.
2125 Case {
2126 /// Test expression (for simple CASE).
2127 operand: Option<Box<LogicalExpression>>,
2128 /// WHEN clauses.
2129 when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
2130 /// ELSE clause.
2131 else_clause: Option<Box<LogicalExpression>>,
2132 },
2133
2134 /// Parameter reference.
2135 Parameter(String),
2136
2137 /// Labels of a node.
2138 Labels(String),
2139
2140 /// Type of an edge.
2141 Type(String),
2142
2143 /// ID of a node or edge.
2144 Id(String),
2145
2146 /// List comprehension: [x IN list WHERE predicate | expression]
2147 ListComprehension {
2148 /// Variable name for each element.
2149 variable: String,
2150 /// The source list expression.
2151 list_expr: Box<LogicalExpression>,
2152 /// Optional filter predicate.
2153 filter_expr: Option<Box<LogicalExpression>>,
2154 /// The mapping expression for each element.
2155 map_expr: Box<LogicalExpression>,
2156 },
2157
2158 /// List predicate: all/any/none/single(x IN list WHERE pred).
2159 ListPredicate {
2160 /// The kind of list predicate.
2161 kind: ListPredicateKind,
2162 /// The iteration variable name.
2163 variable: String,
2164 /// The source list expression.
2165 list_expr: Box<LogicalExpression>,
2166 /// The predicate to test for each element.
2167 predicate: Box<LogicalExpression>,
2168 },
2169
2170 /// EXISTS subquery.
2171 ExistsSubquery(Box<LogicalOperator>),
2172
2173 /// COUNT subquery.
2174 CountSubquery(Box<LogicalOperator>),
2175
2176 /// VALUE subquery: returns scalar value from first row of inner query.
2177 ValueSubquery(Box<LogicalOperator>),
2178
2179 /// Map projection: `node { .prop1, .prop2, key: expr, .* }`.
2180 MapProjection {
2181 /// The base variable name.
2182 base: String,
2183 /// Projection entries (property selectors, literal entries, all-properties).
2184 entries: Vec<MapProjectionEntry>,
2185 },
2186
2187 /// reduce() accumulator: `reduce(acc = init, x IN list | expr)`.
2188 Reduce {
2189 /// Accumulator variable name.
2190 accumulator: String,
2191 /// Initial value for the accumulator.
2192 initial: Box<LogicalExpression>,
2193 /// Iteration variable name.
2194 variable: String,
2195 /// List to iterate over.
2196 list: Box<LogicalExpression>,
2197 /// Body expression evaluated per iteration (references both accumulator and variable).
2198 expression: Box<LogicalExpression>,
2199 },
2200
2201 /// Pattern comprehension: `[(pattern) WHERE pred | expr]`.
2202 ///
2203 /// Executes the inner subplan, evaluates the projection for each row,
2204 /// and collects the results into a list.
2205 PatternComprehension {
2206 /// The subplan produced by translating the pattern (+optional WHERE).
2207 subplan: Box<LogicalOperator>,
2208 /// The projection expression evaluated for each match.
2209 projection: Box<LogicalExpression>,
2210 },
2211}
2212
2213/// An entry in a map projection.
2214#[derive(Debug, Clone)]
2215#[non_exhaustive]
2216pub enum MapProjectionEntry {
2217 /// `.propertyName`: shorthand for `propertyName: base.propertyName`.
2218 PropertySelector(String),
2219 /// `key: expression`: explicit key-value pair.
2220 LiteralEntry(String, LogicalExpression),
2221 /// `.*`: include all properties of the base entity.
2222 AllProperties,
2223}
2224
/// The kind of list predicate function.
///
/// This is a field-less enum, so it derives `Copy` just like the other
/// operator-kind enums in this module ([`BinaryOp`], [`UnaryOp`]); values can
/// be passed and matched by value without an explicit `clone()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ListPredicateKind {
    /// `all(x IN list WHERE pred)`: true if pred holds for every element.
    All,
    /// `any(x IN list WHERE pred)`: true if pred holds for at least one element.
    Any,
    /// `none(x IN list WHERE pred)`: true if pred holds for no element.
    None,
    /// `single(x IN list WHERE pred)`: true if pred holds for exactly one element.
    Single,
}
2238
/// Operator of a two-operand expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum BinaryOp {
    /// `=`: equality comparison.
    Eq,
    /// `<>`: inequality comparison.
    Ne,
    /// `<`: less than.
    Lt,
    /// `<=`: less than or equal.
    Le,
    /// `>`: greater than.
    Gt,
    /// `>=`: greater than or equal.
    Ge,

    /// Logical AND.
    And,
    /// Logical OR.
    Or,
    /// Logical XOR.
    Xor,

    /// `+`: addition.
    Add,
    /// `-`: subtraction.
    Sub,
    /// `*`: multiplication.
    Mul,
    /// `/`: division.
    Div,
    /// `%`: modulo.
    Mod,

    /// Concatenation of strings.
    Concat,
    /// Test that a string starts with another.
    StartsWith,
    /// Test that a string ends with another.
    EndsWith,
    /// Test that a string contains another.
    Contains,

    /// `IN`: collection membership.
    In,
    /// `LIKE`: pattern matching.
    Like,
    /// `=~`: regex matching.
    Regex,
    /// `^`: power/exponentiation.
    Pow,
}
2292
/// Operator of a single-operand expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum UnaryOp {
    /// Logical negation (NOT).
    Not,
    /// Negation of a number.
    Neg,
    /// `IS NULL` check.
    IsNull,
    /// `IS NOT NULL` check.
    IsNotNull,
}
2306
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `Person` node scan bound to `n` that both tests use as the plan leaf.
    fn person_scan() -> LogicalOperator {
        LogicalOperator::NodeScan(NodeScanOp {
            variable: "n".into(),
            label: Some("Person".into()),
            input: None,
        })
    }

    /// A `Return` directly over a node scan keeps its items, flag and input intact.
    #[test]
    fn test_simple_node_scan_plan() {
        let returned = ReturnItem {
            expression: LogicalExpression::Variable("n".into()),
            alias: None,
        };
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![returned],
            distinct: false,
            input: Box::new(person_scan()),
        }));

        // The root must be the Return operator we just built.
        let ret = match &plan.root {
            LogicalOperator::Return(ret) => ret,
            _ => panic!("Expected Return"),
        };
        assert_eq!(ret.items.len(), 1);
        assert!(!ret.distinct);

        // Its input must be the node scan.
        match ret.input.as_ref() {
            LogicalOperator::NodeScan(scan) => {
                assert_eq!(scan.variable, "n");
                assert_eq!(scan.label, Some("Person".into()));
            }
            _ => panic!("Expected NodeScan"),
        }
    }

    /// A `Return` over a `Filter` exposes the filter's binary predicate and operator.
    #[test]
    fn test_filter_plan() {
        // Predicate: n.age > 30
        let age_over_30 = LogicalExpression::Binary {
            left: Box::new(LogicalExpression::Property {
                variable: "n".into(),
                property: "age".into(),
            }),
            op: BinaryOp::Gt,
            right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
        };
        let filtered = LogicalOperator::Filter(FilterOp {
            predicate: age_over_30,
            input: Box::new(person_scan()),
            pushdown_hint: None,
        });
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![ReturnItem {
                expression: LogicalExpression::Property {
                    variable: "n".into(),
                    property: "name".into(),
                },
                alias: Some("name".into()),
            }],
            distinct: false,
            input: Box::new(filtered),
        }));

        let ret = match &plan.root {
            LogicalOperator::Return(ret) => ret,
            _ => panic!("Expected Return"),
        };
        let filter = match ret.input.as_ref() {
            LogicalOperator::Filter(filter) => filter,
            _ => panic!("Expected Filter"),
        };
        match &filter.predicate {
            LogicalExpression::Binary { op, .. } => assert_eq!(*op, BinaryOp::Gt),
            _ => panic!("Expected Binary expression"),
        }
    }
}