grafeo_engine/query/plan.rs
1//! Logical query plan representation.
2//!
3//! The logical plan is the intermediate representation between parsed queries
4//! and physical execution. Both GQL and Cypher queries are translated to this
5//! common representation.
6
7use grafeo_common::types::Value;
8
9/// A logical query plan.
10#[derive(Debug, Clone)]
11pub struct LogicalPlan {
12 /// The root operator of the plan.
13 pub root: LogicalOperator,
14}
15
16impl LogicalPlan {
17 /// Creates a new logical plan with the given root operator.
18 pub fn new(root: LogicalOperator) -> Self {
19 Self { root }
20 }
21}
22
23/// A logical operator in the query plan.
24#[derive(Debug, Clone)]
25pub enum LogicalOperator {
26 /// Scan all nodes, optionally filtered by label.
27 NodeScan(NodeScanOp),
28
29 /// Scan all edges, optionally filtered by type.
30 EdgeScan(EdgeScanOp),
31
32 /// Expand from nodes to neighbors via edges.
33 Expand(ExpandOp),
34
35 /// Filter rows based on a predicate.
36 Filter(FilterOp),
37
38 /// Project specific columns.
39 Project(ProjectOp),
40
41 /// Join two inputs.
42 Join(JoinOp),
43
44 /// Aggregate with grouping.
45 Aggregate(AggregateOp),
46
47 /// Limit the number of results.
48 Limit(LimitOp),
49
50 /// Skip a number of results.
51 Skip(SkipOp),
52
53 /// Sort results.
54 Sort(SortOp),
55
56 /// Remove duplicate results.
57 Distinct(DistinctOp),
58
59 /// Create a new node.
60 CreateNode(CreateNodeOp),
61
62 /// Create a new edge.
63 CreateEdge(CreateEdgeOp),
64
65 /// Delete a node.
66 DeleteNode(DeleteNodeOp),
67
68 /// Delete an edge.
69 DeleteEdge(DeleteEdgeOp),
70
71 /// Set properties on a node or edge.
72 SetProperty(SetPropertyOp),
73
74 /// Add labels to a node.
75 AddLabel(AddLabelOp),
76
77 /// Remove labels from a node.
78 RemoveLabel(RemoveLabelOp),
79
80 /// Return results (terminal operator).
81 Return(ReturnOp),
82
83 /// Empty result set.
84 Empty,
85
86 // ==================== RDF/SPARQL Operators ====================
87 /// Scan RDF triples matching a pattern.
88 TripleScan(TripleScanOp),
89
90 /// Union of multiple result sets.
91 Union(UnionOp),
92
93 /// Left outer join for OPTIONAL patterns.
94 LeftJoin(LeftJoinOp),
95
96 /// Anti-join for MINUS patterns.
97 AntiJoin(AntiJoinOp),
98
99 /// Bind a variable to an expression.
100 Bind(BindOp),
101
102 /// Unwind a list into individual rows.
103 Unwind(UnwindOp),
104
105 /// Collect grouped key-value rows into a single Map value.
106 /// Used for Gremlin `groupCount()` semantics.
107 MapCollect(MapCollectOp),
108
109 /// Merge a node pattern (match or create).
110 Merge(MergeOp),
111
112 /// Merge a relationship pattern (match or create).
113 MergeRelationship(MergeRelationshipOp),
114
115 /// Find shortest path between nodes.
116 ShortestPath(ShortestPathOp),
117
118 // ==================== SPARQL Update Operators ====================
119 /// Insert RDF triples.
120 InsertTriple(InsertTripleOp),
121
122 /// Delete RDF triples.
123 DeleteTriple(DeleteTripleOp),
124
125 /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
126 /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
127 Modify(ModifyOp),
128
129 /// Clear a graph (remove all triples).
130 ClearGraph(ClearGraphOp),
131
132 /// Create a new named graph.
133 CreateGraph(CreateGraphOp),
134
135 /// Drop (remove) a named graph.
136 DropGraph(DropGraphOp),
137
138 /// Load data from a URL into a graph.
139 LoadGraph(LoadGraphOp),
140
141 /// Copy triples from one graph to another.
142 CopyGraph(CopyGraphOp),
143
144 /// Move triples from one graph to another.
145 MoveGraph(MoveGraphOp),
146
147 /// Add (merge) triples from one graph to another.
148 AddGraph(AddGraphOp),
149
150 // ==================== Vector Search Operators ====================
151 /// Scan using vector similarity search.
152 VectorScan(VectorScanOp),
153
154 /// Join graph patterns with vector similarity search.
155 ///
156 /// Computes vector distances between entities from the left input and
157 /// a query vector, then joins with similarity scores. Useful for:
158 /// - Filtering graph traversal results by vector similarity
159 /// - Computing aggregated embeddings and finding similar entities
160 /// - Combining multiple vector sources with graph structure
161 VectorJoin(VectorJoinOp),
162
163 // ==================== Set Operations ====================
164 /// Set difference: rows in left that are not in right.
165 Except(ExceptOp),
166
167 /// Set intersection: rows common to all inputs.
168 Intersect(IntersectOp),
169
170 /// Fallback: use left result if non-empty, otherwise right.
171 Otherwise(OtherwiseOp),
172
173 // ==================== Correlated Subquery ====================
174 /// Apply (lateral join): evaluate a subplan per input row.
175 Apply(ApplyOp),
176
177 // ==================== DDL Operators ====================
178 /// Define a property graph schema (SQL/PGQ DDL).
179 CreatePropertyGraph(CreatePropertyGraphOp),
180
181 // ==================== Procedure Call Operators ====================
182 /// Invoke a stored procedure (CALL ... YIELD).
183 CallProcedure(CallProcedureOp),
184}
185
186impl LogicalOperator {
187 /// Returns `true` if this operator or any of its children perform mutations.
188 #[must_use]
189 pub fn has_mutations(&self) -> bool {
190 match self {
191 // Direct mutation operators
192 Self::CreateNode(_)
193 | Self::CreateEdge(_)
194 | Self::DeleteNode(_)
195 | Self::DeleteEdge(_)
196 | Self::SetProperty(_)
197 | Self::AddLabel(_)
198 | Self::RemoveLabel(_)
199 | Self::Merge(_)
200 | Self::MergeRelationship(_)
201 | Self::InsertTriple(_)
202 | Self::DeleteTriple(_)
203 | Self::Modify(_)
204 | Self::ClearGraph(_)
205 | Self::CreateGraph(_)
206 | Self::DropGraph(_)
207 | Self::LoadGraph(_)
208 | Self::CopyGraph(_)
209 | Self::MoveGraph(_)
210 | Self::AddGraph(_)
211 | Self::CreatePropertyGraph(_) => true,
212
213 // Operators with an `input` child
214 Self::Filter(op) => op.input.has_mutations(),
215 Self::Project(op) => op.input.has_mutations(),
216 Self::Aggregate(op) => op.input.has_mutations(),
217 Self::Limit(op) => op.input.has_mutations(),
218 Self::Skip(op) => op.input.has_mutations(),
219 Self::Sort(op) => op.input.has_mutations(),
220 Self::Distinct(op) => op.input.has_mutations(),
221 Self::Unwind(op) => op.input.has_mutations(),
222 Self::Bind(op) => op.input.has_mutations(),
223 Self::MapCollect(op) => op.input.has_mutations(),
224 Self::Return(op) => op.input.has_mutations(),
225 Self::VectorScan(_) | Self::VectorJoin(_) => false,
226
227 // Operators with two children
228 Self::Join(op) => op.left.has_mutations() || op.right.has_mutations(),
229 Self::LeftJoin(op) => op.left.has_mutations() || op.right.has_mutations(),
230 Self::AntiJoin(op) => op.left.has_mutations() || op.right.has_mutations(),
231 Self::Except(op) => op.left.has_mutations() || op.right.has_mutations(),
232 Self::Intersect(op) => op.left.has_mutations() || op.right.has_mutations(),
233 Self::Otherwise(op) => op.left.has_mutations() || op.right.has_mutations(),
234 Self::Union(op) => op.inputs.iter().any(|i| i.has_mutations()),
235 Self::Apply(op) => op.input.has_mutations() || op.subplan.has_mutations(),
236
237 // Leaf operators (read-only)
238 Self::NodeScan(_)
239 | Self::EdgeScan(_)
240 | Self::Expand(_)
241 | Self::TripleScan(_)
242 | Self::ShortestPath(_)
243 | Self::Empty
244 | Self::CallProcedure(_) => false,
245 }
246 }
247}
248
249/// Scan nodes from the graph.
250#[derive(Debug, Clone)]
251pub struct NodeScanOp {
252 /// Variable name to bind the node to.
253 pub variable: String,
254 /// Optional label filter.
255 pub label: Option<String>,
256 /// Child operator (if any, for chained patterns).
257 pub input: Option<Box<LogicalOperator>>,
258}
259
260/// Scan edges from the graph.
261#[derive(Debug, Clone)]
262pub struct EdgeScanOp {
263 /// Variable name to bind the edge to.
264 pub variable: String,
265 /// Edge type filter (empty = match all types).
266 pub edge_types: Vec<String>,
267 /// Child operator (if any).
268 pub input: Option<Box<LogicalOperator>>,
269}
270
271/// Path traversal mode for variable-length expansion.
272#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
273pub enum PathMode {
274 /// Allows repeated nodes and edges (default).
275 #[default]
276 Walk,
277 /// No repeated edges.
278 Trail,
279 /// No repeated nodes except endpoints.
280 Simple,
281 /// No repeated nodes at all.
282 Acyclic,
283}
284
285/// Expand from nodes to their neighbors.
286#[derive(Debug, Clone)]
287pub struct ExpandOp {
288 /// Source node variable.
289 pub from_variable: String,
290 /// Target node variable to bind.
291 pub to_variable: String,
292 /// Edge variable to bind (optional).
293 pub edge_variable: Option<String>,
294 /// Direction of expansion.
295 pub direction: ExpandDirection,
296 /// Edge type filter (empty = match all types, multiple = match any).
297 pub edge_types: Vec<String>,
298 /// Minimum hops (for variable-length patterns).
299 pub min_hops: u32,
300 /// Maximum hops (for variable-length patterns).
301 pub max_hops: Option<u32>,
302 /// Input operator.
303 pub input: Box<LogicalOperator>,
304 /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
305 /// When set, a path length column will be output under this name.
306 pub path_alias: Option<String>,
307 /// Path traversal mode (WALK, TRAIL, SIMPLE, ACYCLIC).
308 pub path_mode: PathMode,
309}
310
311/// Direction for edge expansion.
312#[derive(Debug, Clone, Copy, PartialEq, Eq)]
313pub enum ExpandDirection {
314 /// Follow outgoing edges.
315 Outgoing,
316 /// Follow incoming edges.
317 Incoming,
318 /// Follow edges in either direction.
319 Both,
320}
321
322/// Join two inputs.
323#[derive(Debug, Clone)]
324pub struct JoinOp {
325 /// Left input.
326 pub left: Box<LogicalOperator>,
327 /// Right input.
328 pub right: Box<LogicalOperator>,
329 /// Join type.
330 pub join_type: JoinType,
331 /// Join conditions.
332 pub conditions: Vec<JoinCondition>,
333}
334
335/// Join type.
336#[derive(Debug, Clone, Copy, PartialEq, Eq)]
337pub enum JoinType {
338 /// Inner join.
339 Inner,
340 /// Left outer join.
341 Left,
342 /// Right outer join.
343 Right,
344 /// Full outer join.
345 Full,
346 /// Cross join (Cartesian product).
347 Cross,
348 /// Semi join (returns left rows with matching right rows).
349 Semi,
350 /// Anti join (returns left rows without matching right rows).
351 Anti,
352}
353
354/// A join condition.
355#[derive(Debug, Clone)]
356pub struct JoinCondition {
357 /// Left expression.
358 pub left: LogicalExpression,
359 /// Right expression.
360 pub right: LogicalExpression,
361}
362
363/// Aggregate with grouping.
364#[derive(Debug, Clone)]
365pub struct AggregateOp {
366 /// Group by expressions.
367 pub group_by: Vec<LogicalExpression>,
368 /// Aggregate functions.
369 pub aggregates: Vec<AggregateExpr>,
370 /// Input operator.
371 pub input: Box<LogicalOperator>,
372 /// HAVING clause filter (applied after aggregation).
373 pub having: Option<LogicalExpression>,
374}
375
376/// An aggregate expression.
377#[derive(Debug, Clone)]
378pub struct AggregateExpr {
379 /// Aggregate function.
380 pub function: AggregateFunction,
381 /// Expression to aggregate.
382 pub expression: Option<LogicalExpression>,
383 /// Whether to use DISTINCT.
384 pub distinct: bool,
385 /// Alias for the result.
386 pub alias: Option<String>,
387 /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
388 pub percentile: Option<f64>,
389}
390
391/// Aggregate function.
392#[derive(Debug, Clone, Copy, PartialEq, Eq)]
393pub enum AggregateFunction {
394 /// Count all rows (COUNT(*)).
395 Count,
396 /// Count non-null values (COUNT(expr)).
397 CountNonNull,
398 /// Sum values.
399 Sum,
400 /// Average values.
401 Avg,
402 /// Minimum value.
403 Min,
404 /// Maximum value.
405 Max,
406 /// Collect into list.
407 Collect,
408 /// Sample standard deviation (STDEV).
409 StdDev,
410 /// Population standard deviation (STDEVP).
411 StdDevPop,
412 /// Discrete percentile (PERCENTILE_DISC).
413 PercentileDisc,
414 /// Continuous percentile (PERCENTILE_CONT).
415 PercentileCont,
416}
417
418/// Filter rows based on a predicate.
419#[derive(Debug, Clone)]
420pub struct FilterOp {
421 /// The filter predicate.
422 pub predicate: LogicalExpression,
423 /// Input operator.
424 pub input: Box<LogicalOperator>,
425}
426
427/// Project specific columns.
428#[derive(Debug, Clone)]
429pub struct ProjectOp {
430 /// Columns to project.
431 pub projections: Vec<Projection>,
432 /// Input operator.
433 pub input: Box<LogicalOperator>,
434}
435
436/// A single projection (column selection or computation).
437#[derive(Debug, Clone)]
438pub struct Projection {
439 /// Expression to compute.
440 pub expression: LogicalExpression,
441 /// Alias for the result.
442 pub alias: Option<String>,
443}
444
445/// Limit the number of results.
446#[derive(Debug, Clone)]
447pub struct LimitOp {
448 /// Maximum number of rows to return.
449 pub count: usize,
450 /// Input operator.
451 pub input: Box<LogicalOperator>,
452}
453
454/// Skip a number of results.
455#[derive(Debug, Clone)]
456pub struct SkipOp {
457 /// Number of rows to skip.
458 pub count: usize,
459 /// Input operator.
460 pub input: Box<LogicalOperator>,
461}
462
463/// Sort results.
464#[derive(Debug, Clone)]
465pub struct SortOp {
466 /// Sort keys.
467 pub keys: Vec<SortKey>,
468 /// Input operator.
469 pub input: Box<LogicalOperator>,
470}
471
472/// A sort key.
473#[derive(Debug, Clone)]
474pub struct SortKey {
475 /// Expression to sort by.
476 pub expression: LogicalExpression,
477 /// Sort order.
478 pub order: SortOrder,
479}
480
481/// Sort order.
482#[derive(Debug, Clone, Copy, PartialEq, Eq)]
483pub enum SortOrder {
484 /// Ascending order.
485 Ascending,
486 /// Descending order.
487 Descending,
488}
489
490/// Remove duplicate results.
491#[derive(Debug, Clone)]
492pub struct DistinctOp {
493 /// Input operator.
494 pub input: Box<LogicalOperator>,
495 /// Optional columns to use for deduplication.
496 /// If None, all columns are used.
497 pub columns: Option<Vec<String>>,
498}
499
500/// Create a new node.
501#[derive(Debug, Clone)]
502pub struct CreateNodeOp {
503 /// Variable name to bind the created node to.
504 pub variable: String,
505 /// Labels for the new node.
506 pub labels: Vec<String>,
507 /// Properties for the new node.
508 pub properties: Vec<(String, LogicalExpression)>,
509 /// Input operator (for chained creates).
510 pub input: Option<Box<LogicalOperator>>,
511}
512
513/// Create a new edge.
514#[derive(Debug, Clone)]
515pub struct CreateEdgeOp {
516 /// Variable name to bind the created edge to.
517 pub variable: Option<String>,
518 /// Source node variable.
519 pub from_variable: String,
520 /// Target node variable.
521 pub to_variable: String,
522 /// Edge type.
523 pub edge_type: String,
524 /// Properties for the new edge.
525 pub properties: Vec<(String, LogicalExpression)>,
526 /// Input operator.
527 pub input: Box<LogicalOperator>,
528}
529
530/// Delete a node.
531#[derive(Debug, Clone)]
532pub struct DeleteNodeOp {
533 /// Variable of the node to delete.
534 pub variable: String,
535 /// Whether to detach (delete connected edges) before deleting.
536 pub detach: bool,
537 /// Input operator.
538 pub input: Box<LogicalOperator>,
539}
540
541/// Delete an edge.
542#[derive(Debug, Clone)]
543pub struct DeleteEdgeOp {
544 /// Variable of the edge to delete.
545 pub variable: String,
546 /// Input operator.
547 pub input: Box<LogicalOperator>,
548}
549
550/// Set properties on a node or edge.
551#[derive(Debug, Clone)]
552pub struct SetPropertyOp {
553 /// Variable of the entity to update.
554 pub variable: String,
555 /// Properties to set (name -> expression).
556 pub properties: Vec<(String, LogicalExpression)>,
557 /// Whether to replace all properties (vs. merge).
558 pub replace: bool,
559 /// Whether the target variable is an edge (vs. node).
560 pub is_edge: bool,
561 /// Input operator.
562 pub input: Box<LogicalOperator>,
563}
564
565/// Add labels to a node.
566#[derive(Debug, Clone)]
567pub struct AddLabelOp {
568 /// Variable of the node to update.
569 pub variable: String,
570 /// Labels to add.
571 pub labels: Vec<String>,
572 /// Input operator.
573 pub input: Box<LogicalOperator>,
574}
575
576/// Remove labels from a node.
577#[derive(Debug, Clone)]
578pub struct RemoveLabelOp {
579 /// Variable of the node to update.
580 pub variable: String,
581 /// Labels to remove.
582 pub labels: Vec<String>,
583 /// Input operator.
584 pub input: Box<LogicalOperator>,
585}
586
587// ==================== RDF/SPARQL Operators ====================
588
589/// Scan RDF triples matching a pattern.
590#[derive(Debug, Clone)]
591pub struct TripleScanOp {
592 /// Subject pattern (variable name or IRI).
593 pub subject: TripleComponent,
594 /// Predicate pattern (variable name or IRI).
595 pub predicate: TripleComponent,
596 /// Object pattern (variable name, IRI, or literal).
597 pub object: TripleComponent,
598 /// Named graph (optional).
599 pub graph: Option<TripleComponent>,
600 /// Input operator (for chained patterns).
601 pub input: Option<Box<LogicalOperator>>,
602}
603
604/// A component of a triple pattern.
605#[derive(Debug, Clone)]
606pub enum TripleComponent {
607 /// A variable to bind.
608 Variable(String),
609 /// A constant IRI.
610 Iri(String),
611 /// A constant literal value.
612 Literal(Value),
613}
614
615/// Union of multiple result sets.
616#[derive(Debug, Clone)]
617pub struct UnionOp {
618 /// Inputs to union together.
619 pub inputs: Vec<LogicalOperator>,
620}
621
622/// Set difference: rows in left that are not in right.
623#[derive(Debug, Clone)]
624pub struct ExceptOp {
625 /// Left input.
626 pub left: Box<LogicalOperator>,
627 /// Right input (rows to exclude).
628 pub right: Box<LogicalOperator>,
629 /// If true, preserve duplicates (EXCEPT ALL); if false, deduplicate (EXCEPT DISTINCT).
630 pub all: bool,
631}
632
633/// Set intersection: rows common to both inputs.
634#[derive(Debug, Clone)]
635pub struct IntersectOp {
636 /// Left input.
637 pub left: Box<LogicalOperator>,
638 /// Right input.
639 pub right: Box<LogicalOperator>,
640 /// If true, preserve duplicates (INTERSECT ALL); if false, deduplicate (INTERSECT DISTINCT).
641 pub all: bool,
642}
643
644/// Fallback operator: use left result if non-empty, otherwise use right.
645#[derive(Debug, Clone)]
646pub struct OtherwiseOp {
647 /// Primary input (preferred).
648 pub left: Box<LogicalOperator>,
649 /// Fallback input (used only if left produces zero rows).
650 pub right: Box<LogicalOperator>,
651}
652
653/// Apply (lateral join): evaluate a subplan for each row of the outer input.
654///
655/// The subplan can reference variables bound by the outer input. Results are
656/// concatenated (cross-product per row).
657#[derive(Debug, Clone)]
658pub struct ApplyOp {
659 /// Outer input providing rows.
660 pub input: Box<LogicalOperator>,
661 /// Subplan to evaluate per outer row.
662 pub subplan: Box<LogicalOperator>,
663}
664
665/// Left outer join for OPTIONAL patterns.
666#[derive(Debug, Clone)]
667pub struct LeftJoinOp {
668 /// Left (required) input.
669 pub left: Box<LogicalOperator>,
670 /// Right (optional) input.
671 pub right: Box<LogicalOperator>,
672 /// Optional filter condition.
673 pub condition: Option<LogicalExpression>,
674}
675
676/// Anti-join for MINUS patterns.
677#[derive(Debug, Clone)]
678pub struct AntiJoinOp {
679 /// Left input (results to keep if no match on right).
680 pub left: Box<LogicalOperator>,
681 /// Right input (patterns to exclude).
682 pub right: Box<LogicalOperator>,
683}
684
685/// Bind a variable to an expression.
686#[derive(Debug, Clone)]
687pub struct BindOp {
688 /// Expression to compute.
689 pub expression: LogicalExpression,
690 /// Variable to bind the result to.
691 pub variable: String,
692 /// Input operator.
693 pub input: Box<LogicalOperator>,
694}
695
696/// Unwind a list into individual rows.
697///
698/// For each input row, evaluates the expression (which should return a list)
699/// and emits one row for each element in the list.
700#[derive(Debug, Clone)]
701pub struct UnwindOp {
702 /// The list expression to unwind.
703 pub expression: LogicalExpression,
704 /// The variable name for each element.
705 pub variable: String,
706 /// Optional variable for 1-based element position (ORDINALITY).
707 pub ordinality_var: Option<String>,
708 /// Optional variable for 0-based element position (OFFSET).
709 pub offset_var: Option<String>,
710 /// Input operator.
711 pub input: Box<LogicalOperator>,
712}
713
714/// Collect grouped key-value rows into a single Map value.
715/// Used for Gremlin `groupCount()` semantics.
716#[derive(Debug, Clone)]
717pub struct MapCollectOp {
718 /// Variable holding the map key.
719 pub key_var: String,
720 /// Variable holding the map value.
721 pub value_var: String,
722 /// Output variable alias.
723 pub alias: String,
724 /// Input operator (typically a grouped aggregate).
725 pub input: Box<LogicalOperator>,
726}
727
728/// Merge a pattern (match or create).
729///
730/// MERGE tries to match a pattern in the graph. If found, returns the existing
731/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
732/// (optionally applying ON CREATE SET).
733#[derive(Debug, Clone)]
734pub struct MergeOp {
735 /// The node to merge.
736 pub variable: String,
737 /// Labels to match/create.
738 pub labels: Vec<String>,
739 /// Properties that must match (used for both matching and creation).
740 pub match_properties: Vec<(String, LogicalExpression)>,
741 /// Properties to set on CREATE.
742 pub on_create: Vec<(String, LogicalExpression)>,
743 /// Properties to set on MATCH.
744 pub on_match: Vec<(String, LogicalExpression)>,
745 /// Input operator.
746 pub input: Box<LogicalOperator>,
747}
748
749/// Merge a relationship pattern (match or create between two bound nodes).
750///
751/// MERGE on a relationship tries to find an existing relationship of the given type
752/// between the source and target nodes. If found, returns the existing relationship
753/// (optionally applying ON MATCH SET). If not found, creates it (optionally applying
754/// ON CREATE SET).
755#[derive(Debug, Clone)]
756pub struct MergeRelationshipOp {
757 /// Variable to bind the relationship to.
758 pub variable: String,
759 /// Source node variable (must already be bound).
760 pub source_variable: String,
761 /// Target node variable (must already be bound).
762 pub target_variable: String,
763 /// Relationship type.
764 pub edge_type: String,
765 /// Properties that must match (used for both matching and creation).
766 pub match_properties: Vec<(String, LogicalExpression)>,
767 /// Properties to set on CREATE.
768 pub on_create: Vec<(String, LogicalExpression)>,
769 /// Properties to set on MATCH.
770 pub on_match: Vec<(String, LogicalExpression)>,
771 /// Input operator.
772 pub input: Box<LogicalOperator>,
773}
774
775/// Find shortest path between two nodes.
776///
777/// This operator uses Dijkstra's algorithm to find the shortest path(s)
778/// between a source node and a target node, optionally filtered by edge type.
779#[derive(Debug, Clone)]
780pub struct ShortestPathOp {
781 /// Input operator providing source/target nodes.
782 pub input: Box<LogicalOperator>,
783 /// Variable name for the source node.
784 pub source_var: String,
785 /// Variable name for the target node.
786 pub target_var: String,
787 /// Edge type filter (empty = match all types, multiple = match any).
788 pub edge_types: Vec<String>,
789 /// Direction of edge traversal.
790 pub direction: ExpandDirection,
791 /// Variable name to bind the path result.
792 pub path_alias: String,
793 /// Whether to find all shortest paths (vs. just one).
794 pub all_paths: bool,
795}
796
797// ==================== SPARQL Update Operators ====================
798
799/// Insert RDF triples.
800#[derive(Debug, Clone)]
801pub struct InsertTripleOp {
802 /// Subject of the triple.
803 pub subject: TripleComponent,
804 /// Predicate of the triple.
805 pub predicate: TripleComponent,
806 /// Object of the triple.
807 pub object: TripleComponent,
808 /// Named graph (optional).
809 pub graph: Option<String>,
810 /// Input operator (provides variable bindings).
811 pub input: Option<Box<LogicalOperator>>,
812}
813
814/// Delete RDF triples.
815#[derive(Debug, Clone)]
816pub struct DeleteTripleOp {
817 /// Subject pattern.
818 pub subject: TripleComponent,
819 /// Predicate pattern.
820 pub predicate: TripleComponent,
821 /// Object pattern.
822 pub object: TripleComponent,
823 /// Named graph (optional).
824 pub graph: Option<String>,
825 /// Input operator (provides variable bindings).
826 pub input: Option<Box<LogicalOperator>>,
827}
828
829/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
830///
831/// Per SPARQL 1.1 Update spec, this operator:
832/// 1. Evaluates the WHERE clause once to get bindings
833/// 2. Applies DELETE templates using those bindings
834/// 3. Applies INSERT templates using the SAME bindings
835///
836/// This ensures DELETE and INSERT see consistent data.
837#[derive(Debug, Clone)]
838pub struct ModifyOp {
839 /// DELETE triple templates (patterns with variables).
840 pub delete_templates: Vec<TripleTemplate>,
841 /// INSERT triple templates (patterns with variables).
842 pub insert_templates: Vec<TripleTemplate>,
843 /// WHERE clause that provides variable bindings.
844 pub where_clause: Box<LogicalOperator>,
845 /// Named graph context (for WITH clause).
846 pub graph: Option<String>,
847}
848
849/// A triple template for DELETE/INSERT operations.
850#[derive(Debug, Clone)]
851pub struct TripleTemplate {
852 /// Subject (may be a variable).
853 pub subject: TripleComponent,
854 /// Predicate (may be a variable).
855 pub predicate: TripleComponent,
856 /// Object (may be a variable or literal).
857 pub object: TripleComponent,
858 /// Named graph (optional).
859 pub graph: Option<String>,
860}
861
862/// Clear all triples from a graph.
863#[derive(Debug, Clone)]
864pub struct ClearGraphOp {
865 /// Target graph (None = default graph, Some("") = all named, Some(iri) = specific graph).
866 pub graph: Option<String>,
867 /// Whether to silently ignore errors.
868 pub silent: bool,
869}
870
871/// Create a new named graph.
872#[derive(Debug, Clone)]
873pub struct CreateGraphOp {
874 /// IRI of the graph to create.
875 pub graph: String,
876 /// Whether to silently ignore if graph already exists.
877 pub silent: bool,
878}
879
880/// Drop (remove) a named graph.
881#[derive(Debug, Clone)]
882pub struct DropGraphOp {
883 /// Target graph (None = default graph).
884 pub graph: Option<String>,
885 /// Whether to silently ignore errors.
886 pub silent: bool,
887}
888
889/// Load data from a URL into a graph.
890#[derive(Debug, Clone)]
891pub struct LoadGraphOp {
892 /// Source URL to load data from.
893 pub source: String,
894 /// Destination graph (None = default graph).
895 pub destination: Option<String>,
896 /// Whether to silently ignore errors.
897 pub silent: bool,
898}
899
900/// Copy triples from one graph to another.
901#[derive(Debug, Clone)]
902pub struct CopyGraphOp {
903 /// Source graph.
904 pub source: Option<String>,
905 /// Destination graph.
906 pub destination: Option<String>,
907 /// Whether to silently ignore errors.
908 pub silent: bool,
909}
910
911/// Move triples from one graph to another.
912#[derive(Debug, Clone)]
913pub struct MoveGraphOp {
914 /// Source graph.
915 pub source: Option<String>,
916 /// Destination graph.
917 pub destination: Option<String>,
918 /// Whether to silently ignore errors.
919 pub silent: bool,
920}
921
922/// Add (merge) triples from one graph to another.
923#[derive(Debug, Clone)]
924pub struct AddGraphOp {
925 /// Source graph.
926 pub source: Option<String>,
927 /// Destination graph.
928 pub destination: Option<String>,
929 /// Whether to silently ignore errors.
930 pub silent: bool,
931}
932
933// ==================== Vector Search Operators ====================
934
935/// Vector similarity scan operation.
936///
937/// Performs approximate nearest neighbor search using a vector index (HNSW)
938/// or brute-force search for small datasets. Returns nodes/edges whose
939/// embeddings are similar to the query vector.
940///
941/// # Example GQL
942///
943/// ```gql
944/// MATCH (m:Movie)
945/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
946/// RETURN m.title
947/// ```
948#[derive(Debug, Clone)]
949pub struct VectorScanOp {
950 /// Variable name to bind matching entities to.
951 pub variable: String,
952 /// Name of the vector index to use (None = brute-force).
953 pub index_name: Option<String>,
954 /// Property containing the vector embedding.
955 pub property: String,
956 /// Optional label filter (scan only nodes with this label).
957 pub label: Option<String>,
958 /// The query vector expression.
959 pub query_vector: LogicalExpression,
960 /// Number of nearest neighbors to return.
961 pub k: usize,
962 /// Distance metric (None = use index default, typically cosine).
963 pub metric: Option<VectorMetric>,
964 /// Minimum similarity threshold (filters results below this).
965 pub min_similarity: Option<f32>,
966 /// Maximum distance threshold (filters results above this).
967 pub max_distance: Option<f32>,
968 /// Input operator (for hybrid queries combining graph + vector).
969 pub input: Option<Box<LogicalOperator>>,
970}
971
972/// Vector distance/similarity metric for vector scan operations.
973#[derive(Debug, Clone, Copy, PartialEq, Eq)]
974pub enum VectorMetric {
975 /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
976 Cosine,
977 /// Euclidean (L2) distance. Best when magnitude matters.
978 Euclidean,
979 /// Dot product. Best for maximum inner product search.
980 DotProduct,
981 /// Manhattan (L1) distance. Less sensitive to outliers.
982 Manhattan,
983}
984
985/// Join graph patterns with vector similarity search.
986///
987/// This operator takes entities from the left input and computes vector
988/// similarity against a query vector, outputting (entity, distance) pairs.
989///
990/// # Use Cases
991///
992/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
993/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
994/// 3. **Filtering by similarity**: Join with threshold-based filtering
995///
996/// # Example
997///
998/// ```gql
999/// // Find movies similar to what the user liked
1000/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
1001/// WITH avg(liked.embedding) AS user_taste
1002/// VECTOR JOIN (m:Movie) ON m.embedding
1003/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
1004/// RETURN m.title
1005/// ```
1006#[derive(Debug, Clone)]
1007pub struct VectorJoinOp {
1008 /// Input operator providing entities to match against.
1009 pub input: Box<LogicalOperator>,
1010 /// Variable from input to extract vectors from (for entity-to-entity similarity).
1011 /// If None, uses `query_vector` directly.
1012 pub left_vector_variable: Option<String>,
1013 /// Property containing the left vector (used with `left_vector_variable`).
1014 pub left_property: Option<String>,
1015 /// The query vector expression (constant or computed).
1016 pub query_vector: LogicalExpression,
1017 /// Variable name to bind the right-side matching entities.
1018 pub right_variable: String,
1019 /// Property containing the right-side vector embeddings.
1020 pub right_property: String,
1021 /// Optional label filter for right-side entities.
1022 pub right_label: Option<String>,
1023 /// Name of vector index on right side (None = brute-force).
1024 pub index_name: Option<String>,
1025 /// Number of nearest neighbors per left-side entity.
1026 pub k: usize,
1027 /// Distance metric.
1028 pub metric: Option<VectorMetric>,
1029 /// Minimum similarity threshold.
1030 pub min_similarity: Option<f32>,
1031 /// Maximum distance threshold.
1032 pub max_distance: Option<f32>,
1033 /// Variable to bind the distance/similarity score.
1034 pub score_variable: Option<String>,
1035}
1036
1037/// Return results (terminal operator).
1038#[derive(Debug, Clone)]
1039pub struct ReturnOp {
1040 /// Items to return.
1041 pub items: Vec<ReturnItem>,
1042 /// Whether to return distinct results.
1043 pub distinct: bool,
1044 /// Input operator.
1045 pub input: Box<LogicalOperator>,
1046}
1047
1048/// A single return item.
1049#[derive(Debug, Clone)]
1050pub struct ReturnItem {
1051 /// Expression to return.
1052 pub expression: LogicalExpression,
1053 /// Alias for the result column.
1054 pub alias: Option<String>,
1055}
1056
1057/// Define a property graph schema (SQL/PGQ DDL).
1058#[derive(Debug, Clone)]
1059pub struct CreatePropertyGraphOp {
1060 /// Graph name.
1061 pub name: String,
1062 /// Node table schemas (label name + column definitions).
1063 pub node_tables: Vec<PropertyGraphNodeTable>,
1064 /// Edge table schemas (type name + column definitions + references).
1065 pub edge_tables: Vec<PropertyGraphEdgeTable>,
1066}
1067
1068/// A node table in a property graph definition.
1069#[derive(Debug, Clone)]
1070pub struct PropertyGraphNodeTable {
1071 /// Table name (maps to a node label).
1072 pub name: String,
1073 /// Column definitions as (name, type_name) pairs.
1074 pub columns: Vec<(String, String)>,
1075}
1076
1077/// An edge table in a property graph definition.
1078#[derive(Debug, Clone)]
1079pub struct PropertyGraphEdgeTable {
1080 /// Table name (maps to an edge type).
1081 pub name: String,
1082 /// Column definitions as (name, type_name) pairs.
1083 pub columns: Vec<(String, String)>,
1084 /// Source node table name.
1085 pub source_table: String,
1086 /// Target node table name.
1087 pub target_table: String,
1088}
1089
1090// ==================== Procedure Call Types ====================
1091
1092/// A CALL procedure operation.
1093///
1094/// ```text
1095/// CALL grafeo.pagerank({damping: 0.85}) YIELD nodeId, score
1096/// ```
1097#[derive(Debug, Clone)]
1098pub struct CallProcedureOp {
1099 /// Dotted procedure name, e.g. `["grafeo", "pagerank"]`.
1100 pub name: Vec<String>,
1101 /// Argument expressions (constants in Phase 1).
1102 pub arguments: Vec<LogicalExpression>,
1103 /// Optional YIELD clause: which columns to expose + aliases.
1104 pub yield_items: Option<Vec<ProcedureYield>>,
1105}
1106
1107/// A single YIELD item in a procedure call.
1108#[derive(Debug, Clone)]
1109pub struct ProcedureYield {
1110 /// Column name from the procedure result.
1111 pub field_name: String,
1112 /// Optional alias (YIELD score AS rank).
1113 pub alias: Option<String>,
1114}
1115
1116/// A logical expression.
1117#[derive(Debug, Clone)]
1118pub enum LogicalExpression {
1119 /// A literal value.
1120 Literal(Value),
1121
1122 /// A variable reference.
1123 Variable(String),
1124
1125 /// Property access (e.g., n.name).
1126 Property {
1127 /// The variable to access.
1128 variable: String,
1129 /// The property name.
1130 property: String,
1131 },
1132
1133 /// Binary operation.
1134 Binary {
1135 /// Left operand.
1136 left: Box<LogicalExpression>,
1137 /// Operator.
1138 op: BinaryOp,
1139 /// Right operand.
1140 right: Box<LogicalExpression>,
1141 },
1142
1143 /// Unary operation.
1144 Unary {
1145 /// Operator.
1146 op: UnaryOp,
1147 /// Operand.
1148 operand: Box<LogicalExpression>,
1149 },
1150
1151 /// Function call.
1152 FunctionCall {
1153 /// Function name.
1154 name: String,
1155 /// Arguments.
1156 args: Vec<LogicalExpression>,
1157 /// Whether DISTINCT is applied (e.g., COUNT(DISTINCT x)).
1158 distinct: bool,
1159 },
1160
1161 /// List literal.
1162 List(Vec<LogicalExpression>),
1163
1164 /// Map literal (e.g., {name: 'Alice', age: 30}).
1165 Map(Vec<(String, LogicalExpression)>),
1166
1167 /// Index access (e.g., `list[0]`).
1168 IndexAccess {
1169 /// The base expression (typically a list or string).
1170 base: Box<LogicalExpression>,
1171 /// The index expression.
1172 index: Box<LogicalExpression>,
1173 },
1174
1175 /// Slice access (e.g., list[1..3]).
1176 SliceAccess {
1177 /// The base expression (typically a list or string).
1178 base: Box<LogicalExpression>,
1179 /// Start index (None means from beginning).
1180 start: Option<Box<LogicalExpression>>,
1181 /// End index (None means to end).
1182 end: Option<Box<LogicalExpression>>,
1183 },
1184
1185 /// CASE expression.
1186 Case {
1187 /// Test expression (for simple CASE).
1188 operand: Option<Box<LogicalExpression>>,
1189 /// WHEN clauses.
1190 when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
1191 /// ELSE clause.
1192 else_clause: Option<Box<LogicalExpression>>,
1193 },
1194
1195 /// Parameter reference.
1196 Parameter(String),
1197
1198 /// Labels of a node.
1199 Labels(String),
1200
1201 /// Type of an edge.
1202 Type(String),
1203
1204 /// ID of a node or edge.
1205 Id(String),
1206
1207 /// List comprehension: [x IN list WHERE predicate | expression]
1208 ListComprehension {
1209 /// Variable name for each element.
1210 variable: String,
1211 /// The source list expression.
1212 list_expr: Box<LogicalExpression>,
1213 /// Optional filter predicate.
1214 filter_expr: Option<Box<LogicalExpression>>,
1215 /// The mapping expression for each element.
1216 map_expr: Box<LogicalExpression>,
1217 },
1218
1219 /// List predicate: all/any/none/single(x IN list WHERE pred).
1220 ListPredicate {
1221 /// The kind of list predicate.
1222 kind: ListPredicateKind,
1223 /// The iteration variable name.
1224 variable: String,
1225 /// The source list expression.
1226 list_expr: Box<LogicalExpression>,
1227 /// The predicate to test for each element.
1228 predicate: Box<LogicalExpression>,
1229 },
1230
1231 /// EXISTS subquery.
1232 ExistsSubquery(Box<LogicalOperator>),
1233
1234 /// COUNT subquery.
1235 CountSubquery(Box<LogicalOperator>),
1236
1237 /// Map projection: `node { .prop1, .prop2, key: expr, .* }`.
1238 MapProjection {
1239 /// The base variable name.
1240 base: String,
1241 /// Projection entries (property selectors, literal entries, all-properties).
1242 entries: Vec<MapProjectionEntry>,
1243 },
1244
1245 /// reduce() accumulator: `reduce(acc = init, x IN list | expr)`.
1246 Reduce {
1247 /// Accumulator variable name.
1248 accumulator: String,
1249 /// Initial value for the accumulator.
1250 initial: Box<LogicalExpression>,
1251 /// Iteration variable name.
1252 variable: String,
1253 /// List to iterate over.
1254 list: Box<LogicalExpression>,
1255 /// Body expression evaluated per iteration (references both accumulator and variable).
1256 expression: Box<LogicalExpression>,
1257 },
1258
1259 /// Pattern comprehension: `[(pattern) WHERE pred | expr]`.
1260 ///
1261 /// Executes the inner subplan, evaluates the projection for each row,
1262 /// and collects the results into a list.
1263 PatternComprehension {
1264 /// The subplan produced by translating the pattern (+optional WHERE).
1265 subplan: Box<LogicalOperator>,
1266 /// The projection expression evaluated for each match.
1267 projection: Box<LogicalExpression>,
1268 },
1269}
1270
1271/// An entry in a map projection.
1272#[derive(Debug, Clone)]
1273pub enum MapProjectionEntry {
1274 /// `.propertyName`: shorthand for `propertyName: base.propertyName`.
1275 PropertySelector(String),
1276 /// `key: expression`: explicit key-value pair.
1277 LiteralEntry(String, LogicalExpression),
1278 /// `.*`: include all properties of the base entity.
1279 AllProperties,
1280}
1281
1282/// The kind of list predicate function.
1283#[derive(Debug, Clone, PartialEq, Eq)]
1284pub enum ListPredicateKind {
1285 /// all(x IN list WHERE pred): true if pred holds for every element.
1286 All,
1287 /// any(x IN list WHERE pred): true if pred holds for at least one element.
1288 Any,
1289 /// none(x IN list WHERE pred): true if pred holds for no element.
1290 None,
1291 /// single(x IN list WHERE pred): true if pred holds for exactly one element.
1292 Single,
1293}
1294
1295/// Binary operator.
1296#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1297pub enum BinaryOp {
1298 /// Equality comparison (=).
1299 Eq,
1300 /// Inequality comparison (<>).
1301 Ne,
1302 /// Less than (<).
1303 Lt,
1304 /// Less than or equal (<=).
1305 Le,
1306 /// Greater than (>).
1307 Gt,
1308 /// Greater than or equal (>=).
1309 Ge,
1310
1311 /// Logical AND.
1312 And,
1313 /// Logical OR.
1314 Or,
1315 /// Logical XOR.
1316 Xor,
1317
1318 /// Addition (+).
1319 Add,
1320 /// Subtraction (-).
1321 Sub,
1322 /// Multiplication (*).
1323 Mul,
1324 /// Division (/).
1325 Div,
1326 /// Modulo (%).
1327 Mod,
1328
1329 /// String concatenation.
1330 Concat,
1331 /// String starts with.
1332 StartsWith,
1333 /// String ends with.
1334 EndsWith,
1335 /// String contains.
1336 Contains,
1337
1338 /// Collection membership (IN).
1339 In,
1340 /// Pattern matching (LIKE).
1341 Like,
1342 /// Regex matching (=~).
1343 Regex,
1344 /// Power/exponentiation (^).
1345 Pow,
1346}
1347
1348/// Unary operator.
1349#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1350pub enum UnaryOp {
1351 /// Logical NOT.
1352 Not,
1353 /// Numeric negation.
1354 Neg,
1355 /// IS NULL check.
1356 IsNull,
1357 /// IS NOT NULL check.
1358 IsNotNull,
1359}
1360
1361#[cfg(test)]
1362mod tests {
1363 use super::*;
1364
1365 #[test]
1366 fn test_simple_node_scan_plan() {
1367 let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1368 items: vec![ReturnItem {
1369 expression: LogicalExpression::Variable("n".into()),
1370 alias: None,
1371 }],
1372 distinct: false,
1373 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1374 variable: "n".into(),
1375 label: Some("Person".into()),
1376 input: None,
1377 })),
1378 }));
1379
1380 // Verify structure
1381 if let LogicalOperator::Return(ret) = &plan.root {
1382 assert_eq!(ret.items.len(), 1);
1383 assert!(!ret.distinct);
1384 if let LogicalOperator::NodeScan(scan) = ret.input.as_ref() {
1385 assert_eq!(scan.variable, "n");
1386 assert_eq!(scan.label, Some("Person".into()));
1387 } else {
1388 panic!("Expected NodeScan");
1389 }
1390 } else {
1391 panic!("Expected Return");
1392 }
1393 }
1394
1395 #[test]
1396 fn test_filter_plan() {
1397 let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1398 items: vec![ReturnItem {
1399 expression: LogicalExpression::Property {
1400 variable: "n".into(),
1401 property: "name".into(),
1402 },
1403 alias: Some("name".into()),
1404 }],
1405 distinct: false,
1406 input: Box::new(LogicalOperator::Filter(FilterOp {
1407 predicate: LogicalExpression::Binary {
1408 left: Box::new(LogicalExpression::Property {
1409 variable: "n".into(),
1410 property: "age".into(),
1411 }),
1412 op: BinaryOp::Gt,
1413 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1414 },
1415 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1416 variable: "n".into(),
1417 label: Some("Person".into()),
1418 input: None,
1419 })),
1420 })),
1421 }));
1422
1423 if let LogicalOperator::Return(ret) = &plan.root {
1424 if let LogicalOperator::Filter(filter) = ret.input.as_ref() {
1425 if let LogicalExpression::Binary { op, .. } = &filter.predicate {
1426 assert_eq!(*op, BinaryOp::Gt);
1427 } else {
1428 panic!("Expected Binary expression");
1429 }
1430 } else {
1431 panic!("Expected Filter");
1432 }
1433 } else {
1434 panic!("Expected Return");
1435 }
1436 }
1437}