grafeo_engine/query/plan.rs
//! Logical query plan representation.
//!
//! The logical plan is the intermediate representation between parsed queries
//! and physical execution. GQL and Cypher queries are translated to this common
//! representation; it also carries operators for RDF/SPARQL, Gremlin-style
//! aggregation, SQL/PGQ DDL, vector search, and procedure calls.
6
7use grafeo_common::types::Value;
8
9/// A logical query plan.
10#[derive(Debug, Clone)]
11pub struct LogicalPlan {
12 /// The root operator of the plan.
13 pub root: LogicalOperator,
14}
15
16impl LogicalPlan {
17 /// Creates a new logical plan with the given root operator.
18 pub fn new(root: LogicalOperator) -> Self {
19 Self { root }
20 }
21}
22
23/// A logical operator in the query plan.
24#[derive(Debug, Clone)]
25pub enum LogicalOperator {
26 /// Scan all nodes, optionally filtered by label.
27 NodeScan(NodeScanOp),
28
29 /// Scan all edges, optionally filtered by type.
30 EdgeScan(EdgeScanOp),
31
32 /// Expand from nodes to neighbors via edges.
33 Expand(ExpandOp),
34
35 /// Filter rows based on a predicate.
36 Filter(FilterOp),
37
38 /// Project specific columns.
39 Project(ProjectOp),
40
41 /// Join two inputs.
42 Join(JoinOp),
43
44 /// Aggregate with grouping.
45 Aggregate(AggregateOp),
46
47 /// Limit the number of results.
48 Limit(LimitOp),
49
50 /// Skip a number of results.
51 Skip(SkipOp),
52
53 /// Sort results.
54 Sort(SortOp),
55
56 /// Remove duplicate results.
57 Distinct(DistinctOp),
58
59 /// Create a new node.
60 CreateNode(CreateNodeOp),
61
62 /// Create a new edge.
63 CreateEdge(CreateEdgeOp),
64
65 /// Delete a node.
66 DeleteNode(DeleteNodeOp),
67
68 /// Delete an edge.
69 DeleteEdge(DeleteEdgeOp),
70
71 /// Set properties on a node or edge.
72 SetProperty(SetPropertyOp),
73
74 /// Add labels to a node.
75 AddLabel(AddLabelOp),
76
77 /// Remove labels from a node.
78 RemoveLabel(RemoveLabelOp),
79
80 /// Return results (terminal operator).
81 Return(ReturnOp),
82
83 /// Empty result set.
84 Empty,
85
86 // ==================== RDF/SPARQL Operators ====================
87 /// Scan RDF triples matching a pattern.
88 TripleScan(TripleScanOp),
89
90 /// Union of multiple result sets.
91 Union(UnionOp),
92
93 /// Left outer join for OPTIONAL patterns.
94 LeftJoin(LeftJoinOp),
95
96 /// Anti-join for MINUS patterns.
97 AntiJoin(AntiJoinOp),
98
99 /// Bind a variable to an expression.
100 Bind(BindOp),
101
102 /// Unwind a list into individual rows.
103 Unwind(UnwindOp),
104
105 /// Collect grouped key-value rows into a single Map value.
106 /// Used for Gremlin `groupCount()` semantics.
107 MapCollect(MapCollectOp),
108
109 /// Merge a node pattern (match or create).
110 Merge(MergeOp),
111
112 /// Merge a relationship pattern (match or create).
113 MergeRelationship(MergeRelationshipOp),
114
115 /// Find shortest path between nodes.
116 ShortestPath(ShortestPathOp),
117
118 // ==================== SPARQL Update Operators ====================
119 /// Insert RDF triples.
120 InsertTriple(InsertTripleOp),
121
122 /// Delete RDF triples.
123 DeleteTriple(DeleteTripleOp),
124
125 /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
126 /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
127 Modify(ModifyOp),
128
129 /// Clear a graph (remove all triples).
130 ClearGraph(ClearGraphOp),
131
132 /// Create a new named graph.
133 CreateGraph(CreateGraphOp),
134
135 /// Drop (remove) a named graph.
136 DropGraph(DropGraphOp),
137
138 /// Load data from a URL into a graph.
139 LoadGraph(LoadGraphOp),
140
141 /// Copy triples from one graph to another.
142 CopyGraph(CopyGraphOp),
143
144 /// Move triples from one graph to another.
145 MoveGraph(MoveGraphOp),
146
147 /// Add (merge) triples from one graph to another.
148 AddGraph(AddGraphOp),
149
150 // ==================== Vector Search Operators ====================
151 /// Scan using vector similarity search.
152 VectorScan(VectorScanOp),
153
154 /// Join graph patterns with vector similarity search.
155 ///
156 /// Computes vector distances between entities from the left input and
157 /// a query vector, then joins with similarity scores. Useful for:
158 /// - Filtering graph traversal results by vector similarity
159 /// - Computing aggregated embeddings and finding similar entities
160 /// - Combining multiple vector sources with graph structure
161 VectorJoin(VectorJoinOp),
162
163 // ==================== DDL Operators ====================
164 /// Define a property graph schema (SQL/PGQ DDL).
165 CreatePropertyGraph(CreatePropertyGraphOp),
166
167 // ==================== Procedure Call Operators ====================
168 /// Invoke a stored procedure (CALL ... YIELD).
169 CallProcedure(CallProcedureOp),
170}
171
172/// Scan nodes from the graph.
173#[derive(Debug, Clone)]
174pub struct NodeScanOp {
175 /// Variable name to bind the node to.
176 pub variable: String,
177 /// Optional label filter.
178 pub label: Option<String>,
179 /// Child operator (if any, for chained patterns).
180 pub input: Option<Box<LogicalOperator>>,
181}
182
183/// Scan edges from the graph.
184#[derive(Debug, Clone)]
185pub struct EdgeScanOp {
186 /// Variable name to bind the edge to.
187 pub variable: String,
188 /// Optional edge type filter.
189 pub edge_type: Option<String>,
190 /// Child operator (if any).
191 pub input: Option<Box<LogicalOperator>>,
192}
193
194/// Expand from nodes to their neighbors.
195#[derive(Debug, Clone)]
196pub struct ExpandOp {
197 /// Source node variable.
198 pub from_variable: String,
199 /// Target node variable to bind.
200 pub to_variable: String,
201 /// Edge variable to bind (optional).
202 pub edge_variable: Option<String>,
203 /// Direction of expansion.
204 pub direction: ExpandDirection,
205 /// Optional edge type filter.
206 pub edge_type: Option<String>,
207 /// Minimum hops (for variable-length patterns).
208 pub min_hops: u32,
209 /// Maximum hops (for variable-length patterns).
210 pub max_hops: Option<u32>,
211 /// Input operator.
212 pub input: Box<LogicalOperator>,
213 /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
214 /// When set, a path length column will be output under this name.
215 pub path_alias: Option<String>,
216}
217
/// Direction for edge expansion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExpandDirection {
    /// Follow outgoing edges.
    Outgoing,
    /// Follow incoming edges.
    Incoming,
    /// Follow edges in either direction.
    Both,
}
228
229/// Join two inputs.
230#[derive(Debug, Clone)]
231pub struct JoinOp {
232 /// Left input.
233 pub left: Box<LogicalOperator>,
234 /// Right input.
235 pub right: Box<LogicalOperator>,
236 /// Join type.
237 pub join_type: JoinType,
238 /// Join conditions.
239 pub conditions: Vec<JoinCondition>,
240}
241
/// Join type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JoinType {
    /// Inner join.
    Inner,
    /// Left outer join.
    Left,
    /// Right outer join.
    Right,
    /// Full outer join.
    Full,
    /// Cross join (Cartesian product).
    Cross,
    /// Semi join (returns left rows with matching right rows).
    Semi,
    /// Anti join (returns left rows without matching right rows).
    Anti,
}
260
261/// A join condition.
262#[derive(Debug, Clone)]
263pub struct JoinCondition {
264 /// Left expression.
265 pub left: LogicalExpression,
266 /// Right expression.
267 pub right: LogicalExpression,
268}
269
270/// Aggregate with grouping.
271#[derive(Debug, Clone)]
272pub struct AggregateOp {
273 /// Group by expressions.
274 pub group_by: Vec<LogicalExpression>,
275 /// Aggregate functions.
276 pub aggregates: Vec<AggregateExpr>,
277 /// Input operator.
278 pub input: Box<LogicalOperator>,
279 /// HAVING clause filter (applied after aggregation).
280 pub having: Option<LogicalExpression>,
281}
282
283/// An aggregate expression.
284#[derive(Debug, Clone)]
285pub struct AggregateExpr {
286 /// Aggregate function.
287 pub function: AggregateFunction,
288 /// Expression to aggregate.
289 pub expression: Option<LogicalExpression>,
290 /// Whether to use DISTINCT.
291 pub distinct: bool,
292 /// Alias for the result.
293 pub alias: Option<String>,
294 /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
295 pub percentile: Option<f64>,
296}
297
/// Aggregate function.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AggregateFunction {
    /// Count all rows (`COUNT(*)`).
    Count,
    /// Count non-null values (`COUNT(expr)`).
    CountNonNull,
    /// Sum values.
    Sum,
    /// Average values.
    Avg,
    /// Minimum value.
    Min,
    /// Maximum value.
    Max,
    /// Collect into list.
    Collect,
    /// Sample standard deviation (STDEV).
    StdDev,
    /// Population standard deviation (STDEVP).
    StdDevPop,
    /// Discrete percentile (PERCENTILE_DISC).
    PercentileDisc,
    /// Continuous percentile (PERCENTILE_CONT).
    PercentileCont,
}
324
325/// Filter rows based on a predicate.
326#[derive(Debug, Clone)]
327pub struct FilterOp {
328 /// The filter predicate.
329 pub predicate: LogicalExpression,
330 /// Input operator.
331 pub input: Box<LogicalOperator>,
332}
333
334/// Project specific columns.
335#[derive(Debug, Clone)]
336pub struct ProjectOp {
337 /// Columns to project.
338 pub projections: Vec<Projection>,
339 /// Input operator.
340 pub input: Box<LogicalOperator>,
341}
342
343/// A single projection (column selection or computation).
344#[derive(Debug, Clone)]
345pub struct Projection {
346 /// Expression to compute.
347 pub expression: LogicalExpression,
348 /// Alias for the result.
349 pub alias: Option<String>,
350}
351
352/// Limit the number of results.
353#[derive(Debug, Clone)]
354pub struct LimitOp {
355 /// Maximum number of rows to return.
356 pub count: usize,
357 /// Input operator.
358 pub input: Box<LogicalOperator>,
359}
360
361/// Skip a number of results.
362#[derive(Debug, Clone)]
363pub struct SkipOp {
364 /// Number of rows to skip.
365 pub count: usize,
366 /// Input operator.
367 pub input: Box<LogicalOperator>,
368}
369
370/// Sort results.
371#[derive(Debug, Clone)]
372pub struct SortOp {
373 /// Sort keys.
374 pub keys: Vec<SortKey>,
375 /// Input operator.
376 pub input: Box<LogicalOperator>,
377}
378
379/// A sort key.
380#[derive(Debug, Clone)]
381pub struct SortKey {
382 /// Expression to sort by.
383 pub expression: LogicalExpression,
384 /// Sort order.
385 pub order: SortOrder,
386}
387
/// Sort order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SortOrder {
    /// Ascending order.
    Ascending,
    /// Descending order.
    Descending,
}
396
397/// Remove duplicate results.
398#[derive(Debug, Clone)]
399pub struct DistinctOp {
400 /// Input operator.
401 pub input: Box<LogicalOperator>,
402 /// Optional columns to use for deduplication.
403 /// If None, all columns are used.
404 pub columns: Option<Vec<String>>,
405}
406
407/// Create a new node.
408#[derive(Debug, Clone)]
409pub struct CreateNodeOp {
410 /// Variable name to bind the created node to.
411 pub variable: String,
412 /// Labels for the new node.
413 pub labels: Vec<String>,
414 /// Properties for the new node.
415 pub properties: Vec<(String, LogicalExpression)>,
416 /// Input operator (for chained creates).
417 pub input: Option<Box<LogicalOperator>>,
418}
419
420/// Create a new edge.
421#[derive(Debug, Clone)]
422pub struct CreateEdgeOp {
423 /// Variable name to bind the created edge to.
424 pub variable: Option<String>,
425 /// Source node variable.
426 pub from_variable: String,
427 /// Target node variable.
428 pub to_variable: String,
429 /// Edge type.
430 pub edge_type: String,
431 /// Properties for the new edge.
432 pub properties: Vec<(String, LogicalExpression)>,
433 /// Input operator.
434 pub input: Box<LogicalOperator>,
435}
436
437/// Delete a node.
438#[derive(Debug, Clone)]
439pub struct DeleteNodeOp {
440 /// Variable of the node to delete.
441 pub variable: String,
442 /// Whether to detach (delete connected edges) before deleting.
443 pub detach: bool,
444 /// Input operator.
445 pub input: Box<LogicalOperator>,
446}
447
448/// Delete an edge.
449#[derive(Debug, Clone)]
450pub struct DeleteEdgeOp {
451 /// Variable of the edge to delete.
452 pub variable: String,
453 /// Input operator.
454 pub input: Box<LogicalOperator>,
455}
456
457/// Set properties on a node or edge.
458#[derive(Debug, Clone)]
459pub struct SetPropertyOp {
460 /// Variable of the entity to update.
461 pub variable: String,
462 /// Properties to set (name -> expression).
463 pub properties: Vec<(String, LogicalExpression)>,
464 /// Whether to replace all properties (vs. merge).
465 pub replace: bool,
466 /// Whether the target variable is an edge (vs. node).
467 pub is_edge: bool,
468 /// Input operator.
469 pub input: Box<LogicalOperator>,
470}
471
472/// Add labels to a node.
473#[derive(Debug, Clone)]
474pub struct AddLabelOp {
475 /// Variable of the node to update.
476 pub variable: String,
477 /// Labels to add.
478 pub labels: Vec<String>,
479 /// Input operator.
480 pub input: Box<LogicalOperator>,
481}
482
483/// Remove labels from a node.
484#[derive(Debug, Clone)]
485pub struct RemoveLabelOp {
486 /// Variable of the node to update.
487 pub variable: String,
488 /// Labels to remove.
489 pub labels: Vec<String>,
490 /// Input operator.
491 pub input: Box<LogicalOperator>,
492}
493
494// ==================== RDF/SPARQL Operators ====================
495
496/// Scan RDF triples matching a pattern.
497#[derive(Debug, Clone)]
498pub struct TripleScanOp {
499 /// Subject pattern (variable name or IRI).
500 pub subject: TripleComponent,
501 /// Predicate pattern (variable name or IRI).
502 pub predicate: TripleComponent,
503 /// Object pattern (variable name, IRI, or literal).
504 pub object: TripleComponent,
505 /// Named graph (optional).
506 pub graph: Option<TripleComponent>,
507 /// Input operator (for chained patterns).
508 pub input: Option<Box<LogicalOperator>>,
509}
510
511/// A component of a triple pattern.
512#[derive(Debug, Clone)]
513pub enum TripleComponent {
514 /// A variable to bind.
515 Variable(String),
516 /// A constant IRI.
517 Iri(String),
518 /// A constant literal value.
519 Literal(Value),
520}
521
522/// Union of multiple result sets.
523#[derive(Debug, Clone)]
524pub struct UnionOp {
525 /// Inputs to union together.
526 pub inputs: Vec<LogicalOperator>,
527}
528
529/// Left outer join for OPTIONAL patterns.
530#[derive(Debug, Clone)]
531pub struct LeftJoinOp {
532 /// Left (required) input.
533 pub left: Box<LogicalOperator>,
534 /// Right (optional) input.
535 pub right: Box<LogicalOperator>,
536 /// Optional filter condition.
537 pub condition: Option<LogicalExpression>,
538}
539
540/// Anti-join for MINUS patterns.
541#[derive(Debug, Clone)]
542pub struct AntiJoinOp {
543 /// Left input (results to keep if no match on right).
544 pub left: Box<LogicalOperator>,
545 /// Right input (patterns to exclude).
546 pub right: Box<LogicalOperator>,
547}
548
549/// Bind a variable to an expression.
550#[derive(Debug, Clone)]
551pub struct BindOp {
552 /// Expression to compute.
553 pub expression: LogicalExpression,
554 /// Variable to bind the result to.
555 pub variable: String,
556 /// Input operator.
557 pub input: Box<LogicalOperator>,
558}
559
560/// Unwind a list into individual rows.
561///
562/// For each input row, evaluates the expression (which should return a list)
563/// and emits one row for each element in the list.
564#[derive(Debug, Clone)]
565pub struct UnwindOp {
566 /// The list expression to unwind.
567 pub expression: LogicalExpression,
568 /// The variable name for each element.
569 pub variable: String,
570 /// Optional variable for 1-based element position (ORDINALITY).
571 pub ordinality_var: Option<String>,
572 /// Optional variable for 0-based element position (OFFSET).
573 pub offset_var: Option<String>,
574 /// Input operator.
575 pub input: Box<LogicalOperator>,
576}
577
578/// Collect grouped key-value rows into a single Map value.
579/// Used for Gremlin `groupCount()` semantics.
580#[derive(Debug, Clone)]
581pub struct MapCollectOp {
582 /// Variable holding the map key.
583 pub key_var: String,
584 /// Variable holding the map value.
585 pub value_var: String,
586 /// Output variable alias.
587 pub alias: String,
588 /// Input operator (typically a grouped aggregate).
589 pub input: Box<LogicalOperator>,
590}
591
592/// Merge a pattern (match or create).
593///
594/// MERGE tries to match a pattern in the graph. If found, returns the existing
595/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
596/// (optionally applying ON CREATE SET).
597#[derive(Debug, Clone)]
598pub struct MergeOp {
599 /// The node to merge.
600 pub variable: String,
601 /// Labels to match/create.
602 pub labels: Vec<String>,
603 /// Properties that must match (used for both matching and creation).
604 pub match_properties: Vec<(String, LogicalExpression)>,
605 /// Properties to set on CREATE.
606 pub on_create: Vec<(String, LogicalExpression)>,
607 /// Properties to set on MATCH.
608 pub on_match: Vec<(String, LogicalExpression)>,
609 /// Input operator.
610 pub input: Box<LogicalOperator>,
611}
612
613/// Merge a relationship pattern (match or create between two bound nodes).
614///
615/// MERGE on a relationship tries to find an existing relationship of the given type
616/// between the source and target nodes. If found, returns the existing relationship
617/// (optionally applying ON MATCH SET). If not found, creates it (optionally applying
618/// ON CREATE SET).
619#[derive(Debug, Clone)]
620pub struct MergeRelationshipOp {
621 /// Variable to bind the relationship to.
622 pub variable: String,
623 /// Source node variable (must already be bound).
624 pub source_variable: String,
625 /// Target node variable (must already be bound).
626 pub target_variable: String,
627 /// Relationship type.
628 pub edge_type: String,
629 /// Properties that must match (used for both matching and creation).
630 pub match_properties: Vec<(String, LogicalExpression)>,
631 /// Properties to set on CREATE.
632 pub on_create: Vec<(String, LogicalExpression)>,
633 /// Properties to set on MATCH.
634 pub on_match: Vec<(String, LogicalExpression)>,
635 /// Input operator.
636 pub input: Box<LogicalOperator>,
637}
638
639/// Find shortest path between two nodes.
640///
641/// This operator uses Dijkstra's algorithm to find the shortest path(s)
642/// between a source node and a target node, optionally filtered by edge type.
643#[derive(Debug, Clone)]
644pub struct ShortestPathOp {
645 /// Input operator providing source/target nodes.
646 pub input: Box<LogicalOperator>,
647 /// Variable name for the source node.
648 pub source_var: String,
649 /// Variable name for the target node.
650 pub target_var: String,
651 /// Optional edge type filter.
652 pub edge_type: Option<String>,
653 /// Direction of edge traversal.
654 pub direction: ExpandDirection,
655 /// Variable name to bind the path result.
656 pub path_alias: String,
657 /// Whether to find all shortest paths (vs. just one).
658 pub all_paths: bool,
659}
660
661// ==================== SPARQL Update Operators ====================
662
663/// Insert RDF triples.
664#[derive(Debug, Clone)]
665pub struct InsertTripleOp {
666 /// Subject of the triple.
667 pub subject: TripleComponent,
668 /// Predicate of the triple.
669 pub predicate: TripleComponent,
670 /// Object of the triple.
671 pub object: TripleComponent,
672 /// Named graph (optional).
673 pub graph: Option<String>,
674 /// Input operator (provides variable bindings).
675 pub input: Option<Box<LogicalOperator>>,
676}
677
678/// Delete RDF triples.
679#[derive(Debug, Clone)]
680pub struct DeleteTripleOp {
681 /// Subject pattern.
682 pub subject: TripleComponent,
683 /// Predicate pattern.
684 pub predicate: TripleComponent,
685 /// Object pattern.
686 pub object: TripleComponent,
687 /// Named graph (optional).
688 pub graph: Option<String>,
689 /// Input operator (provides variable bindings).
690 pub input: Option<Box<LogicalOperator>>,
691}
692
693/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
694///
695/// Per SPARQL 1.1 Update spec, this operator:
696/// 1. Evaluates the WHERE clause once to get bindings
697/// 2. Applies DELETE templates using those bindings
698/// 3. Applies INSERT templates using the SAME bindings
699///
700/// This ensures DELETE and INSERT see consistent data.
701#[derive(Debug, Clone)]
702pub struct ModifyOp {
703 /// DELETE triple templates (patterns with variables).
704 pub delete_templates: Vec<TripleTemplate>,
705 /// INSERT triple templates (patterns with variables).
706 pub insert_templates: Vec<TripleTemplate>,
707 /// WHERE clause that provides variable bindings.
708 pub where_clause: Box<LogicalOperator>,
709 /// Named graph context (for WITH clause).
710 pub graph: Option<String>,
711}
712
713/// A triple template for DELETE/INSERT operations.
714#[derive(Debug, Clone)]
715pub struct TripleTemplate {
716 /// Subject (may be a variable).
717 pub subject: TripleComponent,
718 /// Predicate (may be a variable).
719 pub predicate: TripleComponent,
720 /// Object (may be a variable or literal).
721 pub object: TripleComponent,
722 /// Named graph (optional).
723 pub graph: Option<String>,
724}
725
/// Clear all triples from a graph.
#[derive(Debug, Clone)]
pub struct ClearGraphOp {
    /// Target graph: `None` = default graph, `Some("")` = all named graphs,
    /// `Some(iri)` = the specific graph with that IRI.
    pub graph: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
734
/// Create a new named graph.
#[derive(Debug, Clone)]
pub struct CreateGraphOp {
    /// IRI of the graph to create.
    pub graph: String,
    /// Whether to silently ignore if graph already exists.
    pub silent: bool,
}
743
/// Drop (remove) a named graph.
#[derive(Debug, Clone)]
pub struct DropGraphOp {
    /// Target graph (`None` = default graph).
    pub graph: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
752
/// Load data from a URL into a graph.
#[derive(Debug, Clone)]
pub struct LoadGraphOp {
    /// Source URL to load data from.
    pub source: String,
    /// Destination graph (`None` = default graph).
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
763
/// Copy triples from one graph to another.
#[derive(Debug, Clone)]
pub struct CopyGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
774
/// Move triples from one graph to another.
#[derive(Debug, Clone)]
pub struct MoveGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
785
/// Add (merge) triples from one graph to another.
#[derive(Debug, Clone)]
pub struct AddGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
796
797// ==================== Vector Search Operators ====================
798
799/// Vector similarity scan operation.
800///
801/// Performs approximate nearest neighbor search using a vector index (HNSW)
802/// or brute-force search for small datasets. Returns nodes/edges whose
803/// embeddings are similar to the query vector.
804///
805/// # Example GQL
806///
807/// ```gql
808/// MATCH (m:Movie)
809/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
810/// RETURN m.title
811/// ```
812#[derive(Debug, Clone)]
813pub struct VectorScanOp {
814 /// Variable name to bind matching entities to.
815 pub variable: String,
816 /// Name of the vector index to use (None = brute-force).
817 pub index_name: Option<String>,
818 /// Property containing the vector embedding.
819 pub property: String,
820 /// Optional label filter (scan only nodes with this label).
821 pub label: Option<String>,
822 /// The query vector expression.
823 pub query_vector: LogicalExpression,
824 /// Number of nearest neighbors to return.
825 pub k: usize,
826 /// Distance metric (None = use index default, typically cosine).
827 pub metric: Option<VectorMetric>,
828 /// Minimum similarity threshold (filters results below this).
829 pub min_similarity: Option<f32>,
830 /// Maximum distance threshold (filters results above this).
831 pub max_distance: Option<f32>,
832 /// Input operator (for hybrid queries combining graph + vector).
833 pub input: Option<Box<LogicalOperator>>,
834}
835
/// Vector distance/similarity metric for vector scan operations.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VectorMetric {
    /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
    Cosine,
    /// Euclidean (L2) distance. Best when magnitude matters.
    Euclidean,
    /// Dot product. Best for maximum inner product search.
    DotProduct,
    /// Manhattan (L1) distance. Less sensitive to outliers.
    Manhattan,
}
848
849/// Join graph patterns with vector similarity search.
850///
851/// This operator takes entities from the left input and computes vector
852/// similarity against a query vector, outputting (entity, distance) pairs.
853///
854/// # Use Cases
855///
856/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
857/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
858/// 3. **Filtering by similarity**: Join with threshold-based filtering
859///
860/// # Example
861///
862/// ```gql
863/// // Find movies similar to what the user liked
864/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
865/// WITH avg(liked.embedding) AS user_taste
866/// VECTOR JOIN (m:Movie) ON m.embedding
867/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
868/// RETURN m.title
869/// ```
870#[derive(Debug, Clone)]
871pub struct VectorJoinOp {
872 /// Input operator providing entities to match against.
873 pub input: Box<LogicalOperator>,
874 /// Variable from input to extract vectors from (for entity-to-entity similarity).
875 /// If None, uses `query_vector` directly.
876 pub left_vector_variable: Option<String>,
877 /// Property containing the left vector (used with `left_vector_variable`).
878 pub left_property: Option<String>,
879 /// The query vector expression (constant or computed).
880 pub query_vector: LogicalExpression,
881 /// Variable name to bind the right-side matching entities.
882 pub right_variable: String,
883 /// Property containing the right-side vector embeddings.
884 pub right_property: String,
885 /// Optional label filter for right-side entities.
886 pub right_label: Option<String>,
887 /// Name of vector index on right side (None = brute-force).
888 pub index_name: Option<String>,
889 /// Number of nearest neighbors per left-side entity.
890 pub k: usize,
891 /// Distance metric.
892 pub metric: Option<VectorMetric>,
893 /// Minimum similarity threshold.
894 pub min_similarity: Option<f32>,
895 /// Maximum distance threshold.
896 pub max_distance: Option<f32>,
897 /// Variable to bind the distance/similarity score.
898 pub score_variable: Option<String>,
899}
900
901/// Return results (terminal operator).
902#[derive(Debug, Clone)]
903pub struct ReturnOp {
904 /// Items to return.
905 pub items: Vec<ReturnItem>,
906 /// Whether to return distinct results.
907 pub distinct: bool,
908 /// Input operator.
909 pub input: Box<LogicalOperator>,
910}
911
912/// A single return item.
913#[derive(Debug, Clone)]
914pub struct ReturnItem {
915 /// Expression to return.
916 pub expression: LogicalExpression,
917 /// Alias for the result column.
918 pub alias: Option<String>,
919}
920
921/// Define a property graph schema (SQL/PGQ DDL).
922#[derive(Debug, Clone)]
923pub struct CreatePropertyGraphOp {
924 /// Graph name.
925 pub name: String,
926 /// Node table schemas (label name + column definitions).
927 pub node_tables: Vec<PropertyGraphNodeTable>,
928 /// Edge table schemas (type name + column definitions + references).
929 pub edge_tables: Vec<PropertyGraphEdgeTable>,
930}
931
/// A node table in a property graph definition.
#[derive(Debug, Clone)]
pub struct PropertyGraphNodeTable {
    /// Table name (maps to a node label).
    pub name: String,
    /// Column definitions as `(name, type_name)` pairs.
    pub columns: Vec<(String, String)>,
}
940
/// An edge table in a property graph definition.
#[derive(Debug, Clone)]
pub struct PropertyGraphEdgeTable {
    /// Table name (maps to an edge type).
    pub name: String,
    /// Column definitions as `(name, type_name)` pairs.
    pub columns: Vec<(String, String)>,
    /// Source node table name.
    pub source_table: String,
    /// Target node table name.
    pub target_table: String,
}
953
954// ==================== Procedure Call Types ====================
955
956/// A CALL procedure operation.
957///
958/// ```text
959/// CALL grafeo.pagerank({damping: 0.85}) YIELD nodeId, score
960/// ```
961#[derive(Debug, Clone)]
962pub struct CallProcedureOp {
963 /// Dotted procedure name, e.g. `["grafeo", "pagerank"]`.
964 pub name: Vec<String>,
965 /// Argument expressions (constants in Phase 1).
966 pub arguments: Vec<LogicalExpression>,
967 /// Optional YIELD clause: which columns to expose + aliases.
968 pub yield_items: Option<Vec<ProcedureYield>>,
969}
970
/// A single YIELD item in a procedure call.
#[derive(Debug, Clone)]
pub struct ProcedureYield {
    /// Column name from the procedure result.
    pub field_name: String,
    /// Optional alias (`YIELD score AS rank`).
    pub alias: Option<String>,
}
979
980/// A logical expression.
981#[derive(Debug, Clone)]
982pub enum LogicalExpression {
983 /// A literal value.
984 Literal(Value),
985
986 /// A variable reference.
987 Variable(String),
988
989 /// Property access (e.g., n.name).
990 Property {
991 /// The variable to access.
992 variable: String,
993 /// The property name.
994 property: String,
995 },
996
997 /// Binary operation.
998 Binary {
999 /// Left operand.
1000 left: Box<LogicalExpression>,
1001 /// Operator.
1002 op: BinaryOp,
1003 /// Right operand.
1004 right: Box<LogicalExpression>,
1005 },
1006
1007 /// Unary operation.
1008 Unary {
1009 /// Operator.
1010 op: UnaryOp,
1011 /// Operand.
1012 operand: Box<LogicalExpression>,
1013 },
1014
1015 /// Function call.
1016 FunctionCall {
1017 /// Function name.
1018 name: String,
1019 /// Arguments.
1020 args: Vec<LogicalExpression>,
1021 /// Whether DISTINCT is applied (e.g., COUNT(DISTINCT x)).
1022 distinct: bool,
1023 },
1024
1025 /// List literal.
1026 List(Vec<LogicalExpression>),
1027
1028 /// Map literal (e.g., {name: 'Alice', age: 30}).
1029 Map(Vec<(String, LogicalExpression)>),
1030
1031 /// Index access (e.g., `list[0]`).
1032 IndexAccess {
1033 /// The base expression (typically a list or string).
1034 base: Box<LogicalExpression>,
1035 /// The index expression.
1036 index: Box<LogicalExpression>,
1037 },
1038
1039 /// Slice access (e.g., list[1..3]).
1040 SliceAccess {
1041 /// The base expression (typically a list or string).
1042 base: Box<LogicalExpression>,
1043 /// Start index (None means from beginning).
1044 start: Option<Box<LogicalExpression>>,
1045 /// End index (None means to end).
1046 end: Option<Box<LogicalExpression>>,
1047 },
1048
1049 /// CASE expression.
1050 Case {
1051 /// Test expression (for simple CASE).
1052 operand: Option<Box<LogicalExpression>>,
1053 /// WHEN clauses.
1054 when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
1055 /// ELSE clause.
1056 else_clause: Option<Box<LogicalExpression>>,
1057 },
1058
1059 /// Parameter reference.
1060 Parameter(String),
1061
1062 /// Labels of a node.
1063 Labels(String),
1064
1065 /// Type of an edge.
1066 Type(String),
1067
1068 /// ID of a node or edge.
1069 Id(String),
1070
1071 /// List comprehension: [x IN list WHERE predicate | expression]
1072 ListComprehension {
1073 /// Variable name for each element.
1074 variable: String,
1075 /// The source list expression.
1076 list_expr: Box<LogicalExpression>,
1077 /// Optional filter predicate.
1078 filter_expr: Option<Box<LogicalExpression>>,
1079 /// The mapping expression for each element.
1080 map_expr: Box<LogicalExpression>,
1081 },
1082
1083 /// List predicate: all/any/none/single(x IN list WHERE pred).
1084 ListPredicate {
1085 /// The kind of list predicate.
1086 kind: ListPredicateKind,
1087 /// The iteration variable name.
1088 variable: String,
1089 /// The source list expression.
1090 list_expr: Box<LogicalExpression>,
1091 /// The predicate to test for each element.
1092 predicate: Box<LogicalExpression>,
1093 },
1094
1095 /// EXISTS subquery.
1096 ExistsSubquery(Box<LogicalOperator>),
1097
1098 /// COUNT subquery.
1099 CountSubquery(Box<LogicalOperator>),
1100}
1101
/// The kind of list predicate function.
///
/// Carried by the `ListPredicate` expression variant to select which of the
/// `all` / `any` / `none` / `single` predicate functions is being applied.
///
/// This is a fieldless enum, so it derives `Copy` in the same way as the
/// sibling operator enums `BinaryOp` and `UnaryOp`. Existing `.clone()` calls
/// keep compiling; new code can simply pass the value around by copy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListPredicateKind {
    /// `all(x IN list WHERE pred)`: true if pred holds for every element.
    All,
    /// `any(x IN list WHERE pred)`: true if pred holds for at least one element.
    Any,
    /// `none(x IN list WHERE pred)`: true if pred holds for no element.
    None,
    /// `single(x IN list WHERE pred)`: true if pred holds for exactly one element.
    Single,
}
1114
/// Operator of a two-operand (`Binary`) logical expression.
///
/// Variants are grouped by category: comparisons, boolean connectives,
/// arithmetic, string operators, and miscellaneous matching operators.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum BinaryOp {
    // ---- Comparison ----
    /// `=`: equality comparison.
    Eq,
    /// `<>`: inequality comparison.
    Ne,
    /// `<`: less than.
    Lt,
    /// `<=`: less than or equal.
    Le,
    /// `>`: greater than.
    Gt,
    /// `>=`: greater than or equal.
    Ge,

    // ---- Boolean connectives ----
    /// Logical conjunction (AND).
    And,
    /// Logical disjunction (OR).
    Or,
    /// Logical exclusive or (XOR).
    Xor,

    // ---- Arithmetic ----
    /// `+`: addition.
    Add,
    /// `-`: subtraction.
    Sub,
    /// `*`: multiplication.
    Mul,
    /// `/`: division.
    Div,
    /// `%`: modulo.
    Mod,

    // ---- Strings ----
    /// Concatenation of strings.
    Concat,
    /// "Starts with" test on strings.
    StartsWith,
    /// "Ends with" test on strings.
    EndsWith,
    /// "Contains" test on strings.
    Contains,

    // ---- Membership and matching ----
    /// `IN`: collection membership.
    In,
    /// `LIKE`: pattern matching.
    Like,
    /// `=~`: regex matching.
    Regex,
    /// `^`: power / exponentiation.
    Pow,
}
1167
/// Operator of a single-operand (`Unary`) logical expression.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum UnaryOp {
    /// Logical negation (NOT).
    Not,
    /// Negation of a numeric operand.
    Neg,
    /// `IS NULL` check.
    IsNull,
    /// `IS NOT NULL` check.
    IsNotNull,
}
1180
#[cfg(test)]
mod tests {
    use super::*;

    /// Leaf operator shared by both tests: a scan binding `n` to nodes
    /// labelled `Person`, with no upstream input.
    fn person_scan() -> LogicalOperator {
        LogicalOperator::NodeScan(NodeScanOp {
            variable: "n".into(),
            label: Some("Person".into()),
            input: None,
        })
    }

    /// A `Return` directly over a `NodeScan` keeps its items, its `distinct`
    /// flag and the scan's variable/label intact.
    #[test]
    fn test_simple_node_scan_plan() {
        let return_op = ReturnOp {
            items: vec![ReturnItem {
                expression: LogicalExpression::Variable("n".into()),
                alias: None,
            }],
            distinct: false,
            input: Box::new(person_scan()),
        };
        let plan = LogicalPlan::new(LogicalOperator::Return(return_op));

        // Verify structure, outermost operator first.
        let ret = match &plan.root {
            LogicalOperator::Return(ret) => ret,
            _ => panic!("Expected Return"),
        };
        assert_eq!(ret.items.len(), 1);
        assert!(!ret.distinct);

        match ret.input.as_ref() {
            LogicalOperator::NodeScan(scan) => {
                assert_eq!(scan.variable, "n");
                assert_eq!(scan.label, Some("Person".into()));
            }
            _ => panic!("Expected NodeScan"),
        }
    }

    /// A `Return` over a `Filter` over a `NodeScan` exposes the filter's
    /// binary predicate with the operator it was built with.
    #[test]
    fn test_filter_plan() {
        // Predicate: n.age > 30
        let age_over_30 = LogicalExpression::Binary {
            left: Box::new(LogicalExpression::Property {
                variable: "n".into(),
                property: "age".into(),
            }),
            op: BinaryOp::Gt,
            right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
        };

        let filter = LogicalOperator::Filter(FilterOp {
            predicate: age_over_30,
            input: Box::new(person_scan()),
        });

        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![ReturnItem {
                expression: LogicalExpression::Property {
                    variable: "n".into(),
                    property: "name".into(),
                },
                alias: Some("name".into()),
            }],
            distinct: false,
            input: Box::new(filter),
        }));

        let ret = match &plan.root {
            LogicalOperator::Return(ret) => ret,
            _ => panic!("Expected Return"),
        };

        let filter = match ret.input.as_ref() {
            LogicalOperator::Filter(filter) => filter,
            _ => panic!("Expected Filter"),
        };

        match &filter.predicate {
            LogicalExpression::Binary { op, .. } => assert_eq!(*op, BinaryOp::Gt),
            _ => panic!("Expected Binary expression"),
        }
    }
}