grafeo_engine/query/plan.rs
1//! Logical query plan representation.
2//!
3//! The logical plan is the intermediate representation between parsed queries
4//! and physical execution. Both GQL and Cypher queries are translated to this
5//! common representation.
6
7use grafeo_common::types::Value;
8
9/// A logical query plan.
10#[derive(Debug, Clone)]
11pub struct LogicalPlan {
12 /// The root operator of the plan.
13 pub root: LogicalOperator,
14}
15
16impl LogicalPlan {
17 /// Creates a new logical plan with the given root operator.
18 pub fn new(root: LogicalOperator) -> Self {
19 Self { root }
20 }
21}
22
23/// A logical operator in the query plan.
24#[derive(Debug, Clone)]
25pub enum LogicalOperator {
26 /// Scan all nodes, optionally filtered by label.
27 NodeScan(NodeScanOp),
28
29 /// Scan all edges, optionally filtered by type.
30 EdgeScan(EdgeScanOp),
31
32 /// Expand from nodes to neighbors via edges.
33 Expand(ExpandOp),
34
35 /// Filter rows based on a predicate.
36 Filter(FilterOp),
37
38 /// Project specific columns.
39 Project(ProjectOp),
40
41 /// Join two inputs.
42 Join(JoinOp),
43
44 /// Aggregate with grouping.
45 Aggregate(AggregateOp),
46
47 /// Limit the number of results.
48 Limit(LimitOp),
49
50 /// Skip a number of results.
51 Skip(SkipOp),
52
53 /// Sort results.
54 Sort(SortOp),
55
56 /// Remove duplicate results.
57 Distinct(DistinctOp),
58
59 /// Create a new node.
60 CreateNode(CreateNodeOp),
61
62 /// Create a new edge.
63 CreateEdge(CreateEdgeOp),
64
65 /// Delete a node.
66 DeleteNode(DeleteNodeOp),
67
68 /// Delete an edge.
69 DeleteEdge(DeleteEdgeOp),
70
71 /// Set properties on a node or edge.
72 SetProperty(SetPropertyOp),
73
74 /// Add labels to a node.
75 AddLabel(AddLabelOp),
76
77 /// Remove labels from a node.
78 RemoveLabel(RemoveLabelOp),
79
80 /// Return results (terminal operator).
81 Return(ReturnOp),
82
83 /// Empty result set.
84 Empty,
85
86 // ==================== RDF/SPARQL Operators ====================
87 /// Scan RDF triples matching a pattern.
88 TripleScan(TripleScanOp),
89
90 /// Union of multiple result sets.
91 Union(UnionOp),
92
93 /// Left outer join for OPTIONAL patterns.
94 LeftJoin(LeftJoinOp),
95
96 /// Anti-join for MINUS patterns.
97 AntiJoin(AntiJoinOp),
98
99 /// Bind a variable to an expression.
100 Bind(BindOp),
101
102 /// Unwind a list into individual rows.
103 Unwind(UnwindOp),
104
105 /// Merge a pattern (match or create).
106 Merge(MergeOp),
107
108 /// Find shortest path between nodes.
109 ShortestPath(ShortestPathOp),
110
111 // ==================== SPARQL Update Operators ====================
112 /// Insert RDF triples.
113 InsertTriple(InsertTripleOp),
114
115 /// Delete RDF triples.
116 DeleteTriple(DeleteTripleOp),
117
118 /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
119 /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
120 Modify(ModifyOp),
121
122 /// Clear a graph (remove all triples).
123 ClearGraph(ClearGraphOp),
124
125 /// Create a new named graph.
126 CreateGraph(CreateGraphOp),
127
128 /// Drop (remove) a named graph.
129 DropGraph(DropGraphOp),
130
131 /// Load data from a URL into a graph.
132 LoadGraph(LoadGraphOp),
133
134 /// Copy triples from one graph to another.
135 CopyGraph(CopyGraphOp),
136
137 /// Move triples from one graph to another.
138 MoveGraph(MoveGraphOp),
139
140 /// Add (merge) triples from one graph to another.
141 AddGraph(AddGraphOp),
142
143 // ==================== Vector Search Operators ====================
144 /// Scan using vector similarity search.
145 VectorScan(VectorScanOp),
146
147 /// Join graph patterns with vector similarity search.
148 ///
149 /// Computes vector distances between entities from the left input and
150 /// a query vector, then joins with similarity scores. Useful for:
151 /// - Filtering graph traversal results by vector similarity
152 /// - Computing aggregated embeddings and finding similar entities
153 /// - Combining multiple vector sources with graph structure
154 VectorJoin(VectorJoinOp),
155
156 // ==================== DDL Operators ====================
157 /// Define a property graph schema (SQL/PGQ DDL).
158 CreatePropertyGraph(CreatePropertyGraphOp),
159
160 // ==================== Procedure Call Operators ====================
161 /// Invoke a stored procedure (CALL ... YIELD).
162 CallProcedure(CallProcedureOp),
163}
164
165/// Scan nodes from the graph.
166#[derive(Debug, Clone)]
167pub struct NodeScanOp {
168 /// Variable name to bind the node to.
169 pub variable: String,
170 /// Optional label filter.
171 pub label: Option<String>,
172 /// Child operator (if any, for chained patterns).
173 pub input: Option<Box<LogicalOperator>>,
174}
175
176/// Scan edges from the graph.
177#[derive(Debug, Clone)]
178pub struct EdgeScanOp {
179 /// Variable name to bind the edge to.
180 pub variable: String,
181 /// Optional edge type filter.
182 pub edge_type: Option<String>,
183 /// Child operator (if any).
184 pub input: Option<Box<LogicalOperator>>,
185}
186
187/// Expand from nodes to their neighbors.
188#[derive(Debug, Clone)]
189pub struct ExpandOp {
190 /// Source node variable.
191 pub from_variable: String,
192 /// Target node variable to bind.
193 pub to_variable: String,
194 /// Edge variable to bind (optional).
195 pub edge_variable: Option<String>,
196 /// Direction of expansion.
197 pub direction: ExpandDirection,
198 /// Optional edge type filter.
199 pub edge_type: Option<String>,
200 /// Minimum hops (for variable-length patterns).
201 pub min_hops: u32,
202 /// Maximum hops (for variable-length patterns).
203 pub max_hops: Option<u32>,
204 /// Input operator.
205 pub input: Box<LogicalOperator>,
206 /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
207 /// When set, a path length column will be output under this name.
208 pub path_alias: Option<String>,
209}
210
/// Direction in which edges are followed during expansion.
///
/// Derives `Hash` in addition to `Eq` so the value can be used as a key in
/// hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExpandDirection {
    /// Follow outgoing edges.
    Outgoing,
    /// Follow incoming edges.
    Incoming,
    /// Follow edges in either direction.
    Both,
}
221
222/// Join two inputs.
223#[derive(Debug, Clone)]
224pub struct JoinOp {
225 /// Left input.
226 pub left: Box<LogicalOperator>,
227 /// Right input.
228 pub right: Box<LogicalOperator>,
229 /// Join type.
230 pub join_type: JoinType,
231 /// Join conditions.
232 pub conditions: Vec<JoinCondition>,
233}
234
/// Kind of join performed by a [`JoinOp`].
///
/// Derives `Hash` in addition to `Eq` so the value can be used as a key in
/// hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum JoinType {
    /// Inner join.
    Inner,
    /// Left outer join.
    Left,
    /// Right outer join.
    Right,
    /// Full outer join.
    Full,
    /// Cross join (Cartesian product).
    Cross,
    /// Semi join (returns left rows with matching right rows).
    Semi,
    /// Anti join (returns left rows without matching right rows).
    Anti,
}
253
254/// A join condition.
255#[derive(Debug, Clone)]
256pub struct JoinCondition {
257 /// Left expression.
258 pub left: LogicalExpression,
259 /// Right expression.
260 pub right: LogicalExpression,
261}
262
263/// Aggregate with grouping.
264#[derive(Debug, Clone)]
265pub struct AggregateOp {
266 /// Group by expressions.
267 pub group_by: Vec<LogicalExpression>,
268 /// Aggregate functions.
269 pub aggregates: Vec<AggregateExpr>,
270 /// Input operator.
271 pub input: Box<LogicalOperator>,
272 /// HAVING clause filter (applied after aggregation).
273 pub having: Option<LogicalExpression>,
274}
275
276/// An aggregate expression.
277#[derive(Debug, Clone)]
278pub struct AggregateExpr {
279 /// Aggregate function.
280 pub function: AggregateFunction,
281 /// Expression to aggregate.
282 pub expression: Option<LogicalExpression>,
283 /// Whether to use DISTINCT.
284 pub distinct: bool,
285 /// Alias for the result.
286 pub alias: Option<String>,
287 /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
288 pub percentile: Option<f64>,
289}
290
/// Aggregate function selector.
///
/// Derives `Hash` in addition to `Eq` so the value can be used as a key in
/// hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AggregateFunction {
    /// Count all rows (COUNT(*)).
    Count,
    /// Count non-null values (COUNT(expr)).
    CountNonNull,
    /// Sum values.
    Sum,
    /// Average values.
    Avg,
    /// Minimum value.
    Min,
    /// Maximum value.
    Max,
    /// Collect into list.
    Collect,
    /// Sample standard deviation (STDEV).
    StdDev,
    /// Population standard deviation (STDEVP).
    StdDevPop,
    /// Discrete percentile (PERCENTILE_DISC).
    PercentileDisc,
    /// Continuous percentile (PERCENTILE_CONT).
    PercentileCont,
}
317
318/// Filter rows based on a predicate.
319#[derive(Debug, Clone)]
320pub struct FilterOp {
321 /// The filter predicate.
322 pub predicate: LogicalExpression,
323 /// Input operator.
324 pub input: Box<LogicalOperator>,
325}
326
327/// Project specific columns.
328#[derive(Debug, Clone)]
329pub struct ProjectOp {
330 /// Columns to project.
331 pub projections: Vec<Projection>,
332 /// Input operator.
333 pub input: Box<LogicalOperator>,
334}
335
336/// A single projection (column selection or computation).
337#[derive(Debug, Clone)]
338pub struct Projection {
339 /// Expression to compute.
340 pub expression: LogicalExpression,
341 /// Alias for the result.
342 pub alias: Option<String>,
343}
344
345/// Limit the number of results.
346#[derive(Debug, Clone)]
347pub struct LimitOp {
348 /// Maximum number of rows to return.
349 pub count: usize,
350 /// Input operator.
351 pub input: Box<LogicalOperator>,
352}
353
354/// Skip a number of results.
355#[derive(Debug, Clone)]
356pub struct SkipOp {
357 /// Number of rows to skip.
358 pub count: usize,
359 /// Input operator.
360 pub input: Box<LogicalOperator>,
361}
362
363/// Sort results.
364#[derive(Debug, Clone)]
365pub struct SortOp {
366 /// Sort keys.
367 pub keys: Vec<SortKey>,
368 /// Input operator.
369 pub input: Box<LogicalOperator>,
370}
371
372/// A sort key.
373#[derive(Debug, Clone)]
374pub struct SortKey {
375 /// Expression to sort by.
376 pub expression: LogicalExpression,
377 /// Sort order.
378 pub order: SortOrder,
379}
380
/// Ordering applied to a sort key.
///
/// Derives `Hash` in addition to `Eq` so the value can be used as a key in
/// hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SortOrder {
    /// Ascending order.
    Ascending,
    /// Descending order.
    Descending,
}
389
390/// Remove duplicate results.
391#[derive(Debug, Clone)]
392pub struct DistinctOp {
393 /// Input operator.
394 pub input: Box<LogicalOperator>,
395 /// Optional columns to use for deduplication.
396 /// If None, all columns are used.
397 pub columns: Option<Vec<String>>,
398}
399
400/// Create a new node.
401#[derive(Debug, Clone)]
402pub struct CreateNodeOp {
403 /// Variable name to bind the created node to.
404 pub variable: String,
405 /// Labels for the new node.
406 pub labels: Vec<String>,
407 /// Properties for the new node.
408 pub properties: Vec<(String, LogicalExpression)>,
409 /// Input operator (for chained creates).
410 pub input: Option<Box<LogicalOperator>>,
411}
412
413/// Create a new edge.
414#[derive(Debug, Clone)]
415pub struct CreateEdgeOp {
416 /// Variable name to bind the created edge to.
417 pub variable: Option<String>,
418 /// Source node variable.
419 pub from_variable: String,
420 /// Target node variable.
421 pub to_variable: String,
422 /// Edge type.
423 pub edge_type: String,
424 /// Properties for the new edge.
425 pub properties: Vec<(String, LogicalExpression)>,
426 /// Input operator.
427 pub input: Box<LogicalOperator>,
428}
429
430/// Delete a node.
431#[derive(Debug, Clone)]
432pub struct DeleteNodeOp {
433 /// Variable of the node to delete.
434 pub variable: String,
435 /// Whether to detach (delete connected edges) before deleting.
436 pub detach: bool,
437 /// Input operator.
438 pub input: Box<LogicalOperator>,
439}
440
441/// Delete an edge.
442#[derive(Debug, Clone)]
443pub struct DeleteEdgeOp {
444 /// Variable of the edge to delete.
445 pub variable: String,
446 /// Input operator.
447 pub input: Box<LogicalOperator>,
448}
449
450/// Set properties on a node or edge.
451#[derive(Debug, Clone)]
452pub struct SetPropertyOp {
453 /// Variable of the entity to update.
454 pub variable: String,
455 /// Properties to set (name -> expression).
456 pub properties: Vec<(String, LogicalExpression)>,
457 /// Whether to replace all properties (vs. merge).
458 pub replace: bool,
459 /// Input operator.
460 pub input: Box<LogicalOperator>,
461}
462
463/// Add labels to a node.
464#[derive(Debug, Clone)]
465pub struct AddLabelOp {
466 /// Variable of the node to update.
467 pub variable: String,
468 /// Labels to add.
469 pub labels: Vec<String>,
470 /// Input operator.
471 pub input: Box<LogicalOperator>,
472}
473
474/// Remove labels from a node.
475#[derive(Debug, Clone)]
476pub struct RemoveLabelOp {
477 /// Variable of the node to update.
478 pub variable: String,
479 /// Labels to remove.
480 pub labels: Vec<String>,
481 /// Input operator.
482 pub input: Box<LogicalOperator>,
483}
484
485// ==================== RDF/SPARQL Operators ====================
486
487/// Scan RDF triples matching a pattern.
488#[derive(Debug, Clone)]
489pub struct TripleScanOp {
490 /// Subject pattern (variable name or IRI).
491 pub subject: TripleComponent,
492 /// Predicate pattern (variable name or IRI).
493 pub predicate: TripleComponent,
494 /// Object pattern (variable name, IRI, or literal).
495 pub object: TripleComponent,
496 /// Named graph (optional).
497 pub graph: Option<TripleComponent>,
498 /// Input operator (for chained patterns).
499 pub input: Option<Box<LogicalOperator>>,
500}
501
502/// A component of a triple pattern.
503#[derive(Debug, Clone)]
504pub enum TripleComponent {
505 /// A variable to bind.
506 Variable(String),
507 /// A constant IRI.
508 Iri(String),
509 /// A constant literal value.
510 Literal(Value),
511}
512
513/// Union of multiple result sets.
514#[derive(Debug, Clone)]
515pub struct UnionOp {
516 /// Inputs to union together.
517 pub inputs: Vec<LogicalOperator>,
518}
519
520/// Left outer join for OPTIONAL patterns.
521#[derive(Debug, Clone)]
522pub struct LeftJoinOp {
523 /// Left (required) input.
524 pub left: Box<LogicalOperator>,
525 /// Right (optional) input.
526 pub right: Box<LogicalOperator>,
527 /// Optional filter condition.
528 pub condition: Option<LogicalExpression>,
529}
530
531/// Anti-join for MINUS patterns.
532#[derive(Debug, Clone)]
533pub struct AntiJoinOp {
534 /// Left input (results to keep if no match on right).
535 pub left: Box<LogicalOperator>,
536 /// Right input (patterns to exclude).
537 pub right: Box<LogicalOperator>,
538}
539
540/// Bind a variable to an expression.
541#[derive(Debug, Clone)]
542pub struct BindOp {
543 /// Expression to compute.
544 pub expression: LogicalExpression,
545 /// Variable to bind the result to.
546 pub variable: String,
547 /// Input operator.
548 pub input: Box<LogicalOperator>,
549}
550
551/// Unwind a list into individual rows.
552///
553/// For each input row, evaluates the expression (which should return a list)
554/// and emits one row for each element in the list.
555#[derive(Debug, Clone)]
556pub struct UnwindOp {
557 /// The list expression to unwind.
558 pub expression: LogicalExpression,
559 /// The variable name for each element.
560 pub variable: String,
561 /// Input operator.
562 pub input: Box<LogicalOperator>,
563}
564
565/// Merge a pattern (match or create).
566///
567/// MERGE tries to match a pattern in the graph. If found, returns the existing
568/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
569/// (optionally applying ON CREATE SET).
570#[derive(Debug, Clone)]
571pub struct MergeOp {
572 /// The node to merge.
573 pub variable: String,
574 /// Labels to match/create.
575 pub labels: Vec<String>,
576 /// Properties that must match (used for both matching and creation).
577 pub match_properties: Vec<(String, LogicalExpression)>,
578 /// Properties to set on CREATE.
579 pub on_create: Vec<(String, LogicalExpression)>,
580 /// Properties to set on MATCH.
581 pub on_match: Vec<(String, LogicalExpression)>,
582 /// Input operator.
583 pub input: Box<LogicalOperator>,
584}
585
586/// Find shortest path between two nodes.
587///
588/// This operator uses Dijkstra's algorithm to find the shortest path(s)
589/// between a source node and a target node, optionally filtered by edge type.
590#[derive(Debug, Clone)]
591pub struct ShortestPathOp {
592 /// Input operator providing source/target nodes.
593 pub input: Box<LogicalOperator>,
594 /// Variable name for the source node.
595 pub source_var: String,
596 /// Variable name for the target node.
597 pub target_var: String,
598 /// Optional edge type filter.
599 pub edge_type: Option<String>,
600 /// Direction of edge traversal.
601 pub direction: ExpandDirection,
602 /// Variable name to bind the path result.
603 pub path_alias: String,
604 /// Whether to find all shortest paths (vs. just one).
605 pub all_paths: bool,
606}
607
608// ==================== SPARQL Update Operators ====================
609
610/// Insert RDF triples.
611#[derive(Debug, Clone)]
612pub struct InsertTripleOp {
613 /// Subject of the triple.
614 pub subject: TripleComponent,
615 /// Predicate of the triple.
616 pub predicate: TripleComponent,
617 /// Object of the triple.
618 pub object: TripleComponent,
619 /// Named graph (optional).
620 pub graph: Option<String>,
621 /// Input operator (provides variable bindings).
622 pub input: Option<Box<LogicalOperator>>,
623}
624
625/// Delete RDF triples.
626#[derive(Debug, Clone)]
627pub struct DeleteTripleOp {
628 /// Subject pattern.
629 pub subject: TripleComponent,
630 /// Predicate pattern.
631 pub predicate: TripleComponent,
632 /// Object pattern.
633 pub object: TripleComponent,
634 /// Named graph (optional).
635 pub graph: Option<String>,
636 /// Input operator (provides variable bindings).
637 pub input: Option<Box<LogicalOperator>>,
638}
639
640/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
641///
642/// Per SPARQL 1.1 Update spec, this operator:
643/// 1. Evaluates the WHERE clause once to get bindings
644/// 2. Applies DELETE templates using those bindings
645/// 3. Applies INSERT templates using the SAME bindings
646///
647/// This ensures DELETE and INSERT see consistent data.
648#[derive(Debug, Clone)]
649pub struct ModifyOp {
650 /// DELETE triple templates (patterns with variables).
651 pub delete_templates: Vec<TripleTemplate>,
652 /// INSERT triple templates (patterns with variables).
653 pub insert_templates: Vec<TripleTemplate>,
654 /// WHERE clause that provides variable bindings.
655 pub where_clause: Box<LogicalOperator>,
656 /// Named graph context (for WITH clause).
657 pub graph: Option<String>,
658}
659
660/// A triple template for DELETE/INSERT operations.
661#[derive(Debug, Clone)]
662pub struct TripleTemplate {
663 /// Subject (may be a variable).
664 pub subject: TripleComponent,
665 /// Predicate (may be a variable).
666 pub predicate: TripleComponent,
667 /// Object (may be a variable or literal).
668 pub object: TripleComponent,
669 /// Named graph (optional).
670 pub graph: Option<String>,
671}
672
/// Payload for clearing all triples from a graph.
#[derive(Debug, Clone)]
pub struct ClearGraphOp {
    /// Target graph (None = default graph, Some("") = all named, Some(iri) = specific graph).
    pub graph: Option<String>,
    /// Whether errors are silently ignored.
    pub silent: bool,
}
681
/// Payload for creating a new named graph.
#[derive(Debug, Clone)]
pub struct CreateGraphOp {
    /// IRI of the graph to create.
    pub graph: String,
    /// Whether to silently ignore if graph already exists.
    pub silent: bool,
}
690
/// Payload for dropping (removing) a named graph.
#[derive(Debug, Clone)]
pub struct DropGraphOp {
    /// Target graph (None = default graph).
    pub graph: Option<String>,
    /// Whether errors are silently ignored.
    pub silent: bool,
}
699
/// Payload for loading data from a URL into a graph.
#[derive(Debug, Clone)]
pub struct LoadGraphOp {
    /// URL the data is loaded from.
    pub source: String,
    /// Destination graph (None = default graph).
    pub destination: Option<String>,
    /// Whether errors are silently ignored.
    pub silent: bool,
}
710
/// Payload for copying triples from one graph to another.
#[derive(Debug, Clone)]
pub struct CopyGraphOp {
    /// Graph the triples are copied from.
    pub source: Option<String>,
    /// Graph the triples are copied into.
    pub destination: Option<String>,
    /// Whether errors are silently ignored.
    pub silent: bool,
}
721
/// Payload for moving triples from one graph to another.
#[derive(Debug, Clone)]
pub struct MoveGraphOp {
    /// Graph the triples are moved from.
    pub source: Option<String>,
    /// Graph the triples are moved into.
    pub destination: Option<String>,
    /// Whether errors are silently ignored.
    pub silent: bool,
}
732
/// Payload for adding (merging) triples from one graph into another.
#[derive(Debug, Clone)]
pub struct AddGraphOp {
    /// Graph the triples are taken from.
    pub source: Option<String>,
    /// Graph the triples are added to.
    pub destination: Option<String>,
    /// Whether errors are silently ignored.
    pub silent: bool,
}
743
744// ==================== Vector Search Operators ====================
745
746/// Vector similarity scan operation.
747///
748/// Performs approximate nearest neighbor search using a vector index (HNSW)
749/// or brute-force search for small datasets. Returns nodes/edges whose
750/// embeddings are similar to the query vector.
751///
752/// # Example GQL
753///
754/// ```gql
755/// MATCH (m:Movie)
756/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
757/// RETURN m.title
758/// ```
759#[derive(Debug, Clone)]
760pub struct VectorScanOp {
761 /// Variable name to bind matching entities to.
762 pub variable: String,
763 /// Name of the vector index to use (None = brute-force).
764 pub index_name: Option<String>,
765 /// Property containing the vector embedding.
766 pub property: String,
767 /// Optional label filter (scan only nodes with this label).
768 pub label: Option<String>,
769 /// The query vector expression.
770 pub query_vector: LogicalExpression,
771 /// Number of nearest neighbors to return.
772 pub k: usize,
773 /// Distance metric (None = use index default, typically cosine).
774 pub metric: Option<VectorMetric>,
775 /// Minimum similarity threshold (filters results below this).
776 pub min_similarity: Option<f32>,
777 /// Maximum distance threshold (filters results above this).
778 pub max_distance: Option<f32>,
779 /// Input operator (for hybrid queries combining graph + vector).
780 pub input: Option<Box<LogicalOperator>>,
781}
782
/// Distance/similarity metric used by vector scan operations.
///
/// Derives `Hash` in addition to `Eq` so the value can be used as a key in
/// hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VectorMetric {
    /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
    Cosine,
    /// Euclidean (L2) distance. Best when magnitude matters.
    Euclidean,
    /// Dot product. Best for maximum inner product search.
    DotProduct,
    /// Manhattan (L1) distance. Less sensitive to outliers.
    Manhattan,
}
795
796/// Join graph patterns with vector similarity search.
797///
798/// This operator takes entities from the left input and computes vector
799/// similarity against a query vector, outputting (entity, distance) pairs.
800///
801/// # Use Cases
802///
803/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
804/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
805/// 3. **Filtering by similarity**: Join with threshold-based filtering
806///
807/// # Example
808///
809/// ```gql
810/// // Find movies similar to what the user liked
811/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
812/// WITH avg(liked.embedding) AS user_taste
813/// VECTOR JOIN (m:Movie) ON m.embedding
814/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
815/// RETURN m.title
816/// ```
817#[derive(Debug, Clone)]
818pub struct VectorJoinOp {
819 /// Input operator providing entities to match against.
820 pub input: Box<LogicalOperator>,
821 /// Variable from input to extract vectors from (for entity-to-entity similarity).
822 /// If None, uses `query_vector` directly.
823 pub left_vector_variable: Option<String>,
824 /// Property containing the left vector (used with `left_vector_variable`).
825 pub left_property: Option<String>,
826 /// The query vector expression (constant or computed).
827 pub query_vector: LogicalExpression,
828 /// Variable name to bind the right-side matching entities.
829 pub right_variable: String,
830 /// Property containing the right-side vector embeddings.
831 pub right_property: String,
832 /// Optional label filter for right-side entities.
833 pub right_label: Option<String>,
834 /// Name of vector index on right side (None = brute-force).
835 pub index_name: Option<String>,
836 /// Number of nearest neighbors per left-side entity.
837 pub k: usize,
838 /// Distance metric.
839 pub metric: Option<VectorMetric>,
840 /// Minimum similarity threshold.
841 pub min_similarity: Option<f32>,
842 /// Maximum distance threshold.
843 pub max_distance: Option<f32>,
844 /// Variable to bind the distance/similarity score.
845 pub score_variable: Option<String>,
846}
847
848/// Return results (terminal operator).
849#[derive(Debug, Clone)]
850pub struct ReturnOp {
851 /// Items to return.
852 pub items: Vec<ReturnItem>,
853 /// Whether to return distinct results.
854 pub distinct: bool,
855 /// Input operator.
856 pub input: Box<LogicalOperator>,
857}
858
859/// A single return item.
860#[derive(Debug, Clone)]
861pub struct ReturnItem {
862 /// Expression to return.
863 pub expression: LogicalExpression,
864 /// Alias for the result column.
865 pub alias: Option<String>,
866}
867
868/// Define a property graph schema (SQL/PGQ DDL).
869#[derive(Debug, Clone)]
870pub struct CreatePropertyGraphOp {
871 /// Graph name.
872 pub name: String,
873 /// Node table schemas (label name + column definitions).
874 pub node_tables: Vec<PropertyGraphNodeTable>,
875 /// Edge table schemas (type name + column definitions + references).
876 pub edge_tables: Vec<PropertyGraphEdgeTable>,
877}
878
/// Node table entry of a property graph definition.
#[derive(Debug, Clone)]
pub struct PropertyGraphNodeTable {
    /// Table name (maps to a node label).
    pub name: String,
    /// Column definitions as (name, type_name) pairs.
    pub columns: Vec<(String, String)>,
}
887
/// Edge table entry of a property graph definition.
#[derive(Debug, Clone)]
pub struct PropertyGraphEdgeTable {
    /// Table name (maps to an edge type).
    pub name: String,
    /// Column definitions as (name, type_name) pairs.
    pub columns: Vec<(String, String)>,
    /// Name of the source node table.
    pub source_table: String,
    /// Name of the target node table.
    pub target_table: String,
}
900
901// ==================== Procedure Call Types ====================
902
903/// A CALL procedure operation.
904///
905/// ```text
906/// CALL grafeo.pagerank({damping: 0.85}) YIELD nodeId, score
907/// ```
908#[derive(Debug, Clone)]
909pub struct CallProcedureOp {
910 /// Dotted procedure name, e.g. `["grafeo", "pagerank"]`.
911 pub name: Vec<String>,
912 /// Argument expressions (constants in Phase 1).
913 pub arguments: Vec<LogicalExpression>,
914 /// Optional YIELD clause: which columns to expose + aliases.
915 pub yield_items: Option<Vec<ProcedureYield>>,
916}
917
/// One YIELD item of a procedure call.
#[derive(Debug, Clone)]
pub struct ProcedureYield {
    /// Name of the column taken from the procedure result.
    pub field_name: String,
    /// Optional alias (YIELD score AS rank).
    pub alias: Option<String>,
}
926
927/// A logical expression.
928#[derive(Debug, Clone)]
929pub enum LogicalExpression {
930 /// A literal value.
931 Literal(Value),
932
933 /// A variable reference.
934 Variable(String),
935
936 /// Property access (e.g., n.name).
937 Property {
938 /// The variable to access.
939 variable: String,
940 /// The property name.
941 property: String,
942 },
943
944 /// Binary operation.
945 Binary {
946 /// Left operand.
947 left: Box<LogicalExpression>,
948 /// Operator.
949 op: BinaryOp,
950 /// Right operand.
951 right: Box<LogicalExpression>,
952 },
953
954 /// Unary operation.
955 Unary {
956 /// Operator.
957 op: UnaryOp,
958 /// Operand.
959 operand: Box<LogicalExpression>,
960 },
961
962 /// Function call.
963 FunctionCall {
964 /// Function name.
965 name: String,
966 /// Arguments.
967 args: Vec<LogicalExpression>,
968 /// Whether DISTINCT is applied (e.g., COUNT(DISTINCT x)).
969 distinct: bool,
970 },
971
972 /// List literal.
973 List(Vec<LogicalExpression>),
974
975 /// Map literal (e.g., {name: 'Alice', age: 30}).
976 Map(Vec<(String, LogicalExpression)>),
977
978 /// Index access (e.g., `list[0]`).
979 IndexAccess {
980 /// The base expression (typically a list or string).
981 base: Box<LogicalExpression>,
982 /// The index expression.
983 index: Box<LogicalExpression>,
984 },
985
986 /// Slice access (e.g., list[1..3]).
987 SliceAccess {
988 /// The base expression (typically a list or string).
989 base: Box<LogicalExpression>,
990 /// Start index (None means from beginning).
991 start: Option<Box<LogicalExpression>>,
992 /// End index (None means to end).
993 end: Option<Box<LogicalExpression>>,
994 },
995
996 /// CASE expression.
997 Case {
998 /// Test expression (for simple CASE).
999 operand: Option<Box<LogicalExpression>>,
1000 /// WHEN clauses.
1001 when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
1002 /// ELSE clause.
1003 else_clause: Option<Box<LogicalExpression>>,
1004 },
1005
1006 /// Parameter reference.
1007 Parameter(String),
1008
1009 /// Labels of a node.
1010 Labels(String),
1011
1012 /// Type of an edge.
1013 Type(String),
1014
1015 /// ID of a node or edge.
1016 Id(String),
1017
1018 /// List comprehension: [x IN list WHERE predicate | expression]
1019 ListComprehension {
1020 /// Variable name for each element.
1021 variable: String,
1022 /// The source list expression.
1023 list_expr: Box<LogicalExpression>,
1024 /// Optional filter predicate.
1025 filter_expr: Option<Box<LogicalExpression>>,
1026 /// The mapping expression for each element.
1027 map_expr: Box<LogicalExpression>,
1028 },
1029
1030 /// EXISTS subquery.
1031 ExistsSubquery(Box<LogicalOperator>),
1032
1033 /// COUNT subquery.
1034 CountSubquery(Box<LogicalOperator>),
1035}
1036
/// Binary operator used in [`LogicalExpression::Binary`].
///
/// Derives `Hash` in addition to `Eq` so the value can be used as a key in
/// hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BinaryOp {
    /// Equality comparison (=).
    Eq,
    /// Inequality comparison (<>).
    Ne,
    /// Less than (<).
    Lt,
    /// Less than or equal (<=).
    Le,
    /// Greater than (>).
    Gt,
    /// Greater than or equal (>=).
    Ge,

    /// Logical AND.
    And,
    /// Logical OR.
    Or,
    /// Logical XOR.
    Xor,

    /// Addition (+).
    Add,
    /// Subtraction (-).
    Sub,
    /// Multiplication (*).
    Mul,
    /// Division (/).
    Div,
    /// Modulo (%).
    Mod,

    /// String concatenation.
    Concat,
    /// String starts with.
    StartsWith,
    /// String ends with.
    EndsWith,
    /// String contains.
    Contains,

    /// Collection membership (IN).
    In,
    /// Pattern matching (LIKE).
    Like,
    /// Regex matching (=~).
    Regex,
    /// Power/exponentiation (^).
    Pow,
}
1089
/// Unary operator used in [`LogicalExpression::Unary`].
///
/// Derives `Hash` in addition to `Eq` so the value can be used as a key in
/// hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UnaryOp {
    /// Logical NOT.
    Not,
    /// Numeric negation.
    Neg,
    /// IS NULL check.
    IsNull,
    /// IS NOT NULL check.
    IsNotNull,
}
1102
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Return` over a labelled `NodeScan` and checks the tree shape.
    #[test]
    fn test_simple_node_scan_plan() {
        let person_scan = LogicalOperator::NodeScan(NodeScanOp {
            variable: "n".into(),
            label: Some("Person".into()),
            input: None,
        });
        let returned = ReturnItem {
            expression: LogicalExpression::Variable("n".into()),
            alias: None,
        };
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![returned],
            distinct: false,
            input: Box::new(person_scan),
        }));

        // Verify structure, peeling one operator at a time.
        let ret = match &plan.root {
            LogicalOperator::Return(ret) => ret,
            _ => panic!("Expected Return"),
        };
        assert_eq!(ret.items.len(), 1);
        assert!(!ret.distinct);

        let scan = match ret.input.as_ref() {
            LogicalOperator::NodeScan(scan) => scan,
            _ => panic!("Expected NodeScan"),
        };
        assert_eq!(scan.variable, "n");
        assert_eq!(scan.label, Some("Person".into()));
    }

    /// Builds `Return` -> `Filter` -> `NodeScan` and checks the predicate operator.
    #[test]
    fn test_filter_plan() {
        let age_over_30 = LogicalExpression::Binary {
            left: Box::new(LogicalExpression::Property {
                variable: "n".into(),
                property: "age".into(),
            }),
            op: BinaryOp::Gt,
            right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
        };
        let person_scan = LogicalOperator::NodeScan(NodeScanOp {
            variable: "n".into(),
            label: Some("Person".into()),
            input: None,
        });
        let name_item = ReturnItem {
            expression: LogicalExpression::Property {
                variable: "n".into(),
                property: "name".into(),
            },
            alias: Some("name".into()),
        };
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![name_item],
            distinct: false,
            input: Box::new(LogicalOperator::Filter(FilterOp {
                predicate: age_over_30,
                input: Box::new(person_scan),
            })),
        }));

        let ret = match &plan.root {
            LogicalOperator::Return(ret) => ret,
            _ => panic!("Expected Return"),
        };
        let filter = match ret.input.as_ref() {
            LogicalOperator::Filter(filter) => filter,
            _ => panic!("Expected Filter"),
        };
        match &filter.predicate {
            LogicalExpression::Binary { op, .. } => assert_eq!(*op, BinaryOp::Gt),
            _ => panic!("Expected Binary expression"),
        }
    }
}