grafeo_engine/query/plan.rs
//! Logical query plan representation.
//!
//! The logical plan is the intermediate representation between parsed queries
//! and physical execution. Both GQL and Cypher queries are translated to this
//! common representation.
6
7use grafeo_common::types::Value;
8
9/// A logical query plan.
10#[derive(Debug, Clone)]
11pub struct LogicalPlan {
12 /// The root operator of the plan.
13 pub root: LogicalOperator,
14}
15
16impl LogicalPlan {
17 /// Creates a new logical plan with the given root operator.
18 pub fn new(root: LogicalOperator) -> Self {
19 Self { root }
20 }
21}
22
23/// A logical operator in the query plan.
24#[derive(Debug, Clone)]
25pub enum LogicalOperator {
26 /// Scan all nodes, optionally filtered by label.
27 NodeScan(NodeScanOp),
28
29 /// Scan all edges, optionally filtered by type.
30 EdgeScan(EdgeScanOp),
31
32 /// Expand from nodes to neighbors via edges.
33 Expand(ExpandOp),
34
35 /// Filter rows based on a predicate.
36 Filter(FilterOp),
37
38 /// Project specific columns.
39 Project(ProjectOp),
40
41 /// Join two inputs.
42 Join(JoinOp),
43
44 /// Aggregate with grouping.
45 Aggregate(AggregateOp),
46
47 /// Limit the number of results.
48 Limit(LimitOp),
49
50 /// Skip a number of results.
51 Skip(SkipOp),
52
53 /// Sort results.
54 Sort(SortOp),
55
56 /// Remove duplicate results.
57 Distinct(DistinctOp),
58
59 /// Create a new node.
60 CreateNode(CreateNodeOp),
61
62 /// Create a new edge.
63 CreateEdge(CreateEdgeOp),
64
65 /// Delete a node.
66 DeleteNode(DeleteNodeOp),
67
68 /// Delete an edge.
69 DeleteEdge(DeleteEdgeOp),
70
71 /// Set properties on a node or edge.
72 SetProperty(SetPropertyOp),
73
74 /// Add labels to a node.
75 AddLabel(AddLabelOp),
76
77 /// Remove labels from a node.
78 RemoveLabel(RemoveLabelOp),
79
80 /// Return results (terminal operator).
81 Return(ReturnOp),
82
83 /// Empty result set.
84 Empty,
85
86 // ==================== RDF/SPARQL Operators ====================
87 /// Scan RDF triples matching a pattern.
88 TripleScan(TripleScanOp),
89
90 /// Union of multiple result sets.
91 Union(UnionOp),
92
93 /// Left outer join for OPTIONAL patterns.
94 LeftJoin(LeftJoinOp),
95
96 /// Anti-join for MINUS patterns.
97 AntiJoin(AntiJoinOp),
98
99 /// Bind a variable to an expression.
100 Bind(BindOp),
101
102 /// Unwind a list into individual rows.
103 Unwind(UnwindOp),
104
105 /// Merge a pattern (match or create).
106 Merge(MergeOp),
107
108 /// Find shortest path between nodes.
109 ShortestPath(ShortestPathOp),
110
111 // ==================== SPARQL Update Operators ====================
112 /// Insert RDF triples.
113 InsertTriple(InsertTripleOp),
114
115 /// Delete RDF triples.
116 DeleteTriple(DeleteTripleOp),
117
118 /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
119 /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
120 Modify(ModifyOp),
121
122 /// Clear a graph (remove all triples).
123 ClearGraph(ClearGraphOp),
124
125 /// Create a new named graph.
126 CreateGraph(CreateGraphOp),
127
128 /// Drop (remove) a named graph.
129 DropGraph(DropGraphOp),
130
131 /// Load data from a URL into a graph.
132 LoadGraph(LoadGraphOp),
133
134 /// Copy triples from one graph to another.
135 CopyGraph(CopyGraphOp),
136
137 /// Move triples from one graph to another.
138 MoveGraph(MoveGraphOp),
139
140 /// Add (merge) triples from one graph to another.
141 AddGraph(AddGraphOp),
142
143 // ==================== Vector Search Operators ====================
144 /// Scan using vector similarity search.
145 VectorScan(VectorScanOp),
146
147 /// Join graph patterns with vector similarity search.
148 ///
149 /// Computes vector distances between entities from the left input and
150 /// a query vector, then joins with similarity scores. Useful for:
151 /// - Filtering graph traversal results by vector similarity
152 /// - Computing aggregated embeddings and finding similar entities
153 /// - Combining multiple vector sources with graph structure
154 VectorJoin(VectorJoinOp),
155
156 // ==================== DDL Operators ====================
157 /// Define a property graph schema (SQL/PGQ DDL).
158 CreatePropertyGraph(CreatePropertyGraphOp),
159
160 // ==================== Procedure Call Operators ====================
161 /// Invoke a stored procedure (CALL ... YIELD).
162 CallProcedure(CallProcedureOp),
163}
164
165/// Scan nodes from the graph.
166#[derive(Debug, Clone)]
167pub struct NodeScanOp {
168 /// Variable name to bind the node to.
169 pub variable: String,
170 /// Optional label filter.
171 pub label: Option<String>,
172 /// Child operator (if any, for chained patterns).
173 pub input: Option<Box<LogicalOperator>>,
174}
175
176/// Scan edges from the graph.
177#[derive(Debug, Clone)]
178pub struct EdgeScanOp {
179 /// Variable name to bind the edge to.
180 pub variable: String,
181 /// Optional edge type filter.
182 pub edge_type: Option<String>,
183 /// Child operator (if any).
184 pub input: Option<Box<LogicalOperator>>,
185}
186
187/// Expand from nodes to their neighbors.
188#[derive(Debug, Clone)]
189pub struct ExpandOp {
190 /// Source node variable.
191 pub from_variable: String,
192 /// Target node variable to bind.
193 pub to_variable: String,
194 /// Edge variable to bind (optional).
195 pub edge_variable: Option<String>,
196 /// Direction of expansion.
197 pub direction: ExpandDirection,
198 /// Optional edge type filter.
199 pub edge_type: Option<String>,
200 /// Minimum hops (for variable-length patterns).
201 pub min_hops: u32,
202 /// Maximum hops (for variable-length patterns).
203 pub max_hops: Option<u32>,
204 /// Input operator.
205 pub input: Box<LogicalOperator>,
206 /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
207 /// When set, a path length column will be output under this name.
208 pub path_alias: Option<String>,
209}
210
/// Direction for edge expansion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExpandDirection {
    /// Follow outgoing edges.
    Outgoing,
    /// Follow incoming edges.
    Incoming,
    /// Follow edges in either direction.
    Both,
}
221
222/// Join two inputs.
223#[derive(Debug, Clone)]
224pub struct JoinOp {
225 /// Left input.
226 pub left: Box<LogicalOperator>,
227 /// Right input.
228 pub right: Box<LogicalOperator>,
229 /// Join type.
230 pub join_type: JoinType,
231 /// Join conditions.
232 pub conditions: Vec<JoinCondition>,
233}
234
/// Join type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JoinType {
    /// Inner join.
    Inner,
    /// Left outer join.
    Left,
    /// Right outer join.
    Right,
    /// Full outer join.
    Full,
    /// Cross join (Cartesian product).
    Cross,
    /// Semi join (returns left rows with matching right rows).
    Semi,
    /// Anti join (returns left rows without matching right rows).
    Anti,
}
253
254/// A join condition.
255#[derive(Debug, Clone)]
256pub struct JoinCondition {
257 /// Left expression.
258 pub left: LogicalExpression,
259 /// Right expression.
260 pub right: LogicalExpression,
261}
262
263/// Aggregate with grouping.
264#[derive(Debug, Clone)]
265pub struct AggregateOp {
266 /// Group by expressions.
267 pub group_by: Vec<LogicalExpression>,
268 /// Aggregate functions.
269 pub aggregates: Vec<AggregateExpr>,
270 /// Input operator.
271 pub input: Box<LogicalOperator>,
272 /// HAVING clause filter (applied after aggregation).
273 pub having: Option<LogicalExpression>,
274}
275
276/// An aggregate expression.
277#[derive(Debug, Clone)]
278pub struct AggregateExpr {
279 /// Aggregate function.
280 pub function: AggregateFunction,
281 /// Expression to aggregate.
282 pub expression: Option<LogicalExpression>,
283 /// Whether to use DISTINCT.
284 pub distinct: bool,
285 /// Alias for the result.
286 pub alias: Option<String>,
287 /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
288 pub percentile: Option<f64>,
289}
290
/// Aggregate function.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AggregateFunction {
    /// Count all rows (COUNT(*)).
    Count,
    /// Count non-null values (COUNT(expr)).
    CountNonNull,
    /// Sum values.
    Sum,
    /// Average values.
    Avg,
    /// Minimum value.
    Min,
    /// Maximum value.
    Max,
    /// Collect into list.
    Collect,
    /// Sample standard deviation (STDEV).
    StdDev,
    /// Population standard deviation (STDEVP).
    StdDevPop,
    /// Discrete percentile (PERCENTILE_DISC).
    PercentileDisc,
    /// Continuous percentile (PERCENTILE_CONT).
    PercentileCont,
}
317
318/// Filter rows based on a predicate.
319#[derive(Debug, Clone)]
320pub struct FilterOp {
321 /// The filter predicate.
322 pub predicate: LogicalExpression,
323 /// Input operator.
324 pub input: Box<LogicalOperator>,
325}
326
327/// Project specific columns.
328#[derive(Debug, Clone)]
329pub struct ProjectOp {
330 /// Columns to project.
331 pub projections: Vec<Projection>,
332 /// Input operator.
333 pub input: Box<LogicalOperator>,
334}
335
336/// A single projection (column selection or computation).
337#[derive(Debug, Clone)]
338pub struct Projection {
339 /// Expression to compute.
340 pub expression: LogicalExpression,
341 /// Alias for the result.
342 pub alias: Option<String>,
343}
344
345/// Limit the number of results.
346#[derive(Debug, Clone)]
347pub struct LimitOp {
348 /// Maximum number of rows to return.
349 pub count: usize,
350 /// Input operator.
351 pub input: Box<LogicalOperator>,
352}
353
354/// Skip a number of results.
355#[derive(Debug, Clone)]
356pub struct SkipOp {
357 /// Number of rows to skip.
358 pub count: usize,
359 /// Input operator.
360 pub input: Box<LogicalOperator>,
361}
362
363/// Sort results.
364#[derive(Debug, Clone)]
365pub struct SortOp {
366 /// Sort keys.
367 pub keys: Vec<SortKey>,
368 /// Input operator.
369 pub input: Box<LogicalOperator>,
370}
371
372/// A sort key.
373#[derive(Debug, Clone)]
374pub struct SortKey {
375 /// Expression to sort by.
376 pub expression: LogicalExpression,
377 /// Sort order.
378 pub order: SortOrder,
379}
380
/// Sort order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SortOrder {
    /// Ascending order.
    Ascending,
    /// Descending order.
    Descending,
}
389
390/// Remove duplicate results.
391#[derive(Debug, Clone)]
392pub struct DistinctOp {
393 /// Input operator.
394 pub input: Box<LogicalOperator>,
395 /// Optional columns to use for deduplication.
396 /// If None, all columns are used.
397 pub columns: Option<Vec<String>>,
398}
399
400/// Create a new node.
401#[derive(Debug, Clone)]
402pub struct CreateNodeOp {
403 /// Variable name to bind the created node to.
404 pub variable: String,
405 /// Labels for the new node.
406 pub labels: Vec<String>,
407 /// Properties for the new node.
408 pub properties: Vec<(String, LogicalExpression)>,
409 /// Input operator (for chained creates).
410 pub input: Option<Box<LogicalOperator>>,
411}
412
413/// Create a new edge.
414#[derive(Debug, Clone)]
415pub struct CreateEdgeOp {
416 /// Variable name to bind the created edge to.
417 pub variable: Option<String>,
418 /// Source node variable.
419 pub from_variable: String,
420 /// Target node variable.
421 pub to_variable: String,
422 /// Edge type.
423 pub edge_type: String,
424 /// Properties for the new edge.
425 pub properties: Vec<(String, LogicalExpression)>,
426 /// Input operator.
427 pub input: Box<LogicalOperator>,
428}
429
430/// Delete a node.
431#[derive(Debug, Clone)]
432pub struct DeleteNodeOp {
433 /// Variable of the node to delete.
434 pub variable: String,
435 /// Whether to detach (delete connected edges) before deleting.
436 pub detach: bool,
437 /// Input operator.
438 pub input: Box<LogicalOperator>,
439}
440
441/// Delete an edge.
442#[derive(Debug, Clone)]
443pub struct DeleteEdgeOp {
444 /// Variable of the edge to delete.
445 pub variable: String,
446 /// Input operator.
447 pub input: Box<LogicalOperator>,
448}
449
450/// Set properties on a node or edge.
451#[derive(Debug, Clone)]
452pub struct SetPropertyOp {
453 /// Variable of the entity to update.
454 pub variable: String,
455 /// Properties to set (name -> expression).
456 pub properties: Vec<(String, LogicalExpression)>,
457 /// Whether to replace all properties (vs. merge).
458 pub replace: bool,
459 /// Input operator.
460 pub input: Box<LogicalOperator>,
461}
462
463/// Add labels to a node.
464#[derive(Debug, Clone)]
465pub struct AddLabelOp {
466 /// Variable of the node to update.
467 pub variable: String,
468 /// Labels to add.
469 pub labels: Vec<String>,
470 /// Input operator.
471 pub input: Box<LogicalOperator>,
472}
473
474/// Remove labels from a node.
475#[derive(Debug, Clone)]
476pub struct RemoveLabelOp {
477 /// Variable of the node to update.
478 pub variable: String,
479 /// Labels to remove.
480 pub labels: Vec<String>,
481 /// Input operator.
482 pub input: Box<LogicalOperator>,
483}
484
// ==================== RDF/SPARQL Operators ====================
486
487/// Scan RDF triples matching a pattern.
488#[derive(Debug, Clone)]
489pub struct TripleScanOp {
490 /// Subject pattern (variable name or IRI).
491 pub subject: TripleComponent,
492 /// Predicate pattern (variable name or IRI).
493 pub predicate: TripleComponent,
494 /// Object pattern (variable name, IRI, or literal).
495 pub object: TripleComponent,
496 /// Named graph (optional).
497 pub graph: Option<TripleComponent>,
498 /// Input operator (for chained patterns).
499 pub input: Option<Box<LogicalOperator>>,
500}
501
502/// A component of a triple pattern.
503#[derive(Debug, Clone)]
504pub enum TripleComponent {
505 /// A variable to bind.
506 Variable(String),
507 /// A constant IRI.
508 Iri(String),
509 /// A constant literal value.
510 Literal(Value),
511}
512
513/// Union of multiple result sets.
514#[derive(Debug, Clone)]
515pub struct UnionOp {
516 /// Inputs to union together.
517 pub inputs: Vec<LogicalOperator>,
518}
519
520/// Left outer join for OPTIONAL patterns.
521#[derive(Debug, Clone)]
522pub struct LeftJoinOp {
523 /// Left (required) input.
524 pub left: Box<LogicalOperator>,
525 /// Right (optional) input.
526 pub right: Box<LogicalOperator>,
527 /// Optional filter condition.
528 pub condition: Option<LogicalExpression>,
529}
530
531/// Anti-join for MINUS patterns.
532#[derive(Debug, Clone)]
533pub struct AntiJoinOp {
534 /// Left input (results to keep if no match on right).
535 pub left: Box<LogicalOperator>,
536 /// Right input (patterns to exclude).
537 pub right: Box<LogicalOperator>,
538}
539
540/// Bind a variable to an expression.
541#[derive(Debug, Clone)]
542pub struct BindOp {
543 /// Expression to compute.
544 pub expression: LogicalExpression,
545 /// Variable to bind the result to.
546 pub variable: String,
547 /// Input operator.
548 pub input: Box<LogicalOperator>,
549}
550
551/// Unwind a list into individual rows.
552///
553/// For each input row, evaluates the expression (which should return a list)
554/// and emits one row for each element in the list.
555#[derive(Debug, Clone)]
556pub struct UnwindOp {
557 /// The list expression to unwind.
558 pub expression: LogicalExpression,
559 /// The variable name for each element.
560 pub variable: String,
561 /// Optional variable for 1-based element position (ORDINALITY).
562 pub ordinality_var: Option<String>,
563 /// Optional variable for 0-based element position (OFFSET).
564 pub offset_var: Option<String>,
565 /// Input operator.
566 pub input: Box<LogicalOperator>,
567}
568
569/// Merge a pattern (match or create).
570///
571/// MERGE tries to match a pattern in the graph. If found, returns the existing
572/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
573/// (optionally applying ON CREATE SET).
574#[derive(Debug, Clone)]
575pub struct MergeOp {
576 /// The node to merge.
577 pub variable: String,
578 /// Labels to match/create.
579 pub labels: Vec<String>,
580 /// Properties that must match (used for both matching and creation).
581 pub match_properties: Vec<(String, LogicalExpression)>,
582 /// Properties to set on CREATE.
583 pub on_create: Vec<(String, LogicalExpression)>,
584 /// Properties to set on MATCH.
585 pub on_match: Vec<(String, LogicalExpression)>,
586 /// Input operator.
587 pub input: Box<LogicalOperator>,
588}
589
590/// Find shortest path between two nodes.
591///
592/// This operator uses Dijkstra's algorithm to find the shortest path(s)
593/// between a source node and a target node, optionally filtered by edge type.
594#[derive(Debug, Clone)]
595pub struct ShortestPathOp {
596 /// Input operator providing source/target nodes.
597 pub input: Box<LogicalOperator>,
598 /// Variable name for the source node.
599 pub source_var: String,
600 /// Variable name for the target node.
601 pub target_var: String,
602 /// Optional edge type filter.
603 pub edge_type: Option<String>,
604 /// Direction of edge traversal.
605 pub direction: ExpandDirection,
606 /// Variable name to bind the path result.
607 pub path_alias: String,
608 /// Whether to find all shortest paths (vs. just one).
609 pub all_paths: bool,
610}
611
// ==================== SPARQL Update Operators ====================
613
614/// Insert RDF triples.
615#[derive(Debug, Clone)]
616pub struct InsertTripleOp {
617 /// Subject of the triple.
618 pub subject: TripleComponent,
619 /// Predicate of the triple.
620 pub predicate: TripleComponent,
621 /// Object of the triple.
622 pub object: TripleComponent,
623 /// Named graph (optional).
624 pub graph: Option<String>,
625 /// Input operator (provides variable bindings).
626 pub input: Option<Box<LogicalOperator>>,
627}
628
629/// Delete RDF triples.
630#[derive(Debug, Clone)]
631pub struct DeleteTripleOp {
632 /// Subject pattern.
633 pub subject: TripleComponent,
634 /// Predicate pattern.
635 pub predicate: TripleComponent,
636 /// Object pattern.
637 pub object: TripleComponent,
638 /// Named graph (optional).
639 pub graph: Option<String>,
640 /// Input operator (provides variable bindings).
641 pub input: Option<Box<LogicalOperator>>,
642}
643
644/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
645///
646/// Per SPARQL 1.1 Update spec, this operator:
647/// 1. Evaluates the WHERE clause once to get bindings
648/// 2. Applies DELETE templates using those bindings
649/// 3. Applies INSERT templates using the SAME bindings
650///
651/// This ensures DELETE and INSERT see consistent data.
652#[derive(Debug, Clone)]
653pub struct ModifyOp {
654 /// DELETE triple templates (patterns with variables).
655 pub delete_templates: Vec<TripleTemplate>,
656 /// INSERT triple templates (patterns with variables).
657 pub insert_templates: Vec<TripleTemplate>,
658 /// WHERE clause that provides variable bindings.
659 pub where_clause: Box<LogicalOperator>,
660 /// Named graph context (for WITH clause).
661 pub graph: Option<String>,
662}
663
664/// A triple template for DELETE/INSERT operations.
665#[derive(Debug, Clone)]
666pub struct TripleTemplate {
667 /// Subject (may be a variable).
668 pub subject: TripleComponent,
669 /// Predicate (may be a variable).
670 pub predicate: TripleComponent,
671 /// Object (may be a variable or literal).
672 pub object: TripleComponent,
673 /// Named graph (optional).
674 pub graph: Option<String>,
675}
676
/// Clear all triples from a graph.
#[derive(Debug, Clone)]
pub struct ClearGraphOp {
    /// Target graph (`None` = default graph, `Some("")` = all named,
    /// `Some(iri)` = specific graph).
    pub graph: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
685
/// Create a new named graph.
#[derive(Debug, Clone)]
pub struct CreateGraphOp {
    /// IRI of the graph to create.
    pub graph: String,
    /// Whether to silently ignore if graph already exists.
    pub silent: bool,
}
694
/// Drop (remove) a named graph.
#[derive(Debug, Clone)]
pub struct DropGraphOp {
    /// Target graph (`None` = default graph).
    pub graph: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
703
/// Load data from a URL into a graph.
#[derive(Debug, Clone)]
pub struct LoadGraphOp {
    /// Source URL to load data from.
    pub source: String,
    /// Destination graph (`None` = default graph).
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
714
/// Copy triples from one graph to another.
#[derive(Debug, Clone)]
pub struct CopyGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
725
/// Move triples from one graph to another.
#[derive(Debug, Clone)]
pub struct MoveGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
736
/// Add (merge) triples from one graph to another.
#[derive(Debug, Clone)]
pub struct AddGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
747
// ==================== Vector Search Operators ====================
749
750/// Vector similarity scan operation.
751///
752/// Performs approximate nearest neighbor search using a vector index (HNSW)
753/// or brute-force search for small datasets. Returns nodes/edges whose
754/// embeddings are similar to the query vector.
755///
756/// # Example GQL
757///
758/// ```gql
759/// MATCH (m:Movie)
760/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
761/// RETURN m.title
762/// ```
763#[derive(Debug, Clone)]
764pub struct VectorScanOp {
765 /// Variable name to bind matching entities to.
766 pub variable: String,
767 /// Name of the vector index to use (None = brute-force).
768 pub index_name: Option<String>,
769 /// Property containing the vector embedding.
770 pub property: String,
771 /// Optional label filter (scan only nodes with this label).
772 pub label: Option<String>,
773 /// The query vector expression.
774 pub query_vector: LogicalExpression,
775 /// Number of nearest neighbors to return.
776 pub k: usize,
777 /// Distance metric (None = use index default, typically cosine).
778 pub metric: Option<VectorMetric>,
779 /// Minimum similarity threshold (filters results below this).
780 pub min_similarity: Option<f32>,
781 /// Maximum distance threshold (filters results above this).
782 pub max_distance: Option<f32>,
783 /// Input operator (for hybrid queries combining graph + vector).
784 pub input: Option<Box<LogicalOperator>>,
785}
786
/// Vector distance/similarity metric for vector scan operations.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VectorMetric {
    /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
    Cosine,
    /// Euclidean (L2) distance. Best when magnitude matters.
    Euclidean,
    /// Dot product. Best for maximum inner product search.
    DotProduct,
    /// Manhattan (L1) distance. Less sensitive to outliers.
    Manhattan,
}
799
800/// Join graph patterns with vector similarity search.
801///
802/// This operator takes entities from the left input and computes vector
803/// similarity against a query vector, outputting (entity, distance) pairs.
804///
805/// # Use Cases
806///
807/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
808/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
809/// 3. **Filtering by similarity**: Join with threshold-based filtering
810///
811/// # Example
812///
813/// ```gql
814/// // Find movies similar to what the user liked
815/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
816/// WITH avg(liked.embedding) AS user_taste
817/// VECTOR JOIN (m:Movie) ON m.embedding
818/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
819/// RETURN m.title
820/// ```
821#[derive(Debug, Clone)]
822pub struct VectorJoinOp {
823 /// Input operator providing entities to match against.
824 pub input: Box<LogicalOperator>,
825 /// Variable from input to extract vectors from (for entity-to-entity similarity).
826 /// If None, uses `query_vector` directly.
827 pub left_vector_variable: Option<String>,
828 /// Property containing the left vector (used with `left_vector_variable`).
829 pub left_property: Option<String>,
830 /// The query vector expression (constant or computed).
831 pub query_vector: LogicalExpression,
832 /// Variable name to bind the right-side matching entities.
833 pub right_variable: String,
834 /// Property containing the right-side vector embeddings.
835 pub right_property: String,
836 /// Optional label filter for right-side entities.
837 pub right_label: Option<String>,
838 /// Name of vector index on right side (None = brute-force).
839 pub index_name: Option<String>,
840 /// Number of nearest neighbors per left-side entity.
841 pub k: usize,
842 /// Distance metric.
843 pub metric: Option<VectorMetric>,
844 /// Minimum similarity threshold.
845 pub min_similarity: Option<f32>,
846 /// Maximum distance threshold.
847 pub max_distance: Option<f32>,
848 /// Variable to bind the distance/similarity score.
849 pub score_variable: Option<String>,
850}
851
852/// Return results (terminal operator).
853#[derive(Debug, Clone)]
854pub struct ReturnOp {
855 /// Items to return.
856 pub items: Vec<ReturnItem>,
857 /// Whether to return distinct results.
858 pub distinct: bool,
859 /// Input operator.
860 pub input: Box<LogicalOperator>,
861}
862
863/// A single return item.
864#[derive(Debug, Clone)]
865pub struct ReturnItem {
866 /// Expression to return.
867 pub expression: LogicalExpression,
868 /// Alias for the result column.
869 pub alias: Option<String>,
870}
871
872/// Define a property graph schema (SQL/PGQ DDL).
873#[derive(Debug, Clone)]
874pub struct CreatePropertyGraphOp {
875 /// Graph name.
876 pub name: String,
877 /// Node table schemas (label name + column definitions).
878 pub node_tables: Vec<PropertyGraphNodeTable>,
879 /// Edge table schemas (type name + column definitions + references).
880 pub edge_tables: Vec<PropertyGraphEdgeTable>,
881}
882
/// A node table in a property graph definition.
#[derive(Debug, Clone)]
pub struct PropertyGraphNodeTable {
    /// Table name (maps to a node label).
    pub name: String,
    /// Column definitions as (name, type_name) pairs.
    pub columns: Vec<(String, String)>,
}
891
/// An edge table in a property graph definition.
#[derive(Debug, Clone)]
pub struct PropertyGraphEdgeTable {
    /// Table name (maps to an edge type).
    pub name: String,
    /// Column definitions as (name, type_name) pairs.
    pub columns: Vec<(String, String)>,
    /// Source node table name.
    pub source_table: String,
    /// Target node table name.
    pub target_table: String,
}
904
// ==================== Procedure Call Types ====================
906
907/// A CALL procedure operation.
908///
909/// ```text
910/// CALL grafeo.pagerank({damping: 0.85}) YIELD nodeId, score
911/// ```
912#[derive(Debug, Clone)]
913pub struct CallProcedureOp {
914 /// Dotted procedure name, e.g. `["grafeo", "pagerank"]`.
915 pub name: Vec<String>,
916 /// Argument expressions (constants in Phase 1).
917 pub arguments: Vec<LogicalExpression>,
918 /// Optional YIELD clause: which columns to expose + aliases.
919 pub yield_items: Option<Vec<ProcedureYield>>,
920}
921
/// A single YIELD item in a procedure call.
#[derive(Debug, Clone)]
pub struct ProcedureYield {
    /// Column name from the procedure result.
    pub field_name: String,
    /// Optional alias (`YIELD score AS rank`).
    pub alias: Option<String>,
}
930
931/// A logical expression.
932#[derive(Debug, Clone)]
933pub enum LogicalExpression {
934 /// A literal value.
935 Literal(Value),
936
937 /// A variable reference.
938 Variable(String),
939
940 /// Property access (e.g., n.name).
941 Property {
942 /// The variable to access.
943 variable: String,
944 /// The property name.
945 property: String,
946 },
947
948 /// Binary operation.
949 Binary {
950 /// Left operand.
951 left: Box<LogicalExpression>,
952 /// Operator.
953 op: BinaryOp,
954 /// Right operand.
955 right: Box<LogicalExpression>,
956 },
957
958 /// Unary operation.
959 Unary {
960 /// Operator.
961 op: UnaryOp,
962 /// Operand.
963 operand: Box<LogicalExpression>,
964 },
965
966 /// Function call.
967 FunctionCall {
968 /// Function name.
969 name: String,
970 /// Arguments.
971 args: Vec<LogicalExpression>,
972 /// Whether DISTINCT is applied (e.g., COUNT(DISTINCT x)).
973 distinct: bool,
974 },
975
976 /// List literal.
977 List(Vec<LogicalExpression>),
978
979 /// Map literal (e.g., {name: 'Alice', age: 30}).
980 Map(Vec<(String, LogicalExpression)>),
981
982 /// Index access (e.g., `list[0]`).
983 IndexAccess {
984 /// The base expression (typically a list or string).
985 base: Box<LogicalExpression>,
986 /// The index expression.
987 index: Box<LogicalExpression>,
988 },
989
990 /// Slice access (e.g., list[1..3]).
991 SliceAccess {
992 /// The base expression (typically a list or string).
993 base: Box<LogicalExpression>,
994 /// Start index (None means from beginning).
995 start: Option<Box<LogicalExpression>>,
996 /// End index (None means to end).
997 end: Option<Box<LogicalExpression>>,
998 },
999
1000 /// CASE expression.
1001 Case {
1002 /// Test expression (for simple CASE).
1003 operand: Option<Box<LogicalExpression>>,
1004 /// WHEN clauses.
1005 when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
1006 /// ELSE clause.
1007 else_clause: Option<Box<LogicalExpression>>,
1008 },
1009
1010 /// Parameter reference.
1011 Parameter(String),
1012
1013 /// Labels of a node.
1014 Labels(String),
1015
1016 /// Type of an edge.
1017 Type(String),
1018
1019 /// ID of a node or edge.
1020 Id(String),
1021
1022 /// List comprehension: [x IN list WHERE predicate | expression]
1023 ListComprehension {
1024 /// Variable name for each element.
1025 variable: String,
1026 /// The source list expression.
1027 list_expr: Box<LogicalExpression>,
1028 /// Optional filter predicate.
1029 filter_expr: Option<Box<LogicalExpression>>,
1030 /// The mapping expression for each element.
1031 map_expr: Box<LogicalExpression>,
1032 },
1033
1034 /// EXISTS subquery.
1035 ExistsSubquery(Box<LogicalOperator>),
1036
1037 /// COUNT subquery.
1038 CountSubquery(Box<LogicalOperator>),
1039}
1040
/// Binary operator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp {
    /// Equality comparison (=).
    Eq,
    /// Inequality comparison (<>).
    Ne,
    /// Less than (<).
    Lt,
    /// Less than or equal (<=).
    Le,
    /// Greater than (>).
    Gt,
    /// Greater than or equal (>=).
    Ge,

    /// Logical AND.
    And,
    /// Logical OR.
    Or,
    /// Logical XOR.
    Xor,

    /// Addition (+).
    Add,
    /// Subtraction (-).
    Sub,
    /// Multiplication (*).
    Mul,
    /// Division (/).
    Div,
    /// Modulo (%).
    Mod,

    /// String concatenation.
    Concat,
    /// String starts with.
    StartsWith,
    /// String ends with.
    EndsWith,
    /// String contains.
    Contains,

    /// Collection membership (IN).
    In,
    /// Pattern matching (LIKE).
    Like,
    /// Regex matching (=~).
    Regex,
    /// Power/exponentiation (^).
    Pow,
}
1093
/// Unary operator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
    /// Logical NOT.
    Not,
    /// Numeric negation.
    Neg,
    /// IS NULL check.
    IsNull,
    /// IS NOT NULL check.
    IsNotNull,
}
1106
#[cfg(test)]
mod tests {
    use super::*;

    /// A `Return` over a labelled `NodeScan` keeps its shape and field values.
    #[test]
    fn test_simple_node_scan_plan() {
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![ReturnItem {
                expression: LogicalExpression::Variable("n".into()),
                alias: None,
            }],
            distinct: false,
            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
                variable: "n".into(),
                label: Some("Person".into()),
                input: None,
            })),
        }));

        // Verify structure
        if let LogicalOperator::Return(ret) = &plan.root {
            assert_eq!(ret.items.len(), 1);
            assert!(!ret.distinct);
            if let LogicalOperator::NodeScan(scan) = ret.input.as_ref() {
                assert_eq!(scan.variable, "n");
                assert_eq!(scan.label, Some("Person".into()));
            } else {
                panic!("Expected NodeScan");
            }
        } else {
            panic!("Expected Return");
        }
    }

    /// A `Return` over a `Filter` exposes the binary predicate's operator.
    #[test]
    fn test_filter_plan() {
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![ReturnItem {
                expression: LogicalExpression::Property {
                    variable: "n".into(),
                    property: "name".into(),
                },
                alias: Some("name".into()),
            }],
            distinct: false,
            input: Box::new(LogicalOperator::Filter(FilterOp {
                predicate: LogicalExpression::Binary {
                    left: Box::new(LogicalExpression::Property {
                        variable: "n".into(),
                        property: "age".into(),
                    }),
                    op: BinaryOp::Gt,
                    right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
                },
                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
                    variable: "n".into(),
                    label: Some("Person".into()),
                    input: None,
                })),
            })),
        }));

        if let LogicalOperator::Return(ret) = &plan.root {
            if let LogicalOperator::Filter(filter) = ret.input.as_ref() {
                if let LogicalExpression::Binary { op, .. } = &filter.predicate {
                    assert_eq!(*op, BinaryOp::Gt);
                } else {
                    panic!("Expected Binary expression");
                }
            } else {
                panic!("Expected Filter");
            }
        } else {
            panic!("Expected Return");
        }
    }
}
1183}