grafeo_engine/query/plan.rs
1//! Logical query plan representation.
2//!
3//! The logical plan is the intermediate representation between parsed queries
4//! and physical execution. Both GQL and Cypher queries are translated to this
5//! common representation.
6
7use grafeo_common::types::Value;
8
9/// A logical query plan.
10#[derive(Debug, Clone)]
11pub struct LogicalPlan {
12 /// The root operator of the plan.
13 pub root: LogicalOperator,
14}
15
16impl LogicalPlan {
17 /// Creates a new logical plan with the given root operator.
18 pub fn new(root: LogicalOperator) -> Self {
19 Self { root }
20 }
21}
22
23/// A logical operator in the query plan.
24#[derive(Debug, Clone)]
25pub enum LogicalOperator {
26 /// Scan all nodes, optionally filtered by label.
27 NodeScan(NodeScanOp),
28
29 /// Scan all edges, optionally filtered by type.
30 EdgeScan(EdgeScanOp),
31
32 /// Expand from nodes to neighbors via edges.
33 Expand(ExpandOp),
34
35 /// Filter rows based on a predicate.
36 Filter(FilterOp),
37
38 /// Project specific columns.
39 Project(ProjectOp),
40
41 /// Join two inputs.
42 Join(JoinOp),
43
44 /// Aggregate with grouping.
45 Aggregate(AggregateOp),
46
47 /// Limit the number of results.
48 Limit(LimitOp),
49
50 /// Skip a number of results.
51 Skip(SkipOp),
52
53 /// Sort results.
54 Sort(SortOp),
55
56 /// Remove duplicate results.
57 Distinct(DistinctOp),
58
59 /// Create a new node.
60 CreateNode(CreateNodeOp),
61
62 /// Create a new edge.
63 CreateEdge(CreateEdgeOp),
64
65 /// Delete a node.
66 DeleteNode(DeleteNodeOp),
67
68 /// Delete an edge.
69 DeleteEdge(DeleteEdgeOp),
70
71 /// Set properties on a node or edge.
72 SetProperty(SetPropertyOp),
73
74 /// Add labels to a node.
75 AddLabel(AddLabelOp),
76
77 /// Remove labels from a node.
78 RemoveLabel(RemoveLabelOp),
79
80 /// Return results (terminal operator).
81 Return(ReturnOp),
82
83 /// Empty result set.
84 Empty,
85
86 // ==================== RDF/SPARQL Operators ====================
87 /// Scan RDF triples matching a pattern.
88 TripleScan(TripleScanOp),
89
90 /// Union of multiple result sets.
91 Union(UnionOp),
92
93 /// Left outer join for OPTIONAL patterns.
94 LeftJoin(LeftJoinOp),
95
96 /// Anti-join for MINUS patterns.
97 AntiJoin(AntiJoinOp),
98
99 /// Bind a variable to an expression.
100 Bind(BindOp),
101
102 /// Unwind a list into individual rows.
103 Unwind(UnwindOp),
104
105 /// Merge a pattern (match or create).
106 Merge(MergeOp),
107
108 /// Find shortest path between nodes.
109 ShortestPath(ShortestPathOp),
110
111 // ==================== SPARQL Update Operators ====================
112 /// Insert RDF triples.
113 InsertTriple(InsertTripleOp),
114
115 /// Delete RDF triples.
116 DeleteTriple(DeleteTripleOp),
117
118 /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
119 /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
120 Modify(ModifyOp),
121
122 /// Clear a graph (remove all triples).
123 ClearGraph(ClearGraphOp),
124
125 /// Create a new named graph.
126 CreateGraph(CreateGraphOp),
127
128 /// Drop (remove) a named graph.
129 DropGraph(DropGraphOp),
130
131 /// Load data from a URL into a graph.
132 LoadGraph(LoadGraphOp),
133
134 /// Copy triples from one graph to another.
135 CopyGraph(CopyGraphOp),
136
137 /// Move triples from one graph to another.
138 MoveGraph(MoveGraphOp),
139
140 /// Add (merge) triples from one graph to another.
141 AddGraph(AddGraphOp),
142
143 // ==================== Vector Search Operators ====================
144 /// Scan using vector similarity search.
145 VectorScan(VectorScanOp),
146
147 /// Join graph patterns with vector similarity search.
148 ///
149 /// Computes vector distances between entities from the left input and
150 /// a query vector, then joins with similarity scores. Useful for:
151 /// - Filtering graph traversal results by vector similarity
152 /// - Computing aggregated embeddings and finding similar entities
153 /// - Combining multiple vector sources with graph structure
154 VectorJoin(VectorJoinOp),
155
156 // ==================== DDL Operators ====================
157 /// Define a property graph schema (SQL/PGQ DDL).
158 CreatePropertyGraph(CreatePropertyGraphOp),
159}
160
161/// Scan nodes from the graph.
162#[derive(Debug, Clone)]
163pub struct NodeScanOp {
164 /// Variable name to bind the node to.
165 pub variable: String,
166 /// Optional label filter.
167 pub label: Option<String>,
168 /// Child operator (if any, for chained patterns).
169 pub input: Option<Box<LogicalOperator>>,
170}
171
172/// Scan edges from the graph.
173#[derive(Debug, Clone)]
174pub struct EdgeScanOp {
175 /// Variable name to bind the edge to.
176 pub variable: String,
177 /// Optional edge type filter.
178 pub edge_type: Option<String>,
179 /// Child operator (if any).
180 pub input: Option<Box<LogicalOperator>>,
181}
182
183/// Expand from nodes to their neighbors.
184#[derive(Debug, Clone)]
185pub struct ExpandOp {
186 /// Source node variable.
187 pub from_variable: String,
188 /// Target node variable to bind.
189 pub to_variable: String,
190 /// Edge variable to bind (optional).
191 pub edge_variable: Option<String>,
192 /// Direction of expansion.
193 pub direction: ExpandDirection,
194 /// Optional edge type filter.
195 pub edge_type: Option<String>,
196 /// Minimum hops (for variable-length patterns).
197 pub min_hops: u32,
198 /// Maximum hops (for variable-length patterns).
199 pub max_hops: Option<u32>,
200 /// Input operator.
201 pub input: Box<LogicalOperator>,
202 /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
203 /// When set, a path length column will be output under this name.
204 pub path_alias: Option<String>,
205}
206
207/// Direction for edge expansion.
208#[derive(Debug, Clone, Copy, PartialEq, Eq)]
209pub enum ExpandDirection {
210 /// Follow outgoing edges.
211 Outgoing,
212 /// Follow incoming edges.
213 Incoming,
214 /// Follow edges in either direction.
215 Both,
216}
217
218/// Join two inputs.
219#[derive(Debug, Clone)]
220pub struct JoinOp {
221 /// Left input.
222 pub left: Box<LogicalOperator>,
223 /// Right input.
224 pub right: Box<LogicalOperator>,
225 /// Join type.
226 pub join_type: JoinType,
227 /// Join conditions.
228 pub conditions: Vec<JoinCondition>,
229}
230
231/// Join type.
232#[derive(Debug, Clone, Copy, PartialEq, Eq)]
233pub enum JoinType {
234 /// Inner join.
235 Inner,
236 /// Left outer join.
237 Left,
238 /// Right outer join.
239 Right,
240 /// Full outer join.
241 Full,
242 /// Cross join (Cartesian product).
243 Cross,
244 /// Semi join (returns left rows with matching right rows).
245 Semi,
246 /// Anti join (returns left rows without matching right rows).
247 Anti,
248}
249
250/// A join condition.
251#[derive(Debug, Clone)]
252pub struct JoinCondition {
253 /// Left expression.
254 pub left: LogicalExpression,
255 /// Right expression.
256 pub right: LogicalExpression,
257}
258
259/// Aggregate with grouping.
260#[derive(Debug, Clone)]
261pub struct AggregateOp {
262 /// Group by expressions.
263 pub group_by: Vec<LogicalExpression>,
264 /// Aggregate functions.
265 pub aggregates: Vec<AggregateExpr>,
266 /// Input operator.
267 pub input: Box<LogicalOperator>,
268 /// HAVING clause filter (applied after aggregation).
269 pub having: Option<LogicalExpression>,
270}
271
272/// An aggregate expression.
273#[derive(Debug, Clone)]
274pub struct AggregateExpr {
275 /// Aggregate function.
276 pub function: AggregateFunction,
277 /// Expression to aggregate.
278 pub expression: Option<LogicalExpression>,
279 /// Whether to use DISTINCT.
280 pub distinct: bool,
281 /// Alias for the result.
282 pub alias: Option<String>,
283 /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
284 pub percentile: Option<f64>,
285}
286
287/// Aggregate function.
288#[derive(Debug, Clone, Copy, PartialEq, Eq)]
289pub enum AggregateFunction {
290 /// Count all rows (COUNT(*)).
291 Count,
292 /// Count non-null values (COUNT(expr)).
293 CountNonNull,
294 /// Sum values.
295 Sum,
296 /// Average values.
297 Avg,
298 /// Minimum value.
299 Min,
300 /// Maximum value.
301 Max,
302 /// Collect into list.
303 Collect,
304 /// Sample standard deviation (STDEV).
305 StdDev,
306 /// Population standard deviation (STDEVP).
307 StdDevPop,
308 /// Discrete percentile (PERCENTILE_DISC).
309 PercentileDisc,
310 /// Continuous percentile (PERCENTILE_CONT).
311 PercentileCont,
312}
313
314/// Filter rows based on a predicate.
315#[derive(Debug, Clone)]
316pub struct FilterOp {
317 /// The filter predicate.
318 pub predicate: LogicalExpression,
319 /// Input operator.
320 pub input: Box<LogicalOperator>,
321}
322
323/// Project specific columns.
324#[derive(Debug, Clone)]
325pub struct ProjectOp {
326 /// Columns to project.
327 pub projections: Vec<Projection>,
328 /// Input operator.
329 pub input: Box<LogicalOperator>,
330}
331
332/// A single projection (column selection or computation).
333#[derive(Debug, Clone)]
334pub struct Projection {
335 /// Expression to compute.
336 pub expression: LogicalExpression,
337 /// Alias for the result.
338 pub alias: Option<String>,
339}
340
341/// Limit the number of results.
342#[derive(Debug, Clone)]
343pub struct LimitOp {
344 /// Maximum number of rows to return.
345 pub count: usize,
346 /// Input operator.
347 pub input: Box<LogicalOperator>,
348}
349
350/// Skip a number of results.
351#[derive(Debug, Clone)]
352pub struct SkipOp {
353 /// Number of rows to skip.
354 pub count: usize,
355 /// Input operator.
356 pub input: Box<LogicalOperator>,
357}
358
359/// Sort results.
360#[derive(Debug, Clone)]
361pub struct SortOp {
362 /// Sort keys.
363 pub keys: Vec<SortKey>,
364 /// Input operator.
365 pub input: Box<LogicalOperator>,
366}
367
368/// A sort key.
369#[derive(Debug, Clone)]
370pub struct SortKey {
371 /// Expression to sort by.
372 pub expression: LogicalExpression,
373 /// Sort order.
374 pub order: SortOrder,
375}
376
377/// Sort order.
378#[derive(Debug, Clone, Copy, PartialEq, Eq)]
379pub enum SortOrder {
380 /// Ascending order.
381 Ascending,
382 /// Descending order.
383 Descending,
384}
385
386/// Remove duplicate results.
387#[derive(Debug, Clone)]
388pub struct DistinctOp {
389 /// Input operator.
390 pub input: Box<LogicalOperator>,
391 /// Optional columns to use for deduplication.
392 /// If None, all columns are used.
393 pub columns: Option<Vec<String>>,
394}
395
396/// Create a new node.
397#[derive(Debug, Clone)]
398pub struct CreateNodeOp {
399 /// Variable name to bind the created node to.
400 pub variable: String,
401 /// Labels for the new node.
402 pub labels: Vec<String>,
403 /// Properties for the new node.
404 pub properties: Vec<(String, LogicalExpression)>,
405 /// Input operator (for chained creates).
406 pub input: Option<Box<LogicalOperator>>,
407}
408
409/// Create a new edge.
410#[derive(Debug, Clone)]
411pub struct CreateEdgeOp {
412 /// Variable name to bind the created edge to.
413 pub variable: Option<String>,
414 /// Source node variable.
415 pub from_variable: String,
416 /// Target node variable.
417 pub to_variable: String,
418 /// Edge type.
419 pub edge_type: String,
420 /// Properties for the new edge.
421 pub properties: Vec<(String, LogicalExpression)>,
422 /// Input operator.
423 pub input: Box<LogicalOperator>,
424}
425
426/// Delete a node.
427#[derive(Debug, Clone)]
428pub struct DeleteNodeOp {
429 /// Variable of the node to delete.
430 pub variable: String,
431 /// Whether to detach (delete connected edges) before deleting.
432 pub detach: bool,
433 /// Input operator.
434 pub input: Box<LogicalOperator>,
435}
436
437/// Delete an edge.
438#[derive(Debug, Clone)]
439pub struct DeleteEdgeOp {
440 /// Variable of the edge to delete.
441 pub variable: String,
442 /// Input operator.
443 pub input: Box<LogicalOperator>,
444}
445
446/// Set properties on a node or edge.
447#[derive(Debug, Clone)]
448pub struct SetPropertyOp {
449 /// Variable of the entity to update.
450 pub variable: String,
451 /// Properties to set (name -> expression).
452 pub properties: Vec<(String, LogicalExpression)>,
453 /// Whether to replace all properties (vs. merge).
454 pub replace: bool,
455 /// Input operator.
456 pub input: Box<LogicalOperator>,
457}
458
459/// Add labels to a node.
460#[derive(Debug, Clone)]
461pub struct AddLabelOp {
462 /// Variable of the node to update.
463 pub variable: String,
464 /// Labels to add.
465 pub labels: Vec<String>,
466 /// Input operator.
467 pub input: Box<LogicalOperator>,
468}
469
470/// Remove labels from a node.
471#[derive(Debug, Clone)]
472pub struct RemoveLabelOp {
473 /// Variable of the node to update.
474 pub variable: String,
475 /// Labels to remove.
476 pub labels: Vec<String>,
477 /// Input operator.
478 pub input: Box<LogicalOperator>,
479}
480
481// ==================== RDF/SPARQL Operators ====================
482
483/// Scan RDF triples matching a pattern.
484#[derive(Debug, Clone)]
485pub struct TripleScanOp {
486 /// Subject pattern (variable name or IRI).
487 pub subject: TripleComponent,
488 /// Predicate pattern (variable name or IRI).
489 pub predicate: TripleComponent,
490 /// Object pattern (variable name, IRI, or literal).
491 pub object: TripleComponent,
492 /// Named graph (optional).
493 pub graph: Option<TripleComponent>,
494 /// Input operator (for chained patterns).
495 pub input: Option<Box<LogicalOperator>>,
496}
497
498/// A component of a triple pattern.
499#[derive(Debug, Clone)]
500pub enum TripleComponent {
501 /// A variable to bind.
502 Variable(String),
503 /// A constant IRI.
504 Iri(String),
505 /// A constant literal value.
506 Literal(Value),
507}
508
509/// Union of multiple result sets.
510#[derive(Debug, Clone)]
511pub struct UnionOp {
512 /// Inputs to union together.
513 pub inputs: Vec<LogicalOperator>,
514}
515
516/// Left outer join for OPTIONAL patterns.
517#[derive(Debug, Clone)]
518pub struct LeftJoinOp {
519 /// Left (required) input.
520 pub left: Box<LogicalOperator>,
521 /// Right (optional) input.
522 pub right: Box<LogicalOperator>,
523 /// Optional filter condition.
524 pub condition: Option<LogicalExpression>,
525}
526
527/// Anti-join for MINUS patterns.
528#[derive(Debug, Clone)]
529pub struct AntiJoinOp {
530 /// Left input (results to keep if no match on right).
531 pub left: Box<LogicalOperator>,
532 /// Right input (patterns to exclude).
533 pub right: Box<LogicalOperator>,
534}
535
536/// Bind a variable to an expression.
537#[derive(Debug, Clone)]
538pub struct BindOp {
539 /// Expression to compute.
540 pub expression: LogicalExpression,
541 /// Variable to bind the result to.
542 pub variable: String,
543 /// Input operator.
544 pub input: Box<LogicalOperator>,
545}
546
547/// Unwind a list into individual rows.
548///
549/// For each input row, evaluates the expression (which should return a list)
550/// and emits one row for each element in the list.
551#[derive(Debug, Clone)]
552pub struct UnwindOp {
553 /// The list expression to unwind.
554 pub expression: LogicalExpression,
555 /// The variable name for each element.
556 pub variable: String,
557 /// Input operator.
558 pub input: Box<LogicalOperator>,
559}
560
561/// Merge a pattern (match or create).
562///
563/// MERGE tries to match a pattern in the graph. If found, returns the existing
564/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
565/// (optionally applying ON CREATE SET).
566#[derive(Debug, Clone)]
567pub struct MergeOp {
568 /// The node to merge.
569 pub variable: String,
570 /// Labels to match/create.
571 pub labels: Vec<String>,
572 /// Properties that must match (used for both matching and creation).
573 pub match_properties: Vec<(String, LogicalExpression)>,
574 /// Properties to set on CREATE.
575 pub on_create: Vec<(String, LogicalExpression)>,
576 /// Properties to set on MATCH.
577 pub on_match: Vec<(String, LogicalExpression)>,
578 /// Input operator.
579 pub input: Box<LogicalOperator>,
580}
581
582/// Find shortest path between two nodes.
583///
584/// This operator uses Dijkstra's algorithm to find the shortest path(s)
585/// between a source node and a target node, optionally filtered by edge type.
586#[derive(Debug, Clone)]
587pub struct ShortestPathOp {
588 /// Input operator providing source/target nodes.
589 pub input: Box<LogicalOperator>,
590 /// Variable name for the source node.
591 pub source_var: String,
592 /// Variable name for the target node.
593 pub target_var: String,
594 /// Optional edge type filter.
595 pub edge_type: Option<String>,
596 /// Direction of edge traversal.
597 pub direction: ExpandDirection,
598 /// Variable name to bind the path result.
599 pub path_alias: String,
600 /// Whether to find all shortest paths (vs. just one).
601 pub all_paths: bool,
602}
603
604// ==================== SPARQL Update Operators ====================
605
606/// Insert RDF triples.
607#[derive(Debug, Clone)]
608pub struct InsertTripleOp {
609 /// Subject of the triple.
610 pub subject: TripleComponent,
611 /// Predicate of the triple.
612 pub predicate: TripleComponent,
613 /// Object of the triple.
614 pub object: TripleComponent,
615 /// Named graph (optional).
616 pub graph: Option<String>,
617 /// Input operator (provides variable bindings).
618 pub input: Option<Box<LogicalOperator>>,
619}
620
621/// Delete RDF triples.
622#[derive(Debug, Clone)]
623pub struct DeleteTripleOp {
624 /// Subject pattern.
625 pub subject: TripleComponent,
626 /// Predicate pattern.
627 pub predicate: TripleComponent,
628 /// Object pattern.
629 pub object: TripleComponent,
630 /// Named graph (optional).
631 pub graph: Option<String>,
632 /// Input operator (provides variable bindings).
633 pub input: Option<Box<LogicalOperator>>,
634}
635
636/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
637///
638/// Per SPARQL 1.1 Update spec, this operator:
639/// 1. Evaluates the WHERE clause once to get bindings
640/// 2. Applies DELETE templates using those bindings
641/// 3. Applies INSERT templates using the SAME bindings
642///
643/// This ensures DELETE and INSERT see consistent data.
644#[derive(Debug, Clone)]
645pub struct ModifyOp {
646 /// DELETE triple templates (patterns with variables).
647 pub delete_templates: Vec<TripleTemplate>,
648 /// INSERT triple templates (patterns with variables).
649 pub insert_templates: Vec<TripleTemplate>,
650 /// WHERE clause that provides variable bindings.
651 pub where_clause: Box<LogicalOperator>,
652 /// Named graph context (for WITH clause).
653 pub graph: Option<String>,
654}
655
656/// A triple template for DELETE/INSERT operations.
657#[derive(Debug, Clone)]
658pub struct TripleTemplate {
659 /// Subject (may be a variable).
660 pub subject: TripleComponent,
661 /// Predicate (may be a variable).
662 pub predicate: TripleComponent,
663 /// Object (may be a variable or literal).
664 pub object: TripleComponent,
665 /// Named graph (optional).
666 pub graph: Option<String>,
667}
668
669/// Clear all triples from a graph.
670#[derive(Debug, Clone)]
671pub struct ClearGraphOp {
672 /// Target graph (None = default graph, Some("") = all named, Some(iri) = specific graph).
673 pub graph: Option<String>,
674 /// Whether to silently ignore errors.
675 pub silent: bool,
676}
677
678/// Create a new named graph.
679#[derive(Debug, Clone)]
680pub struct CreateGraphOp {
681 /// IRI of the graph to create.
682 pub graph: String,
683 /// Whether to silently ignore if graph already exists.
684 pub silent: bool,
685}
686
687/// Drop (remove) a named graph.
688#[derive(Debug, Clone)]
689pub struct DropGraphOp {
690 /// Target graph (None = default graph).
691 pub graph: Option<String>,
692 /// Whether to silently ignore errors.
693 pub silent: bool,
694}
695
696/// Load data from a URL into a graph.
697#[derive(Debug, Clone)]
698pub struct LoadGraphOp {
699 /// Source URL to load data from.
700 pub source: String,
701 /// Destination graph (None = default graph).
702 pub destination: Option<String>,
703 /// Whether to silently ignore errors.
704 pub silent: bool,
705}
706
707/// Copy triples from one graph to another.
708#[derive(Debug, Clone)]
709pub struct CopyGraphOp {
710 /// Source graph.
711 pub source: Option<String>,
712 /// Destination graph.
713 pub destination: Option<String>,
714 /// Whether to silently ignore errors.
715 pub silent: bool,
716}
717
718/// Move triples from one graph to another.
719#[derive(Debug, Clone)]
720pub struct MoveGraphOp {
721 /// Source graph.
722 pub source: Option<String>,
723 /// Destination graph.
724 pub destination: Option<String>,
725 /// Whether to silently ignore errors.
726 pub silent: bool,
727}
728
729/// Add (merge) triples from one graph to another.
730#[derive(Debug, Clone)]
731pub struct AddGraphOp {
732 /// Source graph.
733 pub source: Option<String>,
734 /// Destination graph.
735 pub destination: Option<String>,
736 /// Whether to silently ignore errors.
737 pub silent: bool,
738}
739
740// ==================== Vector Search Operators ====================
741
742/// Vector similarity scan operation.
743///
744/// Performs approximate nearest neighbor search using a vector index (HNSW)
745/// or brute-force search for small datasets. Returns nodes/edges whose
746/// embeddings are similar to the query vector.
747///
748/// # Example GQL
749///
750/// ```gql
751/// MATCH (m:Movie)
752/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
753/// RETURN m.title
754/// ```
755#[derive(Debug, Clone)]
756pub struct VectorScanOp {
757 /// Variable name to bind matching entities to.
758 pub variable: String,
759 /// Name of the vector index to use (None = brute-force).
760 pub index_name: Option<String>,
761 /// Property containing the vector embedding.
762 pub property: String,
763 /// Optional label filter (scan only nodes with this label).
764 pub label: Option<String>,
765 /// The query vector expression.
766 pub query_vector: LogicalExpression,
767 /// Number of nearest neighbors to return.
768 pub k: usize,
769 /// Distance metric (None = use index default, typically cosine).
770 pub metric: Option<VectorMetric>,
771 /// Minimum similarity threshold (filters results below this).
772 pub min_similarity: Option<f32>,
773 /// Maximum distance threshold (filters results above this).
774 pub max_distance: Option<f32>,
775 /// Input operator (for hybrid queries combining graph + vector).
776 pub input: Option<Box<LogicalOperator>>,
777}
778
779/// Vector distance/similarity metric for vector scan operations.
780#[derive(Debug, Clone, Copy, PartialEq, Eq)]
781pub enum VectorMetric {
782 /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
783 Cosine,
784 /// Euclidean (L2) distance. Best when magnitude matters.
785 Euclidean,
786 /// Dot product. Best for maximum inner product search.
787 DotProduct,
788 /// Manhattan (L1) distance. Less sensitive to outliers.
789 Manhattan,
790}
791
792/// Join graph patterns with vector similarity search.
793///
794/// This operator takes entities from the left input and computes vector
795/// similarity against a query vector, outputting (entity, distance) pairs.
796///
797/// # Use Cases
798///
799/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
800/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
801/// 3. **Filtering by similarity**: Join with threshold-based filtering
802///
803/// # Example
804///
805/// ```gql
806/// // Find movies similar to what the user liked
807/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
808/// WITH avg(liked.embedding) AS user_taste
809/// VECTOR JOIN (m:Movie) ON m.embedding
810/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
811/// RETURN m.title
812/// ```
813#[derive(Debug, Clone)]
814pub struct VectorJoinOp {
815 /// Input operator providing entities to match against.
816 pub input: Box<LogicalOperator>,
817 /// Variable from input to extract vectors from (for entity-to-entity similarity).
818 /// If None, uses `query_vector` directly.
819 pub left_vector_variable: Option<String>,
820 /// Property containing the left vector (used with `left_vector_variable`).
821 pub left_property: Option<String>,
822 /// The query vector expression (constant or computed).
823 pub query_vector: LogicalExpression,
824 /// Variable name to bind the right-side matching entities.
825 pub right_variable: String,
826 /// Property containing the right-side vector embeddings.
827 pub right_property: String,
828 /// Optional label filter for right-side entities.
829 pub right_label: Option<String>,
830 /// Name of vector index on right side (None = brute-force).
831 pub index_name: Option<String>,
832 /// Number of nearest neighbors per left-side entity.
833 pub k: usize,
834 /// Distance metric.
835 pub metric: Option<VectorMetric>,
836 /// Minimum similarity threshold.
837 pub min_similarity: Option<f32>,
838 /// Maximum distance threshold.
839 pub max_distance: Option<f32>,
840 /// Variable to bind the distance/similarity score.
841 pub score_variable: Option<String>,
842}
843
844/// Return results (terminal operator).
845#[derive(Debug, Clone)]
846pub struct ReturnOp {
847 /// Items to return.
848 pub items: Vec<ReturnItem>,
849 /// Whether to return distinct results.
850 pub distinct: bool,
851 /// Input operator.
852 pub input: Box<LogicalOperator>,
853}
854
855/// A single return item.
856#[derive(Debug, Clone)]
857pub struct ReturnItem {
858 /// Expression to return.
859 pub expression: LogicalExpression,
860 /// Alias for the result column.
861 pub alias: Option<String>,
862}
863
864/// Define a property graph schema (SQL/PGQ DDL).
865#[derive(Debug, Clone)]
866pub struct CreatePropertyGraphOp {
867 /// Graph name.
868 pub name: String,
869 /// Node table schemas (label name + column definitions).
870 pub node_tables: Vec<PropertyGraphNodeTable>,
871 /// Edge table schemas (type name + column definitions + references).
872 pub edge_tables: Vec<PropertyGraphEdgeTable>,
873}
874
875/// A node table in a property graph definition.
876#[derive(Debug, Clone)]
877pub struct PropertyGraphNodeTable {
878 /// Table name (maps to a node label).
879 pub name: String,
880 /// Column definitions as (name, type_name) pairs.
881 pub columns: Vec<(String, String)>,
882}
883
884/// An edge table in a property graph definition.
885#[derive(Debug, Clone)]
886pub struct PropertyGraphEdgeTable {
887 /// Table name (maps to an edge type).
888 pub name: String,
889 /// Column definitions as (name, type_name) pairs.
890 pub columns: Vec<(String, String)>,
891 /// Source node table name.
892 pub source_table: String,
893 /// Target node table name.
894 pub target_table: String,
895}
896
897/// A logical expression.
898#[derive(Debug, Clone)]
899pub enum LogicalExpression {
900 /// A literal value.
901 Literal(Value),
902
903 /// A variable reference.
904 Variable(String),
905
906 /// Property access (e.g., n.name).
907 Property {
908 /// The variable to access.
909 variable: String,
910 /// The property name.
911 property: String,
912 },
913
914 /// Binary operation.
915 Binary {
916 /// Left operand.
917 left: Box<LogicalExpression>,
918 /// Operator.
919 op: BinaryOp,
920 /// Right operand.
921 right: Box<LogicalExpression>,
922 },
923
924 /// Unary operation.
925 Unary {
926 /// Operator.
927 op: UnaryOp,
928 /// Operand.
929 operand: Box<LogicalExpression>,
930 },
931
932 /// Function call.
933 FunctionCall {
934 /// Function name.
935 name: String,
936 /// Arguments.
937 args: Vec<LogicalExpression>,
938 /// Whether DISTINCT is applied (e.g., COUNT(DISTINCT x)).
939 distinct: bool,
940 },
941
942 /// List literal.
943 List(Vec<LogicalExpression>),
944
945 /// Map literal (e.g., {name: 'Alice', age: 30}).
946 Map(Vec<(String, LogicalExpression)>),
947
948 /// Index access (e.g., `list[0]`).
949 IndexAccess {
950 /// The base expression (typically a list or string).
951 base: Box<LogicalExpression>,
952 /// The index expression.
953 index: Box<LogicalExpression>,
954 },
955
956 /// Slice access (e.g., list[1..3]).
957 SliceAccess {
958 /// The base expression (typically a list or string).
959 base: Box<LogicalExpression>,
960 /// Start index (None means from beginning).
961 start: Option<Box<LogicalExpression>>,
962 /// End index (None means to end).
963 end: Option<Box<LogicalExpression>>,
964 },
965
966 /// CASE expression.
967 Case {
968 /// Test expression (for simple CASE).
969 operand: Option<Box<LogicalExpression>>,
970 /// WHEN clauses.
971 when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
972 /// ELSE clause.
973 else_clause: Option<Box<LogicalExpression>>,
974 },
975
976 /// Parameter reference.
977 Parameter(String),
978
979 /// Labels of a node.
980 Labels(String),
981
982 /// Type of an edge.
983 Type(String),
984
985 /// ID of a node or edge.
986 Id(String),
987
988 /// List comprehension: [x IN list WHERE predicate | expression]
989 ListComprehension {
990 /// Variable name for each element.
991 variable: String,
992 /// The source list expression.
993 list_expr: Box<LogicalExpression>,
994 /// Optional filter predicate.
995 filter_expr: Option<Box<LogicalExpression>>,
996 /// The mapping expression for each element.
997 map_expr: Box<LogicalExpression>,
998 },
999
1000 /// EXISTS subquery.
1001 ExistsSubquery(Box<LogicalOperator>),
1002
1003 /// COUNT subquery.
1004 CountSubquery(Box<LogicalOperator>),
1005}
1006
1007/// Binary operator.
1008#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1009pub enum BinaryOp {
1010 /// Equality comparison (=).
1011 Eq,
1012 /// Inequality comparison (<>).
1013 Ne,
1014 /// Less than (<).
1015 Lt,
1016 /// Less than or equal (<=).
1017 Le,
1018 /// Greater than (>).
1019 Gt,
1020 /// Greater than or equal (>=).
1021 Ge,
1022
1023 /// Logical AND.
1024 And,
1025 /// Logical OR.
1026 Or,
1027 /// Logical XOR.
1028 Xor,
1029
1030 /// Addition (+).
1031 Add,
1032 /// Subtraction (-).
1033 Sub,
1034 /// Multiplication (*).
1035 Mul,
1036 /// Division (/).
1037 Div,
1038 /// Modulo (%).
1039 Mod,
1040
1041 /// String concatenation.
1042 Concat,
1043 /// String starts with.
1044 StartsWith,
1045 /// String ends with.
1046 EndsWith,
1047 /// String contains.
1048 Contains,
1049
1050 /// Collection membership (IN).
1051 In,
1052 /// Pattern matching (LIKE).
1053 Like,
1054 /// Regex matching (=~).
1055 Regex,
1056 /// Power/exponentiation (^).
1057 Pow,
1058}
1059
1060/// Unary operator.
1061#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1062pub enum UnaryOp {
1063 /// Logical NOT.
1064 Not,
1065 /// Numeric negation.
1066 Neg,
1067 /// IS NULL check.
1068 IsNull,
1069 /// IS NOT NULL check.
1070 IsNotNull,
1071}
1072
1073#[cfg(test)]
1074mod tests {
1075 use super::*;
1076
1077 #[test]
1078 fn test_simple_node_scan_plan() {
1079 let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1080 items: vec![ReturnItem {
1081 expression: LogicalExpression::Variable("n".into()),
1082 alias: None,
1083 }],
1084 distinct: false,
1085 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1086 variable: "n".into(),
1087 label: Some("Person".into()),
1088 input: None,
1089 })),
1090 }));
1091
1092 // Verify structure
1093 if let LogicalOperator::Return(ret) = &plan.root {
1094 assert_eq!(ret.items.len(), 1);
1095 assert!(!ret.distinct);
1096 if let LogicalOperator::NodeScan(scan) = ret.input.as_ref() {
1097 assert_eq!(scan.variable, "n");
1098 assert_eq!(scan.label, Some("Person".into()));
1099 } else {
1100 panic!("Expected NodeScan");
1101 }
1102 } else {
1103 panic!("Expected Return");
1104 }
1105 }
1106
1107 #[test]
1108 fn test_filter_plan() {
1109 let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1110 items: vec![ReturnItem {
1111 expression: LogicalExpression::Property {
1112 variable: "n".into(),
1113 property: "name".into(),
1114 },
1115 alias: Some("name".into()),
1116 }],
1117 distinct: false,
1118 input: Box::new(LogicalOperator::Filter(FilterOp {
1119 predicate: LogicalExpression::Binary {
1120 left: Box::new(LogicalExpression::Property {
1121 variable: "n".into(),
1122 property: "age".into(),
1123 }),
1124 op: BinaryOp::Gt,
1125 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1126 },
1127 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1128 variable: "n".into(),
1129 label: Some("Person".into()),
1130 input: None,
1131 })),
1132 })),
1133 }));
1134
1135 if let LogicalOperator::Return(ret) = &plan.root {
1136 if let LogicalOperator::Filter(filter) = ret.input.as_ref() {
1137 if let LogicalExpression::Binary { op, .. } = &filter.predicate {
1138 assert_eq!(*op, BinaryOp::Gt);
1139 } else {
1140 panic!("Expected Binary expression");
1141 }
1142 } else {
1143 panic!("Expected Filter");
1144 }
1145 } else {
1146 panic!("Expected Return");
1147 }
1148 }
1149}