// grafeo_engine/query/plan.rs

//! Logical query plan representation.
//!
//! The logical plan is the intermediate representation between parsed queries
//! and physical execution. Both GQL and Cypher queries are translated to this
//! common representation.

use grafeo_common::types::Value;

9/// A logical query plan.
10#[derive(Debug, Clone)]
11pub struct LogicalPlan {
12    /// The root operator of the plan.
13    pub root: LogicalOperator,
14}
15
16impl LogicalPlan {
17    /// Creates a new logical plan with the given root operator.
18    pub fn new(root: LogicalOperator) -> Self {
19        Self { root }
20    }
21}
22
23/// A logical operator in the query plan.
24#[derive(Debug, Clone)]
25pub enum LogicalOperator {
26    /// Scan all nodes, optionally filtered by label.
27    NodeScan(NodeScanOp),
28
29    /// Scan all edges, optionally filtered by type.
30    EdgeScan(EdgeScanOp),
31
32    /// Expand from nodes to neighbors via edges.
33    Expand(ExpandOp),
34
35    /// Filter rows based on a predicate.
36    Filter(FilterOp),
37
38    /// Project specific columns.
39    Project(ProjectOp),
40
41    /// Join two inputs.
42    Join(JoinOp),
43
44    /// Aggregate with grouping.
45    Aggregate(AggregateOp),
46
47    /// Limit the number of results.
48    Limit(LimitOp),
49
50    /// Skip a number of results.
51    Skip(SkipOp),
52
53    /// Sort results.
54    Sort(SortOp),
55
56    /// Remove duplicate results.
57    Distinct(DistinctOp),
58
59    /// Create a new node.
60    CreateNode(CreateNodeOp),
61
62    /// Create a new edge.
63    CreateEdge(CreateEdgeOp),
64
65    /// Delete a node.
66    DeleteNode(DeleteNodeOp),
67
68    /// Delete an edge.
69    DeleteEdge(DeleteEdgeOp),
70
71    /// Set properties on a node or edge.
72    SetProperty(SetPropertyOp),
73
74    /// Add labels to a node.
75    AddLabel(AddLabelOp),
76
77    /// Remove labels from a node.
78    RemoveLabel(RemoveLabelOp),
79
80    /// Return results (terminal operator).
81    Return(ReturnOp),
82
83    /// Empty result set.
84    Empty,
85
86    // ==================== RDF/SPARQL Operators ====================
87    /// Scan RDF triples matching a pattern.
88    TripleScan(TripleScanOp),
89
90    /// Union of multiple result sets.
91    Union(UnionOp),
92
93    /// Left outer join for OPTIONAL patterns.
94    LeftJoin(LeftJoinOp),
95
96    /// Anti-join for MINUS patterns.
97    AntiJoin(AntiJoinOp),
98
99    /// Bind a variable to an expression.
100    Bind(BindOp),
101
102    /// Unwind a list into individual rows.
103    Unwind(UnwindOp),
104
105    /// Merge a pattern (match or create).
106    Merge(MergeOp),
107
108    /// Find shortest path between nodes.
109    ShortestPath(ShortestPathOp),
110
111    // ==================== SPARQL Update Operators ====================
112    /// Insert RDF triples.
113    InsertTriple(InsertTripleOp),
114
115    /// Delete RDF triples.
116    DeleteTriple(DeleteTripleOp),
117
118    /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
119    /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
120    Modify(ModifyOp),
121
122    /// Clear a graph (remove all triples).
123    ClearGraph(ClearGraphOp),
124
125    /// Create a new named graph.
126    CreateGraph(CreateGraphOp),
127
128    /// Drop (remove) a named graph.
129    DropGraph(DropGraphOp),
130
131    /// Load data from a URL into a graph.
132    LoadGraph(LoadGraphOp),
133
134    /// Copy triples from one graph to another.
135    CopyGraph(CopyGraphOp),
136
137    /// Move triples from one graph to another.
138    MoveGraph(MoveGraphOp),
139
140    /// Add (merge) triples from one graph to another.
141    AddGraph(AddGraphOp),
142
143    // ==================== Vector Search Operators ====================
144    /// Scan using vector similarity search.
145    VectorScan(VectorScanOp),
146
147    /// Join graph patterns with vector similarity search.
148    ///
149    /// Computes vector distances between entities from the left input and
150    /// a query vector, then joins with similarity scores. Useful for:
151    /// - Filtering graph traversal results by vector similarity
152    /// - Computing aggregated embeddings and finding similar entities
153    /// - Combining multiple vector sources with graph structure
154    VectorJoin(VectorJoinOp),
155
156    // ==================== DDL Operators ====================
157    /// Define a property graph schema (SQL/PGQ DDL).
158    CreatePropertyGraph(CreatePropertyGraphOp),
159
160    // ==================== Procedure Call Operators ====================
161    /// Invoke a stored procedure (CALL ... YIELD).
162    CallProcedure(CallProcedureOp),
163}
164
165/// Scan nodes from the graph.
166#[derive(Debug, Clone)]
167pub struct NodeScanOp {
168    /// Variable name to bind the node to.
169    pub variable: String,
170    /// Optional label filter.
171    pub label: Option<String>,
172    /// Child operator (if any, for chained patterns).
173    pub input: Option<Box<LogicalOperator>>,
174}
175
176/// Scan edges from the graph.
177#[derive(Debug, Clone)]
178pub struct EdgeScanOp {
179    /// Variable name to bind the edge to.
180    pub variable: String,
181    /// Optional edge type filter.
182    pub edge_type: Option<String>,
183    /// Child operator (if any).
184    pub input: Option<Box<LogicalOperator>>,
185}
186
187/// Expand from nodes to their neighbors.
188#[derive(Debug, Clone)]
189pub struct ExpandOp {
190    /// Source node variable.
191    pub from_variable: String,
192    /// Target node variable to bind.
193    pub to_variable: String,
194    /// Edge variable to bind (optional).
195    pub edge_variable: Option<String>,
196    /// Direction of expansion.
197    pub direction: ExpandDirection,
198    /// Optional edge type filter.
199    pub edge_type: Option<String>,
200    /// Minimum hops (for variable-length patterns).
201    pub min_hops: u32,
202    /// Maximum hops (for variable-length patterns).
203    pub max_hops: Option<u32>,
204    /// Input operator.
205    pub input: Box<LogicalOperator>,
206    /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
207    /// When set, a path length column will be output under this name.
208    pub path_alias: Option<String>,
209}
210
/// Direction for edge expansion.
///
/// A plain, copyable tag; it derives `Hash` so it can be used in hashed
/// collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExpandDirection {
    /// Follow outgoing edges.
    Outgoing,
    /// Follow incoming edges.
    Incoming,
    /// Follow edges in either direction.
    Both,
}

222/// Join two inputs.
223#[derive(Debug, Clone)]
224pub struct JoinOp {
225    /// Left input.
226    pub left: Box<LogicalOperator>,
227    /// Right input.
228    pub right: Box<LogicalOperator>,
229    /// Join type.
230    pub join_type: JoinType,
231    /// Join conditions.
232    pub conditions: Vec<JoinCondition>,
233}
234
/// Join type.
///
/// A plain, copyable tag; it derives `Hash` so it can be used in hashed
/// collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum JoinType {
    /// Inner join.
    Inner,
    /// Left outer join.
    Left,
    /// Right outer join.
    Right,
    /// Full outer join.
    Full,
    /// Cross join (Cartesian product).
    Cross,
    /// Semi join (returns left rows with matching right rows).
    Semi,
    /// Anti join (returns left rows without matching right rows).
    Anti,
}

254/// A join condition.
255#[derive(Debug, Clone)]
256pub struct JoinCondition {
257    /// Left expression.
258    pub left: LogicalExpression,
259    /// Right expression.
260    pub right: LogicalExpression,
261}
262
263/// Aggregate with grouping.
264#[derive(Debug, Clone)]
265pub struct AggregateOp {
266    /// Group by expressions.
267    pub group_by: Vec<LogicalExpression>,
268    /// Aggregate functions.
269    pub aggregates: Vec<AggregateExpr>,
270    /// Input operator.
271    pub input: Box<LogicalOperator>,
272    /// HAVING clause filter (applied after aggregation).
273    pub having: Option<LogicalExpression>,
274}
275
276/// An aggregate expression.
277#[derive(Debug, Clone)]
278pub struct AggregateExpr {
279    /// Aggregate function.
280    pub function: AggregateFunction,
281    /// Expression to aggregate.
282    pub expression: Option<LogicalExpression>,
283    /// Whether to use DISTINCT.
284    pub distinct: bool,
285    /// Alias for the result.
286    pub alias: Option<String>,
287    /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
288    pub percentile: Option<f64>,
289}
290
/// Aggregate function.
///
/// A plain, copyable tag; it derives `Hash` so it can be used in hashed
/// collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AggregateFunction {
    /// Count all rows (COUNT(*)).
    Count,
    /// Count non-null values (COUNT(expr)).
    CountNonNull,
    /// Sum values.
    Sum,
    /// Average values.
    Avg,
    /// Minimum value.
    Min,
    /// Maximum value.
    Max,
    /// Collect into list.
    Collect,
    /// Sample standard deviation (STDEV).
    StdDev,
    /// Population standard deviation (STDEVP).
    StdDevPop,
    /// Discrete percentile (PERCENTILE_DISC).
    PercentileDisc,
    /// Continuous percentile (PERCENTILE_CONT).
    PercentileCont,
}

318/// Filter rows based on a predicate.
319#[derive(Debug, Clone)]
320pub struct FilterOp {
321    /// The filter predicate.
322    pub predicate: LogicalExpression,
323    /// Input operator.
324    pub input: Box<LogicalOperator>,
325}
326
327/// Project specific columns.
328#[derive(Debug, Clone)]
329pub struct ProjectOp {
330    /// Columns to project.
331    pub projections: Vec<Projection>,
332    /// Input operator.
333    pub input: Box<LogicalOperator>,
334}
335
336/// A single projection (column selection or computation).
337#[derive(Debug, Clone)]
338pub struct Projection {
339    /// Expression to compute.
340    pub expression: LogicalExpression,
341    /// Alias for the result.
342    pub alias: Option<String>,
343}
344
345/// Limit the number of results.
346#[derive(Debug, Clone)]
347pub struct LimitOp {
348    /// Maximum number of rows to return.
349    pub count: usize,
350    /// Input operator.
351    pub input: Box<LogicalOperator>,
352}
353
354/// Skip a number of results.
355#[derive(Debug, Clone)]
356pub struct SkipOp {
357    /// Number of rows to skip.
358    pub count: usize,
359    /// Input operator.
360    pub input: Box<LogicalOperator>,
361}
362
363/// Sort results.
364#[derive(Debug, Clone)]
365pub struct SortOp {
366    /// Sort keys.
367    pub keys: Vec<SortKey>,
368    /// Input operator.
369    pub input: Box<LogicalOperator>,
370}
371
372/// A sort key.
373#[derive(Debug, Clone)]
374pub struct SortKey {
375    /// Expression to sort by.
376    pub expression: LogicalExpression,
377    /// Sort order.
378    pub order: SortOrder,
379}
380
/// Sort order.
///
/// A plain, copyable tag; it derives `Hash` so it can be used in hashed
/// collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SortOrder {
    /// Ascending order.
    Ascending,
    /// Descending order.
    Descending,
}

390/// Remove duplicate results.
391#[derive(Debug, Clone)]
392pub struct DistinctOp {
393    /// Input operator.
394    pub input: Box<LogicalOperator>,
395    /// Optional columns to use for deduplication.
396    /// If None, all columns are used.
397    pub columns: Option<Vec<String>>,
398}
399
400/// Create a new node.
401#[derive(Debug, Clone)]
402pub struct CreateNodeOp {
403    /// Variable name to bind the created node to.
404    pub variable: String,
405    /// Labels for the new node.
406    pub labels: Vec<String>,
407    /// Properties for the new node.
408    pub properties: Vec<(String, LogicalExpression)>,
409    /// Input operator (for chained creates).
410    pub input: Option<Box<LogicalOperator>>,
411}
412
413/// Create a new edge.
414#[derive(Debug, Clone)]
415pub struct CreateEdgeOp {
416    /// Variable name to bind the created edge to.
417    pub variable: Option<String>,
418    /// Source node variable.
419    pub from_variable: String,
420    /// Target node variable.
421    pub to_variable: String,
422    /// Edge type.
423    pub edge_type: String,
424    /// Properties for the new edge.
425    pub properties: Vec<(String, LogicalExpression)>,
426    /// Input operator.
427    pub input: Box<LogicalOperator>,
428}
429
430/// Delete a node.
431#[derive(Debug, Clone)]
432pub struct DeleteNodeOp {
433    /// Variable of the node to delete.
434    pub variable: String,
435    /// Whether to detach (delete connected edges) before deleting.
436    pub detach: bool,
437    /// Input operator.
438    pub input: Box<LogicalOperator>,
439}
440
441/// Delete an edge.
442#[derive(Debug, Clone)]
443pub struct DeleteEdgeOp {
444    /// Variable of the edge to delete.
445    pub variable: String,
446    /// Input operator.
447    pub input: Box<LogicalOperator>,
448}
449
450/// Set properties on a node or edge.
451#[derive(Debug, Clone)]
452pub struct SetPropertyOp {
453    /// Variable of the entity to update.
454    pub variable: String,
455    /// Properties to set (name -> expression).
456    pub properties: Vec<(String, LogicalExpression)>,
457    /// Whether to replace all properties (vs. merge).
458    pub replace: bool,
459    /// Input operator.
460    pub input: Box<LogicalOperator>,
461}
462
463/// Add labels to a node.
464#[derive(Debug, Clone)]
465pub struct AddLabelOp {
466    /// Variable of the node to update.
467    pub variable: String,
468    /// Labels to add.
469    pub labels: Vec<String>,
470    /// Input operator.
471    pub input: Box<LogicalOperator>,
472}
473
474/// Remove labels from a node.
475#[derive(Debug, Clone)]
476pub struct RemoveLabelOp {
477    /// Variable of the node to update.
478    pub variable: String,
479    /// Labels to remove.
480    pub labels: Vec<String>,
481    /// Input operator.
482    pub input: Box<LogicalOperator>,
483}
484
// ==================== RDF/SPARQL Operators ====================

487/// Scan RDF triples matching a pattern.
488#[derive(Debug, Clone)]
489pub struct TripleScanOp {
490    /// Subject pattern (variable name or IRI).
491    pub subject: TripleComponent,
492    /// Predicate pattern (variable name or IRI).
493    pub predicate: TripleComponent,
494    /// Object pattern (variable name, IRI, or literal).
495    pub object: TripleComponent,
496    /// Named graph (optional).
497    pub graph: Option<TripleComponent>,
498    /// Input operator (for chained patterns).
499    pub input: Option<Box<LogicalOperator>>,
500}
501
502/// A component of a triple pattern.
503#[derive(Debug, Clone)]
504pub enum TripleComponent {
505    /// A variable to bind.
506    Variable(String),
507    /// A constant IRI.
508    Iri(String),
509    /// A constant literal value.
510    Literal(Value),
511}
512
513/// Union of multiple result sets.
514#[derive(Debug, Clone)]
515pub struct UnionOp {
516    /// Inputs to union together.
517    pub inputs: Vec<LogicalOperator>,
518}
519
520/// Left outer join for OPTIONAL patterns.
521#[derive(Debug, Clone)]
522pub struct LeftJoinOp {
523    /// Left (required) input.
524    pub left: Box<LogicalOperator>,
525    /// Right (optional) input.
526    pub right: Box<LogicalOperator>,
527    /// Optional filter condition.
528    pub condition: Option<LogicalExpression>,
529}
530
531/// Anti-join for MINUS patterns.
532#[derive(Debug, Clone)]
533pub struct AntiJoinOp {
534    /// Left input (results to keep if no match on right).
535    pub left: Box<LogicalOperator>,
536    /// Right input (patterns to exclude).
537    pub right: Box<LogicalOperator>,
538}
539
540/// Bind a variable to an expression.
541#[derive(Debug, Clone)]
542pub struct BindOp {
543    /// Expression to compute.
544    pub expression: LogicalExpression,
545    /// Variable to bind the result to.
546    pub variable: String,
547    /// Input operator.
548    pub input: Box<LogicalOperator>,
549}
550
551/// Unwind a list into individual rows.
552///
553/// For each input row, evaluates the expression (which should return a list)
554/// and emits one row for each element in the list.
555#[derive(Debug, Clone)]
556pub struct UnwindOp {
557    /// The list expression to unwind.
558    pub expression: LogicalExpression,
559    /// The variable name for each element.
560    pub variable: String,
561    /// Optional variable for 1-based element position (ORDINALITY).
562    pub ordinality_var: Option<String>,
563    /// Optional variable for 0-based element position (OFFSET).
564    pub offset_var: Option<String>,
565    /// Input operator.
566    pub input: Box<LogicalOperator>,
567}
568
569/// Merge a pattern (match or create).
570///
571/// MERGE tries to match a pattern in the graph. If found, returns the existing
572/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
573/// (optionally applying ON CREATE SET).
574#[derive(Debug, Clone)]
575pub struct MergeOp {
576    /// The node to merge.
577    pub variable: String,
578    /// Labels to match/create.
579    pub labels: Vec<String>,
580    /// Properties that must match (used for both matching and creation).
581    pub match_properties: Vec<(String, LogicalExpression)>,
582    /// Properties to set on CREATE.
583    pub on_create: Vec<(String, LogicalExpression)>,
584    /// Properties to set on MATCH.
585    pub on_match: Vec<(String, LogicalExpression)>,
586    /// Input operator.
587    pub input: Box<LogicalOperator>,
588}
589
590/// Find shortest path between two nodes.
591///
592/// This operator uses Dijkstra's algorithm to find the shortest path(s)
593/// between a source node and a target node, optionally filtered by edge type.
594#[derive(Debug, Clone)]
595pub struct ShortestPathOp {
596    /// Input operator providing source/target nodes.
597    pub input: Box<LogicalOperator>,
598    /// Variable name for the source node.
599    pub source_var: String,
600    /// Variable name for the target node.
601    pub target_var: String,
602    /// Optional edge type filter.
603    pub edge_type: Option<String>,
604    /// Direction of edge traversal.
605    pub direction: ExpandDirection,
606    /// Variable name to bind the path result.
607    pub path_alias: String,
608    /// Whether to find all shortest paths (vs. just one).
609    pub all_paths: bool,
610}
611
// ==================== SPARQL Update Operators ====================

614/// Insert RDF triples.
615#[derive(Debug, Clone)]
616pub struct InsertTripleOp {
617    /// Subject of the triple.
618    pub subject: TripleComponent,
619    /// Predicate of the triple.
620    pub predicate: TripleComponent,
621    /// Object of the triple.
622    pub object: TripleComponent,
623    /// Named graph (optional).
624    pub graph: Option<String>,
625    /// Input operator (provides variable bindings).
626    pub input: Option<Box<LogicalOperator>>,
627}
628
629/// Delete RDF triples.
630#[derive(Debug, Clone)]
631pub struct DeleteTripleOp {
632    /// Subject pattern.
633    pub subject: TripleComponent,
634    /// Predicate pattern.
635    pub predicate: TripleComponent,
636    /// Object pattern.
637    pub object: TripleComponent,
638    /// Named graph (optional).
639    pub graph: Option<String>,
640    /// Input operator (provides variable bindings).
641    pub input: Option<Box<LogicalOperator>>,
642}
643
644/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
645///
646/// Per SPARQL 1.1 Update spec, this operator:
647/// 1. Evaluates the WHERE clause once to get bindings
648/// 2. Applies DELETE templates using those bindings
649/// 3. Applies INSERT templates using the SAME bindings
650///
651/// This ensures DELETE and INSERT see consistent data.
652#[derive(Debug, Clone)]
653pub struct ModifyOp {
654    /// DELETE triple templates (patterns with variables).
655    pub delete_templates: Vec<TripleTemplate>,
656    /// INSERT triple templates (patterns with variables).
657    pub insert_templates: Vec<TripleTemplate>,
658    /// WHERE clause that provides variable bindings.
659    pub where_clause: Box<LogicalOperator>,
660    /// Named graph context (for WITH clause).
661    pub graph: Option<String>,
662}
663
664/// A triple template for DELETE/INSERT operations.
665#[derive(Debug, Clone)]
666pub struct TripleTemplate {
667    /// Subject (may be a variable).
668    pub subject: TripleComponent,
669    /// Predicate (may be a variable).
670    pub predicate: TripleComponent,
671    /// Object (may be a variable or literal).
672    pub object: TripleComponent,
673    /// Named graph (optional).
674    pub graph: Option<String>,
675}
676
/// Clear all triples from a graph.
#[derive(Debug, Clone)]
pub struct ClearGraphOp {
    /// Target graph:
    /// - `None` = default graph
    /// - `Some("")` = all named graphs
    /// - `Some(iri)` = specific graph
    pub graph: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}

/// Create a new named graph.
#[derive(Debug, Clone)]
pub struct CreateGraphOp {
    /// IRI of the graph to create.
    pub graph: String,
    /// Whether to silently ignore if graph already exists.
    pub silent: bool,
}

/// Drop (remove) a named graph.
#[derive(Debug, Clone)]
pub struct DropGraphOp {
    /// Target graph (`None` = default graph).
    pub graph: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}

/// Load data from a URL into a graph.
#[derive(Debug, Clone)]
pub struct LoadGraphOp {
    /// Source URL to load data from.
    pub source: String,
    /// Destination graph (`None` = default graph).
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}

/// Copy triples from one graph to another.
#[derive(Debug, Clone)]
pub struct CopyGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}

/// Move triples from one graph to another.
#[derive(Debug, Clone)]
pub struct MoveGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}

/// Add (merge) triples from one graph to another.
#[derive(Debug, Clone)]
pub struct AddGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}

// ==================== Vector Search Operators ====================

750/// Vector similarity scan operation.
751///
752/// Performs approximate nearest neighbor search using a vector index (HNSW)
753/// or brute-force search for small datasets. Returns nodes/edges whose
754/// embeddings are similar to the query vector.
755///
756/// # Example GQL
757///
758/// ```gql
759/// MATCH (m:Movie)
760/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
761/// RETURN m.title
762/// ```
763#[derive(Debug, Clone)]
764pub struct VectorScanOp {
765    /// Variable name to bind matching entities to.
766    pub variable: String,
767    /// Name of the vector index to use (None = brute-force).
768    pub index_name: Option<String>,
769    /// Property containing the vector embedding.
770    pub property: String,
771    /// Optional label filter (scan only nodes with this label).
772    pub label: Option<String>,
773    /// The query vector expression.
774    pub query_vector: LogicalExpression,
775    /// Number of nearest neighbors to return.
776    pub k: usize,
777    /// Distance metric (None = use index default, typically cosine).
778    pub metric: Option<VectorMetric>,
779    /// Minimum similarity threshold (filters results below this).
780    pub min_similarity: Option<f32>,
781    /// Maximum distance threshold (filters results above this).
782    pub max_distance: Option<f32>,
783    /// Input operator (for hybrid queries combining graph + vector).
784    pub input: Option<Box<LogicalOperator>>,
785}
786
/// Vector distance/similarity metric for vector scan operations.
///
/// A plain, copyable tag; it derives `Hash` so it can be used in hashed
/// collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VectorMetric {
    /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
    Cosine,
    /// Euclidean (L2) distance. Best when magnitude matters.
    Euclidean,
    /// Dot product. Best for maximum inner product search.
    DotProduct,
    /// Manhattan (L1) distance. Less sensitive to outliers.
    Manhattan,
}

800/// Join graph patterns with vector similarity search.
801///
802/// This operator takes entities from the left input and computes vector
803/// similarity against a query vector, outputting (entity, distance) pairs.
804///
805/// # Use Cases
806///
807/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
808/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
809/// 3. **Filtering by similarity**: Join with threshold-based filtering
810///
811/// # Example
812///
813/// ```gql
814/// // Find movies similar to what the user liked
815/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
816/// WITH avg(liked.embedding) AS user_taste
817/// VECTOR JOIN (m:Movie) ON m.embedding
818/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
819/// RETURN m.title
820/// ```
821#[derive(Debug, Clone)]
822pub struct VectorJoinOp {
823    /// Input operator providing entities to match against.
824    pub input: Box<LogicalOperator>,
825    /// Variable from input to extract vectors from (for entity-to-entity similarity).
826    /// If None, uses `query_vector` directly.
827    pub left_vector_variable: Option<String>,
828    /// Property containing the left vector (used with `left_vector_variable`).
829    pub left_property: Option<String>,
830    /// The query vector expression (constant or computed).
831    pub query_vector: LogicalExpression,
832    /// Variable name to bind the right-side matching entities.
833    pub right_variable: String,
834    /// Property containing the right-side vector embeddings.
835    pub right_property: String,
836    /// Optional label filter for right-side entities.
837    pub right_label: Option<String>,
838    /// Name of vector index on right side (None = brute-force).
839    pub index_name: Option<String>,
840    /// Number of nearest neighbors per left-side entity.
841    pub k: usize,
842    /// Distance metric.
843    pub metric: Option<VectorMetric>,
844    /// Minimum similarity threshold.
845    pub min_similarity: Option<f32>,
846    /// Maximum distance threshold.
847    pub max_distance: Option<f32>,
848    /// Variable to bind the distance/similarity score.
849    pub score_variable: Option<String>,
850}
851
852/// Return results (terminal operator).
853#[derive(Debug, Clone)]
854pub struct ReturnOp {
855    /// Items to return.
856    pub items: Vec<ReturnItem>,
857    /// Whether to return distinct results.
858    pub distinct: bool,
859    /// Input operator.
860    pub input: Box<LogicalOperator>,
861}
862
863/// A single return item.
864#[derive(Debug, Clone)]
865pub struct ReturnItem {
866    /// Expression to return.
867    pub expression: LogicalExpression,
868    /// Alias for the result column.
869    pub alias: Option<String>,
870}
871
872/// Define a property graph schema (SQL/PGQ DDL).
873#[derive(Debug, Clone)]
874pub struct CreatePropertyGraphOp {
875    /// Graph name.
876    pub name: String,
877    /// Node table schemas (label name + column definitions).
878    pub node_tables: Vec<PropertyGraphNodeTable>,
879    /// Edge table schemas (type name + column definitions + references).
880    pub edge_tables: Vec<PropertyGraphEdgeTable>,
881}
882
/// A node table in a property graph definition.
#[derive(Debug, Clone)]
pub struct PropertyGraphNodeTable {
    /// Table name (maps to a node label).
    pub name: String,
    /// Column definitions as (name, type_name) pairs.
    pub columns: Vec<(String, String)>,
}

/// An edge table in a property graph definition.
#[derive(Debug, Clone)]
pub struct PropertyGraphEdgeTable {
    /// Table name (maps to an edge type).
    pub name: String,
    /// Column definitions as (name, type_name) pairs.
    pub columns: Vec<(String, String)>,
    /// Source node table name.
    pub source_table: String,
    /// Target node table name.
    pub target_table: String,
}

// ==================== Procedure Call Types ====================

907/// A CALL procedure operation.
908///
909/// ```text
910/// CALL grafeo.pagerank({damping: 0.85}) YIELD nodeId, score
911/// ```
912#[derive(Debug, Clone)]
913pub struct CallProcedureOp {
914    /// Dotted procedure name, e.g. `["grafeo", "pagerank"]`.
915    pub name: Vec<String>,
916    /// Argument expressions (constants in Phase 1).
917    pub arguments: Vec<LogicalExpression>,
918    /// Optional YIELD clause: which columns to expose + aliases.
919    pub yield_items: Option<Vec<ProcedureYield>>,
920}
921
/// A single YIELD item in a procedure call.
#[derive(Debug, Clone)]
pub struct ProcedureYield {
    /// Column name from the procedure result.
    pub field_name: String,
    /// Optional alias (`YIELD score AS rank`).
    pub alias: Option<String>,
}

931/// A logical expression.
932#[derive(Debug, Clone)]
933pub enum LogicalExpression {
934    /// A literal value.
935    Literal(Value),
936
937    /// A variable reference.
938    Variable(String),
939
940    /// Property access (e.g., n.name).
941    Property {
942        /// The variable to access.
943        variable: String,
944        /// The property name.
945        property: String,
946    },
947
948    /// Binary operation.
949    Binary {
950        /// Left operand.
951        left: Box<LogicalExpression>,
952        /// Operator.
953        op: BinaryOp,
954        /// Right operand.
955        right: Box<LogicalExpression>,
956    },
957
958    /// Unary operation.
959    Unary {
960        /// Operator.
961        op: UnaryOp,
962        /// Operand.
963        operand: Box<LogicalExpression>,
964    },
965
966    /// Function call.
967    FunctionCall {
968        /// Function name.
969        name: String,
970        /// Arguments.
971        args: Vec<LogicalExpression>,
972        /// Whether DISTINCT is applied (e.g., COUNT(DISTINCT x)).
973        distinct: bool,
974    },
975
976    /// List literal.
977    List(Vec<LogicalExpression>),
978
979    /// Map literal (e.g., {name: 'Alice', age: 30}).
980    Map(Vec<(String, LogicalExpression)>),
981
982    /// Index access (e.g., `list[0]`).
983    IndexAccess {
984        /// The base expression (typically a list or string).
985        base: Box<LogicalExpression>,
986        /// The index expression.
987        index: Box<LogicalExpression>,
988    },
989
990    /// Slice access (e.g., list[1..3]).
991    SliceAccess {
992        /// The base expression (typically a list or string).
993        base: Box<LogicalExpression>,
994        /// Start index (None means from beginning).
995        start: Option<Box<LogicalExpression>>,
996        /// End index (None means to end).
997        end: Option<Box<LogicalExpression>>,
998    },
999
1000    /// CASE expression.
1001    Case {
1002        /// Test expression (for simple CASE).
1003        operand: Option<Box<LogicalExpression>>,
1004        /// WHEN clauses.
1005        when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
1006        /// ELSE clause.
1007        else_clause: Option<Box<LogicalExpression>>,
1008    },
1009
1010    /// Parameter reference.
1011    Parameter(String),
1012
1013    /// Labels of a node.
1014    Labels(String),
1015
1016    /// Type of an edge.
1017    Type(String),
1018
1019    /// ID of a node or edge.
1020    Id(String),
1021
1022    /// List comprehension: [x IN list WHERE predicate | expression]
1023    ListComprehension {
1024        /// Variable name for each element.
1025        variable: String,
1026        /// The source list expression.
1027        list_expr: Box<LogicalExpression>,
1028        /// Optional filter predicate.
1029        filter_expr: Option<Box<LogicalExpression>>,
1030        /// The mapping expression for each element.
1031        map_expr: Box<LogicalExpression>,
1032    },
1033
1034    /// EXISTS subquery.
1035    ExistsSubquery(Box<LogicalOperator>),
1036
1037    /// COUNT subquery.
1038    CountSubquery(Box<LogicalOperator>),
1039}
1040
/// Binary operator carried by [`LogicalExpression::Binary`].
///
/// This is a fieldless, `Copy` tag that only names the operation; it holds no
/// operands and performs no evaluation itself. `Hash` is derived together with
/// `Eq` so an operator can be used directly as a `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BinaryOp {
    // Comparison operators.
    /// Equality comparison (=).
    Eq,
    /// Inequality comparison (<>).
    Ne,
    /// Less than (<).
    Lt,
    /// Less than or equal (<=).
    Le,
    /// Greater than (>).
    Gt,
    /// Greater than or equal (>=).
    Ge,

    // Logical connectives.
    /// Logical AND.
    And,
    /// Logical OR.
    Or,
    /// Logical XOR.
    Xor,

    // Arithmetic operators.
    /// Addition (+).
    Add,
    /// Subtraction (-).
    Sub,
    /// Multiplication (*).
    Mul,
    /// Division (/).
    Div,
    /// Modulo (%).
    Mod,

    // String operators.
    /// String concatenation.
    Concat,
    /// String starts with.
    StartsWith,
    /// String ends with.
    EndsWith,
    /// String contains.
    Contains,

    // Membership, pattern matching and exponentiation.
    /// Collection membership (IN).
    In,
    /// Pattern matching (LIKE).
    Like,
    /// Regex matching (=~).
    Regex,
    /// Power/exponentiation (^).
    Pow,
}
1093
/// Unary operator carried by [`LogicalExpression::Unary`].
///
/// This is a fieldless, `Copy` tag that only names the operation applied to
/// the single operand. `Hash` is derived together with `Eq` so an operator can
/// be used directly as a `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UnaryOp {
    /// Logical NOT.
    Not,
    /// Numeric negation.
    Neg,
    /// IS NULL check.
    IsNull,
    /// IS NOT NULL check.
    IsNotNull,
}
1106
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the leaf operator shared by both tests: a scan binding `n`
    /// with the label `Person` and no upstream input.
    fn person_scan() -> LogicalOperator {
        LogicalOperator::NodeScan(NodeScanOp {
            variable: "n".into(),
            label: Some("Person".into()),
            input: None,
        })
    }

    /// A `Return` directly over a labelled `NodeScan` keeps its shape.
    #[test]
    fn test_simple_node_scan_plan() {
        let returned = ReturnItem {
            expression: LogicalExpression::Variable("n".into()),
            alias: None,
        };
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![returned],
            distinct: false,
            input: Box::new(person_scan()),
        }));

        // The root must be the terminal Return operator.
        let ret = match &plan.root {
            LogicalOperator::Return(ret) => ret,
            _ => panic!("Expected Return"),
        };
        assert_eq!(ret.items.len(), 1);
        assert!(!ret.distinct);

        // Its input must be the scan constructed above.
        let scan = match ret.input.as_ref() {
            LogicalOperator::NodeScan(scan) => scan,
            _ => panic!("Expected NodeScan"),
        };
        assert_eq!(scan.variable, "n");
        assert_eq!(scan.label, Some("Person".into()));
    }

    /// A `Return` over a `Filter` exposes the filter's binary predicate operator.
    #[test]
    fn test_filter_plan() {
        // Predicate: n.age > 30
        let age_above_thirty = LogicalExpression::Binary {
            left: Box::new(LogicalExpression::Property {
                variable: "n".into(),
                property: "age".into(),
            }),
            op: BinaryOp::Gt,
            right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
        };
        // Projection: n.name AS name
        let name_item = ReturnItem {
            expression: LogicalExpression::Property {
                variable: "n".into(),
                property: "name".into(),
            },
            alias: Some("name".into()),
        };

        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![name_item],
            distinct: false,
            input: Box::new(LogicalOperator::Filter(FilterOp {
                predicate: age_above_thirty,
                input: Box::new(person_scan()),
            })),
        }));

        let ret = match &plan.root {
            LogicalOperator::Return(ret) => ret,
            _ => panic!("Expected Return"),
        };
        let filter = match ret.input.as_ref() {
            LogicalOperator::Filter(filter) => filter,
            _ => panic!("Expected Filter"),
        };
        match &filter.predicate {
            LogicalExpression::Binary { op, .. } => assert_eq!(*op, BinaryOp::Gt),
            _ => panic!("Expected Binary expression"),
        }
    }
}