// grafeo_engine/query/plan.rs

//! Logical query plan representation.
//!
//! The logical plan is the intermediate representation between parsed queries
//! and physical execution. Both GQL and Cypher queries are translated to this
//! common representation.
6
7use grafeo_common::types::Value;
8
9/// A logical query plan.
10#[derive(Debug, Clone)]
11pub struct LogicalPlan {
12    /// The root operator of the plan.
13    pub root: LogicalOperator,
14}
15
16impl LogicalPlan {
17    /// Creates a new logical plan with the given root operator.
18    pub fn new(root: LogicalOperator) -> Self {
19        Self { root }
20    }
21}
22
23/// A logical operator in the query plan.
24#[derive(Debug, Clone)]
25pub enum LogicalOperator {
26    /// Scan all nodes, optionally filtered by label.
27    NodeScan(NodeScanOp),
28
29    /// Scan all edges, optionally filtered by type.
30    EdgeScan(EdgeScanOp),
31
32    /// Expand from nodes to neighbors via edges.
33    Expand(ExpandOp),
34
35    /// Filter rows based on a predicate.
36    Filter(FilterOp),
37
38    /// Project specific columns.
39    Project(ProjectOp),
40
41    /// Join two inputs.
42    Join(JoinOp),
43
44    /// Aggregate with grouping.
45    Aggregate(AggregateOp),
46
47    /// Limit the number of results.
48    Limit(LimitOp),
49
50    /// Skip a number of results.
51    Skip(SkipOp),
52
53    /// Sort results.
54    Sort(SortOp),
55
56    /// Remove duplicate results.
57    Distinct(DistinctOp),
58
59    /// Create a new node.
60    CreateNode(CreateNodeOp),
61
62    /// Create a new edge.
63    CreateEdge(CreateEdgeOp),
64
65    /// Delete a node.
66    DeleteNode(DeleteNodeOp),
67
68    /// Delete an edge.
69    DeleteEdge(DeleteEdgeOp),
70
71    /// Set properties on a node or edge.
72    SetProperty(SetPropertyOp),
73
74    /// Add labels to a node.
75    AddLabel(AddLabelOp),
76
77    /// Remove labels from a node.
78    RemoveLabel(RemoveLabelOp),
79
80    /// Return results (terminal operator).
81    Return(ReturnOp),
82
83    /// Empty result set.
84    Empty,
85
86    // ==================== RDF/SPARQL Operators ====================
87    /// Scan RDF triples matching a pattern.
88    TripleScan(TripleScanOp),
89
90    /// Union of multiple result sets.
91    Union(UnionOp),
92
93    /// Left outer join for OPTIONAL patterns.
94    LeftJoin(LeftJoinOp),
95
96    /// Anti-join for MINUS patterns.
97    AntiJoin(AntiJoinOp),
98
99    /// Bind a variable to an expression.
100    Bind(BindOp),
101
102    /// Unwind a list into individual rows.
103    Unwind(UnwindOp),
104
105    /// Merge a pattern (match or create).
106    Merge(MergeOp),
107
108    /// Find shortest path between nodes.
109    ShortestPath(ShortestPathOp),
110
111    // ==================== SPARQL Update Operators ====================
112    /// Insert RDF triples.
113    InsertTriple(InsertTripleOp),
114
115    /// Delete RDF triples.
116    DeleteTriple(DeleteTripleOp),
117
118    /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
119    /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
120    Modify(ModifyOp),
121
122    /// Clear a graph (remove all triples).
123    ClearGraph(ClearGraphOp),
124
125    /// Create a new named graph.
126    CreateGraph(CreateGraphOp),
127
128    /// Drop (remove) a named graph.
129    DropGraph(DropGraphOp),
130
131    /// Load data from a URL into a graph.
132    LoadGraph(LoadGraphOp),
133
134    /// Copy triples from one graph to another.
135    CopyGraph(CopyGraphOp),
136
137    /// Move triples from one graph to another.
138    MoveGraph(MoveGraphOp),
139
140    /// Add (merge) triples from one graph to another.
141    AddGraph(AddGraphOp),
142
143    // ==================== Vector Search Operators ====================
144    /// Scan using vector similarity search.
145    VectorScan(VectorScanOp),
146
147    /// Join graph patterns with vector similarity search.
148    ///
149    /// Computes vector distances between entities from the left input and
150    /// a query vector, then joins with similarity scores. Useful for:
151    /// - Filtering graph traversal results by vector similarity
152    /// - Computing aggregated embeddings and finding similar entities
153    /// - Combining multiple vector sources with graph structure
154    VectorJoin(VectorJoinOp),
155
156    // ==================== DDL Operators ====================
157    /// Define a property graph schema (SQL/PGQ DDL).
158    CreatePropertyGraph(CreatePropertyGraphOp),
159
160    // ==================== Procedure Call Operators ====================
161    /// Invoke a stored procedure (CALL ... YIELD).
162    CallProcedure(CallProcedureOp),
163}
164
165/// Scan nodes from the graph.
166#[derive(Debug, Clone)]
167pub struct NodeScanOp {
168    /// Variable name to bind the node to.
169    pub variable: String,
170    /// Optional label filter.
171    pub label: Option<String>,
172    /// Child operator (if any, for chained patterns).
173    pub input: Option<Box<LogicalOperator>>,
174}
175
176/// Scan edges from the graph.
177#[derive(Debug, Clone)]
178pub struct EdgeScanOp {
179    /// Variable name to bind the edge to.
180    pub variable: String,
181    /// Optional edge type filter.
182    pub edge_type: Option<String>,
183    /// Child operator (if any).
184    pub input: Option<Box<LogicalOperator>>,
185}
186
187/// Expand from nodes to their neighbors.
188#[derive(Debug, Clone)]
189pub struct ExpandOp {
190    /// Source node variable.
191    pub from_variable: String,
192    /// Target node variable to bind.
193    pub to_variable: String,
194    /// Edge variable to bind (optional).
195    pub edge_variable: Option<String>,
196    /// Direction of expansion.
197    pub direction: ExpandDirection,
198    /// Optional edge type filter.
199    pub edge_type: Option<String>,
200    /// Minimum hops (for variable-length patterns).
201    pub min_hops: u32,
202    /// Maximum hops (for variable-length patterns).
203    pub max_hops: Option<u32>,
204    /// Input operator.
205    pub input: Box<LogicalOperator>,
206    /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
207    /// When set, a path length column will be output under this name.
208    pub path_alias: Option<String>,
209}
210
/// Direction for edge expansion.
///
/// A plain `Copy` enum, comparable with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExpandDirection {
    /// Follow outgoing edges.
    Outgoing,
    /// Follow incoming edges.
    Incoming,
    /// Follow edges in either direction.
    Both,
}
221
222/// Join two inputs.
223#[derive(Debug, Clone)]
224pub struct JoinOp {
225    /// Left input.
226    pub left: Box<LogicalOperator>,
227    /// Right input.
228    pub right: Box<LogicalOperator>,
229    /// Join type.
230    pub join_type: JoinType,
231    /// Join conditions.
232    pub conditions: Vec<JoinCondition>,
233}
234
/// Join type.
///
/// Selects the join semantics used by [`JoinOp`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JoinType {
    /// Inner join.
    Inner,
    /// Left outer join.
    Left,
    /// Right outer join.
    Right,
    /// Full outer join.
    Full,
    /// Cross join (Cartesian product).
    Cross,
    /// Semi join (returns left rows with matching right rows).
    Semi,
    /// Anti join (returns left rows without matching right rows).
    Anti,
}
253
254/// A join condition.
255#[derive(Debug, Clone)]
256pub struct JoinCondition {
257    /// Left expression.
258    pub left: LogicalExpression,
259    /// Right expression.
260    pub right: LogicalExpression,
261}
262
263/// Aggregate with grouping.
264#[derive(Debug, Clone)]
265pub struct AggregateOp {
266    /// Group by expressions.
267    pub group_by: Vec<LogicalExpression>,
268    /// Aggregate functions.
269    pub aggregates: Vec<AggregateExpr>,
270    /// Input operator.
271    pub input: Box<LogicalOperator>,
272    /// HAVING clause filter (applied after aggregation).
273    pub having: Option<LogicalExpression>,
274}
275
276/// An aggregate expression.
277#[derive(Debug, Clone)]
278pub struct AggregateExpr {
279    /// Aggregate function.
280    pub function: AggregateFunction,
281    /// Expression to aggregate.
282    pub expression: Option<LogicalExpression>,
283    /// Whether to use DISTINCT.
284    pub distinct: bool,
285    /// Alias for the result.
286    pub alias: Option<String>,
287    /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
288    pub percentile: Option<f64>,
289}
290
/// Aggregate function.
///
/// Identifies which aggregate an [`AggregateExpr`] computes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AggregateFunction {
    /// Count all rows (`COUNT(*)`).
    Count,
    /// Count non-null values (`COUNT(expr)`).
    CountNonNull,
    /// Sum values.
    Sum,
    /// Average values.
    Avg,
    /// Minimum value.
    Min,
    /// Maximum value.
    Max,
    /// Collect into list.
    Collect,
    /// Sample standard deviation (`STDEV`).
    StdDev,
    /// Population standard deviation (`STDEVP`).
    StdDevPop,
    /// Discrete percentile (`PERCENTILE_DISC`).
    PercentileDisc,
    /// Continuous percentile (`PERCENTILE_CONT`).
    PercentileCont,
}
317
318/// Filter rows based on a predicate.
319#[derive(Debug, Clone)]
320pub struct FilterOp {
321    /// The filter predicate.
322    pub predicate: LogicalExpression,
323    /// Input operator.
324    pub input: Box<LogicalOperator>,
325}
326
327/// Project specific columns.
328#[derive(Debug, Clone)]
329pub struct ProjectOp {
330    /// Columns to project.
331    pub projections: Vec<Projection>,
332    /// Input operator.
333    pub input: Box<LogicalOperator>,
334}
335
336/// A single projection (column selection or computation).
337#[derive(Debug, Clone)]
338pub struct Projection {
339    /// Expression to compute.
340    pub expression: LogicalExpression,
341    /// Alias for the result.
342    pub alias: Option<String>,
343}
344
345/// Limit the number of results.
346#[derive(Debug, Clone)]
347pub struct LimitOp {
348    /// Maximum number of rows to return.
349    pub count: usize,
350    /// Input operator.
351    pub input: Box<LogicalOperator>,
352}
353
354/// Skip a number of results.
355#[derive(Debug, Clone)]
356pub struct SkipOp {
357    /// Number of rows to skip.
358    pub count: usize,
359    /// Input operator.
360    pub input: Box<LogicalOperator>,
361}
362
363/// Sort results.
364#[derive(Debug, Clone)]
365pub struct SortOp {
366    /// Sort keys.
367    pub keys: Vec<SortKey>,
368    /// Input operator.
369    pub input: Box<LogicalOperator>,
370}
371
372/// A sort key.
373#[derive(Debug, Clone)]
374pub struct SortKey {
375    /// Expression to sort by.
376    pub expression: LogicalExpression,
377    /// Sort order.
378    pub order: SortOrder,
379}
380
/// Sort order.
///
/// Used by [`SortKey`] to pick ascending or descending ordering.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SortOrder {
    /// Ascending order.
    Ascending,
    /// Descending order.
    Descending,
}
389
390/// Remove duplicate results.
391#[derive(Debug, Clone)]
392pub struct DistinctOp {
393    /// Input operator.
394    pub input: Box<LogicalOperator>,
395    /// Optional columns to use for deduplication.
396    /// If None, all columns are used.
397    pub columns: Option<Vec<String>>,
398}
399
400/// Create a new node.
401#[derive(Debug, Clone)]
402pub struct CreateNodeOp {
403    /// Variable name to bind the created node to.
404    pub variable: String,
405    /// Labels for the new node.
406    pub labels: Vec<String>,
407    /// Properties for the new node.
408    pub properties: Vec<(String, LogicalExpression)>,
409    /// Input operator (for chained creates).
410    pub input: Option<Box<LogicalOperator>>,
411}
412
413/// Create a new edge.
414#[derive(Debug, Clone)]
415pub struct CreateEdgeOp {
416    /// Variable name to bind the created edge to.
417    pub variable: Option<String>,
418    /// Source node variable.
419    pub from_variable: String,
420    /// Target node variable.
421    pub to_variable: String,
422    /// Edge type.
423    pub edge_type: String,
424    /// Properties for the new edge.
425    pub properties: Vec<(String, LogicalExpression)>,
426    /// Input operator.
427    pub input: Box<LogicalOperator>,
428}
429
430/// Delete a node.
431#[derive(Debug, Clone)]
432pub struct DeleteNodeOp {
433    /// Variable of the node to delete.
434    pub variable: String,
435    /// Whether to detach (delete connected edges) before deleting.
436    pub detach: bool,
437    /// Input operator.
438    pub input: Box<LogicalOperator>,
439}
440
441/// Delete an edge.
442#[derive(Debug, Clone)]
443pub struct DeleteEdgeOp {
444    /// Variable of the edge to delete.
445    pub variable: String,
446    /// Input operator.
447    pub input: Box<LogicalOperator>,
448}
449
450/// Set properties on a node or edge.
451#[derive(Debug, Clone)]
452pub struct SetPropertyOp {
453    /// Variable of the entity to update.
454    pub variable: String,
455    /// Properties to set (name -> expression).
456    pub properties: Vec<(String, LogicalExpression)>,
457    /// Whether to replace all properties (vs. merge).
458    pub replace: bool,
459    /// Input operator.
460    pub input: Box<LogicalOperator>,
461}
462
463/// Add labels to a node.
464#[derive(Debug, Clone)]
465pub struct AddLabelOp {
466    /// Variable of the node to update.
467    pub variable: String,
468    /// Labels to add.
469    pub labels: Vec<String>,
470    /// Input operator.
471    pub input: Box<LogicalOperator>,
472}
473
474/// Remove labels from a node.
475#[derive(Debug, Clone)]
476pub struct RemoveLabelOp {
477    /// Variable of the node to update.
478    pub variable: String,
479    /// Labels to remove.
480    pub labels: Vec<String>,
481    /// Input operator.
482    pub input: Box<LogicalOperator>,
483}
484
// ==================== RDF/SPARQL Operators ====================
486
487/// Scan RDF triples matching a pattern.
488#[derive(Debug, Clone)]
489pub struct TripleScanOp {
490    /// Subject pattern (variable name or IRI).
491    pub subject: TripleComponent,
492    /// Predicate pattern (variable name or IRI).
493    pub predicate: TripleComponent,
494    /// Object pattern (variable name, IRI, or literal).
495    pub object: TripleComponent,
496    /// Named graph (optional).
497    pub graph: Option<TripleComponent>,
498    /// Input operator (for chained patterns).
499    pub input: Option<Box<LogicalOperator>>,
500}
501
502/// A component of a triple pattern.
503#[derive(Debug, Clone)]
504pub enum TripleComponent {
505    /// A variable to bind.
506    Variable(String),
507    /// A constant IRI.
508    Iri(String),
509    /// A constant literal value.
510    Literal(Value),
511}
512
513/// Union of multiple result sets.
514#[derive(Debug, Clone)]
515pub struct UnionOp {
516    /// Inputs to union together.
517    pub inputs: Vec<LogicalOperator>,
518}
519
520/// Left outer join for OPTIONAL patterns.
521#[derive(Debug, Clone)]
522pub struct LeftJoinOp {
523    /// Left (required) input.
524    pub left: Box<LogicalOperator>,
525    /// Right (optional) input.
526    pub right: Box<LogicalOperator>,
527    /// Optional filter condition.
528    pub condition: Option<LogicalExpression>,
529}
530
531/// Anti-join for MINUS patterns.
532#[derive(Debug, Clone)]
533pub struct AntiJoinOp {
534    /// Left input (results to keep if no match on right).
535    pub left: Box<LogicalOperator>,
536    /// Right input (patterns to exclude).
537    pub right: Box<LogicalOperator>,
538}
539
540/// Bind a variable to an expression.
541#[derive(Debug, Clone)]
542pub struct BindOp {
543    /// Expression to compute.
544    pub expression: LogicalExpression,
545    /// Variable to bind the result to.
546    pub variable: String,
547    /// Input operator.
548    pub input: Box<LogicalOperator>,
549}
550
551/// Unwind a list into individual rows.
552///
553/// For each input row, evaluates the expression (which should return a list)
554/// and emits one row for each element in the list.
555#[derive(Debug, Clone)]
556pub struct UnwindOp {
557    /// The list expression to unwind.
558    pub expression: LogicalExpression,
559    /// The variable name for each element.
560    pub variable: String,
561    /// Input operator.
562    pub input: Box<LogicalOperator>,
563}
564
565/// Merge a pattern (match or create).
566///
567/// MERGE tries to match a pattern in the graph. If found, returns the existing
568/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
569/// (optionally applying ON CREATE SET).
570#[derive(Debug, Clone)]
571pub struct MergeOp {
572    /// The node to merge.
573    pub variable: String,
574    /// Labels to match/create.
575    pub labels: Vec<String>,
576    /// Properties that must match (used for both matching and creation).
577    pub match_properties: Vec<(String, LogicalExpression)>,
578    /// Properties to set on CREATE.
579    pub on_create: Vec<(String, LogicalExpression)>,
580    /// Properties to set on MATCH.
581    pub on_match: Vec<(String, LogicalExpression)>,
582    /// Input operator.
583    pub input: Box<LogicalOperator>,
584}
585
586/// Find shortest path between two nodes.
587///
588/// This operator uses Dijkstra's algorithm to find the shortest path(s)
589/// between a source node and a target node, optionally filtered by edge type.
590#[derive(Debug, Clone)]
591pub struct ShortestPathOp {
592    /// Input operator providing source/target nodes.
593    pub input: Box<LogicalOperator>,
594    /// Variable name for the source node.
595    pub source_var: String,
596    /// Variable name for the target node.
597    pub target_var: String,
598    /// Optional edge type filter.
599    pub edge_type: Option<String>,
600    /// Direction of edge traversal.
601    pub direction: ExpandDirection,
602    /// Variable name to bind the path result.
603    pub path_alias: String,
604    /// Whether to find all shortest paths (vs. just one).
605    pub all_paths: bool,
606}
607
// ==================== SPARQL Update Operators ====================
609
610/// Insert RDF triples.
611#[derive(Debug, Clone)]
612pub struct InsertTripleOp {
613    /// Subject of the triple.
614    pub subject: TripleComponent,
615    /// Predicate of the triple.
616    pub predicate: TripleComponent,
617    /// Object of the triple.
618    pub object: TripleComponent,
619    /// Named graph (optional).
620    pub graph: Option<String>,
621    /// Input operator (provides variable bindings).
622    pub input: Option<Box<LogicalOperator>>,
623}
624
625/// Delete RDF triples.
626#[derive(Debug, Clone)]
627pub struct DeleteTripleOp {
628    /// Subject pattern.
629    pub subject: TripleComponent,
630    /// Predicate pattern.
631    pub predicate: TripleComponent,
632    /// Object pattern.
633    pub object: TripleComponent,
634    /// Named graph (optional).
635    pub graph: Option<String>,
636    /// Input operator (provides variable bindings).
637    pub input: Option<Box<LogicalOperator>>,
638}
639
640/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
641///
642/// Per SPARQL 1.1 Update spec, this operator:
643/// 1. Evaluates the WHERE clause once to get bindings
644/// 2. Applies DELETE templates using those bindings
645/// 3. Applies INSERT templates using the SAME bindings
646///
647/// This ensures DELETE and INSERT see consistent data.
648#[derive(Debug, Clone)]
649pub struct ModifyOp {
650    /// DELETE triple templates (patterns with variables).
651    pub delete_templates: Vec<TripleTemplate>,
652    /// INSERT triple templates (patterns with variables).
653    pub insert_templates: Vec<TripleTemplate>,
654    /// WHERE clause that provides variable bindings.
655    pub where_clause: Box<LogicalOperator>,
656    /// Named graph context (for WITH clause).
657    pub graph: Option<String>,
658}
659
660/// A triple template for DELETE/INSERT operations.
661#[derive(Debug, Clone)]
662pub struct TripleTemplate {
663    /// Subject (may be a variable).
664    pub subject: TripleComponent,
665    /// Predicate (may be a variable).
666    pub predicate: TripleComponent,
667    /// Object (may be a variable or literal).
668    pub object: TripleComponent,
669    /// Named graph (optional).
670    pub graph: Option<String>,
671}
672
/// Clear all triples from a graph.
///
/// Payload of `LogicalOperator::ClearGraph`.
#[derive(Debug, Clone)]
pub struct ClearGraphOp {
    /// Target graph (`None` = default graph, `Some("")` = all named,
    /// `Some(iri)` = specific graph).
    pub graph: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
681
/// Create a new named graph.
///
/// Payload of `LogicalOperator::CreateGraph`.
#[derive(Debug, Clone)]
pub struct CreateGraphOp {
    /// IRI of the graph to create.
    pub graph: String,
    /// Whether to silently ignore if graph already exists.
    pub silent: bool,
}
690
/// Drop (remove) a named graph.
///
/// Payload of `LogicalOperator::DropGraph`.
#[derive(Debug, Clone)]
pub struct DropGraphOp {
    /// Target graph (`None` = default graph).
    pub graph: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
699
/// Load data from a URL into a graph.
///
/// Payload of `LogicalOperator::LoadGraph`.
#[derive(Debug, Clone)]
pub struct LoadGraphOp {
    /// Source URL to load data from.
    pub source: String,
    /// Destination graph (`None` = default graph).
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
710
/// Copy triples from one graph to another.
///
/// Payload of `LogicalOperator::CopyGraph`.
#[derive(Debug, Clone)]
pub struct CopyGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
721
/// Move triples from one graph to another.
///
/// Payload of `LogicalOperator::MoveGraph`.
#[derive(Debug, Clone)]
pub struct MoveGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
732
/// Add (merge) triples from one graph to another.
///
/// Payload of `LogicalOperator::AddGraph`.
#[derive(Debug, Clone)]
pub struct AddGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
743
// ==================== Vector Search Operators ====================
745
746/// Vector similarity scan operation.
747///
748/// Performs approximate nearest neighbor search using a vector index (HNSW)
749/// or brute-force search for small datasets. Returns nodes/edges whose
750/// embeddings are similar to the query vector.
751///
752/// # Example GQL
753///
754/// ```gql
755/// MATCH (m:Movie)
756/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
757/// RETURN m.title
758/// ```
759#[derive(Debug, Clone)]
760pub struct VectorScanOp {
761    /// Variable name to bind matching entities to.
762    pub variable: String,
763    /// Name of the vector index to use (None = brute-force).
764    pub index_name: Option<String>,
765    /// Property containing the vector embedding.
766    pub property: String,
767    /// Optional label filter (scan only nodes with this label).
768    pub label: Option<String>,
769    /// The query vector expression.
770    pub query_vector: LogicalExpression,
771    /// Number of nearest neighbors to return.
772    pub k: usize,
773    /// Distance metric (None = use index default, typically cosine).
774    pub metric: Option<VectorMetric>,
775    /// Minimum similarity threshold (filters results below this).
776    pub min_similarity: Option<f32>,
777    /// Maximum distance threshold (filters results above this).
778    pub max_distance: Option<f32>,
779    /// Input operator (for hybrid queries combining graph + vector).
780    pub input: Option<Box<LogicalOperator>>,
781}
782
/// Vector distance/similarity metric for vector scan operations.
///
/// A plain `Copy` enum, comparable with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VectorMetric {
    /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
    Cosine,
    /// Euclidean (L2) distance. Best when magnitude matters.
    Euclidean,
    /// Dot product. Best for maximum inner product search.
    DotProduct,
    /// Manhattan (L1) distance. Less sensitive to outliers.
    Manhattan,
}
795
796/// Join graph patterns with vector similarity search.
797///
798/// This operator takes entities from the left input and computes vector
799/// similarity against a query vector, outputting (entity, distance) pairs.
800///
801/// # Use Cases
802///
803/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
804/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
805/// 3. **Filtering by similarity**: Join with threshold-based filtering
806///
807/// # Example
808///
809/// ```gql
810/// // Find movies similar to what the user liked
811/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
812/// WITH avg(liked.embedding) AS user_taste
813/// VECTOR JOIN (m:Movie) ON m.embedding
814/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
815/// RETURN m.title
816/// ```
817#[derive(Debug, Clone)]
818pub struct VectorJoinOp {
819    /// Input operator providing entities to match against.
820    pub input: Box<LogicalOperator>,
821    /// Variable from input to extract vectors from (for entity-to-entity similarity).
822    /// If None, uses `query_vector` directly.
823    pub left_vector_variable: Option<String>,
824    /// Property containing the left vector (used with `left_vector_variable`).
825    pub left_property: Option<String>,
826    /// The query vector expression (constant or computed).
827    pub query_vector: LogicalExpression,
828    /// Variable name to bind the right-side matching entities.
829    pub right_variable: String,
830    /// Property containing the right-side vector embeddings.
831    pub right_property: String,
832    /// Optional label filter for right-side entities.
833    pub right_label: Option<String>,
834    /// Name of vector index on right side (None = brute-force).
835    pub index_name: Option<String>,
836    /// Number of nearest neighbors per left-side entity.
837    pub k: usize,
838    /// Distance metric.
839    pub metric: Option<VectorMetric>,
840    /// Minimum similarity threshold.
841    pub min_similarity: Option<f32>,
842    /// Maximum distance threshold.
843    pub max_distance: Option<f32>,
844    /// Variable to bind the distance/similarity score.
845    pub score_variable: Option<String>,
846}
847
848/// Return results (terminal operator).
849#[derive(Debug, Clone)]
850pub struct ReturnOp {
851    /// Items to return.
852    pub items: Vec<ReturnItem>,
853    /// Whether to return distinct results.
854    pub distinct: bool,
855    /// Input operator.
856    pub input: Box<LogicalOperator>,
857}
858
859/// A single return item.
860#[derive(Debug, Clone)]
861pub struct ReturnItem {
862    /// Expression to return.
863    pub expression: LogicalExpression,
864    /// Alias for the result column.
865    pub alias: Option<String>,
866}
867
868/// Define a property graph schema (SQL/PGQ DDL).
869#[derive(Debug, Clone)]
870pub struct CreatePropertyGraphOp {
871    /// Graph name.
872    pub name: String,
873    /// Node table schemas (label name + column definitions).
874    pub node_tables: Vec<PropertyGraphNodeTable>,
875    /// Edge table schemas (type name + column definitions + references).
876    pub edge_tables: Vec<PropertyGraphEdgeTable>,
877}
878
/// A node table in a property graph definition.
///
/// Column types are carried as plain strings; nothing in this struct
/// validates them.
#[derive(Debug, Clone)]
pub struct PropertyGraphNodeTable {
    /// Table name (maps to a node label).
    pub name: String,
    /// Column definitions as `(name, type_name)` pairs.
    pub columns: Vec<(String, String)>,
}
887
/// An edge table in a property graph definition.
///
/// Source and target are referenced by table name; column types are carried
/// as plain strings.
#[derive(Debug, Clone)]
pub struct PropertyGraphEdgeTable {
    /// Table name (maps to an edge type).
    pub name: String,
    /// Column definitions as `(name, type_name)` pairs.
    pub columns: Vec<(String, String)>,
    /// Source node table name.
    pub source_table: String,
    /// Target node table name.
    pub target_table: String,
}
900
// ==================== Procedure Call Types ====================
902
903/// A CALL procedure operation.
904///
905/// ```text
906/// CALL grafeo.pagerank({damping: 0.85}) YIELD nodeId, score
907/// ```
908#[derive(Debug, Clone)]
909pub struct CallProcedureOp {
910    /// Dotted procedure name, e.g. `["grafeo", "pagerank"]`.
911    pub name: Vec<String>,
912    /// Argument expressions (constants in Phase 1).
913    pub arguments: Vec<LogicalExpression>,
914    /// Optional YIELD clause: which columns to expose + aliases.
915    pub yield_items: Option<Vec<ProcedureYield>>,
916}
917
/// A single YIELD item in a procedure call.
///
/// Names one result column and, optionally, the alias it is exposed under.
#[derive(Debug, Clone)]
pub struct ProcedureYield {
    /// Column name from the procedure result.
    pub field_name: String,
    /// Optional alias (`YIELD score AS rank`).
    pub alias: Option<String>,
}
926
927/// A logical expression.
928#[derive(Debug, Clone)]
929pub enum LogicalExpression {
930    /// A literal value.
931    Literal(Value),
932
933    /// A variable reference.
934    Variable(String),
935
936    /// Property access (e.g., n.name).
937    Property {
938        /// The variable to access.
939        variable: String,
940        /// The property name.
941        property: String,
942    },
943
944    /// Binary operation.
945    Binary {
946        /// Left operand.
947        left: Box<LogicalExpression>,
948        /// Operator.
949        op: BinaryOp,
950        /// Right operand.
951        right: Box<LogicalExpression>,
952    },
953
954    /// Unary operation.
955    Unary {
956        /// Operator.
957        op: UnaryOp,
958        /// Operand.
959        operand: Box<LogicalExpression>,
960    },
961
962    /// Function call.
963    FunctionCall {
964        /// Function name.
965        name: String,
966        /// Arguments.
967        args: Vec<LogicalExpression>,
968        /// Whether DISTINCT is applied (e.g., COUNT(DISTINCT x)).
969        distinct: bool,
970    },
971
972    /// List literal.
973    List(Vec<LogicalExpression>),
974
975    /// Map literal (e.g., {name: 'Alice', age: 30}).
976    Map(Vec<(String, LogicalExpression)>),
977
978    /// Index access (e.g., `list[0]`).
979    IndexAccess {
980        /// The base expression (typically a list or string).
981        base: Box<LogicalExpression>,
982        /// The index expression.
983        index: Box<LogicalExpression>,
984    },
985
986    /// Slice access (e.g., list[1..3]).
987    SliceAccess {
988        /// The base expression (typically a list or string).
989        base: Box<LogicalExpression>,
990        /// Start index (None means from beginning).
991        start: Option<Box<LogicalExpression>>,
992        /// End index (None means to end).
993        end: Option<Box<LogicalExpression>>,
994    },
995
996    /// CASE expression.
997    Case {
998        /// Test expression (for simple CASE).
999        operand: Option<Box<LogicalExpression>>,
1000        /// WHEN clauses.
1001        when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
1002        /// ELSE clause.
1003        else_clause: Option<Box<LogicalExpression>>,
1004    },
1005
1006    /// Parameter reference.
1007    Parameter(String),
1008
1009    /// Labels of a node.
1010    Labels(String),
1011
1012    /// Type of an edge.
1013    Type(String),
1014
1015    /// ID of a node or edge.
1016    Id(String),
1017
1018    /// List comprehension: [x IN list WHERE predicate | expression]
1019    ListComprehension {
1020        /// Variable name for each element.
1021        variable: String,
1022        /// The source list expression.
1023        list_expr: Box<LogicalExpression>,
1024        /// Optional filter predicate.
1025        filter_expr: Option<Box<LogicalExpression>>,
1026        /// The mapping expression for each element.
1027        map_expr: Box<LogicalExpression>,
1028    },
1029
1030    /// EXISTS subquery.
1031    ExistsSubquery(Box<LogicalOperator>),
1032
1033    /// COUNT subquery.
1034    CountSubquery(Box<LogicalOperator>),
1035}
1036
/// Operators that take a left and a right operand.
///
/// This is the `op` carried by `LogicalExpression::Binary`. Variants are
/// listed by family: comparison, boolean logic, arithmetic, string
/// operations, then membership / pattern matching and exponentiation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum BinaryOp {
    // ---- Comparison ----
    /// `=`: equality comparison.
    Eq,
    /// `<>`: inequality comparison.
    Ne,
    /// `<`: less than.
    Lt,
    /// `<=`: less than or equal.
    Le,
    /// `>`: greater than.
    Gt,
    /// `>=`: greater than or equal.
    Ge,

    // ---- Boolean logic ----
    /// Logical `AND`.
    And,
    /// Logical `OR`.
    Or,
    /// Logical `XOR`.
    Xor,

    // ---- Arithmetic ----
    /// `+`: addition.
    Add,
    /// `-`: subtraction.
    Sub,
    /// `*`: multiplication.
    Mul,
    /// `/`: division.
    Div,
    /// `%`: modulo.
    Mod,

    // ---- Strings ----
    /// Concatenation of strings.
    Concat,
    /// Test whether a string starts with another.
    StartsWith,
    /// Test whether a string ends with another.
    EndsWith,
    /// Test whether a string contains another.
    Contains,

    // ---- Membership, pattern matching, exponentiation ----
    /// `IN`: collection membership.
    In,
    /// `LIKE`: pattern matching.
    Like,
    /// `=~`: regex matching.
    Regex,
    /// `^`: power / exponentiation.
    Pow,
}
1089
/// Operators that take a single operand.
///
/// This is the `op` carried by `LogicalExpression::Unary`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum UnaryOp {
    // ---- Negation ----
    /// Logical `NOT`.
    Not,
    /// Numeric negation.
    Neg,

    // ---- Null checks ----
    /// `IS NULL` check.
    IsNull,
    /// `IS NOT NULL` check.
    IsNotNull,
}
1102
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the leaf both plans sit on: a `NodeScan` binding `n` with the
    /// label `Person` and no upstream input.
    fn person_scan() -> LogicalOperator {
        LogicalOperator::NodeScan(NodeScanOp {
            variable: "n".into(),
            label: Some("Person".into()),
            input: None,
        })
    }

    /// A `Return` directly over a `NodeScan` keeps its items, its `distinct`
    /// flag, and the scan's variable and label.
    #[test]
    fn test_simple_node_scan_plan() {
        let returned_variable = ReturnItem {
            expression: LogicalExpression::Variable("n".into()),
            alias: None,
        };
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![returned_variable],
            distinct: false,
            input: Box::new(person_scan()),
        }));

        // Verify structure, peeling one operator at a time.
        let ret = match &plan.root {
            LogicalOperator::Return(ret) => ret,
            _ => panic!("Expected Return"),
        };
        assert_eq!(ret.items.len(), 1);
        assert!(!ret.distinct);

        let scan = match &*ret.input {
            LogicalOperator::NodeScan(scan) => scan,
            _ => panic!("Expected NodeScan"),
        };
        assert_eq!(scan.variable, "n");
        assert_eq!(scan.label, Some("Person".into()));
    }

    /// A `Return` over a `Filter` over a `NodeScan` exposes the filter's
    /// binary predicate with the operator it was built with.
    #[test]
    fn test_filter_plan() {
        // Predicate: n.age > 30
        let age_above_thirty = LogicalExpression::Binary {
            left: Box::new(LogicalExpression::Property {
                variable: "n".into(),
                property: "age".into(),
            }),
            op: BinaryOp::Gt,
            right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
        };
        let filtered_scan = LogicalOperator::Filter(FilterOp {
            predicate: age_above_thirty,
            input: Box::new(person_scan()),
        });
        // Returned item: n.name AS name
        let name_item = ReturnItem {
            expression: LogicalExpression::Property {
                variable: "n".into(),
                property: "name".into(),
            },
            alias: Some("name".into()),
        };
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![name_item],
            distinct: false,
            input: Box::new(filtered_scan),
        }));

        let ret = match &plan.root {
            LogicalOperator::Return(ret) => ret,
            _ => panic!("Expected Return"),
        };
        let filter = match &*ret.input {
            LogicalOperator::Filter(filter) => filter,
            _ => panic!("Expected Filter"),
        };
        match &filter.predicate {
            LogicalExpression::Binary { op, .. } => assert_eq!(*op, BinaryOp::Gt),
            _ => panic!("Expected Binary expression"),
        }
    }
}