Skip to main content

grafeo_engine/query/
plan.rs

//! Logical query plan representation.
//!
//! The logical plan is the intermediate representation between parsed queries
//! and physical execution. GQL and Cypher queries, as well as SPARQL queries
//! and updates (see the RDF/SPARQL operators), are translated to this common
//! representation.
6
7use grafeo_common::types::Value;
8
/// A logical query plan.
///
/// A thin wrapper around the root [`LogicalOperator`]; the rest of the plan
/// is reachable through the child operators nested inside the root.
#[derive(Debug, Clone)]
pub struct LogicalPlan {
    /// The root operator of the plan.
    pub root: LogicalOperator,
}
15
16impl LogicalPlan {
17    /// Creates a new logical plan with the given root operator.
18    pub fn new(root: LogicalOperator) -> Self {
19        Self { root }
20    }
21}
22
/// A logical operator in the query plan.
///
/// Every variant except [`LogicalOperator::Empty`] wraps a dedicated `*Op`
/// payload struct that carries the operator's parameters and, where
/// applicable, its boxed child operator(s).
#[derive(Debug, Clone)]
pub enum LogicalOperator {
    /// Scan all nodes, optionally filtered by label.
    NodeScan(NodeScanOp),

    /// Scan all edges, optionally filtered by type.
    EdgeScan(EdgeScanOp),

    /// Expand from nodes to neighbors via edges.
    Expand(ExpandOp),

    /// Filter rows based on a predicate.
    Filter(FilterOp),

    /// Project specific columns.
    Project(ProjectOp),

    /// Join two inputs.
    Join(JoinOp),

    /// Aggregate with grouping.
    Aggregate(AggregateOp),

    /// Limit the number of results.
    Limit(LimitOp),

    /// Skip a number of results.
    Skip(SkipOp),

    /// Sort results.
    Sort(SortOp),

    /// Remove duplicate results.
    Distinct(DistinctOp),

    /// Create a new node.
    CreateNode(CreateNodeOp),

    /// Create a new edge.
    CreateEdge(CreateEdgeOp),

    /// Delete a node.
    DeleteNode(DeleteNodeOp),

    /// Delete an edge.
    DeleteEdge(DeleteEdgeOp),

    /// Set properties on a node or edge.
    SetProperty(SetPropertyOp),

    /// Add labels to a node.
    AddLabel(AddLabelOp),

    /// Remove labels from a node.
    RemoveLabel(RemoveLabelOp),

    /// Return results (terminal operator).
    Return(ReturnOp),

    /// Empty result set.
    Empty,

    // ==================== RDF/SPARQL Operators ====================
    // NOTE(review): `Bind`, `Unwind`, `Merge` and `ShortestPath` sit under this
    // header, but their payload docs do not look RDF-specific (e.g. MERGE with
    // ON MATCH SET / ON CREATE SET) — confirm whether the grouping is intended.
    /// Scan RDF triples matching a pattern.
    TripleScan(TripleScanOp),

    /// Union of multiple result sets.
    Union(UnionOp),

    /// Left outer join for OPTIONAL patterns.
    LeftJoin(LeftJoinOp),

    /// Anti-join for MINUS patterns.
    AntiJoin(AntiJoinOp),

    /// Bind a variable to an expression.
    Bind(BindOp),

    /// Unwind a list into individual rows.
    Unwind(UnwindOp),

    /// Merge a pattern (match or create).
    Merge(MergeOp),

    /// Find shortest path between nodes.
    ShortestPath(ShortestPathOp),

    // ==================== SPARQL Update Operators ====================
    /// Insert RDF triples.
    InsertTriple(InsertTripleOp),

    /// Delete RDF triples.
    DeleteTriple(DeleteTripleOp),

    /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
    /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
    Modify(ModifyOp),

    /// Clear a graph (remove all triples).
    ClearGraph(ClearGraphOp),

    /// Create a new named graph.
    CreateGraph(CreateGraphOp),

    /// Drop (remove) a named graph.
    DropGraph(DropGraphOp),

    /// Load data from a URL into a graph.
    LoadGraph(LoadGraphOp),

    /// Copy triples from one graph to another.
    CopyGraph(CopyGraphOp),

    /// Move triples from one graph to another.
    MoveGraph(MoveGraphOp),

    /// Add (merge) triples from one graph to another.
    AddGraph(AddGraphOp),

    // ==================== Vector Search Operators ====================
    /// Scan using vector similarity search.
    VectorScan(VectorScanOp),

    /// Join graph patterns with vector similarity search.
    ///
    /// Computes vector distances between entities from the left input and
    /// a query vector, then joins with similarity scores. Useful for:
    /// - Filtering graph traversal results by vector similarity
    /// - Computing aggregated embeddings and finding similar entities
    /// - Combining multiple vector sources with graph structure
    VectorJoin(VectorJoinOp),
}
156
/// Scan nodes from the graph.
///
/// Payload of [`LogicalOperator::NodeScan`].
#[derive(Debug, Clone)]
pub struct NodeScanOp {
    /// Variable name to bind the node to.
    pub variable: String,
    /// Optional label filter; `None` means no label filter is applied.
    pub label: Option<String>,
    /// Child operator (if any, for chained patterns).
    pub input: Option<Box<LogicalOperator>>,
}
167
/// Scan edges from the graph.
///
/// Payload of [`LogicalOperator::EdgeScan`].
#[derive(Debug, Clone)]
pub struct EdgeScanOp {
    /// Variable name to bind the edge to.
    pub variable: String,
    /// Optional edge type filter; `None` means no type filter is applied.
    pub edge_type: Option<String>,
    /// Child operator (if any).
    pub input: Option<Box<LogicalOperator>>,
}
178
/// Expand from nodes to their neighbors.
///
/// Payload of [`LogicalOperator::Expand`]. Covers both single-hop and
/// variable-length patterns via `min_hops` / `max_hops`.
#[derive(Debug, Clone)]
pub struct ExpandOp {
    /// Source node variable.
    pub from_variable: String,
    /// Target node variable to bind.
    pub to_variable: String,
    /// Edge variable to bind (optional).
    pub edge_variable: Option<String>,
    /// Direction of expansion.
    pub direction: ExpandDirection,
    /// Optional edge type filter.
    pub edge_type: Option<String>,
    /// Minimum hops (for variable-length patterns).
    pub min_hops: u32,
    /// Maximum hops (for variable-length patterns).
    ///
    /// NOTE(review): `None` presumably means "no upper bound" — confirm
    /// against the planner/executor.
    pub max_hops: Option<u32>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
    /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
    /// When set, a path length column will be output under this name.
    pub path_alias: Option<String>,
}
202
/// Direction for edge expansion.
///
/// Derives `Hash` alongside `Eq` so a direction can be used as a key in
/// hashed collections (`HashMap`, `HashSet`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExpandDirection {
    /// Follow outgoing edges.
    Outgoing,
    /// Follow incoming edges.
    Incoming,
    /// Follow edges in either direction.
    Both,
}
213
/// Join two inputs.
///
/// Payload of [`LogicalOperator::Join`].
#[derive(Debug, Clone)]
pub struct JoinOp {
    /// Left input.
    pub left: Box<LogicalOperator>,
    /// Right input.
    pub right: Box<LogicalOperator>,
    /// Join type.
    pub join_type: JoinType,
    /// Join conditions.
    pub conditions: Vec<JoinCondition>,
}
226
/// Join type.
///
/// Derives `Hash` alongside `Eq` so a join type can be used as a key in
/// hashed collections (`HashMap`, `HashSet`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum JoinType {
    /// Inner join.
    Inner,
    /// Left outer join.
    Left,
    /// Right outer join.
    Right,
    /// Full outer join.
    Full,
    /// Cross join (Cartesian product).
    Cross,
    /// Semi join (returns left rows with matching right rows).
    Semi,
    /// Anti join (returns left rows without matching right rows).
    Anti,
}
245
/// A join condition.
///
/// Pairs a `left` expression with a `right` expression.
///
/// NOTE(review): no comparison operator is stored, so the two sides are
/// presumably compared for equality — confirm against the join executor.
#[derive(Debug, Clone)]
pub struct JoinCondition {
    /// Left expression.
    pub left: LogicalExpression,
    /// Right expression.
    pub right: LogicalExpression,
}
254
/// Aggregate with grouping.
///
/// Payload of [`LogicalOperator::Aggregate`].
#[derive(Debug, Clone)]
pub struct AggregateOp {
    /// Group by expressions.
    pub group_by: Vec<LogicalExpression>,
    /// Aggregate functions.
    pub aggregates: Vec<AggregateExpr>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
    /// HAVING clause filter (applied after aggregation).
    pub having: Option<LogicalExpression>,
}
267
/// An aggregate expression.
///
/// One entry of [`AggregateOp::aggregates`]: the function to apply plus its
/// argument and modifiers.
#[derive(Debug, Clone)]
pub struct AggregateExpr {
    /// Aggregate function.
    pub function: AggregateFunction,
    /// Expression to aggregate.
    ///
    /// NOTE(review): `None` is presumably used for argument-less aggregates
    /// such as `COUNT(*)` — confirm against the translator.
    pub expression: Option<LogicalExpression>,
    /// Whether to use DISTINCT.
    pub distinct: bool,
    /// Alias for the result.
    pub alias: Option<String>,
    /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
    pub percentile: Option<f64>,
}
282
/// Aggregate function.
///
/// Derives `Hash` alongside `Eq` so a function kind can be used as a key in
/// hashed collections (`HashMap`, `HashSet`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AggregateFunction {
    /// Count all rows (COUNT(*)).
    Count,
    /// Count non-null values (COUNT(expr)).
    CountNonNull,
    /// Sum values.
    Sum,
    /// Average values.
    Avg,
    /// Minimum value.
    Min,
    /// Maximum value.
    Max,
    /// Collect into list.
    Collect,
    /// Sample standard deviation (STDEV).
    StdDev,
    /// Population standard deviation (STDEVP).
    StdDevPop,
    /// Discrete percentile (PERCENTILE_DISC).
    PercentileDisc,
    /// Continuous percentile (PERCENTILE_CONT).
    PercentileCont,
}
309
/// Filter rows based on a predicate.
///
/// Payload of [`LogicalOperator::Filter`].
#[derive(Debug, Clone)]
pub struct FilterOp {
    /// The filter predicate.
    pub predicate: LogicalExpression,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
318
/// Project specific columns.
///
/// Payload of [`LogicalOperator::Project`].
#[derive(Debug, Clone)]
pub struct ProjectOp {
    /// Columns to project.
    pub projections: Vec<Projection>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
327
/// A single projection (column selection or computation).
///
/// One entry of [`ProjectOp::projections`].
#[derive(Debug, Clone)]
pub struct Projection {
    /// Expression to compute.
    pub expression: LogicalExpression,
    /// Alias for the result.
    pub alias: Option<String>,
}
336
/// Limit the number of results.
///
/// Payload of [`LogicalOperator::Limit`].
#[derive(Debug, Clone)]
pub struct LimitOp {
    /// Maximum number of rows to return.
    pub count: usize,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
345
/// Skip a number of results.
///
/// Payload of [`LogicalOperator::Skip`].
#[derive(Debug, Clone)]
pub struct SkipOp {
    /// Number of rows to skip.
    pub count: usize,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
354
/// Sort results.
///
/// Payload of [`LogicalOperator::Sort`].
#[derive(Debug, Clone)]
pub struct SortOp {
    /// Sort keys.
    ///
    /// NOTE(review): presumably listed in priority order (first key is the
    /// primary key) — confirm against the sort executor.
    pub keys: Vec<SortKey>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
363
/// A sort key.
///
/// One entry of [`SortOp::keys`]: an expression and the order to sort it in.
#[derive(Debug, Clone)]
pub struct SortKey {
    /// Expression to sort by.
    pub expression: LogicalExpression,
    /// Sort order.
    pub order: SortOrder,
}
372
/// Sort order.
///
/// Derives `Hash` alongside `Eq` so a sort order can be used as a key in
/// hashed collections (`HashMap`, `HashSet`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SortOrder {
    /// Ascending order.
    Ascending,
    /// Descending order.
    Descending,
}
381
/// Remove duplicate results.
///
/// Payload of [`LogicalOperator::Distinct`].
#[derive(Debug, Clone)]
pub struct DistinctOp {
    /// Input operator.
    pub input: Box<LogicalOperator>,
    /// Optional columns to use for deduplication.
    /// If `None`, all columns are used.
    pub columns: Option<Vec<String>>,
}
391
/// Create a new node.
///
/// Payload of [`LogicalOperator::CreateNode`].
#[derive(Debug, Clone)]
pub struct CreateNodeOp {
    /// Variable name to bind the created node to.
    pub variable: String,
    /// Labels for the new node.
    pub labels: Vec<String>,
    /// Properties for the new node, as `(name, value expression)` pairs.
    pub properties: Vec<(String, LogicalExpression)>,
    /// Input operator (for chained creates).
    pub input: Option<Box<LogicalOperator>>,
}
404
/// Create a new edge.
///
/// Payload of [`LogicalOperator::CreateEdge`]. Unlike [`CreateNodeOp`], the
/// input operator is mandatory and the bound variable is optional.
#[derive(Debug, Clone)]
pub struct CreateEdgeOp {
    /// Variable name to bind the created edge to.
    pub variable: Option<String>,
    /// Source node variable.
    pub from_variable: String,
    /// Target node variable.
    pub to_variable: String,
    /// Edge type.
    pub edge_type: String,
    /// Properties for the new edge, as `(name, value expression)` pairs.
    pub properties: Vec<(String, LogicalExpression)>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
421
/// Delete a node.
///
/// Payload of [`LogicalOperator::DeleteNode`].
#[derive(Debug, Clone)]
pub struct DeleteNodeOp {
    /// Variable of the node to delete.
    pub variable: String,
    /// Whether to detach (delete connected edges) before deleting.
    pub detach: bool,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
432
/// Delete an edge.
///
/// Payload of [`LogicalOperator::DeleteEdge`].
#[derive(Debug, Clone)]
pub struct DeleteEdgeOp {
    /// Variable of the edge to delete.
    pub variable: String,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
441
/// Set properties on a node or edge.
///
/// Payload of [`LogicalOperator::SetProperty`].
#[derive(Debug, Clone)]
pub struct SetPropertyOp {
    /// Variable of the entity to update.
    pub variable: String,
    /// Properties to set (name -> expression).
    pub properties: Vec<(String, LogicalExpression)>,
    /// Whether to replace all properties (vs. merge).
    pub replace: bool,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
454
/// Add labels to a node.
///
/// Payload of [`LogicalOperator::AddLabel`].
#[derive(Debug, Clone)]
pub struct AddLabelOp {
    /// Variable of the node to update.
    pub variable: String,
    /// Labels to add.
    pub labels: Vec<String>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
465
/// Remove labels from a node.
///
/// Payload of [`LogicalOperator::RemoveLabel`].
#[derive(Debug, Clone)]
pub struct RemoveLabelOp {
    /// Variable of the node to update.
    pub variable: String,
    /// Labels to remove.
    pub labels: Vec<String>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
476
477// ==================== RDF/SPARQL Operators ====================
478
/// Scan RDF triples matching a pattern.
///
/// Payload of [`LogicalOperator::TripleScan`]. Each position of the pattern
/// is a [`TripleComponent`], i.e. either a variable to bind or a constant.
#[derive(Debug, Clone)]
pub struct TripleScanOp {
    /// Subject pattern (variable name or IRI).
    pub subject: TripleComponent,
    /// Predicate pattern (variable name or IRI).
    pub predicate: TripleComponent,
    /// Object pattern (variable name, IRI, or literal).
    pub object: TripleComponent,
    /// Named graph (optional).
    pub graph: Option<TripleComponent>,
    /// Input operator (for chained patterns).
    pub input: Option<Box<LogicalOperator>>,
}
493
/// A component of a triple pattern.
///
/// Used for the subject, predicate and object positions of triple scans,
/// inserts, deletes and templates.
#[derive(Debug, Clone)]
pub enum TripleComponent {
    /// A variable to bind.
    Variable(String),
    /// A constant IRI.
    Iri(String),
    /// A constant literal value.
    Literal(Value),
}
504
/// Union of multiple result sets.
///
/// Payload of [`LogicalOperator::Union`]. Unlike the binary join operators,
/// the inputs are held in a single `Vec`.
#[derive(Debug, Clone)]
pub struct UnionOp {
    /// Inputs to union together.
    pub inputs: Vec<LogicalOperator>,
}
511
/// Left outer join for OPTIONAL patterns.
///
/// Payload of [`LogicalOperator::LeftJoin`].
#[derive(Debug, Clone)]
pub struct LeftJoinOp {
    /// Left (required) input.
    pub left: Box<LogicalOperator>,
    /// Right (optional) input.
    pub right: Box<LogicalOperator>,
    /// Optional filter condition.
    pub condition: Option<LogicalExpression>,
}
522
/// Anti-join for MINUS patterns.
///
/// Payload of [`LogicalOperator::AntiJoin`]. No explicit condition is
/// stored; only the two inputs are carried.
#[derive(Debug, Clone)]
pub struct AntiJoinOp {
    /// Left input (results to keep if no match on right).
    pub left: Box<LogicalOperator>,
    /// Right input (patterns to exclude).
    pub right: Box<LogicalOperator>,
}
531
/// Bind a variable to an expression.
///
/// Payload of [`LogicalOperator::Bind`].
#[derive(Debug, Clone)]
pub struct BindOp {
    /// Expression to compute.
    pub expression: LogicalExpression,
    /// Variable to bind the result to.
    pub variable: String,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
542
/// Unwind a list into individual rows.
///
/// Payload of [`LogicalOperator::Unwind`].
///
/// For each input row, evaluates the expression (which should return a list)
/// and emits one row for each element in the list.
#[derive(Debug, Clone)]
pub struct UnwindOp {
    /// The list expression to unwind.
    pub expression: LogicalExpression,
    /// The variable name for each element.
    pub variable: String,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
556
/// Merge a pattern (match or create).
///
/// Payload of [`LogicalOperator::Merge`].
///
/// MERGE tries to match a pattern in the graph. If found, returns the existing
/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
/// (optionally applying ON CREATE SET).
///
/// As declared, the payload describes a single node (one variable plus its
/// labels and properties); it has no fields for edges.
#[derive(Debug, Clone)]
pub struct MergeOp {
    /// The node to merge.
    pub variable: String,
    /// Labels to match/create.
    pub labels: Vec<String>,
    /// Properties that must match (used for both matching and creation).
    pub match_properties: Vec<(String, LogicalExpression)>,
    /// Properties to set on CREATE.
    pub on_create: Vec<(String, LogicalExpression)>,
    /// Properties to set on MATCH.
    pub on_match: Vec<(String, LogicalExpression)>,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
577
/// Find shortest path between two nodes.
///
/// Payload of [`LogicalOperator::ShortestPath`].
///
/// This operator uses Dijkstra's algorithm to find the shortest path(s)
/// between a source node and a target node, optionally filtered by edge type.
///
/// NOTE(review): this payload carries no edge weight/cost field, so the
/// algorithm is chosen by whatever executes the plan — confirm that the
/// Dijkstra statement above matches the physical operator.
#[derive(Debug, Clone)]
pub struct ShortestPathOp {
    /// Input operator providing source/target nodes.
    pub input: Box<LogicalOperator>,
    /// Variable name for the source node.
    pub source_var: String,
    /// Variable name for the target node.
    pub target_var: String,
    /// Optional edge type filter.
    pub edge_type: Option<String>,
    /// Direction of edge traversal.
    pub direction: ExpandDirection,
    /// Variable name to bind the path result.
    pub path_alias: String,
    /// Whether to find all shortest paths (vs. just one).
    pub all_paths: bool,
}
599
600// ==================== SPARQL Update Operators ====================
601
/// Insert RDF triples.
///
/// Payload of [`LogicalOperator::InsertTriple`].
#[derive(Debug, Clone)]
pub struct InsertTripleOp {
    /// Subject of the triple.
    pub subject: TripleComponent,
    /// Predicate of the triple.
    pub predicate: TripleComponent,
    /// Object of the triple.
    pub object: TripleComponent,
    /// Named graph (optional).
    ///
    /// Stored as a plain string here, whereas [`TripleScanOp::graph`] is a
    /// [`TripleComponent`]; the graph therefore cannot be a variable.
    pub graph: Option<String>,
    /// Input operator (provides variable bindings).
    pub input: Option<Box<LogicalOperator>>,
}
616
/// Delete RDF triples.
///
/// Payload of [`LogicalOperator::DeleteTriple`].
#[derive(Debug, Clone)]
pub struct DeleteTripleOp {
    /// Subject pattern.
    pub subject: TripleComponent,
    /// Predicate pattern.
    pub predicate: TripleComponent,
    /// Object pattern.
    pub object: TripleComponent,
    /// Named graph (optional).
    ///
    /// Stored as a plain string here, whereas [`TripleScanOp::graph`] is a
    /// [`TripleComponent`]; the graph therefore cannot be a variable.
    pub graph: Option<String>,
    /// Input operator (provides variable bindings).
    pub input: Option<Box<LogicalOperator>>,
}
631
/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
///
/// Payload of [`LogicalOperator::Modify`].
///
/// Per SPARQL 1.1 Update spec, this operator:
/// 1. Evaluates the WHERE clause once to get bindings
/// 2. Applies DELETE templates using those bindings
/// 3. Applies INSERT templates using the SAME bindings
///
/// This ensures DELETE and INSERT see consistent data.
#[derive(Debug, Clone)]
pub struct ModifyOp {
    /// DELETE triple templates (patterns with variables).
    pub delete_templates: Vec<TripleTemplate>,
    /// INSERT triple templates (patterns with variables).
    pub insert_templates: Vec<TripleTemplate>,
    /// WHERE clause that provides variable bindings.
    pub where_clause: Box<LogicalOperator>,
    /// Named graph context (for WITH clause).
    pub graph: Option<String>,
}
651
/// A triple template for DELETE/INSERT operations.
///
/// Element type of [`ModifyOp::delete_templates`] and
/// [`ModifyOp::insert_templates`].
#[derive(Debug, Clone)]
pub struct TripleTemplate {
    /// Subject (may be a variable).
    pub subject: TripleComponent,
    /// Predicate (may be a variable).
    pub predicate: TripleComponent,
    /// Object (may be a variable or literal).
    pub object: TripleComponent,
    /// Named graph (optional).
    pub graph: Option<String>,
}
664
/// Clear all triples from a graph.
///
/// Payload of [`LogicalOperator::ClearGraph`].
#[derive(Debug, Clone)]
pub struct ClearGraphOp {
    /// Target graph:
    ///
    /// - `None`: the default graph
    /// - `Some("")`: all named graphs (the empty string is a sentinel)
    /// - `Some(iri)`: the specific graph with that IRI
    pub graph: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
673
/// Create a new named graph.
///
/// Payload of [`LogicalOperator::CreateGraph`].
#[derive(Debug, Clone)]
pub struct CreateGraphOp {
    /// IRI of the graph to create.
    pub graph: String,
    /// Whether to silently ignore if graph already exists.
    pub silent: bool,
}
682
/// Drop (remove) a named graph.
///
/// Payload of [`LogicalOperator::DropGraph`].
#[derive(Debug, Clone)]
pub struct DropGraphOp {
    /// Target graph (`None` = default graph).
    pub graph: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
691
/// Load data from a URL into a graph.
///
/// Payload of [`LogicalOperator::LoadGraph`].
#[derive(Debug, Clone)]
pub struct LoadGraphOp {
    /// Source URL to load data from.
    pub source: String,
    /// Destination graph (`None` = default graph).
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
702
/// Copy triples from one graph to another.
///
/// Payload of [`LogicalOperator::CopyGraph`].
///
/// NOTE(review): `None` for `source`/`destination` presumably denotes the
/// default graph, as documented on [`DropGraphOp::graph`] — confirm.
#[derive(Debug, Clone)]
pub struct CopyGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
713
/// Move triples from one graph to another.
///
/// Payload of [`LogicalOperator::MoveGraph`].
///
/// NOTE(review): `None` for `source`/`destination` presumably denotes the
/// default graph, as documented on [`DropGraphOp::graph`] — confirm.
#[derive(Debug, Clone)]
pub struct MoveGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
724
/// Add (merge) triples from one graph to another.
///
/// Payload of [`LogicalOperator::AddGraph`].
///
/// NOTE(review): `None` for `source`/`destination` presumably denotes the
/// default graph, as documented on [`DropGraphOp::graph`] — confirm.
#[derive(Debug, Clone)]
pub struct AddGraphOp {
    /// Source graph.
    pub source: Option<String>,
    /// Destination graph.
    pub destination: Option<String>,
    /// Whether to silently ignore errors.
    pub silent: bool,
}
735
736// ==================== Vector Search Operators ====================
737
/// Vector similarity scan operation.
///
/// Payload of [`LogicalOperator::VectorScan`].
///
/// Performs approximate nearest neighbor search using a vector index (HNSW)
/// or brute-force search for small datasets. Returns nodes/edges whose
/// embeddings are similar to the query vector.
///
/// # Example GQL
///
/// ```gql
/// MATCH (m:Movie)
/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
/// RETURN m.title
/// ```
#[derive(Debug, Clone)]
pub struct VectorScanOp {
    /// Variable name to bind matching entities to.
    pub variable: String,
    /// Name of the vector index to use (`None` = brute-force).
    pub index_name: Option<String>,
    /// Property containing the vector embedding.
    pub property: String,
    /// Optional label filter (scan only nodes with this label).
    pub label: Option<String>,
    /// The query vector expression.
    pub query_vector: LogicalExpression,
    /// Number of nearest neighbors to return.
    pub k: usize,
    /// Distance metric (`None` = use index default, typically cosine).
    pub metric: Option<VectorMetric>,
    /// Minimum similarity threshold (filters results below this).
    pub min_similarity: Option<f32>,
    /// Maximum distance threshold (filters results above this).
    pub max_distance: Option<f32>,
    /// Input operator (for hybrid queries combining graph + vector).
    pub input: Option<Box<LogicalOperator>>,
}
774
/// Vector distance/similarity metric for vector scan operations.
///
/// Derives `Hash` alongside `Eq` so a metric can be used as a key in hashed
/// collections (`HashMap`, `HashSet`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VectorMetric {
    /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
    Cosine,
    /// Euclidean (L2) distance. Best when magnitude matters.
    Euclidean,
    /// Dot product. Best for maximum inner product search.
    DotProduct,
    /// Manhattan (L1) distance. Less sensitive to outliers.
    Manhattan,
}
787
/// Join graph patterns with vector similarity search.
///
/// Payload of [`LogicalOperator::VectorJoin`].
///
/// This operator takes entities from the left input and computes vector
/// similarity against a query vector, outputting (entity, distance) pairs.
///
/// # Use Cases
///
/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
/// 3. **Filtering by similarity**: Join with threshold-based filtering
///
/// # Example
///
/// ```gql
/// // Find movies similar to what the user liked
/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
/// WITH avg(liked.embedding) AS user_taste
/// VECTOR JOIN (m:Movie) ON m.embedding
/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
/// RETURN m.title
/// ```
#[derive(Debug, Clone)]
pub struct VectorJoinOp {
    /// Input operator providing entities to match against.
    pub input: Box<LogicalOperator>,
    /// Variable from input to extract vectors from (for entity-to-entity similarity).
    /// If `None`, uses `query_vector` directly.
    pub left_vector_variable: Option<String>,
    /// Property containing the left vector (used with `left_vector_variable`).
    pub left_property: Option<String>,
    /// The query vector expression (constant or computed).
    pub query_vector: LogicalExpression,
    /// Variable name to bind the right-side matching entities.
    pub right_variable: String,
    /// Property containing the right-side vector embeddings.
    pub right_property: String,
    /// Optional label filter for right-side entities.
    pub right_label: Option<String>,
    /// Name of vector index on right side (`None` = brute-force).
    pub index_name: Option<String>,
    /// Number of nearest neighbors per left-side entity.
    pub k: usize,
    /// Distance metric.
    pub metric: Option<VectorMetric>,
    /// Minimum similarity threshold.
    pub min_similarity: Option<f32>,
    /// Maximum distance threshold.
    pub max_distance: Option<f32>,
    /// Variable to bind the distance/similarity score.
    pub score_variable: Option<String>,
}
839
/// Return results (terminal operator).
///
/// Payload of [`LogicalOperator::Return`].
#[derive(Debug, Clone)]
pub struct ReturnOp {
    /// Items to return.
    pub items: Vec<ReturnItem>,
    /// Whether to return distinct results.
    pub distinct: bool,
    /// Input operator.
    pub input: Box<LogicalOperator>,
}
850
/// A single return item.
///
/// One entry of [`ReturnOp::items`].
#[derive(Debug, Clone)]
pub struct ReturnItem {
    /// Expression to return.
    pub expression: LogicalExpression,
    /// Alias for the result column.
    pub alias: Option<String>,
}
859
/// A logical expression.
///
/// A recursive expression tree: compound variants hold their operands as
/// boxed or vector-nested `LogicalExpression`s, and the subquery variants
/// embed a whole [`LogicalOperator`] tree.
#[derive(Debug, Clone)]
pub enum LogicalExpression {
    /// A literal value.
    Literal(Value),

    /// A variable reference.
    Variable(String),

    /// Property access (e.g., `n.name`).
    Property {
        /// The variable to access.
        variable: String,
        /// The property name.
        property: String,
    },

    /// Binary operation.
    Binary {
        /// Left operand.
        left: Box<LogicalExpression>,
        /// Operator.
        op: BinaryOp,
        /// Right operand.
        right: Box<LogicalExpression>,
    },

    /// Unary operation.
    Unary {
        /// Operator.
        op: UnaryOp,
        /// Operand.
        operand: Box<LogicalExpression>,
    },

    /// Function call.
    FunctionCall {
        /// Function name.
        name: String,
        /// Arguments.
        args: Vec<LogicalExpression>,
        /// Whether DISTINCT is applied (e.g., `COUNT(DISTINCT x)`).
        distinct: bool,
    },

    /// List literal.
    List(Vec<LogicalExpression>),

    /// Map literal (e.g., `{name: 'Alice', age: 30}`).
    Map(Vec<(String, LogicalExpression)>),

    /// Index access (e.g., `list[0]`).
    IndexAccess {
        /// The base expression (typically a list or string).
        base: Box<LogicalExpression>,
        /// The index expression.
        index: Box<LogicalExpression>,
    },

    /// Slice access (e.g., `list[1..3]`).
    SliceAccess {
        /// The base expression (typically a list or string).
        base: Box<LogicalExpression>,
        /// Start index (`None` means from beginning).
        start: Option<Box<LogicalExpression>>,
        /// End index (`None` means to end).
        end: Option<Box<LogicalExpression>>,
    },

    /// CASE expression.
    Case {
        /// Test expression (for simple CASE).
        operand: Option<Box<LogicalExpression>>,
        /// WHEN clauses, as `(condition, result)` pairs.
        when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
        /// ELSE clause.
        else_clause: Option<Box<LogicalExpression>>,
    },

    /// Parameter reference.
    Parameter(String),

    // NOTE(review): for `Labels`, `Type` and `Id` the `String` is presumably
    // the name of the bound variable being inspected — confirm.
    /// Labels of a node.
    Labels(String),

    /// Type of an edge.
    Type(String),

    /// ID of a node or edge.
    Id(String),

    /// List comprehension: `[x IN list WHERE predicate | expression]`
    ListComprehension {
        /// Variable name for each element.
        variable: String,
        /// The source list expression.
        list_expr: Box<LogicalExpression>,
        /// Optional filter predicate.
        filter_expr: Option<Box<LogicalExpression>>,
        /// The mapping expression for each element.
        map_expr: Box<LogicalExpression>,
    },

    /// EXISTS subquery.
    ExistsSubquery(Box<LogicalOperator>),

    /// COUNT subquery.
    CountSubquery(Box<LogicalOperator>),
}
969
/// Binary operator.
///
/// Derives `Hash` alongside `Eq` so an operator can be used as a key in
/// hashed collections (`HashMap`, `HashSet`), e.g. for dispatch or
/// precedence tables.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BinaryOp {
    /// Equality comparison (=).
    Eq,
    /// Inequality comparison (<>).
    Ne,
    /// Less than (<).
    Lt,
    /// Less than or equal (<=).
    Le,
    /// Greater than (>).
    Gt,
    /// Greater than or equal (>=).
    Ge,

    /// Logical AND.
    And,
    /// Logical OR.
    Or,
    /// Logical XOR.
    Xor,

    /// Addition (+).
    Add,
    /// Subtraction (-).
    Sub,
    /// Multiplication (*).
    Mul,
    /// Division (/).
    Div,
    /// Modulo (%).
    Mod,

    /// String concatenation.
    Concat,
    /// String starts with.
    StartsWith,
    /// String ends with.
    EndsWith,
    /// String contains.
    Contains,

    /// Collection membership (IN).
    In,
    /// Pattern matching (LIKE).
    Like,
    /// Regex matching (=~).
    Regex,
    /// Power/exponentiation (^).
    Pow,
}
1022
/// Unary operator.
///
/// Derives `Hash` alongside `Eq` so an operator can be used as a key in
/// hashed collections (`HashMap`, `HashSet`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UnaryOp {
    /// Logical NOT.
    Not,
    /// Numeric negation.
    Neg,
    /// IS NULL check.
    IsNull,
    /// IS NOT NULL check.
    IsNotNull,
}
1035
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the leaf `NodeScan` over `(n:Person)` shared by both plans.
    fn person_scan() -> LogicalOperator {
        LogicalOperator::NodeScan(NodeScanOp {
            variable: "n".into(),
            label: Some("Person".into()),
            input: None,
        })
    }

    /// `Return(n) <- NodeScan(n:Person)` keeps its shape after construction.
    #[test]
    fn test_simple_node_scan_plan() {
        let returned = ReturnItem {
            expression: LogicalExpression::Variable("n".into()),
            alias: None,
        };
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![returned],
            distinct: false,
            input: Box::new(person_scan()),
        }));

        // Verify structure, outermost operator first.
        let LogicalOperator::Return(ret) = &plan.root else {
            panic!("Expected Return");
        };
        assert_eq!(ret.items.len(), 1);
        assert!(!ret.distinct);

        let LogicalOperator::NodeScan(scan) = &*ret.input else {
            panic!("Expected NodeScan");
        };
        assert_eq!(scan.variable, "n");
        assert_eq!(scan.label.as_deref(), Some("Person"));
    }

    /// `Return(n.name) <- Filter(n.age > 30) <- NodeScan(n:Person)` keeps the
    /// comparison operator of its predicate.
    #[test]
    fn test_filter_plan() {
        let age_over_30 = LogicalExpression::Binary {
            left: Box::new(LogicalExpression::Property {
                variable: "n".into(),
                property: "age".into(),
            }),
            op: BinaryOp::Gt,
            right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
        };
        let filter = LogicalOperator::Filter(FilterOp {
            predicate: age_over_30,
            input: Box::new(person_scan()),
        });
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![ReturnItem {
                expression: LogicalExpression::Property {
                    variable: "n".into(),
                    property: "name".into(),
                },
                alias: Some("name".into()),
            }],
            distinct: false,
            input: Box::new(filter),
        }));

        let LogicalOperator::Return(ret) = &plan.root else {
            panic!("Expected Return");
        };
        let LogicalOperator::Filter(filter) = &*ret.input else {
            panic!("Expected Filter");
        };
        let LogicalExpression::Binary { op, .. } = &filter.predicate else {
            panic!("Expected Binary expression");
        };
        assert_eq!(*op, BinaryOp::Gt);
    }
}