Skip to main content

grafeo_engine/query/
plan.rs

1//! Logical query plan representation.
2//!
3//! The logical plan is the intermediate representation between parsed queries
4//! and physical execution. Both GQL and Cypher queries are translated to this
5//! common representation.
6
7use grafeo_common::types::Value;
8
9/// A logical query plan.
10#[derive(Debug, Clone)]
11pub struct LogicalPlan {
12    /// The root operator of the plan.
13    pub root: LogicalOperator,
14}
15
16impl LogicalPlan {
17    /// Creates a new logical plan with the given root operator.
18    pub fn new(root: LogicalOperator) -> Self {
19        Self { root }
20    }
21}
22
23/// A logical operator in the query plan.
24#[derive(Debug, Clone)]
25pub enum LogicalOperator {
26    /// Scan all nodes, optionally filtered by label.
27    NodeScan(NodeScanOp),
28
29    /// Scan all edges, optionally filtered by type.
30    EdgeScan(EdgeScanOp),
31
32    /// Expand from nodes to neighbors via edges.
33    Expand(ExpandOp),
34
35    /// Filter rows based on a predicate.
36    Filter(FilterOp),
37
38    /// Project specific columns.
39    Project(ProjectOp),
40
41    /// Join two inputs.
42    Join(JoinOp),
43
44    /// Aggregate with grouping.
45    Aggregate(AggregateOp),
46
47    /// Limit the number of results.
48    Limit(LimitOp),
49
50    /// Skip a number of results.
51    Skip(SkipOp),
52
53    /// Sort results.
54    Sort(SortOp),
55
56    /// Remove duplicate results.
57    Distinct(DistinctOp),
58
59    /// Create a new node.
60    CreateNode(CreateNodeOp),
61
62    /// Create a new edge.
63    CreateEdge(CreateEdgeOp),
64
65    /// Delete a node.
66    DeleteNode(DeleteNodeOp),
67
68    /// Delete an edge.
69    DeleteEdge(DeleteEdgeOp),
70
71    /// Set properties on a node or edge.
72    SetProperty(SetPropertyOp),
73
74    /// Add labels to a node.
75    AddLabel(AddLabelOp),
76
77    /// Remove labels from a node.
78    RemoveLabel(RemoveLabelOp),
79
80    /// Return results (terminal operator).
81    Return(ReturnOp),
82
83    /// Empty result set.
84    Empty,
85
86    // ==================== RDF/SPARQL Operators ====================
87    /// Scan RDF triples matching a pattern.
88    TripleScan(TripleScanOp),
89
90    /// Union of multiple result sets.
91    Union(UnionOp),
92
93    /// Left outer join for OPTIONAL patterns.
94    LeftJoin(LeftJoinOp),
95
96    /// Anti-join for MINUS patterns.
97    AntiJoin(AntiJoinOp),
98
99    /// Bind a variable to an expression.
100    Bind(BindOp),
101
102    /// Unwind a list into individual rows.
103    Unwind(UnwindOp),
104
105    /// Merge a pattern (match or create).
106    Merge(MergeOp),
107
108    /// Find shortest path between nodes.
109    ShortestPath(ShortestPathOp),
110
111    // ==================== SPARQL Update Operators ====================
112    /// Insert RDF triples.
113    InsertTriple(InsertTripleOp),
114
115    /// Delete RDF triples.
116    DeleteTriple(DeleteTripleOp),
117
118    /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
119    /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
120    Modify(ModifyOp),
121
122    /// Clear a graph (remove all triples).
123    ClearGraph(ClearGraphOp),
124
125    /// Create a new named graph.
126    CreateGraph(CreateGraphOp),
127
128    /// Drop (remove) a named graph.
129    DropGraph(DropGraphOp),
130
131    /// Load data from a URL into a graph.
132    LoadGraph(LoadGraphOp),
133
134    /// Copy triples from one graph to another.
135    CopyGraph(CopyGraphOp),
136
137    /// Move triples from one graph to another.
138    MoveGraph(MoveGraphOp),
139
140    /// Add (merge) triples from one graph to another.
141    AddGraph(AddGraphOp),
142
143    // ==================== Vector Search Operators ====================
144    /// Scan using vector similarity search.
145    VectorScan(VectorScanOp),
146
147    /// Join graph patterns with vector similarity search.
148    ///
149    /// Computes vector distances between entities from the left input and
150    /// a query vector, then joins with similarity scores. Useful for:
151    /// - Filtering graph traversal results by vector similarity
152    /// - Computing aggregated embeddings and finding similar entities
153    /// - Combining multiple vector sources with graph structure
154    VectorJoin(VectorJoinOp),
155
156    // ==================== DDL Operators ====================
157    /// Define a property graph schema (SQL/PGQ DDL).
158    CreatePropertyGraph(CreatePropertyGraphOp),
159}
160
161/// Scan nodes from the graph.
162#[derive(Debug, Clone)]
163pub struct NodeScanOp {
164    /// Variable name to bind the node to.
165    pub variable: String,
166    /// Optional label filter.
167    pub label: Option<String>,
168    /// Child operator (if any, for chained patterns).
169    pub input: Option<Box<LogicalOperator>>,
170}
171
172/// Scan edges from the graph.
173#[derive(Debug, Clone)]
174pub struct EdgeScanOp {
175    /// Variable name to bind the edge to.
176    pub variable: String,
177    /// Optional edge type filter.
178    pub edge_type: Option<String>,
179    /// Child operator (if any).
180    pub input: Option<Box<LogicalOperator>>,
181}
182
183/// Expand from nodes to their neighbors.
184#[derive(Debug, Clone)]
185pub struct ExpandOp {
186    /// Source node variable.
187    pub from_variable: String,
188    /// Target node variable to bind.
189    pub to_variable: String,
190    /// Edge variable to bind (optional).
191    pub edge_variable: Option<String>,
192    /// Direction of expansion.
193    pub direction: ExpandDirection,
194    /// Optional edge type filter.
195    pub edge_type: Option<String>,
196    /// Minimum hops (for variable-length patterns).
197    pub min_hops: u32,
198    /// Maximum hops (for variable-length patterns).
199    pub max_hops: Option<u32>,
200    /// Input operator.
201    pub input: Box<LogicalOperator>,
202    /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
203    /// When set, a path length column will be output under this name.
204    pub path_alias: Option<String>,
205}
206
207/// Direction for edge expansion.
208#[derive(Debug, Clone, Copy, PartialEq, Eq)]
209pub enum ExpandDirection {
210    /// Follow outgoing edges.
211    Outgoing,
212    /// Follow incoming edges.
213    Incoming,
214    /// Follow edges in either direction.
215    Both,
216}
217
218/// Join two inputs.
219#[derive(Debug, Clone)]
220pub struct JoinOp {
221    /// Left input.
222    pub left: Box<LogicalOperator>,
223    /// Right input.
224    pub right: Box<LogicalOperator>,
225    /// Join type.
226    pub join_type: JoinType,
227    /// Join conditions.
228    pub conditions: Vec<JoinCondition>,
229}
230
231/// Join type.
232#[derive(Debug, Clone, Copy, PartialEq, Eq)]
233pub enum JoinType {
234    /// Inner join.
235    Inner,
236    /// Left outer join.
237    Left,
238    /// Right outer join.
239    Right,
240    /// Full outer join.
241    Full,
242    /// Cross join (Cartesian product).
243    Cross,
244    /// Semi join (returns left rows with matching right rows).
245    Semi,
246    /// Anti join (returns left rows without matching right rows).
247    Anti,
248}
249
250/// A join condition.
251#[derive(Debug, Clone)]
252pub struct JoinCondition {
253    /// Left expression.
254    pub left: LogicalExpression,
255    /// Right expression.
256    pub right: LogicalExpression,
257}
258
259/// Aggregate with grouping.
260#[derive(Debug, Clone)]
261pub struct AggregateOp {
262    /// Group by expressions.
263    pub group_by: Vec<LogicalExpression>,
264    /// Aggregate functions.
265    pub aggregates: Vec<AggregateExpr>,
266    /// Input operator.
267    pub input: Box<LogicalOperator>,
268    /// HAVING clause filter (applied after aggregation).
269    pub having: Option<LogicalExpression>,
270}
271
272/// An aggregate expression.
273#[derive(Debug, Clone)]
274pub struct AggregateExpr {
275    /// Aggregate function.
276    pub function: AggregateFunction,
277    /// Expression to aggregate.
278    pub expression: Option<LogicalExpression>,
279    /// Whether to use DISTINCT.
280    pub distinct: bool,
281    /// Alias for the result.
282    pub alias: Option<String>,
283    /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
284    pub percentile: Option<f64>,
285}
286
287/// Aggregate function.
288#[derive(Debug, Clone, Copy, PartialEq, Eq)]
289pub enum AggregateFunction {
290    /// Count all rows (COUNT(*)).
291    Count,
292    /// Count non-null values (COUNT(expr)).
293    CountNonNull,
294    /// Sum values.
295    Sum,
296    /// Average values.
297    Avg,
298    /// Minimum value.
299    Min,
300    /// Maximum value.
301    Max,
302    /// Collect into list.
303    Collect,
304    /// Sample standard deviation (STDEV).
305    StdDev,
306    /// Population standard deviation (STDEVP).
307    StdDevPop,
308    /// Discrete percentile (PERCENTILE_DISC).
309    PercentileDisc,
310    /// Continuous percentile (PERCENTILE_CONT).
311    PercentileCont,
312}
313
314/// Filter rows based on a predicate.
315#[derive(Debug, Clone)]
316pub struct FilterOp {
317    /// The filter predicate.
318    pub predicate: LogicalExpression,
319    /// Input operator.
320    pub input: Box<LogicalOperator>,
321}
322
323/// Project specific columns.
324#[derive(Debug, Clone)]
325pub struct ProjectOp {
326    /// Columns to project.
327    pub projections: Vec<Projection>,
328    /// Input operator.
329    pub input: Box<LogicalOperator>,
330}
331
332/// A single projection (column selection or computation).
333#[derive(Debug, Clone)]
334pub struct Projection {
335    /// Expression to compute.
336    pub expression: LogicalExpression,
337    /// Alias for the result.
338    pub alias: Option<String>,
339}
340
341/// Limit the number of results.
342#[derive(Debug, Clone)]
343pub struct LimitOp {
344    /// Maximum number of rows to return.
345    pub count: usize,
346    /// Input operator.
347    pub input: Box<LogicalOperator>,
348}
349
350/// Skip a number of results.
351#[derive(Debug, Clone)]
352pub struct SkipOp {
353    /// Number of rows to skip.
354    pub count: usize,
355    /// Input operator.
356    pub input: Box<LogicalOperator>,
357}
358
359/// Sort results.
360#[derive(Debug, Clone)]
361pub struct SortOp {
362    /// Sort keys.
363    pub keys: Vec<SortKey>,
364    /// Input operator.
365    pub input: Box<LogicalOperator>,
366}
367
368/// A sort key.
369#[derive(Debug, Clone)]
370pub struct SortKey {
371    /// Expression to sort by.
372    pub expression: LogicalExpression,
373    /// Sort order.
374    pub order: SortOrder,
375}
376
377/// Sort order.
378#[derive(Debug, Clone, Copy, PartialEq, Eq)]
379pub enum SortOrder {
380    /// Ascending order.
381    Ascending,
382    /// Descending order.
383    Descending,
384}
385
386/// Remove duplicate results.
387#[derive(Debug, Clone)]
388pub struct DistinctOp {
389    /// Input operator.
390    pub input: Box<LogicalOperator>,
391    /// Optional columns to use for deduplication.
392    /// If None, all columns are used.
393    pub columns: Option<Vec<String>>,
394}
395
396/// Create a new node.
397#[derive(Debug, Clone)]
398pub struct CreateNodeOp {
399    /// Variable name to bind the created node to.
400    pub variable: String,
401    /// Labels for the new node.
402    pub labels: Vec<String>,
403    /// Properties for the new node.
404    pub properties: Vec<(String, LogicalExpression)>,
405    /// Input operator (for chained creates).
406    pub input: Option<Box<LogicalOperator>>,
407}
408
409/// Create a new edge.
410#[derive(Debug, Clone)]
411pub struct CreateEdgeOp {
412    /// Variable name to bind the created edge to.
413    pub variable: Option<String>,
414    /// Source node variable.
415    pub from_variable: String,
416    /// Target node variable.
417    pub to_variable: String,
418    /// Edge type.
419    pub edge_type: String,
420    /// Properties for the new edge.
421    pub properties: Vec<(String, LogicalExpression)>,
422    /// Input operator.
423    pub input: Box<LogicalOperator>,
424}
425
426/// Delete a node.
427#[derive(Debug, Clone)]
428pub struct DeleteNodeOp {
429    /// Variable of the node to delete.
430    pub variable: String,
431    /// Whether to detach (delete connected edges) before deleting.
432    pub detach: bool,
433    /// Input operator.
434    pub input: Box<LogicalOperator>,
435}
436
437/// Delete an edge.
438#[derive(Debug, Clone)]
439pub struct DeleteEdgeOp {
440    /// Variable of the edge to delete.
441    pub variable: String,
442    /// Input operator.
443    pub input: Box<LogicalOperator>,
444}
445
446/// Set properties on a node or edge.
447#[derive(Debug, Clone)]
448pub struct SetPropertyOp {
449    /// Variable of the entity to update.
450    pub variable: String,
451    /// Properties to set (name -> expression).
452    pub properties: Vec<(String, LogicalExpression)>,
453    /// Whether to replace all properties (vs. merge).
454    pub replace: bool,
455    /// Input operator.
456    pub input: Box<LogicalOperator>,
457}
458
459/// Add labels to a node.
460#[derive(Debug, Clone)]
461pub struct AddLabelOp {
462    /// Variable of the node to update.
463    pub variable: String,
464    /// Labels to add.
465    pub labels: Vec<String>,
466    /// Input operator.
467    pub input: Box<LogicalOperator>,
468}
469
470/// Remove labels from a node.
471#[derive(Debug, Clone)]
472pub struct RemoveLabelOp {
473    /// Variable of the node to update.
474    pub variable: String,
475    /// Labels to remove.
476    pub labels: Vec<String>,
477    /// Input operator.
478    pub input: Box<LogicalOperator>,
479}
480
481// ==================== RDF/SPARQL Operators ====================
482
483/// Scan RDF triples matching a pattern.
484#[derive(Debug, Clone)]
485pub struct TripleScanOp {
486    /// Subject pattern (variable name or IRI).
487    pub subject: TripleComponent,
488    /// Predicate pattern (variable name or IRI).
489    pub predicate: TripleComponent,
490    /// Object pattern (variable name, IRI, or literal).
491    pub object: TripleComponent,
492    /// Named graph (optional).
493    pub graph: Option<TripleComponent>,
494    /// Input operator (for chained patterns).
495    pub input: Option<Box<LogicalOperator>>,
496}
497
498/// A component of a triple pattern.
499#[derive(Debug, Clone)]
500pub enum TripleComponent {
501    /// A variable to bind.
502    Variable(String),
503    /// A constant IRI.
504    Iri(String),
505    /// A constant literal value.
506    Literal(Value),
507}
508
509/// Union of multiple result sets.
510#[derive(Debug, Clone)]
511pub struct UnionOp {
512    /// Inputs to union together.
513    pub inputs: Vec<LogicalOperator>,
514}
515
516/// Left outer join for OPTIONAL patterns.
517#[derive(Debug, Clone)]
518pub struct LeftJoinOp {
519    /// Left (required) input.
520    pub left: Box<LogicalOperator>,
521    /// Right (optional) input.
522    pub right: Box<LogicalOperator>,
523    /// Optional filter condition.
524    pub condition: Option<LogicalExpression>,
525}
526
527/// Anti-join for MINUS patterns.
528#[derive(Debug, Clone)]
529pub struct AntiJoinOp {
530    /// Left input (results to keep if no match on right).
531    pub left: Box<LogicalOperator>,
532    /// Right input (patterns to exclude).
533    pub right: Box<LogicalOperator>,
534}
535
536/// Bind a variable to an expression.
537#[derive(Debug, Clone)]
538pub struct BindOp {
539    /// Expression to compute.
540    pub expression: LogicalExpression,
541    /// Variable to bind the result to.
542    pub variable: String,
543    /// Input operator.
544    pub input: Box<LogicalOperator>,
545}
546
547/// Unwind a list into individual rows.
548///
549/// For each input row, evaluates the expression (which should return a list)
550/// and emits one row for each element in the list.
551#[derive(Debug, Clone)]
552pub struct UnwindOp {
553    /// The list expression to unwind.
554    pub expression: LogicalExpression,
555    /// The variable name for each element.
556    pub variable: String,
557    /// Input operator.
558    pub input: Box<LogicalOperator>,
559}
560
561/// Merge a pattern (match or create).
562///
563/// MERGE tries to match a pattern in the graph. If found, returns the existing
564/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
565/// (optionally applying ON CREATE SET).
566#[derive(Debug, Clone)]
567pub struct MergeOp {
568    /// The node to merge.
569    pub variable: String,
570    /// Labels to match/create.
571    pub labels: Vec<String>,
572    /// Properties that must match (used for both matching and creation).
573    pub match_properties: Vec<(String, LogicalExpression)>,
574    /// Properties to set on CREATE.
575    pub on_create: Vec<(String, LogicalExpression)>,
576    /// Properties to set on MATCH.
577    pub on_match: Vec<(String, LogicalExpression)>,
578    /// Input operator.
579    pub input: Box<LogicalOperator>,
580}
581
582/// Find shortest path between two nodes.
583///
584/// This operator uses Dijkstra's algorithm to find the shortest path(s)
585/// between a source node and a target node, optionally filtered by edge type.
586#[derive(Debug, Clone)]
587pub struct ShortestPathOp {
588    /// Input operator providing source/target nodes.
589    pub input: Box<LogicalOperator>,
590    /// Variable name for the source node.
591    pub source_var: String,
592    /// Variable name for the target node.
593    pub target_var: String,
594    /// Optional edge type filter.
595    pub edge_type: Option<String>,
596    /// Direction of edge traversal.
597    pub direction: ExpandDirection,
598    /// Variable name to bind the path result.
599    pub path_alias: String,
600    /// Whether to find all shortest paths (vs. just one).
601    pub all_paths: bool,
602}
603
604// ==================== SPARQL Update Operators ====================
605
606/// Insert RDF triples.
607#[derive(Debug, Clone)]
608pub struct InsertTripleOp {
609    /// Subject of the triple.
610    pub subject: TripleComponent,
611    /// Predicate of the triple.
612    pub predicate: TripleComponent,
613    /// Object of the triple.
614    pub object: TripleComponent,
615    /// Named graph (optional).
616    pub graph: Option<String>,
617    /// Input operator (provides variable bindings).
618    pub input: Option<Box<LogicalOperator>>,
619}
620
621/// Delete RDF triples.
622#[derive(Debug, Clone)]
623pub struct DeleteTripleOp {
624    /// Subject pattern.
625    pub subject: TripleComponent,
626    /// Predicate pattern.
627    pub predicate: TripleComponent,
628    /// Object pattern.
629    pub object: TripleComponent,
630    /// Named graph (optional).
631    pub graph: Option<String>,
632    /// Input operator (provides variable bindings).
633    pub input: Option<Box<LogicalOperator>>,
634}
635
636/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
637///
638/// Per SPARQL 1.1 Update spec, this operator:
639/// 1. Evaluates the WHERE clause once to get bindings
640/// 2. Applies DELETE templates using those bindings
641/// 3. Applies INSERT templates using the SAME bindings
642///
643/// This ensures DELETE and INSERT see consistent data.
644#[derive(Debug, Clone)]
645pub struct ModifyOp {
646    /// DELETE triple templates (patterns with variables).
647    pub delete_templates: Vec<TripleTemplate>,
648    /// INSERT triple templates (patterns with variables).
649    pub insert_templates: Vec<TripleTemplate>,
650    /// WHERE clause that provides variable bindings.
651    pub where_clause: Box<LogicalOperator>,
652    /// Named graph context (for WITH clause).
653    pub graph: Option<String>,
654}
655
656/// A triple template for DELETE/INSERT operations.
657#[derive(Debug, Clone)]
658pub struct TripleTemplate {
659    /// Subject (may be a variable).
660    pub subject: TripleComponent,
661    /// Predicate (may be a variable).
662    pub predicate: TripleComponent,
663    /// Object (may be a variable or literal).
664    pub object: TripleComponent,
665    /// Named graph (optional).
666    pub graph: Option<String>,
667}
668
669/// Clear all triples from a graph.
670#[derive(Debug, Clone)]
671pub struct ClearGraphOp {
672    /// Target graph (None = default graph, Some("") = all named, Some(iri) = specific graph).
673    pub graph: Option<String>,
674    /// Whether to silently ignore errors.
675    pub silent: bool,
676}
677
678/// Create a new named graph.
679#[derive(Debug, Clone)]
680pub struct CreateGraphOp {
681    /// IRI of the graph to create.
682    pub graph: String,
683    /// Whether to silently ignore if graph already exists.
684    pub silent: bool,
685}
686
687/// Drop (remove) a named graph.
688#[derive(Debug, Clone)]
689pub struct DropGraphOp {
690    /// Target graph (None = default graph).
691    pub graph: Option<String>,
692    /// Whether to silently ignore errors.
693    pub silent: bool,
694}
695
696/// Load data from a URL into a graph.
697#[derive(Debug, Clone)]
698pub struct LoadGraphOp {
699    /// Source URL to load data from.
700    pub source: String,
701    /// Destination graph (None = default graph).
702    pub destination: Option<String>,
703    /// Whether to silently ignore errors.
704    pub silent: bool,
705}
706
707/// Copy triples from one graph to another.
708#[derive(Debug, Clone)]
709pub struct CopyGraphOp {
710    /// Source graph.
711    pub source: Option<String>,
712    /// Destination graph.
713    pub destination: Option<String>,
714    /// Whether to silently ignore errors.
715    pub silent: bool,
716}
717
718/// Move triples from one graph to another.
719#[derive(Debug, Clone)]
720pub struct MoveGraphOp {
721    /// Source graph.
722    pub source: Option<String>,
723    /// Destination graph.
724    pub destination: Option<String>,
725    /// Whether to silently ignore errors.
726    pub silent: bool,
727}
728
729/// Add (merge) triples from one graph to another.
730#[derive(Debug, Clone)]
731pub struct AddGraphOp {
732    /// Source graph.
733    pub source: Option<String>,
734    /// Destination graph.
735    pub destination: Option<String>,
736    /// Whether to silently ignore errors.
737    pub silent: bool,
738}
739
740// ==================== Vector Search Operators ====================
741
742/// Vector similarity scan operation.
743///
744/// Performs approximate nearest neighbor search using a vector index (HNSW)
745/// or brute-force search for small datasets. Returns nodes/edges whose
746/// embeddings are similar to the query vector.
747///
748/// # Example GQL
749///
750/// ```gql
751/// MATCH (m:Movie)
752/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
753/// RETURN m.title
754/// ```
755#[derive(Debug, Clone)]
756pub struct VectorScanOp {
757    /// Variable name to bind matching entities to.
758    pub variable: String,
759    /// Name of the vector index to use (None = brute-force).
760    pub index_name: Option<String>,
761    /// Property containing the vector embedding.
762    pub property: String,
763    /// Optional label filter (scan only nodes with this label).
764    pub label: Option<String>,
765    /// The query vector expression.
766    pub query_vector: LogicalExpression,
767    /// Number of nearest neighbors to return.
768    pub k: usize,
769    /// Distance metric (None = use index default, typically cosine).
770    pub metric: Option<VectorMetric>,
771    /// Minimum similarity threshold (filters results below this).
772    pub min_similarity: Option<f32>,
773    /// Maximum distance threshold (filters results above this).
774    pub max_distance: Option<f32>,
775    /// Input operator (for hybrid queries combining graph + vector).
776    pub input: Option<Box<LogicalOperator>>,
777}
778
779/// Vector distance/similarity metric for vector scan operations.
780#[derive(Debug, Clone, Copy, PartialEq, Eq)]
781pub enum VectorMetric {
782    /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
783    Cosine,
784    /// Euclidean (L2) distance. Best when magnitude matters.
785    Euclidean,
786    /// Dot product. Best for maximum inner product search.
787    DotProduct,
788    /// Manhattan (L1) distance. Less sensitive to outliers.
789    Manhattan,
790}
791
792/// Join graph patterns with vector similarity search.
793///
794/// This operator takes entities from the left input and computes vector
795/// similarity against a query vector, outputting (entity, distance) pairs.
796///
797/// # Use Cases
798///
799/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
800/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
801/// 3. **Filtering by similarity**: Join with threshold-based filtering
802///
803/// # Example
804///
805/// ```gql
806/// // Find movies similar to what the user liked
807/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
808/// WITH avg(liked.embedding) AS user_taste
809/// VECTOR JOIN (m:Movie) ON m.embedding
810/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
811/// RETURN m.title
812/// ```
813#[derive(Debug, Clone)]
814pub struct VectorJoinOp {
815    /// Input operator providing entities to match against.
816    pub input: Box<LogicalOperator>,
817    /// Variable from input to extract vectors from (for entity-to-entity similarity).
818    /// If None, uses `query_vector` directly.
819    pub left_vector_variable: Option<String>,
820    /// Property containing the left vector (used with `left_vector_variable`).
821    pub left_property: Option<String>,
822    /// The query vector expression (constant or computed).
823    pub query_vector: LogicalExpression,
824    /// Variable name to bind the right-side matching entities.
825    pub right_variable: String,
826    /// Property containing the right-side vector embeddings.
827    pub right_property: String,
828    /// Optional label filter for right-side entities.
829    pub right_label: Option<String>,
830    /// Name of vector index on right side (None = brute-force).
831    pub index_name: Option<String>,
832    /// Number of nearest neighbors per left-side entity.
833    pub k: usize,
834    /// Distance metric.
835    pub metric: Option<VectorMetric>,
836    /// Minimum similarity threshold.
837    pub min_similarity: Option<f32>,
838    /// Maximum distance threshold.
839    pub max_distance: Option<f32>,
840    /// Variable to bind the distance/similarity score.
841    pub score_variable: Option<String>,
842}
843
844/// Return results (terminal operator).
845#[derive(Debug, Clone)]
846pub struct ReturnOp {
847    /// Items to return.
848    pub items: Vec<ReturnItem>,
849    /// Whether to return distinct results.
850    pub distinct: bool,
851    /// Input operator.
852    pub input: Box<LogicalOperator>,
853}
854
855/// A single return item.
856#[derive(Debug, Clone)]
857pub struct ReturnItem {
858    /// Expression to return.
859    pub expression: LogicalExpression,
860    /// Alias for the result column.
861    pub alias: Option<String>,
862}
863
864/// Define a property graph schema (SQL/PGQ DDL).
865#[derive(Debug, Clone)]
866pub struct CreatePropertyGraphOp {
867    /// Graph name.
868    pub name: String,
869    /// Node table schemas (label name + column definitions).
870    pub node_tables: Vec<PropertyGraphNodeTable>,
871    /// Edge table schemas (type name + column definitions + references).
872    pub edge_tables: Vec<PropertyGraphEdgeTable>,
873}
874
875/// A node table in a property graph definition.
876#[derive(Debug, Clone)]
877pub struct PropertyGraphNodeTable {
878    /// Table name (maps to a node label).
879    pub name: String,
880    /// Column definitions as (name, type_name) pairs.
881    pub columns: Vec<(String, String)>,
882}
883
884/// An edge table in a property graph definition.
885#[derive(Debug, Clone)]
886pub struct PropertyGraphEdgeTable {
887    /// Table name (maps to an edge type).
888    pub name: String,
889    /// Column definitions as (name, type_name) pairs.
890    pub columns: Vec<(String, String)>,
891    /// Source node table name.
892    pub source_table: String,
893    /// Target node table name.
894    pub target_table: String,
895}
896
897/// A logical expression.
898#[derive(Debug, Clone)]
899pub enum LogicalExpression {
900    /// A literal value.
901    Literal(Value),
902
903    /// A variable reference.
904    Variable(String),
905
906    /// Property access (e.g., n.name).
907    Property {
908        /// The variable to access.
909        variable: String,
910        /// The property name.
911        property: String,
912    },
913
914    /// Binary operation.
915    Binary {
916        /// Left operand.
917        left: Box<LogicalExpression>,
918        /// Operator.
919        op: BinaryOp,
920        /// Right operand.
921        right: Box<LogicalExpression>,
922    },
923
924    /// Unary operation.
925    Unary {
926        /// Operator.
927        op: UnaryOp,
928        /// Operand.
929        operand: Box<LogicalExpression>,
930    },
931
932    /// Function call.
933    FunctionCall {
934        /// Function name.
935        name: String,
936        /// Arguments.
937        args: Vec<LogicalExpression>,
938        /// Whether DISTINCT is applied (e.g., COUNT(DISTINCT x)).
939        distinct: bool,
940    },
941
942    /// List literal.
943    List(Vec<LogicalExpression>),
944
945    /// Map literal (e.g., {name: 'Alice', age: 30}).
946    Map(Vec<(String, LogicalExpression)>),
947
948    /// Index access (e.g., `list[0]`).
949    IndexAccess {
950        /// The base expression (typically a list or string).
951        base: Box<LogicalExpression>,
952        /// The index expression.
953        index: Box<LogicalExpression>,
954    },
955
956    /// Slice access (e.g., list[1..3]).
957    SliceAccess {
958        /// The base expression (typically a list or string).
959        base: Box<LogicalExpression>,
960        /// Start index (None means from beginning).
961        start: Option<Box<LogicalExpression>>,
962        /// End index (None means to end).
963        end: Option<Box<LogicalExpression>>,
964    },
965
966    /// CASE expression.
967    Case {
968        /// Test expression (for simple CASE).
969        operand: Option<Box<LogicalExpression>>,
970        /// WHEN clauses.
971        when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
972        /// ELSE clause.
973        else_clause: Option<Box<LogicalExpression>>,
974    },
975
976    /// Parameter reference.
977    Parameter(String),
978
979    /// Labels of a node.
980    Labels(String),
981
982    /// Type of an edge.
983    Type(String),
984
985    /// ID of a node or edge.
986    Id(String),
987
988    /// List comprehension: [x IN list WHERE predicate | expression]
989    ListComprehension {
990        /// Variable name for each element.
991        variable: String,
992        /// The source list expression.
993        list_expr: Box<LogicalExpression>,
994        /// Optional filter predicate.
995        filter_expr: Option<Box<LogicalExpression>>,
996        /// The mapping expression for each element.
997        map_expr: Box<LogicalExpression>,
998    },
999
1000    /// EXISTS subquery.
1001    ExistsSubquery(Box<LogicalOperator>),
1002
1003    /// COUNT subquery.
1004    CountSubquery(Box<LogicalOperator>),
1005}
1006
1007/// Binary operator.
1008#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1009pub enum BinaryOp {
1010    /// Equality comparison (=).
1011    Eq,
1012    /// Inequality comparison (<>).
1013    Ne,
1014    /// Less than (<).
1015    Lt,
1016    /// Less than or equal (<=).
1017    Le,
1018    /// Greater than (>).
1019    Gt,
1020    /// Greater than or equal (>=).
1021    Ge,
1022
1023    /// Logical AND.
1024    And,
1025    /// Logical OR.
1026    Or,
1027    /// Logical XOR.
1028    Xor,
1029
1030    /// Addition (+).
1031    Add,
1032    /// Subtraction (-).
1033    Sub,
1034    /// Multiplication (*).
1035    Mul,
1036    /// Division (/).
1037    Div,
1038    /// Modulo (%).
1039    Mod,
1040
1041    /// String concatenation.
1042    Concat,
1043    /// String starts with.
1044    StartsWith,
1045    /// String ends with.
1046    EndsWith,
1047    /// String contains.
1048    Contains,
1049
1050    /// Collection membership (IN).
1051    In,
1052    /// Pattern matching (LIKE).
1053    Like,
1054    /// Regex matching (=~).
1055    Regex,
1056    /// Power/exponentiation (^).
1057    Pow,
1058}
1059
1060/// Unary operator.
1061#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1062pub enum UnaryOp {
1063    /// Logical NOT.
1064    Not,
1065    /// Numeric negation.
1066    Neg,
1067    /// IS NULL check.
1068    IsNull,
1069    /// IS NOT NULL check.
1070    IsNotNull,
1071}
1072
1073#[cfg(test)]
1074mod tests {
1075    use super::*;
1076
1077    #[test]
1078    fn test_simple_node_scan_plan() {
1079        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1080            items: vec![ReturnItem {
1081                expression: LogicalExpression::Variable("n".into()),
1082                alias: None,
1083            }],
1084            distinct: false,
1085            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1086                variable: "n".into(),
1087                label: Some("Person".into()),
1088                input: None,
1089            })),
1090        }));
1091
1092        // Verify structure
1093        if let LogicalOperator::Return(ret) = &plan.root {
1094            assert_eq!(ret.items.len(), 1);
1095            assert!(!ret.distinct);
1096            if let LogicalOperator::NodeScan(scan) = ret.input.as_ref() {
1097                assert_eq!(scan.variable, "n");
1098                assert_eq!(scan.label, Some("Person".into()));
1099            } else {
1100                panic!("Expected NodeScan");
1101            }
1102        } else {
1103            panic!("Expected Return");
1104        }
1105    }
1106
1107    #[test]
1108    fn test_filter_plan() {
1109        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1110            items: vec![ReturnItem {
1111                expression: LogicalExpression::Property {
1112                    variable: "n".into(),
1113                    property: "name".into(),
1114                },
1115                alias: Some("name".into()),
1116            }],
1117            distinct: false,
1118            input: Box::new(LogicalOperator::Filter(FilterOp {
1119                predicate: LogicalExpression::Binary {
1120                    left: Box::new(LogicalExpression::Property {
1121                        variable: "n".into(),
1122                        property: "age".into(),
1123                    }),
1124                    op: BinaryOp::Gt,
1125                    right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1126                },
1127                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1128                    variable: "n".into(),
1129                    label: Some("Person".into()),
1130                    input: None,
1131                })),
1132            })),
1133        }));
1134
1135        if let LogicalOperator::Return(ret) = &plan.root {
1136            if let LogicalOperator::Filter(filter) = ret.input.as_ref() {
1137                if let LogicalExpression::Binary { op, .. } = &filter.predicate {
1138                    assert_eq!(*op, BinaryOp::Gt);
1139                } else {
1140                    panic!("Expected Binary expression");
1141                }
1142            } else {
1143                panic!("Expected Filter");
1144            }
1145        } else {
1146            panic!("Expected Return");
1147        }
1148    }
1149}