Skip to main content

grafeo_engine/query/
plan.rs

1//! Logical query plan representation.
2//!
3//! The logical plan is the intermediate representation between parsed queries
4//! and physical execution. Both GQL and Cypher queries are translated to this
5//! common representation.
6
7use grafeo_common::types::Value;
8
9/// A logical query plan.
10#[derive(Debug, Clone)]
11pub struct LogicalPlan {
12    /// The root operator of the plan.
13    pub root: LogicalOperator,
14}
15
16impl LogicalPlan {
17    /// Creates a new logical plan with the given root operator.
18    pub fn new(root: LogicalOperator) -> Self {
19        Self { root }
20    }
21}
22
23/// A logical operator in the query plan.
24#[derive(Debug, Clone)]
25pub enum LogicalOperator {
26    /// Scan all nodes, optionally filtered by label.
27    NodeScan(NodeScanOp),
28
29    /// Scan all edges, optionally filtered by type.
30    EdgeScan(EdgeScanOp),
31
32    /// Expand from nodes to neighbors via edges.
33    Expand(ExpandOp),
34
35    /// Filter rows based on a predicate.
36    Filter(FilterOp),
37
38    /// Project specific columns.
39    Project(ProjectOp),
40
41    /// Join two inputs.
42    Join(JoinOp),
43
44    /// Aggregate with grouping.
45    Aggregate(AggregateOp),
46
47    /// Limit the number of results.
48    Limit(LimitOp),
49
50    /// Skip a number of results.
51    Skip(SkipOp),
52
53    /// Sort results.
54    Sort(SortOp),
55
56    /// Remove duplicate results.
57    Distinct(DistinctOp),
58
59    /// Create a new node.
60    CreateNode(CreateNodeOp),
61
62    /// Create a new edge.
63    CreateEdge(CreateEdgeOp),
64
65    /// Delete a node.
66    DeleteNode(DeleteNodeOp),
67
68    /// Delete an edge.
69    DeleteEdge(DeleteEdgeOp),
70
71    /// Set properties on a node or edge.
72    SetProperty(SetPropertyOp),
73
74    /// Add labels to a node.
75    AddLabel(AddLabelOp),
76
77    /// Remove labels from a node.
78    RemoveLabel(RemoveLabelOp),
79
80    /// Return results (terminal operator).
81    Return(ReturnOp),
82
83    /// Empty result set.
84    Empty,
85
86    // ==================== RDF/SPARQL Operators ====================
87    /// Scan RDF triples matching a pattern.
88    TripleScan(TripleScanOp),
89
90    /// Union of multiple result sets.
91    Union(UnionOp),
92
93    /// Left outer join for OPTIONAL patterns.
94    LeftJoin(LeftJoinOp),
95
96    /// Anti-join for MINUS patterns.
97    AntiJoin(AntiJoinOp),
98
99    /// Bind a variable to an expression.
100    Bind(BindOp),
101
102    /// Unwind a list into individual rows.
103    Unwind(UnwindOp),
104
105    /// Collect grouped key-value rows into a single Map value.
106    /// Used for Gremlin `groupCount()` semantics.
107    MapCollect(MapCollectOp),
108
109    /// Merge a node pattern (match or create).
110    Merge(MergeOp),
111
112    /// Merge a relationship pattern (match or create).
113    MergeRelationship(MergeRelationshipOp),
114
115    /// Find shortest path between nodes.
116    ShortestPath(ShortestPathOp),
117
118    // ==================== SPARQL Update Operators ====================
119    /// Insert RDF triples.
120    InsertTriple(InsertTripleOp),
121
122    /// Delete RDF triples.
123    DeleteTriple(DeleteTripleOp),
124
125    /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
126    /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
127    Modify(ModifyOp),
128
129    /// Clear a graph (remove all triples).
130    ClearGraph(ClearGraphOp),
131
132    /// Create a new named graph.
133    CreateGraph(CreateGraphOp),
134
135    /// Drop (remove) a named graph.
136    DropGraph(DropGraphOp),
137
138    /// Load data from a URL into a graph.
139    LoadGraph(LoadGraphOp),
140
141    /// Copy triples from one graph to another.
142    CopyGraph(CopyGraphOp),
143
144    /// Move triples from one graph to another.
145    MoveGraph(MoveGraphOp),
146
147    /// Add (merge) triples from one graph to another.
148    AddGraph(AddGraphOp),
149
150    // ==================== Vector Search Operators ====================
151    /// Scan using vector similarity search.
152    VectorScan(VectorScanOp),
153
154    /// Join graph patterns with vector similarity search.
155    ///
156    /// Computes vector distances between entities from the left input and
157    /// a query vector, then joins with similarity scores. Useful for:
158    /// - Filtering graph traversal results by vector similarity
159    /// - Computing aggregated embeddings and finding similar entities
160    /// - Combining multiple vector sources with graph structure
161    VectorJoin(VectorJoinOp),
162
163    // ==================== DDL Operators ====================
164    /// Define a property graph schema (SQL/PGQ DDL).
165    CreatePropertyGraph(CreatePropertyGraphOp),
166
167    // ==================== Procedure Call Operators ====================
168    /// Invoke a stored procedure (CALL ... YIELD).
169    CallProcedure(CallProcedureOp),
170}
171
172/// Scan nodes from the graph.
173#[derive(Debug, Clone)]
174pub struct NodeScanOp {
175    /// Variable name to bind the node to.
176    pub variable: String,
177    /// Optional label filter.
178    pub label: Option<String>,
179    /// Child operator (if any, for chained patterns).
180    pub input: Option<Box<LogicalOperator>>,
181}
182
183/// Scan edges from the graph.
184#[derive(Debug, Clone)]
185pub struct EdgeScanOp {
186    /// Variable name to bind the edge to.
187    pub variable: String,
188    /// Optional edge type filter.
189    pub edge_type: Option<String>,
190    /// Child operator (if any).
191    pub input: Option<Box<LogicalOperator>>,
192}
193
194/// Expand from nodes to their neighbors.
195#[derive(Debug, Clone)]
196pub struct ExpandOp {
197    /// Source node variable.
198    pub from_variable: String,
199    /// Target node variable to bind.
200    pub to_variable: String,
201    /// Edge variable to bind (optional).
202    pub edge_variable: Option<String>,
203    /// Direction of expansion.
204    pub direction: ExpandDirection,
205    /// Optional edge type filter.
206    pub edge_type: Option<String>,
207    /// Minimum hops (for variable-length patterns).
208    pub min_hops: u32,
209    /// Maximum hops (for variable-length patterns).
210    pub max_hops: Option<u32>,
211    /// Input operator.
212    pub input: Box<LogicalOperator>,
213    /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
214    /// When set, a path length column will be output under this name.
215    pub path_alias: Option<String>,
216}
217
/// Which way edges are followed during an expansion.
///
/// The type is a plain `Copy` enum and additionally derives `Hash`, so it can be
/// used directly as a key in hash maps and sets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExpandDirection {
    /// Follow edges leaving the node.
    Outgoing,
    /// Follow edges arriving at the node.
    Incoming,
    /// Follow edges regardless of their direction.
    Both,
}
228
229/// Join two inputs.
230#[derive(Debug, Clone)]
231pub struct JoinOp {
232    /// Left input.
233    pub left: Box<LogicalOperator>,
234    /// Right input.
235    pub right: Box<LogicalOperator>,
236    /// Join type.
237    pub join_type: JoinType,
238    /// Join conditions.
239    pub conditions: Vec<JoinCondition>,
240}
241
/// Kind of join performed by a [`JoinOp`].
///
/// Derives `Hash` in addition to the comparison traits so the join kind can be
/// used as a key in hash maps and sets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum JoinType {
    /// Inner join.
    Inner,
    /// Left outer join.
    Left,
    /// Right outer join.
    Right,
    /// Full outer join.
    Full,
    /// Cross join (Cartesian product).
    Cross,
    /// Semi join: left rows that have a matching right row.
    Semi,
    /// Anti join: left rows that have no matching right row.
    Anti,
}
260
261/// A join condition.
262#[derive(Debug, Clone)]
263pub struct JoinCondition {
264    /// Left expression.
265    pub left: LogicalExpression,
266    /// Right expression.
267    pub right: LogicalExpression,
268}
269
270/// Aggregate with grouping.
271#[derive(Debug, Clone)]
272pub struct AggregateOp {
273    /// Group by expressions.
274    pub group_by: Vec<LogicalExpression>,
275    /// Aggregate functions.
276    pub aggregates: Vec<AggregateExpr>,
277    /// Input operator.
278    pub input: Box<LogicalOperator>,
279    /// HAVING clause filter (applied after aggregation).
280    pub having: Option<LogicalExpression>,
281}
282
283/// An aggregate expression.
284#[derive(Debug, Clone)]
285pub struct AggregateExpr {
286    /// Aggregate function.
287    pub function: AggregateFunction,
288    /// Expression to aggregate.
289    pub expression: Option<LogicalExpression>,
290    /// Whether to use DISTINCT.
291    pub distinct: bool,
292    /// Alias for the result.
293    pub alias: Option<String>,
294    /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
295    pub percentile: Option<f64>,
296}
297
/// The aggregate functions available to an [`AggregateExpr`].
///
/// Derives `Hash` in addition to the comparison traits so a function can be
/// used as a key in hash maps and sets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AggregateFunction {
    /// Row count (COUNT(*)).
    Count,
    /// Count of non-null values (COUNT(expr)).
    CountNonNull,
    /// Sum of the values.
    Sum,
    /// Average of the values.
    Avg,
    /// Smallest value.
    Min,
    /// Largest value.
    Max,
    /// Gather the values into a list.
    Collect,
    /// Sample standard deviation (STDEV).
    StdDev,
    /// Population standard deviation (STDEVP).
    StdDevPop,
    /// Discrete percentile (PERCENTILE_DISC).
    PercentileDisc,
    /// Continuous percentile (PERCENTILE_CONT).
    PercentileCont,
}
324
325/// Filter rows based on a predicate.
326#[derive(Debug, Clone)]
327pub struct FilterOp {
328    /// The filter predicate.
329    pub predicate: LogicalExpression,
330    /// Input operator.
331    pub input: Box<LogicalOperator>,
332}
333
334/// Project specific columns.
335#[derive(Debug, Clone)]
336pub struct ProjectOp {
337    /// Columns to project.
338    pub projections: Vec<Projection>,
339    /// Input operator.
340    pub input: Box<LogicalOperator>,
341}
342
343/// A single projection (column selection or computation).
344#[derive(Debug, Clone)]
345pub struct Projection {
346    /// Expression to compute.
347    pub expression: LogicalExpression,
348    /// Alias for the result.
349    pub alias: Option<String>,
350}
351
352/// Limit the number of results.
353#[derive(Debug, Clone)]
354pub struct LimitOp {
355    /// Maximum number of rows to return.
356    pub count: usize,
357    /// Input operator.
358    pub input: Box<LogicalOperator>,
359}
360
361/// Skip a number of results.
362#[derive(Debug, Clone)]
363pub struct SkipOp {
364    /// Number of rows to skip.
365    pub count: usize,
366    /// Input operator.
367    pub input: Box<LogicalOperator>,
368}
369
370/// Sort results.
371#[derive(Debug, Clone)]
372pub struct SortOp {
373    /// Sort keys.
374    pub keys: Vec<SortKey>,
375    /// Input operator.
376    pub input: Box<LogicalOperator>,
377}
378
379/// A sort key.
380#[derive(Debug, Clone)]
381pub struct SortKey {
382    /// Expression to sort by.
383    pub expression: LogicalExpression,
384    /// Sort order.
385    pub order: SortOrder,
386}
387
/// Ordering applied to a [`SortKey`].
///
/// Derives `Hash` in addition to the comparison traits so the order can be
/// used as a key in hash maps and sets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SortOrder {
    /// Ascending order.
    Ascending,
    /// Descending order.
    Descending,
}
396
397/// Remove duplicate results.
398#[derive(Debug, Clone)]
399pub struct DistinctOp {
400    /// Input operator.
401    pub input: Box<LogicalOperator>,
402    /// Optional columns to use for deduplication.
403    /// If None, all columns are used.
404    pub columns: Option<Vec<String>>,
405}
406
407/// Create a new node.
408#[derive(Debug, Clone)]
409pub struct CreateNodeOp {
410    /// Variable name to bind the created node to.
411    pub variable: String,
412    /// Labels for the new node.
413    pub labels: Vec<String>,
414    /// Properties for the new node.
415    pub properties: Vec<(String, LogicalExpression)>,
416    /// Input operator (for chained creates).
417    pub input: Option<Box<LogicalOperator>>,
418}
419
420/// Create a new edge.
421#[derive(Debug, Clone)]
422pub struct CreateEdgeOp {
423    /// Variable name to bind the created edge to.
424    pub variable: Option<String>,
425    /// Source node variable.
426    pub from_variable: String,
427    /// Target node variable.
428    pub to_variable: String,
429    /// Edge type.
430    pub edge_type: String,
431    /// Properties for the new edge.
432    pub properties: Vec<(String, LogicalExpression)>,
433    /// Input operator.
434    pub input: Box<LogicalOperator>,
435}
436
437/// Delete a node.
438#[derive(Debug, Clone)]
439pub struct DeleteNodeOp {
440    /// Variable of the node to delete.
441    pub variable: String,
442    /// Whether to detach (delete connected edges) before deleting.
443    pub detach: bool,
444    /// Input operator.
445    pub input: Box<LogicalOperator>,
446}
447
448/// Delete an edge.
449#[derive(Debug, Clone)]
450pub struct DeleteEdgeOp {
451    /// Variable of the edge to delete.
452    pub variable: String,
453    /// Input operator.
454    pub input: Box<LogicalOperator>,
455}
456
457/// Set properties on a node or edge.
458#[derive(Debug, Clone)]
459pub struct SetPropertyOp {
460    /// Variable of the entity to update.
461    pub variable: String,
462    /// Properties to set (name -> expression).
463    pub properties: Vec<(String, LogicalExpression)>,
464    /// Whether to replace all properties (vs. merge).
465    pub replace: bool,
466    /// Whether the target variable is an edge (vs. node).
467    pub is_edge: bool,
468    /// Input operator.
469    pub input: Box<LogicalOperator>,
470}
471
472/// Add labels to a node.
473#[derive(Debug, Clone)]
474pub struct AddLabelOp {
475    /// Variable of the node to update.
476    pub variable: String,
477    /// Labels to add.
478    pub labels: Vec<String>,
479    /// Input operator.
480    pub input: Box<LogicalOperator>,
481}
482
483/// Remove labels from a node.
484#[derive(Debug, Clone)]
485pub struct RemoveLabelOp {
486    /// Variable of the node to update.
487    pub variable: String,
488    /// Labels to remove.
489    pub labels: Vec<String>,
490    /// Input operator.
491    pub input: Box<LogicalOperator>,
492}
493
494// ==================== RDF/SPARQL Operators ====================
495
496/// Scan RDF triples matching a pattern.
497#[derive(Debug, Clone)]
498pub struct TripleScanOp {
499    /// Subject pattern (variable name or IRI).
500    pub subject: TripleComponent,
501    /// Predicate pattern (variable name or IRI).
502    pub predicate: TripleComponent,
503    /// Object pattern (variable name, IRI, or literal).
504    pub object: TripleComponent,
505    /// Named graph (optional).
506    pub graph: Option<TripleComponent>,
507    /// Input operator (for chained patterns).
508    pub input: Option<Box<LogicalOperator>>,
509}
510
511/// A component of a triple pattern.
512#[derive(Debug, Clone)]
513pub enum TripleComponent {
514    /// A variable to bind.
515    Variable(String),
516    /// A constant IRI.
517    Iri(String),
518    /// A constant literal value.
519    Literal(Value),
520}
521
522/// Union of multiple result sets.
523#[derive(Debug, Clone)]
524pub struct UnionOp {
525    /// Inputs to union together.
526    pub inputs: Vec<LogicalOperator>,
527}
528
529/// Left outer join for OPTIONAL patterns.
530#[derive(Debug, Clone)]
531pub struct LeftJoinOp {
532    /// Left (required) input.
533    pub left: Box<LogicalOperator>,
534    /// Right (optional) input.
535    pub right: Box<LogicalOperator>,
536    /// Optional filter condition.
537    pub condition: Option<LogicalExpression>,
538}
539
540/// Anti-join for MINUS patterns.
541#[derive(Debug, Clone)]
542pub struct AntiJoinOp {
543    /// Left input (results to keep if no match on right).
544    pub left: Box<LogicalOperator>,
545    /// Right input (patterns to exclude).
546    pub right: Box<LogicalOperator>,
547}
548
549/// Bind a variable to an expression.
550#[derive(Debug, Clone)]
551pub struct BindOp {
552    /// Expression to compute.
553    pub expression: LogicalExpression,
554    /// Variable to bind the result to.
555    pub variable: String,
556    /// Input operator.
557    pub input: Box<LogicalOperator>,
558}
559
560/// Unwind a list into individual rows.
561///
562/// For each input row, evaluates the expression (which should return a list)
563/// and emits one row for each element in the list.
564#[derive(Debug, Clone)]
565pub struct UnwindOp {
566    /// The list expression to unwind.
567    pub expression: LogicalExpression,
568    /// The variable name for each element.
569    pub variable: String,
570    /// Optional variable for 1-based element position (ORDINALITY).
571    pub ordinality_var: Option<String>,
572    /// Optional variable for 0-based element position (OFFSET).
573    pub offset_var: Option<String>,
574    /// Input operator.
575    pub input: Box<LogicalOperator>,
576}
577
578/// Collect grouped key-value rows into a single Map value.
579/// Used for Gremlin `groupCount()` semantics.
580#[derive(Debug, Clone)]
581pub struct MapCollectOp {
582    /// Variable holding the map key.
583    pub key_var: String,
584    /// Variable holding the map value.
585    pub value_var: String,
586    /// Output variable alias.
587    pub alias: String,
588    /// Input operator (typically a grouped aggregate).
589    pub input: Box<LogicalOperator>,
590}
591
592/// Merge a pattern (match or create).
593///
594/// MERGE tries to match a pattern in the graph. If found, returns the existing
595/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
596/// (optionally applying ON CREATE SET).
597#[derive(Debug, Clone)]
598pub struct MergeOp {
599    /// The node to merge.
600    pub variable: String,
601    /// Labels to match/create.
602    pub labels: Vec<String>,
603    /// Properties that must match (used for both matching and creation).
604    pub match_properties: Vec<(String, LogicalExpression)>,
605    /// Properties to set on CREATE.
606    pub on_create: Vec<(String, LogicalExpression)>,
607    /// Properties to set on MATCH.
608    pub on_match: Vec<(String, LogicalExpression)>,
609    /// Input operator.
610    pub input: Box<LogicalOperator>,
611}
612
613/// Merge a relationship pattern (match or create between two bound nodes).
614///
615/// MERGE on a relationship tries to find an existing relationship of the given type
616/// between the source and target nodes. If found, returns the existing relationship
617/// (optionally applying ON MATCH SET). If not found, creates it (optionally applying
618/// ON CREATE SET).
619#[derive(Debug, Clone)]
620pub struct MergeRelationshipOp {
621    /// Variable to bind the relationship to.
622    pub variable: String,
623    /// Source node variable (must already be bound).
624    pub source_variable: String,
625    /// Target node variable (must already be bound).
626    pub target_variable: String,
627    /// Relationship type.
628    pub edge_type: String,
629    /// Properties that must match (used for both matching and creation).
630    pub match_properties: Vec<(String, LogicalExpression)>,
631    /// Properties to set on CREATE.
632    pub on_create: Vec<(String, LogicalExpression)>,
633    /// Properties to set on MATCH.
634    pub on_match: Vec<(String, LogicalExpression)>,
635    /// Input operator.
636    pub input: Box<LogicalOperator>,
637}
638
639/// Find shortest path between two nodes.
640///
641/// This operator uses Dijkstra's algorithm to find the shortest path(s)
642/// between a source node and a target node, optionally filtered by edge type.
643#[derive(Debug, Clone)]
644pub struct ShortestPathOp {
645    /// Input operator providing source/target nodes.
646    pub input: Box<LogicalOperator>,
647    /// Variable name for the source node.
648    pub source_var: String,
649    /// Variable name for the target node.
650    pub target_var: String,
651    /// Optional edge type filter.
652    pub edge_type: Option<String>,
653    /// Direction of edge traversal.
654    pub direction: ExpandDirection,
655    /// Variable name to bind the path result.
656    pub path_alias: String,
657    /// Whether to find all shortest paths (vs. just one).
658    pub all_paths: bool,
659}
660
661// ==================== SPARQL Update Operators ====================
662
663/// Insert RDF triples.
664#[derive(Debug, Clone)]
665pub struct InsertTripleOp {
666    /// Subject of the triple.
667    pub subject: TripleComponent,
668    /// Predicate of the triple.
669    pub predicate: TripleComponent,
670    /// Object of the triple.
671    pub object: TripleComponent,
672    /// Named graph (optional).
673    pub graph: Option<String>,
674    /// Input operator (provides variable bindings).
675    pub input: Option<Box<LogicalOperator>>,
676}
677
678/// Delete RDF triples.
679#[derive(Debug, Clone)]
680pub struct DeleteTripleOp {
681    /// Subject pattern.
682    pub subject: TripleComponent,
683    /// Predicate pattern.
684    pub predicate: TripleComponent,
685    /// Object pattern.
686    pub object: TripleComponent,
687    /// Named graph (optional).
688    pub graph: Option<String>,
689    /// Input operator (provides variable bindings).
690    pub input: Option<Box<LogicalOperator>>,
691}
692
693/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
694///
695/// Per SPARQL 1.1 Update spec, this operator:
696/// 1. Evaluates the WHERE clause once to get bindings
697/// 2. Applies DELETE templates using those bindings
698/// 3. Applies INSERT templates using the SAME bindings
699///
700/// This ensures DELETE and INSERT see consistent data.
701#[derive(Debug, Clone)]
702pub struct ModifyOp {
703    /// DELETE triple templates (patterns with variables).
704    pub delete_templates: Vec<TripleTemplate>,
705    /// INSERT triple templates (patterns with variables).
706    pub insert_templates: Vec<TripleTemplate>,
707    /// WHERE clause that provides variable bindings.
708    pub where_clause: Box<LogicalOperator>,
709    /// Named graph context (for WITH clause).
710    pub graph: Option<String>,
711}
712
713/// A triple template for DELETE/INSERT operations.
714#[derive(Debug, Clone)]
715pub struct TripleTemplate {
716    /// Subject (may be a variable).
717    pub subject: TripleComponent,
718    /// Predicate (may be a variable).
719    pub predicate: TripleComponent,
720    /// Object (may be a variable or literal).
721    pub object: TripleComponent,
722    /// Named graph (optional).
723    pub graph: Option<String>,
724}
725
/// Parameters for clearing all triples from a graph.
///
/// Derives `PartialEq`/`Eq` so two operations can be compared directly,
/// e.g. in plan-construction tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClearGraphOp {
    /// Target graph: `None` = default graph, `Some("")` = all named graphs,
    /// `Some(iri)` = one specific graph.
    pub graph: Option<String>,
    /// Whether errors are silently ignored.
    pub silent: bool,
}
734
/// Parameters for creating a new named graph.
///
/// Derives `PartialEq`/`Eq` so two operations can be compared directly,
/// e.g. in plan-construction tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CreateGraphOp {
    /// IRI of the graph that is created.
    pub graph: String,
    /// Whether an already-existing graph is silently ignored.
    pub silent: bool,
}
743
/// Parameters for dropping (removing) a named graph.
///
/// Derives `PartialEq`/`Eq` so two operations can be compared directly,
/// e.g. in plan-construction tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DropGraphOp {
    /// Target graph (`None` = default graph).
    pub graph: Option<String>,
    /// Whether errors are silently ignored.
    pub silent: bool,
}
752
/// Parameters for loading data from a URL into a graph.
///
/// Derives `PartialEq`/`Eq` so two operations can be compared directly,
/// e.g. in plan-construction tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LoadGraphOp {
    /// URL the data is loaded from.
    pub source: String,
    /// Graph the data is loaded into (`None` = default graph).
    pub destination: Option<String>,
    /// Whether errors are silently ignored.
    pub silent: bool,
}
763
/// Parameters for copying triples from one graph to another.
///
/// Derives `PartialEq`/`Eq` so two operations can be compared directly,
/// e.g. in plan-construction tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CopyGraphOp {
    /// Graph the triples are read from.
    pub source: Option<String>,
    /// Graph the triples are written to.
    pub destination: Option<String>,
    /// Whether errors are silently ignored.
    pub silent: bool,
}
774
/// Parameters for moving triples from one graph to another.
///
/// Derives `PartialEq`/`Eq` so two operations can be compared directly,
/// e.g. in plan-construction tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MoveGraphOp {
    /// Graph the triples are taken from.
    pub source: Option<String>,
    /// Graph the triples are moved to.
    pub destination: Option<String>,
    /// Whether errors are silently ignored.
    pub silent: bool,
}
785
/// Parameters for adding (merging) triples from one graph into another.
///
/// Derives `PartialEq`/`Eq` so two operations can be compared directly,
/// e.g. in plan-construction tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AddGraphOp {
    /// Graph the triples are read from.
    pub source: Option<String>,
    /// Graph the triples are added to.
    pub destination: Option<String>,
    /// Whether errors are silently ignored.
    pub silent: bool,
}
796
797// ==================== Vector Search Operators ====================
798
799/// Vector similarity scan operation.
800///
801/// Performs approximate nearest neighbor search using a vector index (HNSW)
802/// or brute-force search for small datasets. Returns nodes/edges whose
803/// embeddings are similar to the query vector.
804///
805/// # Example GQL
806///
807/// ```gql
808/// MATCH (m:Movie)
809/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
810/// RETURN m.title
811/// ```
812#[derive(Debug, Clone)]
813pub struct VectorScanOp {
814    /// Variable name to bind matching entities to.
815    pub variable: String,
816    /// Name of the vector index to use (None = brute-force).
817    pub index_name: Option<String>,
818    /// Property containing the vector embedding.
819    pub property: String,
820    /// Optional label filter (scan only nodes with this label).
821    pub label: Option<String>,
822    /// The query vector expression.
823    pub query_vector: LogicalExpression,
824    /// Number of nearest neighbors to return.
825    pub k: usize,
826    /// Distance metric (None = use index default, typically cosine).
827    pub metric: Option<VectorMetric>,
828    /// Minimum similarity threshold (filters results below this).
829    pub min_similarity: Option<f32>,
830    /// Maximum distance threshold (filters results above this).
831    pub max_distance: Option<f32>,
832    /// Input operator (for hybrid queries combining graph + vector).
833    pub input: Option<Box<LogicalOperator>>,
834}
835
/// Distance/similarity metric used by vector scan operations.
///
/// Derives `Hash` in addition to the comparison traits so a metric can be
/// used as a key in hash maps and sets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VectorMetric {
    /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
    Cosine,
    /// Euclidean (L2) distance. Best when magnitude matters.
    Euclidean,
    /// Dot product. Best for maximum inner product search.
    DotProduct,
    /// Manhattan (L1) distance. Less sensitive to outliers.
    Manhattan,
}
848
849/// Join graph patterns with vector similarity search.
850///
851/// This operator takes entities from the left input and computes vector
852/// similarity against a query vector, outputting (entity, distance) pairs.
853///
854/// # Use Cases
855///
856/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
857/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
858/// 3. **Filtering by similarity**: Join with threshold-based filtering
859///
860/// # Example
861///
862/// ```gql
863/// // Find movies similar to what the user liked
864/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
865/// WITH avg(liked.embedding) AS user_taste
866/// VECTOR JOIN (m:Movie) ON m.embedding
867/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
868/// RETURN m.title
869/// ```
870#[derive(Debug, Clone)]
871pub struct VectorJoinOp {
872    /// Input operator providing entities to match against.
873    pub input: Box<LogicalOperator>,
874    /// Variable from input to extract vectors from (for entity-to-entity similarity).
875    /// If None, uses `query_vector` directly.
876    pub left_vector_variable: Option<String>,
877    /// Property containing the left vector (used with `left_vector_variable`).
878    pub left_property: Option<String>,
879    /// The query vector expression (constant or computed).
880    pub query_vector: LogicalExpression,
881    /// Variable name to bind the right-side matching entities.
882    pub right_variable: String,
883    /// Property containing the right-side vector embeddings.
884    pub right_property: String,
885    /// Optional label filter for right-side entities.
886    pub right_label: Option<String>,
887    /// Name of vector index on right side (None = brute-force).
888    pub index_name: Option<String>,
889    /// Number of nearest neighbors per left-side entity.
890    pub k: usize,
891    /// Distance metric.
892    pub metric: Option<VectorMetric>,
893    /// Minimum similarity threshold.
894    pub min_similarity: Option<f32>,
895    /// Maximum distance threshold.
896    pub max_distance: Option<f32>,
897    /// Variable to bind the distance/similarity score.
898    pub score_variable: Option<String>,
899}
900
901/// Return results (terminal operator).
902#[derive(Debug, Clone)]
903pub struct ReturnOp {
904    /// Items to return.
905    pub items: Vec<ReturnItem>,
906    /// Whether to return distinct results.
907    pub distinct: bool,
908    /// Input operator.
909    pub input: Box<LogicalOperator>,
910}
911
912/// A single return item.
913#[derive(Debug, Clone)]
914pub struct ReturnItem {
915    /// Expression to return.
916    pub expression: LogicalExpression,
917    /// Alias for the result column.
918    pub alias: Option<String>,
919}
920
921/// Define a property graph schema (SQL/PGQ DDL).
922#[derive(Debug, Clone)]
923pub struct CreatePropertyGraphOp {
924    /// Graph name.
925    pub name: String,
926    /// Node table schemas (label name + column definitions).
927    pub node_tables: Vec<PropertyGraphNodeTable>,
928    /// Edge table schemas (type name + column definitions + references).
929    pub edge_tables: Vec<PropertyGraphEdgeTable>,
930}
931
/// Schema of one node table in a property graph definition.
///
/// Derives `PartialEq`/`Eq` so two table definitions can be compared directly,
/// e.g. in plan-construction tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PropertyGraphNodeTable {
    /// Table name (maps to a node label).
    pub name: String,
    /// Column definitions as (name, type_name) pairs.
    pub columns: Vec<(String, String)>,
}
940
/// Schema of one edge table in a property graph definition.
///
/// Derives `PartialEq`/`Eq` so two table definitions can be compared directly,
/// e.g. in plan-construction tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PropertyGraphEdgeTable {
    /// Table name (maps to an edge type).
    pub name: String,
    /// Column definitions as (name, type_name) pairs.
    pub columns: Vec<(String, String)>,
    /// Name of the source node table.
    pub source_table: String,
    /// Name of the target node table.
    pub target_table: String,
}
953
954// ==================== Procedure Call Types ====================
955
956/// A CALL procedure operation.
957///
958/// ```text
959/// CALL grafeo.pagerank({damping: 0.85}) YIELD nodeId, score
960/// ```
961#[derive(Debug, Clone)]
962pub struct CallProcedureOp {
963    /// Dotted procedure name, e.g. `["grafeo", "pagerank"]`.
964    pub name: Vec<String>,
965    /// Argument expressions (constants in Phase 1).
966    pub arguments: Vec<LogicalExpression>,
967    /// Optional YIELD clause: which columns to expose + aliases.
968    pub yield_items: Option<Vec<ProcedureYield>>,
969}
970
/// One YIELD item of a procedure call.
///
/// Derives `PartialEq`/`Eq` so two yield items can be compared directly,
/// e.g. in plan-construction tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProcedureYield {
    /// Name of the column in the procedure result.
    pub field_name: String,
    /// Alias for the column, if one is given (YIELD score AS rank).
    pub alias: Option<String>,
}
979
980/// A logical expression.
981#[derive(Debug, Clone)]
982pub enum LogicalExpression {
983    /// A literal value.
984    Literal(Value),
985
986    /// A variable reference.
987    Variable(String),
988
989    /// Property access (e.g., n.name).
990    Property {
991        /// The variable to access.
992        variable: String,
993        /// The property name.
994        property: String,
995    },
996
997    /// Binary operation.
998    Binary {
999        /// Left operand.
1000        left: Box<LogicalExpression>,
1001        /// Operator.
1002        op: BinaryOp,
1003        /// Right operand.
1004        right: Box<LogicalExpression>,
1005    },
1006
1007    /// Unary operation.
1008    Unary {
1009        /// Operator.
1010        op: UnaryOp,
1011        /// Operand.
1012        operand: Box<LogicalExpression>,
1013    },
1014
1015    /// Function call.
1016    FunctionCall {
1017        /// Function name.
1018        name: String,
1019        /// Arguments.
1020        args: Vec<LogicalExpression>,
1021        /// Whether DISTINCT is applied (e.g., COUNT(DISTINCT x)).
1022        distinct: bool,
1023    },
1024
1025    /// List literal.
1026    List(Vec<LogicalExpression>),
1027
1028    /// Map literal (e.g., {name: 'Alice', age: 30}).
1029    Map(Vec<(String, LogicalExpression)>),
1030
1031    /// Index access (e.g., `list[0]`).
1032    IndexAccess {
1033        /// The base expression (typically a list or string).
1034        base: Box<LogicalExpression>,
1035        /// The index expression.
1036        index: Box<LogicalExpression>,
1037    },
1038
1039    /// Slice access (e.g., list[1..3]).
1040    SliceAccess {
1041        /// The base expression (typically a list or string).
1042        base: Box<LogicalExpression>,
1043        /// Start index (None means from beginning).
1044        start: Option<Box<LogicalExpression>>,
1045        /// End index (None means to end).
1046        end: Option<Box<LogicalExpression>>,
1047    },
1048
1049    /// CASE expression.
1050    Case {
1051        /// Test expression (for simple CASE).
1052        operand: Option<Box<LogicalExpression>>,
1053        /// WHEN clauses.
1054        when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
1055        /// ELSE clause.
1056        else_clause: Option<Box<LogicalExpression>>,
1057    },
1058
1059    /// Parameter reference.
1060    Parameter(String),
1061
1062    /// Labels of a node.
1063    Labels(String),
1064
1065    /// Type of an edge.
1066    Type(String),
1067
1068    /// ID of a node or edge.
1069    Id(String),
1070
1071    /// List comprehension: [x IN list WHERE predicate | expression]
1072    ListComprehension {
1073        /// Variable name for each element.
1074        variable: String,
1075        /// The source list expression.
1076        list_expr: Box<LogicalExpression>,
1077        /// Optional filter predicate.
1078        filter_expr: Option<Box<LogicalExpression>>,
1079        /// The mapping expression for each element.
1080        map_expr: Box<LogicalExpression>,
1081    },
1082
1083    /// List predicate: all/any/none/single(x IN list WHERE pred).
1084    ListPredicate {
1085        /// The kind of list predicate.
1086        kind: ListPredicateKind,
1087        /// The iteration variable name.
1088        variable: String,
1089        /// The source list expression.
1090        list_expr: Box<LogicalExpression>,
1091        /// The predicate to test for each element.
1092        predicate: Box<LogicalExpression>,
1093    },
1094
1095    /// EXISTS subquery.
1096    ExistsSubquery(Box<LogicalOperator>),
1097
1098    /// COUNT subquery.
1099    CountSubquery(Box<LogicalOperator>),
1100}
1101
/// The kind of list predicate function.
///
/// This is a fieldless enum, so it is `Copy` like the other operator enums
/// in this module and can be passed by value without cloning.
///
/// Note that the `None` variant shares its name with `Option::None`; refer to
/// it as `ListPredicateKind::None` to keep the two apart.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListPredicateKind {
    /// `all(x IN list WHERE pred)`: true if pred holds for every element.
    All,
    /// `any(x IN list WHERE pred)`: true if pred holds for at least one element.
    Any,
    /// `none(x IN list WHERE pred)`: true if pred holds for no element.
    None,
    /// `single(x IN list WHERE pred)`: true if pred holds for exactly one element.
    Single,
}
1114
/// Operator of a binary expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp {
    /// `=`: equality comparison.
    Eq,
    /// `<>`: inequality comparison.
    Ne,
    /// `<`: less than.
    Lt,
    /// `<=`: less than or equal.
    Le,
    /// `>`: greater than.
    Gt,
    /// `>=`: greater than or equal.
    Ge,

    /// Logical `AND`.
    And,
    /// Logical `OR`.
    Or,
    /// Logical `XOR`.
    Xor,

    /// `+`: addition.
    Add,
    /// `-`: subtraction.
    Sub,
    /// `*`: multiplication.
    Mul,
    /// `/`: division.
    Div,
    /// `%`: modulo.
    Mod,

    /// Concatenation of strings.
    Concat,
    /// Test whether a string starts with another.
    StartsWith,
    /// Test whether a string ends with another.
    EndsWith,
    /// Test whether a string contains another.
    Contains,

    /// `IN`: collection membership.
    In,
    /// `LIKE`: pattern matching.
    Like,
    /// `=~`: regex matching.
    Regex,
    /// `^`: power/exponentiation.
    Pow,
}
1167
/// Operator of a unary expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
    /// Logical `NOT`.
    Not,
    /// Negation of a number.
    Neg,
    /// `IS NULL` check.
    IsNull,
    /// `IS NOT NULL` check.
    IsNotNull,
}
1180
#[cfg(test)]
mod tests {
    use super::*;

    /// A `Return` over a labelled `NodeScan` keeps its nesting and field
    /// values after construction.
    #[test]
    fn test_simple_node_scan_plan() {
        let node_scan = LogicalOperator::NodeScan(NodeScanOp {
            variable: "n".into(),
            label: Some("Person".into()),
            input: None,
        });
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![ReturnItem {
                expression: LogicalExpression::Variable("n".into()),
                alias: None,
            }],
            distinct: false,
            input: Box::new(node_scan),
        }));

        // The root must be the Return operator.
        let LogicalOperator::Return(ret) = &plan.root else {
            panic!("Expected Return");
        };
        assert_eq!(ret.items.len(), 1);
        assert!(!ret.distinct);

        // Its input must be the node scan built above.
        let LogicalOperator::NodeScan(scan) = ret.input.as_ref() else {
            panic!("Expected NodeScan");
        };
        assert_eq!(scan.variable, "n");
        assert_eq!(scan.label, Some("Person".into()));
    }

    /// A `Return` over a `Filter` keeps the filter's binary predicate and its
    /// operator.
    #[test]
    fn test_filter_plan() {
        let node_scan = LogicalOperator::NodeScan(NodeScanOp {
            variable: "n".into(),
            label: Some("Person".into()),
            input: None,
        });
        let age_over_30 = LogicalExpression::Binary {
            left: Box::new(LogicalExpression::Property {
                variable: "n".into(),
                property: "age".into(),
            }),
            op: BinaryOp::Gt,
            right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
        };
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![ReturnItem {
                expression: LogicalExpression::Property {
                    variable: "n".into(),
                    property: "name".into(),
                },
                alias: Some("name".into()),
            }],
            distinct: false,
            input: Box::new(LogicalOperator::Filter(FilterOp {
                predicate: age_over_30,
                input: Box::new(node_scan),
            })),
        }));

        let LogicalOperator::Return(ret) = &plan.root else {
            panic!("Expected Return");
        };
        let LogicalOperator::Filter(filter) = ret.input.as_ref() else {
            panic!("Expected Filter");
        };
        let LogicalExpression::Binary { op, .. } = &filter.predicate else {
            panic!("Expected Binary expression");
        };
        assert_eq!(*op, BinaryOp::Gt);
    }
}