Skip to main content

grafeo_engine/query/
plan.rs

1//! Logical query plan representation.
2//!
3//! The logical plan is the intermediate representation between parsed queries
4//! and physical execution. Both GQL and Cypher queries are translated to this
5//! common representation.
6
7use grafeo_common::types::Value;
8
9/// A logical query plan.
10#[derive(Debug, Clone)]
11pub struct LogicalPlan {
12    /// The root operator of the plan.
13    pub root: LogicalOperator,
14}
15
16impl LogicalPlan {
17    /// Creates a new logical plan with the given root operator.
18    pub fn new(root: LogicalOperator) -> Self {
19        Self { root }
20    }
21}
22
23/// A logical operator in the query plan.
24#[derive(Debug, Clone)]
25pub enum LogicalOperator {
26    /// Scan all nodes, optionally filtered by label.
27    NodeScan(NodeScanOp),
28
29    /// Scan all edges, optionally filtered by type.
30    EdgeScan(EdgeScanOp),
31
32    /// Expand from nodes to neighbors via edges.
33    Expand(ExpandOp),
34
35    /// Filter rows based on a predicate.
36    Filter(FilterOp),
37
38    /// Project specific columns.
39    Project(ProjectOp),
40
41    /// Join two inputs.
42    Join(JoinOp),
43
44    /// Aggregate with grouping.
45    Aggregate(AggregateOp),
46
47    /// Limit the number of results.
48    Limit(LimitOp),
49
50    /// Skip a number of results.
51    Skip(SkipOp),
52
53    /// Sort results.
54    Sort(SortOp),
55
56    /// Remove duplicate results.
57    Distinct(DistinctOp),
58
59    /// Create a new node.
60    CreateNode(CreateNodeOp),
61
62    /// Create a new edge.
63    CreateEdge(CreateEdgeOp),
64
65    /// Delete a node.
66    DeleteNode(DeleteNodeOp),
67
68    /// Delete an edge.
69    DeleteEdge(DeleteEdgeOp),
70
71    /// Set properties on a node or edge.
72    SetProperty(SetPropertyOp),
73
74    /// Add labels to a node.
75    AddLabel(AddLabelOp),
76
77    /// Remove labels from a node.
78    RemoveLabel(RemoveLabelOp),
79
80    /// Return results (terminal operator).
81    Return(ReturnOp),
82
83    /// Empty result set.
84    Empty,
85
86    // ==================== RDF/SPARQL Operators ====================
87    /// Scan RDF triples matching a pattern.
88    TripleScan(TripleScanOp),
89
90    /// Union of multiple result sets.
91    Union(UnionOp),
92
93    /// Left outer join for OPTIONAL patterns.
94    LeftJoin(LeftJoinOp),
95
96    /// Anti-join for MINUS patterns.
97    AntiJoin(AntiJoinOp),
98
99    /// Bind a variable to an expression.
100    Bind(BindOp),
101
102    /// Unwind a list into individual rows.
103    Unwind(UnwindOp),
104
105    /// Collect grouped key-value rows into a single Map value.
106    /// Used for Gremlin `groupCount()` semantics.
107    MapCollect(MapCollectOp),
108
109    /// Merge a node pattern (match or create).
110    Merge(MergeOp),
111
112    /// Merge a relationship pattern (match or create).
113    MergeRelationship(MergeRelationshipOp),
114
115    /// Find shortest path between nodes.
116    ShortestPath(ShortestPathOp),
117
118    // ==================== SPARQL Update Operators ====================
119    /// Insert RDF triples.
120    InsertTriple(InsertTripleOp),
121
122    /// Delete RDF triples.
123    DeleteTriple(DeleteTripleOp),
124
125    /// SPARQL MODIFY operation (DELETE/INSERT WHERE).
126    /// Evaluates WHERE once, applies DELETE templates, then INSERT templates.
127    Modify(ModifyOp),
128
129    /// Clear a graph (remove all triples).
130    ClearGraph(ClearGraphOp),
131
132    /// Create a new named graph.
133    CreateGraph(CreateGraphOp),
134
135    /// Drop (remove) a named graph.
136    DropGraph(DropGraphOp),
137
138    /// Load data from a URL into a graph.
139    LoadGraph(LoadGraphOp),
140
141    /// Copy triples from one graph to another.
142    CopyGraph(CopyGraphOp),
143
144    /// Move triples from one graph to another.
145    MoveGraph(MoveGraphOp),
146
147    /// Add (merge) triples from one graph to another.
148    AddGraph(AddGraphOp),
149
150    // ==================== Vector Search Operators ====================
151    /// Scan using vector similarity search.
152    VectorScan(VectorScanOp),
153
154    /// Join graph patterns with vector similarity search.
155    ///
156    /// Computes vector distances between entities from the left input and
157    /// a query vector, then joins with similarity scores. Useful for:
158    /// - Filtering graph traversal results by vector similarity
159    /// - Computing aggregated embeddings and finding similar entities
160    /// - Combining multiple vector sources with graph structure
161    VectorJoin(VectorJoinOp),
162
163    // ==================== Set Operations ====================
164    /// Set difference: rows in left that are not in right.
165    Except(ExceptOp),
166
167    /// Set intersection: rows common to all inputs.
168    Intersect(IntersectOp),
169
170    /// Fallback: use left result if non-empty, otherwise right.
171    Otherwise(OtherwiseOp),
172
173    // ==================== Correlated Subquery ====================
174    /// Apply (lateral join): evaluate a subplan per input row.
175    Apply(ApplyOp),
176
177    // ==================== DDL Operators ====================
178    /// Define a property graph schema (SQL/PGQ DDL).
179    CreatePropertyGraph(CreatePropertyGraphOp),
180
181    // ==================== Procedure Call Operators ====================
182    /// Invoke a stored procedure (CALL ... YIELD).
183    CallProcedure(CallProcedureOp),
184}
185
186impl LogicalOperator {
187    /// Returns `true` if this operator or any of its children perform mutations.
188    #[must_use]
189    pub fn has_mutations(&self) -> bool {
190        match self {
191            // Direct mutation operators
192            Self::CreateNode(_)
193            | Self::CreateEdge(_)
194            | Self::DeleteNode(_)
195            | Self::DeleteEdge(_)
196            | Self::SetProperty(_)
197            | Self::AddLabel(_)
198            | Self::RemoveLabel(_)
199            | Self::Merge(_)
200            | Self::MergeRelationship(_)
201            | Self::InsertTriple(_)
202            | Self::DeleteTriple(_)
203            | Self::Modify(_)
204            | Self::ClearGraph(_)
205            | Self::CreateGraph(_)
206            | Self::DropGraph(_)
207            | Self::LoadGraph(_)
208            | Self::CopyGraph(_)
209            | Self::MoveGraph(_)
210            | Self::AddGraph(_)
211            | Self::CreatePropertyGraph(_) => true,
212
213            // Operators with an `input` child
214            Self::Filter(op) => op.input.has_mutations(),
215            Self::Project(op) => op.input.has_mutations(),
216            Self::Aggregate(op) => op.input.has_mutations(),
217            Self::Limit(op) => op.input.has_mutations(),
218            Self::Skip(op) => op.input.has_mutations(),
219            Self::Sort(op) => op.input.has_mutations(),
220            Self::Distinct(op) => op.input.has_mutations(),
221            Self::Unwind(op) => op.input.has_mutations(),
222            Self::Bind(op) => op.input.has_mutations(),
223            Self::MapCollect(op) => op.input.has_mutations(),
224            Self::Return(op) => op.input.has_mutations(),
225            Self::VectorScan(_) | Self::VectorJoin(_) => false,
226
227            // Operators with two children
228            Self::Join(op) => op.left.has_mutations() || op.right.has_mutations(),
229            Self::LeftJoin(op) => op.left.has_mutations() || op.right.has_mutations(),
230            Self::AntiJoin(op) => op.left.has_mutations() || op.right.has_mutations(),
231            Self::Except(op) => op.left.has_mutations() || op.right.has_mutations(),
232            Self::Intersect(op) => op.left.has_mutations() || op.right.has_mutations(),
233            Self::Otherwise(op) => op.left.has_mutations() || op.right.has_mutations(),
234            Self::Union(op) => op.inputs.iter().any(|i| i.has_mutations()),
235            Self::Apply(op) => op.input.has_mutations() || op.subplan.has_mutations(),
236
237            // Leaf operators (read-only)
238            Self::NodeScan(_)
239            | Self::EdgeScan(_)
240            | Self::Expand(_)
241            | Self::TripleScan(_)
242            | Self::ShortestPath(_)
243            | Self::Empty
244            | Self::CallProcedure(_) => false,
245        }
246    }
247}
248
249/// Scan nodes from the graph.
250#[derive(Debug, Clone)]
251pub struct NodeScanOp {
252    /// Variable name to bind the node to.
253    pub variable: String,
254    /// Optional label filter.
255    pub label: Option<String>,
256    /// Child operator (if any, for chained patterns).
257    pub input: Option<Box<LogicalOperator>>,
258}
259
260/// Scan edges from the graph.
261#[derive(Debug, Clone)]
262pub struct EdgeScanOp {
263    /// Variable name to bind the edge to.
264    pub variable: String,
265    /// Edge type filter (empty = match all types).
266    pub edge_types: Vec<String>,
267    /// Child operator (if any).
268    pub input: Option<Box<LogicalOperator>>,
269}
270
271/// Path traversal mode for variable-length expansion.
272#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
273pub enum PathMode {
274    /// Allows repeated nodes and edges (default).
275    #[default]
276    Walk,
277    /// No repeated edges.
278    Trail,
279    /// No repeated nodes except endpoints.
280    Simple,
281    /// No repeated nodes at all.
282    Acyclic,
283}
284
285/// Expand from nodes to their neighbors.
286#[derive(Debug, Clone)]
287pub struct ExpandOp {
288    /// Source node variable.
289    pub from_variable: String,
290    /// Target node variable to bind.
291    pub to_variable: String,
292    /// Edge variable to bind (optional).
293    pub edge_variable: Option<String>,
294    /// Direction of expansion.
295    pub direction: ExpandDirection,
296    /// Edge type filter (empty = match all types, multiple = match any).
297    pub edge_types: Vec<String>,
298    /// Minimum hops (for variable-length patterns).
299    pub min_hops: u32,
300    /// Maximum hops (for variable-length patterns).
301    pub max_hops: Option<u32>,
302    /// Input operator.
303    pub input: Box<LogicalOperator>,
304    /// Path alias for variable-length patterns (e.g., `p` in `p = (a)-[*1..3]->(b)`).
305    /// When set, a path length column will be output under this name.
306    pub path_alias: Option<String>,
307    /// Path traversal mode (WALK, TRAIL, SIMPLE, ACYCLIC).
308    pub path_mode: PathMode,
309}
310
311/// Direction for edge expansion.
312#[derive(Debug, Clone, Copy, PartialEq, Eq)]
313pub enum ExpandDirection {
314    /// Follow outgoing edges.
315    Outgoing,
316    /// Follow incoming edges.
317    Incoming,
318    /// Follow edges in either direction.
319    Both,
320}
321
322/// Join two inputs.
323#[derive(Debug, Clone)]
324pub struct JoinOp {
325    /// Left input.
326    pub left: Box<LogicalOperator>,
327    /// Right input.
328    pub right: Box<LogicalOperator>,
329    /// Join type.
330    pub join_type: JoinType,
331    /// Join conditions.
332    pub conditions: Vec<JoinCondition>,
333}
334
335/// Join type.
336#[derive(Debug, Clone, Copy, PartialEq, Eq)]
337pub enum JoinType {
338    /// Inner join.
339    Inner,
340    /// Left outer join.
341    Left,
342    /// Right outer join.
343    Right,
344    /// Full outer join.
345    Full,
346    /// Cross join (Cartesian product).
347    Cross,
348    /// Semi join (returns left rows with matching right rows).
349    Semi,
350    /// Anti join (returns left rows without matching right rows).
351    Anti,
352}
353
354/// A join condition.
355#[derive(Debug, Clone)]
356pub struct JoinCondition {
357    /// Left expression.
358    pub left: LogicalExpression,
359    /// Right expression.
360    pub right: LogicalExpression,
361}
362
363/// Aggregate with grouping.
364#[derive(Debug, Clone)]
365pub struct AggregateOp {
366    /// Group by expressions.
367    pub group_by: Vec<LogicalExpression>,
368    /// Aggregate functions.
369    pub aggregates: Vec<AggregateExpr>,
370    /// Input operator.
371    pub input: Box<LogicalOperator>,
372    /// HAVING clause filter (applied after aggregation).
373    pub having: Option<LogicalExpression>,
374}
375
376/// An aggregate expression.
377#[derive(Debug, Clone)]
378pub struct AggregateExpr {
379    /// Aggregate function.
380    pub function: AggregateFunction,
381    /// Expression to aggregate.
382    pub expression: Option<LogicalExpression>,
383    /// Whether to use DISTINCT.
384    pub distinct: bool,
385    /// Alias for the result.
386    pub alias: Option<String>,
387    /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
388    pub percentile: Option<f64>,
389}
390
391/// Aggregate function.
392#[derive(Debug, Clone, Copy, PartialEq, Eq)]
393pub enum AggregateFunction {
394    /// Count all rows (COUNT(*)).
395    Count,
396    /// Count non-null values (COUNT(expr)).
397    CountNonNull,
398    /// Sum values.
399    Sum,
400    /// Average values.
401    Avg,
402    /// Minimum value.
403    Min,
404    /// Maximum value.
405    Max,
406    /// Collect into list.
407    Collect,
408    /// Sample standard deviation (STDEV).
409    StdDev,
410    /// Population standard deviation (STDEVP).
411    StdDevPop,
412    /// Discrete percentile (PERCENTILE_DISC).
413    PercentileDisc,
414    /// Continuous percentile (PERCENTILE_CONT).
415    PercentileCont,
416}
417
418/// Filter rows based on a predicate.
419#[derive(Debug, Clone)]
420pub struct FilterOp {
421    /// The filter predicate.
422    pub predicate: LogicalExpression,
423    /// Input operator.
424    pub input: Box<LogicalOperator>,
425}
426
427/// Project specific columns.
428#[derive(Debug, Clone)]
429pub struct ProjectOp {
430    /// Columns to project.
431    pub projections: Vec<Projection>,
432    /// Input operator.
433    pub input: Box<LogicalOperator>,
434}
435
436/// A single projection (column selection or computation).
437#[derive(Debug, Clone)]
438pub struct Projection {
439    /// Expression to compute.
440    pub expression: LogicalExpression,
441    /// Alias for the result.
442    pub alias: Option<String>,
443}
444
445/// Limit the number of results.
446#[derive(Debug, Clone)]
447pub struct LimitOp {
448    /// Maximum number of rows to return.
449    pub count: usize,
450    /// Input operator.
451    pub input: Box<LogicalOperator>,
452}
453
454/// Skip a number of results.
455#[derive(Debug, Clone)]
456pub struct SkipOp {
457    /// Number of rows to skip.
458    pub count: usize,
459    /// Input operator.
460    pub input: Box<LogicalOperator>,
461}
462
463/// Sort results.
464#[derive(Debug, Clone)]
465pub struct SortOp {
466    /// Sort keys.
467    pub keys: Vec<SortKey>,
468    /// Input operator.
469    pub input: Box<LogicalOperator>,
470}
471
472/// A sort key.
473#[derive(Debug, Clone)]
474pub struct SortKey {
475    /// Expression to sort by.
476    pub expression: LogicalExpression,
477    /// Sort order.
478    pub order: SortOrder,
479}
480
481/// Sort order.
482#[derive(Debug, Clone, Copy, PartialEq, Eq)]
483pub enum SortOrder {
484    /// Ascending order.
485    Ascending,
486    /// Descending order.
487    Descending,
488}
489
490/// Remove duplicate results.
491#[derive(Debug, Clone)]
492pub struct DistinctOp {
493    /// Input operator.
494    pub input: Box<LogicalOperator>,
495    /// Optional columns to use for deduplication.
496    /// If None, all columns are used.
497    pub columns: Option<Vec<String>>,
498}
499
500/// Create a new node.
501#[derive(Debug, Clone)]
502pub struct CreateNodeOp {
503    /// Variable name to bind the created node to.
504    pub variable: String,
505    /// Labels for the new node.
506    pub labels: Vec<String>,
507    /// Properties for the new node.
508    pub properties: Vec<(String, LogicalExpression)>,
509    /// Input operator (for chained creates).
510    pub input: Option<Box<LogicalOperator>>,
511}
512
513/// Create a new edge.
514#[derive(Debug, Clone)]
515pub struct CreateEdgeOp {
516    /// Variable name to bind the created edge to.
517    pub variable: Option<String>,
518    /// Source node variable.
519    pub from_variable: String,
520    /// Target node variable.
521    pub to_variable: String,
522    /// Edge type.
523    pub edge_type: String,
524    /// Properties for the new edge.
525    pub properties: Vec<(String, LogicalExpression)>,
526    /// Input operator.
527    pub input: Box<LogicalOperator>,
528}
529
530/// Delete a node.
531#[derive(Debug, Clone)]
532pub struct DeleteNodeOp {
533    /// Variable of the node to delete.
534    pub variable: String,
535    /// Whether to detach (delete connected edges) before deleting.
536    pub detach: bool,
537    /// Input operator.
538    pub input: Box<LogicalOperator>,
539}
540
541/// Delete an edge.
542#[derive(Debug, Clone)]
543pub struct DeleteEdgeOp {
544    /// Variable of the edge to delete.
545    pub variable: String,
546    /// Input operator.
547    pub input: Box<LogicalOperator>,
548}
549
550/// Set properties on a node or edge.
551#[derive(Debug, Clone)]
552pub struct SetPropertyOp {
553    /// Variable of the entity to update.
554    pub variable: String,
555    /// Properties to set (name -> expression).
556    pub properties: Vec<(String, LogicalExpression)>,
557    /// Whether to replace all properties (vs. merge).
558    pub replace: bool,
559    /// Whether the target variable is an edge (vs. node).
560    pub is_edge: bool,
561    /// Input operator.
562    pub input: Box<LogicalOperator>,
563}
564
565/// Add labels to a node.
566#[derive(Debug, Clone)]
567pub struct AddLabelOp {
568    /// Variable of the node to update.
569    pub variable: String,
570    /// Labels to add.
571    pub labels: Vec<String>,
572    /// Input operator.
573    pub input: Box<LogicalOperator>,
574}
575
576/// Remove labels from a node.
577#[derive(Debug, Clone)]
578pub struct RemoveLabelOp {
579    /// Variable of the node to update.
580    pub variable: String,
581    /// Labels to remove.
582    pub labels: Vec<String>,
583    /// Input operator.
584    pub input: Box<LogicalOperator>,
585}
586
587// ==================== RDF/SPARQL Operators ====================
588
589/// Scan RDF triples matching a pattern.
590#[derive(Debug, Clone)]
591pub struct TripleScanOp {
592    /// Subject pattern (variable name or IRI).
593    pub subject: TripleComponent,
594    /// Predicate pattern (variable name or IRI).
595    pub predicate: TripleComponent,
596    /// Object pattern (variable name, IRI, or literal).
597    pub object: TripleComponent,
598    /// Named graph (optional).
599    pub graph: Option<TripleComponent>,
600    /// Input operator (for chained patterns).
601    pub input: Option<Box<LogicalOperator>>,
602}
603
604/// A component of a triple pattern.
605#[derive(Debug, Clone)]
606pub enum TripleComponent {
607    /// A variable to bind.
608    Variable(String),
609    /// A constant IRI.
610    Iri(String),
611    /// A constant literal value.
612    Literal(Value),
613}
614
615/// Union of multiple result sets.
616#[derive(Debug, Clone)]
617pub struct UnionOp {
618    /// Inputs to union together.
619    pub inputs: Vec<LogicalOperator>,
620}
621
622/// Set difference: rows in left that are not in right.
623#[derive(Debug, Clone)]
624pub struct ExceptOp {
625    /// Left input.
626    pub left: Box<LogicalOperator>,
627    /// Right input (rows to exclude).
628    pub right: Box<LogicalOperator>,
629    /// If true, preserve duplicates (EXCEPT ALL); if false, deduplicate (EXCEPT DISTINCT).
630    pub all: bool,
631}
632
633/// Set intersection: rows common to both inputs.
634#[derive(Debug, Clone)]
635pub struct IntersectOp {
636    /// Left input.
637    pub left: Box<LogicalOperator>,
638    /// Right input.
639    pub right: Box<LogicalOperator>,
640    /// If true, preserve duplicates (INTERSECT ALL); if false, deduplicate (INTERSECT DISTINCT).
641    pub all: bool,
642}
643
644/// Fallback operator: use left result if non-empty, otherwise use right.
645#[derive(Debug, Clone)]
646pub struct OtherwiseOp {
647    /// Primary input (preferred).
648    pub left: Box<LogicalOperator>,
649    /// Fallback input (used only if left produces zero rows).
650    pub right: Box<LogicalOperator>,
651}
652
653/// Apply (lateral join): evaluate a subplan for each row of the outer input.
654///
655/// The subplan can reference variables bound by the outer input. Results are
656/// concatenated (cross-product per row).
657#[derive(Debug, Clone)]
658pub struct ApplyOp {
659    /// Outer input providing rows.
660    pub input: Box<LogicalOperator>,
661    /// Subplan to evaluate per outer row.
662    pub subplan: Box<LogicalOperator>,
663}
664
665/// Left outer join for OPTIONAL patterns.
666#[derive(Debug, Clone)]
667pub struct LeftJoinOp {
668    /// Left (required) input.
669    pub left: Box<LogicalOperator>,
670    /// Right (optional) input.
671    pub right: Box<LogicalOperator>,
672    /// Optional filter condition.
673    pub condition: Option<LogicalExpression>,
674}
675
676/// Anti-join for MINUS patterns.
677#[derive(Debug, Clone)]
678pub struct AntiJoinOp {
679    /// Left input (results to keep if no match on right).
680    pub left: Box<LogicalOperator>,
681    /// Right input (patterns to exclude).
682    pub right: Box<LogicalOperator>,
683}
684
685/// Bind a variable to an expression.
686#[derive(Debug, Clone)]
687pub struct BindOp {
688    /// Expression to compute.
689    pub expression: LogicalExpression,
690    /// Variable to bind the result to.
691    pub variable: String,
692    /// Input operator.
693    pub input: Box<LogicalOperator>,
694}
695
696/// Unwind a list into individual rows.
697///
698/// For each input row, evaluates the expression (which should return a list)
699/// and emits one row for each element in the list.
700#[derive(Debug, Clone)]
701pub struct UnwindOp {
702    /// The list expression to unwind.
703    pub expression: LogicalExpression,
704    /// The variable name for each element.
705    pub variable: String,
706    /// Optional variable for 1-based element position (ORDINALITY).
707    pub ordinality_var: Option<String>,
708    /// Optional variable for 0-based element position (OFFSET).
709    pub offset_var: Option<String>,
710    /// Input operator.
711    pub input: Box<LogicalOperator>,
712}
713
714/// Collect grouped key-value rows into a single Map value.
715/// Used for Gremlin `groupCount()` semantics.
716#[derive(Debug, Clone)]
717pub struct MapCollectOp {
718    /// Variable holding the map key.
719    pub key_var: String,
720    /// Variable holding the map value.
721    pub value_var: String,
722    /// Output variable alias.
723    pub alias: String,
724    /// Input operator (typically a grouped aggregate).
725    pub input: Box<LogicalOperator>,
726}
727
728/// Merge a pattern (match or create).
729///
730/// MERGE tries to match a pattern in the graph. If found, returns the existing
731/// elements (optionally applying ON MATCH SET). If not found, creates the pattern
732/// (optionally applying ON CREATE SET).
733#[derive(Debug, Clone)]
734pub struct MergeOp {
735    /// The node to merge.
736    pub variable: String,
737    /// Labels to match/create.
738    pub labels: Vec<String>,
739    /// Properties that must match (used for both matching and creation).
740    pub match_properties: Vec<(String, LogicalExpression)>,
741    /// Properties to set on CREATE.
742    pub on_create: Vec<(String, LogicalExpression)>,
743    /// Properties to set on MATCH.
744    pub on_match: Vec<(String, LogicalExpression)>,
745    /// Input operator.
746    pub input: Box<LogicalOperator>,
747}
748
749/// Merge a relationship pattern (match or create between two bound nodes).
750///
751/// MERGE on a relationship tries to find an existing relationship of the given type
752/// between the source and target nodes. If found, returns the existing relationship
753/// (optionally applying ON MATCH SET). If not found, creates it (optionally applying
754/// ON CREATE SET).
755#[derive(Debug, Clone)]
756pub struct MergeRelationshipOp {
757    /// Variable to bind the relationship to.
758    pub variable: String,
759    /// Source node variable (must already be bound).
760    pub source_variable: String,
761    /// Target node variable (must already be bound).
762    pub target_variable: String,
763    /// Relationship type.
764    pub edge_type: String,
765    /// Properties that must match (used for both matching and creation).
766    pub match_properties: Vec<(String, LogicalExpression)>,
767    /// Properties to set on CREATE.
768    pub on_create: Vec<(String, LogicalExpression)>,
769    /// Properties to set on MATCH.
770    pub on_match: Vec<(String, LogicalExpression)>,
771    /// Input operator.
772    pub input: Box<LogicalOperator>,
773}
774
775/// Find shortest path between two nodes.
776///
777/// This operator uses Dijkstra's algorithm to find the shortest path(s)
778/// between a source node and a target node, optionally filtered by edge type.
779#[derive(Debug, Clone)]
780pub struct ShortestPathOp {
781    /// Input operator providing source/target nodes.
782    pub input: Box<LogicalOperator>,
783    /// Variable name for the source node.
784    pub source_var: String,
785    /// Variable name for the target node.
786    pub target_var: String,
787    /// Edge type filter (empty = match all types, multiple = match any).
788    pub edge_types: Vec<String>,
789    /// Direction of edge traversal.
790    pub direction: ExpandDirection,
791    /// Variable name to bind the path result.
792    pub path_alias: String,
793    /// Whether to find all shortest paths (vs. just one).
794    pub all_paths: bool,
795}
796
797// ==================== SPARQL Update Operators ====================
798
799/// Insert RDF triples.
800#[derive(Debug, Clone)]
801pub struct InsertTripleOp {
802    /// Subject of the triple.
803    pub subject: TripleComponent,
804    /// Predicate of the triple.
805    pub predicate: TripleComponent,
806    /// Object of the triple.
807    pub object: TripleComponent,
808    /// Named graph (optional).
809    pub graph: Option<String>,
810    /// Input operator (provides variable bindings).
811    pub input: Option<Box<LogicalOperator>>,
812}
813
814/// Delete RDF triples.
815#[derive(Debug, Clone)]
816pub struct DeleteTripleOp {
817    /// Subject pattern.
818    pub subject: TripleComponent,
819    /// Predicate pattern.
820    pub predicate: TripleComponent,
821    /// Object pattern.
822    pub object: TripleComponent,
823    /// Named graph (optional).
824    pub graph: Option<String>,
825    /// Input operator (provides variable bindings).
826    pub input: Option<Box<LogicalOperator>>,
827}
828
829/// SPARQL MODIFY operation (DELETE/INSERT WHERE).
830///
831/// Per SPARQL 1.1 Update spec, this operator:
832/// 1. Evaluates the WHERE clause once to get bindings
833/// 2. Applies DELETE templates using those bindings
834/// 3. Applies INSERT templates using the SAME bindings
835///
836/// This ensures DELETE and INSERT see consistent data.
837#[derive(Debug, Clone)]
838pub struct ModifyOp {
839    /// DELETE triple templates (patterns with variables).
840    pub delete_templates: Vec<TripleTemplate>,
841    /// INSERT triple templates (patterns with variables).
842    pub insert_templates: Vec<TripleTemplate>,
843    /// WHERE clause that provides variable bindings.
844    pub where_clause: Box<LogicalOperator>,
845    /// Named graph context (for WITH clause).
846    pub graph: Option<String>,
847}
848
849/// A triple template for DELETE/INSERT operations.
850#[derive(Debug, Clone)]
851pub struct TripleTemplate {
852    /// Subject (may be a variable).
853    pub subject: TripleComponent,
854    /// Predicate (may be a variable).
855    pub predicate: TripleComponent,
856    /// Object (may be a variable or literal).
857    pub object: TripleComponent,
858    /// Named graph (optional).
859    pub graph: Option<String>,
860}
861
862/// Clear all triples from a graph.
863#[derive(Debug, Clone)]
864pub struct ClearGraphOp {
865    /// Target graph (None = default graph, Some("") = all named, Some(iri) = specific graph).
866    pub graph: Option<String>,
867    /// Whether to silently ignore errors.
868    pub silent: bool,
869}
870
871/// Create a new named graph.
872#[derive(Debug, Clone)]
873pub struct CreateGraphOp {
874    /// IRI of the graph to create.
875    pub graph: String,
876    /// Whether to silently ignore if graph already exists.
877    pub silent: bool,
878}
879
880/// Drop (remove) a named graph.
881#[derive(Debug, Clone)]
882pub struct DropGraphOp {
883    /// Target graph (None = default graph).
884    pub graph: Option<String>,
885    /// Whether to silently ignore errors.
886    pub silent: bool,
887}
888
889/// Load data from a URL into a graph.
890#[derive(Debug, Clone)]
891pub struct LoadGraphOp {
892    /// Source URL to load data from.
893    pub source: String,
894    /// Destination graph (None = default graph).
895    pub destination: Option<String>,
896    /// Whether to silently ignore errors.
897    pub silent: bool,
898}
899
900/// Copy triples from one graph to another.
901#[derive(Debug, Clone)]
902pub struct CopyGraphOp {
903    /// Source graph.
904    pub source: Option<String>,
905    /// Destination graph.
906    pub destination: Option<String>,
907    /// Whether to silently ignore errors.
908    pub silent: bool,
909}
910
911/// Move triples from one graph to another.
912#[derive(Debug, Clone)]
913pub struct MoveGraphOp {
914    /// Source graph.
915    pub source: Option<String>,
916    /// Destination graph.
917    pub destination: Option<String>,
918    /// Whether to silently ignore errors.
919    pub silent: bool,
920}
921
922/// Add (merge) triples from one graph to another.
923#[derive(Debug, Clone)]
924pub struct AddGraphOp {
925    /// Source graph.
926    pub source: Option<String>,
927    /// Destination graph.
928    pub destination: Option<String>,
929    /// Whether to silently ignore errors.
930    pub silent: bool,
931}
932
933// ==================== Vector Search Operators ====================
934
935/// Vector similarity scan operation.
936///
937/// Performs approximate nearest neighbor search using a vector index (HNSW)
938/// or brute-force search for small datasets. Returns nodes/edges whose
939/// embeddings are similar to the query vector.
940///
941/// # Example GQL
942///
943/// ```gql
944/// MATCH (m:Movie)
945/// WHERE vector_similarity(m.embedding, $query_vector) > 0.8
946/// RETURN m.title
947/// ```
948#[derive(Debug, Clone)]
949pub struct VectorScanOp {
950    /// Variable name to bind matching entities to.
951    pub variable: String,
952    /// Name of the vector index to use (None = brute-force).
953    pub index_name: Option<String>,
954    /// Property containing the vector embedding.
955    pub property: String,
956    /// Optional label filter (scan only nodes with this label).
957    pub label: Option<String>,
958    /// The query vector expression.
959    pub query_vector: LogicalExpression,
960    /// Number of nearest neighbors to return.
961    pub k: usize,
962    /// Distance metric (None = use index default, typically cosine).
963    pub metric: Option<VectorMetric>,
964    /// Minimum similarity threshold (filters results below this).
965    pub min_similarity: Option<f32>,
966    /// Maximum distance threshold (filters results above this).
967    pub max_distance: Option<f32>,
968    /// Input operator (for hybrid queries combining graph + vector).
969    pub input: Option<Box<LogicalOperator>>,
970}
971
972/// Vector distance/similarity metric for vector scan operations.
973#[derive(Debug, Clone, Copy, PartialEq, Eq)]
974pub enum VectorMetric {
975    /// Cosine similarity (1 - cosine_distance). Best for normalized embeddings.
976    Cosine,
977    /// Euclidean (L2) distance. Best when magnitude matters.
978    Euclidean,
979    /// Dot product. Best for maximum inner product search.
980    DotProduct,
981    /// Manhattan (L1) distance. Less sensitive to outliers.
982    Manhattan,
983}
984
985/// Join graph patterns with vector similarity search.
986///
987/// This operator takes entities from the left input and computes vector
988/// similarity against a query vector, outputting (entity, distance) pairs.
989///
990/// # Use Cases
991///
992/// 1. **Hybrid graph + vector queries**: Find similar nodes after graph traversal
993/// 2. **Aggregated embeddings**: Use AVG(embeddings) as query vector
994/// 3. **Filtering by similarity**: Join with threshold-based filtering
995///
996/// # Example
997///
998/// ```gql
999/// // Find movies similar to what the user liked
1000/// MATCH (u:User {id: $user_id})-[:LIKED]->(liked:Movie)
1001/// WITH avg(liked.embedding) AS user_taste
1002/// VECTOR JOIN (m:Movie) ON m.embedding
1003/// WHERE vector_similarity(m.embedding, user_taste) > 0.7
1004/// RETURN m.title
1005/// ```
1006#[derive(Debug, Clone)]
1007pub struct VectorJoinOp {
1008    /// Input operator providing entities to match against.
1009    pub input: Box<LogicalOperator>,
1010    /// Variable from input to extract vectors from (for entity-to-entity similarity).
1011    /// If None, uses `query_vector` directly.
1012    pub left_vector_variable: Option<String>,
1013    /// Property containing the left vector (used with `left_vector_variable`).
1014    pub left_property: Option<String>,
1015    /// The query vector expression (constant or computed).
1016    pub query_vector: LogicalExpression,
1017    /// Variable name to bind the right-side matching entities.
1018    pub right_variable: String,
1019    /// Property containing the right-side vector embeddings.
1020    pub right_property: String,
1021    /// Optional label filter for right-side entities.
1022    pub right_label: Option<String>,
1023    /// Name of vector index on right side (None = brute-force).
1024    pub index_name: Option<String>,
1025    /// Number of nearest neighbors per left-side entity.
1026    pub k: usize,
1027    /// Distance metric.
1028    pub metric: Option<VectorMetric>,
1029    /// Minimum similarity threshold.
1030    pub min_similarity: Option<f32>,
1031    /// Maximum distance threshold.
1032    pub max_distance: Option<f32>,
1033    /// Variable to bind the distance/similarity score.
1034    pub score_variable: Option<String>,
1035}
1036
1037/// Return results (terminal operator).
1038#[derive(Debug, Clone)]
1039pub struct ReturnOp {
1040    /// Items to return.
1041    pub items: Vec<ReturnItem>,
1042    /// Whether to return distinct results.
1043    pub distinct: bool,
1044    /// Input operator.
1045    pub input: Box<LogicalOperator>,
1046}
1047
1048/// A single return item.
1049#[derive(Debug, Clone)]
1050pub struct ReturnItem {
1051    /// Expression to return.
1052    pub expression: LogicalExpression,
1053    /// Alias for the result column.
1054    pub alias: Option<String>,
1055}
1056
1057/// Define a property graph schema (SQL/PGQ DDL).
1058#[derive(Debug, Clone)]
1059pub struct CreatePropertyGraphOp {
1060    /// Graph name.
1061    pub name: String,
1062    /// Node table schemas (label name + column definitions).
1063    pub node_tables: Vec<PropertyGraphNodeTable>,
1064    /// Edge table schemas (type name + column definitions + references).
1065    pub edge_tables: Vec<PropertyGraphEdgeTable>,
1066}
1067
1068/// A node table in a property graph definition.
1069#[derive(Debug, Clone)]
1070pub struct PropertyGraphNodeTable {
1071    /// Table name (maps to a node label).
1072    pub name: String,
1073    /// Column definitions as (name, type_name) pairs.
1074    pub columns: Vec<(String, String)>,
1075}
1076
1077/// An edge table in a property graph definition.
1078#[derive(Debug, Clone)]
1079pub struct PropertyGraphEdgeTable {
1080    /// Table name (maps to an edge type).
1081    pub name: String,
1082    /// Column definitions as (name, type_name) pairs.
1083    pub columns: Vec<(String, String)>,
1084    /// Source node table name.
1085    pub source_table: String,
1086    /// Target node table name.
1087    pub target_table: String,
1088}
1089
1090// ==================== Procedure Call Types ====================
1091
1092/// A CALL procedure operation.
1093///
1094/// ```text
1095/// CALL grafeo.pagerank({damping: 0.85}) YIELD nodeId, score
1096/// ```
1097#[derive(Debug, Clone)]
1098pub struct CallProcedureOp {
1099    /// Dotted procedure name, e.g. `["grafeo", "pagerank"]`.
1100    pub name: Vec<String>,
1101    /// Argument expressions (constants in Phase 1).
1102    pub arguments: Vec<LogicalExpression>,
1103    /// Optional YIELD clause: which columns to expose + aliases.
1104    pub yield_items: Option<Vec<ProcedureYield>>,
1105}
1106
1107/// A single YIELD item in a procedure call.
1108#[derive(Debug, Clone)]
1109pub struct ProcedureYield {
1110    /// Column name from the procedure result.
1111    pub field_name: String,
1112    /// Optional alias (YIELD score AS rank).
1113    pub alias: Option<String>,
1114}
1115
1116/// A logical expression.
1117#[derive(Debug, Clone)]
1118pub enum LogicalExpression {
1119    /// A literal value.
1120    Literal(Value),
1121
1122    /// A variable reference.
1123    Variable(String),
1124
1125    /// Property access (e.g., n.name).
1126    Property {
1127        /// The variable to access.
1128        variable: String,
1129        /// The property name.
1130        property: String,
1131    },
1132
1133    /// Binary operation.
1134    Binary {
1135        /// Left operand.
1136        left: Box<LogicalExpression>,
1137        /// Operator.
1138        op: BinaryOp,
1139        /// Right operand.
1140        right: Box<LogicalExpression>,
1141    },
1142
1143    /// Unary operation.
1144    Unary {
1145        /// Operator.
1146        op: UnaryOp,
1147        /// Operand.
1148        operand: Box<LogicalExpression>,
1149    },
1150
1151    /// Function call.
1152    FunctionCall {
1153        /// Function name.
1154        name: String,
1155        /// Arguments.
1156        args: Vec<LogicalExpression>,
1157        /// Whether DISTINCT is applied (e.g., COUNT(DISTINCT x)).
1158        distinct: bool,
1159    },
1160
1161    /// List literal.
1162    List(Vec<LogicalExpression>),
1163
1164    /// Map literal (e.g., {name: 'Alice', age: 30}).
1165    Map(Vec<(String, LogicalExpression)>),
1166
1167    /// Index access (e.g., `list[0]`).
1168    IndexAccess {
1169        /// The base expression (typically a list or string).
1170        base: Box<LogicalExpression>,
1171        /// The index expression.
1172        index: Box<LogicalExpression>,
1173    },
1174
1175    /// Slice access (e.g., list[1..3]).
1176    SliceAccess {
1177        /// The base expression (typically a list or string).
1178        base: Box<LogicalExpression>,
1179        /// Start index (None means from beginning).
1180        start: Option<Box<LogicalExpression>>,
1181        /// End index (None means to end).
1182        end: Option<Box<LogicalExpression>>,
1183    },
1184
1185    /// CASE expression.
1186    Case {
1187        /// Test expression (for simple CASE).
1188        operand: Option<Box<LogicalExpression>>,
1189        /// WHEN clauses.
1190        when_clauses: Vec<(LogicalExpression, LogicalExpression)>,
1191        /// ELSE clause.
1192        else_clause: Option<Box<LogicalExpression>>,
1193    },
1194
1195    /// Parameter reference.
1196    Parameter(String),
1197
1198    /// Labels of a node.
1199    Labels(String),
1200
1201    /// Type of an edge.
1202    Type(String),
1203
1204    /// ID of a node or edge.
1205    Id(String),
1206
1207    /// List comprehension: [x IN list WHERE predicate | expression]
1208    ListComprehension {
1209        /// Variable name for each element.
1210        variable: String,
1211        /// The source list expression.
1212        list_expr: Box<LogicalExpression>,
1213        /// Optional filter predicate.
1214        filter_expr: Option<Box<LogicalExpression>>,
1215        /// The mapping expression for each element.
1216        map_expr: Box<LogicalExpression>,
1217    },
1218
1219    /// List predicate: all/any/none/single(x IN list WHERE pred).
1220    ListPredicate {
1221        /// The kind of list predicate.
1222        kind: ListPredicateKind,
1223        /// The iteration variable name.
1224        variable: String,
1225        /// The source list expression.
1226        list_expr: Box<LogicalExpression>,
1227        /// The predicate to test for each element.
1228        predicate: Box<LogicalExpression>,
1229    },
1230
1231    /// EXISTS subquery.
1232    ExistsSubquery(Box<LogicalOperator>),
1233
1234    /// COUNT subquery.
1235    CountSubquery(Box<LogicalOperator>),
1236
1237    /// Map projection: `node { .prop1, .prop2, key: expr, .* }`.
1238    MapProjection {
1239        /// The base variable name.
1240        base: String,
1241        /// Projection entries (property selectors, literal entries, all-properties).
1242        entries: Vec<MapProjectionEntry>,
1243    },
1244
1245    /// reduce() accumulator: `reduce(acc = init, x IN list | expr)`.
1246    Reduce {
1247        /// Accumulator variable name.
1248        accumulator: String,
1249        /// Initial value for the accumulator.
1250        initial: Box<LogicalExpression>,
1251        /// Iteration variable name.
1252        variable: String,
1253        /// List to iterate over.
1254        list: Box<LogicalExpression>,
1255        /// Body expression evaluated per iteration (references both accumulator and variable).
1256        expression: Box<LogicalExpression>,
1257    },
1258
1259    /// Pattern comprehension: `[(pattern) WHERE pred | expr]`.
1260    ///
1261    /// Executes the inner subplan, evaluates the projection for each row,
1262    /// and collects the results into a list.
1263    PatternComprehension {
1264        /// The subplan produced by translating the pattern (+optional WHERE).
1265        subplan: Box<LogicalOperator>,
1266        /// The projection expression evaluated for each match.
1267        projection: Box<LogicalExpression>,
1268    },
1269}
1270
1271/// An entry in a map projection.
1272#[derive(Debug, Clone)]
1273pub enum MapProjectionEntry {
1274    /// `.propertyName`: shorthand for `propertyName: base.propertyName`.
1275    PropertySelector(String),
1276    /// `key: expression`: explicit key-value pair.
1277    LiteralEntry(String, LogicalExpression),
1278    /// `.*`: include all properties of the base entity.
1279    AllProperties,
1280}
1281
1282/// The kind of list predicate function.
1283#[derive(Debug, Clone, PartialEq, Eq)]
1284pub enum ListPredicateKind {
1285    /// all(x IN list WHERE pred): true if pred holds for every element.
1286    All,
1287    /// any(x IN list WHERE pred): true if pred holds for at least one element.
1288    Any,
1289    /// none(x IN list WHERE pred): true if pred holds for no element.
1290    None,
1291    /// single(x IN list WHERE pred): true if pred holds for exactly one element.
1292    Single,
1293}
1294
1295/// Binary operator.
1296#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1297pub enum BinaryOp {
1298    /// Equality comparison (=).
1299    Eq,
1300    /// Inequality comparison (<>).
1301    Ne,
1302    /// Less than (<).
1303    Lt,
1304    /// Less than or equal (<=).
1305    Le,
1306    /// Greater than (>).
1307    Gt,
1308    /// Greater than or equal (>=).
1309    Ge,
1310
1311    /// Logical AND.
1312    And,
1313    /// Logical OR.
1314    Or,
1315    /// Logical XOR.
1316    Xor,
1317
1318    /// Addition (+).
1319    Add,
1320    /// Subtraction (-).
1321    Sub,
1322    /// Multiplication (*).
1323    Mul,
1324    /// Division (/).
1325    Div,
1326    /// Modulo (%).
1327    Mod,
1328
1329    /// String concatenation.
1330    Concat,
1331    /// String starts with.
1332    StartsWith,
1333    /// String ends with.
1334    EndsWith,
1335    /// String contains.
1336    Contains,
1337
1338    /// Collection membership (IN).
1339    In,
1340    /// Pattern matching (LIKE).
1341    Like,
1342    /// Regex matching (=~).
1343    Regex,
1344    /// Power/exponentiation (^).
1345    Pow,
1346}
1347
1348/// Unary operator.
1349#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1350pub enum UnaryOp {
1351    /// Logical NOT.
1352    Not,
1353    /// Numeric negation.
1354    Neg,
1355    /// IS NULL check.
1356    IsNull,
1357    /// IS NOT NULL check.
1358    IsNotNull,
1359}
1360
1361#[cfg(test)]
1362mod tests {
1363    use super::*;
1364
1365    #[test]
1366    fn test_simple_node_scan_plan() {
1367        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1368            items: vec![ReturnItem {
1369                expression: LogicalExpression::Variable("n".into()),
1370                alias: None,
1371            }],
1372            distinct: false,
1373            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1374                variable: "n".into(),
1375                label: Some("Person".into()),
1376                input: None,
1377            })),
1378        }));
1379
1380        // Verify structure
1381        if let LogicalOperator::Return(ret) = &plan.root {
1382            assert_eq!(ret.items.len(), 1);
1383            assert!(!ret.distinct);
1384            if let LogicalOperator::NodeScan(scan) = ret.input.as_ref() {
1385                assert_eq!(scan.variable, "n");
1386                assert_eq!(scan.label, Some("Person".into()));
1387            } else {
1388                panic!("Expected NodeScan");
1389            }
1390        } else {
1391            panic!("Expected Return");
1392        }
1393    }
1394
1395    #[test]
1396    fn test_filter_plan() {
1397        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1398            items: vec![ReturnItem {
1399                expression: LogicalExpression::Property {
1400                    variable: "n".into(),
1401                    property: "name".into(),
1402                },
1403                alias: Some("name".into()),
1404            }],
1405            distinct: false,
1406            input: Box::new(LogicalOperator::Filter(FilterOp {
1407                predicate: LogicalExpression::Binary {
1408                    left: Box::new(LogicalExpression::Property {
1409                        variable: "n".into(),
1410                        property: "age".into(),
1411                    }),
1412                    op: BinaryOp::Gt,
1413                    right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1414                },
1415                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1416                    variable: "n".into(),
1417                    label: Some("Person".into()),
1418                    input: None,
1419                })),
1420            })),
1421        }));
1422
1423        if let LogicalOperator::Return(ret) = &plan.root {
1424            if let LogicalOperator::Filter(filter) = ret.input.as_ref() {
1425                if let LogicalExpression::Binary { op, .. } = &filter.predicate {
1426                    assert_eq!(*op, BinaryOp::Gt);
1427                } else {
1428                    panic!("Expected Binary expression");
1429                }
1430            } else {
1431                panic!("Expected Filter");
1432            }
1433        } else {
1434            panic!("Expected Return");
1435        }
1436    }
1437}