mangle_ir/physical.rs
1// Copyright 2025 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Physical Plan IR for Mangle.
16//!
17//! This represents the imperative execution logic (loops, joins, inserts)
18//! derived from the declarative logical IR.
19
20use crate::{NameId, StringId};
21
22#[derive(Debug, Clone, PartialEq)]
23pub enum Op {
24 /// A no-op.
25 Nop,
26
27 /// Sequence of operations executed in order.
28 Seq(Vec<Op>),
29
30 /// Iterate over a data source.
31 /// For each tuple yielded by `source`, `body` is executed.
32 /// Variables defined in `source` are bound and available in `body`.
33 Iterate { source: DataSource, body: Box<Op> },
34
35 /// Filter / Check condition.
36 /// If `cond` evaluates to true, `body` is executed.
37 Filter { cond: Condition, body: Box<Op> },
38
39 /// Insert a tuple into a relation.
40 /// All variables in `args` must be bound.
41 Insert {
42 relation: NameId,
43 args: Vec<Operand>,
44 },
45
46 /// Calculate a value and bind it to a variable.
47 /// `let var = expr`
48 Let {
49 var: NameId,
50 expr: Expr,
51 body: Box<Op>,
52 },
53
54 /// GroupBy operation.
55 /// Scans `source` (binding columns to `vars`), groups by `keys`, computes `aggregates`
56 /// for each group, and then executes `body` for each group.
57 GroupBy {
58 source: NameId, // Relation to scan
59 vars: Vec<NameId>, // Variables to bind to source columns
60 keys: Vec<NameId>, // Variables to group by (must be in `vars` or previously bound?)
61 // Typically `keys` are subset of `vars`.
62 aggregates: Vec<Aggregate>,
63 body: Box<Op>,
64 },
65
66 /// Hash join of two data sources on a shared set of variables.
67 ///
68 /// Execution:
69 /// 1. Drain `build_source`. For each tuple, capture the values bound by
70 /// `build_source`'s vars (including the join-key positions) into an
71 /// in-memory hash table keyed by the `join_keys` projection.
72 /// 2. Stream `probe_source`. For each tuple, extract its join-key values
73 /// and look up in the build hash table. For every matching build
74 /// tuple, restore the build-side bindings alongside the probe-side
75 /// bindings and execute `body`.
76 ///
77 /// `join_keys` must be variables that both `build_source.vars` and
78 /// `probe_source.vars` bind. The planner only emits this op for 2-way
79 /// joins where neither side has a useful IndexLookup.
80 HashJoin {
81 build_source: DataSource,
82 probe_source: DataSource,
83 join_keys: Vec<NameId>,
84 body: Box<Op>,
85 },
86}
87
88#[derive(Debug, Clone, PartialEq)]
89pub struct Aggregate {
90 pub var: NameId,
91 pub func: NameId,
92 pub args: Vec<Operand>,
93}
94
95#[derive(Debug, Clone, PartialEq)]
96pub enum DataSource {
97 /// Scan a relation (iterate over all tuples).
98 /// Binds the variables in `vars` to the columns of the relation.
99 Scan { relation: NameId, vars: Vec<NameId> },
100
101 /// Scan only the "delta" set of a relation (new facts from last iteration).
102 ScanDelta { relation: NameId, vars: Vec<NameId> },
103
104 /// Lookup in an index.
105 /// `col_idx`: The column index to lookup on.
106 /// `key`: The value to look up.
107 /// `vars`: Variables to bind to the *other* columns (or all columns?).
108 /// For simplicity: `vars` maps to the relation columns. The column at `col_idx`
109 /// is already bound (to `key`), but we might re-bind it or check it.
110 IndexLookup {
111 relation: NameId,
112 col_idx: usize,
113 key: Operand,
114 vars: Vec<NameId>,
115 },
116}
117
/// Comparison operator used by `Condition::Cmp`.
///
/// The enum carries no data, so in addition to `Copy` it derives `Eq` and
/// `Hash`; this lets it serve as a key in hash-based collections and be
/// embedded in types that themselves derive `Eq`/`Hash`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CmpOp {
    /// Equal.
    Eq,
    /// Not equal.
    Neq,
    /// Less than.
    Lt,
    /// Less than or equal.
    Le,
    /// Greater than.
    Gt,
    /// Greater than or equal.
    Ge,
}
127
128#[derive(Debug, Clone, PartialEq)]
129pub enum Condition {
130 /// Comparison of two operands.
131 Cmp {
132 op: CmpOp,
133 left: Operand,
134 right: Operand,
135 },
136 /// Negation check: !exists(...)
137 Negation {
138 relation: NameId,
139 args: Vec<Operand>,
140 },
141 /// Call to a boolean function / predicate (e.g. starts_with).
142 Call {
143 function: NameId,
144 args: Vec<Operand>,
145 },
146}
147
148#[derive(Debug, Clone, PartialEq)]
149pub enum Expr {
150 // Basic value
151 Value(Operand),
152 // Function call (arithmetic or built-in)
153 Call {
154 function: NameId,
155 args: Vec<Operand>,
156 },
157}
158
159#[derive(Debug, Clone, PartialEq)]
160pub enum Operand {
161 Var(NameId),
162 Const(Constant),
163}
164
165#[derive(Clone, Debug, PartialEq)]
166pub enum Constant {
167 Number(i64),
168 Float(f64),
169 String(StringId),
170 Name(NameId),
171 Time(i64),
172 Duration(i64),
173}