helios_persistence/sof/ir.rs
1//! Intermediate representation for the FHIRPath → SQL compiler.
2//!
3//! Two layered IRs:
4//!
5//! - [`SqlExpr`] is a dialect-independent value-level expression. Every FHIRPath
6//! sub-expression compiles to one of these. The [`Dialect`](super::dialect::Dialect)
7//! trait lowers an `SqlExpr` to a SQL string per backend.
8//! - [`PlanNode`] is the row-source-level plan: scans, lateral unnests, filters,
9//! projections, unions, and recursive descents (`repeat:`).
10//!
11//! Stages 2–5 progressively populate the consumers of these types. Stage 1 just
12//! defines the shapes so later work has a stable target.
13
14#![allow(dead_code)] // Stage 1 scaffold; consumers land in stages 2–5.
15
16use std::sync::Arc;
17
18/// A dialect-independent value-level SQL expression.
19///
20/// Each variant lowers to a SQL fragment via the [`Dialect`](super::dialect::Dialect)
21/// trait. Subqueries hold a [`PlanNode`] together with the scalar projection
22/// extracted from each row.
23#[derive(Debug, Clone)]
24pub enum SqlExpr {
25 /// Literal scalar.
26 Lit(LitValue),
27
28 /// Navigation through a JSON document.
29 ///
30 /// `root` is the alias provided by the surrounding plan node — typically
31 /// `r.data` (resource scan), `fe.value` (lateral unnest), or `rec.node`
32 /// (recursive CTE). `path` is the chain of steps applied to it.
33 JsonPath {
34 /// JSON root alias (e.g., `r.data`).
35 root: String,
36 /// Ordered navigation steps applied to `root`.
37 path: JsonPath,
38 },
39
40 /// Bound query parameter, 1-based.
41 ///
42 /// Indices 1 and 2 are reserved for `tenant_id` and `resource_type`.
43 /// Constants from `ViewDefinition.constant[]` and string literals lifted
44 /// out of `extension(url)` etc. allocate from index 3 upward.
45 Param(usize),
46
47 /// Reference to a column projected by a CTE or subquery.
48 ColRef(String),
49
50 /// Type coercion. The dialect lowerer chooses the appropriate cast syntax.
51 Cast {
52 /// Expression being coerced.
53 inner: Box<SqlExpr>,
54 /// Target SQL type.
55 ty: SqlType,
56 },
57
58 /// Binary operator.
59 BinOp {
60 /// Operator kind.
61 op: BinOp,
62 /// Left-hand operand.
63 lhs: Box<SqlExpr>,
64 /// Right-hand operand.
65 rhs: Box<SqlExpr>,
66 },
67
68 /// Unary operator.
69 UnaryOp {
70 /// Operator kind.
71 op: UnaryOp,
72 /// Operand the operator is applied to.
73 inner: Box<SqlExpr>,
74 },
75
76 /// `CASE WHEN .. THEN .. ... ELSE .. END`.
77 Case {
78 /// `(condition, value)` pairs evaluated in order.
79 arms: Vec<(SqlExpr, SqlExpr)>,
80 /// Optional default branch.
81 else_: Option<Box<SqlExpr>>,
82 },
83
84 /// `COALESCE(a, b, ...)`.
85 Coalesce(Vec<SqlExpr>),
86
87 /// `NULLIF(a, b)`.
88 NullIf(Box<SqlExpr>, Box<SqlExpr>),
89
90 /// Wrap a scalar as a JSON value (`to_jsonb` / `json`).
91 AsJson(Box<SqlExpr>),
92
93 /// Aggregate the rows produced by a subquery into a JSON array
94 /// (`jsonb_agg` / `json_group_array`). Used for `column.collection: true`.
95 JsonAgg(Box<SubQuery>),
96
97 /// Scalar subquery — the inner plan must project exactly one value per row
98 /// and return at most one row.
99 Scalar(Box<SubQuery>),
100
101 /// `EXISTS(subquery)` — collapses to a boolean.
102 Exists(Box<SubQuery>),
103
104 /// `(SELECT count(*) FROM subquery)`.
105 CountSub(Box<SubQuery>),
106
107 /// Names an inner expression for reuse (lowered as a CTE column reference
108 /// when the same scalar appears in multiple projections).
109 Alias {
110 /// Alias to assign to `inner`.
111 name: String,
112 /// Expression being aliased.
113 inner: Box<SqlExpr>,
114 },
115
116 /// Extracts the id portion of a `Reference.reference` string. When
117 /// `expected_type` is supplied, returns NULL unless the reference's type
118 /// segment matches (e.g. `getReferenceKey(Patient)` over `Observation/123`
119 /// returns NULL).
120 ReferenceKey {
121 /// Reference string to inspect.
122 reference: Box<SqlExpr>,
123 /// FHIR resource type the reference must match, when set.
124 expected_type: Option<String>,
125 },
126
127 /// FHIRPath `lowBoundary()` / `highBoundary()` — emits a precision-driven
128 /// CASE expression over the source's text form (decimal expands by a
129 /// half-step in the last digit; date/dateTime/time pad with the first or
130 /// last instant of the largest unspecified unit). The expected
131 /// `column.type` is supplied so the dialect can pick decimal vs.
132 /// date/dateTime/time logic.
133 Boundary {
134 /// Whether to take the low or high boundary.
135 side: BoundarySide,
136 /// Source value kind (decimal vs. date/dateTime/time).
137 kind: BoundaryKind,
138 /// Expression whose boundary is being computed.
139 source: Box<SqlExpr>,
140 },
141
142 /// FHIRPath `<focus>.where(<crit>).exists()` — lowers to an `EXISTS`
143 /// subquery that iterates the focus collection (a lateral unnest of a
144 /// JSON path) and tests `crit` against each element. The criterion is
145 /// pre-lowered with `iter_alias.value` set as its path root.
146 WhereExists {
147 /// Collection expression to iterate.
148 focus: Box<SqlExpr>,
149 /// Iteration alias used by `predicate`.
150 iter_alias: String,
151 /// Criterion evaluated against each element.
152 predicate: Box<SqlExpr>,
153 /// Mirrors `where(crit).empty()` — negate the EXISTS.
154 negate: bool,
155 },
156
157 /// FHIRPath `<focus>.where(<crit>).<navigation>` collapsed to a scalar
158 /// subquery: iterate the focus collection, filter by the criterion,
159 /// project the navigation off the iteration alias, return at most one
160 /// row. Used when a column's path threads a `where()` call somewhere in
161 /// the middle (e.g. `name.where(use='official').family`).
162 WhereScalar {
163 /// Collection expression to iterate.
164 focus: Box<SqlExpr>,
165 /// Iteration alias used by `predicate` and `projection`.
166 iter_alias: String,
167 /// Filter applied to each iteration row.
168 predicate: Box<SqlExpr>,
169 /// Scalar projection extracted from the surviving row.
170 projection: Box<SqlExpr>,
171 },
172
173 /// FHIRPath `<base>.<field>.join(<sep>)` — aggregates the values of
174 /// `<field>` across each element of `<base>` (flattened) into a single
175 /// separator-joined string. Lowers to `string_agg` (PG) /
176 /// `group_concat` (SQLite) over a chained lateral unnest.
177 JoinAggregate {
178 /// Outer collection expression to iterate.
179 outer_focus: Box<SqlExpr>,
180 /// Outer iteration alias.
181 outer_alias: String,
182 /// Field name to flatten on each outer row.
183 inner_field: String,
184 /// Inner iteration alias.
185 inner_alias: String,
186 /// Separator inserted between joined elements.
187 separator: String,
188 },
189
190 /// `column.collection: true` projection — aggregates the flattened
191 /// values of a JSON path into a JSON array. Each `Field` step in `path`
192 /// becomes a lateral unnest; the final element values feed into a
193 /// `json_agg` / `json_group_array`.
194 CollectionAgg {
195 /// JSON root alias for the aggregation source.
196 root: String,
197 /// Path navigation aggregated into an array.
198 path: JsonPath,
199 },
200
201 /// Correlated scalar subquery used for `forEach: "<chain>[N]"` paths —
202 /// FHIRPath indexes the FLATTENED iteration result, but SQLite forbids
203 /// correlated subqueries in `FROM`. Lowering each column to a
204 /// scalar-subquery in the SELECT side bypasses that limitation:
205 ///
206 /// `(SELECT <projection> FROM <chain_sql> LIMIT 1 OFFSET <offset>)`.
207 ScalarFromChain {
208 /// Pre-built `FROM`-clause SQL for the flattened chain.
209 chain_sql: String,
210 /// Scalar projection extracted from the row at `offset`.
211 projection: Box<SqlExpr>,
212 /// Zero-based index into the flattened chain.
213 offset: i64,
214 },
215}
216
/// Which of the two FHIRPath boundary functions an expression stands for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BoundarySide {
    /// `lowBoundary()` — the low end.
    Low,
    /// `highBoundary()` — the high end.
    High,
}
225
/// Kind of value a [`SqlExpr::Boundary`] is computed over.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BoundaryKind {
    /// A FHIR `decimal`.
    Decimal,
    /// A FHIR `date`.
    Date,
    /// A FHIR `dateTime` (or `instant`).
    DateTime,
    /// A FHIR `time`.
    Time,
}
238
/// A scalar literal that is written straight into the SQL text.
///
/// Anything that originates from user input has to be bound through
/// [`SqlExpr::Param`] instead. `LitValue::Str` exists only for identifiers
/// that are constant at compile time (e.g. polymorphic-type field names).
#[derive(Debug, Clone)]
pub enum LitValue {
    /// SQL `NULL`.
    Null,
    /// A boolean; lowered to `true`/`false` (PG) or `1`/`0` (SQLite).
    Bool(bool),
    /// An integer.
    Int(i64),
    /// A decimal, kept as a string so no precision is lost.
    Decimal(String),
    /// A string literal. Only for compile-time-constant idents — user input
    /// must always be routed through [`SqlExpr::Param`].
    Str(String),
}
258
/// Tag naming a SQL type; consumed by [`SqlExpr::Cast`] and by column
/// projections.
///
/// Each variant is translated by the dialect lowerer into that backend's
/// cast syntax (`::text` / `CAST(.. AS TEXT)` etc.).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SqlType {
    /// `text` / `TEXT`.
    Text,
    /// `bigint` / `INTEGER`.
    Integer,
    /// `numeric` / `REAL`.
    Decimal,
    /// `boolean` (projected as `'true'`/`'false'` text for the runner).
    Boolean,
    /// A JSON value (PG: `jsonb`; SQLite: `json` returned by the `json()`
    /// function).
    Json,
}
276
/// Predicate on the type of a JSON value; consumed by
/// [`PathStep::TypeFilter`] and by polymorphic-field guards.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JsonType {
    /// A JSON object.
    Object,
    /// A JSON array.
    Array,
    /// A JSON string.
    String,
    /// A JSON number.
    Number,
    /// A JSON boolean.
    Boolean,
    /// The JSON `null` value.
    Null,
}
294
/// Operator kinds accepted by [`SqlExpr::BinOp`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinOp {
    /// Equality (`=`).
    Eq,
    /// Inequality (`!=`).
    Neq,
    /// Less than (`<`).
    Lt,
    /// Less than or equal (`<=`).
    Lte,
    /// Greater than (`>`).
    Gt,
    /// Greater than or equal (`>=`).
    Gte,
    /// Addition (`+`).
    Add,
    /// Subtraction (`-`).
    Sub,
    /// Multiplication (`*`).
    Mul,
    /// Division (`/`).
    Div,
    /// `AND`, following SQL three-valued logic.
    And,
    /// `OR`, following SQL three-valued logic.
    Or,
    /// Concatenation of strings (PG: `||`; SQLite: `||`).
    Concat,
    /// `LIKE`.
    Like,
    /// `regexp_match` / dialect-specific regex.
    RegexMatch,
}
329
/// Operator kinds accepted by [`SqlExpr::UnaryOp`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
    /// Logical `NOT`.
    Not,
    /// The `IS NULL` test.
    IsNull,
    /// The `IS NOT NULL` test.
    IsNotNull,
    /// Arithmetic negation (`-x`).
    Neg,
}
342
343/// Ordered sequence of [`PathStep`]s applied to a JSON root.
344#[derive(Debug, Clone, Default)]
345pub struct JsonPath(pub Vec<PathStep>);
346
347impl JsonPath {
348 /// Creates an empty path.
349 pub fn new() -> Self {
350 Self(Vec::new())
351 }
352
353 /// Appends a navigation step to the end of the path.
354 pub fn push(&mut self, step: PathStep) {
355 self.0.push(step);
356 }
357
358 /// Returns true when no steps have been added.
359 pub fn is_empty(&self) -> bool {
360 self.0.is_empty()
361 }
362}
363
364/// One navigation step in a [`JsonPath`].
365#[derive(Debug, Clone, PartialEq, Eq)]
366pub enum PathStep {
367 /// `.field` (object key).
368 Field(String),
369 /// `[N]` (array index).
370 Index(i64),
371 /// `value.ofType(X)` resolved against FHIR's polymorphic-element JSON
372 /// convention. The contained string is the FHIR type name (`Quantity`,
373 /// `string`, ...). The lowerer rewrites the previous `Field` step to its
374 /// `value{X}` sibling.
375 OfType(String),
376 /// Restricts the focus to JSON values of a given type — used by
377 /// `ofType(primitive)` to make sibling polymorphic fields evaluate to NULL.
378 TypeFilter(JsonType),
379}
380
381/// Row-source plan node.
382///
383/// Plans are trees: a [`Project`](PlanNode::Project) at the root, descending
384/// through filters and lateral unnests to a [`Scan`](PlanNode::Scan) over
385/// `resources`. [`Union`](PlanNode::Union) and [`Recurse`](PlanNode::Recurse)
386/// wrap multiple sub-plans.
387#[derive(Debug, Clone)]
388pub enum PlanNode {
389 /// Top-level scan over the `resources` table for a single resource type.
390 /// The tenant predicate is injected by the emitter.
391 Scan {
392 /// SQL alias for the scanned row (e.g., `r`).
393 alias: String,
394 /// FHIR resource type to scan.
395 resource_type: String,
396 },
397
398 /// Lateral unnest of a JSON-array source. `out_alias` names the iteration
399 /// row; `left_join` distinguishes `forEach` from `forEachOrNull`.
400 /// `on_filter`, if set, is appended to the JOIN ON clause and lets a
401 /// trailing `where(crit)` on the forEach path filter rows in-place
402 /// (preserving LEFT JOIN semantics for `forEachOrNull`). `flat_index`,
403 /// if set, restricts the unnest to the Nth element of the flattened
404 /// collection (FHIRPath `name[0]` style indexing applied to the result
405 /// of an array-flattening navigation).
406 LateralUnnest {
407 /// Plan whose rows are being unnested.
408 parent: Box<PlanNode>,
409 /// JSON-array source expression.
410 source: SqlExpr,
411 /// SQL alias bound to each iteration row.
412 out_alias: String,
413 /// True for `forEachOrNull` (LEFT JOIN), false for `forEach` (INNER JOIN).
414 left_join: bool,
415 /// Optional filter appended to the JOIN ON clause.
416 on_filter: Option<SqlExpr>,
417 /// When set, restrict the unnest to the Nth element of the flattened collection.
418 flat_index: Option<i64>,
419 },
420
421 /// `WHERE` filter applied to `parent`. Multiple `Filter` nodes compose
422 /// AND-wise.
423 Filter {
424 /// Plan whose rows are being filtered.
425 parent: Box<PlanNode>,
426 /// Boolean predicate the rows must satisfy.
427 predicate: SqlExpr,
428 },
429
430 /// Output projection.
431 Project {
432 /// Plan supplying the rows to project.
433 parent: Box<PlanNode>,
434 /// Output column definitions.
435 columns: Vec<Column>,
436 },
437
438 /// `UNION ALL` of N row-compatible plans. Output schemas must align;
439 /// the emitter validates this and emits a single `ORDER BY 1` outside the
440 /// compound query.
441 Union(Vec<PlanNode>),
442
443 /// Recursive-CTE descent — used for SoF `repeat:` clauses.
444 Recurse {
445 /// Plan producing the seed rows.
446 parent: Box<PlanNode>,
447 /// Seed projection (currently unused; emitter walks `parent`).
448 seed: SqlExpr,
449 /// Paths walked on each iteration.
450 step_paths: Vec<JsonPath>,
451 /// CTE alias also used as the `node` column alias.
452 out_alias: String,
453 },
454}
455
456/// Output column projected by a [`Project`](PlanNode::Project) node.
457#[derive(Debug, Clone)]
458pub struct Column {
459 /// Output column name.
460 pub name: String,
461 /// Expression that produces the column's value.
462 pub expr: SqlExpr,
463 /// When true, lower to a JSON array via [`SqlExpr::JsonAgg`] over a lateral
464 /// subquery. When false, lower to a scalar (with a defensive `LIMIT 1` if
465 /// the underlying expression yields a row source).
466 pub collection: bool,
467 /// SQL type the column is projected as.
468 pub ty: SqlType,
469}
470
471/// A subquery embedded inside a [`SqlExpr`]. Holds the inner plan together
472/// with the scalar projection extracted from each row.
473#[derive(Debug, Clone)]
474pub struct SubQuery {
475 /// Plan producing the subquery's rows.
476 pub plan: PlanNode,
477 /// Scalar projection extracted from each row.
478 pub select_expr: SqlExpr,
479}
480
481/// Boxed dialect handle used by emission helpers.
482pub type DialectRef = Arc<dyn super::dialect::Dialect>;