Skip to main content

helios_persistence/sof/
ir.rs

//! Intermediate representation for the FHIRPath → SQL compiler.
//!
//! Two layered IRs:
//!
//! - [`SqlExpr`] is a dialect-independent value-level expression. Every FHIRPath
//!   sub-expression compiles to one of these. The [`Dialect`](super::dialect::Dialect)
//!   trait lowers an `SqlExpr` to a SQL string per backend.
//! - [`PlanNode`] is the row-source-level plan: scans, lateral unnests, filters,
//!   projections, unions, and recursive descents (`repeat:`).
//!
//! Stages 2–5 progressively populate the consumers of these types. Stage 1 just
//! defines the shapes so later work has a stable target.
13
14#![allow(dead_code)] // Stage 1 scaffold; consumers land in stages 2–5.
15
16use std::sync::Arc;
17
18/// A dialect-independent value-level SQL expression.
19///
20/// Each variant lowers to a SQL fragment via the [`Dialect`](super::dialect::Dialect)
21/// trait. Subqueries hold a [`PlanNode`] together with the scalar projection
22/// extracted from each row.
23#[derive(Debug, Clone)]
24pub enum SqlExpr {
25    /// Literal scalar.
26    Lit(LitValue),
27
28    /// Navigation through a JSON document.
29    ///
30    /// `root` is the alias provided by the surrounding plan node — typically
31    /// `r.data` (resource scan), `fe.value` (lateral unnest), or `rec.node`
32    /// (recursive CTE). `path` is the chain of steps applied to it.
33    JsonPath {
34        /// JSON root alias (e.g., `r.data`).
35        root: String,
36        /// Ordered navigation steps applied to `root`.
37        path: JsonPath,
38    },
39
40    /// Bound query parameter, 1-based.
41    ///
42    /// Indices 1 and 2 are reserved for `tenant_id` and `resource_type`.
43    /// Constants from `ViewDefinition.constant[]` and string literals lifted
44    /// out of `extension(url)` etc. allocate from index 3 upward.
45    Param(usize),
46
47    /// Reference to a column projected by a CTE or subquery.
48    ColRef(String),
49
50    /// Type coercion. The dialect lowerer chooses the appropriate cast syntax.
51    Cast {
52        /// Expression being coerced.
53        inner: Box<SqlExpr>,
54        /// Target SQL type.
55        ty: SqlType,
56    },
57
58    /// Binary operator.
59    BinOp {
60        /// Operator kind.
61        op: BinOp,
62        /// Left-hand operand.
63        lhs: Box<SqlExpr>,
64        /// Right-hand operand.
65        rhs: Box<SqlExpr>,
66    },
67
68    /// Unary operator.
69    UnaryOp {
70        /// Operator kind.
71        op: UnaryOp,
72        /// Operand the operator is applied to.
73        inner: Box<SqlExpr>,
74    },
75
76    /// `CASE WHEN .. THEN .. ... ELSE .. END`.
77    Case {
78        /// `(condition, value)` pairs evaluated in order.
79        arms: Vec<(SqlExpr, SqlExpr)>,
80        /// Optional default branch.
81        else_: Option<Box<SqlExpr>>,
82    },
83
84    /// `COALESCE(a, b, ...)`.
85    Coalesce(Vec<SqlExpr>),
86
87    /// `NULLIF(a, b)`.
88    NullIf(Box<SqlExpr>, Box<SqlExpr>),
89
90    /// Wrap a scalar as a JSON value (`to_jsonb` / `json`).
91    AsJson(Box<SqlExpr>),
92
93    /// Aggregate the rows produced by a subquery into a JSON array
94    /// (`jsonb_agg` / `json_group_array`). Used for `column.collection: true`.
95    JsonAgg(Box<SubQuery>),
96
97    /// Scalar subquery — the inner plan must project exactly one value per row
98    /// and return at most one row.
99    Scalar(Box<SubQuery>),
100
101    /// `EXISTS(subquery)` — collapses to a boolean.
102    Exists(Box<SubQuery>),
103
104    /// `(SELECT count(*) FROM subquery)`.
105    CountSub(Box<SubQuery>),
106
107    /// Names an inner expression for reuse (lowered as a CTE column reference
108    /// when the same scalar appears in multiple projections).
109    Alias {
110        /// Alias to assign to `inner`.
111        name: String,
112        /// Expression being aliased.
113        inner: Box<SqlExpr>,
114    },
115
116    /// Extracts the id portion of a `Reference.reference` string. When
117    /// `expected_type` is supplied, returns NULL unless the reference's type
118    /// segment matches (e.g. `getReferenceKey(Patient)` over `Observation/123`
119    /// returns NULL).
120    ReferenceKey {
121        /// Reference string to inspect.
122        reference: Box<SqlExpr>,
123        /// FHIR resource type the reference must match, when set.
124        expected_type: Option<String>,
125    },
126
127    /// FHIRPath `lowBoundary()` / `highBoundary()` — emits a precision-driven
128    /// CASE expression over the source's text form (decimal expands by a
129    /// half-step in the last digit; date/dateTime/time pad with the first or
130    /// last instant of the largest unspecified unit). The expected
131    /// `column.type` is supplied so the dialect can pick decimal vs.
132    /// date/dateTime/time logic.
133    Boundary {
134        /// Whether to take the low or high boundary.
135        side: BoundarySide,
136        /// Source value kind (decimal vs. date/dateTime/time).
137        kind: BoundaryKind,
138        /// Expression whose boundary is being computed.
139        source: Box<SqlExpr>,
140    },
141
142    /// FHIRPath `<focus>.where(<crit>).exists()` — lowers to an `EXISTS`
143    /// subquery that iterates the focus collection (a lateral unnest of a
144    /// JSON path) and tests `crit` against each element. The criterion is
145    /// pre-lowered with `iter_alias.value` set as its path root.
146    WhereExists {
147        /// Collection expression to iterate.
148        focus: Box<SqlExpr>,
149        /// Iteration alias used by `predicate`.
150        iter_alias: String,
151        /// Criterion evaluated against each element.
152        predicate: Box<SqlExpr>,
153        /// Mirrors `where(crit).empty()` — negate the EXISTS.
154        negate: bool,
155    },
156
157    /// FHIRPath `<focus>.where(<crit>).<navigation>` collapsed to a scalar
158    /// subquery: iterate the focus collection, filter by the criterion,
159    /// project the navigation off the iteration alias, return at most one
160    /// row. Used when a column's path threads a `where()` call somewhere in
161    /// the middle (e.g. `name.where(use='official').family`).
162    WhereScalar {
163        /// Collection expression to iterate.
164        focus: Box<SqlExpr>,
165        /// Iteration alias used by `predicate` and `projection`.
166        iter_alias: String,
167        /// Filter applied to each iteration row.
168        predicate: Box<SqlExpr>,
169        /// Scalar projection extracted from the surviving row.
170        projection: Box<SqlExpr>,
171    },
172
173    /// FHIRPath `<base>.<field>.join(<sep>)` — aggregates the values of
174    /// `<field>` across each element of `<base>` (flattened) into a single
175    /// separator-joined string. Lowers to `string_agg` (PG) /
176    /// `group_concat` (SQLite) over a chained lateral unnest.
177    JoinAggregate {
178        /// Outer collection expression to iterate.
179        outer_focus: Box<SqlExpr>,
180        /// Outer iteration alias.
181        outer_alias: String,
182        /// Field name to flatten on each outer row.
183        inner_field: String,
184        /// Inner iteration alias.
185        inner_alias: String,
186        /// Separator inserted between joined elements.
187        separator: String,
188    },
189
190    /// `column.collection: true` projection — aggregates the flattened
191    /// values of a JSON path into a JSON array. Each `Field` step in `path`
192    /// becomes a lateral unnest; the final element values feed into a
193    /// `json_agg` / `json_group_array`.
194    CollectionAgg {
195        /// JSON root alias for the aggregation source.
196        root: String,
197        /// Path navigation aggregated into an array.
198        path: JsonPath,
199    },
200
201    /// Correlated scalar subquery used for `forEach: "<chain>[N]"` paths —
202    /// FHIRPath indexes the FLATTENED iteration result, but SQLite forbids
203    /// correlated subqueries in `FROM`. Lowering each column to a
204    /// scalar-subquery in the SELECT side bypasses that limitation:
205    ///
206    /// `(SELECT <projection> FROM <chain_sql> LIMIT 1 OFFSET <offset>)`.
207    ScalarFromChain {
208        /// Pre-built `FROM`-clause SQL for the flattened chain.
209        chain_sql: String,
210        /// Scalar projection extracted from the row at `offset`.
211        projection: Box<SqlExpr>,
212        /// Zero-based index into the flattened chain.
213        offset: i64,
214    },
215}
216
/// Selects between `lowBoundary()` and `highBoundary()` semantics for
/// [`SqlExpr::Boundary`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BoundarySide {
    /// Low boundary (`lowBoundary()`).
    Low,
    /// High boundary (`highBoundary()`).
    High,
}
225
/// Source value type for [`SqlExpr::Boundary`].
///
/// Tells the lowerer whether to apply decimal or date/dateTime/time boundary
/// logic.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BoundaryKind {
    /// FHIR `decimal`.
    Decimal,
    /// FHIR `date`.
    Date,
    /// FHIR `dateTime` (or `instant`).
    DateTime,
    /// FHIR `time`.
    Time,
}
238
/// Literal scalar value embedded directly in SQL.
///
/// Strings derived from user input must be bound as parameters via
/// [`SqlExpr::Param`] — `LitValue::Str` is reserved for compile-time-constant
/// identifiers (e.g. polymorphic-type field names).
///
/// Equality is structural: `Decimal` compares its textual form, so `"1.0"`
/// and `"1.00"` are distinct literals.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LitValue {
    /// `NULL`.
    Null,
    /// Boolean — lowered to `true`/`false` (PG) or `1`/`0` (SQLite).
    Bool(bool),
    /// Integer.
    Int(i64),
    /// Decimal as a string to preserve precision.
    Decimal(String),
    /// String literal — used only for compile-time-constant idents; user input
    /// must always go through [`SqlExpr::Param`].
    Str(String),
}
258
/// SQL type tag used by [`SqlExpr::Cast`] and column projections.
///
/// The dialect lowerer maps each variant to its native cast syntax
/// (`::text` / `CAST(.. AS TEXT)` etc.).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SqlType {
    /// SQL `text` / `TEXT`.
    Text,
    /// SQL `bigint` / `INTEGER`.
    Integer,
    /// SQL `numeric` / `REAL`.
    Decimal,
    /// SQL `boolean` (projected as `'true'`/`'false'` text for the runner).
    Boolean,
    /// JSON value (PG: `jsonb`; SQLite: `json` returned by `json()` function).
    Json,
}
276
/// JSON value-type predicate, used by [`PathStep::TypeFilter`] and
/// polymorphic-field guards.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JsonType {
    /// JSON object.
    Object,
    /// JSON array.
    Array,
    /// JSON string.
    String,
    /// JSON number.
    Number,
    /// JSON boolean.
    Boolean,
    /// JSON `null`.
    Null,
}
294
/// Binary operator for [`SqlExpr::BinOp`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinOp {
    /// `=` equality.
    Eq,
    /// `!=` inequality.
    Neq,
    /// `<` less than.
    Lt,
    /// `<=` less than or equal.
    Lte,
    /// `>` greater than.
    Gt,
    /// `>=` greater than or equal.
    Gte,
    /// `+` addition.
    Add,
    /// `-` subtraction.
    Sub,
    /// `*` multiplication.
    Mul,
    /// `/` division.
    Div,
    /// `AND` with SQL three-valued logic.
    And,
    /// `OR` with SQL three-valued logic.
    Or,
    /// String concatenation (`||` on both PG and SQLite).
    Concat,
    /// `LIKE`.
    Like,
    /// `regexp_match` / dialect-specific regex.
    RegexMatch,
}
329
/// Unary operator for [`SqlExpr::UnaryOp`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
    /// `NOT`.
    Not,
    /// `IS NULL`.
    IsNull,
    /// `IS NOT NULL`.
    IsNotNull,
    /// Negation (`-x`).
    Neg,
}
342
343/// Ordered sequence of [`PathStep`]s applied to a JSON root.
344#[derive(Debug, Clone, Default)]
345pub struct JsonPath(pub Vec<PathStep>);
346
347impl JsonPath {
348    /// Creates an empty path.
349    pub fn new() -> Self {
350        Self(Vec::new())
351    }
352
353    /// Appends a navigation step to the end of the path.
354    pub fn push(&mut self, step: PathStep) {
355        self.0.push(step);
356    }
357
358    /// Returns true when no steps have been added.
359    pub fn is_empty(&self) -> bool {
360        self.0.is_empty()
361    }
362}
363
364/// One navigation step in a [`JsonPath`].
365#[derive(Debug, Clone, PartialEq, Eq)]
366pub enum PathStep {
367    /// `.field` (object key).
368    Field(String),
369    /// `[N]` (array index).
370    Index(i64),
371    /// `value.ofType(X)` resolved against FHIR's polymorphic-element JSON
372    /// convention. The contained string is the FHIR type name (`Quantity`,
373    /// `string`, ...). The lowerer rewrites the previous `Field` step to its
374    /// `value{X}` sibling.
375    OfType(String),
376    /// Restricts the focus to JSON values of a given type — used by
377    /// `ofType(primitive)` to make sibling polymorphic fields evaluate to NULL.
378    TypeFilter(JsonType),
379}
380
381/// Row-source plan node.
382///
383/// Plans are trees: a [`Project`](PlanNode::Project) at the root, descending
384/// through filters and lateral unnests to a [`Scan`](PlanNode::Scan) over
385/// `resources`. [`Union`](PlanNode::Union) and [`Recurse`](PlanNode::Recurse)
386/// wrap multiple sub-plans.
387#[derive(Debug, Clone)]
388pub enum PlanNode {
389    /// Top-level scan over the `resources` table for a single resource type.
390    /// The tenant predicate is injected by the emitter.
391    Scan {
392        /// SQL alias for the scanned row (e.g., `r`).
393        alias: String,
394        /// FHIR resource type to scan.
395        resource_type: String,
396    },
397
398    /// Lateral unnest of a JSON-array source. `out_alias` names the iteration
399    /// row; `left_join` distinguishes `forEach` from `forEachOrNull`.
400    /// `on_filter`, if set, is appended to the JOIN ON clause and lets a
401    /// trailing `where(crit)` on the forEach path filter rows in-place
402    /// (preserving LEFT JOIN semantics for `forEachOrNull`). `flat_index`,
403    /// if set, restricts the unnest to the Nth element of the flattened
404    /// collection (FHIRPath `name[0]` style indexing applied to the result
405    /// of an array-flattening navigation).
406    LateralUnnest {
407        /// Plan whose rows are being unnested.
408        parent: Box<PlanNode>,
409        /// JSON-array source expression.
410        source: SqlExpr,
411        /// SQL alias bound to each iteration row.
412        out_alias: String,
413        /// True for `forEachOrNull` (LEFT JOIN), false for `forEach` (INNER JOIN).
414        left_join: bool,
415        /// Optional filter appended to the JOIN ON clause.
416        on_filter: Option<SqlExpr>,
417        /// When set, restrict the unnest to the Nth element of the flattened collection.
418        flat_index: Option<i64>,
419    },
420
421    /// `WHERE` filter applied to `parent`. Multiple `Filter` nodes compose
422    /// AND-wise.
423    Filter {
424        /// Plan whose rows are being filtered.
425        parent: Box<PlanNode>,
426        /// Boolean predicate the rows must satisfy.
427        predicate: SqlExpr,
428    },
429
430    /// Output projection.
431    Project {
432        /// Plan supplying the rows to project.
433        parent: Box<PlanNode>,
434        /// Output column definitions.
435        columns: Vec<Column>,
436    },
437
438    /// `UNION ALL` of N row-compatible plans. Output schemas must align;
439    /// the emitter validates this and emits a single `ORDER BY 1` outside the
440    /// compound query.
441    Union(Vec<PlanNode>),
442
443    /// Recursive-CTE descent — used for SoF `repeat:` clauses.
444    Recurse {
445        /// Plan producing the seed rows.
446        parent: Box<PlanNode>,
447        /// Seed projection (currently unused; emitter walks `parent`).
448        seed: SqlExpr,
449        /// Paths walked on each iteration.
450        step_paths: Vec<JsonPath>,
451        /// CTE alias also used as the `node` column alias.
452        out_alias: String,
453    },
454}
455
456/// Output column projected by a [`Project`](PlanNode::Project) node.
457#[derive(Debug, Clone)]
458pub struct Column {
459    /// Output column name.
460    pub name: String,
461    /// Expression that produces the column's value.
462    pub expr: SqlExpr,
463    /// When true, lower to a JSON array via [`SqlExpr::JsonAgg`] over a lateral
464    /// subquery. When false, lower to a scalar (with a defensive `LIMIT 1` if
465    /// the underlying expression yields a row source).
466    pub collection: bool,
467    /// SQL type the column is projected as.
468    pub ty: SqlType,
469}
470
471/// A subquery embedded inside a [`SqlExpr`]. Holds the inner plan together
472/// with the scalar projection extracted from each row.
473#[derive(Debug, Clone)]
474pub struct SubQuery {
475    /// Plan producing the subquery's rows.
476    pub plan: PlanNode,
477    /// Scalar projection extracted from each row.
478    pub select_expr: SqlExpr,
479}
480
481/// Boxed dialect handle used by emission helpers.
482pub type DialectRef = Arc<dyn super::dialect::Dialect>;