helios-persistence 0.2.0

Polyglot persistence layer for Helios FHIR Server
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
//! Intermediate representation for the FHIRPath → SQL compiler.
//!
//! Two layered IRs:
//!
//! - [`SqlExpr`] is a dialect-independent value-level expression. Every FHIRPath
//!   sub-expression compiles to one of these. The [`Dialect`](super::dialect::Dialect)
//!   trait lowers an `SqlExpr` to a SQL string per backend.
//! - [`PlanNode`] is the row-source-level plan: scans, lateral unnests, filters,
//!   projections, unions, and recursive descents (`repeat:`).
//!
//! Stages 2–5 progressively populate the consumers of these types. Stage 1 just
//! defines the shapes so later work has a stable target.

#![allow(dead_code)] // Stage 1 scaffold; consumers land in stages 2–5.

use std::sync::Arc;

/// A dialect-independent value-level SQL expression.
///
/// Each variant lowers to a SQL fragment via the [`Dialect`](super::dialect::Dialect)
/// trait. Subqueries hold a [`PlanNode`] together with the scalar projection
/// extracted from each row.
///
/// The tree is recursive: child expressions are boxed, and the
/// subquery-bearing variants own a boxed [`SubQuery`]. The type derives only
/// `Debug` and `Clone`, so expressions cannot be compared with `==`, and
/// cloning copies the whole subtree.
#[derive(Debug, Clone)]
pub enum SqlExpr {
    /// Literal scalar (see [`LitValue`] for the rules on string literals).
    Lit(LitValue),

    /// Navigation through a JSON document.
    ///
    /// `root` is the alias provided by the surrounding plan node — typically
    /// `r.data` (resource scan), `fe.value` (lateral unnest), or `rec.node`
    /// (recursive CTE). `path` is the chain of steps applied to it.
    JsonPath {
        /// JSON root alias (e.g., `r.data`).
        root: String,
        /// Ordered navigation steps applied to `root`.
        path: JsonPath,
    },

    /// Bound query parameter, 1-based.
    ///
    /// Indices 1 and 2 are reserved for `tenant_id` and `resource_type`.
    /// Constants from `ViewDefinition.constant[]` and string literals lifted
    /// out of `extension(url)` etc. allocate from index 3 upward.
    ///
    /// NOTE(review): the `usize` payload does not itself reject index 0 or
    /// enforce the reserved range — presumably whoever allocates parameter
    /// slots does; confirm.
    Param(usize),

    /// Reference to a column projected by a CTE or subquery.
    ColRef(String),

    /// Type coercion. The dialect lowerer chooses the appropriate cast syntax.
    Cast {
        /// Expression being coerced.
        inner: Box<SqlExpr>,
        /// Target SQL type.
        ty: SqlType,
    },

    /// Binary operator.
    BinOp {
        /// Operator kind.
        op: BinOp,
        /// Left-hand operand.
        lhs: Box<SqlExpr>,
        /// Right-hand operand.
        rhs: Box<SqlExpr>,
    },

    /// Unary operator.
    UnaryOp {
        /// Operator kind.
        op: UnaryOp,
        /// Operand the operator is applied to.
        inner: Box<SqlExpr>,
    },

    /// `CASE WHEN .. THEN .. ... ELSE .. END`.
    Case {
        /// `(condition, value)` pairs evaluated in order.
        arms: Vec<(SqlExpr, SqlExpr)>,
        /// Optional default branch; `None` means no `ELSE` branch was given.
        else_: Option<Box<SqlExpr>>,
    },

    /// `COALESCE(a, b, ...)`.
    Coalesce(Vec<SqlExpr>),

    /// `NULLIF(a, b)`.
    ///
    /// NOTE(review): the two positional fields are presumably stored in call
    /// order (`a`, then `b`) — confirm against the lowerer.
    NullIf(Box<SqlExpr>, Box<SqlExpr>),

    /// Wrap a scalar as a JSON value (`to_jsonb` / `json`).
    AsJson(Box<SqlExpr>),

    /// Aggregate the rows produced by a subquery into a JSON array
    /// (`jsonb_agg` / `json_group_array`). Used for `column.collection: true`.
    JsonAgg(Box<SubQuery>),

    /// Scalar subquery — the inner plan must project exactly one value per row
    /// and return at most one row.
    Scalar(Box<SubQuery>),

    /// `EXISTS(subquery)` — collapses to a boolean.
    Exists(Box<SubQuery>),

    /// `(SELECT count(*) FROM subquery)`.
    CountSub(Box<SubQuery>),

    /// Names an inner expression for reuse (lowered as a CTE column reference
    /// when the same scalar appears in multiple projections).
    Alias {
        /// Alias to assign to `inner`.
        name: String,
        /// Expression being aliased.
        inner: Box<SqlExpr>,
    },

    /// Extracts the id portion of a `Reference.reference` string. When
    /// `expected_type` is supplied, returns NULL unless the reference's type
    /// segment matches (e.g. `getReferenceKey(Patient)` over `Observation/123`
    /// returns NULL).
    ReferenceKey {
        /// Reference string to inspect.
        reference: Box<SqlExpr>,
        /// FHIR resource type the reference must match, when set.
        expected_type: Option<String>,
    },

    /// FHIRPath `lowBoundary()` / `highBoundary()` — emits a precision-driven
    /// CASE expression over the source's text form (decimal expands by a
    /// half-step in the last digit; date/dateTime/time pad with the first or
    /// last instant of the largest unspecified unit). The expected
    /// `column.type` is supplied so the dialect can pick decimal vs.
    /// date/dateTime/time logic.
    Boundary {
        /// Whether to take the low or high boundary.
        side: BoundarySide,
        /// Source value kind (decimal vs. date/dateTime/time).
        kind: BoundaryKind,
        /// Expression whose boundary is being computed.
        source: Box<SqlExpr>,
    },

    /// FHIRPath `<focus>.where(<crit>).exists()` — lowers to an `EXISTS`
    /// subquery that iterates the focus collection (a lateral unnest of a
    /// JSON path) and tests `crit` against each element. The criterion is
    /// pre-lowered with `iter_alias.value` set as its path root.
    WhereExists {
        /// Collection expression to iterate.
        focus: Box<SqlExpr>,
        /// Iteration alias used by `predicate`.
        iter_alias: String,
        /// Criterion evaluated against each element.
        predicate: Box<SqlExpr>,
        /// Mirrors `where(crit).empty()` — negate the EXISTS.
        negate: bool,
    },

    /// FHIRPath `<focus>.where(<crit>).<navigation>` collapsed to a scalar
    /// subquery: iterate the focus collection, filter by the criterion,
    /// project the navigation off the iteration alias, return at most one
    /// row. Used when a column's path threads a `where()` call somewhere in
    /// the middle (e.g. `name.where(use='official').family`).
    WhereScalar {
        /// Collection expression to iterate.
        focus: Box<SqlExpr>,
        /// Iteration alias used by `predicate` and `projection`.
        iter_alias: String,
        /// Filter applied to each iteration row.
        predicate: Box<SqlExpr>,
        /// Scalar projection extracted from the surviving row.
        projection: Box<SqlExpr>,
    },

    /// FHIRPath `<base>.<field>.join(<sep>)` — aggregates the values of
    /// `<field>` across each element of `<base>` (flattened) into a single
    /// separator-joined string. Lowers to `string_agg` (PG) /
    /// `group_concat` (SQLite) over a chained lateral unnest.
    JoinAggregate {
        /// Outer collection expression to iterate.
        outer_focus: Box<SqlExpr>,
        /// Outer iteration alias.
        outer_alias: String,
        /// Field name to flatten on each outer row.
        inner_field: String,
        /// Inner iteration alias.
        inner_alias: String,
        /// Separator inserted between joined elements.
        ///
        /// NOTE(review): held here as a plain `String` rather than a
        /// [`SqlExpr::Param`] — confirm how the lowerer quotes or binds it.
        separator: String,
    },

    /// `column.collection: true` projection — aggregates the flattened
    /// values of a JSON path into a JSON array. Each `Field` step in `path`
    /// becomes a lateral unnest; the final element values feed into a
    /// `json_agg` / `json_group_array`.
    CollectionAgg {
        /// JSON root alias for the aggregation source.
        root: String,
        /// Path navigation aggregated into an array.
        path: JsonPath,
    },

    /// Correlated scalar subquery used for `forEach: "<chain>[N]"` paths —
    /// FHIRPath indexes the FLATTENED iteration result, but SQLite forbids
    /// correlated subqueries in `FROM`. Lowering each column to a
    /// scalar-subquery in the SELECT side bypasses that limitation:
    ///
    /// `(SELECT <projection> FROM <chain_sql> LIMIT 1 OFFSET <offset>)`.
    ScalarFromChain {
        /// Pre-built `FROM`-clause SQL for the flattened chain.
        ///
        /// This is raw SQL text rather than IR. NOTE(review): it is
        /// presumably already dialect-specific by the time it is stored
        /// here — confirm.
        chain_sql: String,
        /// Scalar projection extracted from the row at `offset`.
        projection: Box<SqlExpr>,
        /// Zero-based index into the flattened chain.
        ///
        /// NOTE(review): the type is signed; presumably producers only emit
        /// non-negative offsets — confirm.
        offset: i64,
    },
}

/// Selects between `lowBoundary()` and `highBoundary()` semantics.
///
/// Carried by [`SqlExpr::Boundary`] as its `side` field. The type is `Copy`
/// and `Eq`, so it can be passed by value and compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BoundarySide {
    /// Low boundary (`lowBoundary()`).
    Low,
    /// High boundary (`highBoundary()`).
    High,
}

/// Source value type for [`SqlExpr::Boundary`].
///
/// Carried as that variant's `kind` field so the dialect can choose between
/// the decimal logic and the date/dateTime/time logic. The type is `Copy` and
/// `Eq`, so it can be passed by value and compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BoundaryKind {
    /// FHIR `decimal`.
    Decimal,
    /// FHIR `date`.
    Date,
    /// FHIR `dateTime` (or `instant`).
    DateTime,
    /// FHIR `time`.
    Time,
}

/// Literal scalar value embedded directly in SQL.
///
/// Strings derived from user input must be bound as parameters via
/// [`SqlExpr::Param`] — `LitValue::Str` is reserved for compile-time-constant
/// identifiers (e.g. polymorphic-type field names).
///
/// The type derives only `Debug` and `Clone`; it is not `PartialEq`, so
/// literals cannot be compared with `==`.
#[derive(Debug, Clone)]
pub enum LitValue {
    /// `NULL`.
    Null,
    /// Boolean — lowered to `true`/`false` (PG) or `1`/`0` (SQLite).
    Bool(bool),
    /// Integer (64-bit signed).
    Int(i64),
    /// Decimal as a string to preserve precision.
    ///
    /// NOTE(review): nothing in this type validates that the string is a
    /// well-formed decimal — presumably the producer guarantees it; confirm.
    Decimal(String),
    /// String literal — used only for compile-time-constant idents; user input
    /// must always go through [`SqlExpr::Param`].
    Str(String),
}

/// SQL type tag used by [`SqlExpr::Cast`] and column projections.
///
/// The dialect lowerer maps each variant to its native cast syntax
/// (`::text` / `CAST(.. AS TEXT)` etc.).
///
/// NOTE(review): where a variant lists two type names, they appear to be the
/// PostgreSQL and SQLite spellings respectively — confirm against the dialect
/// implementations.
///
/// The type is `Copy` and `Eq`, so it can be passed by value and compared
/// directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SqlType {
    /// SQL `text` / `TEXT`.
    Text,
    /// SQL `bigint` / `INTEGER`.
    Integer,
    /// SQL `numeric` / `REAL`.
    Decimal,
    /// SQL `boolean` (projected as `'true'`/`'false'` text for the runner).
    Boolean,
    /// JSON value (PG: `jsonb`; SQLite: `json` returned by `json()` function).
    Json,
}

/// JSON value-type predicate, used by [`PathStep::TypeFilter`] and
/// polymorphic-field guards.
///
/// One variant per JSON value kind. The type is `Copy` and `Eq`, so it can be
/// passed by value and compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JsonType {
    /// JSON object.
    Object,
    /// JSON array.
    Array,
    /// JSON string.
    String,
    /// JSON number.
    Number,
    /// JSON boolean.
    Boolean,
    /// JSON `null`.
    Null,
}

/// Binary operator for [`SqlExpr::BinOp`].
///
/// Covers comparison, arithmetic, boolean, and string operators. The type is
/// `Copy` and `Eq`, so it can be passed by value and compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinOp {
    /// `=` equality.
    Eq,
    /// `!=` inequality.
    Neq,
    /// `<` less than.
    Lt,
    /// `<=` less than or equal.
    Lte,
    /// `>` greater than.
    Gt,
    /// `>=` greater than or equal.
    Gte,
    /// `+` addition.
    Add,
    /// `-` subtraction.
    Sub,
    /// `*` multiplication.
    Mul,
    /// `/` division.
    Div,
    /// `AND` with SQL three-valued logic.
    And,
    /// `OR` with SQL three-valued logic.
    Or,
    /// String concatenation (`||` on both PG and SQLite).
    Concat,
    /// `LIKE`.
    Like,
    /// `regexp_match` / dialect-specific regex.
    RegexMatch,
}

/// Unary operator for [`SqlExpr::UnaryOp`].
///
/// The type is `Copy` and `Eq`, so it can be passed by value and compared
/// directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
    /// Boolean `NOT`.
    Not,
    /// `IS NULL` test.
    IsNull,
    /// `IS NOT NULL` test.
    IsNotNull,
    /// Arithmetic negation (`-x`).
    Neg,
}

/// Ordered sequence of [`PathStep`]s applied to a JSON root.
///
/// A thin newtype over `Vec<PathStep>`; the inner vector is public, so callers
/// may also read or build it directly through `.0`. Equality is structural:
/// two paths are equal when they hold the same steps in the same order.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct JsonPath(pub Vec<PathStep>);

impl JsonPath {
    /// Creates an empty path (same value as `JsonPath::default()`).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends a navigation step to the end of the path.
    pub fn push(&mut self, step: PathStep) {
        self.0.push(step);
    }

    /// Returns `true` when no steps have been added.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }
}

/// One navigation step in a [`JsonPath`].
///
/// The type derives `PartialEq` and `Eq`, so individual steps can be compared
/// directly; it is not `Copy` because two variants own a `String`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathStep {
    /// `.field` (object key).
    Field(String),
    /// `[N]` (array index).
    ///
    /// NOTE(review): the index is signed; presumably only non-negative values
    /// are produced — confirm.
    Index(i64),
    /// `value.ofType(X)` resolved against FHIR's polymorphic-element JSON
    /// convention. The contained string is the FHIR type name (`Quantity`,
    /// `string`, ...). The lowerer rewrites the previous `Field` step to its
    /// `value{X}` sibling.
    OfType(String),
    /// Restricts the focus to JSON values of a given type — used by
    /// `ofType(primitive)` to make sibling polymorphic fields evaluate to NULL.
    TypeFilter(JsonType),
}

/// Row-source plan node.
///
/// Plans are trees: a [`Project`](PlanNode::Project) at the root, descending
/// through filters and lateral unnests to a [`Scan`](PlanNode::Scan) over
/// `resources`. [`Union`](PlanNode::Union) and [`Recurse`](PlanNode::Recurse)
/// wrap multiple sub-plans.
///
/// Every non-leaf variant owns its input plan (boxed `parent`, or a `Vec` of
/// plans for `Union`), so the tree has a single owner and cloning a node
/// copies its whole subtree. The type derives only `Debug` and `Clone`.
#[derive(Debug, Clone)]
pub enum PlanNode {
    /// Top-level scan over the `resources` table for a single resource type.
    /// The tenant predicate is injected by the emitter.
    Scan {
        /// SQL alias for the scanned row (e.g., `r`).
        alias: String,
        /// FHIR resource type to scan.
        resource_type: String,
    },

    /// Lateral unnest of a JSON-array source. `out_alias` names the iteration
    /// row; `left_join` distinguishes `forEach` from `forEachOrNull`.
    /// `on_filter`, if set, is appended to the JOIN ON clause and lets a
    /// trailing `where(crit)` on the forEach path filter rows in-place
    /// (preserving LEFT JOIN semantics for `forEachOrNull`). `flat_index`,
    /// if set, restricts the unnest to the Nth element of the flattened
    /// collection (FHIRPath `name[0]` style indexing applied to the result
    /// of an array-flattening navigation).
    LateralUnnest {
        /// Plan whose rows are being unnested.
        parent: Box<PlanNode>,
        /// JSON-array source expression.
        source: SqlExpr,
        /// SQL alias bound to each iteration row.
        out_alias: String,
        /// True for `forEachOrNull` (LEFT JOIN), false for `forEach` (INNER JOIN).
        left_join: bool,
        /// Optional filter appended to the JOIN ON clause.
        on_filter: Option<SqlExpr>,
        /// When set, restrict the unnest to the Nth element of the flattened collection.
        ///
        /// NOTE(review): the index is signed; presumably only non-negative
        /// values are produced — confirm.
        flat_index: Option<i64>,
    },

    /// `WHERE` filter applied to `parent`. Multiple `Filter` nodes compose
    /// AND-wise.
    Filter {
        /// Plan whose rows are being filtered.
        parent: Box<PlanNode>,
        /// Boolean predicate the rows must satisfy.
        predicate: SqlExpr,
    },

    /// Output projection.
    Project {
        /// Plan supplying the rows to project.
        parent: Box<PlanNode>,
        /// Output column definitions, in output order.
        columns: Vec<Column>,
    },

    /// `UNION ALL` of N row-compatible plans. Output schemas must align;
    /// the emitter validates this and emits a single `ORDER BY 1` outside the
    /// compound query.
    Union(Vec<PlanNode>),

    /// Recursive-CTE descent — used for SoF `repeat:` clauses.
    Recurse {
        /// Plan producing the seed rows.
        parent: Box<PlanNode>,
        /// Seed projection (currently unused; emitter walks `parent`).
        seed: SqlExpr,
        /// Paths walked on each iteration.
        step_paths: Vec<JsonPath>,
        /// CTE alias also used as the `node` column alias.
        out_alias: String,
    },
}

/// Output column projected by a [`Project`](PlanNode::Project) node.
///
/// All fields are public; the struct is a plain data carrier with no
/// constructor or invariants enforced by this type.
#[derive(Debug, Clone)]
pub struct Column {
    /// Output column name.
    pub name: String,
    /// Expression that produces the column's value.
    pub expr: SqlExpr,
    /// When true, lower to a JSON array via [`SqlExpr::JsonAgg`] over a lateral
    /// subquery. When false, lower to a scalar (with a defensive `LIMIT 1` if
    /// the underlying expression yields a row source).
    pub collection: bool,
    /// SQL type the column is projected as.
    pub ty: SqlType,
}

/// A subquery embedded inside a [`SqlExpr`]. Holds the inner plan together
/// with the scalar projection extracted from each row.
///
/// Used (boxed) by the `JsonAgg`, `Scalar`, `Exists`, and `CountSub` variants
/// of [`SqlExpr`]. The plan is stored inline here; the boxing at the use site
/// is what keeps the `SqlExpr` ↔ `PlanNode` recursion finite in size.
#[derive(Debug, Clone)]
pub struct SubQuery {
    /// Plan producing the subquery's rows.
    pub plan: PlanNode,
    /// Scalar projection extracted from each row.
    pub select_expr: SqlExpr,
}

/// Shared, reference-counted ([`Arc`]) handle to a dialect trait object, used
/// by emission helpers. Cloning the handle bumps the reference count; it does
/// not copy the dialect itself.
pub type DialectRef = Arc<dyn super::dialect::Dialect>;