Skip to main content

helios_persistence/sof/
dialect.rs

1//! Dialect trait — token-level SQL emission for PostgreSQL JSONB and SQLite JSON1.
2//!
3//! The compiler builds dialect-independent IR ([`PlanNode`](super::ir::PlanNode)
4//! and [`SqlExpr`](super::ir::SqlExpr)); the emitter walks the IR and asks the
5//! dialect for each concrete SQL token. Keeping these helpers behind a trait
6//! confines per-dialect divergence (operator syntax, parameter form, JSON
7//! function names) to two small implementations.
8
9#![allow(dead_code)] // Stage 1 scaffold; consumers land in stages 2–5.
10
11use super::ir::{JsonType, SqlType};
12
/// Per-dialect SQL emission helpers.
///
/// Every method takes already-rendered SQL fragments (`&str`) and returns a
/// new SQL fragment; nothing here executes SQL. Implementations are
/// stateless unit structs ([`PgDialect`], [`SqliteDialect`]), and the
/// `Send + Sync` bound lets a dialect be shared across threads.
pub trait Dialect: Send + Sync {
    /// Short identifier for diagnostics (`"postgres"`, `"sqlite"`).
    fn name(&self) -> &'static str;

    /// Render a parameter placeholder for index `idx` (`$1` for PG, `?1` for
    /// SQLite). The index is emitted verbatim; callers are expected to pass
    /// 1-based indices.
    fn placeholder(&self, idx: usize) -> String;

    /// Single-key member access returning a JSON value
    /// (PG: `base->'key'`; SQLite: `json_extract(base, '$.key')`).
    fn json_field(&self, base: &str, key: &str) -> String;

    /// Single-key member access returning text (PG: `base->>'key'`).
    ///
    /// On SQLite this renders the same `json_extract` call as
    /// [`json_field`](Self::json_field).
    fn json_field_text(&self, base: &str, key: &str) -> String;

    /// Multi-key path returning a JSON value
    /// (PG: `base#>'{a,b}'`; SQLite: `json_extract(base, '$.a.b')`).
    fn json_path(&self, base: &str, segments: &[&str]) -> String;

    /// Multi-key path returning text (PG: `base#>>'{a,b}'`).
    ///
    /// On SQLite this renders the same SQL as [`json_path`](Self::json_path).
    fn json_path_text(&self, base: &str, segments: &[&str]) -> String;

    /// Emit a lateral unnest source clause (e.g. `jsonb_array_elements(<expr>)`
    /// or `json_each(<expr>)`).
    fn unnest_array(&self, expr: &str) -> String;

    /// Emit `<expr> IS NULL`-safe wrapping for an array source — guards against
    /// `jsonb_array_elements(NULL)` / `json_each(NULL)` errors. Returns SQL that
    /// always yields a usable array (empty if missing).
    fn coalesce_array(&self, expr: &str) -> String;

    /// JSON type-of expression (`jsonb_typeof(x)` / `json_type(x)`), returning
    /// a lowercase string.
    fn json_type(&self, expr: &str) -> String;

    /// JSON aggregate (`jsonb_agg(x)` / `json_group_array(x)`).
    fn json_agg(&self, expr: &str) -> String;

    /// String aggregate with separator (`string_agg` / `group_concat`).
    ///
    /// `sep_param` is spliced in as-is as the second argument, so it must
    /// already be a SQL expression (its name suggests a placeholder such as
    /// the output of [`placeholder`](Self::placeholder)).
    fn string_agg(&self, expr: &str, sep_param: &str) -> String;

    /// SQL boolean literal for `true` (`true` on PG, `1` on SQLite).
    fn bool_true(&self) -> &'static str;
    /// SQL boolean literal for `false` (`false` on PG, `0` on SQLite).
    fn bool_false(&self) -> &'static str;

    /// `LATERAL` keyword (PG) or empty (SQLite — uses correlated subqueries).
    ///
    /// The PG form already carries a trailing space (`"LATERAL "`), so callers
    /// can concatenate the result directly in front of the source clause.
    fn lateral_keyword(&self) -> &'static str;

    /// Cast `inner` to `ty`, returning a SQL expression.
    ///
    /// For `SqlType::Boolean` both dialects emit a `CASE` that yields the text
    /// `'true'` / `'false'` (and NULL otherwise) rather than a native boolean.
    fn cast(&self, inner: &str, ty: SqlType) -> String;

    /// Predicate testing whether `expr` has the given JSON type.
    fn has_json_type(&self, expr: &str, ty: JsonType) -> String;

    /// Boolean coercion at the WHERE-clause boundary — represents FHIRPath's
    /// three-valued-logic rule that an empty / NULL operand filters the row
    /// out. The expression `expr` may be a text projection (PG `->>`), a JSON
    /// value (PG `->`), or a SQLite JSON1 extracted scalar; the dialect picks
    /// an appropriate form.
    fn truthy_predicate(&self, expr: &str) -> String;

    /// Substring of `s` after the last `/` — used by `getReferenceKey()` to
    /// extract the id portion of a FHIR `Reference.reference` like
    /// `Patient/123` (or `http://server/path/Patient/123`).
    ///
    /// `s` is a SQL expression, not a Rust-side value; the extraction happens
    /// in the database.
    fn last_path_segment(&self, s: &str) -> String;
}
78
79// ============================================================================
80// PostgreSQL
81// ============================================================================
82
/// Zero-sized marker type that selects PostgreSQL JSONB SQL emission.
#[derive(Clone, Copy, Debug, Default)]
pub struct PgDialect;
86
87impl Dialect for PgDialect {
88    fn name(&self) -> &'static str {
89        "postgres"
90    }
91
92    fn placeholder(&self, idx: usize) -> String {
93        format!("${idx}")
94    }
95
96    fn json_field(&self, base: &str, key: &str) -> String {
97        format!("{base}->'{key}'")
98    }
99
100    fn json_field_text(&self, base: &str, key: &str) -> String {
101        format!("{base}->>'{key}'")
102    }
103
104    fn json_path(&self, base: &str, segments: &[&str]) -> String {
105        if segments.len() == 1 {
106            self.json_field(base, segments[0])
107        } else {
108            format!("{base}#>'{{{}}}'", segments.join(","))
109        }
110    }
111
112    fn json_path_text(&self, base: &str, segments: &[&str]) -> String {
113        if segments.len() == 1 {
114            self.json_field_text(base, segments[0])
115        } else {
116            format!("{base}#>>'{{{}}}'", segments.join(","))
117        }
118    }
119
120    fn unnest_array(&self, expr: &str) -> String {
121        format!("jsonb_array_elements({expr})")
122    }
123
124    fn coalesce_array(&self, expr: &str) -> String {
125        format!("coalesce({expr}, '[]'::jsonb)")
126    }
127
128    fn json_type(&self, expr: &str) -> String {
129        format!("jsonb_typeof({expr})")
130    }
131
132    fn json_agg(&self, expr: &str) -> String {
133        // PG's `jsonb_agg` returns NULL for empty input; coalesce to `[]`
134        // so `collection: true` columns always project an array (matching
135        // SQLite's `json_group_array`, which already returns `[]` for the
136        // empty case).
137        format!("coalesce(jsonb_agg({expr}), '[]'::jsonb)")
138    }
139
140    fn string_agg(&self, expr: &str, sep_param: &str) -> String {
141        format!("string_agg({expr}, {sep_param})")
142    }
143
144    fn bool_true(&self) -> &'static str {
145        "true"
146    }
147
148    fn bool_false(&self) -> &'static str {
149        "false"
150    }
151
152    fn lateral_keyword(&self) -> &'static str {
153        "LATERAL "
154    }
155
156    fn cast(&self, inner: &str, ty: SqlType) -> String {
157        match ty {
158            SqlType::Text => format!("({inner})::text"),
159            // Numeric column projections wrap with an outer `::text` so the
160            // PG row mapper (which reads each column as `Option<String>` to
161            // stay type-agnostic) can decode the value. Round-tripping
162            // through numeric first preserves canonical formatting (`1.0`
163            // stays `1.0`, not `1`); the runner then JSON-parses the text
164            // back to a number.
165            SqlType::Integer => format!("(({inner})::bigint)::text"),
166            SqlType::Decimal => format!("(({inner})::numeric)::text"),
167            // Column projections want JSON-parsable text: literal `'true'` /
168            // `'false'` deserialise as JSON booleans in the row mapper. The
169            // input may be either a JSON `->>` text projection (`'true'` /
170            // `'false'` / NULL) or a native boolean expression (e.g. a
171            // comparison `(a = b)` projected through `type: boolean`); both
172            // shapes cast cleanly via `::boolean` and route through `IS
173            // TRUE` / `IS FALSE` to give the right text literal back.
174            SqlType::Boolean => {
175                format!(
176                    "CASE WHEN ({inner})::boolean IS TRUE THEN 'true' \
177                     WHEN ({inner})::boolean IS FALSE THEN 'false' END"
178                )
179            }
180            SqlType::Json => format!("({inner})::jsonb"),
181        }
182    }
183
184    fn has_json_type(&self, expr: &str, ty: JsonType) -> String {
185        let name = match ty {
186            JsonType::Object => "object",
187            JsonType::Array => "array",
188            JsonType::String => "string",
189            JsonType::Number => "number",
190            JsonType::Boolean => "boolean",
191            JsonType::Null => "null",
192        };
193        format!("jsonb_typeof({expr}) = '{name}'")
194    }
195
196    fn truthy_predicate(&self, expr: &str) -> String {
197        // Already-boolean SQL fragments (e.g. `x IS NOT NULL`) cast back to
198        // boolean cheaply; text JSON projections (`r.data->>'active'`)
199        // require an explicit `::boolean` cast since `IS TRUE` is strict.
200        format!("({expr})::boolean IS TRUE")
201    }
202
203    fn last_path_segment(&self, s: &str) -> String {
204        // POSIX regexp on PG: strip everything up to and including the last `/`.
205        format!("regexp_replace({s}, '.*/', '')")
206    }
207}
208
209// ============================================================================
210// SQLite
211// ============================================================================
212
/// Zero-sized marker type that selects SQLite JSON1 SQL emission.
#[derive(Clone, Copy, Debug, Default)]
pub struct SqliteDialect;
216
217impl Dialect for SqliteDialect {
218    fn name(&self) -> &'static str {
219        "sqlite"
220    }
221
222    fn placeholder(&self, idx: usize) -> String {
223        format!("?{idx}")
224    }
225
226    fn json_field(&self, base: &str, key: &str) -> String {
227        format!("json_extract({base}, '$.{key}')")
228    }
229
230    fn json_field_text(&self, base: &str, key: &str) -> String {
231        // SQLite's json_extract returns the natural type; for object/array
232        // values it returns JSON text. For scalar leaves callers usually want
233        // the value directly — same call site.
234        self.json_field(base, key)
235    }
236
237    fn json_path(&self, base: &str, segments: &[&str]) -> String {
238        // SQLite JSON1 paths use `[N]` for array indices and `.field` for
239        // object members. Numeric-only segments are array indices and must
240        // not be preceded by a dot.
241        let mut path = String::from("$");
242        for seg in segments {
243            if seg.chars().all(|c| c.is_ascii_digit()) {
244                path.push('[');
245                path.push_str(seg);
246                path.push(']');
247            } else {
248                path.push('.');
249                path.push_str(seg);
250            }
251        }
252        format!("json_extract({base}, '{path}')")
253    }
254
255    fn json_path_text(&self, base: &str, segments: &[&str]) -> String {
256        self.json_path(base, segments)
257    }
258
259    fn unnest_array(&self, expr: &str) -> String {
260        format!("json_each({expr})")
261    }
262
263    fn coalesce_array(&self, expr: &str) -> String {
264        format!("coalesce({expr}, '[]')")
265    }
266
267    fn json_type(&self, expr: &str) -> String {
268        format!("json_type({expr})")
269    }
270
271    fn json_agg(&self, expr: &str) -> String {
272        format!("json_group_array({expr})")
273    }
274
275    fn string_agg(&self, expr: &str, sep_param: &str) -> String {
276        format!("group_concat({expr}, {sep_param})")
277    }
278
279    fn bool_true(&self) -> &'static str {
280        "1"
281    }
282
283    fn bool_false(&self) -> &'static str {
284        "0"
285    }
286
287    fn lateral_keyword(&self) -> &'static str {
288        ""
289    }
290
291    fn cast(&self, inner: &str, ty: SqlType) -> String {
292        match ty {
293            SqlType::Text => format!("CAST({inner} AS TEXT)"),
294            SqlType::Integer => format!("CAST({inner} AS INTEGER)"),
295            SqlType::Decimal => format!("CAST({inner} AS REAL)"),
296            // Boolean column projections — emit `'true'`/`'false'` text so the
297            // runner's row mapper deserializes them as JSON booleans rather
298            // than the JSON-number 1/0 it would get from CAST AS INTEGER.
299            SqlType::Boolean => {
300                format!("CASE WHEN ({inner}) THEN 'true' WHEN NOT ({inner}) THEN 'false' END")
301            }
302            SqlType::Json => format!("json({inner})"),
303        }
304    }
305
306    fn has_json_type(&self, expr: &str, ty: JsonType) -> String {
307        let name = match ty {
308            JsonType::Object => "object",
309            JsonType::Array => "array",
310            JsonType::String => "text",
311            JsonType::Number => "integer", // also "real"; callers needing both must compose
312            JsonType::Boolean => "true",   // SQLite has no native boolean json_type
313            JsonType::Null => "null",
314        };
315        format!("json_type({expr}) = '{name}'")
316    }
317
318    fn truthy_predicate(&self, expr: &str) -> String {
319        // `json_extract` returns the JSON value's native SQLite type:
320        // JSON booleans → integer 1/0, numbers → integer/real, strings → text.
321        // Truthy is: non-NULL AND not zero/false. The explicit text-equality
322        // check covers literal `'true'`/`'false'` text values just in case.
323        format!("({expr}) IS NOT NULL AND ({expr}) != 0 AND ({expr}) != 'false'")
324    }
325
326    fn last_path_segment(&self, s: &str) -> String {
327        // Calls the `fhir_last_segment` scalar UDF registered on every
328        // pooled SQLite connection by the backend's connection initialiser
329        // (see `crates/persistence/src/sof/sqlite_udfs.rs`).
330        format!("fhir_last_segment({s})")
331    }
332}
333
#[cfg(test)]
mod tests {
    use super::*;

    /// PG single-key text projection renders the `->>` operator.
    #[test]
    fn pg_field_text() {
        let sql = PgDialect.json_field_text("r.data", "id");
        assert_eq!(sql, "r.data->>'id'");
    }

    /// PG multi-key text projection renders `#>>` with a brace-wrapped path.
    #[test]
    fn pg_path_text_dotted() {
        let segments = ["subject", "reference"];
        let sql = PgDialect.json_path_text("r.data", &segments);
        assert_eq!(sql, "r.data#>>'{subject,reference}'");
    }

    /// SQLite single-key access renders `json_extract` with a `$.key` path.
    #[test]
    fn sqlite_field() {
        let sql = SqliteDialect.json_field("r.data", "id");
        assert_eq!(sql, "json_extract(r.data, '$.id')");
    }

    /// SQLite multi-key access joins object members with dots.
    #[test]
    fn sqlite_path_dotted() {
        let segments = ["subject", "reference"];
        let sql = SqliteDialect.json_path("r.data", &segments);
        assert_eq!(sql, "json_extract(r.data, '$.subject.reference')");
    }

    /// Each dialect renders its own numbered placeholder syntax.
    #[test]
    fn placeholder_forms() {
        let pg = PgDialect.placeholder(3);
        let sqlite = SqliteDialect.placeholder(3);
        assert_eq!(pg, "$3");
        assert_eq!(sqlite, "?3");
    }
}