fraiseql_core/runtime/aggregation/mod.rs
1//! Runtime Aggregation SQL Generation Module
2//!
3//! This module generates database-specific SQL from aggregation execution plans.
4//!
5//! # Database-Specific SQL
6//!
7//! ## PostgreSQL
8//! ```sql
9//! SELECT
10//! data->>'category' AS category,
11//! DATE_TRUNC('day', occurred_at) AS occurred_at_day,
12//! COUNT(*) AS count,
13//! SUM(revenue) AS revenue_sum
14//! FROM tf_sales
15//! WHERE customer_id = $1
16//! GROUP BY data->>'category', DATE_TRUNC('day', occurred_at)
17//! HAVING SUM(revenue) > $2
18//! ORDER BY revenue_sum DESC
19//! LIMIT 10
20//! ```
21//!
22//! ## MySQL
23//! ```sql
24//! SELECT
25//! JSON_UNQUOTE(JSON_EXTRACT(data, '$.category')) AS category,
26//! DATE_FORMAT(occurred_at, '%Y-%m-%d') AS occurred_at_day,
27//! COUNT(*) AS count,
28//! SUM(revenue) AS revenue_sum
29//! FROM tf_sales
30//! WHERE customer_id = ?
31//! GROUP BY JSON_UNQUOTE(JSON_EXTRACT(data, '$.category')), DATE_FORMAT(occurred_at, '%Y-%m-%d')
32//! HAVING SUM(revenue) > ?
33//! ORDER BY revenue_sum DESC
34//! LIMIT 10
35//! ```
36//!
37//! ## SQLite
38//! ```sql
39//! SELECT
40//! json_extract(data, '$.category') AS category,
41//! strftime('%Y-%m-%d', occurred_at) AS occurred_at_day,
42//! COUNT(*) AS count,
43//! SUM(revenue) AS revenue_sum
44//! FROM tf_sales
45//! WHERE customer_id = ?
46//! GROUP BY json_extract(data, '$.category'), strftime('%Y-%m-%d', occurred_at)
47//! HAVING SUM(revenue) > ?
48//! ORDER BY revenue_sum DESC
49//! LIMIT 10
50//! ```
51
52use std::fmt::Write as _;
53
54use crate::{
55 compiler::{
56 aggregate_types::{AggregateFunction, TemporalBucket},
57 aggregation::{
58 AggregateExpression, AggregationPlan, GroupByExpression, OrderByClause, OrderDirection,
59 ValidatedHavingCondition,
60 },
61 fact_table::FactTableMetadata,
62 },
63 db::{
64 identifier::{
65 quote_mysql_identifier, quote_postgres_identifier, quote_sqlserver_identifier,
66 },
67 path_escape::{
68 escape_mysql_json_path, escape_postgres_jsonb_segment, escape_sqlite_json_path,
69 escape_sqlserver_json_path,
70 },
71 types::DatabaseType,
72 where_clause::{WhereClause, WhereOperator},
73 },
74 error::{FraiseQLError, Result},
75 utils::casing::to_snake_case,
76};
77
78mod expressions;
79mod where_clause;
80
81#[cfg(test)]
82mod tests;
83
84/// Aggregate query with bind parameters instead of escaped string literals.
85///
86/// Produced by [`AggregationSqlGenerator::generate_parameterized`]. Pass `sql`
87/// and `params` directly to [`crate::db::DatabaseAdapter::execute_parameterized_aggregate`].
88#[derive(Debug, Clone)]
89pub struct ParameterizedAggregationSql {
90 /// SQL with `$N` (PostgreSQL), `?` (MySQL / SQLite), or `@P1` (SQL Server) placeholders.
91 pub sql: String,
92 /// Bind parameters in placeholder order.
93 pub params: Vec<serde_json::Value>,
94}
95
96/// Aggregation SQL generator
97pub struct AggregationSqlGenerator {
98 database_type: DatabaseType,
99}
100
101impl AggregationSqlGenerator {
102 /// Create new SQL generator for specific database
103 #[must_use]
104 pub const fn new(database_type: DatabaseType) -> Self {
105 Self { database_type }
106 }
107
108 /// Generate JSONB extraction SQL with per-database path escaping.
109 ///
110 /// Each database uses a different string literal syntax for JSON paths.
111 /// Single quotes or other metacharacters in `path` could otherwise break
112 /// out of the string literal and inject arbitrary SQL. The per-database
113 /// escape functions from `fraiseql_db::path_escape` are applied here as
114 /// a second line of defence after schema allowlist validation in the planner.
115 pub(super) fn jsonb_extract_sql(&self, jsonb_column: &str, path: &str) -> String {
116 match self.database_type {
117 DatabaseType::PostgreSQL => {
118 let escaped = escape_postgres_jsonb_segment(path);
119 format!("{}->>'{}' ", jsonb_column, escaped)
120 },
121 DatabaseType::MySQL => {
122 // escape_mysql_json_path returns "$.escaped_segment"
123 let escaped = escape_mysql_json_path(&[path.to_owned()]);
124 format!("JSON_UNQUOTE(JSON_EXTRACT({}, '{}'))", jsonb_column, escaped)
125 },
126 DatabaseType::SQLite => {
127 // escape_sqlite_json_path returns "$.escaped_segment"
128 let escaped = escape_sqlite_json_path(&[path.to_owned()]);
129 format!("json_extract({}, '{}')", jsonb_column, escaped)
130 },
131 DatabaseType::SQLServer => {
132 // escape_sqlserver_json_path returns "$.escaped_segment"
133 let escaped = escape_sqlserver_json_path(&[path.to_owned()]);
134 format!("JSON_VALUE({}, '{}')", jsonb_column, escaped)
135 },
136 }
137 }
138
139 /// Convert `WhereOperator` to SQL operator
140 pub(super) const fn operator_to_sql(&self, operator: &WhereOperator) -> &'static str {
141 match operator {
142 WhereOperator::Neq => "!=",
143 WhereOperator::Gt => ">",
144 WhereOperator::Gte => ">=",
145 WhereOperator::Lt => "<",
146 WhereOperator::Lte => "<=",
147 WhereOperator::In => "IN",
148 WhereOperator::Nin => "NOT IN",
149 WhereOperator::Like
150 | WhereOperator::Contains
151 | WhereOperator::Startswith
152 | WhereOperator::Endswith => "LIKE",
153 WhereOperator::Ilike
154 | WhereOperator::Icontains
155 | WhereOperator::Istartswith
156 | WhereOperator::Iendswith => match self.database_type {
157 DatabaseType::PostgreSQL => "ILIKE",
158 _ => "LIKE", // Other databases use LIKE with UPPER/LOWER
159 },
160 // Eq and any future operators default to equality
161 _ => "=",
162 }
163 }
164
165 /// Quote a validated field alias/column name using the database-appropriate identifier syntax.
166 ///
167 /// Field names arrive here after `OrderByClause::validate_field_name` has verified they
168 /// match `[_A-Za-z][_0-9A-Za-z]*`, so no delimiter-escaping is required — but quoting
169 /// still protects against SQL reserved words (`order`, `count`, `group`, `select`, …)
170 /// that would break unquoted ORDER BY clauses.
171 pub(super) fn quote_identifier(&self, name: &str) -> String {
172 match self.database_type {
173 DatabaseType::MySQL => quote_mysql_identifier(name),
174 DatabaseType::SQLServer => quote_sqlserver_identifier(name),
175 // PostgreSQL and SQLite both use double-quote syntax.
176 DatabaseType::PostgreSQL | DatabaseType::SQLite => quote_postgres_identifier(name),
177 }
178 }
179
180 /// Escape a string value for embedding inside a SQL string literal.
181 ///
182 /// MySQL treats backslash as an escape character in string literals by default
183 /// (unless `NO_BACKSLASH_ESCAPES` `sql_mode` is set). Backslashes must be doubled
184 /// before single-quote escaping to prevent injection via sequences like `\';`.
185 ///
186 /// Null bytes (`\x00`) are stripped before escaping. PostgreSQL rejects null
187 /// bytes in string literals with "invalid byte sequence for encoding", which
188 /// would surface as a confusing database error. Stripping them produces
189 /// deterministic SQL regardless of the database's null-byte handling.
190 pub(super) fn escape_sql_string(&self, s: &str) -> String {
191 // Strip null bytes — never valid in SQL string literals.
192 let without_nulls: std::borrow::Cow<str> = if s.contains('\0') {
193 s.replace('\0', "").into()
194 } else {
195 s.into()
196 };
197 if matches!(self.database_type, DatabaseType::MySQL) {
198 // Escape backslashes first, then single quotes.
199 without_nulls.replace('\\', "\\\\").replace('\'', "''")
200 } else {
201 // Standard SQL: only double single quotes.
202 without_nulls.replace('\'', "''")
203 }
204 }
205
206 /// Returns the bind-parameter placeholder for position `index` (0-based).
207 pub(super) fn placeholder(&self, index: usize) -> String {
208 match self.database_type {
209 DatabaseType::PostgreSQL => format!("${}", index + 1),
210 DatabaseType::SQLServer => format!("@P{}", index + 1),
211 _ => "?".to_string(),
212 }
213 }
214
215 /// If `value` is non-NULL, appends it to `params` and returns the placeholder.
216 ///
217 /// `NULL` is emitted inline as the literal `NULL`; it cannot be reliably
218 /// parameterized across all four database drivers in the same way.
219 pub(super) fn emit_value_param(
220 &self,
221 value: &serde_json::Value,
222 params: &mut Vec<serde_json::Value>,
223 ) -> String {
224 if matches!(value, serde_json::Value::Null) {
225 return "NULL".to_string();
226 }
227 let idx = params.len();
228 params.push(value.clone());
229 self.placeholder(idx)
230 }
231
232 /// Build a LIKE pattern string, escape LIKE metacharacters with `!`, bind it as a
233 /// parameter, and return the placeholder. Returns `(placeholder, needs_escape_clause)`
234 /// where `needs_escape_clause` indicates whether `ESCAPE '!'` should be appended to
235 /// the SQL fragment.
236 pub(super) fn emit_like_pattern_param(
237 &self,
238 operator: &WhereOperator,
239 value: &str,
240 params: &mut Vec<serde_json::Value>,
241 ) -> (String, bool) {
242 // Strip null bytes before binding (same invariant as escape_sql_string).
243 let clean: String = if value.contains('\0') {
244 value.replace('\0', "")
245 } else {
246 value.to_string()
247 };
248
249 let (pattern, needs_escape) = match operator {
250 WhereOperator::Contains | WhereOperator::Icontains => {
251 let esc = clean.replace('!', "!!").replace('%', "!%").replace('_', "!_");
252 (format!("%{esc}%"), true)
253 },
254 WhereOperator::Startswith | WhereOperator::Istartswith => {
255 let esc = clean.replace('!', "!!").replace('%', "!%").replace('_', "!_");
256 (format!("{esc}%"), true)
257 },
258 WhereOperator::Endswith | WhereOperator::Iendswith => {
259 let esc = clean.replace('!', "!!").replace('%', "!%").replace('_', "!_");
260 (format!("%{esc}"), true)
261 },
262 // Like / Ilike: caller controls wildcards — bind as-is.
263 _ => (clean, false),
264 };
265
266 let ph = self.emit_value_param(&serde_json::Value::String(pattern), params);
267 (ph, needs_escape)
268 }
269
270 /// Generate a parameterized aggregate SQL query.
271 ///
272 /// All user-supplied string values in `WHERE` and `HAVING` clauses are emitted as
273 /// bind placeholders (`$N` / `?` / `@P1` depending on the database dialect) rather
274 /// than being embedded as escaped string literals. Numeric, boolean, and `NULL`
275 /// values are still inlined since they carry no injection risk.
276 ///
277 /// # Errors
278 ///
279 /// Returns error if SQL generation fails (unknown aggregate function, etc.).
280 pub fn generate_parameterized(
281 &self,
282 plan: &AggregationPlan,
283 ) -> Result<ParameterizedAggregationSql> {
284 let mut params: Vec<serde_json::Value> = Vec::new();
285
286 let select_sql =
287 self.build_select_clause(&plan.group_by_expressions, &plan.aggregate_expressions)?;
288 let from_sql = format!("FROM {}", plan.request.table_name);
289
290 let where_sql = if let Some(ref wc) = plan.request.where_clause {
291 self.build_where_clause_parameterized(wc, &plan.metadata, &mut params)?
292 } else {
293 String::new()
294 };
295
296 let group_sql = if !plan.group_by_expressions.is_empty() {
297 self.build_group_by_clause(&plan.group_by_expressions)?
298 } else {
299 String::new()
300 };
301
302 let having_sql =
303 self.build_having_clause_parameterized(&plan.having_conditions, &mut params)?;
304
305 let order_sql = if !plan.request.order_by.is_empty() {
306 self.build_order_by_clause(&plan.request.order_by)?
307 } else {
308 String::new()
309 };
310
311 let mut parts: Vec<&str> = vec![
312 &select_sql,
313 &from_sql,
314 &where_sql,
315 &group_sql,
316 &having_sql,
317 &order_sql,
318 ];
319 parts.retain(|s| !s.is_empty());
320
321 let mut sql = parts.join("\n");
322
323 if let Some(limit) = plan.request.limit {
324 let _ = write!(sql, "\nLIMIT {limit}");
325 }
326 if let Some(offset) = plan.request.offset {
327 let _ = write!(sql, "\nOFFSET {offset}");
328 }
329
330 Ok(ParameterizedAggregationSql { sql, params })
331 }
332}