Skip to main content

fraiseql_core/runtime/aggregation/
mod.rs

1//! Runtime Aggregation SQL Generation Module
2//!
3//! This module generates database-specific SQL from aggregation execution plans.
4//!
5//! # Database-Specific SQL
6//!
7//! ## PostgreSQL
8//! ```sql
9//! SELECT
10//!   data->>'category' AS category,
11//!   DATE_TRUNC('day', occurred_at) AS occurred_at_day,
12//!   COUNT(*) AS count,
13//!   SUM(revenue) AS revenue_sum
14//! FROM tf_sales
15//! WHERE customer_id = $1
16//! GROUP BY data->>'category', DATE_TRUNC('day', occurred_at)
17//! HAVING SUM(revenue) > $2
18//! ORDER BY revenue_sum DESC
19//! LIMIT 10
20//! ```
21//!
22//! ## MySQL
23//! ```sql
24//! SELECT
25//!   JSON_UNQUOTE(JSON_EXTRACT(data, '$.category')) AS category,
26//!   DATE_FORMAT(occurred_at, '%Y-%m-%d') AS occurred_at_day,
27//!   COUNT(*) AS count,
28//!   SUM(revenue) AS revenue_sum
29//! FROM tf_sales
30//! WHERE customer_id = ?
31//! GROUP BY JSON_UNQUOTE(JSON_EXTRACT(data, '$.category')), DATE_FORMAT(occurred_at, '%Y-%m-%d')
32//! HAVING SUM(revenue) > ?
33//! ORDER BY revenue_sum DESC
34//! LIMIT 10
35//! ```
36//!
37//! ## SQLite
38//! ```sql
39//! SELECT
40//!   json_extract(data, '$.category') AS category,
41//!   strftime('%Y-%m-%d', occurred_at) AS occurred_at_day,
42//!   COUNT(*) AS count,
43//!   SUM(revenue) AS revenue_sum
44//! FROM tf_sales
45//! WHERE customer_id = ?
46//! GROUP BY json_extract(data, '$.category'), strftime('%Y-%m-%d', occurred_at)
47//! HAVING SUM(revenue) > ?
48//! ORDER BY revenue_sum DESC
49//! LIMIT 10
50//! ```
51
52use std::fmt::Write as _;
53
54use crate::{
55    compiler::{
56        aggregate_types::{AggregateFunction, TemporalBucket},
57        aggregation::{
58            AggregateExpression, AggregationPlan, GroupByExpression, OrderByClause, OrderDirection,
59            ValidatedHavingCondition,
60        },
61        fact_table::FactTableMetadata,
62    },
63    db::{
64        identifier::{
65            quote_mysql_identifier, quote_postgres_identifier, quote_sqlserver_identifier,
66        },
67        path_escape::{
68            escape_mysql_json_path, escape_postgres_jsonb_segment, escape_sqlite_json_path,
69            escape_sqlserver_json_path,
70        },
71        types::DatabaseType,
72        where_clause::{WhereClause, WhereOperator},
73    },
74    error::{FraiseQLError, Result},
75    utils::casing::to_snake_case,
76};
77
78mod expressions;
79mod where_clause;
80
81#[cfg(test)]
82mod tests;
83
84/// Aggregate query with bind parameters instead of escaped string literals.
85///
86/// Produced by [`AggregationSqlGenerator::generate_parameterized`].  Pass `sql`
87/// and `params` directly to [`crate::db::DatabaseAdapter::execute_parameterized_aggregate`].
88#[derive(Debug, Clone)]
89pub struct ParameterizedAggregationSql {
90    /// SQL with `$N` (PostgreSQL), `?` (MySQL / SQLite), or `@P1` (SQL Server) placeholders.
91    pub sql:    String,
92    /// Bind parameters in placeholder order.
93    pub params: Vec<serde_json::Value>,
94}
95
96/// Aggregation SQL generator
97pub struct AggregationSqlGenerator {
98    database_type: DatabaseType,
99}
100
101impl AggregationSqlGenerator {
102    /// Create new SQL generator for specific database
103    #[must_use]
104    pub const fn new(database_type: DatabaseType) -> Self {
105        Self { database_type }
106    }
107
108    /// Generate JSONB extraction SQL with per-database path escaping.
109    ///
110    /// Each database uses a different string literal syntax for JSON paths.
111    /// Single quotes or other metacharacters in `path` could otherwise break
112    /// out of the string literal and inject arbitrary SQL. The per-database
113    /// escape functions from `fraiseql_db::path_escape` are applied here as
114    /// a second line of defence after schema allowlist validation in the planner.
115    pub(super) fn jsonb_extract_sql(&self, jsonb_column: &str, path: &str) -> String {
116        match self.database_type {
117            DatabaseType::PostgreSQL => {
118                let escaped = escape_postgres_jsonb_segment(path);
119                format!("{}->>'{}' ", jsonb_column, escaped)
120            },
121            DatabaseType::MySQL => {
122                // escape_mysql_json_path returns "$.escaped_segment"
123                let escaped = escape_mysql_json_path(&[path.to_owned()]);
124                format!("JSON_UNQUOTE(JSON_EXTRACT({}, '{}'))", jsonb_column, escaped)
125            },
126            DatabaseType::SQLite => {
127                // escape_sqlite_json_path returns "$.escaped_segment"
128                let escaped = escape_sqlite_json_path(&[path.to_owned()]);
129                format!("json_extract({}, '{}')", jsonb_column, escaped)
130            },
131            DatabaseType::SQLServer => {
132                // escape_sqlserver_json_path returns "$.escaped_segment"
133                let escaped = escape_sqlserver_json_path(&[path.to_owned()]);
134                format!("JSON_VALUE({}, '{}')", jsonb_column, escaped)
135            },
136        }
137    }
138
139    /// Convert `WhereOperator` to SQL operator
140    pub(super) const fn operator_to_sql(&self, operator: &WhereOperator) -> &'static str {
141        match operator {
142            WhereOperator::Neq => "!=",
143            WhereOperator::Gt => ">",
144            WhereOperator::Gte => ">=",
145            WhereOperator::Lt => "<",
146            WhereOperator::Lte => "<=",
147            WhereOperator::In => "IN",
148            WhereOperator::Nin => "NOT IN",
149            WhereOperator::Like
150            | WhereOperator::Contains
151            | WhereOperator::Startswith
152            | WhereOperator::Endswith => "LIKE",
153            WhereOperator::Ilike
154            | WhereOperator::Icontains
155            | WhereOperator::Istartswith
156            | WhereOperator::Iendswith => match self.database_type {
157                DatabaseType::PostgreSQL => "ILIKE",
158                _ => "LIKE", // Other databases use LIKE with UPPER/LOWER
159            },
160            // Eq and any future operators default to equality
161            _ => "=",
162        }
163    }
164
165    /// Quote a validated field alias/column name using the database-appropriate identifier syntax.
166    ///
167    /// Field names arrive here after `OrderByClause::validate_field_name` has verified they
168    /// match `[_A-Za-z][_0-9A-Za-z]*`, so no delimiter-escaping is required — but quoting
169    /// still protects against SQL reserved words (`order`, `count`, `group`, `select`, …)
170    /// that would break unquoted ORDER BY clauses.
171    pub(super) fn quote_identifier(&self, name: &str) -> String {
172        match self.database_type {
173            DatabaseType::MySQL => quote_mysql_identifier(name),
174            DatabaseType::SQLServer => quote_sqlserver_identifier(name),
175            // PostgreSQL and SQLite both use double-quote syntax.
176            DatabaseType::PostgreSQL | DatabaseType::SQLite => quote_postgres_identifier(name),
177        }
178    }
179
180    /// Escape a string value for embedding inside a SQL string literal.
181    ///
182    /// MySQL treats backslash as an escape character in string literals by default
183    /// (unless `NO_BACKSLASH_ESCAPES` `sql_mode` is set). Backslashes must be doubled
184    /// before single-quote escaping to prevent injection via sequences like `\';`.
185    ///
186    /// Null bytes (`\x00`) are stripped before escaping. PostgreSQL rejects null
187    /// bytes in string literals with "invalid byte sequence for encoding", which
188    /// would surface as a confusing database error. Stripping them produces
189    /// deterministic SQL regardless of the database's null-byte handling.
190    pub(super) fn escape_sql_string(&self, s: &str) -> String {
191        // Strip null bytes — never valid in SQL string literals.
192        let without_nulls: std::borrow::Cow<str> = if s.contains('\0') {
193            s.replace('\0', "").into()
194        } else {
195            s.into()
196        };
197        if matches!(self.database_type, DatabaseType::MySQL) {
198            // Escape backslashes first, then single quotes.
199            without_nulls.replace('\\', "\\\\").replace('\'', "''")
200        } else {
201            // Standard SQL: only double single quotes.
202            without_nulls.replace('\'', "''")
203        }
204    }
205
206    /// Returns the bind-parameter placeholder for position `index` (0-based).
207    pub(super) fn placeholder(&self, index: usize) -> String {
208        match self.database_type {
209            DatabaseType::PostgreSQL => format!("${}", index + 1),
210            DatabaseType::SQLServer => format!("@P{}", index + 1),
211            _ => "?".to_string(),
212        }
213    }
214
215    /// If `value` is non-NULL, appends it to `params` and returns the placeholder.
216    ///
217    /// `NULL` is emitted inline as the literal `NULL`; it cannot be reliably
218    /// parameterized across all four database drivers in the same way.
219    pub(super) fn emit_value_param(
220        &self,
221        value: &serde_json::Value,
222        params: &mut Vec<serde_json::Value>,
223    ) -> String {
224        if matches!(value, serde_json::Value::Null) {
225            return "NULL".to_string();
226        }
227        let idx = params.len();
228        params.push(value.clone());
229        self.placeholder(idx)
230    }
231
232    /// Build a LIKE pattern string, escape LIKE metacharacters with `!`, bind it as a
233    /// parameter, and return the placeholder.  Returns `(placeholder, needs_escape_clause)`
234    /// where `needs_escape_clause` indicates whether `ESCAPE '!'` should be appended to
235    /// the SQL fragment.
236    pub(super) fn emit_like_pattern_param(
237        &self,
238        operator: &WhereOperator,
239        value: &str,
240        params: &mut Vec<serde_json::Value>,
241    ) -> (String, bool) {
242        // Strip null bytes before binding (same invariant as escape_sql_string).
243        let clean: String = if value.contains('\0') {
244            value.replace('\0', "")
245        } else {
246            value.to_string()
247        };
248
249        let (pattern, needs_escape) = match operator {
250            WhereOperator::Contains | WhereOperator::Icontains => {
251                let esc = clean.replace('!', "!!").replace('%', "!%").replace('_', "!_");
252                (format!("%{esc}%"), true)
253            },
254            WhereOperator::Startswith | WhereOperator::Istartswith => {
255                let esc = clean.replace('!', "!!").replace('%', "!%").replace('_', "!_");
256                (format!("{esc}%"), true)
257            },
258            WhereOperator::Endswith | WhereOperator::Iendswith => {
259                let esc = clean.replace('!', "!!").replace('%', "!%").replace('_', "!_");
260                (format!("%{esc}"), true)
261            },
262            // Like / Ilike: caller controls wildcards — bind as-is.
263            _ => (clean, false),
264        };
265
266        let ph = self.emit_value_param(&serde_json::Value::String(pattern), params);
267        (ph, needs_escape)
268    }
269
270    /// Generate a parameterized aggregate SQL query.
271    ///
272    /// All user-supplied string values in `WHERE` and `HAVING` clauses are emitted as
273    /// bind placeholders (`$N` / `?` / `@P1` depending on the database dialect) rather
274    /// than being embedded as escaped string literals.  Numeric, boolean, and `NULL`
275    /// values are still inlined since they carry no injection risk.
276    ///
277    /// # Errors
278    ///
279    /// Returns error if SQL generation fails (unknown aggregate function, etc.).
280    pub fn generate_parameterized(
281        &self,
282        plan: &AggregationPlan,
283    ) -> Result<ParameterizedAggregationSql> {
284        let mut params: Vec<serde_json::Value> = Vec::new();
285
286        let select_sql =
287            self.build_select_clause(&plan.group_by_expressions, &plan.aggregate_expressions)?;
288        let from_sql = format!("FROM {}", plan.request.table_name);
289
290        let where_sql = if let Some(ref wc) = plan.request.where_clause {
291            self.build_where_clause_parameterized(wc, &plan.metadata, &mut params)?
292        } else {
293            String::new()
294        };
295
296        let group_sql = if !plan.group_by_expressions.is_empty() {
297            self.build_group_by_clause(&plan.group_by_expressions)?
298        } else {
299            String::new()
300        };
301
302        let having_sql =
303            self.build_having_clause_parameterized(&plan.having_conditions, &mut params)?;
304
305        let order_sql = if !plan.request.order_by.is_empty() {
306            self.build_order_by_clause(&plan.request.order_by)?
307        } else {
308            String::new()
309        };
310
311        let mut parts: Vec<&str> = vec![
312            &select_sql,
313            &from_sql,
314            &where_sql,
315            &group_sql,
316            &having_sql,
317            &order_sql,
318        ];
319        parts.retain(|s| !s.is_empty());
320
321        let mut sql = parts.join("\n");
322
323        if let Some(limit) = plan.request.limit {
324            let _ = write!(sql, "\nLIMIT {limit}");
325        }
326        if let Some(offset) = plan.request.offset {
327            let _ = write!(sql, "\nOFFSET {offset}");
328        }
329
330        Ok(ParameterizedAggregationSql { sql, params })
331    }
332}