Skip to main content

polyglot_sql/optimizer/
optimizer.rs

1//! Optimizer Orchestration Module
2//!
3//! This module provides the main entry point for SQL optimization,
4//! coordinating multiple optimization passes in the correct order.
5//!
6//! Ported from sqlglot's optimizer/optimizer.py
7
8use crate::dialects::DialectType;
9use crate::expressions::Expression;
10use crate::schema::Schema;
11
12use super::annotate_types::annotate_types;
13use super::canonicalize::canonicalize;
14use super::eliminate_ctes::eliminate_ctes;
15use super::normalize::normalize;
16use super::optimize_joins::optimize_joins;
17use super::pushdown_predicates::pushdown_predicates;
18use super::pushdown_projections::pushdown_projections;
19use super::qualify_columns::qualify_columns;
20use super::simplify::simplify;
21use super::subquery::{merge_subqueries, unnest_subqueries};
22
23/// Optimizer configuration
24pub struct OptimizerConfig<'a> {
25    /// Database schema for type inference and column resolution
26    pub schema: Option<&'a dyn Schema>,
27    /// Default database name
28    pub db: Option<String>,
29    /// Default catalog name
30    pub catalog: Option<String>,
31    /// Dialect for dialect-specific optimizations
32    pub dialect: Option<DialectType>,
33    /// Whether to keep tables isolated (don't merge from multiple tables)
34    pub isolate_tables: bool,
35    /// Whether to quote identifiers
36    pub quote_identifiers: bool,
37}
38
39impl<'a> Default for OptimizerConfig<'a> {
40    fn default() -> Self {
41        Self {
42            schema: None,
43            db: None,
44            catalog: None,
45            dialect: None,
46            isolate_tables: true,
47            quote_identifiers: false,
48        }
49    }
50}
51
/// A single optimization pass that the optimizer can run.
///
/// The enum is a plain, field-less tag: it is `Copy`, comparable and hashable,
/// so rules can be stored in slices, sets or used as map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OptimizationRule {
    /// Qualify columns and tables with their full names
    Qualify,
    /// Push projections down to eliminate unused columns early
    PushdownProjections,
    /// Normalize boolean expressions
    Normalize,
    /// Unnest correlated subqueries into joins
    UnnestSubqueries,
    /// Push predicates down to filter data early
    PushdownPredicates,
    /// Optimize join order and remove cross joins
    OptimizeJoins,
    /// Eliminate derived tables by converting to CTEs
    EliminateSubqueries,
    /// Merge subqueries into outer queries
    MergeSubqueries,
    /// Remove unused CTEs
    EliminateCtes,
    /// Annotate expressions with type information
    AnnotateTypes,
    /// Convert expressions to canonical form
    Canonicalize,
    /// Simplify expressions
    Simplify,
}
80
81/// Default optimization rules in order of execution
82pub const DEFAULT_RULES: &[OptimizationRule] = &[
83    OptimizationRule::Qualify,
84    OptimizationRule::PushdownProjections,
85    OptimizationRule::Normalize,
86    OptimizationRule::UnnestSubqueries,
87    OptimizationRule::PushdownPredicates,
88    OptimizationRule::OptimizeJoins,
89    OptimizationRule::EliminateSubqueries,
90    OptimizationRule::MergeSubqueries,
91    OptimizationRule::EliminateCtes,
92    OptimizationRule::AnnotateTypes,
93    OptimizationRule::Canonicalize,
94    OptimizationRule::Simplify,
95];
96
97/// Optimize a SQL expression using the default set of rules.
98///
99/// This function coordinates multiple optimization passes in the correct order
100/// to produce an optimized query plan.
101///
102/// # Arguments
103/// * `expression` - The expression to optimize
104/// * `config` - Optimizer configuration
105///
106/// # Returns
107/// The optimized expression
108pub fn optimize(expression: Expression, config: &OptimizerConfig<'_>) -> Expression {
109    optimize_with_rules(expression, config, DEFAULT_RULES)
110}
111
112/// Optimize a SQL expression using a custom set of rules.
113///
114/// # Arguments
115/// * `expression` - The expression to optimize
116/// * `config` - Optimizer configuration
117/// * `rules` - The optimization rules to apply
118///
119/// # Returns
120/// The optimized expression
121pub fn optimize_with_rules(
122    mut expression: Expression,
123    config: &OptimizerConfig<'_>,
124    rules: &[OptimizationRule],
125) -> Expression {
126    for rule in rules {
127        expression = apply_rule(expression, *rule, config);
128    }
129    expression
130}
131
132/// Apply a single optimization rule
133fn apply_rule(
134    expression: Expression,
135    rule: OptimizationRule,
136    config: &OptimizerConfig<'_>,
137) -> Expression {
138    match rule {
139        OptimizationRule::Qualify => {
140            // Qualify columns with table references
141            if let Some(schema) = config.schema {
142                let options = super::qualify_columns::QualifyColumnsOptions {
143                    dialect: config.dialect,
144                    ..Default::default()
145                };
146                let original = expression.clone();
147                qualify_columns(expression, schema, &options).unwrap_or(original)
148            } else {
149                // Without schema, skip qualification
150                expression
151            }
152        }
153        OptimizationRule::PushdownProjections => {
154            pushdown_projections(expression, config.dialect, true)
155        }
156        OptimizationRule::Normalize => {
157            // Use CNF (dnf=false) with default max distance
158            let original = expression.clone();
159            normalize(expression, false, super::normalize::DEFAULT_MAX_DISTANCE).unwrap_or(original)
160        }
161        OptimizationRule::UnnestSubqueries => unnest_subqueries(expression),
162        OptimizationRule::PushdownPredicates => pushdown_predicates(expression, config.dialect),
163        OptimizationRule::OptimizeJoins => optimize_joins(expression),
164        OptimizationRule::EliminateSubqueries => eliminate_subqueries_opt(expression),
165        OptimizationRule::MergeSubqueries => merge_subqueries(expression, config.isolate_tables),
166        OptimizationRule::EliminateCtes => eliminate_ctes(expression),
167        OptimizationRule::AnnotateTypes => {
168            // annotate_types is used for type inference, not expression transformation
169            // For now, just return the expression unchanged
170            let _ = annotate_types(&expression, config.schema, config.dialect);
171            expression
172        }
173        OptimizationRule::Canonicalize => canonicalize(expression, config.dialect),
174        OptimizationRule::Simplify => simplify(expression, config.dialect),
175    }
176}
177
178// Re-import from subquery module with different name to avoid conflict
179use super::subquery::eliminate_subqueries as eliminate_subqueries_opt;
180
181/// Quick optimization that only applies essential passes.
182///
183/// This is faster than full optimization but may miss some opportunities.
184pub fn quick_optimize(expression: Expression, dialect: Option<DialectType>) -> Expression {
185    let config = OptimizerConfig {
186        dialect,
187        ..Default::default()
188    };
189
190    let rules = &[OptimizationRule::Simplify, OptimizationRule::Canonicalize];
191
192    optimize_with_rules(expression, &config, rules)
193}
194
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generator::Generator;
    use crate::parser::Parser;

    /// Renders an expression back to SQL text, panicking if generation fails.
    ///
    /// Deliberately not called `gen`: that identifier is a reserved keyword in
    /// the Rust 2024 edition.
    fn generate_sql(expr: &Expression) -> String {
        Generator::new().generate(expr).unwrap()
    }

    /// Parses `sql` and returns its first statement.
    fn parse(sql: &str) -> Expression {
        Parser::parse_sql(sql).expect("Failed to parse")[0].clone()
    }

    /// Parses `sql`, runs the default pipeline with a default config, and
    /// renders the optimized expression back to SQL.
    fn optimize_default(sql: &str) -> String {
        let config = OptimizerConfig::default();
        let optimized = optimize(parse(sql), &config);
        generate_sql(&optimized)
    }

    #[test]
    fn test_optimize_simple() {
        let sql = optimize_default("SELECT a FROM t");
        assert!(sql.contains("SELECT"));
    }

    #[test]
    fn test_optimize_with_where() {
        let sql = optimize_default("SELECT a FROM t WHERE b = 1");
        assert!(sql.contains("WHERE"));
    }

    #[test]
    fn test_optimize_with_join() {
        let sql = optimize_default("SELECT t.a FROM t JOIN s ON t.id = s.id");
        assert!(sql.contains("JOIN"));
    }

    #[test]
    fn test_quick_optimize() {
        let expr = parse("SELECT 1 + 0 FROM t");
        let result = quick_optimize(expr, None);
        let sql = generate_sql(&result);
        assert!(sql.contains("SELECT"));
    }

    #[test]
    fn test_optimize_with_custom_rules() {
        let expr = parse("SELECT a FROM t WHERE NOT NOT b = 1");
        let config = OptimizerConfig::default();
        let rules = &[OptimizationRule::Simplify];
        let result = optimize_with_rules(expr, &config, rules);
        let sql = generate_sql(&result);
        assert!(sql.contains("SELECT"));
    }

    #[test]
    fn test_optimizer_config_default() {
        let config = OptimizerConfig::default();
        assert!(config.schema.is_none());
        assert!(config.dialect.is_none());
        assert!(config.isolate_tables);
        assert!(!config.quote_identifiers);
    }

    #[test]
    fn test_default_rules() {
        assert!(!DEFAULT_RULES.is_empty());
        assert!(DEFAULT_RULES.contains(&OptimizationRule::Simplify));
        assert!(DEFAULT_RULES.contains(&OptimizationRule::Canonicalize));
    }

    #[test]
    fn test_optimize_subquery() {
        let sql = optimize_default("SELECT * FROM (SELECT a FROM t) AS sub");
        assert!(sql.contains("SELECT"));
    }

    #[test]
    fn test_optimize_cte() {
        let sql = optimize_default("WITH cte AS (SELECT a FROM t) SELECT * FROM cte");
        assert!(sql.contains("WITH"));
    }

    #[test]
    fn test_optimize_preserves_semantics() {
        let sql = optimize_default("SELECT a, b FROM t WHERE c > 1 ORDER BY a");
        assert!(sql.contains("ORDER BY"));
    }
}