// polyglot_sql/optimizer/optimizer.rs

//! Optimizer Orchestration Module
//!
//! This module provides the main entry point for SQL optimization,
//! coordinating multiple optimization passes in the correct order.
//!
//! Ported from sqlglot's optimizer/optimizer.py

use crate::dialects::DialectType;
use crate::expressions::Expression;
use crate::schema::Schema;

use super::annotate_types::annotate_types;
use super::canonicalize::canonicalize;
use super::eliminate_ctes::eliminate_ctes;
use super::normalize::normalize;
use super::optimize_joins::optimize_joins;
use super::pushdown_predicates::pushdown_predicates;
use super::pushdown_projections::pushdown_projections;
use super::qualify_columns::qualify_columns;
use super::simplify::simplify;
use super::subquery::{merge_subqueries, unnest_subqueries};

23/// Optimizer configuration
24pub struct OptimizerConfig<'a> {
25    /// Database schema for type inference and column resolution
26    pub schema: Option<&'a dyn Schema>,
27    /// Default database name
28    pub db: Option<String>,
29    /// Default catalog name
30    pub catalog: Option<String>,
31    /// Dialect for dialect-specific optimizations
32    pub dialect: Option<DialectType>,
33    /// Whether to keep tables isolated (don't merge from multiple tables)
34    pub isolate_tables: bool,
35    /// Whether to quote identifiers
36    pub quote_identifiers: bool,
37}
38
39impl<'a> Default for OptimizerConfig<'a> {
40    fn default() -> Self {
41        Self {
42            schema: None,
43            db: None,
44            catalog: None,
45            dialect: None,
46            isolate_tables: true,
47            quote_identifiers: false,
48        }
49    }
50}
51
/// Identifies one optimization pass that the optimizer can run.
///
/// The variants are plain tags (`Copy`, comparable); the work itself is done
/// by the pass each tag is dispatched to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OptimizationRule {
    /// Qualify columns and tables with their full names
    Qualify,
    /// Push projections down to eliminate unused columns early
    PushdownProjections,
    /// Normalize boolean expressions
    Normalize,
    /// Unnest correlated subqueries into joins
    UnnestSubqueries,
    /// Push predicates down to filter data early
    PushdownPredicates,
    /// Optimize join order and remove cross joins
    OptimizeJoins,
    /// Eliminate derived tables by converting to CTEs
    EliminateSubqueries,
    /// Merge subqueries into outer queries
    MergeSubqueries,
    /// Remove unused CTEs
    EliminateCtes,
    /// Annotate expressions with type information
    AnnotateTypes,
    /// Convert expressions to canonical form
    Canonicalize,
    /// Simplify expressions
    Simplify,
}

81/// Default optimization rules in order of execution
82pub const DEFAULT_RULES: &[OptimizationRule] = &[
83    OptimizationRule::Qualify,
84    OptimizationRule::PushdownProjections,
85    OptimizationRule::Normalize,
86    OptimizationRule::UnnestSubqueries,
87    OptimizationRule::PushdownPredicates,
88    OptimizationRule::OptimizeJoins,
89    OptimizationRule::EliminateSubqueries,
90    OptimizationRule::MergeSubqueries,
91    OptimizationRule::EliminateCtes,
92    OptimizationRule::AnnotateTypes,
93    OptimizationRule::Canonicalize,
94    OptimizationRule::Simplify,
95];
96
97/// Optimize a SQL expression using the default set of rules.
98///
99/// This function coordinates multiple optimization passes in the correct order
100/// to produce an optimized query plan.
101///
102/// # Arguments
103/// * `expression` - The expression to optimize
104/// * `config` - Optimizer configuration
105///
106/// # Returns
107/// The optimized expression
108pub fn optimize(expression: Expression, config: &OptimizerConfig<'_>) -> Expression {
109    optimize_with_rules(expression, config, DEFAULT_RULES)
110}
111
112/// Optimize a SQL expression using a custom set of rules.
113///
114/// # Arguments
115/// * `expression` - The expression to optimize
116/// * `config` - Optimizer configuration
117/// * `rules` - The optimization rules to apply
118///
119/// # Returns
120/// The optimized expression
121pub fn optimize_with_rules(
122    mut expression: Expression,
123    config: &OptimizerConfig<'_>,
124    rules: &[OptimizationRule],
125) -> Expression {
126    for rule in rules {
127        expression = apply_rule(expression, *rule, config);
128    }
129    expression
130}
131
132/// Apply a single optimization rule
133fn apply_rule(
134    expression: Expression,
135    rule: OptimizationRule,
136    config: &OptimizerConfig<'_>,
137) -> Expression {
138    match rule {
139        OptimizationRule::Qualify => {
140            // Qualify columns with table references
141            if let Some(schema) = config.schema {
142                let options = super::qualify_columns::QualifyColumnsOptions {
143                    dialect: config.dialect,
144                    ..Default::default()
145                };
146                let original = expression.clone();
147                qualify_columns(expression, schema, &options).unwrap_or(original)
148            } else {
149                // Without schema, skip qualification
150                expression
151            }
152        }
153        OptimizationRule::PushdownProjections => {
154            pushdown_projections(expression, config.dialect, true)
155        }
156        OptimizationRule::Normalize => {
157            // Use CNF (dnf=false) with default max distance
158            let original = expression.clone();
159            normalize(expression, false, super::normalize::DEFAULT_MAX_DISTANCE)
160                .unwrap_or(original)
161        }
162        OptimizationRule::UnnestSubqueries => {
163            unnest_subqueries(expression)
164        }
165        OptimizationRule::PushdownPredicates => {
166            pushdown_predicates(expression, config.dialect)
167        }
168        OptimizationRule::OptimizeJoins => {
169            optimize_joins(expression)
170        }
171        OptimizationRule::EliminateSubqueries => {
172            eliminate_subqueries_opt(expression)
173        }
174        OptimizationRule::MergeSubqueries => {
175            merge_subqueries(expression, config.isolate_tables)
176        }
177        OptimizationRule::EliminateCtes => {
178            eliminate_ctes(expression)
179        }
180        OptimizationRule::AnnotateTypes => {
181            // annotate_types is used for type inference, not expression transformation
182            // For now, just return the expression unchanged
183            let _ = annotate_types(&expression, config.schema, config.dialect);
184            expression
185        }
186        OptimizationRule::Canonicalize => {
187            canonicalize(expression, config.dialect)
188        }
189        OptimizationRule::Simplify => {
190            simplify(expression, config.dialect)
191        }
192    }
193}
194
195// Re-import from subquery module with different name to avoid conflict
196use super::subquery::eliminate_subqueries as eliminate_subqueries_opt;
197
198/// Quick optimization that only applies essential passes.
199///
200/// This is faster than full optimization but may miss some opportunities.
201pub fn quick_optimize(expression: Expression, dialect: Option<DialectType>) -> Expression {
202    let config = OptimizerConfig {
203        dialect,
204        ..Default::default()
205    };
206
207    let rules = &[
208        OptimizationRule::Simplify,
209        OptimizationRule::Canonicalize,
210    ];
211
212    optimize_with_rules(expression, &config, rules)
213}
214
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generator::Generator;
    use crate::parser::Parser;

    /// Renders an expression back to SQL text.
    ///
    /// Named `to_sql` rather than `gen` because `gen` is a reserved keyword
    /// in the Rust 2024 edition.
    fn to_sql(expr: &Expression) -> String {
        Generator::new()
            .generate(expr)
            .expect("failed to generate SQL")
    }

    /// Parses `sql` and returns its first statement.
    fn parse(sql: &str) -> Expression {
        Parser::parse_sql(sql)
            .expect("Failed to parse")
            .into_iter()
            .next()
            .expect("parser returned no statements")
    }

    /// Runs the default optimizer pipeline over `sql` and renders the result.
    fn optimize_default(sql: &str) -> String {
        let config = OptimizerConfig::default();
        to_sql(&optimize(parse(sql), &config))
    }

    #[test]
    fn test_optimize_simple() {
        let sql = optimize_default("SELECT a FROM t");
        assert!(sql.contains("SELECT"));
    }

    #[test]
    fn test_optimize_with_where() {
        let sql = optimize_default("SELECT a FROM t WHERE b = 1");
        assert!(sql.contains("WHERE"));
    }

    #[test]
    fn test_optimize_with_join() {
        let sql = optimize_default("SELECT t.a FROM t JOIN s ON t.id = s.id");
        assert!(sql.contains("JOIN"));
    }

    #[test]
    fn test_quick_optimize() {
        let expr = parse("SELECT 1 + 0 FROM t");
        let result = quick_optimize(expr, None);
        let sql = to_sql(&result);
        assert!(sql.contains("SELECT"));
    }

    #[test]
    fn test_optimize_with_custom_rules() {
        let expr = parse("SELECT a FROM t WHERE NOT NOT b = 1");
        let config = OptimizerConfig::default();
        let rules = &[OptimizationRule::Simplify];
        let result = optimize_with_rules(expr, &config, rules);
        let sql = to_sql(&result);
        assert!(sql.contains("SELECT"));
    }

    #[test]
    fn test_optimizer_config_default() {
        let config = OptimizerConfig::default();
        assert!(config.schema.is_none());
        assert!(config.dialect.is_none());
        assert!(config.isolate_tables);
        assert!(!config.quote_identifiers);
    }

    #[test]
    fn test_default_rules() {
        assert!(!DEFAULT_RULES.is_empty());
        assert!(DEFAULT_RULES.contains(&OptimizationRule::Simplify));
        assert!(DEFAULT_RULES.contains(&OptimizationRule::Canonicalize));
    }

    #[test]
    fn test_optimize_subquery() {
        let sql = optimize_default("SELECT * FROM (SELECT a FROM t) AS sub");
        assert!(sql.contains("SELECT"));
    }

    #[test]
    fn test_optimize_cte() {
        let sql = optimize_default("WITH cte AS (SELECT a FROM t) SELECT * FROM cte");
        assert!(sql.contains("WITH"));
    }

    #[test]
    fn test_optimize_preserves_semantics() {
        let sql = optimize_default("SELECT a, b FROM t WHERE c > 1 ORDER BY a");
        assert!(sql.contains("ORDER BY"));
    }
}