Skip to main content

oxigdal_query/optimizer/rules/
projection_pushdown.rs

1//! Projection pushdown optimization rule.
2//!
3//! Removes unnecessary columns early in the plan by rewriting subquery
4//! projections to include only columns referenced by the outer query.
5
6use crate::error::{QueryError, Result};
7use crate::parser::ast::*;
8use oxigdal_core::error::OxiGdalError;
9use std::collections::HashSet;
10
11use super::{OptimizationRule, collect_column_refs};
12
13/// Projection pushdown rule.
14///
15/// Removes unnecessary columns early in the plan.
16pub struct ProjectionPushdown;
17
18impl OptimizationRule for ProjectionPushdown {
19    fn apply(&self, stmt: SelectStatement) -> Result<SelectStatement> {
20        // Validate projection is not empty
21        if stmt.projection.is_empty() {
22            return Err(QueryError::optimization(
23                OxiGdalError::invalid_state_builder(
24                    "Cannot apply projection pushdown with empty projection",
25                )
26                .with_operation("projection_pushdown")
27                .with_suggestion("Ensure SELECT clause has at least one column or wildcard")
28                .build()
29                .to_string(),
30            ));
31        }
32
33        // Collect all referenced columns
34        let mut referenced_columns = HashSet::new();
35
36        // Add columns from projection
37        for item in &stmt.projection {
38            match item {
39                SelectItem::Wildcard | SelectItem::QualifiedWildcard(_) => {
40                    // Keep all columns
41                    return Ok(stmt);
42                }
43                SelectItem::Expr { expr, .. } => {
44                    collect_column_refs(expr, &mut referenced_columns);
45                }
46            }
47        }
48
49        // Add columns from WHERE
50        if let Some(ref selection) = stmt.selection {
51            collect_column_refs(selection, &mut referenced_columns);
52        }
53
54        // Add columns from GROUP BY
55        for expr in &stmt.group_by {
56            collect_column_refs(expr, &mut referenced_columns);
57        }
58
59        // Add columns from HAVING
60        if let Some(ref having) = stmt.having {
61            collect_column_refs(having, &mut referenced_columns);
62        }
63
64        // Add columns from ORDER BY
65        for order in &stmt.order_by {
66            collect_column_refs(&order.expr, &mut referenced_columns);
67        }
68
69        // Push column projections into subqueries within the FROM clause.
70        // This reduces the amount of data computed and materialized by inner queries.
71        let mut optimized_stmt = stmt;
72        if let Some(from) = optimized_stmt.from.take() {
73            optimized_stmt.from = Some(push_column_projections(from, &referenced_columns));
74        }
75
76        Ok(optimized_stmt)
77    }
78}
79
80/// Push column projections into subqueries within the FROM clause.
81///
82/// For each subquery, determines which columns are referenced by the outer
83/// query and rewrites the subquery's projection accordingly. This reduces
84/// the amount of data computed and materialized by inner queries.
85///
86/// For wildcard subqueries (`SELECT * FROM ...`), the wildcard is replaced
87/// with explicit column references for only the needed columns.
88/// For non-wildcard subqueries, unnecessary projection items are pruned.
89fn push_column_projections(
90    table_ref: TableReference,
91    referenced_columns: &HashSet<String>,
92) -> TableReference {
93    match table_ref {
94        TableReference::Subquery { query, alias } => {
95            // Determine which columns from this subquery the outer query needs
96            let needed: HashSet<String> = referenced_columns
97                .iter()
98                .filter_map(|col| {
99                    // Match qualified references: alias.column
100                    if let Some(stripped) = col.strip_prefix(&format!("{}.", alias)) {
101                        Some(stripped.to_string())
102                    } else if !col.contains('.') {
103                        // Unqualified column might come from this subquery
104                        Some(col.clone())
105                    } else {
106                        None
107                    }
108                })
109                .collect();
110
111            if needed.is_empty() {
112                return TableReference::Subquery { query, alias };
113            }
114
115            let mut new_query = *query;
116
117            let has_wildcard = new_query
118                .projection
119                .iter()
120                .any(|p| matches!(p, SelectItem::Wildcard | SelectItem::QualifiedWildcard(_)));
121
122            if has_wildcard {
123                // Replace wildcard with only the needed columns
124                let mut new_projection: Vec<SelectItem> = Vec::new();
125
126                // Keep non-wildcard items that produce needed columns
127                for item in &new_query.projection {
128                    match item {
129                        SelectItem::Expr { alias: Some(a), .. } if needed.contains(a.as_str()) => {
130                            new_projection.push(item.clone());
131                        }
132                        SelectItem::Expr {
133                            expr: Expr::Column { name, .. },
134                            alias: None,
135                        } if needed.contains(name.as_str()) => {
136                            new_projection.push(item.clone());
137                        }
138                        SelectItem::Wildcard | SelectItem::QualifiedWildcard(_) => {
139                            // Replaced below
140                        }
141                        _ => {}
142                    }
143                }
144
145                // Add needed columns not yet in projection
146                let existing: HashSet<String> = new_projection
147                    .iter()
148                    .filter_map(|item| match item {
149                        SelectItem::Expr { alias: Some(a), .. } => Some(a.clone()),
150                        SelectItem::Expr {
151                            expr: Expr::Column { name, .. },
152                            alias: None,
153                        } => Some(name.clone()),
154                        _ => None,
155                    })
156                    .collect();
157
158                for col in &needed {
159                    if !existing.contains(col) {
160                        new_projection.push(SelectItem::Expr {
161                            expr: Expr::Column {
162                                table: None,
163                                name: col.clone(),
164                            },
165                            alias: None,
166                        });
167                    }
168                }
169
170                if !new_projection.is_empty() {
171                    new_query.projection = new_projection;
172                }
173            } else {
174                // No wildcard: filter existing projection to keep only needed items.
175                // First collect internally referenced columns (from WHERE, GROUP BY, etc.)
176                // to avoid removing columns the subquery itself needs.
177                let mut internal_refs = HashSet::new();
178                if let Some(ref sel) = new_query.selection {
179                    collect_column_refs(sel, &mut internal_refs);
180                }
181                for gexpr in &new_query.group_by {
182                    collect_column_refs(gexpr, &mut internal_refs);
183                }
184                if let Some(ref hav) = new_query.having {
185                    collect_column_refs(hav, &mut internal_refs);
186                }
187                for ord in &new_query.order_by {
188                    collect_column_refs(&ord.expr, &mut internal_refs);
189                }
190
191                new_query.projection.retain(|item| match item {
192                    SelectItem::Wildcard | SelectItem::QualifiedWildcard(_) => true,
193                    SelectItem::Expr { alias: Some(a), .. } => needed.contains(a.as_str()),
194                    SelectItem::Expr {
195                        expr: Expr::Column { name, .. },
196                        alias: None,
197                    } => {
198                        needed.contains(name.as_str())
199                            || internal_refs
200                                .iter()
201                                .any(|r| r == name || r.ends_with(&format!(".{}", name)))
202                    }
203                    SelectItem::Expr { expr, alias: None } => {
204                        let key = format!("{}", expr);
205                        needed.contains(&key)
206                    }
207                });
208
209                // Safety: never produce an empty projection
210                if new_query.projection.is_empty() {
211                    new_query.projection = vec![SelectItem::Wildcard];
212                }
213            }
214
215            // Recursively push projections into the subquery's own FROM clause
216            if let Some(inner_from) = new_query.from.take() {
217                let mut sub_refs = HashSet::new();
218                for item in &new_query.projection {
219                    if let SelectItem::Expr { expr, .. } = item {
220                        collect_column_refs(expr, &mut sub_refs);
221                    }
222                }
223                if let Some(ref sel) = new_query.selection {
224                    collect_column_refs(sel, &mut sub_refs);
225                }
226                for gexpr in &new_query.group_by {
227                    collect_column_refs(gexpr, &mut sub_refs);
228                }
229                if let Some(ref hav) = new_query.having {
230                    collect_column_refs(hav, &mut sub_refs);
231                }
232                for ord in &new_query.order_by {
233                    collect_column_refs(&ord.expr, &mut sub_refs);
234                }
235                new_query.from = Some(push_column_projections(inner_from, &sub_refs));
236            }
237
238            TableReference::Subquery {
239                query: Box::new(new_query),
240                alias,
241            }
242        }
243        TableReference::Join {
244            left,
245            right,
246            join_type,
247            on,
248        } => {
249            // Include join condition columns in the referenced set
250            let mut extended_refs = referenced_columns.clone();
251            if let Some(ref on_expr) = on {
252                collect_column_refs(on_expr, &mut extended_refs);
253            }
254
255            TableReference::Join {
256                left: Box::new(push_column_projections(*left, &extended_refs)),
257                right: Box::new(push_column_projections(*right, &extended_refs)),
258                join_type,
259                on,
260            }
261        }
262        other => other,
263    }
264}
265
#[cfg(test)]
// The helpers below fail fast with `panic!` when a plan has an unexpected shape.
#[allow(clippy::panic)]
mod tests {
    use super::*;

    /// Builds a column expression, optionally qualified by `table`.
    fn column_expr(table: Option<&str>, name: &str) -> Expr {
        Expr::Column {
            table: table.map(str::to_string),
            name: name.to_string(),
        }
    }

    /// Builds a `SELECT <projection> FROM <from> [WHERE <selection>]` statement.
    fn select(
        projection: Vec<SelectItem>,
        from: TableReference,
        selection: Option<Expr>,
    ) -> SelectStatement {
        SelectStatement {
            projection,
            from: Some(from),
            selection,
            group_by: Vec::new(),
            having: None,
            order_by: Vec::new(),
            limit: None,
            offset: None,
        }
    }

    /// Builds `(SELECT * FROM t) AS sub`.
    fn wildcard_subquery() -> TableReference {
        TableReference::Subquery {
            query: Box::new(select(
                vec![SelectItem::Wildcard],
                TableReference::Table {
                    name: "t".to_string(),
                    alias: None,
                },
                None,
            )),
            alias: "sub".to_string(),
        }
    }

    /// Builds the projection item `sub.<name>`.
    fn sub_column(name: &str) -> SelectItem {
        SelectItem::Expr {
            expr: column_expr(Some("sub"), name),
            alias: None,
        }
    }

    /// Runs the rule and fails the test if it reports an error.
    fn optimize(stmt: SelectStatement) -> SelectStatement {
        ProjectionPushdown
            .apply(stmt)
            .unwrap_or_else(|err| panic!("Projection pushdown should succeed: {err:?}"))
    }

    /// Returns the projection of the subquery in FROM, failing the test when
    /// FROM is anything else, so assertions can never be skipped silently.
    fn subquery_projection(stmt: &SelectStatement) -> &[SelectItem] {
        let Some(TableReference::Subquery { query, .. }) = &stmt.from else {
            panic!("FROM should be a subquery");
        };
        &query.projection
    }

    /// Names of the projected columns; every item must be a plain column.
    fn projected_columns(items: &[SelectItem]) -> HashSet<&str> {
        items
            .iter()
            .map(|item| match item {
                SelectItem::Expr {
                    expr: Expr::Column { name, .. },
                    alias: None,
                } => name.as_str(),
                _ => panic!("Wildcard should be replaced with specific columns"),
            })
            .collect()
    }

    #[test]
    fn test_projection_pushdown_subquery_wildcard() {
        // SELECT sub.x FROM (SELECT * FROM t) AS sub
        // -> SELECT sub.x FROM (SELECT x FROM t) AS sub
        let stmt = select(vec![sub_column("x")], wildcard_subquery(), None);

        let result = optimize(stmt);
        let projection = subquery_projection(&result);

        // The wildcard is gone and exactly one column (x) is projected.
        assert_eq!(projection.len(), 1);
        assert_eq!(projected_columns(projection), HashSet::from(["x"]));
    }

    #[test]
    fn test_projection_pushdown_outer_wildcard_skips() {
        // SELECT * FROM (SELECT * FROM t) AS sub
        // Outer wildcard means all columns are needed; no pushdown possible
        let stmt = select(vec![SelectItem::Wildcard], wildcard_subquery(), None);

        let result = optimize(stmt);
        let projection = subquery_projection(&result);

        // Subquery should still be exactly `SELECT *` (early return for outer wildcard)
        assert_eq!(projection.len(), 1);
        assert!(matches!(projection[0], SelectItem::Wildcard));
    }

    #[test]
    fn test_projection_pushdown_with_where_columns() {
        // SELECT sub.x FROM (SELECT * FROM t) AS sub WHERE sub.y > 10
        // -> SELECT sub.x FROM (SELECT x, y FROM t) AS sub WHERE sub.y > 10
        let filter = Expr::BinaryOp {
            left: Box::new(column_expr(Some("sub"), "y")),
            op: BinaryOperator::Gt,
            right: Box::new(Expr::Literal(Literal::Integer(10))),
        };
        let stmt = select(vec![sub_column("x")], wildcard_subquery(), Some(filter));

        let result = optimize(stmt);
        let projection = subquery_projection(&result);

        // Subquery should have exactly 2 columns: x and y (in any order)
        assert_eq!(projection.len(), 2, "Subquery should project x and y");
        assert_eq!(projected_columns(projection), HashSet::from(["x", "y"]));
    }
}