sql_cli/data/
subquery_executor.rs

1// Subquery execution handler
2// Walks the AST to evaluate subqueries and replace them with their results
3
4use crate::data::data_view::DataView;
5use crate::data::datatable::{DataTable, DataValue};
6use crate::data::query_engine::QueryEngine;
7use crate::sql::parser::ast::{Condition, SelectItem, SelectStatement, SqlExpression, WhereClause};
8use anyhow::{anyhow, Result};
9use std::collections::{HashMap, HashSet};
10use std::sync::Arc;
11use tracing::{debug, info};
12
13/// Result of executing a subquery
14#[derive(Debug, Clone)]
15pub enum SubqueryResult {
16    /// Scalar subquery returned a single value
17    Scalar(DataValue),
18    /// IN subquery returned a set of values
19    ValueSet(HashSet<DataValue>),
20    /// Subquery returned multiple rows/columns (for future use)
21    Table(Arc<DataView>),
22}
23
24/// Executes subqueries within a SQL statement
25pub struct SubqueryExecutor {
26    query_engine: QueryEngine,
27    source_table: Arc<DataTable>,
28    /// Cache of executed subqueries to avoid re-execution
29    cache: HashMap<String, SubqueryResult>,
30    /// CTE context for resolving CTE references in subqueries
31    cte_context: HashMap<String, Arc<DataView>>,
32}
33
34impl SubqueryExecutor {
35    /// Create a new subquery executor
36    pub fn new(query_engine: QueryEngine, source_table: Arc<DataTable>) -> Self {
37        Self {
38            query_engine,
39            source_table,
40            cache: HashMap::new(),
41            cte_context: HashMap::new(),
42        }
43    }
44
45    /// Create a new subquery executor with CTE context
46    pub fn with_cte_context(
47        query_engine: QueryEngine,
48        source_table: Arc<DataTable>,
49        cte_context: HashMap<String, Arc<DataView>>,
50    ) -> Self {
51        Self {
52            query_engine,
53            source_table,
54            cache: HashMap::new(),
55            cte_context,
56        }
57    }
58
59    /// Execute all subqueries in a statement and return a modified statement
60    /// with subqueries replaced by their results
61    pub fn execute_subqueries(&mut self, statement: &SelectStatement) -> Result<SelectStatement> {
62        info!("SubqueryExecutor: Starting subquery execution pass");
63        info!(
64            "SubqueryExecutor: Available CTEs: {:?}",
65            self.cte_context.keys().collect::<Vec<_>>()
66        );
67
68        // Clone the statement to modify
69        let mut modified_statement = statement.clone();
70
71        // Process WHERE clause if present
72        if let Some(ref where_clause) = statement.where_clause {
73            debug!("SubqueryExecutor: Processing WHERE clause for subqueries");
74            let mut new_conditions = Vec::new();
75            for condition in &where_clause.conditions {
76                new_conditions.push(Condition {
77                    expr: self.process_expression(&condition.expr)?,
78                    connector: condition.connector.clone(),
79                });
80            }
81            modified_statement.where_clause = Some(WhereClause {
82                conditions: new_conditions,
83            });
84        }
85
86        // Process SELECT items
87        let mut new_select_items = Vec::new();
88        for item in &statement.select_items {
89            match item {
90                SelectItem::Column {
91                    column: col,
92                    leading_comments,
93                    trailing_comment,
94                } => {
95                    new_select_items.push(SelectItem::Column {
96                        column: col.clone(),
97                        leading_comments: leading_comments.clone(),
98                        trailing_comment: trailing_comment.clone(),
99                    });
100                }
101                SelectItem::Expression {
102                    expr,
103                    alias,
104                    leading_comments,
105                    trailing_comment,
106                } => {
107                    new_select_items.push(SelectItem::Expression {
108                        expr: self.process_expression(expr)?,
109                        alias: alias.clone(),
110                        leading_comments: leading_comments.clone(),
111                        trailing_comment: trailing_comment.clone(),
112                    });
113                }
114                SelectItem::Star {
115                    leading_comments,
116                    trailing_comment,
117                } => {
118                    new_select_items.push(SelectItem::Star {
119                        leading_comments: leading_comments.clone(),
120                        trailing_comment: trailing_comment.clone(),
121                    });
122                }
123            }
124        }
125        modified_statement.select_items = new_select_items;
126
127        // Process HAVING clause if present
128        if let Some(ref having) = statement.having {
129            debug!("SubqueryExecutor: Processing HAVING clause for subqueries");
130            modified_statement.having = Some(self.process_expression(having)?);
131        }
132
133        debug!("SubqueryExecutor: Subquery execution complete");
134        Ok(modified_statement)
135    }
136
137    /// Process an expression, executing any subqueries and replacing them with results
138    fn process_expression(&mut self, expr: &SqlExpression) -> Result<SqlExpression> {
139        match expr {
140            SqlExpression::ScalarSubquery { query } => {
141                debug!("SubqueryExecutor: Executing scalar subquery");
142                let result = self.execute_scalar_subquery(query)?;
143                Ok(result)
144            }
145
146            SqlExpression::InSubquery { expr, subquery } => {
147                debug!("SubqueryExecutor: Executing IN subquery");
148                let values = self.execute_in_subquery(subquery)?;
149
150                // Replace with InList containing the actual values
151                Ok(SqlExpression::InList {
152                    expr: Box::new(self.process_expression(expr)?),
153                    values: values
154                        .into_iter()
155                        .map(|v| match v {
156                            DataValue::Null => SqlExpression::Null,
157                            DataValue::Integer(i) => SqlExpression::NumberLiteral(i.to_string()),
158                            DataValue::Float(f) => SqlExpression::NumberLiteral(f.to_string()),
159                            DataValue::String(s) => SqlExpression::StringLiteral(s),
160                            DataValue::InternedString(s) => {
161                                SqlExpression::StringLiteral(s.to_string())
162                            }
163                            DataValue::Boolean(b) => SqlExpression::BooleanLiteral(b),
164                            DataValue::DateTime(dt) => SqlExpression::StringLiteral(dt),
165                        })
166                        .collect(),
167                })
168            }
169
170            SqlExpression::NotInSubquery { expr, subquery } => {
171                debug!("SubqueryExecutor: Executing NOT IN subquery");
172                let values = self.execute_in_subquery(subquery)?;
173
174                // Replace with NotInList containing the actual values
175                Ok(SqlExpression::NotInList {
176                    expr: Box::new(self.process_expression(expr)?),
177                    values: values
178                        .into_iter()
179                        .map(|v| match v {
180                            DataValue::Null => SqlExpression::Null,
181                            DataValue::Integer(i) => SqlExpression::NumberLiteral(i.to_string()),
182                            DataValue::Float(f) => SqlExpression::NumberLiteral(f.to_string()),
183                            DataValue::String(s) => SqlExpression::StringLiteral(s),
184                            DataValue::InternedString(s) => {
185                                SqlExpression::StringLiteral(s.to_string())
186                            }
187                            DataValue::Boolean(b) => SqlExpression::BooleanLiteral(b),
188                            DataValue::DateTime(dt) => SqlExpression::StringLiteral(dt),
189                        })
190                        .collect(),
191                })
192            }
193
194            // Process nested expressions
195            SqlExpression::BinaryOp { left, op, right } => Ok(SqlExpression::BinaryOp {
196                left: Box::new(self.process_expression(left)?),
197                op: op.clone(),
198                right: Box::new(self.process_expression(right)?),
199            }),
200
201            // Note: UnaryOp doesn't exist in the current AST, handle negation differently
202            // This case might need to be removed or adapted based on actual AST structure
203            SqlExpression::Between { expr, lower, upper } => Ok(SqlExpression::Between {
204                expr: Box::new(self.process_expression(expr)?),
205                lower: Box::new(self.process_expression(lower)?),
206                upper: Box::new(self.process_expression(upper)?),
207            }),
208
209            SqlExpression::InList { expr, values } => Ok(SqlExpression::InList {
210                expr: Box::new(self.process_expression(expr)?),
211                values: values
212                    .iter()
213                    .map(|v| self.process_expression(v))
214                    .collect::<Result<Vec<_>>>()?,
215            }),
216
217            SqlExpression::NotInList { expr, values } => Ok(SqlExpression::NotInList {
218                expr: Box::new(self.process_expression(expr)?),
219                values: values
220                    .iter()
221                    .map(|v| self.process_expression(v))
222                    .collect::<Result<Vec<_>>>()?,
223            }),
224
225            // CaseWhen doesn't exist in current AST, skip for now
226            SqlExpression::FunctionCall {
227                name,
228                args,
229                distinct,
230            } => Ok(SqlExpression::FunctionCall {
231                name: name.clone(),
232                args: args
233                    .iter()
234                    .map(|a| self.process_expression(a))
235                    .collect::<Result<Vec<_>>>()?,
236                distinct: *distinct,
237            }),
238
239            // Pass through expressions that don't contain subqueries
240            _ => Ok(expr.clone()),
241        }
242    }
243
244    /// Execute a scalar subquery and return a single value
245    fn execute_scalar_subquery(&mut self, query: &SelectStatement) -> Result<SqlExpression> {
246        let cache_key = format!("scalar:{:?}", query);
247
248        // Check cache first
249        if let Some(cached) = self.cache.get(&cache_key) {
250            debug!("SubqueryExecutor: Using cached scalar subquery result");
251            if let SubqueryResult::Scalar(value) = cached {
252                return Ok(self.datavalue_to_expression(value.clone()));
253            }
254        }
255
256        info!("SubqueryExecutor: Executing scalar subquery");
257
258        // Execute the subquery using execute_statement_with_cte_context
259        let result_view = self.query_engine.execute_statement_with_cte_context(
260            self.source_table.clone(),
261            query.clone(),
262            &self.cte_context,
263        )?;
264
265        // Scalar subquery must return exactly one row and one column
266        if result_view.row_count() != 1 {
267            return Err(anyhow!(
268                "Scalar subquery returned {} rows, expected exactly 1",
269                result_view.row_count()
270            ));
271        }
272
273        if result_view.column_count() != 1 {
274            return Err(anyhow!(
275                "Scalar subquery returned {} columns, expected exactly 1",
276                result_view.column_count()
277            ));
278        }
279
280        // Get the single value
281        let value = if let Some(row) = result_view.get_row(0) {
282            row.values.get(0).cloned().unwrap_or(DataValue::Null)
283        } else {
284            DataValue::Null
285        };
286
287        // Cache the result
288        self.cache
289            .insert(cache_key, SubqueryResult::Scalar(value.clone()));
290
291        Ok(self.datavalue_to_expression(value))
292    }
293
294    /// Execute an IN subquery and return a set of values
295    fn execute_in_subquery(&mut self, query: &SelectStatement) -> Result<Vec<DataValue>> {
296        let cache_key = format!("in:{:?}", query);
297
298        // Check cache first
299        if let Some(cached) = self.cache.get(&cache_key) {
300            debug!("SubqueryExecutor: Using cached IN subquery result");
301            if let SubqueryResult::ValueSet(values) = cached {
302                return Ok(values.iter().cloned().collect());
303            }
304        }
305
306        info!("SubqueryExecutor: Executing IN subquery");
307        debug!(
308            "SubqueryExecutor: Available CTEs in context: {:?}",
309            self.cte_context.keys().collect::<Vec<_>>()
310        );
311        debug!("SubqueryExecutor: Subquery: {:?}", query);
312
313        // Execute the subquery using execute_statement_with_cte_context
314        let result_view = self.query_engine.execute_statement_with_cte_context(
315            self.source_table.clone(),
316            query.clone(),
317            &self.cte_context,
318        )?;
319
320        debug!(
321            "SubqueryExecutor: IN subquery returned {} rows",
322            result_view.row_count()
323        );
324
325        // IN subquery must return exactly one column
326        if result_view.column_count() != 1 {
327            return Err(anyhow!(
328                "IN subquery returned {} columns, expected exactly 1",
329                result_view.column_count()
330            ));
331        }
332
333        // Collect all values from the first column
334        let mut values = HashSet::new();
335        for row_idx in 0..result_view.row_count() {
336            if let Some(row) = result_view.get_row(row_idx) {
337                if let Some(value) = row.values.get(0) {
338                    values.insert(value.clone());
339                }
340            }
341        }
342
343        // Cache the result
344        self.cache
345            .insert(cache_key, SubqueryResult::ValueSet(values.clone()));
346
347        Ok(values.into_iter().collect())
348    }
349
350    /// Convert a DataValue to a SqlExpression
351    fn datavalue_to_expression(&self, value: DataValue) -> SqlExpression {
352        match value {
353            DataValue::Null => SqlExpression::Null,
354            DataValue::Integer(i) => SqlExpression::NumberLiteral(i.to_string()),
355            DataValue::Float(f) => SqlExpression::NumberLiteral(f.to_string()),
356            DataValue::String(s) => SqlExpression::StringLiteral(s),
357            DataValue::InternedString(s) => SqlExpression::StringLiteral(s.to_string()),
358            DataValue::Boolean(b) => SqlExpression::BooleanLiteral(b),
359            DataValue::DateTime(dt) => SqlExpression::StringLiteral(dt),
360        }
361    }
362}