sql_cli/data/
query_engine.rs

1use anyhow::{anyhow, Result};
2use std::collections::HashMap;
3use std::sync::Arc;
4use std::time::Instant;
5use tracing::{debug, info};
6
7use crate::config::config::BehaviorConfig;
8use crate::config::global::get_date_notation;
9use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
10use crate::data::data_view::DataView;
11use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
12use crate::data::group_by_expressions::GroupByExpressions;
13use crate::data::hash_join::HashJoinExecutor;
14use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
15use crate::data::subquery_executor::SubqueryExecutor;
16use crate::data::virtual_table_generator::VirtualTableGenerator;
17use crate::execution_plan::{ExecutionPlan, ExecutionPlanBuilder, StepType};
18use crate::sql::aggregates::contains_aggregate;
19use crate::sql::parser::ast::TableSource;
20use crate::sql::recursive_parser::{
21    OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection, SqlExpression, TableFunction,
22};
23
24/// Query engine that executes SQL directly on `DataTable`
25#[derive(Clone)]
26pub struct QueryEngine {
27    case_insensitive: bool,
28    date_notation: String,
29    behavior_config: Option<BehaviorConfig>,
30}
31
32impl Default for QueryEngine {
33    fn default() -> Self {
34        Self::new()
35    }
36}
37
38impl QueryEngine {
39    #[must_use]
40    pub fn new() -> Self {
41        Self {
42            case_insensitive: false,
43            date_notation: get_date_notation(),
44            behavior_config: None,
45        }
46    }
47
48    #[must_use]
49    pub fn with_behavior_config(config: BehaviorConfig) -> Self {
50        let case_insensitive = config.case_insensitive_default;
51        // Use get_date_notation() to respect environment variable override
52        let date_notation = get_date_notation();
53        Self {
54            case_insensitive,
55            date_notation,
56            behavior_config: Some(config),
57        }
58    }
59
60    #[must_use]
61    pub fn with_date_notation(_date_notation: String) -> Self {
62        Self {
63            case_insensitive: false,
64            date_notation: get_date_notation(), // Always use the global function
65            behavior_config: None,
66        }
67    }
68
69    #[must_use]
70    pub fn with_case_insensitive(case_insensitive: bool) -> Self {
71        Self {
72            case_insensitive,
73            date_notation: get_date_notation(),
74            behavior_config: None,
75        }
76    }
77
78    #[must_use]
79    pub fn with_case_insensitive_and_date_notation(
80        case_insensitive: bool,
81        _date_notation: String, // Keep parameter for compatibility but use get_date_notation()
82    ) -> Self {
83        Self {
84            case_insensitive,
85            date_notation: get_date_notation(), // Always use the global function
86            behavior_config: None,
87        }
88    }
89
90    /// Find a column name similar to the given name using edit distance
91    fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
92        let columns = table.column_names();
93        let mut best_match: Option<(String, usize)> = None;
94
95        for col in columns {
96            let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
97            // Only suggest if distance is small (likely a typo)
98            // Allow up to 3 edits for longer names
99            let max_distance = if name.len() > 10 { 3 } else { 2 };
100            if distance <= max_distance {
101                match &best_match {
102                    None => best_match = Some((col, distance)),
103                    Some((_, best_dist)) if distance < *best_dist => {
104                        best_match = Some((col, distance));
105                    }
106                    _ => {}
107                }
108            }
109        }
110
111        best_match.map(|(name, _)| name)
112    }
113
114    /// Calculate Levenshtein edit distance between two strings
115    fn edit_distance(&self, s1: &str, s2: &str) -> usize {
116        let len1 = s1.len();
117        let len2 = s2.len();
118        let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
119
120        for i in 0..=len1 {
121            matrix[i][0] = i;
122        }
123        for j in 0..=len2 {
124            matrix[0][j] = j;
125        }
126
127        for (i, c1) in s1.chars().enumerate() {
128            for (j, c2) in s2.chars().enumerate() {
129                let cost = usize::from(c1 != c2);
130                matrix[i + 1][j + 1] = std::cmp::min(
131                    matrix[i][j + 1] + 1, // deletion
132                    std::cmp::min(
133                        matrix[i + 1][j] + 1, // insertion
134                        matrix[i][j] + cost,  // substitution
135                    ),
136                );
137            }
138        }
139
140        matrix[len1][len2]
141    }
142
143    /// Execute a SQL query on a `DataTable` and return a `DataView` (for backward compatibility)
144    pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
145        let (view, _plan) = self.execute_with_plan(table, sql)?;
146        Ok(view)
147    }
148
149    /// Execute a parsed SelectStatement on a `DataTable` and return a `DataView`
150    pub fn execute_statement(
151        &self,
152        table: Arc<DataTable>,
153        statement: SelectStatement,
154    ) -> Result<DataView> {
155        // First process CTEs to build context
156        let mut cte_context = HashMap::new();
157        for cte in &statement.ctes {
158            debug!("QueryEngine: Pre-processing CTE '{}'...", cte.name);
159            // Execute the CTE query (it might reference earlier CTEs)
160            let cte_result =
161                self.build_view_with_context(table.clone(), cte.query.clone(), &mut cte_context)?;
162            // Store the result in the context for later use
163            cte_context.insert(cte.name.clone(), Arc::new(cte_result));
164            debug!(
165                "QueryEngine: CTE '{}' pre-processed, stored in context",
166                cte.name
167            );
168        }
169
170        // Now process subqueries with CTE context available
171        let mut subquery_executor =
172            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
173        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
174
175        // Build the view with the same CTE context
176        self.build_view_with_context(table, processed_statement, &mut cte_context)
177    }
178
179    /// Execute a statement with provided CTE context (for subqueries)
180    pub fn execute_statement_with_cte_context(
181        &self,
182        table: Arc<DataTable>,
183        statement: SelectStatement,
184        cte_context: &HashMap<String, Arc<DataView>>,
185    ) -> Result<DataView> {
186        // Clone the context so we can add any CTEs from this statement
187        let mut local_context = cte_context.clone();
188
189        // Process any CTEs in this statement (they might be nested)
190        for cte in &statement.ctes {
191            debug!("QueryEngine: Processing nested CTE '{}'...", cte.name);
192            let cte_result =
193                self.build_view_with_context(table.clone(), cte.query.clone(), &mut local_context)?;
194            local_context.insert(cte.name.clone(), Arc::new(cte_result));
195        }
196
197        // Process subqueries with the complete context
198        let mut subquery_executor =
199            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), local_context.clone());
200        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
201
202        // Build the view
203        self.build_view_with_context(table, processed_statement, &mut local_context)
204    }
205
206    /// Execute a query and return both the result and the execution plan
207    pub fn execute_with_plan(
208        &self,
209        table: Arc<DataTable>,
210        sql: &str,
211    ) -> Result<(DataView, ExecutionPlan)> {
212        let mut plan_builder = ExecutionPlanBuilder::new();
213        let start_time = Instant::now();
214
215        // Parse the SQL query
216        plan_builder.begin_step(StepType::Parse, "Parse SQL query".to_string());
217        plan_builder.add_detail(format!("Query: {}", sql));
218        let mut parser = Parser::new(sql);
219        let statement = parser
220            .parse()
221            .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
222        plan_builder.add_detail(format!("Parsed successfully"));
223        if let Some(from) = &statement.from_table {
224            plan_builder.add_detail(format!("FROM: {}", from));
225        }
226        if statement.where_clause.is_some() {
227            plan_builder.add_detail("WHERE clause present".to_string());
228        }
229        plan_builder.end_step();
230
231        // First process CTEs to build context
232        let mut cte_context = HashMap::new();
233
234        if !statement.ctes.is_empty() {
235            plan_builder.begin_step(
236                StepType::CTE,
237                format!("Process {} CTEs", statement.ctes.len()),
238            );
239
240            for cte in &statement.ctes {
241                let cte_start = Instant::now();
242                plan_builder.begin_step(StepType::CTE, format!("CTE '{}'", cte.name));
243
244                // Add CTE query details
245                if let Some(from) = &cte.query.from_table {
246                    plan_builder.add_detail(format!("Source: {}", from));
247                }
248                if cte.query.where_clause.is_some() {
249                    plan_builder.add_detail("Has WHERE clause".to_string());
250                }
251                if cte.query.group_by.is_some() {
252                    plan_builder.add_detail("Has GROUP BY".to_string());
253                }
254
255                let cte_result = self.build_view_with_context(
256                    table.clone(),
257                    cte.query.clone(),
258                    &mut cte_context,
259                )?;
260
261                // Record CTE statistics
262                plan_builder.set_rows_out(cte_result.row_count());
263                plan_builder.add_detail(format!(
264                    "Result: {} rows, {} columns",
265                    cte_result.row_count(),
266                    cte_result.column_count()
267                ));
268                plan_builder.add_detail(format!(
269                    "Execution time: {:.3}ms",
270                    cte_start.elapsed().as_secs_f64() * 1000.0
271                ));
272
273                cte_context.insert(cte.name.clone(), Arc::new(cte_result));
274                plan_builder.end_step();
275            }
276
277            plan_builder.add_detail(format!(
278                "All {} CTEs cached in context",
279                statement.ctes.len()
280            ));
281            plan_builder.end_step();
282        }
283
284        // Process subqueries in the statement with CTE context
285        plan_builder.begin_step(StepType::Subquery, "Process subqueries".to_string());
286        let mut subquery_executor =
287            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
288
289        // Check if there are subqueries to process
290        let has_subqueries = statement.where_clause.as_ref().map_or(false, |w| {
291            // This is a simplified check - in reality we'd need to walk the AST
292            format!("{:?}", w).contains("Subquery")
293        });
294
295        if has_subqueries {
296            plan_builder.add_detail("Evaluating subqueries in WHERE clause".to_string());
297        }
298
299        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
300
301        if has_subqueries {
302            plan_builder.add_detail("Subqueries replaced with materialized values".to_string());
303        } else {
304            plan_builder.add_detail("No subqueries to process".to_string());
305        }
306
307        plan_builder.end_step();
308        let result = self.build_view_with_context_and_plan(
309            table,
310            processed_statement,
311            &mut cte_context,
312            &mut plan_builder,
313        )?;
314
315        let total_duration = start_time.elapsed();
316        info!(
317            "Query execution complete: total={:?}, rows={}",
318            total_duration,
319            result.row_count()
320        );
321
322        let plan = plan_builder.build();
323        Ok((result, plan))
324    }
325
326    /// Build a `DataView` from a parsed SQL statement
327    fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
328        let mut cte_context = HashMap::new();
329        self.build_view_with_context(table, statement, &mut cte_context)
330    }
331
332    /// Build a DataView from a SelectStatement with CTE context
333    fn build_view_with_context(
334        &self,
335        table: Arc<DataTable>,
336        statement: SelectStatement,
337        cte_context: &mut HashMap<String, Arc<DataView>>,
338    ) -> Result<DataView> {
339        let mut dummy_plan = ExecutionPlanBuilder::new();
340        self.build_view_with_context_and_plan(table, statement, cte_context, &mut dummy_plan)
341    }
342
343    /// Build a DataView from a SelectStatement with CTE context and execution plan tracking
344    fn build_view_with_context_and_plan(
345        &self,
346        table: Arc<DataTable>,
347        statement: SelectStatement,
348        cte_context: &mut HashMap<String, Arc<DataView>>,
349        plan: &mut ExecutionPlanBuilder,
350    ) -> Result<DataView> {
351        // First, process any CTEs
352        for cte in &statement.ctes {
353            debug!("QueryEngine: Processing CTE '{}'...", cte.name);
354            // Execute the CTE query (it might reference earlier CTEs)
355            let cte_result =
356                self.build_view_with_context(table.clone(), cte.query.clone(), cte_context)?;
357
358            // Store the result in the context for later use
359            cte_context.insert(cte.name.clone(), Arc::new(cte_result));
360            debug!(
361                "QueryEngine: CTE '{}' processed, stored in context",
362                cte.name
363            );
364        }
365
366        // Determine the source table for the main query
367        let source_table = if let Some(ref table_func) = statement.from_function {
368            // Handle table functions like RANGE()
369            debug!("QueryEngine: Processing table function...");
370            match table_func {
371                TableFunction::Range { start, end, step } => {
372                    // Evaluate expressions to get numeric values
373                    let mut evaluator =
374                        ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
375
376                    // Create a dummy row for evaluating constant expressions
377                    let dummy_row = 0;
378
379                    let start_val = evaluator.evaluate(start, dummy_row)?;
380                    let end_val = evaluator.evaluate(end, dummy_row)?;
381                    let step_val = if let Some(step_expr) = step {
382                        Some(evaluator.evaluate(step_expr, dummy_row)?)
383                    } else {
384                        None
385                    };
386
387                    // Convert DataValues to integers
388                    let start_int = match start_val {
389                        DataValue::Integer(i) => i,
390                        DataValue::Float(f) => f as i64,
391                        _ => return Err(anyhow!("RANGE start must be numeric")),
392                    };
393
394                    let end_int = match end_val {
395                        DataValue::Integer(i) => i,
396                        DataValue::Float(f) => f as i64,
397                        _ => return Err(anyhow!("RANGE end must be numeric")),
398                    };
399
400                    let step_int = if let Some(step) = step_val {
401                        match step {
402                            DataValue::Integer(i) => Some(i),
403                            DataValue::Float(f) => Some(f as i64),
404                            _ => return Err(anyhow!("RANGE step must be numeric")),
405                        }
406                    } else {
407                        None
408                    };
409
410                    // Generate the virtual table
411                    VirtualTableGenerator::generate_range(start_int, end_int, step_int, None)?
412                }
413            }
414        } else if let Some(ref subquery) = statement.from_subquery {
415            // Execute the subquery and use its result as the source
416            debug!("QueryEngine: Processing FROM subquery...");
417            let subquery_result =
418                self.build_view_with_context(table.clone(), *subquery.clone(), cte_context)?;
419
420            // Convert the DataView to a DataTable for use as source
421            // This materializes the subquery result
422            let materialized = self.materialize_view(subquery_result)?;
423            Arc::new(materialized)
424        } else if let Some(ref table_name) = statement.from_table {
425            // Check if this references a CTE
426            if let Some(cte_view) = cte_context.get(table_name) {
427                debug!("QueryEngine: Using CTE '{}' as source table", table_name);
428                // Materialize the CTE view as a table
429                let materialized = self.materialize_view((**cte_view).clone())?;
430                Arc::new(materialized)
431            } else {
432                // Regular table reference - use the provided table
433                table.clone()
434            }
435        } else {
436            // No FROM clause - use the provided table
437            table.clone()
438        };
439
440        // Process JOINs if present
441        let final_table = if !statement.joins.is_empty() {
442            plan.begin_step(
443                StepType::Join,
444                format!("Process {} JOINs", statement.joins.len()),
445            );
446            plan.set_rows_in(source_table.row_count());
447
448            let join_executor = HashJoinExecutor::new(self.case_insensitive);
449            let mut current_table = source_table;
450
451            for (idx, join_clause) in statement.joins.iter().enumerate() {
452                let join_start = Instant::now();
453                plan.begin_step(StepType::Join, format!("JOIN #{}", idx + 1));
454                plan.add_detail(format!("Type: {:?}", join_clause.join_type));
455                plan.add_detail(format!("Left table: {} rows", current_table.row_count()));
456                plan.add_detail(format!(
457                    "Executing {:?} JOIN on {}",
458                    join_clause.join_type, join_clause.condition.left_column
459                ));
460
461                // Resolve the right table for the join
462                let right_table = match &join_clause.table {
463                    TableSource::Table(name) => {
464                        // Check if it's a CTE reference
465                        if let Some(cte_view) = cte_context.get(name) {
466                            let materialized = self.materialize_view((**cte_view).clone())?;
467                            Arc::new(materialized)
468                        } else {
469                            // For now, we need the actual table data
470                            // In a real implementation, this would load from file
471                            return Err(anyhow!("Cannot resolve table '{}' for JOIN", name));
472                        }
473                    }
474                    TableSource::DerivedTable { query, alias: _ } => {
475                        // Execute the subquery
476                        let subquery_result = self.build_view_with_context(
477                            table.clone(),
478                            *query.clone(),
479                            cte_context,
480                        )?;
481                        let materialized = self.materialize_view(subquery_result)?;
482                        Arc::new(materialized)
483                    }
484                };
485
486                // Execute the join
487                let joined = join_executor.execute_join(
488                    current_table.clone(),
489                    join_clause,
490                    right_table.clone(),
491                )?;
492
493                plan.add_detail(format!("Right table: {} rows", right_table.row_count()));
494                plan.set_rows_out(joined.row_count());
495                plan.add_detail(format!("Result: {} rows", joined.row_count()));
496                plan.add_detail(format!(
497                    "Join time: {:.3}ms",
498                    join_start.elapsed().as_secs_f64() * 1000.0
499                ));
500                plan.end_step();
501
502                current_table = Arc::new(joined);
503            }
504
505            plan.set_rows_out(current_table.row_count());
506            plan.add_detail(format!(
507                "Final result after all joins: {} rows",
508                current_table.row_count()
509            ));
510            plan.end_step();
511            current_table
512        } else {
513            source_table
514        };
515
516        // Continue with the existing build_view logic but using final_table
517        self.build_view_internal_with_plan(final_table, statement, plan)
518    }
519
520    /// Materialize a DataView into a new DataTable
521    fn materialize_view(&self, view: DataView) -> Result<DataTable> {
522        let source = view.source();
523        let mut result_table = DataTable::new("derived");
524
525        // Get the visible columns from the view
526        let visible_cols = view.visible_column_indices().to_vec();
527
528        // Copy column definitions
529        for col_idx in &visible_cols {
530            let col = &source.columns[*col_idx];
531            let new_col = DataColumn {
532                name: col.name.clone(),
533                data_type: col.data_type.clone(),
534                nullable: col.nullable,
535                unique_values: col.unique_values,
536                null_count: col.null_count,
537                metadata: col.metadata.clone(),
538            };
539            result_table.add_column(new_col);
540        }
541
542        // Copy visible rows
543        for row_idx in view.visible_row_indices() {
544            let source_row = &source.rows[*row_idx];
545            let mut new_row = DataRow { values: Vec::new() };
546
547            for col_idx in &visible_cols {
548                new_row.values.push(source_row.values[*col_idx].clone());
549            }
550
551            result_table.add_row(new_row);
552        }
553
554        Ok(result_table)
555    }
556
557    fn build_view_internal(
558        &self,
559        table: Arc<DataTable>,
560        statement: SelectStatement,
561    ) -> Result<DataView> {
562        let mut dummy_plan = ExecutionPlanBuilder::new();
563        self.build_view_internal_with_plan(table, statement, &mut dummy_plan)
564    }
565
566    fn build_view_internal_with_plan(
567        &self,
568        table: Arc<DataTable>,
569        statement: SelectStatement,
570        plan: &mut ExecutionPlanBuilder,
571    ) -> Result<DataView> {
572        debug!(
573            "QueryEngine::build_view - select_items: {:?}",
574            statement.select_items
575        );
576        debug!(
577            "QueryEngine::build_view - where_clause: {:?}",
578            statement.where_clause
579        );
580
581        // Start with all rows visible
582        let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
583
584        // Apply WHERE clause filtering using recursive evaluator
585        if let Some(where_clause) = &statement.where_clause {
586            let total_rows = table.row_count();
587            debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
588            debug!("QueryEngine: WHERE clause = {:?}", where_clause);
589
590            plan.begin_step(StepType::Filter, "WHERE clause filtering".to_string());
591            plan.set_rows_in(total_rows);
592            plan.add_detail(format!("Input: {} rows", total_rows));
593
594            // Add details about WHERE conditions
595            for condition in &where_clause.conditions {
596                plan.add_detail(format!("Condition: {:?}", condition.expr));
597            }
598
599            let filter_start = Instant::now();
600            // Filter visible rows based on WHERE clause
601            let mut filtered_rows = Vec::new();
602            for row_idx in visible_rows {
603                // Only log for first few rows to avoid performance impact
604                if row_idx < 3 {
605                    debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
606                }
607                let evaluator = RecursiveWhereEvaluator::with_config(
608                    &table,
609                    self.case_insensitive,
610                    self.date_notation.clone(),
611                );
612                match evaluator.evaluate(where_clause, row_idx) {
613                    Ok(result) => {
614                        if row_idx < 3 {
615                            debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
616                        }
617                        if result {
618                            filtered_rows.push(row_idx);
619                        }
620                    }
621                    Err(e) => {
622                        if row_idx < 3 {
623                            debug!(
624                                "QueryEngine: WHERE evaluation error for row {}: {}",
625                                row_idx, e
626                            );
627                        }
628                        // Propagate WHERE clause errors instead of silently ignoring them
629                        return Err(e);
630                    }
631                }
632            }
633            visible_rows = filtered_rows;
634            let filter_duration = filter_start.elapsed();
635            info!(
636                "WHERE clause filtering: {} rows -> {} rows in {:?}",
637                total_rows,
638                visible_rows.len(),
639                filter_duration
640            );
641
642            plan.set_rows_out(visible_rows.len());
643            plan.add_detail(format!("Output: {} rows", visible_rows.len()));
644            plan.add_detail(format!(
645                "Filter time: {:.3}ms",
646                filter_duration.as_secs_f64() * 1000.0
647            ));
648            plan.end_step();
649        }
650
651        // Create initial DataView with filtered rows
652        let mut view = DataView::new(table.clone());
653        view = view.with_rows(visible_rows);
654
655        // Handle GROUP BY if present
656        if let Some(group_by_exprs) = &statement.group_by {
657            if !group_by_exprs.is_empty() {
658                debug!("QueryEngine: Processing GROUP BY: {:?}", group_by_exprs);
659
660                plan.begin_step(
661                    StepType::GroupBy,
662                    format!("GROUP BY {} expressions", group_by_exprs.len()),
663                );
664                plan.set_rows_in(view.row_count());
665                plan.add_detail(format!("Input: {} rows", view.row_count()));
666                for expr in group_by_exprs {
667                    plan.add_detail(format!("Group by: {:?}", expr));
668                }
669
670                let group_start = Instant::now();
671                view = self.apply_group_by(
672                    view,
673                    group_by_exprs,
674                    &statement.select_items,
675                    statement.having.as_ref(),
676                )?;
677
678                plan.set_rows_out(view.row_count());
679                plan.add_detail(format!("Output: {} groups", view.row_count()));
680                plan.add_detail(format!(
681                    "Group time: {:.3}ms",
682                    group_start.elapsed().as_secs_f64() * 1000.0
683                ));
684                plan.end_step();
685            }
686        } else {
687            // Apply column projection or computed expressions (SELECT clause) - do this AFTER filtering
688            if !statement.select_items.is_empty() {
689                // Check if we have ANY non-star items (not just the first one)
690                let has_non_star_items = statement
691                    .select_items
692                    .iter()
693                    .any(|item| !matches!(item, SelectItem::Star));
694
695                // Apply select items if:
696                // 1. We have computed expressions or explicit columns
697                // 2. OR we have a mix of star and other items (e.g., SELECT *, computed_col)
698                if has_non_star_items || statement.select_items.len() > 1 {
699                    view = self.apply_select_items(view, &statement.select_items)?;
700                }
701                // If it's just a single star, no projection needed
702            } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
703                // Fallback to legacy column projection for backward compatibility
704                let column_indices = self.resolve_column_indices(&table, &statement.columns)?;
705                view = view.with_columns(column_indices);
706            }
707        }
708
709        // Apply DISTINCT if specified
710        if statement.distinct {
711            plan.begin_step(StepType::Distinct, "Remove duplicate rows".to_string());
712            plan.set_rows_in(view.row_count());
713            plan.add_detail(format!("Input: {} rows", view.row_count()));
714
715            let distinct_start = Instant::now();
716            view = self.apply_distinct(view)?;
717
718            plan.set_rows_out(view.row_count());
719            plan.add_detail(format!("Output: {} unique rows", view.row_count()));
720            plan.add_detail(format!(
721                "Distinct time: {:.3}ms",
722                distinct_start.elapsed().as_secs_f64() * 1000.0
723            ));
724            plan.end_step();
725        }
726
727        // Apply ORDER BY sorting
728        if let Some(order_by_columns) = &statement.order_by {
729            if !order_by_columns.is_empty() {
730                plan.begin_step(
731                    StepType::Sort,
732                    format!("ORDER BY {} columns", order_by_columns.len()),
733                );
734                plan.set_rows_in(view.row_count());
735                for col in order_by_columns {
736                    plan.add_detail(format!("{} {:?}", col.column, col.direction));
737                }
738
739                let sort_start = Instant::now();
740                view = self.apply_multi_order_by(view, order_by_columns)?;
741
742                plan.add_detail(format!(
743                    "Sort time: {:.3}ms",
744                    sort_start.elapsed().as_secs_f64() * 1000.0
745                ));
746                plan.end_step();
747            }
748        }
749
750        // Apply LIMIT/OFFSET
751        if let Some(limit) = statement.limit {
752            let offset = statement.offset.unwrap_or(0);
753            plan.begin_step(StepType::Limit, format!("LIMIT {}", limit));
754            plan.set_rows_in(view.row_count());
755            if offset > 0 {
756                plan.add_detail(format!("OFFSET: {}", offset));
757            }
758            view = view.with_limit(limit, offset);
759            plan.set_rows_out(view.row_count());
760            plan.add_detail(format!("Output: {} rows", view.row_count()));
761            plan.end_step();
762        }
763
764        Ok(view)
765    }
766
767    /// Resolve column names to indices
768    fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
769        let mut indices = Vec::new();
770        let table_columns = table.column_names();
771
772        for col_name in columns {
773            let index = table_columns
774                .iter()
775                .position(|c| c.eq_ignore_ascii_case(col_name))
776                .ok_or_else(|| {
777                    let suggestion = self.find_similar_column(table, col_name);
778                    match suggestion {
779                        Some(similar) => anyhow::anyhow!(
780                            "Column '{}' not found. Did you mean '{}'?",
781                            col_name,
782                            similar
783                        ),
784                        None => anyhow::anyhow!("Column '{}' not found", col_name),
785                    }
786                })?;
787            indices.push(index);
788        }
789
790        Ok(indices)
791    }
792
793    /// Apply SELECT items (columns and computed expressions) to create new view
794    fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
795        debug!(
796            "QueryEngine::apply_select_items - items: {:?}",
797            select_items
798        );
799        debug!(
800            "QueryEngine::apply_select_items - input view has {} rows",
801            view.row_count()
802        );
803
804        // Check if ALL select items are aggregate functions (no GROUP BY)
805        let all_aggregates = select_items.iter().all(|item| match item {
806            SelectItem::Expression { expr, .. } => contains_aggregate(expr),
807            SelectItem::Column(_) => false,
808            SelectItem::Star => false,
809        });
810
811        if all_aggregates && view.row_count() > 0 {
812            // Special handling for aggregate-only queries (no GROUP BY)
813            // These should produce exactly one row
814            return self.apply_aggregate_select(view, select_items);
815        }
816
817        // Check if we need to create computed columns
818        let has_computed_expressions = select_items
819            .iter()
820            .any(|item| matches!(item, SelectItem::Expression { .. }));
821
822        debug!(
823            "QueryEngine::apply_select_items - has_computed_expressions: {}",
824            has_computed_expressions
825        );
826
827        if !has_computed_expressions {
828            // Simple case: only columns, use existing projection logic
829            let column_indices = self.resolve_select_columns(view.source(), select_items)?;
830            return Ok(view.with_columns(column_indices));
831        }
832
833        // Complex case: we have computed expressions
834        // IMPORTANT: We create a PROJECTED view, not a new table
835        // This preserves the original DataTable reference
836
837        let source_table = view.source();
838        let visible_rows = view.visible_row_indices();
839
840        // Create a temporary table just for the computed result view
841        // But this table is only used for the current query result
842        let mut computed_table = DataTable::new("query_result");
843
844        // First, expand any Star selectors to actual columns
845        let mut expanded_items = Vec::new();
846        for item in select_items {
847            match item {
848                SelectItem::Star => {
849                    // Expand * to all columns from source table
850                    for col_name in source_table.column_names() {
851                        expanded_items.push(SelectItem::Column(col_name));
852                    }
853                }
854                _ => expanded_items.push(item.clone()),
855            }
856        }
857
858        // Add columns based on expanded SelectItems, handling duplicates
859        let mut column_name_counts: std::collections::HashMap<String, usize> =
860            std::collections::HashMap::new();
861
862        for item in &expanded_items {
863            let base_name = match item {
864                SelectItem::Column(name) => name.clone(),
865                SelectItem::Expression { alias, .. } => alias.clone(),
866                SelectItem::Star => unreachable!("Star should have been expanded"),
867            };
868
869            // Check if this column name has been used before
870            let count = column_name_counts.entry(base_name.clone()).or_insert(0);
871            let column_name = if *count == 0 {
872                // First occurrence, use the name as-is
873                base_name.clone()
874            } else {
875                // Duplicate, append a suffix
876                format!("{base_name}_{count}")
877            };
878            *count += 1;
879
880            computed_table.add_column(DataColumn::new(&column_name));
881        }
882
883        // Calculate values for each row
884        let mut evaluator =
885            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
886
887        for &row_idx in visible_rows {
888            let mut row_values = Vec::new();
889
890            for item in &expanded_items {
891                let value = match item {
892                    SelectItem::Column(col_name) => {
893                        // Simple column reference
894                        let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
895                            let suggestion = self.find_similar_column(source_table, col_name);
896                            match suggestion {
897                                Some(similar) => anyhow::anyhow!(
898                                    "Column '{}' not found. Did you mean '{}'?",
899                                    col_name,
900                                    similar
901                                ),
902                                None => anyhow::anyhow!("Column '{}' not found", col_name),
903                            }
904                        })?;
905                        let row = source_table
906                            .get_row(row_idx)
907                            .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
908                        row.get(col_idx)
909                            .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
910                            .clone()
911                    }
912                    SelectItem::Expression { expr, .. } => {
913                        // Computed expression
914                        evaluator.evaluate(expr, row_idx)?
915                    }
916                    SelectItem::Star => unreachable!("Star should have been expanded"),
917                };
918                row_values.push(value);
919            }
920
921            computed_table
922                .add_row(DataRow::new(row_values))
923                .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
924        }
925
926        // Return a view of the computed result
927        // This is a temporary view for this query only
928        Ok(DataView::new(Arc::new(computed_table)))
929    }
930
931    /// Apply aggregate-only SELECT (no GROUP BY - produces single row)
932    fn apply_aggregate_select(
933        &self,
934        view: DataView,
935        select_items: &[SelectItem],
936    ) -> Result<DataView> {
937        debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
938
939        let source_table = view.source();
940        let mut result_table = DataTable::new("aggregate_result");
941
942        // Add columns for each select item
943        for item in select_items {
944            let column_name = match item {
945                SelectItem::Expression { alias, .. } => alias.clone(),
946                _ => unreachable!("Should only have expressions in aggregate-only query"),
947            };
948            result_table.add_column(DataColumn::new(&column_name));
949        }
950
951        // Create evaluator with visible rows from the view (for filtered aggregates)
952        let visible_rows = view.visible_row_indices().to_vec();
953        let mut evaluator =
954            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
955                .with_visible_rows(visible_rows);
956
957        // Evaluate each aggregate expression once (they handle all rows internally)
958        let mut row_values = Vec::new();
959        for item in select_items {
960            match item {
961                SelectItem::Expression { expr, .. } => {
962                    // The evaluator will handle aggregates over all rows
963                    // We pass row_index=0 but aggregates ignore it and process all rows
964                    let value = evaluator.evaluate(expr, 0)?;
965                    row_values.push(value);
966                }
967                _ => unreachable!("Should only have expressions in aggregate-only query"),
968            }
969        }
970
971        // Add the single result row
972        result_table
973            .add_row(DataRow::new(row_values))
974            .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
975
976        Ok(DataView::new(Arc::new(result_table)))
977    }
978
979    /// Resolve `SelectItem` columns to indices (for simple column projections only)
980    fn resolve_select_columns(
981        &self,
982        table: &DataTable,
983        select_items: &[SelectItem],
984    ) -> Result<Vec<usize>> {
985        let mut indices = Vec::new();
986        let table_columns = table.column_names();
987
988        for item in select_items {
989            match item {
990                SelectItem::Column(col_name) => {
991                    let index = table_columns
992                        .iter()
993                        .position(|c| c.eq_ignore_ascii_case(col_name))
994                        .ok_or_else(|| {
995                            let suggestion = self.find_similar_column(table, col_name);
996                            match suggestion {
997                                Some(similar) => anyhow::anyhow!(
998                                    "Column '{}' not found. Did you mean '{}'?",
999                                    col_name,
1000                                    similar
1001                                ),
1002                                None => anyhow::anyhow!("Column '{}' not found", col_name),
1003                            }
1004                        })?;
1005                    indices.push(index);
1006                }
1007                SelectItem::Star => {
1008                    // Expand * to all column indices
1009                    for i in 0..table_columns.len() {
1010                        indices.push(i);
1011                    }
1012                }
1013                SelectItem::Expression { .. } => {
1014                    return Err(anyhow::anyhow!(
1015                        "Computed expressions require new table creation"
1016                    ));
1017                }
1018            }
1019        }
1020
1021        Ok(indices)
1022    }
1023
1024    /// Apply DISTINCT to remove duplicate rows
1025    fn apply_distinct(&self, view: DataView) -> Result<DataView> {
1026        use std::collections::HashSet;
1027
1028        let source = view.source();
1029        let visible_cols = view.visible_column_indices();
1030        let visible_rows = view.visible_row_indices();
1031
1032        // Build a set to track unique rows
1033        let mut seen_rows = HashSet::new();
1034        let mut unique_row_indices = Vec::new();
1035
1036        for &row_idx in visible_rows {
1037            // Build a key representing this row's visible column values
1038            let mut row_key = Vec::new();
1039            for &col_idx in visible_cols {
1040                let value = source
1041                    .get_value(row_idx, col_idx)
1042                    .ok_or_else(|| anyhow!("Invalid cell reference"))?;
1043                // Convert value to a hashable representation
1044                row_key.push(format!("{:?}", value));
1045            }
1046
1047            // Check if we've seen this row before
1048            if seen_rows.insert(row_key) {
1049                // First time seeing this row combination
1050                unique_row_indices.push(row_idx);
1051            }
1052        }
1053
1054        // Create a new view with only unique rows
1055        Ok(view.with_rows(unique_row_indices))
1056    }
1057
1058    /// Apply multi-column ORDER BY sorting to the view
1059    fn apply_multi_order_by(
1060        &self,
1061        mut view: DataView,
1062        order_by_columns: &[OrderByColumn],
1063    ) -> Result<DataView> {
1064        // Build list of (source_column_index, ascending) tuples
1065        let mut sort_columns = Vec::new();
1066
1067        for order_col in order_by_columns {
1068            // Try to find the column index
1069            // Check in the current view's source table (this handles both regular columns and computed columns)
1070            // This is especially important after GROUP BY where we have a new result table with aggregate aliases
1071            let col_index = view
1072                .source()
1073                .get_column_index(&order_col.column)
1074                .ok_or_else(|| {
1075                    // If not found, provide helpful error with suggestions
1076                    let suggestion = self.find_similar_column(view.source(), &order_col.column);
1077                    match suggestion {
1078                        Some(similar) => anyhow::anyhow!(
1079                            "Column '{}' not found. Did you mean '{}'?",
1080                            order_col.column,
1081                            similar
1082                        ),
1083                        None => {
1084                            // Also list available columns for debugging
1085                            let available_cols = view.source().column_names().join(", ");
1086                            anyhow::anyhow!(
1087                                "Column '{}' not found. Available columns: {}",
1088                                order_col.column,
1089                                available_cols
1090                            )
1091                        }
1092                    }
1093                })?;
1094
1095            let ascending = matches!(order_col.direction, SortDirection::Asc);
1096            sort_columns.push((col_index, ascending));
1097        }
1098
1099        // Apply multi-column sorting
1100        view.apply_multi_sort(&sort_columns)?;
1101        Ok(view)
1102    }
1103
1104    /// Apply GROUP BY to the view with optional HAVING clause
1105    fn apply_group_by(
1106        &self,
1107        view: DataView,
1108        group_by_exprs: &[SqlExpression],
1109        select_items: &[SelectItem],
1110        having: Option<&SqlExpression>,
1111    ) -> Result<DataView> {
1112        // Use the new expression-based GROUP BY implementation
1113        self.apply_group_by_expressions(
1114            view,
1115            group_by_exprs,
1116            select_items,
1117            having,
1118            self.case_insensitive,
1119            self.date_notation.clone(),
1120        )
1121    }
1122}
1123
1124#[cfg(test)]
1125mod tests {
1126    use super::*;
1127    use crate::data::datatable::{DataColumn, DataRow, DataValue};
1128
1129    fn create_test_table() -> Arc<DataTable> {
1130        let mut table = DataTable::new("test");
1131
1132        // Add columns
1133        table.add_column(DataColumn::new("id"));
1134        table.add_column(DataColumn::new("name"));
1135        table.add_column(DataColumn::new("age"));
1136
1137        // Add rows
1138        table
1139            .add_row(DataRow::new(vec![
1140                DataValue::Integer(1),
1141                DataValue::String("Alice".to_string()),
1142                DataValue::Integer(30),
1143            ]))
1144            .unwrap();
1145
1146        table
1147            .add_row(DataRow::new(vec![
1148                DataValue::Integer(2),
1149                DataValue::String("Bob".to_string()),
1150                DataValue::Integer(25),
1151            ]))
1152            .unwrap();
1153
1154        table
1155            .add_row(DataRow::new(vec![
1156                DataValue::Integer(3),
1157                DataValue::String("Charlie".to_string()),
1158                DataValue::Integer(35),
1159            ]))
1160            .unwrap();
1161
1162        Arc::new(table)
1163    }
1164
1165    #[test]
1166    fn test_select_all() {
1167        let table = create_test_table();
1168        let engine = QueryEngine::new();
1169
1170        let view = engine
1171            .execute(table.clone(), "SELECT * FROM users")
1172            .unwrap();
1173        assert_eq!(view.row_count(), 3);
1174        assert_eq!(view.column_count(), 3);
1175    }
1176
1177    #[test]
1178    fn test_select_columns() {
1179        let table = create_test_table();
1180        let engine = QueryEngine::new();
1181
1182        let view = engine
1183            .execute(table.clone(), "SELECT name, age FROM users")
1184            .unwrap();
1185        assert_eq!(view.row_count(), 3);
1186        assert_eq!(view.column_count(), 2);
1187    }
1188
1189    #[test]
1190    fn test_select_with_limit() {
1191        let table = create_test_table();
1192        let engine = QueryEngine::new();
1193
1194        let view = engine
1195            .execute(table.clone(), "SELECT * FROM users LIMIT 2")
1196            .unwrap();
1197        assert_eq!(view.row_count(), 2);
1198    }
1199
1200    #[test]
1201    fn test_type_coercion_contains() {
1202        // Initialize tracing for debug output
1203        let _ = tracing_subscriber::fmt()
1204            .with_max_level(tracing::Level::DEBUG)
1205            .try_init();
1206
1207        let mut table = DataTable::new("test");
1208        table.add_column(DataColumn::new("id"));
1209        table.add_column(DataColumn::new("status"));
1210        table.add_column(DataColumn::new("price"));
1211
1212        // Add test data with mixed types
1213        table
1214            .add_row(DataRow::new(vec![
1215                DataValue::Integer(1),
1216                DataValue::String("Pending".to_string()),
1217                DataValue::Float(99.99),
1218            ]))
1219            .unwrap();
1220
1221        table
1222            .add_row(DataRow::new(vec![
1223                DataValue::Integer(2),
1224                DataValue::String("Confirmed".to_string()),
1225                DataValue::Float(150.50),
1226            ]))
1227            .unwrap();
1228
1229        table
1230            .add_row(DataRow::new(vec![
1231                DataValue::Integer(3),
1232                DataValue::String("Pending".to_string()),
1233                DataValue::Float(75.00),
1234            ]))
1235            .unwrap();
1236
1237        let table = Arc::new(table);
1238        let engine = QueryEngine::new();
1239
1240        println!("\n=== Testing WHERE clause with Contains ===");
1241        println!("Table has {} rows", table.row_count());
1242        for i in 0..table.row_count() {
1243            let status = table.get_value(i, 1);
1244            println!("Row {i}: status = {status:?}");
1245        }
1246
1247        // Test 1: Basic string contains (should work)
1248        println!("\n--- Test 1: status.Contains('pend') ---");
1249        let result = engine.execute(
1250            table.clone(),
1251            "SELECT * FROM test WHERE status.Contains('pend')",
1252        );
1253        match result {
1254            Ok(view) => {
1255                println!("SUCCESS: Found {} matching rows", view.row_count());
1256                assert_eq!(view.row_count(), 2); // Should find both Pending rows
1257            }
1258            Err(e) => {
1259                panic!("Query failed: {e}");
1260            }
1261        }
1262
1263        // Test 2: Numeric contains (should work with type coercion)
1264        println!("\n--- Test 2: price.Contains('9') ---");
1265        let result = engine.execute(
1266            table.clone(),
1267            "SELECT * FROM test WHERE price.Contains('9')",
1268        );
1269        match result {
1270            Ok(view) => {
1271                println!(
1272                    "SUCCESS: Found {} matching rows with price containing '9'",
1273                    view.row_count()
1274                );
1275                // Should find 99.99 row
1276                assert!(view.row_count() >= 1);
1277            }
1278            Err(e) => {
1279                panic!("Numeric coercion query failed: {e}");
1280            }
1281        }
1282
1283        println!("\n=== All tests passed! ===");
1284    }
1285
1286    #[test]
1287    fn test_not_in_clause() {
1288        // Initialize tracing for debug output
1289        let _ = tracing_subscriber::fmt()
1290            .with_max_level(tracing::Level::DEBUG)
1291            .try_init();
1292
1293        let mut table = DataTable::new("test");
1294        table.add_column(DataColumn::new("id"));
1295        table.add_column(DataColumn::new("country"));
1296
1297        // Add test data
1298        table
1299            .add_row(DataRow::new(vec![
1300                DataValue::Integer(1),
1301                DataValue::String("CA".to_string()),
1302            ]))
1303            .unwrap();
1304
1305        table
1306            .add_row(DataRow::new(vec![
1307                DataValue::Integer(2),
1308                DataValue::String("US".to_string()),
1309            ]))
1310            .unwrap();
1311
1312        table
1313            .add_row(DataRow::new(vec![
1314                DataValue::Integer(3),
1315                DataValue::String("UK".to_string()),
1316            ]))
1317            .unwrap();
1318
1319        let table = Arc::new(table);
1320        let engine = QueryEngine::new();
1321
1322        println!("\n=== Testing NOT IN clause ===");
1323        println!("Table has {} rows", table.row_count());
1324        for i in 0..table.row_count() {
1325            let country = table.get_value(i, 1);
1326            println!("Row {i}: country = {country:?}");
1327        }
1328
1329        // Test NOT IN clause - should exclude CA, return US and UK (2 rows)
1330        println!("\n--- Test: country NOT IN ('CA') ---");
1331        let result = engine.execute(
1332            table.clone(),
1333            "SELECT * FROM test WHERE country NOT IN ('CA')",
1334        );
1335        match result {
1336            Ok(view) => {
1337                println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
1338                assert_eq!(view.row_count(), 2); // Should find US and UK
1339            }
1340            Err(e) => {
1341                panic!("NOT IN query failed: {e}");
1342            }
1343        }
1344
1345        println!("\n=== NOT IN test complete! ===");
1346    }
1347
1348    #[test]
1349    fn test_case_insensitive_in_and_not_in() {
1350        // Initialize tracing for debug output
1351        let _ = tracing_subscriber::fmt()
1352            .with_max_level(tracing::Level::DEBUG)
1353            .try_init();
1354
1355        let mut table = DataTable::new("test");
1356        table.add_column(DataColumn::new("id"));
1357        table.add_column(DataColumn::new("country"));
1358
1359        // Add test data with mixed case
1360        table
1361            .add_row(DataRow::new(vec![
1362                DataValue::Integer(1),
1363                DataValue::String("CA".to_string()), // uppercase
1364            ]))
1365            .unwrap();
1366
1367        table
1368            .add_row(DataRow::new(vec![
1369                DataValue::Integer(2),
1370                DataValue::String("us".to_string()), // lowercase
1371            ]))
1372            .unwrap();
1373
1374        table
1375            .add_row(DataRow::new(vec![
1376                DataValue::Integer(3),
1377                DataValue::String("UK".to_string()), // uppercase
1378            ]))
1379            .unwrap();
1380
1381        let table = Arc::new(table);
1382
1383        println!("\n=== Testing Case-Insensitive IN clause ===");
1384        println!("Table has {} rows", table.row_count());
1385        for i in 0..table.row_count() {
1386            let country = table.get_value(i, 1);
1387            println!("Row {i}: country = {country:?}");
1388        }
1389
1390        // Test case-insensitive IN - should match 'CA' with 'ca'
1391        println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
1392        let engine = QueryEngine::with_case_insensitive(true);
1393        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1394        match result {
1395            Ok(view) => {
1396                println!(
1397                    "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
1398                    view.row_count()
1399                );
1400                assert_eq!(view.row_count(), 1); // Should find CA row
1401            }
1402            Err(e) => {
1403                panic!("Case-insensitive IN query failed: {e}");
1404            }
1405        }
1406
1407        // Test case-insensitive NOT IN - should exclude 'CA' when searching for 'ca'
1408        println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
1409        let result = engine.execute(
1410            table.clone(),
1411            "SELECT * FROM test WHERE country NOT IN ('ca')",
1412        );
1413        match result {
1414            Ok(view) => {
1415                println!(
1416                    "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
1417                    view.row_count()
1418                );
1419                assert_eq!(view.row_count(), 2); // Should find us and UK rows
1420            }
1421            Err(e) => {
1422                panic!("Case-insensitive NOT IN query failed: {e}");
1423            }
1424        }
1425
1426        // Test case-sensitive (default) - should NOT match 'CA' with 'ca'
1427        println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
1428        let engine_case_sensitive = QueryEngine::new(); // defaults to case_insensitive=false
1429        let result = engine_case_sensitive
1430            .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1431        match result {
1432            Ok(view) => {
1433                println!(
1434                    "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
1435                    view.row_count()
1436                );
1437                assert_eq!(view.row_count(), 0); // Should find no rows (CA != ca)
1438            }
1439            Err(e) => {
1440                panic!("Case-sensitive IN query failed: {e}");
1441            }
1442        }
1443
1444        println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
1445    }
1446
1447    #[test]
1448    #[ignore = "Parentheses in WHERE clause not yet implemented"]
1449    fn test_parentheses_in_where_clause() {
1450        // Initialize tracing for debug output
1451        let _ = tracing_subscriber::fmt()
1452            .with_max_level(tracing::Level::DEBUG)
1453            .try_init();
1454
1455        let mut table = DataTable::new("test");
1456        table.add_column(DataColumn::new("id"));
1457        table.add_column(DataColumn::new("status"));
1458        table.add_column(DataColumn::new("priority"));
1459
1460        // Add test data
1461        table
1462            .add_row(DataRow::new(vec![
1463                DataValue::Integer(1),
1464                DataValue::String("Pending".to_string()),
1465                DataValue::String("High".to_string()),
1466            ]))
1467            .unwrap();
1468
1469        table
1470            .add_row(DataRow::new(vec![
1471                DataValue::Integer(2),
1472                DataValue::String("Complete".to_string()),
1473                DataValue::String("High".to_string()),
1474            ]))
1475            .unwrap();
1476
1477        table
1478            .add_row(DataRow::new(vec![
1479                DataValue::Integer(3),
1480                DataValue::String("Pending".to_string()),
1481                DataValue::String("Low".to_string()),
1482            ]))
1483            .unwrap();
1484
1485        table
1486            .add_row(DataRow::new(vec![
1487                DataValue::Integer(4),
1488                DataValue::String("Complete".to_string()),
1489                DataValue::String("Low".to_string()),
1490            ]))
1491            .unwrap();
1492
1493        let table = Arc::new(table);
1494        let engine = QueryEngine::new();
1495
1496        println!("\n=== Testing Parentheses in WHERE clause ===");
1497        println!("Table has {} rows", table.row_count());
1498        for i in 0..table.row_count() {
1499            let status = table.get_value(i, 1);
1500            let priority = table.get_value(i, 2);
1501            println!("Row {i}: status = {status:?}, priority = {priority:?}");
1502        }
1503
1504        // Test OR with parentheses - should get (Pending AND High) OR (Complete AND Low)
1505        println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
1506        let result = engine.execute(
1507            table.clone(),
1508            "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
1509        );
1510        match result {
1511            Ok(view) => {
1512                println!(
1513                    "SUCCESS: Found {} rows with parenthetical logic",
1514                    view.row_count()
1515                );
1516                assert_eq!(view.row_count(), 2); // Should find rows 1 and 4
1517            }
1518            Err(e) => {
1519                panic!("Parentheses query failed: {e}");
1520            }
1521        }
1522
1523        println!("\n=== Parentheses test complete! ===");
1524    }
1525
1526    #[test]
1527    #[ignore = "Numeric type coercion needs fixing"]
1528    fn test_numeric_type_coercion() {
1529        // Initialize tracing for debug output
1530        let _ = tracing_subscriber::fmt()
1531            .with_max_level(tracing::Level::DEBUG)
1532            .try_init();
1533
1534        let mut table = DataTable::new("test");
1535        table.add_column(DataColumn::new("id"));
1536        table.add_column(DataColumn::new("price"));
1537        table.add_column(DataColumn::new("quantity"));
1538
1539        // Add test data with different numeric types
1540        table
1541            .add_row(DataRow::new(vec![
1542                DataValue::Integer(1),
1543                DataValue::Float(99.50), // Contains '.'
1544                DataValue::Integer(100),
1545            ]))
1546            .unwrap();
1547
1548        table
1549            .add_row(DataRow::new(vec![
1550                DataValue::Integer(2),
1551                DataValue::Float(150.0), // Contains '.' and '0'
1552                DataValue::Integer(200),
1553            ]))
1554            .unwrap();
1555
1556        table
1557            .add_row(DataRow::new(vec![
1558                DataValue::Integer(3),
1559                DataValue::Integer(75), // No decimal point
1560                DataValue::Integer(50),
1561            ]))
1562            .unwrap();
1563
1564        let table = Arc::new(table);
1565        let engine = QueryEngine::new();
1566
1567        println!("\n=== Testing Numeric Type Coercion ===");
1568        println!("Table has {} rows", table.row_count());
1569        for i in 0..table.row_count() {
1570            let price = table.get_value(i, 1);
1571            let quantity = table.get_value(i, 2);
1572            println!("Row {i}: price = {price:?}, quantity = {quantity:?}");
1573        }
1574
1575        // Test Contains on float values - should find rows with decimal points
1576        println!("\n--- Test: price.Contains('.') ---");
1577        let result = engine.execute(
1578            table.clone(),
1579            "SELECT * FROM test WHERE price.Contains('.')",
1580        );
1581        match result {
1582            Ok(view) => {
1583                println!(
1584                    "SUCCESS: Found {} rows with decimal points in price",
1585                    view.row_count()
1586                );
1587                assert_eq!(view.row_count(), 2); // Should find 99.50 and 150.0
1588            }
1589            Err(e) => {
1590                panic!("Numeric Contains query failed: {e}");
1591            }
1592        }
1593
1594        // Test Contains on integer values converted to string
1595        println!("\n--- Test: quantity.Contains('0') ---");
1596        let result = engine.execute(
1597            table.clone(),
1598            "SELECT * FROM test WHERE quantity.Contains('0')",
1599        );
1600        match result {
1601            Ok(view) => {
1602                println!(
1603                    "SUCCESS: Found {} rows with '0' in quantity",
1604                    view.row_count()
1605                );
1606                assert_eq!(view.row_count(), 2); // Should find 100 and 200
1607            }
1608            Err(e) => {
1609                panic!("Integer Contains query failed: {e}");
1610            }
1611        }
1612
1613        println!("\n=== Numeric type coercion test complete! ===");
1614    }
1615
1616    #[test]
1617    fn test_datetime_comparisons() {
1618        // Initialize tracing for debug output
1619        let _ = tracing_subscriber::fmt()
1620            .with_max_level(tracing::Level::DEBUG)
1621            .try_init();
1622
1623        let mut table = DataTable::new("test");
1624        table.add_column(DataColumn::new("id"));
1625        table.add_column(DataColumn::new("created_date"));
1626
1627        // Add test data with date strings (as they would come from CSV)
1628        table
1629            .add_row(DataRow::new(vec![
1630                DataValue::Integer(1),
1631                DataValue::String("2024-12-15".to_string()),
1632            ]))
1633            .unwrap();
1634
1635        table
1636            .add_row(DataRow::new(vec![
1637                DataValue::Integer(2),
1638                DataValue::String("2025-01-15".to_string()),
1639            ]))
1640            .unwrap();
1641
1642        table
1643            .add_row(DataRow::new(vec![
1644                DataValue::Integer(3),
1645                DataValue::String("2025-02-15".to_string()),
1646            ]))
1647            .unwrap();
1648
1649        let table = Arc::new(table);
1650        let engine = QueryEngine::new();
1651
1652        println!("\n=== Testing DateTime Comparisons ===");
1653        println!("Table has {} rows", table.row_count());
1654        for i in 0..table.row_count() {
1655            let date = table.get_value(i, 1);
1656            println!("Row {i}: created_date = {date:?}");
1657        }
1658
1659        // Test DateTime constructor comparison - should find dates after 2025-01-01
1660        println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1661        let result = engine.execute(
1662            table.clone(),
1663            "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1664        );
1665        match result {
1666            Ok(view) => {
1667                println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1668                assert_eq!(view.row_count(), 2); // Should find 2025-01-15 and 2025-02-15
1669            }
1670            Err(e) => {
1671                panic!("DateTime comparison query failed: {e}");
1672            }
1673        }
1674
1675        println!("\n=== DateTime comparison test complete! ===");
1676    }
1677
1678    #[test]
1679    fn test_not_with_method_calls() {
1680        // Initialize tracing for debug output
1681        let _ = tracing_subscriber::fmt()
1682            .with_max_level(tracing::Level::DEBUG)
1683            .try_init();
1684
1685        let mut table = DataTable::new("test");
1686        table.add_column(DataColumn::new("id"));
1687        table.add_column(DataColumn::new("status"));
1688
1689        // Add test data
1690        table
1691            .add_row(DataRow::new(vec![
1692                DataValue::Integer(1),
1693                DataValue::String("Pending Review".to_string()),
1694            ]))
1695            .unwrap();
1696
1697        table
1698            .add_row(DataRow::new(vec![
1699                DataValue::Integer(2),
1700                DataValue::String("Complete".to_string()),
1701            ]))
1702            .unwrap();
1703
1704        table
1705            .add_row(DataRow::new(vec![
1706                DataValue::Integer(3),
1707                DataValue::String("Pending Approval".to_string()),
1708            ]))
1709            .unwrap();
1710
1711        let table = Arc::new(table);
1712        let engine = QueryEngine::with_case_insensitive(true);
1713
1714        println!("\n=== Testing NOT with Method Calls ===");
1715        println!("Table has {} rows", table.row_count());
1716        for i in 0..table.row_count() {
1717            let status = table.get_value(i, 1);
1718            println!("Row {i}: status = {status:?}");
1719        }
1720
1721        // Test NOT with Contains - should exclude rows containing "pend"
1722        println!("\n--- Test: NOT status.Contains('pend') ---");
1723        let result = engine.execute(
1724            table.clone(),
1725            "SELECT * FROM test WHERE NOT status.Contains('pend')",
1726        );
1727        match result {
1728            Ok(view) => {
1729                println!(
1730                    "SUCCESS: Found {} rows NOT containing 'pend'",
1731                    view.row_count()
1732                );
1733                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1734            }
1735            Err(e) => {
1736                panic!("NOT Contains query failed: {e}");
1737            }
1738        }
1739
1740        // Test NOT with StartsWith
1741        println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1742        let result = engine.execute(
1743            table.clone(),
1744            "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1745        );
1746        match result {
1747            Ok(view) => {
1748                println!(
1749                    "SUCCESS: Found {} rows NOT starting with 'Pending'",
1750                    view.row_count()
1751                );
1752                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1753            }
1754            Err(e) => {
1755                panic!("NOT StartsWith query failed: {e}");
1756            }
1757        }
1758
1759        println!("\n=== NOT with method calls test complete! ===");
1760    }
1761
1762    #[test]
1763    #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1764    fn test_complex_logical_expressions() {
1765        // Initialize tracing for debug output
1766        let _ = tracing_subscriber::fmt()
1767            .with_max_level(tracing::Level::DEBUG)
1768            .try_init();
1769
1770        let mut table = DataTable::new("test");
1771        table.add_column(DataColumn::new("id"));
1772        table.add_column(DataColumn::new("status"));
1773        table.add_column(DataColumn::new("priority"));
1774        table.add_column(DataColumn::new("assigned"));
1775
1776        // Add comprehensive test data
1777        table
1778            .add_row(DataRow::new(vec![
1779                DataValue::Integer(1),
1780                DataValue::String("Pending".to_string()),
1781                DataValue::String("High".to_string()),
1782                DataValue::String("John".to_string()),
1783            ]))
1784            .unwrap();
1785
1786        table
1787            .add_row(DataRow::new(vec![
1788                DataValue::Integer(2),
1789                DataValue::String("Complete".to_string()),
1790                DataValue::String("High".to_string()),
1791                DataValue::String("Jane".to_string()),
1792            ]))
1793            .unwrap();
1794
1795        table
1796            .add_row(DataRow::new(vec![
1797                DataValue::Integer(3),
1798                DataValue::String("Pending".to_string()),
1799                DataValue::String("Low".to_string()),
1800                DataValue::String("John".to_string()),
1801            ]))
1802            .unwrap();
1803
1804        table
1805            .add_row(DataRow::new(vec![
1806                DataValue::Integer(4),
1807                DataValue::String("In Progress".to_string()),
1808                DataValue::String("Medium".to_string()),
1809                DataValue::String("Jane".to_string()),
1810            ]))
1811            .unwrap();
1812
1813        let table = Arc::new(table);
1814        let engine = QueryEngine::new();
1815
1816        println!("\n=== Testing Complex Logical Expressions ===");
1817        println!("Table has {} rows", table.row_count());
1818        for i in 0..table.row_count() {
1819            let status = table.get_value(i, 1);
1820            let priority = table.get_value(i, 2);
1821            let assigned = table.get_value(i, 3);
1822            println!(
1823                "Row {i}: status = {status:?}, priority = {priority:?}, assigned = {assigned:?}"
1824            );
1825        }
1826
1827        // Test complex AND/OR logic
1828        println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
1829        let result = engine.execute(
1830            table.clone(),
1831            "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
1832        );
1833        match result {
1834            Ok(view) => {
1835                println!(
1836                    "SUCCESS: Found {} rows with complex logic",
1837                    view.row_count()
1838                );
1839                assert_eq!(view.row_count(), 2); // Should find rows 1 and 3 (both Pending, one High priority, both assigned to John)
1840            }
1841            Err(e) => {
1842                panic!("Complex logic query failed: {e}");
1843            }
1844        }
1845
1846        // Test NOT with complex expressions
1847        println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
1848        let result = engine.execute(
1849            table.clone(),
1850            "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
1851        );
1852        match result {
1853            Ok(view) => {
1854                println!(
1855                    "SUCCESS: Found {} rows with NOT complex logic",
1856                    view.row_count()
1857                );
1858                assert_eq!(view.row_count(), 2); // Should find rows 1 (Pending+High) and 4 (In Progress+Medium)
1859            }
1860            Err(e) => {
1861                panic!("NOT complex logic query failed: {e}");
1862            }
1863        }
1864
1865        println!("\n=== Complex logical expressions test complete! ===");
1866    }
1867
1868    #[test]
1869    fn test_mixed_data_types_and_edge_cases() {
1870        // Initialize tracing for debug output
1871        let _ = tracing_subscriber::fmt()
1872            .with_max_level(tracing::Level::DEBUG)
1873            .try_init();
1874
1875        let mut table = DataTable::new("test");
1876        table.add_column(DataColumn::new("id"));
1877        table.add_column(DataColumn::new("value"));
1878        table.add_column(DataColumn::new("nullable_field"));
1879
1880        // Add test data with mixed types and edge cases
1881        table
1882            .add_row(DataRow::new(vec![
1883                DataValue::Integer(1),
1884                DataValue::String("123.45".to_string()),
1885                DataValue::String("present".to_string()),
1886            ]))
1887            .unwrap();
1888
1889        table
1890            .add_row(DataRow::new(vec![
1891                DataValue::Integer(2),
1892                DataValue::Float(678.90),
1893                DataValue::Null,
1894            ]))
1895            .unwrap();
1896
1897        table
1898            .add_row(DataRow::new(vec![
1899                DataValue::Integer(3),
1900                DataValue::Boolean(true),
1901                DataValue::String("also present".to_string()),
1902            ]))
1903            .unwrap();
1904
1905        table
1906            .add_row(DataRow::new(vec![
1907                DataValue::Integer(4),
1908                DataValue::String("false".to_string()),
1909                DataValue::Null,
1910            ]))
1911            .unwrap();
1912
1913        let table = Arc::new(table);
1914        let engine = QueryEngine::new();
1915
1916        println!("\n=== Testing Mixed Data Types and Edge Cases ===");
1917        println!("Table has {} rows", table.row_count());
1918        for i in 0..table.row_count() {
1919            let value = table.get_value(i, 1);
1920            let nullable = table.get_value(i, 2);
1921            println!("Row {i}: value = {value:?}, nullable_field = {nullable:?}");
1922        }
1923
1924        // Test type coercion with boolean Contains
1925        println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
1926        let result = engine.execute(
1927            table.clone(),
1928            "SELECT * FROM test WHERE value.Contains('true')",
1929        );
1930        match result {
1931            Ok(view) => {
1932                println!(
1933                    "SUCCESS: Found {} rows with boolean coercion",
1934                    view.row_count()
1935                );
1936                assert_eq!(view.row_count(), 1); // Should find the boolean true row
1937            }
1938            Err(e) => {
1939                panic!("Boolean coercion query failed: {e}");
1940            }
1941        }
1942
1943        // Test multiple IN values with mixed types
1944        println!("\n--- Test: id IN (1, 3) ---");
1945        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
1946        match result {
1947            Ok(view) => {
1948                println!("SUCCESS: Found {} rows with IN clause", view.row_count());
1949                assert_eq!(view.row_count(), 2); // Should find rows with id 1 and 3
1950            }
1951            Err(e) => {
1952                panic!("Multiple IN values query failed: {e}");
1953            }
1954        }
1955
1956        println!("\n=== Mixed data types test complete! ===");
1957    }
1958
1959    #[test]
1960    fn test_not_in_parsing() {
1961        use crate::sql::recursive_parser::Parser;
1962
1963        let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
1964        println!("\n=== Testing NOT IN parsing ===");
1965        println!("Parsing query: {query}");
1966
1967        let mut parser = Parser::new(query);
1968        match parser.parse() {
1969            Ok(statement) => {
1970                println!("Parsed statement: {statement:#?}");
1971                if let Some(where_clause) = statement.where_clause {
1972                    println!("WHERE conditions: {:#?}", where_clause.conditions);
1973                    if let Some(first_condition) = where_clause.conditions.first() {
1974                        println!("First condition expression: {:#?}", first_condition.expr);
1975                    }
1976                }
1977            }
1978            Err(e) => {
1979                panic!("Parse error: {e}");
1980            }
1981        }
1982    }
1983}
1984
1985#[cfg(test)]
1986#[path = "query_engine_tests.rs"]
1987mod query_engine_tests;