sql_cli/data/
query_engine.rs

1use anyhow::{anyhow, Result};
2use std::collections::HashMap;
3use std::sync::Arc;
4use std::time::Instant;
5use tracing::{debug, info};
6
7use crate::config::config::BehaviorConfig;
8use crate::config::global::get_date_notation;
9use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
10use crate::data::data_view::DataView;
11use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
12use crate::data::group_by_expressions::GroupByExpressions;
13use crate::data::hash_join::HashJoinExecutor;
14use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
15use crate::data::subquery_executor::SubqueryExecutor;
16use crate::data::virtual_table_generator::VirtualTableGenerator;
17use crate::execution_plan::{ExecutionPlan, ExecutionPlanBuilder, StepType};
18use crate::sql::aggregates::contains_aggregate;
19use crate::sql::parser::ast::TableSource;
20use crate::sql::recursive_parser::{
21    CTEType, OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection, SqlExpression,
22    TableFunction,
23};
24
25/// Query engine that executes SQL directly on `DataTable`
26#[derive(Clone)]
27pub struct QueryEngine {
28    case_insensitive: bool,
29    date_notation: String,
30    behavior_config: Option<BehaviorConfig>,
31}
32
33impl Default for QueryEngine {
34    fn default() -> Self {
35        Self::new()
36    }
37}
38
39impl QueryEngine {
40    #[must_use]
41    pub fn new() -> Self {
42        Self {
43            case_insensitive: false,
44            date_notation: get_date_notation(),
45            behavior_config: None,
46        }
47    }
48
49    #[must_use]
50    pub fn with_behavior_config(config: BehaviorConfig) -> Self {
51        let case_insensitive = config.case_insensitive_default;
52        // Use get_date_notation() to respect environment variable override
53        let date_notation = get_date_notation();
54        Self {
55            case_insensitive,
56            date_notation,
57            behavior_config: Some(config),
58        }
59    }
60
61    #[must_use]
62    pub fn with_date_notation(_date_notation: String) -> Self {
63        Self {
64            case_insensitive: false,
65            date_notation: get_date_notation(), // Always use the global function
66            behavior_config: None,
67        }
68    }
69
70    #[must_use]
71    pub fn with_case_insensitive(case_insensitive: bool) -> Self {
72        Self {
73            case_insensitive,
74            date_notation: get_date_notation(),
75            behavior_config: None,
76        }
77    }
78
79    #[must_use]
80    pub fn with_case_insensitive_and_date_notation(
81        case_insensitive: bool,
82        _date_notation: String, // Keep parameter for compatibility but use get_date_notation()
83    ) -> Self {
84        Self {
85            case_insensitive,
86            date_notation: get_date_notation(), // Always use the global function
87            behavior_config: None,
88        }
89    }
90
91    /// Find a column name similar to the given name using edit distance
92    fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
93        let columns = table.column_names();
94        let mut best_match: Option<(String, usize)> = None;
95
96        for col in columns {
97            let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
98            // Only suggest if distance is small (likely a typo)
99            // Allow up to 3 edits for longer names
100            let max_distance = if name.len() > 10 { 3 } else { 2 };
101            if distance <= max_distance {
102                match &best_match {
103                    None => best_match = Some((col, distance)),
104                    Some((_, best_dist)) if distance < *best_dist => {
105                        best_match = Some((col, distance));
106                    }
107                    _ => {}
108                }
109            }
110        }
111
112        best_match.map(|(name, _)| name)
113    }
114
115    /// Calculate Levenshtein edit distance between two strings
116    fn edit_distance(&self, s1: &str, s2: &str) -> usize {
117        let len1 = s1.len();
118        let len2 = s2.len();
119        let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
120
121        for i in 0..=len1 {
122            matrix[i][0] = i;
123        }
124        for j in 0..=len2 {
125            matrix[0][j] = j;
126        }
127
128        for (i, c1) in s1.chars().enumerate() {
129            for (j, c2) in s2.chars().enumerate() {
130                let cost = usize::from(c1 != c2);
131                matrix[i + 1][j + 1] = std::cmp::min(
132                    matrix[i][j + 1] + 1, // deletion
133                    std::cmp::min(
134                        matrix[i + 1][j] + 1, // insertion
135                        matrix[i][j] + cost,  // substitution
136                    ),
137                );
138            }
139        }
140
141        matrix[len1][len2]
142    }
143
144    /// Execute a SQL query on a `DataTable` and return a `DataView` (for backward compatibility)
145    pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
146        let (view, _plan) = self.execute_with_plan(table, sql)?;
147        Ok(view)
148    }
149
150    /// Execute a parsed SelectStatement on a `DataTable` and return a `DataView`
151    pub fn execute_statement(
152        &self,
153        table: Arc<DataTable>,
154        statement: SelectStatement,
155    ) -> Result<DataView> {
156        // First process CTEs to build context
157        let mut cte_context = HashMap::new();
158        for cte in &statement.ctes {
159            debug!("QueryEngine: Pre-processing CTE '{}'...", cte.name);
160            // Execute the CTE based on its type
161            let cte_result = match &cte.cte_type {
162                CTEType::Standard(query) => {
163                    // Execute the CTE query (it might reference earlier CTEs)
164                    self.build_view_with_context(table.clone(), query.clone(), &mut cte_context)?
165                }
166                CTEType::Web(web_spec) => {
167                    // Fetch data from URL
168                    use crate::web::http_fetcher::WebDataFetcher;
169
170                    let fetcher = WebDataFetcher::new()?;
171                    let data_table = fetcher.fetch(web_spec, &cte.name)?;
172
173                    // Convert DataTable to DataView
174                    DataView::new(Arc::new(data_table))
175                }
176            };
177            // Store the result in the context for later use
178            cte_context.insert(cte.name.clone(), Arc::new(cte_result));
179            debug!(
180                "QueryEngine: CTE '{}' pre-processed, stored in context",
181                cte.name
182            );
183        }
184
185        // Now process subqueries with CTE context available
186        let mut subquery_executor =
187            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
188        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
189
190        // Build the view with the same CTE context
191        self.build_view_with_context(table, processed_statement, &mut cte_context)
192    }
193
194    /// Execute a statement with provided CTE context (for subqueries)
195    pub fn execute_statement_with_cte_context(
196        &self,
197        table: Arc<DataTable>,
198        statement: SelectStatement,
199        cte_context: &HashMap<String, Arc<DataView>>,
200    ) -> Result<DataView> {
201        // Clone the context so we can add any CTEs from this statement
202        let mut local_context = cte_context.clone();
203
204        // Process any CTEs in this statement (they might be nested)
205        for cte in &statement.ctes {
206            debug!("QueryEngine: Processing nested CTE '{}'...", cte.name);
207            let cte_result = match &cte.cte_type {
208                CTEType::Standard(query) => {
209                    self.build_view_with_context(table.clone(), query.clone(), &mut local_context)?
210                }
211                CTEType::Web(web_spec) => {
212                    // Fetch data from URL
213                    use crate::web::http_fetcher::WebDataFetcher;
214
215                    let fetcher = WebDataFetcher::new()?;
216                    let data_table = fetcher.fetch(web_spec, &cte.name)?;
217
218                    // Convert DataTable to DataView
219                    DataView::new(Arc::new(data_table))
220                }
221            };
222            local_context.insert(cte.name.clone(), Arc::new(cte_result));
223        }
224
225        // Process subqueries with the complete context
226        let mut subquery_executor =
227            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), local_context.clone());
228        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
229
230        // Build the view
231        self.build_view_with_context(table, processed_statement, &mut local_context)
232    }
233
234    /// Execute a query and return both the result and the execution plan
235    pub fn execute_with_plan(
236        &self,
237        table: Arc<DataTable>,
238        sql: &str,
239    ) -> Result<(DataView, ExecutionPlan)> {
240        let mut plan_builder = ExecutionPlanBuilder::new();
241        let start_time = Instant::now();
242
243        // Parse the SQL query
244        plan_builder.begin_step(StepType::Parse, "Parse SQL query".to_string());
245        plan_builder.add_detail(format!("Query: {}", sql));
246        let mut parser = Parser::new(sql);
247        let statement = parser
248            .parse()
249            .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
250        plan_builder.add_detail(format!("Parsed successfully"));
251        if let Some(from) = &statement.from_table {
252            plan_builder.add_detail(format!("FROM: {}", from));
253        }
254        if statement.where_clause.is_some() {
255            plan_builder.add_detail("WHERE clause present".to_string());
256        }
257        plan_builder.end_step();
258
259        // First process CTEs to build context
260        let mut cte_context = HashMap::new();
261
262        if !statement.ctes.is_empty() {
263            plan_builder.begin_step(
264                StepType::CTE,
265                format!("Process {} CTEs", statement.ctes.len()),
266            );
267
268            for cte in &statement.ctes {
269                let cte_start = Instant::now();
270                plan_builder.begin_step(StepType::CTE, format!("CTE '{}'", cte.name));
271
272                let cte_result = match &cte.cte_type {
273                    CTEType::Standard(query) => {
274                        // Add CTE query details
275                        if let Some(from) = &query.from_table {
276                            plan_builder.add_detail(format!("Source: {}", from));
277                        }
278                        if query.where_clause.is_some() {
279                            plan_builder.add_detail("Has WHERE clause".to_string());
280                        }
281                        if query.group_by.is_some() {
282                            plan_builder.add_detail("Has GROUP BY".to_string());
283                        }
284
285                        self.build_view_with_context(
286                            table.clone(),
287                            query.clone(),
288                            &mut cte_context,
289                        )?
290                    }
291                    CTEType::Web(web_spec) => {
292                        plan_builder.add_detail(format!("URL: {}", web_spec.url));
293                        if let Some(format) = &web_spec.format {
294                            plan_builder.add_detail(format!("Format: {:?}", format));
295                        }
296                        if let Some(cache) = web_spec.cache_seconds {
297                            plan_builder.add_detail(format!("Cache: {} seconds", cache));
298                        }
299
300                        // Fetch data from URL
301                        use crate::web::http_fetcher::WebDataFetcher;
302
303                        let fetcher = WebDataFetcher::new()?;
304                        let data_table = fetcher.fetch(web_spec, &cte.name)?;
305
306                        // Convert DataTable to DataView
307                        DataView::new(Arc::new(data_table))
308                    }
309                };
310
311                // Record CTE statistics
312                plan_builder.set_rows_out(cte_result.row_count());
313                plan_builder.add_detail(format!(
314                    "Result: {} rows, {} columns",
315                    cte_result.row_count(),
316                    cte_result.column_count()
317                ));
318                plan_builder.add_detail(format!(
319                    "Execution time: {:.3}ms",
320                    cte_start.elapsed().as_secs_f64() * 1000.0
321                ));
322
323                cte_context.insert(cte.name.clone(), Arc::new(cte_result));
324                plan_builder.end_step();
325            }
326
327            plan_builder.add_detail(format!(
328                "All {} CTEs cached in context",
329                statement.ctes.len()
330            ));
331            plan_builder.end_step();
332        }
333
334        // Process subqueries in the statement with CTE context
335        plan_builder.begin_step(StepType::Subquery, "Process subqueries".to_string());
336        let mut subquery_executor =
337            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
338
339        // Check if there are subqueries to process
340        let has_subqueries = statement.where_clause.as_ref().map_or(false, |w| {
341            // This is a simplified check - in reality we'd need to walk the AST
342            format!("{:?}", w).contains("Subquery")
343        });
344
345        if has_subqueries {
346            plan_builder.add_detail("Evaluating subqueries in WHERE clause".to_string());
347        }
348
349        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
350
351        if has_subqueries {
352            plan_builder.add_detail("Subqueries replaced with materialized values".to_string());
353        } else {
354            plan_builder.add_detail("No subqueries to process".to_string());
355        }
356
357        plan_builder.end_step();
358        let result = self.build_view_with_context_and_plan(
359            table,
360            processed_statement,
361            &mut cte_context,
362            &mut plan_builder,
363        )?;
364
365        let total_duration = start_time.elapsed();
366        info!(
367            "Query execution complete: total={:?}, rows={}",
368            total_duration,
369            result.row_count()
370        );
371
372        let plan = plan_builder.build();
373        Ok((result, plan))
374    }
375
376    /// Build a `DataView` from a parsed SQL statement
377    fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
378        let mut cte_context = HashMap::new();
379        self.build_view_with_context(table, statement, &mut cte_context)
380    }
381
382    /// Build a DataView from a SelectStatement with CTE context
383    fn build_view_with_context(
384        &self,
385        table: Arc<DataTable>,
386        statement: SelectStatement,
387        cte_context: &mut HashMap<String, Arc<DataView>>,
388    ) -> Result<DataView> {
389        let mut dummy_plan = ExecutionPlanBuilder::new();
390        self.build_view_with_context_and_plan(table, statement, cte_context, &mut dummy_plan)
391    }
392
393    /// Build a DataView from a SelectStatement with CTE context and execution plan tracking
394    fn build_view_with_context_and_plan(
395        &self,
396        table: Arc<DataTable>,
397        statement: SelectStatement,
398        cte_context: &mut HashMap<String, Arc<DataView>>,
399        plan: &mut ExecutionPlanBuilder,
400    ) -> Result<DataView> {
401        // First, process any CTEs that aren't already in the context
402        for cte in &statement.ctes {
403            // Skip if already processed (e.g., by execute_select for WEB CTEs)
404            if cte_context.contains_key(&cte.name) {
405                debug!(
406                    "QueryEngine: CTE '{}' already in context, skipping",
407                    cte.name
408                );
409                continue;
410            }
411
412            debug!("QueryEngine: Processing CTE '{}'...", cte.name);
413            // Execute the CTE query (it might reference earlier CTEs)
414            let cte_result = match &cte.cte_type {
415                CTEType::Standard(query) => {
416                    self.build_view_with_context(table.clone(), query.clone(), cte_context)?
417                }
418                CTEType::Web(_web_spec) => {
419                    // Web CTEs should have been processed earlier in execute_select
420                    return Err(anyhow!(
421                        "Web CTEs should be processed in execute_select method"
422                    ));
423                }
424            };
425
426            // Store the result in the context for later use
427            cte_context.insert(cte.name.clone(), Arc::new(cte_result));
428            debug!(
429                "QueryEngine: CTE '{}' processed, stored in context",
430                cte.name
431            );
432        }
433
434        // Determine the source table for the main query
435        let source_table = if let Some(ref table_func) = statement.from_function {
436            // Handle table functions like RANGE()
437            debug!("QueryEngine: Processing table function...");
438            match table_func {
439                TableFunction::Range { start, end, step } => {
440                    // Evaluate expressions to get numeric values
441                    let mut evaluator =
442                        ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
443
444                    // Create a dummy row for evaluating constant expressions
445                    let dummy_row = 0;
446
447                    let start_val = evaluator.evaluate(start, dummy_row)?;
448                    let end_val = evaluator.evaluate(end, dummy_row)?;
449                    let step_val = if let Some(step_expr) = step {
450                        Some(evaluator.evaluate(step_expr, dummy_row)?)
451                    } else {
452                        None
453                    };
454
455                    // Convert DataValues to integers
456                    let start_int = match start_val {
457                        DataValue::Integer(i) => i,
458                        DataValue::Float(f) => f as i64,
459                        _ => return Err(anyhow!("RANGE start must be numeric")),
460                    };
461
462                    let end_int = match end_val {
463                        DataValue::Integer(i) => i,
464                        DataValue::Float(f) => f as i64,
465                        _ => return Err(anyhow!("RANGE end must be numeric")),
466                    };
467
468                    let step_int = if let Some(step) = step_val {
469                        match step {
470                            DataValue::Integer(i) => Some(i),
471                            DataValue::Float(f) => Some(f as i64),
472                            _ => return Err(anyhow!("RANGE step must be numeric")),
473                        }
474                    } else {
475                        None
476                    };
477
478                    // Generate the virtual table
479                    VirtualTableGenerator::generate_range(start_int, end_int, step_int, None)?
480                }
481            }
482        } else if let Some(ref subquery) = statement.from_subquery {
483            // Execute the subquery and use its result as the source
484            debug!("QueryEngine: Processing FROM subquery...");
485            let subquery_result =
486                self.build_view_with_context(table.clone(), *subquery.clone(), cte_context)?;
487
488            // Convert the DataView to a DataTable for use as source
489            // This materializes the subquery result
490            let materialized = self.materialize_view(subquery_result)?;
491            Arc::new(materialized)
492        } else if let Some(ref table_name) = statement.from_table {
493            // Check if this references a CTE
494            if let Some(cte_view) = cte_context.get(table_name) {
495                debug!("QueryEngine: Using CTE '{}' as source table", table_name);
496                // Materialize the CTE view as a table
497                let materialized = self.materialize_view((**cte_view).clone())?;
498                Arc::new(materialized)
499            } else {
500                // Regular table reference - use the provided table
501                table.clone()
502            }
503        } else {
504            // No FROM clause - use the provided table
505            table.clone()
506        };
507
508        // Process JOINs if present
509        let final_table = if !statement.joins.is_empty() {
510            plan.begin_step(
511                StepType::Join,
512                format!("Process {} JOINs", statement.joins.len()),
513            );
514            plan.set_rows_in(source_table.row_count());
515
516            let join_executor = HashJoinExecutor::new(self.case_insensitive);
517            let mut current_table = source_table;
518
519            for (idx, join_clause) in statement.joins.iter().enumerate() {
520                let join_start = Instant::now();
521                plan.begin_step(StepType::Join, format!("JOIN #{}", idx + 1));
522                plan.add_detail(format!("Type: {:?}", join_clause.join_type));
523                plan.add_detail(format!("Left table: {} rows", current_table.row_count()));
524                plan.add_detail(format!(
525                    "Executing {:?} JOIN on {}",
526                    join_clause.join_type, join_clause.condition.left_column
527                ));
528
529                // Resolve the right table for the join
530                let right_table = match &join_clause.table {
531                    TableSource::Table(name) => {
532                        // Check if it's a CTE reference
533                        if let Some(cte_view) = cte_context.get(name) {
534                            let materialized = self.materialize_view((**cte_view).clone())?;
535                            Arc::new(materialized)
536                        } else {
537                            // For now, we need the actual table data
538                            // In a real implementation, this would load from file
539                            return Err(anyhow!("Cannot resolve table '{}' for JOIN", name));
540                        }
541                    }
542                    TableSource::DerivedTable { query, alias: _ } => {
543                        // Execute the subquery
544                        let subquery_result = self.build_view_with_context(
545                            table.clone(),
546                            *query.clone(),
547                            cte_context,
548                        )?;
549                        let materialized = self.materialize_view(subquery_result)?;
550                        Arc::new(materialized)
551                    }
552                };
553
554                // Execute the join
555                let joined = join_executor.execute_join(
556                    current_table.clone(),
557                    join_clause,
558                    right_table.clone(),
559                )?;
560
561                plan.add_detail(format!("Right table: {} rows", right_table.row_count()));
562                plan.set_rows_out(joined.row_count());
563                plan.add_detail(format!("Result: {} rows", joined.row_count()));
564                plan.add_detail(format!(
565                    "Join time: {:.3}ms",
566                    join_start.elapsed().as_secs_f64() * 1000.0
567                ));
568                plan.end_step();
569
570                current_table = Arc::new(joined);
571            }
572
573            plan.set_rows_out(current_table.row_count());
574            plan.add_detail(format!(
575                "Final result after all joins: {} rows",
576                current_table.row_count()
577            ));
578            plan.end_step();
579            current_table
580        } else {
581            source_table
582        };
583
584        // Continue with the existing build_view logic but using final_table
585        self.build_view_internal_with_plan(final_table, statement, plan)
586    }
587
588    /// Materialize a DataView into a new DataTable
589    fn materialize_view(&self, view: DataView) -> Result<DataTable> {
590        let source = view.source();
591        let mut result_table = DataTable::new("derived");
592
593        // Get the visible columns from the view
594        let visible_cols = view.visible_column_indices().to_vec();
595
596        // Copy column definitions
597        for col_idx in &visible_cols {
598            let col = &source.columns[*col_idx];
599            let new_col = DataColumn {
600                name: col.name.clone(),
601                data_type: col.data_type.clone(),
602                nullable: col.nullable,
603                unique_values: col.unique_values,
604                null_count: col.null_count,
605                metadata: col.metadata.clone(),
606            };
607            result_table.add_column(new_col);
608        }
609
610        // Copy visible rows
611        for row_idx in view.visible_row_indices() {
612            let source_row = &source.rows[*row_idx];
613            let mut new_row = DataRow { values: Vec::new() };
614
615            for col_idx in &visible_cols {
616                new_row.values.push(source_row.values[*col_idx].clone());
617            }
618
619            result_table.add_row(new_row);
620        }
621
622        Ok(result_table)
623    }
624
625    fn build_view_internal(
626        &self,
627        table: Arc<DataTable>,
628        statement: SelectStatement,
629    ) -> Result<DataView> {
630        let mut dummy_plan = ExecutionPlanBuilder::new();
631        self.build_view_internal_with_plan(table, statement, &mut dummy_plan)
632    }
633
634    fn build_view_internal_with_plan(
635        &self,
636        table: Arc<DataTable>,
637        statement: SelectStatement,
638        plan: &mut ExecutionPlanBuilder,
639    ) -> Result<DataView> {
640        debug!(
641            "QueryEngine::build_view - select_items: {:?}",
642            statement.select_items
643        );
644        debug!(
645            "QueryEngine::build_view - where_clause: {:?}",
646            statement.where_clause
647        );
648
649        // Start with all rows visible
650        let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
651
652        // Apply WHERE clause filtering using recursive evaluator
653        if let Some(where_clause) = &statement.where_clause {
654            let total_rows = table.row_count();
655            debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
656            debug!("QueryEngine: WHERE clause = {:?}", where_clause);
657
658            plan.begin_step(StepType::Filter, "WHERE clause filtering".to_string());
659            plan.set_rows_in(total_rows);
660            plan.add_detail(format!("Input: {} rows", total_rows));
661
662            // Add details about WHERE conditions
663            for condition in &where_clause.conditions {
664                plan.add_detail(format!("Condition: {:?}", condition.expr));
665            }
666
667            let filter_start = Instant::now();
668            // Filter visible rows based on WHERE clause
669            let mut filtered_rows = Vec::new();
670            for row_idx in visible_rows {
671                // Only log for first few rows to avoid performance impact
672                if row_idx < 3 {
673                    debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
674                }
675                let evaluator = RecursiveWhereEvaluator::with_config(
676                    &table,
677                    self.case_insensitive,
678                    self.date_notation.clone(),
679                );
680                match evaluator.evaluate(where_clause, row_idx) {
681                    Ok(result) => {
682                        if row_idx < 3 {
683                            debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
684                        }
685                        if result {
686                            filtered_rows.push(row_idx);
687                        }
688                    }
689                    Err(e) => {
690                        if row_idx < 3 {
691                            debug!(
692                                "QueryEngine: WHERE evaluation error for row {}: {}",
693                                row_idx, e
694                            );
695                        }
696                        // Propagate WHERE clause errors instead of silently ignoring them
697                        return Err(e);
698                    }
699                }
700            }
701            visible_rows = filtered_rows;
702            let filter_duration = filter_start.elapsed();
703            info!(
704                "WHERE clause filtering: {} rows -> {} rows in {:?}",
705                total_rows,
706                visible_rows.len(),
707                filter_duration
708            );
709
710            plan.set_rows_out(visible_rows.len());
711            plan.add_detail(format!("Output: {} rows", visible_rows.len()));
712            plan.add_detail(format!(
713                "Filter time: {:.3}ms",
714                filter_duration.as_secs_f64() * 1000.0
715            ));
716            plan.end_step();
717        }
718
719        // Create initial DataView with filtered rows
720        let mut view = DataView::new(table.clone());
721        view = view.with_rows(visible_rows);
722
723        // Handle GROUP BY if present
724        if let Some(group_by_exprs) = &statement.group_by {
725            if !group_by_exprs.is_empty() {
726                debug!("QueryEngine: Processing GROUP BY: {:?}", group_by_exprs);
727
728                plan.begin_step(
729                    StepType::GroupBy,
730                    format!("GROUP BY {} expressions", group_by_exprs.len()),
731                );
732                plan.set_rows_in(view.row_count());
733                plan.add_detail(format!("Input: {} rows", view.row_count()));
734                for expr in group_by_exprs {
735                    plan.add_detail(format!("Group by: {:?}", expr));
736                }
737
738                let group_start = Instant::now();
739                view = self.apply_group_by(
740                    view,
741                    group_by_exprs,
742                    &statement.select_items,
743                    statement.having.as_ref(),
744                )?;
745
746                plan.set_rows_out(view.row_count());
747                plan.add_detail(format!("Output: {} groups", view.row_count()));
748                plan.add_detail(format!(
749                    "Group time: {:.3}ms",
750                    group_start.elapsed().as_secs_f64() * 1000.0
751                ));
752                plan.end_step();
753            }
754        } else {
755            // Apply column projection or computed expressions (SELECT clause) - do this AFTER filtering
756            if !statement.select_items.is_empty() {
757                // Check if we have ANY non-star items (not just the first one)
758                let has_non_star_items = statement
759                    .select_items
760                    .iter()
761                    .any(|item| !matches!(item, SelectItem::Star));
762
763                // Apply select items if:
764                // 1. We have computed expressions or explicit columns
765                // 2. OR we have a mix of star and other items (e.g., SELECT *, computed_col)
766                if has_non_star_items || statement.select_items.len() > 1 {
767                    view = self.apply_select_items(view, &statement.select_items)?;
768                }
769                // If it's just a single star, no projection needed
770            } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
771                // Fallback to legacy column projection for backward compatibility
772                let column_indices = self.resolve_column_indices(&table, &statement.columns)?;
773                view = view.with_columns(column_indices);
774            }
775        }
776
777        // Apply DISTINCT if specified
778        if statement.distinct {
779            plan.begin_step(StepType::Distinct, "Remove duplicate rows".to_string());
780            plan.set_rows_in(view.row_count());
781            plan.add_detail(format!("Input: {} rows", view.row_count()));
782
783            let distinct_start = Instant::now();
784            view = self.apply_distinct(view)?;
785
786            plan.set_rows_out(view.row_count());
787            plan.add_detail(format!("Output: {} unique rows", view.row_count()));
788            plan.add_detail(format!(
789                "Distinct time: {:.3}ms",
790                distinct_start.elapsed().as_secs_f64() * 1000.0
791            ));
792            plan.end_step();
793        }
794
795        // Apply ORDER BY sorting
796        if let Some(order_by_columns) = &statement.order_by {
797            if !order_by_columns.is_empty() {
798                plan.begin_step(
799                    StepType::Sort,
800                    format!("ORDER BY {} columns", order_by_columns.len()),
801                );
802                plan.set_rows_in(view.row_count());
803                for col in order_by_columns {
804                    plan.add_detail(format!("{} {:?}", col.column, col.direction));
805                }
806
807                let sort_start = Instant::now();
808                view = self.apply_multi_order_by(view, order_by_columns)?;
809
810                plan.add_detail(format!(
811                    "Sort time: {:.3}ms",
812                    sort_start.elapsed().as_secs_f64() * 1000.0
813                ));
814                plan.end_step();
815            }
816        }
817
818        // Apply LIMIT/OFFSET
819        if let Some(limit) = statement.limit {
820            let offset = statement.offset.unwrap_or(0);
821            plan.begin_step(StepType::Limit, format!("LIMIT {}", limit));
822            plan.set_rows_in(view.row_count());
823            if offset > 0 {
824                plan.add_detail(format!("OFFSET: {}", offset));
825            }
826            view = view.with_limit(limit, offset);
827            plan.set_rows_out(view.row_count());
828            plan.add_detail(format!("Output: {} rows", view.row_count()));
829            plan.end_step();
830        }
831
832        Ok(view)
833    }
834
835    /// Resolve column names to indices
836    fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
837        let mut indices = Vec::new();
838        let table_columns = table.column_names();
839
840        for col_name in columns {
841            let index = table_columns
842                .iter()
843                .position(|c| c.eq_ignore_ascii_case(col_name))
844                .ok_or_else(|| {
845                    let suggestion = self.find_similar_column(table, col_name);
846                    match suggestion {
847                        Some(similar) => anyhow::anyhow!(
848                            "Column '{}' not found. Did you mean '{}'?",
849                            col_name,
850                            similar
851                        ),
852                        None => anyhow::anyhow!("Column '{}' not found", col_name),
853                    }
854                })?;
855            indices.push(index);
856        }
857
858        Ok(indices)
859    }
860
861    /// Apply SELECT items (columns and computed expressions) to create new view
862    fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
863        debug!(
864            "QueryEngine::apply_select_items - items: {:?}",
865            select_items
866        );
867        debug!(
868            "QueryEngine::apply_select_items - input view has {} rows",
869            view.row_count()
870        );
871
872        // Check if ALL select items are aggregate functions (no GROUP BY)
873        let all_aggregates = select_items.iter().all(|item| match item {
874            SelectItem::Expression { expr, .. } => contains_aggregate(expr),
875            SelectItem::Column(_) => false,
876            SelectItem::Star => false,
877        });
878
879        if all_aggregates && view.row_count() > 0 {
880            // Special handling for aggregate-only queries (no GROUP BY)
881            // These should produce exactly one row
882            return self.apply_aggregate_select(view, select_items);
883        }
884
885        // Check if we need to create computed columns
886        let has_computed_expressions = select_items
887            .iter()
888            .any(|item| matches!(item, SelectItem::Expression { .. }));
889
890        debug!(
891            "QueryEngine::apply_select_items - has_computed_expressions: {}",
892            has_computed_expressions
893        );
894
895        if !has_computed_expressions {
896            // Simple case: only columns, use existing projection logic
897            let column_indices = self.resolve_select_columns(view.source(), select_items)?;
898            return Ok(view.with_columns(column_indices));
899        }
900
901        // Complex case: we have computed expressions
902        // IMPORTANT: We create a PROJECTED view, not a new table
903        // This preserves the original DataTable reference
904
905        let source_table = view.source();
906        let visible_rows = view.visible_row_indices();
907
908        // Create a temporary table just for the computed result view
909        // But this table is only used for the current query result
910        let mut computed_table = DataTable::new("query_result");
911
912        // First, expand any Star selectors to actual columns
913        let mut expanded_items = Vec::new();
914        for item in select_items {
915            match item {
916                SelectItem::Star => {
917                    // Expand * to all columns from source table
918                    for col_name in source_table.column_names() {
919                        expanded_items.push(SelectItem::Column(col_name));
920                    }
921                }
922                _ => expanded_items.push(item.clone()),
923            }
924        }
925
926        // Add columns based on expanded SelectItems, handling duplicates
927        let mut column_name_counts: std::collections::HashMap<String, usize> =
928            std::collections::HashMap::new();
929
930        for item in &expanded_items {
931            let base_name = match item {
932                SelectItem::Column(name) => name.clone(),
933                SelectItem::Expression { alias, .. } => alias.clone(),
934                SelectItem::Star => unreachable!("Star should have been expanded"),
935            };
936
937            // Check if this column name has been used before
938            let count = column_name_counts.entry(base_name.clone()).or_insert(0);
939            let column_name = if *count == 0 {
940                // First occurrence, use the name as-is
941                base_name.clone()
942            } else {
943                // Duplicate, append a suffix
944                format!("{base_name}_{count}")
945            };
946            *count += 1;
947
948            computed_table.add_column(DataColumn::new(&column_name));
949        }
950
951        // Calculate values for each row
952        let mut evaluator =
953            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
954
955        for &row_idx in visible_rows {
956            let mut row_values = Vec::new();
957
958            for item in &expanded_items {
959                let value = match item {
960                    SelectItem::Column(col_name) => {
961                        // Simple column reference
962                        let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
963                            let suggestion = self.find_similar_column(source_table, col_name);
964                            match suggestion {
965                                Some(similar) => anyhow::anyhow!(
966                                    "Column '{}' not found. Did you mean '{}'?",
967                                    col_name,
968                                    similar
969                                ),
970                                None => anyhow::anyhow!("Column '{}' not found", col_name),
971                            }
972                        })?;
973                        let row = source_table
974                            .get_row(row_idx)
975                            .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
976                        row.get(col_idx)
977                            .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
978                            .clone()
979                    }
980                    SelectItem::Expression { expr, .. } => {
981                        // Computed expression
982                        evaluator.evaluate(expr, row_idx)?
983                    }
984                    SelectItem::Star => unreachable!("Star should have been expanded"),
985                };
986                row_values.push(value);
987            }
988
989            computed_table
990                .add_row(DataRow::new(row_values))
991                .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
992        }
993
994        // Return a view of the computed result
995        // This is a temporary view for this query only
996        Ok(DataView::new(Arc::new(computed_table)))
997    }
998
999    /// Apply aggregate-only SELECT (no GROUP BY - produces single row)
1000    fn apply_aggregate_select(
1001        &self,
1002        view: DataView,
1003        select_items: &[SelectItem],
1004    ) -> Result<DataView> {
1005        debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
1006
1007        let source_table = view.source();
1008        let mut result_table = DataTable::new("aggregate_result");
1009
1010        // Add columns for each select item
1011        for item in select_items {
1012            let column_name = match item {
1013                SelectItem::Expression { alias, .. } => alias.clone(),
1014                _ => unreachable!("Should only have expressions in aggregate-only query"),
1015            };
1016            result_table.add_column(DataColumn::new(&column_name));
1017        }
1018
1019        // Create evaluator with visible rows from the view (for filtered aggregates)
1020        let visible_rows = view.visible_row_indices().to_vec();
1021        let mut evaluator =
1022            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
1023                .with_visible_rows(visible_rows);
1024
1025        // Evaluate each aggregate expression once (they handle all rows internally)
1026        let mut row_values = Vec::new();
1027        for item in select_items {
1028            match item {
1029                SelectItem::Expression { expr, .. } => {
1030                    // The evaluator will handle aggregates over all rows
1031                    // We pass row_index=0 but aggregates ignore it and process all rows
1032                    let value = evaluator.evaluate(expr, 0)?;
1033                    row_values.push(value);
1034                }
1035                _ => unreachable!("Should only have expressions in aggregate-only query"),
1036            }
1037        }
1038
1039        // Add the single result row
1040        result_table
1041            .add_row(DataRow::new(row_values))
1042            .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
1043
1044        Ok(DataView::new(Arc::new(result_table)))
1045    }
1046
1047    /// Resolve `SelectItem` columns to indices (for simple column projections only)
1048    fn resolve_select_columns(
1049        &self,
1050        table: &DataTable,
1051        select_items: &[SelectItem],
1052    ) -> Result<Vec<usize>> {
1053        let mut indices = Vec::new();
1054        let table_columns = table.column_names();
1055
1056        for item in select_items {
1057            match item {
1058                SelectItem::Column(col_name) => {
1059                    let index = table_columns
1060                        .iter()
1061                        .position(|c| c.eq_ignore_ascii_case(col_name))
1062                        .ok_or_else(|| {
1063                            let suggestion = self.find_similar_column(table, col_name);
1064                            match suggestion {
1065                                Some(similar) => anyhow::anyhow!(
1066                                    "Column '{}' not found. Did you mean '{}'?",
1067                                    col_name,
1068                                    similar
1069                                ),
1070                                None => anyhow::anyhow!("Column '{}' not found", col_name),
1071                            }
1072                        })?;
1073                    indices.push(index);
1074                }
1075                SelectItem::Star => {
1076                    // Expand * to all column indices
1077                    for i in 0..table_columns.len() {
1078                        indices.push(i);
1079                    }
1080                }
1081                SelectItem::Expression { .. } => {
1082                    return Err(anyhow::anyhow!(
1083                        "Computed expressions require new table creation"
1084                    ));
1085                }
1086            }
1087        }
1088
1089        Ok(indices)
1090    }
1091
1092    /// Apply DISTINCT to remove duplicate rows
1093    fn apply_distinct(&self, view: DataView) -> Result<DataView> {
1094        use std::collections::HashSet;
1095
1096        let source = view.source();
1097        let visible_cols = view.visible_column_indices();
1098        let visible_rows = view.visible_row_indices();
1099
1100        // Build a set to track unique rows
1101        let mut seen_rows = HashSet::new();
1102        let mut unique_row_indices = Vec::new();
1103
1104        for &row_idx in visible_rows {
1105            // Build a key representing this row's visible column values
1106            let mut row_key = Vec::new();
1107            for &col_idx in visible_cols {
1108                let value = source
1109                    .get_value(row_idx, col_idx)
1110                    .ok_or_else(|| anyhow!("Invalid cell reference"))?;
1111                // Convert value to a hashable representation
1112                row_key.push(format!("{:?}", value));
1113            }
1114
1115            // Check if we've seen this row before
1116            if seen_rows.insert(row_key) {
1117                // First time seeing this row combination
1118                unique_row_indices.push(row_idx);
1119            }
1120        }
1121
1122        // Create a new view with only unique rows
1123        Ok(view.with_rows(unique_row_indices))
1124    }
1125
1126    /// Apply multi-column ORDER BY sorting to the view
1127    fn apply_multi_order_by(
1128        &self,
1129        mut view: DataView,
1130        order_by_columns: &[OrderByColumn],
1131    ) -> Result<DataView> {
1132        // Build list of (source_column_index, ascending) tuples
1133        let mut sort_columns = Vec::new();
1134
1135        for order_col in order_by_columns {
1136            // Try to find the column index
1137            // Check in the current view's source table (this handles both regular columns and computed columns)
1138            // This is especially important after GROUP BY where we have a new result table with aggregate aliases
1139            let col_index = view
1140                .source()
1141                .get_column_index(&order_col.column)
1142                .ok_or_else(|| {
1143                    // If not found, provide helpful error with suggestions
1144                    let suggestion = self.find_similar_column(view.source(), &order_col.column);
1145                    match suggestion {
1146                        Some(similar) => anyhow::anyhow!(
1147                            "Column '{}' not found. Did you mean '{}'?",
1148                            order_col.column,
1149                            similar
1150                        ),
1151                        None => {
1152                            // Also list available columns for debugging
1153                            let available_cols = view.source().column_names().join(", ");
1154                            anyhow::anyhow!(
1155                                "Column '{}' not found. Available columns: {}",
1156                                order_col.column,
1157                                available_cols
1158                            )
1159                        }
1160                    }
1161                })?;
1162
1163            let ascending = matches!(order_col.direction, SortDirection::Asc);
1164            sort_columns.push((col_index, ascending));
1165        }
1166
1167        // Apply multi-column sorting
1168        view.apply_multi_sort(&sort_columns)?;
1169        Ok(view)
1170    }
1171
1172    /// Apply GROUP BY to the view with optional HAVING clause
1173    fn apply_group_by(
1174        &self,
1175        view: DataView,
1176        group_by_exprs: &[SqlExpression],
1177        select_items: &[SelectItem],
1178        having: Option<&SqlExpression>,
1179    ) -> Result<DataView> {
1180        // Use the new expression-based GROUP BY implementation
1181        self.apply_group_by_expressions(
1182            view,
1183            group_by_exprs,
1184            select_items,
1185            having,
1186            self.case_insensitive,
1187            self.date_notation.clone(),
1188        )
1189    }
1190}
1191
1192#[cfg(test)]
1193mod tests {
1194    use super::*;
1195    use crate::data::datatable::{DataColumn, DataRow, DataValue};
1196
1197    fn create_test_table() -> Arc<DataTable> {
1198        let mut table = DataTable::new("test");
1199
1200        // Add columns
1201        table.add_column(DataColumn::new("id"));
1202        table.add_column(DataColumn::new("name"));
1203        table.add_column(DataColumn::new("age"));
1204
1205        // Add rows
1206        table
1207            .add_row(DataRow::new(vec![
1208                DataValue::Integer(1),
1209                DataValue::String("Alice".to_string()),
1210                DataValue::Integer(30),
1211            ]))
1212            .unwrap();
1213
1214        table
1215            .add_row(DataRow::new(vec![
1216                DataValue::Integer(2),
1217                DataValue::String("Bob".to_string()),
1218                DataValue::Integer(25),
1219            ]))
1220            .unwrap();
1221
1222        table
1223            .add_row(DataRow::new(vec![
1224                DataValue::Integer(3),
1225                DataValue::String("Charlie".to_string()),
1226                DataValue::Integer(35),
1227            ]))
1228            .unwrap();
1229
1230        Arc::new(table)
1231    }
1232
1233    #[test]
1234    fn test_select_all() {
1235        let table = create_test_table();
1236        let engine = QueryEngine::new();
1237
1238        let view = engine
1239            .execute(table.clone(), "SELECT * FROM users")
1240            .unwrap();
1241        assert_eq!(view.row_count(), 3);
1242        assert_eq!(view.column_count(), 3);
1243    }
1244
1245    #[test]
1246    fn test_select_columns() {
1247        let table = create_test_table();
1248        let engine = QueryEngine::new();
1249
1250        let view = engine
1251            .execute(table.clone(), "SELECT name, age FROM users")
1252            .unwrap();
1253        assert_eq!(view.row_count(), 3);
1254        assert_eq!(view.column_count(), 2);
1255    }
1256
1257    #[test]
1258    fn test_select_with_limit() {
1259        let table = create_test_table();
1260        let engine = QueryEngine::new();
1261
1262        let view = engine
1263            .execute(table.clone(), "SELECT * FROM users LIMIT 2")
1264            .unwrap();
1265        assert_eq!(view.row_count(), 2);
1266    }
1267
1268    #[test]
1269    fn test_type_coercion_contains() {
1270        // Initialize tracing for debug output
1271        let _ = tracing_subscriber::fmt()
1272            .with_max_level(tracing::Level::DEBUG)
1273            .try_init();
1274
1275        let mut table = DataTable::new("test");
1276        table.add_column(DataColumn::new("id"));
1277        table.add_column(DataColumn::new("status"));
1278        table.add_column(DataColumn::new("price"));
1279
1280        // Add test data with mixed types
1281        table
1282            .add_row(DataRow::new(vec![
1283                DataValue::Integer(1),
1284                DataValue::String("Pending".to_string()),
1285                DataValue::Float(99.99),
1286            ]))
1287            .unwrap();
1288
1289        table
1290            .add_row(DataRow::new(vec![
1291                DataValue::Integer(2),
1292                DataValue::String("Confirmed".to_string()),
1293                DataValue::Float(150.50),
1294            ]))
1295            .unwrap();
1296
1297        table
1298            .add_row(DataRow::new(vec![
1299                DataValue::Integer(3),
1300                DataValue::String("Pending".to_string()),
1301                DataValue::Float(75.00),
1302            ]))
1303            .unwrap();
1304
1305        let table = Arc::new(table);
1306        let engine = QueryEngine::new();
1307
1308        println!("\n=== Testing WHERE clause with Contains ===");
1309        println!("Table has {} rows", table.row_count());
1310        for i in 0..table.row_count() {
1311            let status = table.get_value(i, 1);
1312            println!("Row {i}: status = {status:?}");
1313        }
1314
1315        // Test 1: Basic string contains (should work)
1316        println!("\n--- Test 1: status.Contains('pend') ---");
1317        let result = engine.execute(
1318            table.clone(),
1319            "SELECT * FROM test WHERE status.Contains('pend')",
1320        );
1321        match result {
1322            Ok(view) => {
1323                println!("SUCCESS: Found {} matching rows", view.row_count());
1324                assert_eq!(view.row_count(), 2); // Should find both Pending rows
1325            }
1326            Err(e) => {
1327                panic!("Query failed: {e}");
1328            }
1329        }
1330
1331        // Test 2: Numeric contains (should work with type coercion)
1332        println!("\n--- Test 2: price.Contains('9') ---");
1333        let result = engine.execute(
1334            table.clone(),
1335            "SELECT * FROM test WHERE price.Contains('9')",
1336        );
1337        match result {
1338            Ok(view) => {
1339                println!(
1340                    "SUCCESS: Found {} matching rows with price containing '9'",
1341                    view.row_count()
1342                );
1343                // Should find 99.99 row
1344                assert!(view.row_count() >= 1);
1345            }
1346            Err(e) => {
1347                panic!("Numeric coercion query failed: {e}");
1348            }
1349        }
1350
1351        println!("\n=== All tests passed! ===");
1352    }
1353
1354    #[test]
1355    fn test_not_in_clause() {
1356        // Initialize tracing for debug output
1357        let _ = tracing_subscriber::fmt()
1358            .with_max_level(tracing::Level::DEBUG)
1359            .try_init();
1360
1361        let mut table = DataTable::new("test");
1362        table.add_column(DataColumn::new("id"));
1363        table.add_column(DataColumn::new("country"));
1364
1365        // Add test data
1366        table
1367            .add_row(DataRow::new(vec![
1368                DataValue::Integer(1),
1369                DataValue::String("CA".to_string()),
1370            ]))
1371            .unwrap();
1372
1373        table
1374            .add_row(DataRow::new(vec![
1375                DataValue::Integer(2),
1376                DataValue::String("US".to_string()),
1377            ]))
1378            .unwrap();
1379
1380        table
1381            .add_row(DataRow::new(vec![
1382                DataValue::Integer(3),
1383                DataValue::String("UK".to_string()),
1384            ]))
1385            .unwrap();
1386
1387        let table = Arc::new(table);
1388        let engine = QueryEngine::new();
1389
1390        println!("\n=== Testing NOT IN clause ===");
1391        println!("Table has {} rows", table.row_count());
1392        for i in 0..table.row_count() {
1393            let country = table.get_value(i, 1);
1394            println!("Row {i}: country = {country:?}");
1395        }
1396
1397        // Test NOT IN clause - should exclude CA, return US and UK (2 rows)
1398        println!("\n--- Test: country NOT IN ('CA') ---");
1399        let result = engine.execute(
1400            table.clone(),
1401            "SELECT * FROM test WHERE country NOT IN ('CA')",
1402        );
1403        match result {
1404            Ok(view) => {
1405                println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
1406                assert_eq!(view.row_count(), 2); // Should find US and UK
1407            }
1408            Err(e) => {
1409                panic!("NOT IN query failed: {e}");
1410            }
1411        }
1412
1413        println!("\n=== NOT IN test complete! ===");
1414    }
1415
1416    #[test]
1417    fn test_case_insensitive_in_and_not_in() {
1418        // Initialize tracing for debug output
1419        let _ = tracing_subscriber::fmt()
1420            .with_max_level(tracing::Level::DEBUG)
1421            .try_init();
1422
1423        let mut table = DataTable::new("test");
1424        table.add_column(DataColumn::new("id"));
1425        table.add_column(DataColumn::new("country"));
1426
1427        // Add test data with mixed case
1428        table
1429            .add_row(DataRow::new(vec![
1430                DataValue::Integer(1),
1431                DataValue::String("CA".to_string()), // uppercase
1432            ]))
1433            .unwrap();
1434
1435        table
1436            .add_row(DataRow::new(vec![
1437                DataValue::Integer(2),
1438                DataValue::String("us".to_string()), // lowercase
1439            ]))
1440            .unwrap();
1441
1442        table
1443            .add_row(DataRow::new(vec![
1444                DataValue::Integer(3),
1445                DataValue::String("UK".to_string()), // uppercase
1446            ]))
1447            .unwrap();
1448
1449        let table = Arc::new(table);
1450
1451        println!("\n=== Testing Case-Insensitive IN clause ===");
1452        println!("Table has {} rows", table.row_count());
1453        for i in 0..table.row_count() {
1454            let country = table.get_value(i, 1);
1455            println!("Row {i}: country = {country:?}");
1456        }
1457
1458        // Test case-insensitive IN - should match 'CA' with 'ca'
1459        println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
1460        let engine = QueryEngine::with_case_insensitive(true);
1461        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1462        match result {
1463            Ok(view) => {
1464                println!(
1465                    "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
1466                    view.row_count()
1467                );
1468                assert_eq!(view.row_count(), 1); // Should find CA row
1469            }
1470            Err(e) => {
1471                panic!("Case-insensitive IN query failed: {e}");
1472            }
1473        }
1474
1475        // Test case-insensitive NOT IN - should exclude 'CA' when searching for 'ca'
1476        println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
1477        let result = engine.execute(
1478            table.clone(),
1479            "SELECT * FROM test WHERE country NOT IN ('ca')",
1480        );
1481        match result {
1482            Ok(view) => {
1483                println!(
1484                    "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
1485                    view.row_count()
1486                );
1487                assert_eq!(view.row_count(), 2); // Should find us and UK rows
1488            }
1489            Err(e) => {
1490                panic!("Case-insensitive NOT IN query failed: {e}");
1491            }
1492        }
1493
1494        // Test case-sensitive (default) - should NOT match 'CA' with 'ca'
1495        println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
1496        let engine_case_sensitive = QueryEngine::new(); // defaults to case_insensitive=false
1497        let result = engine_case_sensitive
1498            .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1499        match result {
1500            Ok(view) => {
1501                println!(
1502                    "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
1503                    view.row_count()
1504                );
1505                assert_eq!(view.row_count(), 0); // Should find no rows (CA != ca)
1506            }
1507            Err(e) => {
1508                panic!("Case-sensitive IN query failed: {e}");
1509            }
1510        }
1511
1512        println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
1513    }
1514
1515    #[test]
1516    #[ignore = "Parentheses in WHERE clause not yet implemented"]
1517    fn test_parentheses_in_where_clause() {
1518        // Initialize tracing for debug output
1519        let _ = tracing_subscriber::fmt()
1520            .with_max_level(tracing::Level::DEBUG)
1521            .try_init();
1522
1523        let mut table = DataTable::new("test");
1524        table.add_column(DataColumn::new("id"));
1525        table.add_column(DataColumn::new("status"));
1526        table.add_column(DataColumn::new("priority"));
1527
1528        // Add test data
1529        table
1530            .add_row(DataRow::new(vec![
1531                DataValue::Integer(1),
1532                DataValue::String("Pending".to_string()),
1533                DataValue::String("High".to_string()),
1534            ]))
1535            .unwrap();
1536
1537        table
1538            .add_row(DataRow::new(vec![
1539                DataValue::Integer(2),
1540                DataValue::String("Complete".to_string()),
1541                DataValue::String("High".to_string()),
1542            ]))
1543            .unwrap();
1544
1545        table
1546            .add_row(DataRow::new(vec![
1547                DataValue::Integer(3),
1548                DataValue::String("Pending".to_string()),
1549                DataValue::String("Low".to_string()),
1550            ]))
1551            .unwrap();
1552
1553        table
1554            .add_row(DataRow::new(vec![
1555                DataValue::Integer(4),
1556                DataValue::String("Complete".to_string()),
1557                DataValue::String("Low".to_string()),
1558            ]))
1559            .unwrap();
1560
1561        let table = Arc::new(table);
1562        let engine = QueryEngine::new();
1563
1564        println!("\n=== Testing Parentheses in WHERE clause ===");
1565        println!("Table has {} rows", table.row_count());
1566        for i in 0..table.row_count() {
1567            let status = table.get_value(i, 1);
1568            let priority = table.get_value(i, 2);
1569            println!("Row {i}: status = {status:?}, priority = {priority:?}");
1570        }
1571
1572        // Test OR with parentheses - should get (Pending AND High) OR (Complete AND Low)
1573        println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
1574        let result = engine.execute(
1575            table.clone(),
1576            "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
1577        );
1578        match result {
1579            Ok(view) => {
1580                println!(
1581                    "SUCCESS: Found {} rows with parenthetical logic",
1582                    view.row_count()
1583                );
1584                assert_eq!(view.row_count(), 2); // Should find rows 1 and 4
1585            }
1586            Err(e) => {
1587                panic!("Parentheses query failed: {e}");
1588            }
1589        }
1590
1591        println!("\n=== Parentheses test complete! ===");
1592    }
1593
1594    #[test]
1595    #[ignore = "Numeric type coercion needs fixing"]
1596    fn test_numeric_type_coercion() {
1597        // Initialize tracing for debug output
1598        let _ = tracing_subscriber::fmt()
1599            .with_max_level(tracing::Level::DEBUG)
1600            .try_init();
1601
1602        let mut table = DataTable::new("test");
1603        table.add_column(DataColumn::new("id"));
1604        table.add_column(DataColumn::new("price"));
1605        table.add_column(DataColumn::new("quantity"));
1606
1607        // Add test data with different numeric types
1608        table
1609            .add_row(DataRow::new(vec![
1610                DataValue::Integer(1),
1611                DataValue::Float(99.50), // Contains '.'
1612                DataValue::Integer(100),
1613            ]))
1614            .unwrap();
1615
1616        table
1617            .add_row(DataRow::new(vec![
1618                DataValue::Integer(2),
1619                DataValue::Float(150.0), // Contains '.' and '0'
1620                DataValue::Integer(200),
1621            ]))
1622            .unwrap();
1623
1624        table
1625            .add_row(DataRow::new(vec![
1626                DataValue::Integer(3),
1627                DataValue::Integer(75), // No decimal point
1628                DataValue::Integer(50),
1629            ]))
1630            .unwrap();
1631
1632        let table = Arc::new(table);
1633        let engine = QueryEngine::new();
1634
1635        println!("\n=== Testing Numeric Type Coercion ===");
1636        println!("Table has {} rows", table.row_count());
1637        for i in 0..table.row_count() {
1638            let price = table.get_value(i, 1);
1639            let quantity = table.get_value(i, 2);
1640            println!("Row {i}: price = {price:?}, quantity = {quantity:?}");
1641        }
1642
1643        // Test Contains on float values - should find rows with decimal points
1644        println!("\n--- Test: price.Contains('.') ---");
1645        let result = engine.execute(
1646            table.clone(),
1647            "SELECT * FROM test WHERE price.Contains('.')",
1648        );
1649        match result {
1650            Ok(view) => {
1651                println!(
1652                    "SUCCESS: Found {} rows with decimal points in price",
1653                    view.row_count()
1654                );
1655                assert_eq!(view.row_count(), 2); // Should find 99.50 and 150.0
1656            }
1657            Err(e) => {
1658                panic!("Numeric Contains query failed: {e}");
1659            }
1660        }
1661
1662        // Test Contains on integer values converted to string
1663        println!("\n--- Test: quantity.Contains('0') ---");
1664        let result = engine.execute(
1665            table.clone(),
1666            "SELECT * FROM test WHERE quantity.Contains('0')",
1667        );
1668        match result {
1669            Ok(view) => {
1670                println!(
1671                    "SUCCESS: Found {} rows with '0' in quantity",
1672                    view.row_count()
1673                );
1674                assert_eq!(view.row_count(), 2); // Should find 100 and 200
1675            }
1676            Err(e) => {
1677                panic!("Integer Contains query failed: {e}");
1678            }
1679        }
1680
1681        println!("\n=== Numeric type coercion test complete! ===");
1682    }
1683
1684    #[test]
1685    fn test_datetime_comparisons() {
1686        // Initialize tracing for debug output
1687        let _ = tracing_subscriber::fmt()
1688            .with_max_level(tracing::Level::DEBUG)
1689            .try_init();
1690
1691        let mut table = DataTable::new("test");
1692        table.add_column(DataColumn::new("id"));
1693        table.add_column(DataColumn::new("created_date"));
1694
1695        // Add test data with date strings (as they would come from CSV)
1696        table
1697            .add_row(DataRow::new(vec![
1698                DataValue::Integer(1),
1699                DataValue::String("2024-12-15".to_string()),
1700            ]))
1701            .unwrap();
1702
1703        table
1704            .add_row(DataRow::new(vec![
1705                DataValue::Integer(2),
1706                DataValue::String("2025-01-15".to_string()),
1707            ]))
1708            .unwrap();
1709
1710        table
1711            .add_row(DataRow::new(vec![
1712                DataValue::Integer(3),
1713                DataValue::String("2025-02-15".to_string()),
1714            ]))
1715            .unwrap();
1716
1717        let table = Arc::new(table);
1718        let engine = QueryEngine::new();
1719
1720        println!("\n=== Testing DateTime Comparisons ===");
1721        println!("Table has {} rows", table.row_count());
1722        for i in 0..table.row_count() {
1723            let date = table.get_value(i, 1);
1724            println!("Row {i}: created_date = {date:?}");
1725        }
1726
1727        // Test DateTime constructor comparison - should find dates after 2025-01-01
1728        println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1729        let result = engine.execute(
1730            table.clone(),
1731            "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1732        );
1733        match result {
1734            Ok(view) => {
1735                println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1736                assert_eq!(view.row_count(), 2); // Should find 2025-01-15 and 2025-02-15
1737            }
1738            Err(e) => {
1739                panic!("DateTime comparison query failed: {e}");
1740            }
1741        }
1742
1743        println!("\n=== DateTime comparison test complete! ===");
1744    }
1745
1746    #[test]
1747    fn test_not_with_method_calls() {
1748        // Initialize tracing for debug output
1749        let _ = tracing_subscriber::fmt()
1750            .with_max_level(tracing::Level::DEBUG)
1751            .try_init();
1752
1753        let mut table = DataTable::new("test");
1754        table.add_column(DataColumn::new("id"));
1755        table.add_column(DataColumn::new("status"));
1756
1757        // Add test data
1758        table
1759            .add_row(DataRow::new(vec![
1760                DataValue::Integer(1),
1761                DataValue::String("Pending Review".to_string()),
1762            ]))
1763            .unwrap();
1764
1765        table
1766            .add_row(DataRow::new(vec![
1767                DataValue::Integer(2),
1768                DataValue::String("Complete".to_string()),
1769            ]))
1770            .unwrap();
1771
1772        table
1773            .add_row(DataRow::new(vec![
1774                DataValue::Integer(3),
1775                DataValue::String("Pending Approval".to_string()),
1776            ]))
1777            .unwrap();
1778
1779        let table = Arc::new(table);
1780        let engine = QueryEngine::with_case_insensitive(true);
1781
1782        println!("\n=== Testing NOT with Method Calls ===");
1783        println!("Table has {} rows", table.row_count());
1784        for i in 0..table.row_count() {
1785            let status = table.get_value(i, 1);
1786            println!("Row {i}: status = {status:?}");
1787        }
1788
1789        // Test NOT with Contains - should exclude rows containing "pend"
1790        println!("\n--- Test: NOT status.Contains('pend') ---");
1791        let result = engine.execute(
1792            table.clone(),
1793            "SELECT * FROM test WHERE NOT status.Contains('pend')",
1794        );
1795        match result {
1796            Ok(view) => {
1797                println!(
1798                    "SUCCESS: Found {} rows NOT containing 'pend'",
1799                    view.row_count()
1800                );
1801                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1802            }
1803            Err(e) => {
1804                panic!("NOT Contains query failed: {e}");
1805            }
1806        }
1807
1808        // Test NOT with StartsWith
1809        println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1810        let result = engine.execute(
1811            table.clone(),
1812            "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1813        );
1814        match result {
1815            Ok(view) => {
1816                println!(
1817                    "SUCCESS: Found {} rows NOT starting with 'Pending'",
1818                    view.row_count()
1819                );
1820                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1821            }
1822            Err(e) => {
1823                panic!("NOT StartsWith query failed: {e}");
1824            }
1825        }
1826
1827        println!("\n=== NOT with method calls test complete! ===");
1828    }
1829
1830    #[test]
1831    #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1832    fn test_complex_logical_expressions() {
1833        // Initialize tracing for debug output
1834        let _ = tracing_subscriber::fmt()
1835            .with_max_level(tracing::Level::DEBUG)
1836            .try_init();
1837
1838        let mut table = DataTable::new("test");
1839        table.add_column(DataColumn::new("id"));
1840        table.add_column(DataColumn::new("status"));
1841        table.add_column(DataColumn::new("priority"));
1842        table.add_column(DataColumn::new("assigned"));
1843
1844        // Add comprehensive test data
1845        table
1846            .add_row(DataRow::new(vec![
1847                DataValue::Integer(1),
1848                DataValue::String("Pending".to_string()),
1849                DataValue::String("High".to_string()),
1850                DataValue::String("John".to_string()),
1851            ]))
1852            .unwrap();
1853
1854        table
1855            .add_row(DataRow::new(vec![
1856                DataValue::Integer(2),
1857                DataValue::String("Complete".to_string()),
1858                DataValue::String("High".to_string()),
1859                DataValue::String("Jane".to_string()),
1860            ]))
1861            .unwrap();
1862
1863        table
1864            .add_row(DataRow::new(vec![
1865                DataValue::Integer(3),
1866                DataValue::String("Pending".to_string()),
1867                DataValue::String("Low".to_string()),
1868                DataValue::String("John".to_string()),
1869            ]))
1870            .unwrap();
1871
1872        table
1873            .add_row(DataRow::new(vec![
1874                DataValue::Integer(4),
1875                DataValue::String("In Progress".to_string()),
1876                DataValue::String("Medium".to_string()),
1877                DataValue::String("Jane".to_string()),
1878            ]))
1879            .unwrap();
1880
1881        let table = Arc::new(table);
1882        let engine = QueryEngine::new();
1883
1884        println!("\n=== Testing Complex Logical Expressions ===");
1885        println!("Table has {} rows", table.row_count());
1886        for i in 0..table.row_count() {
1887            let status = table.get_value(i, 1);
1888            let priority = table.get_value(i, 2);
1889            let assigned = table.get_value(i, 3);
1890            println!(
1891                "Row {i}: status = {status:?}, priority = {priority:?}, assigned = {assigned:?}"
1892            );
1893        }
1894
1895        // Test complex AND/OR logic
1896        println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
1897        let result = engine.execute(
1898            table.clone(),
1899            "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
1900        );
1901        match result {
1902            Ok(view) => {
1903                println!(
1904                    "SUCCESS: Found {} rows with complex logic",
1905                    view.row_count()
1906                );
1907                assert_eq!(view.row_count(), 2); // Should find rows 1 and 3 (both Pending, one High priority, both assigned to John)
1908            }
1909            Err(e) => {
1910                panic!("Complex logic query failed: {e}");
1911            }
1912        }
1913
1914        // Test NOT with complex expressions
1915        println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
1916        let result = engine.execute(
1917            table.clone(),
1918            "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
1919        );
1920        match result {
1921            Ok(view) => {
1922                println!(
1923                    "SUCCESS: Found {} rows with NOT complex logic",
1924                    view.row_count()
1925                );
1926                assert_eq!(view.row_count(), 2); // Should find rows 1 (Pending+High) and 4 (In Progress+Medium)
1927            }
1928            Err(e) => {
1929                panic!("NOT complex logic query failed: {e}");
1930            }
1931        }
1932
1933        println!("\n=== Complex logical expressions test complete! ===");
1934    }
1935
1936    #[test]
1937    fn test_mixed_data_types_and_edge_cases() {
1938        // Initialize tracing for debug output
1939        let _ = tracing_subscriber::fmt()
1940            .with_max_level(tracing::Level::DEBUG)
1941            .try_init();
1942
1943        let mut table = DataTable::new("test");
1944        table.add_column(DataColumn::new("id"));
1945        table.add_column(DataColumn::new("value"));
1946        table.add_column(DataColumn::new("nullable_field"));
1947
1948        // Add test data with mixed types and edge cases
1949        table
1950            .add_row(DataRow::new(vec![
1951                DataValue::Integer(1),
1952                DataValue::String("123.45".to_string()),
1953                DataValue::String("present".to_string()),
1954            ]))
1955            .unwrap();
1956
1957        table
1958            .add_row(DataRow::new(vec![
1959                DataValue::Integer(2),
1960                DataValue::Float(678.90),
1961                DataValue::Null,
1962            ]))
1963            .unwrap();
1964
1965        table
1966            .add_row(DataRow::new(vec![
1967                DataValue::Integer(3),
1968                DataValue::Boolean(true),
1969                DataValue::String("also present".to_string()),
1970            ]))
1971            .unwrap();
1972
1973        table
1974            .add_row(DataRow::new(vec![
1975                DataValue::Integer(4),
1976                DataValue::String("false".to_string()),
1977                DataValue::Null,
1978            ]))
1979            .unwrap();
1980
1981        let table = Arc::new(table);
1982        let engine = QueryEngine::new();
1983
1984        println!("\n=== Testing Mixed Data Types and Edge Cases ===");
1985        println!("Table has {} rows", table.row_count());
1986        for i in 0..table.row_count() {
1987            let value = table.get_value(i, 1);
1988            let nullable = table.get_value(i, 2);
1989            println!("Row {i}: value = {value:?}, nullable_field = {nullable:?}");
1990        }
1991
1992        // Test type coercion with boolean Contains
1993        println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
1994        let result = engine.execute(
1995            table.clone(),
1996            "SELECT * FROM test WHERE value.Contains('true')",
1997        );
1998        match result {
1999            Ok(view) => {
2000                println!(
2001                    "SUCCESS: Found {} rows with boolean coercion",
2002                    view.row_count()
2003                );
2004                assert_eq!(view.row_count(), 1); // Should find the boolean true row
2005            }
2006            Err(e) => {
2007                panic!("Boolean coercion query failed: {e}");
2008            }
2009        }
2010
2011        // Test multiple IN values with mixed types
2012        println!("\n--- Test: id IN (1, 3) ---");
2013        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
2014        match result {
2015            Ok(view) => {
2016                println!("SUCCESS: Found {} rows with IN clause", view.row_count());
2017                assert_eq!(view.row_count(), 2); // Should find rows with id 1 and 3
2018            }
2019            Err(e) => {
2020                panic!("Multiple IN values query failed: {e}");
2021            }
2022        }
2023
2024        println!("\n=== Mixed data types test complete! ===");
2025    }
2026
2027    #[test]
2028    fn test_not_in_parsing() {
2029        use crate::sql::recursive_parser::Parser;
2030
2031        let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
2032        println!("\n=== Testing NOT IN parsing ===");
2033        println!("Parsing query: {query}");
2034
2035        let mut parser = Parser::new(query);
2036        match parser.parse() {
2037            Ok(statement) => {
2038                println!("Parsed statement: {statement:#?}");
2039                if let Some(where_clause) = statement.where_clause {
2040                    println!("WHERE conditions: {:#?}", where_clause.conditions);
2041                    if let Some(first_condition) = where_clause.conditions.first() {
2042                        println!("First condition expression: {:#?}", first_condition.expr);
2043                    }
2044                }
2045            }
2046            Err(e) => {
2047                panic!("Parse error: {e}");
2048            }
2049        }
2050    }
2051}
2052
2053#[cfg(test)]
2054#[path = "query_engine_tests.rs"]
2055mod query_engine_tests;