sql_cli/data/
query_engine.rs

1use anyhow::{anyhow, Result};
2use std::collections::HashMap;
3use std::sync::Arc;
4use std::time::Instant;
5use tracing::{debug, info};
6
7use crate::config::config::BehaviorConfig;
8use crate::config::global::get_date_notation;
9use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
10use crate::data::data_view::DataView;
11use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
12use crate::data::group_by_expressions::GroupByExpressions;
13use crate::data::hash_join::HashJoinExecutor;
14use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
15use crate::data::subquery_executor::SubqueryExecutor;
16use crate::data::virtual_table_generator::VirtualTableGenerator;
17use crate::execution_plan::{ExecutionPlan, ExecutionPlanBuilder, StepType};
18use crate::sql::aggregates::{contains_aggregate, is_aggregate_compatible};
19use crate::sql::parser::ast::TableSource;
20use crate::sql::recursive_parser::{
21    CTEType, OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection, SqlExpression,
22    TableFunction,
23};
24
25/// Query engine that executes SQL directly on `DataTable`
26#[derive(Clone)]
27pub struct QueryEngine {
28    case_insensitive: bool,
29    date_notation: String,
30    behavior_config: Option<BehaviorConfig>,
31}
32
33impl Default for QueryEngine {
34    fn default() -> Self {
35        Self::new()
36    }
37}
38
39impl QueryEngine {
40    #[must_use]
41    pub fn new() -> Self {
42        Self {
43            case_insensitive: false,
44            date_notation: get_date_notation(),
45            behavior_config: None,
46        }
47    }
48
49    #[must_use]
50    pub fn with_behavior_config(config: BehaviorConfig) -> Self {
51        let case_insensitive = config.case_insensitive_default;
52        // Use get_date_notation() to respect environment variable override
53        let date_notation = get_date_notation();
54        Self {
55            case_insensitive,
56            date_notation,
57            behavior_config: Some(config),
58        }
59    }
60
61    #[must_use]
62    pub fn with_date_notation(_date_notation: String) -> Self {
63        Self {
64            case_insensitive: false,
65            date_notation: get_date_notation(), // Always use the global function
66            behavior_config: None,
67        }
68    }
69
70    #[must_use]
71    pub fn with_case_insensitive(case_insensitive: bool) -> Self {
72        Self {
73            case_insensitive,
74            date_notation: get_date_notation(),
75            behavior_config: None,
76        }
77    }
78
79    #[must_use]
80    pub fn with_case_insensitive_and_date_notation(
81        case_insensitive: bool,
82        _date_notation: String, // Keep parameter for compatibility but use get_date_notation()
83    ) -> Self {
84        Self {
85            case_insensitive,
86            date_notation: get_date_notation(), // Always use the global function
87            behavior_config: None,
88        }
89    }
90
91    /// Find a column name similar to the given name using edit distance
92    fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
93        let columns = table.column_names();
94        let mut best_match: Option<(String, usize)> = None;
95
96        for col in columns {
97            let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
98            // Only suggest if distance is small (likely a typo)
99            // Allow up to 3 edits for longer names
100            let max_distance = if name.len() > 10 { 3 } else { 2 };
101            if distance <= max_distance {
102                match &best_match {
103                    None => best_match = Some((col, distance)),
104                    Some((_, best_dist)) if distance < *best_dist => {
105                        best_match = Some((col, distance));
106                    }
107                    _ => {}
108                }
109            }
110        }
111
112        best_match.map(|(name, _)| name)
113    }
114
115    /// Calculate Levenshtein edit distance between two strings
116    fn edit_distance(&self, s1: &str, s2: &str) -> usize {
117        let len1 = s1.len();
118        let len2 = s2.len();
119        let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
120
121        for i in 0..=len1 {
122            matrix[i][0] = i;
123        }
124        for j in 0..=len2 {
125            matrix[0][j] = j;
126        }
127
128        for (i, c1) in s1.chars().enumerate() {
129            for (j, c2) in s2.chars().enumerate() {
130                let cost = usize::from(c1 != c2);
131                matrix[i + 1][j + 1] = std::cmp::min(
132                    matrix[i][j + 1] + 1, // deletion
133                    std::cmp::min(
134                        matrix[i + 1][j] + 1, // insertion
135                        matrix[i][j] + cost,  // substitution
136                    ),
137                );
138            }
139        }
140
141        matrix[len1][len2]
142    }
143
144    /// Execute a SQL query on a `DataTable` and return a `DataView` (for backward compatibility)
145    pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
146        let (view, _plan) = self.execute_with_plan(table, sql)?;
147        Ok(view)
148    }
149
150    /// Execute a parsed SelectStatement on a `DataTable` and return a `DataView`
151    pub fn execute_statement(
152        &self,
153        table: Arc<DataTable>,
154        statement: SelectStatement,
155    ) -> Result<DataView> {
156        // First process CTEs to build context
157        let mut cte_context = HashMap::new();
158        for cte in &statement.ctes {
159            debug!("QueryEngine: Pre-processing CTE '{}'...", cte.name);
160            // Execute the CTE based on its type
161            let cte_result = match &cte.cte_type {
162                CTEType::Standard(query) => {
163                    // Execute the CTE query (it might reference earlier CTEs)
164                    self.build_view_with_context(table.clone(), query.clone(), &mut cte_context)?
165                }
166                CTEType::Web(web_spec) => {
167                    // Fetch data from URL
168                    use crate::web::http_fetcher::WebDataFetcher;
169
170                    let fetcher = WebDataFetcher::new()?;
171                    let data_table = fetcher.fetch(web_spec, &cte.name)?;
172
173                    // Convert DataTable to DataView
174                    DataView::new(Arc::new(data_table))
175                }
176            };
177            // Store the result in the context for later use
178            cte_context.insert(cte.name.clone(), Arc::new(cte_result));
179            debug!(
180                "QueryEngine: CTE '{}' pre-processed, stored in context",
181                cte.name
182            );
183        }
184
185        // Now process subqueries with CTE context available
186        let mut subquery_executor =
187            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
188        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
189
190        // Build the view with the same CTE context
191        self.build_view_with_context(table, processed_statement, &mut cte_context)
192    }
193
194    /// Execute a statement with provided CTE context (for subqueries)
195    pub fn execute_statement_with_cte_context(
196        &self,
197        table: Arc<DataTable>,
198        statement: SelectStatement,
199        cte_context: &HashMap<String, Arc<DataView>>,
200    ) -> Result<DataView> {
201        // Clone the context so we can add any CTEs from this statement
202        let mut local_context = cte_context.clone();
203
204        // Process any CTEs in this statement (they might be nested)
205        for cte in &statement.ctes {
206            debug!("QueryEngine: Processing nested CTE '{}'...", cte.name);
207            let cte_result = match &cte.cte_type {
208                CTEType::Standard(query) => {
209                    self.build_view_with_context(table.clone(), query.clone(), &mut local_context)?
210                }
211                CTEType::Web(web_spec) => {
212                    // Fetch data from URL
213                    use crate::web::http_fetcher::WebDataFetcher;
214
215                    let fetcher = WebDataFetcher::new()?;
216                    let data_table = fetcher.fetch(web_spec, &cte.name)?;
217
218                    // Convert DataTable to DataView
219                    DataView::new(Arc::new(data_table))
220                }
221            };
222            local_context.insert(cte.name.clone(), Arc::new(cte_result));
223        }
224
225        // Process subqueries with the complete context
226        let mut subquery_executor =
227            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), local_context.clone());
228        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
229
230        // Build the view
231        self.build_view_with_context(table, processed_statement, &mut local_context)
232    }
233
234    /// Execute a query and return both the result and the execution plan
235    pub fn execute_with_plan(
236        &self,
237        table: Arc<DataTable>,
238        sql: &str,
239    ) -> Result<(DataView, ExecutionPlan)> {
240        let mut plan_builder = ExecutionPlanBuilder::new();
241        let start_time = Instant::now();
242
243        // Parse the SQL query
244        plan_builder.begin_step(StepType::Parse, "Parse SQL query".to_string());
245        plan_builder.add_detail(format!("Query: {}", sql));
246        let mut parser = Parser::new(sql);
247        let statement = parser
248            .parse()
249            .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
250        plan_builder.add_detail(format!("Parsed successfully"));
251        if let Some(from) = &statement.from_table {
252            plan_builder.add_detail(format!("FROM: {}", from));
253        }
254        if statement.where_clause.is_some() {
255            plan_builder.add_detail("WHERE clause present".to_string());
256        }
257        plan_builder.end_step();
258
259        // First process CTEs to build context
260        let mut cte_context = HashMap::new();
261
262        if !statement.ctes.is_empty() {
263            plan_builder.begin_step(
264                StepType::CTE,
265                format!("Process {} CTEs", statement.ctes.len()),
266            );
267
268            for cte in &statement.ctes {
269                let cte_start = Instant::now();
270                plan_builder.begin_step(StepType::CTE, format!("CTE '{}'", cte.name));
271
272                let cte_result = match &cte.cte_type {
273                    CTEType::Standard(query) => {
274                        // Add CTE query details
275                        if let Some(from) = &query.from_table {
276                            plan_builder.add_detail(format!("Source: {}", from));
277                        }
278                        if query.where_clause.is_some() {
279                            plan_builder.add_detail("Has WHERE clause".to_string());
280                        }
281                        if query.group_by.is_some() {
282                            plan_builder.add_detail("Has GROUP BY".to_string());
283                        }
284
285                        debug!(
286                            "QueryEngine: Processing CTE '{}' with existing context: {:?}",
287                            cte.name,
288                            cte_context.keys().collect::<Vec<_>>()
289                        );
290
291                        // Process subqueries in the CTE's query FIRST
292                        // This allows the subqueries to see all previously defined CTEs
293                        let mut subquery_executor = SubqueryExecutor::with_cte_context(
294                            self.clone(),
295                            table.clone(),
296                            cte_context.clone(),
297                        );
298                        let processed_query = subquery_executor.execute_subqueries(query)?;
299
300                        self.build_view_with_context(
301                            table.clone(),
302                            processed_query,
303                            &mut cte_context,
304                        )?
305                    }
306                    CTEType::Web(web_spec) => {
307                        plan_builder.add_detail(format!("URL: {}", web_spec.url));
308                        if let Some(format) = &web_spec.format {
309                            plan_builder.add_detail(format!("Format: {:?}", format));
310                        }
311                        if let Some(cache) = web_spec.cache_seconds {
312                            plan_builder.add_detail(format!("Cache: {} seconds", cache));
313                        }
314
315                        // Fetch data from URL
316                        use crate::web::http_fetcher::WebDataFetcher;
317
318                        let fetcher = WebDataFetcher::new()?;
319                        let data_table = fetcher.fetch(web_spec, &cte.name)?;
320
321                        // Convert DataTable to DataView
322                        DataView::new(Arc::new(data_table))
323                    }
324                };
325
326                // Record CTE statistics
327                plan_builder.set_rows_out(cte_result.row_count());
328                plan_builder.add_detail(format!(
329                    "Result: {} rows, {} columns",
330                    cte_result.row_count(),
331                    cte_result.column_count()
332                ));
333                plan_builder.add_detail(format!(
334                    "Execution time: {:.3}ms",
335                    cte_start.elapsed().as_secs_f64() * 1000.0
336                ));
337
338                debug!(
339                    "QueryEngine: Storing CTE '{}' in context with {} rows",
340                    cte.name,
341                    cte_result.row_count()
342                );
343                cte_context.insert(cte.name.clone(), Arc::new(cte_result));
344                plan_builder.end_step();
345            }
346
347            plan_builder.add_detail(format!(
348                "All {} CTEs cached in context",
349                statement.ctes.len()
350            ));
351            plan_builder.end_step();
352        }
353
354        // Process subqueries in the statement with CTE context
355        plan_builder.begin_step(StepType::Subquery, "Process subqueries".to_string());
356        let mut subquery_executor =
357            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
358
359        // Check if there are subqueries to process
360        let has_subqueries = statement.where_clause.as_ref().map_or(false, |w| {
361            // This is a simplified check - in reality we'd need to walk the AST
362            format!("{:?}", w).contains("Subquery")
363        });
364
365        if has_subqueries {
366            plan_builder.add_detail("Evaluating subqueries in WHERE clause".to_string());
367        }
368
369        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
370
371        if has_subqueries {
372            plan_builder.add_detail("Subqueries replaced with materialized values".to_string());
373        } else {
374            plan_builder.add_detail("No subqueries to process".to_string());
375        }
376
377        plan_builder.end_step();
378        let result = self.build_view_with_context_and_plan(
379            table,
380            processed_statement,
381            &mut cte_context,
382            &mut plan_builder,
383        )?;
384
385        let total_duration = start_time.elapsed();
386        info!(
387            "Query execution complete: total={:?}, rows={}",
388            total_duration,
389            result.row_count()
390        );
391
392        let plan = plan_builder.build();
393        Ok((result, plan))
394    }
395
396    /// Build a `DataView` from a parsed SQL statement
397    fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
398        let mut cte_context = HashMap::new();
399        self.build_view_with_context(table, statement, &mut cte_context)
400    }
401
402    /// Build a DataView from a SelectStatement with CTE context
403    fn build_view_with_context(
404        &self,
405        table: Arc<DataTable>,
406        statement: SelectStatement,
407        cte_context: &mut HashMap<String, Arc<DataView>>,
408    ) -> Result<DataView> {
409        let mut dummy_plan = ExecutionPlanBuilder::new();
410        self.build_view_with_context_and_plan(table, statement, cte_context, &mut dummy_plan)
411    }
412
413    /// Build a DataView from a SelectStatement with CTE context and execution plan tracking
414    fn build_view_with_context_and_plan(
415        &self,
416        table: Arc<DataTable>,
417        statement: SelectStatement,
418        cte_context: &mut HashMap<String, Arc<DataView>>,
419        plan: &mut ExecutionPlanBuilder,
420    ) -> Result<DataView> {
421        // First, process any CTEs that aren't already in the context
422        for cte in &statement.ctes {
423            // Skip if already processed (e.g., by execute_select for WEB CTEs)
424            if cte_context.contains_key(&cte.name) {
425                debug!(
426                    "QueryEngine: CTE '{}' already in context, skipping",
427                    cte.name
428                );
429                continue;
430            }
431
432            debug!("QueryEngine: Processing CTE '{}'...", cte.name);
433            debug!(
434                "QueryEngine: Available CTEs for '{}': {:?}",
435                cte.name,
436                cte_context.keys().collect::<Vec<_>>()
437            );
438
439            // Execute the CTE query (it might reference earlier CTEs)
440            let cte_result = match &cte.cte_type {
441                CTEType::Standard(query) => {
442                    self.build_view_with_context(table.clone(), query.clone(), cte_context)?
443                }
444                CTEType::Web(_web_spec) => {
445                    // Web CTEs should have been processed earlier in execute_select
446                    return Err(anyhow!(
447                        "Web CTEs should be processed in execute_select method"
448                    ));
449                }
450            };
451
452            // Store the result in the context for later use
453            cte_context.insert(cte.name.clone(), Arc::new(cte_result));
454            debug!(
455                "QueryEngine: CTE '{}' processed, stored in context",
456                cte.name
457            );
458        }
459
460        // Determine the source table for the main query
461        let source_table = if let Some(ref table_func) = statement.from_function {
462            // Handle table functions like RANGE()
463            debug!("QueryEngine: Processing table function...");
464            match table_func {
465                TableFunction::Range { start, end, step } => {
466                    // Evaluate expressions to get numeric values
467                    let mut evaluator =
468                        ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
469
470                    // Create a dummy row for evaluating constant expressions
471                    let dummy_row = 0;
472
473                    let start_val = evaluator.evaluate(start, dummy_row)?;
474                    let end_val = evaluator.evaluate(end, dummy_row)?;
475                    let step_val = if let Some(step_expr) = step {
476                        Some(evaluator.evaluate(step_expr, dummy_row)?)
477                    } else {
478                        None
479                    };
480
481                    // Convert DataValues to integers
482                    let start_int = match start_val {
483                        DataValue::Integer(i) => i,
484                        DataValue::Float(f) => f as i64,
485                        _ => return Err(anyhow!("RANGE start must be numeric")),
486                    };
487
488                    let end_int = match end_val {
489                        DataValue::Integer(i) => i,
490                        DataValue::Float(f) => f as i64,
491                        _ => return Err(anyhow!("RANGE end must be numeric")),
492                    };
493
494                    let step_int = if let Some(step) = step_val {
495                        match step {
496                            DataValue::Integer(i) => Some(i),
497                            DataValue::Float(f) => Some(f as i64),
498                            _ => return Err(anyhow!("RANGE step must be numeric")),
499                        }
500                    } else {
501                        None
502                    };
503
504                    // Generate the virtual table
505                    VirtualTableGenerator::generate_range(start_int, end_int, step_int, None)?
506                }
507            }
508        } else if let Some(ref subquery) = statement.from_subquery {
509            // Execute the subquery and use its result as the source
510            debug!("QueryEngine: Processing FROM subquery...");
511            let subquery_result =
512                self.build_view_with_context(table.clone(), *subquery.clone(), cte_context)?;
513
514            // Convert the DataView to a DataTable for use as source
515            // This materializes the subquery result
516            let materialized = self.materialize_view(subquery_result)?;
517            Arc::new(materialized)
518        } else if let Some(ref table_name) = statement.from_table {
519            // Check if this references a CTE
520            if let Some(cte_view) = cte_context.get(table_name) {
521                debug!("QueryEngine: Using CTE '{}' as source table", table_name);
522                // Materialize the CTE view as a table
523                let materialized = self.materialize_view((**cte_view).clone())?;
524                Arc::new(materialized)
525            } else {
526                // Regular table reference - use the provided table
527                table.clone()
528            }
529        } else {
530            // No FROM clause - use the provided table
531            table.clone()
532        };
533
534        // Process JOINs if present
535        let final_table = if !statement.joins.is_empty() {
536            plan.begin_step(
537                StepType::Join,
538                format!("Process {} JOINs", statement.joins.len()),
539            );
540            plan.set_rows_in(source_table.row_count());
541
542            let join_executor = HashJoinExecutor::new(self.case_insensitive);
543            let mut current_table = source_table;
544
545            for (idx, join_clause) in statement.joins.iter().enumerate() {
546                let join_start = Instant::now();
547                plan.begin_step(StepType::Join, format!("JOIN #{}", idx + 1));
548                plan.add_detail(format!("Type: {:?}", join_clause.join_type));
549                plan.add_detail(format!("Left table: {} rows", current_table.row_count()));
550                plan.add_detail(format!(
551                    "Executing {:?} JOIN on {}",
552                    join_clause.join_type, join_clause.condition.left_column
553                ));
554
555                // Resolve the right table for the join
556                let right_table = match &join_clause.table {
557                    TableSource::Table(name) => {
558                        // Check if it's a CTE reference
559                        if let Some(cte_view) = cte_context.get(name) {
560                            let materialized = self.materialize_view((**cte_view).clone())?;
561                            Arc::new(materialized)
562                        } else {
563                            // For now, we need the actual table data
564                            // In a real implementation, this would load from file
565                            return Err(anyhow!("Cannot resolve table '{}' for JOIN", name));
566                        }
567                    }
568                    TableSource::DerivedTable { query, alias: _ } => {
569                        // Execute the subquery
570                        let subquery_result = self.build_view_with_context(
571                            table.clone(),
572                            *query.clone(),
573                            cte_context,
574                        )?;
575                        let materialized = self.materialize_view(subquery_result)?;
576                        Arc::new(materialized)
577                    }
578                };
579
580                // Execute the join
581                let joined = join_executor.execute_join(
582                    current_table.clone(),
583                    join_clause,
584                    right_table.clone(),
585                )?;
586
587                plan.add_detail(format!("Right table: {} rows", right_table.row_count()));
588                plan.set_rows_out(joined.row_count());
589                plan.add_detail(format!("Result: {} rows", joined.row_count()));
590                plan.add_detail(format!(
591                    "Join time: {:.3}ms",
592                    join_start.elapsed().as_secs_f64() * 1000.0
593                ));
594                plan.end_step();
595
596                current_table = Arc::new(joined);
597            }
598
599            plan.set_rows_out(current_table.row_count());
600            plan.add_detail(format!(
601                "Final result after all joins: {} rows",
602                current_table.row_count()
603            ));
604            plan.end_step();
605            current_table
606        } else {
607            source_table
608        };
609
610        // Continue with the existing build_view logic but using final_table
611        self.build_view_internal_with_plan(final_table, statement, plan)
612    }
613
614    /// Materialize a DataView into a new DataTable
615    fn materialize_view(&self, view: DataView) -> Result<DataTable> {
616        let source = view.source();
617        let mut result_table = DataTable::new("derived");
618
619        // Get the visible columns from the view
620        let visible_cols = view.visible_column_indices().to_vec();
621
622        // Copy column definitions
623        for col_idx in &visible_cols {
624            let col = &source.columns[*col_idx];
625            let new_col = DataColumn {
626                name: col.name.clone(),
627                data_type: col.data_type.clone(),
628                nullable: col.nullable,
629                unique_values: col.unique_values,
630                null_count: col.null_count,
631                metadata: col.metadata.clone(),
632            };
633            result_table.add_column(new_col);
634        }
635
636        // Copy visible rows
637        for row_idx in view.visible_row_indices() {
638            let source_row = &source.rows[*row_idx];
639            let mut new_row = DataRow { values: Vec::new() };
640
641            for col_idx in &visible_cols {
642                new_row.values.push(source_row.values[*col_idx].clone());
643            }
644
645            result_table.add_row(new_row);
646        }
647
648        Ok(result_table)
649    }
650
651    fn build_view_internal(
652        &self,
653        table: Arc<DataTable>,
654        statement: SelectStatement,
655    ) -> Result<DataView> {
656        let mut dummy_plan = ExecutionPlanBuilder::new();
657        self.build_view_internal_with_plan(table, statement, &mut dummy_plan)
658    }
659
660    fn build_view_internal_with_plan(
661        &self,
662        table: Arc<DataTable>,
663        statement: SelectStatement,
664        plan: &mut ExecutionPlanBuilder,
665    ) -> Result<DataView> {
666        debug!(
667            "QueryEngine::build_view - select_items: {:?}",
668            statement.select_items
669        );
670        debug!(
671            "QueryEngine::build_view - where_clause: {:?}",
672            statement.where_clause
673        );
674
675        // Start with all rows visible
676        let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
677
678        // Apply WHERE clause filtering using recursive evaluator
679        if let Some(where_clause) = &statement.where_clause {
680            let total_rows = table.row_count();
681            debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
682            debug!("QueryEngine: WHERE clause = {:?}", where_clause);
683
684            plan.begin_step(StepType::Filter, "WHERE clause filtering".to_string());
685            plan.set_rows_in(total_rows);
686            plan.add_detail(format!("Input: {} rows", total_rows));
687
688            // Add details about WHERE conditions
689            for condition in &where_clause.conditions {
690                plan.add_detail(format!("Condition: {:?}", condition.expr));
691            }
692
693            let filter_start = Instant::now();
694            // Filter visible rows based on WHERE clause
695            let mut filtered_rows = Vec::new();
696            for row_idx in visible_rows {
697                // Only log for first few rows to avoid performance impact
698                if row_idx < 3 {
699                    debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
700                }
701                let evaluator = RecursiveWhereEvaluator::with_config(
702                    &table,
703                    self.case_insensitive,
704                    self.date_notation.clone(),
705                );
706                match evaluator.evaluate(where_clause, row_idx) {
707                    Ok(result) => {
708                        if row_idx < 3 {
709                            debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
710                        }
711                        if result {
712                            filtered_rows.push(row_idx);
713                        }
714                    }
715                    Err(e) => {
716                        if row_idx < 3 {
717                            debug!(
718                                "QueryEngine: WHERE evaluation error for row {}: {}",
719                                row_idx, e
720                            );
721                        }
722                        // Propagate WHERE clause errors instead of silently ignoring them
723                        return Err(e);
724                    }
725                }
726            }
727            visible_rows = filtered_rows;
728            let filter_duration = filter_start.elapsed();
729            info!(
730                "WHERE clause filtering: {} rows -> {} rows in {:?}",
731                total_rows,
732                visible_rows.len(),
733                filter_duration
734            );
735
736            plan.set_rows_out(visible_rows.len());
737            plan.add_detail(format!("Output: {} rows", visible_rows.len()));
738            plan.add_detail(format!(
739                "Filter time: {:.3}ms",
740                filter_duration.as_secs_f64() * 1000.0
741            ));
742            plan.end_step();
743        }
744
745        // Create initial DataView with filtered rows
746        let mut view = DataView::new(table.clone());
747        view = view.with_rows(visible_rows);
748
749        // Handle GROUP BY if present
750        if let Some(group_by_exprs) = &statement.group_by {
751            if !group_by_exprs.is_empty() {
752                debug!("QueryEngine: Processing GROUP BY: {:?}", group_by_exprs);
753
754                plan.begin_step(
755                    StepType::GroupBy,
756                    format!("GROUP BY {} expressions", group_by_exprs.len()),
757                );
758                plan.set_rows_in(view.row_count());
759                plan.add_detail(format!("Input: {} rows", view.row_count()));
760                for expr in group_by_exprs {
761                    plan.add_detail(format!("Group by: {:?}", expr));
762                }
763
764                let group_start = Instant::now();
765                view = self.apply_group_by(
766                    view,
767                    group_by_exprs,
768                    &statement.select_items,
769                    statement.having.as_ref(),
770                )?;
771
772                plan.set_rows_out(view.row_count());
773                plan.add_detail(format!("Output: {} groups", view.row_count()));
774                plan.add_detail(format!(
775                    "Group time: {:.3}ms",
776                    group_start.elapsed().as_secs_f64() * 1000.0
777                ));
778                plan.end_step();
779            }
780        } else {
781            // Apply column projection or computed expressions (SELECT clause) - do this AFTER filtering
782            if !statement.select_items.is_empty() {
783                // Check if we have ANY non-star items (not just the first one)
784                let has_non_star_items = statement
785                    .select_items
786                    .iter()
787                    .any(|item| !matches!(item, SelectItem::Star));
788
789                // Apply select items if:
790                // 1. We have computed expressions or explicit columns
791                // 2. OR we have a mix of star and other items (e.g., SELECT *, computed_col)
792                if has_non_star_items || statement.select_items.len() > 1 {
793                    view = self.apply_select_items(view, &statement.select_items)?;
794                }
795                // If it's just a single star, no projection needed
796            } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
797                // Fallback to legacy column projection for backward compatibility
798                let column_indices = self.resolve_column_indices(&table, &statement.columns)?;
799                view = view.with_columns(column_indices);
800            }
801        }
802
803        // Apply DISTINCT if specified
804        if statement.distinct {
805            plan.begin_step(StepType::Distinct, "Remove duplicate rows".to_string());
806            plan.set_rows_in(view.row_count());
807            plan.add_detail(format!("Input: {} rows", view.row_count()));
808
809            let distinct_start = Instant::now();
810            view = self.apply_distinct(view)?;
811
812            plan.set_rows_out(view.row_count());
813            plan.add_detail(format!("Output: {} unique rows", view.row_count()));
814            plan.add_detail(format!(
815                "Distinct time: {:.3}ms",
816                distinct_start.elapsed().as_secs_f64() * 1000.0
817            ));
818            plan.end_step();
819        }
820
821        // Apply ORDER BY sorting
822        if let Some(order_by_columns) = &statement.order_by {
823            if !order_by_columns.is_empty() {
824                plan.begin_step(
825                    StepType::Sort,
826                    format!("ORDER BY {} columns", order_by_columns.len()),
827                );
828                plan.set_rows_in(view.row_count());
829                for col in order_by_columns {
830                    plan.add_detail(format!("{} {:?}", col.column, col.direction));
831                }
832
833                let sort_start = Instant::now();
834                view = self.apply_multi_order_by(view, order_by_columns)?;
835
836                plan.add_detail(format!(
837                    "Sort time: {:.3}ms",
838                    sort_start.elapsed().as_secs_f64() * 1000.0
839                ));
840                plan.end_step();
841            }
842        }
843
844        // Apply LIMIT/OFFSET
845        if let Some(limit) = statement.limit {
846            let offset = statement.offset.unwrap_or(0);
847            plan.begin_step(StepType::Limit, format!("LIMIT {}", limit));
848            plan.set_rows_in(view.row_count());
849            if offset > 0 {
850                plan.add_detail(format!("OFFSET: {}", offset));
851            }
852            view = view.with_limit(limit, offset);
853            plan.set_rows_out(view.row_count());
854            plan.add_detail(format!("Output: {} rows", view.row_count()));
855            plan.end_step();
856        }
857
858        Ok(view)
859    }
860
861    /// Resolve column names to indices
862    fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
863        let mut indices = Vec::new();
864        let table_columns = table.column_names();
865
866        for col_name in columns {
867            let index = table_columns
868                .iter()
869                .position(|c| c.eq_ignore_ascii_case(col_name))
870                .ok_or_else(|| {
871                    let suggestion = self.find_similar_column(table, col_name);
872                    match suggestion {
873                        Some(similar) => anyhow::anyhow!(
874                            "Column '{}' not found. Did you mean '{}'?",
875                            col_name,
876                            similar
877                        ),
878                        None => anyhow::anyhow!("Column '{}' not found", col_name),
879                    }
880                })?;
881            indices.push(index);
882        }
883
884        Ok(indices)
885    }
886
887    /// Apply SELECT items (columns and computed expressions) to create new view
888    fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
889        debug!(
890            "QueryEngine::apply_select_items - items: {:?}",
891            select_items
892        );
893        debug!(
894            "QueryEngine::apply_select_items - input view has {} rows",
895            view.row_count()
896        );
897
898        // Check if this is an aggregate query:
899        // 1. At least one aggregate function exists
900        // 2. All other items are either aggregates or constants (aggregate-compatible)
901        let has_aggregates = select_items.iter().any(|item| match item {
902            SelectItem::Expression { expr, .. } => contains_aggregate(expr),
903            SelectItem::Column(_) => false,
904            SelectItem::Star => false,
905        });
906
907        let all_aggregate_compatible = select_items.iter().all(|item| match item {
908            SelectItem::Expression { expr, .. } => is_aggregate_compatible(expr),
909            SelectItem::Column(_) => false, // Columns are not aggregate-compatible
910            SelectItem::Star => false,      // Star is not aggregate-compatible
911        });
912
913        if has_aggregates && all_aggregate_compatible && view.row_count() > 0 {
914            // Special handling for aggregate queries with constants (no GROUP BY)
915            // These should produce exactly one row
916            debug!("QueryEngine::apply_select_items - detected aggregate query with constants");
917            return self.apply_aggregate_select(view, select_items);
918        }
919
920        // Check if we need to create computed columns
921        let has_computed_expressions = select_items
922            .iter()
923            .any(|item| matches!(item, SelectItem::Expression { .. }));
924
925        debug!(
926            "QueryEngine::apply_select_items - has_computed_expressions: {}",
927            has_computed_expressions
928        );
929
930        if !has_computed_expressions {
931            // Simple case: only columns, use existing projection logic
932            let column_indices = self.resolve_select_columns(view.source(), select_items)?;
933            return Ok(view.with_columns(column_indices));
934        }
935
936        // Complex case: we have computed expressions
937        // IMPORTANT: We create a PROJECTED view, not a new table
938        // This preserves the original DataTable reference
939
940        let source_table = view.source();
941        let visible_rows = view.visible_row_indices();
942
943        // Create a temporary table just for the computed result view
944        // But this table is only used for the current query result
945        let mut computed_table = DataTable::new("query_result");
946
947        // First, expand any Star selectors to actual columns
948        let mut expanded_items = Vec::new();
949        for item in select_items {
950            match item {
951                SelectItem::Star => {
952                    // Expand * to all columns from source table
953                    for col_name in source_table.column_names() {
954                        expanded_items.push(SelectItem::Column(col_name));
955                    }
956                }
957                _ => expanded_items.push(item.clone()),
958            }
959        }
960
961        // Add columns based on expanded SelectItems, handling duplicates
962        let mut column_name_counts: std::collections::HashMap<String, usize> =
963            std::collections::HashMap::new();
964
965        for item in &expanded_items {
966            let base_name = match item {
967                SelectItem::Column(name) => name.clone(),
968                SelectItem::Expression { alias, .. } => alias.clone(),
969                SelectItem::Star => unreachable!("Star should have been expanded"),
970            };
971
972            // Check if this column name has been used before
973            let count = column_name_counts.entry(base_name.clone()).or_insert(0);
974            let column_name = if *count == 0 {
975                // First occurrence, use the name as-is
976                base_name.clone()
977            } else {
978                // Duplicate, append a suffix
979                format!("{base_name}_{count}")
980            };
981            *count += 1;
982
983            computed_table.add_column(DataColumn::new(&column_name));
984        }
985
986        // Calculate values for each row
987        let mut evaluator =
988            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
989
990        for &row_idx in visible_rows {
991            let mut row_values = Vec::new();
992
993            for item in &expanded_items {
994                let value = match item {
995                    SelectItem::Column(col_name) => {
996                        // Simple column reference
997                        let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
998                            let suggestion = self.find_similar_column(source_table, col_name);
999                            match suggestion {
1000                                Some(similar) => anyhow::anyhow!(
1001                                    "Column '{}' not found. Did you mean '{}'?",
1002                                    col_name,
1003                                    similar
1004                                ),
1005                                None => anyhow::anyhow!("Column '{}' not found", col_name),
1006                            }
1007                        })?;
1008                        let row = source_table
1009                            .get_row(row_idx)
1010                            .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
1011                        row.get(col_idx)
1012                            .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
1013                            .clone()
1014                    }
1015                    SelectItem::Expression { expr, .. } => {
1016                        // Computed expression
1017                        evaluator.evaluate(expr, row_idx)?
1018                    }
1019                    SelectItem::Star => unreachable!("Star should have been expanded"),
1020                };
1021                row_values.push(value);
1022            }
1023
1024            computed_table
1025                .add_row(DataRow::new(row_values))
1026                .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
1027        }
1028
1029        // Return a view of the computed result
1030        // This is a temporary view for this query only
1031        Ok(DataView::new(Arc::new(computed_table)))
1032    }
1033
1034    /// Apply aggregate-only SELECT (no GROUP BY - produces single row)
1035    fn apply_aggregate_select(
1036        &self,
1037        view: DataView,
1038        select_items: &[SelectItem],
1039    ) -> Result<DataView> {
1040        debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
1041
1042        let source_table = view.source();
1043        let mut result_table = DataTable::new("aggregate_result");
1044
1045        // Add columns for each select item
1046        for item in select_items {
1047            let column_name = match item {
1048                SelectItem::Expression { alias, .. } => alias.clone(),
1049                _ => unreachable!("Should only have expressions in aggregate-only query"),
1050            };
1051            result_table.add_column(DataColumn::new(&column_name));
1052        }
1053
1054        // Create evaluator with visible rows from the view (for filtered aggregates)
1055        let visible_rows = view.visible_row_indices().to_vec();
1056        let mut evaluator =
1057            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
1058                .with_visible_rows(visible_rows);
1059
1060        // Evaluate each aggregate expression once (they handle all rows internally)
1061        let mut row_values = Vec::new();
1062        for item in select_items {
1063            match item {
1064                SelectItem::Expression { expr, .. } => {
1065                    // The evaluator will handle aggregates over all rows
1066                    // We pass row_index=0 but aggregates ignore it and process all rows
1067                    let value = evaluator.evaluate(expr, 0)?;
1068                    row_values.push(value);
1069                }
1070                _ => unreachable!("Should only have expressions in aggregate-only query"),
1071            }
1072        }
1073
1074        // Add the single result row
1075        result_table
1076            .add_row(DataRow::new(row_values))
1077            .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
1078
1079        Ok(DataView::new(Arc::new(result_table)))
1080    }
1081
1082    /// Resolve `SelectItem` columns to indices (for simple column projections only)
1083    fn resolve_select_columns(
1084        &self,
1085        table: &DataTable,
1086        select_items: &[SelectItem],
1087    ) -> Result<Vec<usize>> {
1088        let mut indices = Vec::new();
1089        let table_columns = table.column_names();
1090
1091        for item in select_items {
1092            match item {
1093                SelectItem::Column(col_name) => {
1094                    let index = table_columns
1095                        .iter()
1096                        .position(|c| c.eq_ignore_ascii_case(col_name))
1097                        .ok_or_else(|| {
1098                            let suggestion = self.find_similar_column(table, col_name);
1099                            match suggestion {
1100                                Some(similar) => anyhow::anyhow!(
1101                                    "Column '{}' not found. Did you mean '{}'?",
1102                                    col_name,
1103                                    similar
1104                                ),
1105                                None => anyhow::anyhow!("Column '{}' not found", col_name),
1106                            }
1107                        })?;
1108                    indices.push(index);
1109                }
1110                SelectItem::Star => {
1111                    // Expand * to all column indices
1112                    for i in 0..table_columns.len() {
1113                        indices.push(i);
1114                    }
1115                }
1116                SelectItem::Expression { .. } => {
1117                    return Err(anyhow::anyhow!(
1118                        "Computed expressions require new table creation"
1119                    ));
1120                }
1121            }
1122        }
1123
1124        Ok(indices)
1125    }
1126
1127    /// Apply DISTINCT to remove duplicate rows
1128    fn apply_distinct(&self, view: DataView) -> Result<DataView> {
1129        use std::collections::HashSet;
1130
1131        let source = view.source();
1132        let visible_cols = view.visible_column_indices();
1133        let visible_rows = view.visible_row_indices();
1134
1135        // Build a set to track unique rows
1136        let mut seen_rows = HashSet::new();
1137        let mut unique_row_indices = Vec::new();
1138
1139        for &row_idx in visible_rows {
1140            // Build a key representing this row's visible column values
1141            let mut row_key = Vec::new();
1142            for &col_idx in visible_cols {
1143                let value = source
1144                    .get_value(row_idx, col_idx)
1145                    .ok_or_else(|| anyhow!("Invalid cell reference"))?;
1146                // Convert value to a hashable representation
1147                row_key.push(format!("{:?}", value));
1148            }
1149
1150            // Check if we've seen this row before
1151            if seen_rows.insert(row_key) {
1152                // First time seeing this row combination
1153                unique_row_indices.push(row_idx);
1154            }
1155        }
1156
1157        // Create a new view with only unique rows
1158        Ok(view.with_rows(unique_row_indices))
1159    }
1160
1161    /// Apply multi-column ORDER BY sorting to the view
1162    fn apply_multi_order_by(
1163        &self,
1164        mut view: DataView,
1165        order_by_columns: &[OrderByColumn],
1166    ) -> Result<DataView> {
1167        // Build list of (source_column_index, ascending) tuples
1168        let mut sort_columns = Vec::new();
1169
1170        for order_col in order_by_columns {
1171            // Try to find the column index
1172            // Check in the current view's source table (this handles both regular columns and computed columns)
1173            // This is especially important after GROUP BY where we have a new result table with aggregate aliases
1174            let col_index = view
1175                .source()
1176                .get_column_index(&order_col.column)
1177                .ok_or_else(|| {
1178                    // If not found, provide helpful error with suggestions
1179                    let suggestion = self.find_similar_column(view.source(), &order_col.column);
1180                    match suggestion {
1181                        Some(similar) => anyhow::anyhow!(
1182                            "Column '{}' not found. Did you mean '{}'?",
1183                            order_col.column,
1184                            similar
1185                        ),
1186                        None => {
1187                            // Also list available columns for debugging
1188                            let available_cols = view.source().column_names().join(", ");
1189                            anyhow::anyhow!(
1190                                "Column '{}' not found. Available columns: {}",
1191                                order_col.column,
1192                                available_cols
1193                            )
1194                        }
1195                    }
1196                })?;
1197
1198            let ascending = matches!(order_col.direction, SortDirection::Asc);
1199            sort_columns.push((col_index, ascending));
1200        }
1201
1202        // Apply multi-column sorting
1203        view.apply_multi_sort(&sort_columns)?;
1204        Ok(view)
1205    }
1206
1207    /// Apply GROUP BY to the view with optional HAVING clause
1208    fn apply_group_by(
1209        &self,
1210        view: DataView,
1211        group_by_exprs: &[SqlExpression],
1212        select_items: &[SelectItem],
1213        having: Option<&SqlExpression>,
1214    ) -> Result<DataView> {
1215        // Use the new expression-based GROUP BY implementation
1216        self.apply_group_by_expressions(
1217            view,
1218            group_by_exprs,
1219            select_items,
1220            having,
1221            self.case_insensitive,
1222            self.date_notation.clone(),
1223        )
1224    }
1225}
1226
1227#[cfg(test)]
1228mod tests {
1229    use super::*;
1230    use crate::data::datatable::{DataColumn, DataRow, DataValue};
1231
1232    fn create_test_table() -> Arc<DataTable> {
1233        let mut table = DataTable::new("test");
1234
1235        // Add columns
1236        table.add_column(DataColumn::new("id"));
1237        table.add_column(DataColumn::new("name"));
1238        table.add_column(DataColumn::new("age"));
1239
1240        // Add rows
1241        table
1242            .add_row(DataRow::new(vec![
1243                DataValue::Integer(1),
1244                DataValue::String("Alice".to_string()),
1245                DataValue::Integer(30),
1246            ]))
1247            .unwrap();
1248
1249        table
1250            .add_row(DataRow::new(vec![
1251                DataValue::Integer(2),
1252                DataValue::String("Bob".to_string()),
1253                DataValue::Integer(25),
1254            ]))
1255            .unwrap();
1256
1257        table
1258            .add_row(DataRow::new(vec![
1259                DataValue::Integer(3),
1260                DataValue::String("Charlie".to_string()),
1261                DataValue::Integer(35),
1262            ]))
1263            .unwrap();
1264
1265        Arc::new(table)
1266    }
1267
1268    #[test]
1269    fn test_select_all() {
1270        let table = create_test_table();
1271        let engine = QueryEngine::new();
1272
1273        let view = engine
1274            .execute(table.clone(), "SELECT * FROM users")
1275            .unwrap();
1276        assert_eq!(view.row_count(), 3);
1277        assert_eq!(view.column_count(), 3);
1278    }
1279
1280    #[test]
1281    fn test_select_columns() {
1282        let table = create_test_table();
1283        let engine = QueryEngine::new();
1284
1285        let view = engine
1286            .execute(table.clone(), "SELECT name, age FROM users")
1287            .unwrap();
1288        assert_eq!(view.row_count(), 3);
1289        assert_eq!(view.column_count(), 2);
1290    }
1291
1292    #[test]
1293    fn test_select_with_limit() {
1294        let table = create_test_table();
1295        let engine = QueryEngine::new();
1296
1297        let view = engine
1298            .execute(table.clone(), "SELECT * FROM users LIMIT 2")
1299            .unwrap();
1300        assert_eq!(view.row_count(), 2);
1301    }
1302
1303    #[test]
1304    fn test_type_coercion_contains() {
1305        // Initialize tracing for debug output
1306        let _ = tracing_subscriber::fmt()
1307            .with_max_level(tracing::Level::DEBUG)
1308            .try_init();
1309
1310        let mut table = DataTable::new("test");
1311        table.add_column(DataColumn::new("id"));
1312        table.add_column(DataColumn::new("status"));
1313        table.add_column(DataColumn::new("price"));
1314
1315        // Add test data with mixed types
1316        table
1317            .add_row(DataRow::new(vec![
1318                DataValue::Integer(1),
1319                DataValue::String("Pending".to_string()),
1320                DataValue::Float(99.99),
1321            ]))
1322            .unwrap();
1323
1324        table
1325            .add_row(DataRow::new(vec![
1326                DataValue::Integer(2),
1327                DataValue::String("Confirmed".to_string()),
1328                DataValue::Float(150.50),
1329            ]))
1330            .unwrap();
1331
1332        table
1333            .add_row(DataRow::new(vec![
1334                DataValue::Integer(3),
1335                DataValue::String("Pending".to_string()),
1336                DataValue::Float(75.00),
1337            ]))
1338            .unwrap();
1339
1340        let table = Arc::new(table);
1341        let engine = QueryEngine::new();
1342
1343        println!("\n=== Testing WHERE clause with Contains ===");
1344        println!("Table has {} rows", table.row_count());
1345        for i in 0..table.row_count() {
1346            let status = table.get_value(i, 1);
1347            println!("Row {i}: status = {status:?}");
1348        }
1349
1350        // Test 1: Basic string contains (should work)
1351        println!("\n--- Test 1: status.Contains('pend') ---");
1352        let result = engine.execute(
1353            table.clone(),
1354            "SELECT * FROM test WHERE status.Contains('pend')",
1355        );
1356        match result {
1357            Ok(view) => {
1358                println!("SUCCESS: Found {} matching rows", view.row_count());
1359                assert_eq!(view.row_count(), 2); // Should find both Pending rows
1360            }
1361            Err(e) => {
1362                panic!("Query failed: {e}");
1363            }
1364        }
1365
1366        // Test 2: Numeric contains (should work with type coercion)
1367        println!("\n--- Test 2: price.Contains('9') ---");
1368        let result = engine.execute(
1369            table.clone(),
1370            "SELECT * FROM test WHERE price.Contains('9')",
1371        );
1372        match result {
1373            Ok(view) => {
1374                println!(
1375                    "SUCCESS: Found {} matching rows with price containing '9'",
1376                    view.row_count()
1377                );
1378                // Should find 99.99 row
1379                assert!(view.row_count() >= 1);
1380            }
1381            Err(e) => {
1382                panic!("Numeric coercion query failed: {e}");
1383            }
1384        }
1385
1386        println!("\n=== All tests passed! ===");
1387    }
1388
1389    #[test]
1390    fn test_not_in_clause() {
1391        // Initialize tracing for debug output
1392        let _ = tracing_subscriber::fmt()
1393            .with_max_level(tracing::Level::DEBUG)
1394            .try_init();
1395
1396        let mut table = DataTable::new("test");
1397        table.add_column(DataColumn::new("id"));
1398        table.add_column(DataColumn::new("country"));
1399
1400        // Add test data
1401        table
1402            .add_row(DataRow::new(vec![
1403                DataValue::Integer(1),
1404                DataValue::String("CA".to_string()),
1405            ]))
1406            .unwrap();
1407
1408        table
1409            .add_row(DataRow::new(vec![
1410                DataValue::Integer(2),
1411                DataValue::String("US".to_string()),
1412            ]))
1413            .unwrap();
1414
1415        table
1416            .add_row(DataRow::new(vec![
1417                DataValue::Integer(3),
1418                DataValue::String("UK".to_string()),
1419            ]))
1420            .unwrap();
1421
1422        let table = Arc::new(table);
1423        let engine = QueryEngine::new();
1424
1425        println!("\n=== Testing NOT IN clause ===");
1426        println!("Table has {} rows", table.row_count());
1427        for i in 0..table.row_count() {
1428            let country = table.get_value(i, 1);
1429            println!("Row {i}: country = {country:?}");
1430        }
1431
1432        // Test NOT IN clause - should exclude CA, return US and UK (2 rows)
1433        println!("\n--- Test: country NOT IN ('CA') ---");
1434        let result = engine.execute(
1435            table.clone(),
1436            "SELECT * FROM test WHERE country NOT IN ('CA')",
1437        );
1438        match result {
1439            Ok(view) => {
1440                println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
1441                assert_eq!(view.row_count(), 2); // Should find US and UK
1442            }
1443            Err(e) => {
1444                panic!("NOT IN query failed: {e}");
1445            }
1446        }
1447
1448        println!("\n=== NOT IN test complete! ===");
1449    }
1450
1451    #[test]
1452    fn test_case_insensitive_in_and_not_in() {
1453        // Initialize tracing for debug output
1454        let _ = tracing_subscriber::fmt()
1455            .with_max_level(tracing::Level::DEBUG)
1456            .try_init();
1457
1458        let mut table = DataTable::new("test");
1459        table.add_column(DataColumn::new("id"));
1460        table.add_column(DataColumn::new("country"));
1461
1462        // Add test data with mixed case
1463        table
1464            .add_row(DataRow::new(vec![
1465                DataValue::Integer(1),
1466                DataValue::String("CA".to_string()), // uppercase
1467            ]))
1468            .unwrap();
1469
1470        table
1471            .add_row(DataRow::new(vec![
1472                DataValue::Integer(2),
1473                DataValue::String("us".to_string()), // lowercase
1474            ]))
1475            .unwrap();
1476
1477        table
1478            .add_row(DataRow::new(vec![
1479                DataValue::Integer(3),
1480                DataValue::String("UK".to_string()), // uppercase
1481            ]))
1482            .unwrap();
1483
1484        let table = Arc::new(table);
1485
1486        println!("\n=== Testing Case-Insensitive IN clause ===");
1487        println!("Table has {} rows", table.row_count());
1488        for i in 0..table.row_count() {
1489            let country = table.get_value(i, 1);
1490            println!("Row {i}: country = {country:?}");
1491        }
1492
1493        // Test case-insensitive IN - should match 'CA' with 'ca'
1494        println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
1495        let engine = QueryEngine::with_case_insensitive(true);
1496        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1497        match result {
1498            Ok(view) => {
1499                println!(
1500                    "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
1501                    view.row_count()
1502                );
1503                assert_eq!(view.row_count(), 1); // Should find CA row
1504            }
1505            Err(e) => {
1506                panic!("Case-insensitive IN query failed: {e}");
1507            }
1508        }
1509
1510        // Test case-insensitive NOT IN - should exclude 'CA' when searching for 'ca'
1511        println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
1512        let result = engine.execute(
1513            table.clone(),
1514            "SELECT * FROM test WHERE country NOT IN ('ca')",
1515        );
1516        match result {
1517            Ok(view) => {
1518                println!(
1519                    "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
1520                    view.row_count()
1521                );
1522                assert_eq!(view.row_count(), 2); // Should find us and UK rows
1523            }
1524            Err(e) => {
1525                panic!("Case-insensitive NOT IN query failed: {e}");
1526            }
1527        }
1528
1529        // Test case-sensitive (default) - should NOT match 'CA' with 'ca'
1530        println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
1531        let engine_case_sensitive = QueryEngine::new(); // defaults to case_insensitive=false
1532        let result = engine_case_sensitive
1533            .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1534        match result {
1535            Ok(view) => {
1536                println!(
1537                    "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
1538                    view.row_count()
1539                );
1540                assert_eq!(view.row_count(), 0); // Should find no rows (CA != ca)
1541            }
1542            Err(e) => {
1543                panic!("Case-sensitive IN query failed: {e}");
1544            }
1545        }
1546
1547        println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
1548    }
1549
1550    #[test]
1551    #[ignore = "Parentheses in WHERE clause not yet implemented"]
1552    fn test_parentheses_in_where_clause() {
1553        // Initialize tracing for debug output
1554        let _ = tracing_subscriber::fmt()
1555            .with_max_level(tracing::Level::DEBUG)
1556            .try_init();
1557
1558        let mut table = DataTable::new("test");
1559        table.add_column(DataColumn::new("id"));
1560        table.add_column(DataColumn::new("status"));
1561        table.add_column(DataColumn::new("priority"));
1562
1563        // Add test data
1564        table
1565            .add_row(DataRow::new(vec![
1566                DataValue::Integer(1),
1567                DataValue::String("Pending".to_string()),
1568                DataValue::String("High".to_string()),
1569            ]))
1570            .unwrap();
1571
1572        table
1573            .add_row(DataRow::new(vec![
1574                DataValue::Integer(2),
1575                DataValue::String("Complete".to_string()),
1576                DataValue::String("High".to_string()),
1577            ]))
1578            .unwrap();
1579
1580        table
1581            .add_row(DataRow::new(vec![
1582                DataValue::Integer(3),
1583                DataValue::String("Pending".to_string()),
1584                DataValue::String("Low".to_string()),
1585            ]))
1586            .unwrap();
1587
1588        table
1589            .add_row(DataRow::new(vec![
1590                DataValue::Integer(4),
1591                DataValue::String("Complete".to_string()),
1592                DataValue::String("Low".to_string()),
1593            ]))
1594            .unwrap();
1595
1596        let table = Arc::new(table);
1597        let engine = QueryEngine::new();
1598
1599        println!("\n=== Testing Parentheses in WHERE clause ===");
1600        println!("Table has {} rows", table.row_count());
1601        for i in 0..table.row_count() {
1602            let status = table.get_value(i, 1);
1603            let priority = table.get_value(i, 2);
1604            println!("Row {i}: status = {status:?}, priority = {priority:?}");
1605        }
1606
1607        // Test OR with parentheses - should get (Pending AND High) OR (Complete AND Low)
1608        println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
1609        let result = engine.execute(
1610            table.clone(),
1611            "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
1612        );
1613        match result {
1614            Ok(view) => {
1615                println!(
1616                    "SUCCESS: Found {} rows with parenthetical logic",
1617                    view.row_count()
1618                );
1619                assert_eq!(view.row_count(), 2); // Should find rows 1 and 4
1620            }
1621            Err(e) => {
1622                panic!("Parentheses query failed: {e}");
1623            }
1624        }
1625
1626        println!("\n=== Parentheses test complete! ===");
1627    }
1628
1629    #[test]
1630    #[ignore = "Numeric type coercion needs fixing"]
1631    fn test_numeric_type_coercion() {
1632        // Initialize tracing for debug output
1633        let _ = tracing_subscriber::fmt()
1634            .with_max_level(tracing::Level::DEBUG)
1635            .try_init();
1636
1637        let mut table = DataTable::new("test");
1638        table.add_column(DataColumn::new("id"));
1639        table.add_column(DataColumn::new("price"));
1640        table.add_column(DataColumn::new("quantity"));
1641
1642        // Add test data with different numeric types
1643        table
1644            .add_row(DataRow::new(vec![
1645                DataValue::Integer(1),
1646                DataValue::Float(99.50), // Contains '.'
1647                DataValue::Integer(100),
1648            ]))
1649            .unwrap();
1650
1651        table
1652            .add_row(DataRow::new(vec![
1653                DataValue::Integer(2),
1654                DataValue::Float(150.0), // Contains '.' and '0'
1655                DataValue::Integer(200),
1656            ]))
1657            .unwrap();
1658
1659        table
1660            .add_row(DataRow::new(vec![
1661                DataValue::Integer(3),
1662                DataValue::Integer(75), // No decimal point
1663                DataValue::Integer(50),
1664            ]))
1665            .unwrap();
1666
1667        let table = Arc::new(table);
1668        let engine = QueryEngine::new();
1669
1670        println!("\n=== Testing Numeric Type Coercion ===");
1671        println!("Table has {} rows", table.row_count());
1672        for i in 0..table.row_count() {
1673            let price = table.get_value(i, 1);
1674            let quantity = table.get_value(i, 2);
1675            println!("Row {i}: price = {price:?}, quantity = {quantity:?}");
1676        }
1677
1678        // Test Contains on float values - should find rows with decimal points
1679        println!("\n--- Test: price.Contains('.') ---");
1680        let result = engine.execute(
1681            table.clone(),
1682            "SELECT * FROM test WHERE price.Contains('.')",
1683        );
1684        match result {
1685            Ok(view) => {
1686                println!(
1687                    "SUCCESS: Found {} rows with decimal points in price",
1688                    view.row_count()
1689                );
1690                assert_eq!(view.row_count(), 2); // Should find 99.50 and 150.0
1691            }
1692            Err(e) => {
1693                panic!("Numeric Contains query failed: {e}");
1694            }
1695        }
1696
1697        // Test Contains on integer values converted to string
1698        println!("\n--- Test: quantity.Contains('0') ---");
1699        let result = engine.execute(
1700            table.clone(),
1701            "SELECT * FROM test WHERE quantity.Contains('0')",
1702        );
1703        match result {
1704            Ok(view) => {
1705                println!(
1706                    "SUCCESS: Found {} rows with '0' in quantity",
1707                    view.row_count()
1708                );
1709                assert_eq!(view.row_count(), 2); // Should find 100 and 200
1710            }
1711            Err(e) => {
1712                panic!("Integer Contains query failed: {e}");
1713            }
1714        }
1715
1716        println!("\n=== Numeric type coercion test complete! ===");
1717    }
1718
1719    #[test]
1720    fn test_datetime_comparisons() {
1721        // Initialize tracing for debug output
1722        let _ = tracing_subscriber::fmt()
1723            .with_max_level(tracing::Level::DEBUG)
1724            .try_init();
1725
1726        let mut table = DataTable::new("test");
1727        table.add_column(DataColumn::new("id"));
1728        table.add_column(DataColumn::new("created_date"));
1729
1730        // Add test data with date strings (as they would come from CSV)
1731        table
1732            .add_row(DataRow::new(vec![
1733                DataValue::Integer(1),
1734                DataValue::String("2024-12-15".to_string()),
1735            ]))
1736            .unwrap();
1737
1738        table
1739            .add_row(DataRow::new(vec![
1740                DataValue::Integer(2),
1741                DataValue::String("2025-01-15".to_string()),
1742            ]))
1743            .unwrap();
1744
1745        table
1746            .add_row(DataRow::new(vec![
1747                DataValue::Integer(3),
1748                DataValue::String("2025-02-15".to_string()),
1749            ]))
1750            .unwrap();
1751
1752        let table = Arc::new(table);
1753        let engine = QueryEngine::new();
1754
1755        println!("\n=== Testing DateTime Comparisons ===");
1756        println!("Table has {} rows", table.row_count());
1757        for i in 0..table.row_count() {
1758            let date = table.get_value(i, 1);
1759            println!("Row {i}: created_date = {date:?}");
1760        }
1761
1762        // Test DateTime constructor comparison - should find dates after 2025-01-01
1763        println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1764        let result = engine.execute(
1765            table.clone(),
1766            "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1767        );
1768        match result {
1769            Ok(view) => {
1770                println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1771                assert_eq!(view.row_count(), 2); // Should find 2025-01-15 and 2025-02-15
1772            }
1773            Err(e) => {
1774                panic!("DateTime comparison query failed: {e}");
1775            }
1776        }
1777
1778        println!("\n=== DateTime comparison test complete! ===");
1779    }
1780
1781    #[test]
1782    fn test_not_with_method_calls() {
1783        // Initialize tracing for debug output
1784        let _ = tracing_subscriber::fmt()
1785            .with_max_level(tracing::Level::DEBUG)
1786            .try_init();
1787
1788        let mut table = DataTable::new("test");
1789        table.add_column(DataColumn::new("id"));
1790        table.add_column(DataColumn::new("status"));
1791
1792        // Add test data
1793        table
1794            .add_row(DataRow::new(vec![
1795                DataValue::Integer(1),
1796                DataValue::String("Pending Review".to_string()),
1797            ]))
1798            .unwrap();
1799
1800        table
1801            .add_row(DataRow::new(vec![
1802                DataValue::Integer(2),
1803                DataValue::String("Complete".to_string()),
1804            ]))
1805            .unwrap();
1806
1807        table
1808            .add_row(DataRow::new(vec![
1809                DataValue::Integer(3),
1810                DataValue::String("Pending Approval".to_string()),
1811            ]))
1812            .unwrap();
1813
1814        let table = Arc::new(table);
1815        let engine = QueryEngine::with_case_insensitive(true);
1816
1817        println!("\n=== Testing NOT with Method Calls ===");
1818        println!("Table has {} rows", table.row_count());
1819        for i in 0..table.row_count() {
1820            let status = table.get_value(i, 1);
1821            println!("Row {i}: status = {status:?}");
1822        }
1823
1824        // Test NOT with Contains - should exclude rows containing "pend"
1825        println!("\n--- Test: NOT status.Contains('pend') ---");
1826        let result = engine.execute(
1827            table.clone(),
1828            "SELECT * FROM test WHERE NOT status.Contains('pend')",
1829        );
1830        match result {
1831            Ok(view) => {
1832                println!(
1833                    "SUCCESS: Found {} rows NOT containing 'pend'",
1834                    view.row_count()
1835                );
1836                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1837            }
1838            Err(e) => {
1839                panic!("NOT Contains query failed: {e}");
1840            }
1841        }
1842
1843        // Test NOT with StartsWith
1844        println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1845        let result = engine.execute(
1846            table.clone(),
1847            "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1848        );
1849        match result {
1850            Ok(view) => {
1851                println!(
1852                    "SUCCESS: Found {} rows NOT starting with 'Pending'",
1853                    view.row_count()
1854                );
1855                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1856            }
1857            Err(e) => {
1858                panic!("NOT StartsWith query failed: {e}");
1859            }
1860        }
1861
1862        println!("\n=== NOT with method calls test complete! ===");
1863    }
1864
1865    #[test]
1866    #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1867    fn test_complex_logical_expressions() {
1868        // Initialize tracing for debug output
1869        let _ = tracing_subscriber::fmt()
1870            .with_max_level(tracing::Level::DEBUG)
1871            .try_init();
1872
1873        let mut table = DataTable::new("test");
1874        table.add_column(DataColumn::new("id"));
1875        table.add_column(DataColumn::new("status"));
1876        table.add_column(DataColumn::new("priority"));
1877        table.add_column(DataColumn::new("assigned"));
1878
1879        // Add comprehensive test data
1880        table
1881            .add_row(DataRow::new(vec![
1882                DataValue::Integer(1),
1883                DataValue::String("Pending".to_string()),
1884                DataValue::String("High".to_string()),
1885                DataValue::String("John".to_string()),
1886            ]))
1887            .unwrap();
1888
1889        table
1890            .add_row(DataRow::new(vec![
1891                DataValue::Integer(2),
1892                DataValue::String("Complete".to_string()),
1893                DataValue::String("High".to_string()),
1894                DataValue::String("Jane".to_string()),
1895            ]))
1896            .unwrap();
1897
1898        table
1899            .add_row(DataRow::new(vec![
1900                DataValue::Integer(3),
1901                DataValue::String("Pending".to_string()),
1902                DataValue::String("Low".to_string()),
1903                DataValue::String("John".to_string()),
1904            ]))
1905            .unwrap();
1906
1907        table
1908            .add_row(DataRow::new(vec![
1909                DataValue::Integer(4),
1910                DataValue::String("In Progress".to_string()),
1911                DataValue::String("Medium".to_string()),
1912                DataValue::String("Jane".to_string()),
1913            ]))
1914            .unwrap();
1915
1916        let table = Arc::new(table);
1917        let engine = QueryEngine::new();
1918
1919        println!("\n=== Testing Complex Logical Expressions ===");
1920        println!("Table has {} rows", table.row_count());
1921        for i in 0..table.row_count() {
1922            let status = table.get_value(i, 1);
1923            let priority = table.get_value(i, 2);
1924            let assigned = table.get_value(i, 3);
1925            println!(
1926                "Row {i}: status = {status:?}, priority = {priority:?}, assigned = {assigned:?}"
1927            );
1928        }
1929
1930        // Test complex AND/OR logic
1931        println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
1932        let result = engine.execute(
1933            table.clone(),
1934            "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
1935        );
1936        match result {
1937            Ok(view) => {
1938                println!(
1939                    "SUCCESS: Found {} rows with complex logic",
1940                    view.row_count()
1941                );
1942                assert_eq!(view.row_count(), 2); // Should find rows 1 and 3 (both Pending, one High priority, both assigned to John)
1943            }
1944            Err(e) => {
1945                panic!("Complex logic query failed: {e}");
1946            }
1947        }
1948
1949        // Test NOT with complex expressions
1950        println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
1951        let result = engine.execute(
1952            table.clone(),
1953            "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
1954        );
1955        match result {
1956            Ok(view) => {
1957                println!(
1958                    "SUCCESS: Found {} rows with NOT complex logic",
1959                    view.row_count()
1960                );
1961                assert_eq!(view.row_count(), 2); // Should find rows 1 (Pending+High) and 4 (In Progress+Medium)
1962            }
1963            Err(e) => {
1964                panic!("NOT complex logic query failed: {e}");
1965            }
1966        }
1967
1968        println!("\n=== Complex logical expressions test complete! ===");
1969    }
1970
1971    #[test]
1972    fn test_mixed_data_types_and_edge_cases() {
1973        // Initialize tracing for debug output
1974        let _ = tracing_subscriber::fmt()
1975            .with_max_level(tracing::Level::DEBUG)
1976            .try_init();
1977
1978        let mut table = DataTable::new("test");
1979        table.add_column(DataColumn::new("id"));
1980        table.add_column(DataColumn::new("value"));
1981        table.add_column(DataColumn::new("nullable_field"));
1982
1983        // Add test data with mixed types and edge cases
1984        table
1985            .add_row(DataRow::new(vec![
1986                DataValue::Integer(1),
1987                DataValue::String("123.45".to_string()),
1988                DataValue::String("present".to_string()),
1989            ]))
1990            .unwrap();
1991
1992        table
1993            .add_row(DataRow::new(vec![
1994                DataValue::Integer(2),
1995                DataValue::Float(678.90),
1996                DataValue::Null,
1997            ]))
1998            .unwrap();
1999
2000        table
2001            .add_row(DataRow::new(vec![
2002                DataValue::Integer(3),
2003                DataValue::Boolean(true),
2004                DataValue::String("also present".to_string()),
2005            ]))
2006            .unwrap();
2007
2008        table
2009            .add_row(DataRow::new(vec![
2010                DataValue::Integer(4),
2011                DataValue::String("false".to_string()),
2012                DataValue::Null,
2013            ]))
2014            .unwrap();
2015
2016        let table = Arc::new(table);
2017        let engine = QueryEngine::new();
2018
2019        println!("\n=== Testing Mixed Data Types and Edge Cases ===");
2020        println!("Table has {} rows", table.row_count());
2021        for i in 0..table.row_count() {
2022            let value = table.get_value(i, 1);
2023            let nullable = table.get_value(i, 2);
2024            println!("Row {i}: value = {value:?}, nullable_field = {nullable:?}");
2025        }
2026
2027        // Test type coercion with boolean Contains
2028        println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
2029        let result = engine.execute(
2030            table.clone(),
2031            "SELECT * FROM test WHERE value.Contains('true')",
2032        );
2033        match result {
2034            Ok(view) => {
2035                println!(
2036                    "SUCCESS: Found {} rows with boolean coercion",
2037                    view.row_count()
2038                );
2039                assert_eq!(view.row_count(), 1); // Should find the boolean true row
2040            }
2041            Err(e) => {
2042                panic!("Boolean coercion query failed: {e}");
2043            }
2044        }
2045
2046        // Test multiple IN values with mixed types
2047        println!("\n--- Test: id IN (1, 3) ---");
2048        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
2049        match result {
2050            Ok(view) => {
2051                println!("SUCCESS: Found {} rows with IN clause", view.row_count());
2052                assert_eq!(view.row_count(), 2); // Should find rows with id 1 and 3
2053            }
2054            Err(e) => {
2055                panic!("Multiple IN values query failed: {e}");
2056            }
2057        }
2058
2059        println!("\n=== Mixed data types test complete! ===");
2060    }
2061
2062    /// Test that aggregate-only queries return exactly one row (regression test)
2063    #[test]
2064    fn test_aggregate_only_single_row() {
2065        let table = create_test_stock_data();
2066        let engine = QueryEngine::new();
2067
2068        // Test query with multiple aggregates - should return exactly 1 row
2069        let result = engine
2070            .execute(
2071                table.clone(),
2072                "SELECT COUNT(*), MIN(close), MAX(close), AVG(close) FROM stock",
2073            )
2074            .expect("Query should succeed");
2075
2076        assert_eq!(
2077            result.row_count(),
2078            1,
2079            "Aggregate-only query should return exactly 1 row"
2080        );
2081        assert_eq!(result.column_count(), 4, "Should have 4 aggregate columns");
2082
2083        // Verify the actual values are correct
2084        let source = result.source();
2085        let row = source.get_row(0).expect("Should have first row");
2086
2087        // COUNT(*) should be 5 (total rows)
2088        assert_eq!(row.values[0], DataValue::Integer(5));
2089
2090        // MIN should be 99.5
2091        assert_eq!(row.values[1], DataValue::Float(99.5));
2092
2093        // MAX should be 105.0
2094        assert_eq!(row.values[2], DataValue::Float(105.0));
2095
2096        // AVG should be approximately 102.4
2097        if let DataValue::Float(avg) = &row.values[3] {
2098            assert!(
2099                (avg - 102.4).abs() < 0.01,
2100                "Average should be approximately 102.4, got {}",
2101                avg
2102            );
2103        } else {
2104            panic!("AVG should return a Float value");
2105        }
2106    }
2107
2108    /// Test single aggregate function returns single row
2109    #[test]
2110    fn test_single_aggregate_single_row() {
2111        let table = create_test_stock_data();
2112        let engine = QueryEngine::new();
2113
2114        let result = engine
2115            .execute(table.clone(), "SELECT COUNT(*) FROM stock")
2116            .expect("Query should succeed");
2117
2118        assert_eq!(
2119            result.row_count(),
2120            1,
2121            "Single aggregate query should return exactly 1 row"
2122        );
2123        assert_eq!(result.column_count(), 1, "Should have 1 column");
2124
2125        let source = result.source();
2126        let row = source.get_row(0).expect("Should have first row");
2127        assert_eq!(row.values[0], DataValue::Integer(5));
2128    }
2129
2130    /// Test aggregate with WHERE clause filtering
2131    #[test]
2132    fn test_aggregate_with_where_single_row() {
2133        let table = create_test_stock_data();
2134        let engine = QueryEngine::new();
2135
2136        // Filter to only high-value stocks (>= 103.0) and aggregate
2137        let result = engine
2138            .execute(
2139                table.clone(),
2140                "SELECT COUNT(*), MIN(close), MAX(close) FROM stock WHERE close >= 103.0",
2141            )
2142            .expect("Query should succeed");
2143
2144        assert_eq!(
2145            result.row_count(),
2146            1,
2147            "Filtered aggregate query should return exactly 1 row"
2148        );
2149        assert_eq!(result.column_count(), 3, "Should have 3 aggregate columns");
2150
2151        let source = result.source();
2152        let row = source.get_row(0).expect("Should have first row");
2153
2154        // Should find 2 rows (103.5 and 105.0)
2155        assert_eq!(row.values[0], DataValue::Integer(2));
2156        assert_eq!(row.values[1], DataValue::Float(103.5)); // MIN
2157        assert_eq!(row.values[2], DataValue::Float(105.0)); // MAX
2158    }
2159
2160    #[test]
2161    fn test_not_in_parsing() {
2162        use crate::sql::recursive_parser::Parser;
2163
2164        let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
2165        println!("\n=== Testing NOT IN parsing ===");
2166        println!("Parsing query: {query}");
2167
2168        let mut parser = Parser::new(query);
2169        match parser.parse() {
2170            Ok(statement) => {
2171                println!("Parsed statement: {statement:#?}");
2172                if let Some(where_clause) = statement.where_clause {
2173                    println!("WHERE conditions: {:#?}", where_clause.conditions);
2174                    if let Some(first_condition) = where_clause.conditions.first() {
2175                        println!("First condition expression: {:#?}", first_condition.expr);
2176                    }
2177                }
2178            }
2179            Err(e) => {
2180                panic!("Parse error: {e}");
2181            }
2182        }
2183    }
2184
2185    /// Create test stock data for aggregate testing
2186    fn create_test_stock_data() -> Arc<DataTable> {
2187        let mut table = DataTable::new("stock");
2188
2189        table.add_column(DataColumn::new("symbol"));
2190        table.add_column(DataColumn::new("close"));
2191        table.add_column(DataColumn::new("volume"));
2192
2193        // Add 5 rows of test data
2194        let test_data = vec![
2195            ("AAPL", 99.5, 1000),
2196            ("AAPL", 101.2, 1500),
2197            ("AAPL", 103.5, 2000),
2198            ("AAPL", 105.0, 1200),
2199            ("AAPL", 102.8, 1800),
2200        ];
2201
2202        for (symbol, close, volume) in test_data {
2203            table
2204                .add_row(DataRow::new(vec![
2205                    DataValue::String(symbol.to_string()),
2206                    DataValue::Float(close),
2207                    DataValue::Integer(volume),
2208                ]))
2209                .expect("Should add row successfully");
2210        }
2211
2212        Arc::new(table)
2213    }
2214}
2215
2216#[cfg(test)]
2217#[path = "query_engine_tests.rs"]
2218mod query_engine_tests;