sql_cli/data/
query_engine.rs

1use anyhow::{anyhow, Result};
2use std::collections::HashMap;
3use std::sync::Arc;
4use std::time::Instant;
5use tracing::{debug, info};
6
7use crate::config::config::BehaviorConfig;
8use crate::config::global::get_date_notation;
9use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
10use crate::data::data_view::DataView;
11use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
12use crate::data::evaluation_context::EvaluationContext;
13use crate::data::group_by_expressions::GroupByExpressions;
14use crate::data::hash_join::HashJoinExecutor;
15use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
16use crate::data::subquery_executor::SubqueryExecutor;
17use crate::data::virtual_table_generator::VirtualTableGenerator;
18use crate::execution_plan::{ExecutionPlan, ExecutionPlanBuilder, StepType};
19use crate::sql::aggregates::{contains_aggregate, is_aggregate_compatible};
20use crate::sql::parser::ast::TableSource;
21use crate::sql::recursive_parser::{
22    CTEType, OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection, SqlExpression,
23    TableFunction,
24};
25
26/// Query engine that executes SQL directly on `DataTable`
27#[derive(Clone)]
28pub struct QueryEngine {
29    case_insensitive: bool,
30    date_notation: String,
31    behavior_config: Option<BehaviorConfig>,
32}
33
34impl Default for QueryEngine {
35    fn default() -> Self {
36        Self::new()
37    }
38}
39
40impl QueryEngine {
41    #[must_use]
42    pub fn new() -> Self {
43        Self {
44            case_insensitive: false,
45            date_notation: get_date_notation(),
46            behavior_config: None,
47        }
48    }
49
50    #[must_use]
51    pub fn with_behavior_config(config: BehaviorConfig) -> Self {
52        let case_insensitive = config.case_insensitive_default;
53        // Use get_date_notation() to respect environment variable override
54        let date_notation = get_date_notation();
55        Self {
56            case_insensitive,
57            date_notation,
58            behavior_config: Some(config),
59        }
60    }
61
62    #[must_use]
63    pub fn with_date_notation(_date_notation: String) -> Self {
64        Self {
65            case_insensitive: false,
66            date_notation: get_date_notation(), // Always use the global function
67            behavior_config: None,
68        }
69    }
70
71    #[must_use]
72    pub fn with_case_insensitive(case_insensitive: bool) -> Self {
73        Self {
74            case_insensitive,
75            date_notation: get_date_notation(),
76            behavior_config: None,
77        }
78    }
79
80    #[must_use]
81    pub fn with_case_insensitive_and_date_notation(
82        case_insensitive: bool,
83        _date_notation: String, // Keep parameter for compatibility but use get_date_notation()
84    ) -> Self {
85        Self {
86            case_insensitive,
87            date_notation: get_date_notation(), // Always use the global function
88            behavior_config: None,
89        }
90    }
91
92    /// Find a column name similar to the given name using edit distance
93    fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
94        let columns = table.column_names();
95        let mut best_match: Option<(String, usize)> = None;
96
97        for col in columns {
98            let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
99            // Only suggest if distance is small (likely a typo)
100            // Allow up to 3 edits for longer names
101            let max_distance = if name.len() > 10 { 3 } else { 2 };
102            if distance <= max_distance {
103                match &best_match {
104                    None => best_match = Some((col, distance)),
105                    Some((_, best_dist)) if distance < *best_dist => {
106                        best_match = Some((col, distance));
107                    }
108                    _ => {}
109                }
110            }
111        }
112
113        best_match.map(|(name, _)| name)
114    }
115
116    /// Calculate Levenshtein edit distance between two strings
117    fn edit_distance(&self, s1: &str, s2: &str) -> usize {
118        let len1 = s1.len();
119        let len2 = s2.len();
120        let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
121
122        for i in 0..=len1 {
123            matrix[i][0] = i;
124        }
125        for j in 0..=len2 {
126            matrix[0][j] = j;
127        }
128
129        for (i, c1) in s1.chars().enumerate() {
130            for (j, c2) in s2.chars().enumerate() {
131                let cost = usize::from(c1 != c2);
132                matrix[i + 1][j + 1] = std::cmp::min(
133                    matrix[i][j + 1] + 1, // deletion
134                    std::cmp::min(
135                        matrix[i + 1][j] + 1, // insertion
136                        matrix[i][j] + cost,  // substitution
137                    ),
138                );
139            }
140        }
141
142        matrix[len1][len2]
143    }
144
145    /// Execute a SQL query on a `DataTable` and return a `DataView` (for backward compatibility)
146    pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
147        let (view, _plan) = self.execute_with_plan(table, sql)?;
148        Ok(view)
149    }
150
151    /// Execute a parsed SelectStatement on a `DataTable` and return a `DataView`
152    pub fn execute_statement(
153        &self,
154        table: Arc<DataTable>,
155        statement: SelectStatement,
156    ) -> Result<DataView> {
157        // First process CTEs to build context
158        let mut cte_context = HashMap::new();
159        for cte in &statement.ctes {
160            debug!("QueryEngine: Pre-processing CTE '{}'...", cte.name);
161            // Execute the CTE based on its type
162            let cte_result = match &cte.cte_type {
163                CTEType::Standard(query) => {
164                    // Execute the CTE query (it might reference earlier CTEs)
165                    self.build_view_with_context(table.clone(), query.clone(), &mut cte_context)?
166                }
167                CTEType::Web(web_spec) => {
168                    // Fetch data from URL
169                    use crate::web::http_fetcher::WebDataFetcher;
170
171                    let fetcher = WebDataFetcher::new()?;
172                    let data_table = fetcher.fetch(web_spec, &cte.name)?;
173
174                    // Convert DataTable to DataView
175                    DataView::new(Arc::new(data_table))
176                }
177            };
178            // Store the result in the context for later use
179            cte_context.insert(cte.name.clone(), Arc::new(cte_result));
180            debug!(
181                "QueryEngine: CTE '{}' pre-processed, stored in context",
182                cte.name
183            );
184        }
185
186        // Now process subqueries with CTE context available
187        let mut subquery_executor =
188            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
189        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
190
191        // Build the view with the same CTE context
192        self.build_view_with_context(table, processed_statement, &mut cte_context)
193    }
194
195    /// Execute a statement with provided CTE context (for subqueries)
196    pub fn execute_statement_with_cte_context(
197        &self,
198        table: Arc<DataTable>,
199        statement: SelectStatement,
200        cte_context: &HashMap<String, Arc<DataView>>,
201    ) -> Result<DataView> {
202        // Clone the context so we can add any CTEs from this statement
203        let mut local_context = cte_context.clone();
204
205        // Process any CTEs in this statement (they might be nested)
206        for cte in &statement.ctes {
207            debug!("QueryEngine: Processing nested CTE '{}'...", cte.name);
208            let cte_result = match &cte.cte_type {
209                CTEType::Standard(query) => {
210                    self.build_view_with_context(table.clone(), query.clone(), &mut local_context)?
211                }
212                CTEType::Web(web_spec) => {
213                    // Fetch data from URL
214                    use crate::web::http_fetcher::WebDataFetcher;
215
216                    let fetcher = WebDataFetcher::new()?;
217                    let data_table = fetcher.fetch(web_spec, &cte.name)?;
218
219                    // Convert DataTable to DataView
220                    DataView::new(Arc::new(data_table))
221                }
222            };
223            local_context.insert(cte.name.clone(), Arc::new(cte_result));
224        }
225
226        // Process subqueries with the complete context
227        let mut subquery_executor =
228            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), local_context.clone());
229        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
230
231        // Build the view
232        self.build_view_with_context(table, processed_statement, &mut local_context)
233    }
234
235    /// Execute a query and return both the result and the execution plan
236    pub fn execute_with_plan(
237        &self,
238        table: Arc<DataTable>,
239        sql: &str,
240    ) -> Result<(DataView, ExecutionPlan)> {
241        let mut plan_builder = ExecutionPlanBuilder::new();
242        let start_time = Instant::now();
243
244        // Parse the SQL query
245        plan_builder.begin_step(StepType::Parse, "Parse SQL query".to_string());
246        plan_builder.add_detail(format!("Query: {}", sql));
247        let mut parser = Parser::new(sql);
248        let statement = parser
249            .parse()
250            .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
251        plan_builder.add_detail(format!("Parsed successfully"));
252        if let Some(from) = &statement.from_table {
253            plan_builder.add_detail(format!("FROM: {}", from));
254        }
255        if statement.where_clause.is_some() {
256            plan_builder.add_detail("WHERE clause present".to_string());
257        }
258        plan_builder.end_step();
259
260        // First process CTEs to build context
261        let mut cte_context = HashMap::new();
262
263        if !statement.ctes.is_empty() {
264            plan_builder.begin_step(
265                StepType::CTE,
266                format!("Process {} CTEs", statement.ctes.len()),
267            );
268
269            for cte in &statement.ctes {
270                let cte_start = Instant::now();
271                plan_builder.begin_step(StepType::CTE, format!("CTE '{}'", cte.name));
272
273                let cte_result = match &cte.cte_type {
274                    CTEType::Standard(query) => {
275                        // Add CTE query details
276                        if let Some(from) = &query.from_table {
277                            plan_builder.add_detail(format!("Source: {}", from));
278                        }
279                        if query.where_clause.is_some() {
280                            plan_builder.add_detail("Has WHERE clause".to_string());
281                        }
282                        if query.group_by.is_some() {
283                            plan_builder.add_detail("Has GROUP BY".to_string());
284                        }
285
286                        debug!(
287                            "QueryEngine: Processing CTE '{}' with existing context: {:?}",
288                            cte.name,
289                            cte_context.keys().collect::<Vec<_>>()
290                        );
291
292                        // Process subqueries in the CTE's query FIRST
293                        // This allows the subqueries to see all previously defined CTEs
294                        let mut subquery_executor = SubqueryExecutor::with_cte_context(
295                            self.clone(),
296                            table.clone(),
297                            cte_context.clone(),
298                        );
299                        let processed_query = subquery_executor.execute_subqueries(query)?;
300
301                        self.build_view_with_context(
302                            table.clone(),
303                            processed_query,
304                            &mut cte_context,
305                        )?
306                    }
307                    CTEType::Web(web_spec) => {
308                        plan_builder.add_detail(format!("URL: {}", web_spec.url));
309                        if let Some(format) = &web_spec.format {
310                            plan_builder.add_detail(format!("Format: {:?}", format));
311                        }
312                        if let Some(cache) = web_spec.cache_seconds {
313                            plan_builder.add_detail(format!("Cache: {} seconds", cache));
314                        }
315
316                        // Fetch data from URL
317                        use crate::web::http_fetcher::WebDataFetcher;
318
319                        let fetcher = WebDataFetcher::new()?;
320                        let data_table = fetcher.fetch(web_spec, &cte.name)?;
321
322                        // Convert DataTable to DataView
323                        DataView::new(Arc::new(data_table))
324                    }
325                };
326
327                // Record CTE statistics
328                plan_builder.set_rows_out(cte_result.row_count());
329                plan_builder.add_detail(format!(
330                    "Result: {} rows, {} columns",
331                    cte_result.row_count(),
332                    cte_result.column_count()
333                ));
334                plan_builder.add_detail(format!(
335                    "Execution time: {:.3}ms",
336                    cte_start.elapsed().as_secs_f64() * 1000.0
337                ));
338
339                debug!(
340                    "QueryEngine: Storing CTE '{}' in context with {} rows",
341                    cte.name,
342                    cte_result.row_count()
343                );
344                cte_context.insert(cte.name.clone(), Arc::new(cte_result));
345                plan_builder.end_step();
346            }
347
348            plan_builder.add_detail(format!(
349                "All {} CTEs cached in context",
350                statement.ctes.len()
351            ));
352            plan_builder.end_step();
353        }
354
355        // Process subqueries in the statement with CTE context
356        plan_builder.begin_step(StepType::Subquery, "Process subqueries".to_string());
357        let mut subquery_executor =
358            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
359
360        // Check if there are subqueries to process
361        let has_subqueries = statement.where_clause.as_ref().map_or(false, |w| {
362            // This is a simplified check - in reality we'd need to walk the AST
363            format!("{:?}", w).contains("Subquery")
364        });
365
366        if has_subqueries {
367            plan_builder.add_detail("Evaluating subqueries in WHERE clause".to_string());
368        }
369
370        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
371
372        if has_subqueries {
373            plan_builder.add_detail("Subqueries replaced with materialized values".to_string());
374        } else {
375            plan_builder.add_detail("No subqueries to process".to_string());
376        }
377
378        plan_builder.end_step();
379        let result = self.build_view_with_context_and_plan(
380            table,
381            processed_statement,
382            &mut cte_context,
383            &mut plan_builder,
384        )?;
385
386        let total_duration = start_time.elapsed();
387        info!(
388            "Query execution complete: total={:?}, rows={}",
389            total_duration,
390            result.row_count()
391        );
392
393        let plan = plan_builder.build();
394        Ok((result, plan))
395    }
396
397    /// Build a `DataView` from a parsed SQL statement
398    fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
399        let mut cte_context = HashMap::new();
400        self.build_view_with_context(table, statement, &mut cte_context)
401    }
402
403    /// Build a DataView from a SelectStatement with CTE context
404    fn build_view_with_context(
405        &self,
406        table: Arc<DataTable>,
407        statement: SelectStatement,
408        cte_context: &mut HashMap<String, Arc<DataView>>,
409    ) -> Result<DataView> {
410        let mut dummy_plan = ExecutionPlanBuilder::new();
411        self.build_view_with_context_and_plan(table, statement, cte_context, &mut dummy_plan)
412    }
413
414    /// Build a DataView from a SelectStatement with CTE context and execution plan tracking
415    fn build_view_with_context_and_plan(
416        &self,
417        table: Arc<DataTable>,
418        statement: SelectStatement,
419        cte_context: &mut HashMap<String, Arc<DataView>>,
420        plan: &mut ExecutionPlanBuilder,
421    ) -> Result<DataView> {
422        // First, process any CTEs that aren't already in the context
423        for cte in &statement.ctes {
424            // Skip if already processed (e.g., by execute_select for WEB CTEs)
425            if cte_context.contains_key(&cte.name) {
426                debug!(
427                    "QueryEngine: CTE '{}' already in context, skipping",
428                    cte.name
429                );
430                continue;
431            }
432
433            debug!("QueryEngine: Processing CTE '{}'...", cte.name);
434            debug!(
435                "QueryEngine: Available CTEs for '{}': {:?}",
436                cte.name,
437                cte_context.keys().collect::<Vec<_>>()
438            );
439
440            // Execute the CTE query (it might reference earlier CTEs)
441            let cte_result = match &cte.cte_type {
442                CTEType::Standard(query) => {
443                    self.build_view_with_context(table.clone(), query.clone(), cte_context)?
444                }
445                CTEType::Web(_web_spec) => {
446                    // Web CTEs should have been processed earlier in execute_select
447                    return Err(anyhow!(
448                        "Web CTEs should be processed in execute_select method"
449                    ));
450                }
451            };
452
453            // Store the result in the context for later use
454            cte_context.insert(cte.name.clone(), Arc::new(cte_result));
455            debug!(
456                "QueryEngine: CTE '{}' processed, stored in context",
457                cte.name
458            );
459        }
460
461        // Determine the source table for the main query
462        let source_table = if let Some(ref table_func) = statement.from_function {
463            // Handle table functions like RANGE()
464            debug!("QueryEngine: Processing table function...");
465            match table_func {
466                TableFunction::Range { start, end, step } => {
467                    // Evaluate expressions to get numeric values
468                    let mut evaluator =
469                        ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
470
471                    // Create a dummy row for evaluating constant expressions
472                    let dummy_row = 0;
473
474                    let start_val = evaluator.evaluate(start, dummy_row)?;
475                    let end_val = evaluator.evaluate(end, dummy_row)?;
476                    let step_val = if let Some(step_expr) = step {
477                        Some(evaluator.evaluate(step_expr, dummy_row)?)
478                    } else {
479                        None
480                    };
481
482                    // Convert DataValues to integers
483                    let start_int = match start_val {
484                        DataValue::Integer(i) => i,
485                        DataValue::Float(f) => f as i64,
486                        _ => return Err(anyhow!("RANGE start must be numeric")),
487                    };
488
489                    let end_int = match end_val {
490                        DataValue::Integer(i) => i,
491                        DataValue::Float(f) => f as i64,
492                        _ => return Err(anyhow!("RANGE end must be numeric")),
493                    };
494
495                    let step_int = if let Some(step) = step_val {
496                        match step {
497                            DataValue::Integer(i) => Some(i),
498                            DataValue::Float(f) => Some(f as i64),
499                            _ => return Err(anyhow!("RANGE step must be numeric")),
500                        }
501                    } else {
502                        None
503                    };
504
505                    // Generate the virtual table
506                    VirtualTableGenerator::generate_range(start_int, end_int, step_int, None)?
507                }
508                TableFunction::Split { text, delimiter } => {
509                    // Evaluate expressions to get string values
510                    let mut evaluator =
511                        ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
512
513                    // Create a dummy row for evaluating constant expressions
514                    let dummy_row = 0;
515
516                    let text_val = evaluator.evaluate(text, dummy_row)?;
517                    let delimiter_val = if let Some(delim_expr) = delimiter {
518                        Some(evaluator.evaluate(delim_expr, dummy_row)?)
519                    } else {
520                        None
521                    };
522
523                    // Convert DataValues to strings
524                    let text_str = match text_val {
525                        DataValue::String(s) => s,
526                        DataValue::Null => return Err(anyhow!("SPLIT text cannot be NULL")),
527                        _ => text_val.to_string(),
528                    };
529
530                    let delimiter_str = if let Some(delim) = delimiter_val {
531                        match delim {
532                            DataValue::String(s) => Some(s),
533                            DataValue::Null => None,
534                            _ => Some(delim.to_string()),
535                        }
536                    } else {
537                        None
538                    };
539
540                    // Generate the virtual table from split string
541                    VirtualTableGenerator::generate_split(
542                        &text_str,
543                        delimiter_str.as_deref(),
544                        None,
545                    )?
546                }
547                TableFunction::Generator { name, args } => {
548                    // Use the generator registry to create the table
549                    use crate::sql::generators::GeneratorRegistry;
550
551                    // Create generator registry (could be cached in QueryEngine)
552                    let registry = GeneratorRegistry::new();
553
554                    if let Some(generator) = registry.get(name) {
555                        // Evaluate arguments
556                        let mut evaluator = ArithmeticEvaluator::with_date_notation(
557                            &table,
558                            self.date_notation.clone(),
559                        );
560                        let dummy_row = 0;
561
562                        let mut evaluated_args = Vec::new();
563                        for arg in args {
564                            evaluated_args.push(evaluator.evaluate(arg, dummy_row)?);
565                        }
566
567                        // Generate the table
568                        generator.generate(evaluated_args)?
569                    } else {
570                        return Err(anyhow!("Unknown generator function: {}", name));
571                    }
572                }
573            }
574        } else if let Some(ref subquery) = statement.from_subquery {
575            // Execute the subquery and use its result as the source
576            debug!("QueryEngine: Processing FROM subquery...");
577            let subquery_result =
578                self.build_view_with_context(table.clone(), *subquery.clone(), cte_context)?;
579
580            // Convert the DataView to a DataTable for use as source
581            // This materializes the subquery result
582            let materialized = self.materialize_view(subquery_result)?;
583            Arc::new(materialized)
584        } else if let Some(ref table_name) = statement.from_table {
585            // Check if this references a CTE
586            if let Some(cte_view) = cte_context.get(table_name) {
587                debug!("QueryEngine: Using CTE '{}' as source table", table_name);
588                // Materialize the CTE view as a table
589                let materialized = self.materialize_view((**cte_view).clone())?;
590                Arc::new(materialized)
591            } else {
592                // Regular table reference - use the provided table
593                table.clone()
594            }
595        } else {
596            // No FROM clause - use the provided table
597            table.clone()
598        };
599
600        // Process JOINs if present
601        let final_table = if !statement.joins.is_empty() {
602            plan.begin_step(
603                StepType::Join,
604                format!("Process {} JOINs", statement.joins.len()),
605            );
606            plan.set_rows_in(source_table.row_count());
607
608            let join_executor = HashJoinExecutor::new(self.case_insensitive);
609            let mut current_table = source_table;
610
611            for (idx, join_clause) in statement.joins.iter().enumerate() {
612                let join_start = Instant::now();
613                plan.begin_step(StepType::Join, format!("JOIN #{}", idx + 1));
614                plan.add_detail(format!("Type: {:?}", join_clause.join_type));
615                plan.add_detail(format!("Left table: {} rows", current_table.row_count()));
616                plan.add_detail(format!(
617                    "Executing {:?} JOIN on {}",
618                    join_clause.join_type, join_clause.condition.left_column
619                ));
620
621                // Resolve the right table for the join
622                let right_table = match &join_clause.table {
623                    TableSource::Table(name) => {
624                        // Check if it's a CTE reference
625                        if let Some(cte_view) = cte_context.get(name) {
626                            let materialized = self.materialize_view((**cte_view).clone())?;
627                            Arc::new(materialized)
628                        } else {
629                            // For now, we need the actual table data
630                            // In a real implementation, this would load from file
631                            return Err(anyhow!("Cannot resolve table '{}' for JOIN", name));
632                        }
633                    }
634                    TableSource::DerivedTable { query, alias: _ } => {
635                        // Execute the subquery
636                        let subquery_result = self.build_view_with_context(
637                            table.clone(),
638                            *query.clone(),
639                            cte_context,
640                        )?;
641                        let materialized = self.materialize_view(subquery_result)?;
642                        Arc::new(materialized)
643                    }
644                };
645
646                // Execute the join
647                let joined = join_executor.execute_join(
648                    current_table.clone(),
649                    join_clause,
650                    right_table.clone(),
651                )?;
652
653                plan.add_detail(format!("Right table: {} rows", right_table.row_count()));
654                plan.set_rows_out(joined.row_count());
655                plan.add_detail(format!("Result: {} rows", joined.row_count()));
656                plan.add_detail(format!(
657                    "Join time: {:.3}ms",
658                    join_start.elapsed().as_secs_f64() * 1000.0
659                ));
660                plan.end_step();
661
662                current_table = Arc::new(joined);
663            }
664
665            plan.set_rows_out(current_table.row_count());
666            plan.add_detail(format!(
667                "Final result after all joins: {} rows",
668                current_table.row_count()
669            ));
670            plan.end_step();
671            current_table
672        } else {
673            source_table
674        };
675
676        // Continue with the existing build_view logic but using final_table
677        self.build_view_internal_with_plan(final_table, statement, plan)
678    }
679
680    /// Materialize a DataView into a new DataTable
681    fn materialize_view(&self, view: DataView) -> Result<DataTable> {
682        let source = view.source();
683        let mut result_table = DataTable::new("derived");
684
685        // Get the visible columns from the view
686        let visible_cols = view.visible_column_indices().to_vec();
687
688        // Copy column definitions
689        for col_idx in &visible_cols {
690            let col = &source.columns[*col_idx];
691            let new_col = DataColumn {
692                name: col.name.clone(),
693                data_type: col.data_type.clone(),
694                nullable: col.nullable,
695                unique_values: col.unique_values,
696                null_count: col.null_count,
697                metadata: col.metadata.clone(),
698            };
699            result_table.add_column(new_col);
700        }
701
702        // Copy visible rows
703        for row_idx in view.visible_row_indices() {
704            let source_row = &source.rows[*row_idx];
705            let mut new_row = DataRow { values: Vec::new() };
706
707            for col_idx in &visible_cols {
708                new_row.values.push(source_row.values[*col_idx].clone());
709            }
710
711            result_table.add_row(new_row);
712        }
713
714        Ok(result_table)
715    }
716
717    fn build_view_internal(
718        &self,
719        table: Arc<DataTable>,
720        statement: SelectStatement,
721    ) -> Result<DataView> {
722        let mut dummy_plan = ExecutionPlanBuilder::new();
723        self.build_view_internal_with_plan(table, statement, &mut dummy_plan)
724    }
725
726    fn build_view_internal_with_plan(
727        &self,
728        table: Arc<DataTable>,
729        statement: SelectStatement,
730        plan: &mut ExecutionPlanBuilder,
731    ) -> Result<DataView> {
732        debug!(
733            "QueryEngine::build_view - select_items: {:?}",
734            statement.select_items
735        );
736        debug!(
737            "QueryEngine::build_view - where_clause: {:?}",
738            statement.where_clause
739        );
740
741        // Start with all rows visible
742        let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
743
744        // Apply WHERE clause filtering using recursive evaluator
745        if let Some(where_clause) = &statement.where_clause {
746            let total_rows = table.row_count();
747            debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
748            debug!("QueryEngine: WHERE clause = {:?}", where_clause);
749
750            plan.begin_step(StepType::Filter, "WHERE clause filtering".to_string());
751            plan.set_rows_in(total_rows);
752            plan.add_detail(format!("Input: {} rows", total_rows));
753
754            // Add details about WHERE conditions
755            for condition in &where_clause.conditions {
756                plan.add_detail(format!("Condition: {:?}", condition.expr));
757            }
758
759            let filter_start = Instant::now();
760            // Create an evaluation context for caching compiled regexes
761            let mut eval_context = EvaluationContext::new(self.case_insensitive);
762
763            // Filter visible rows based on WHERE clause
764            let mut filtered_rows = Vec::new();
765            for row_idx in visible_rows {
766                // Only log for first few rows to avoid performance impact
767                if row_idx < 3 {
768                    debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
769                }
770                let mut evaluator =
771                    RecursiveWhereEvaluator::with_context(&table, &mut eval_context);
772                match evaluator.evaluate(where_clause, row_idx) {
773                    Ok(result) => {
774                        if row_idx < 3 {
775                            debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
776                        }
777                        if result {
778                            filtered_rows.push(row_idx);
779                        }
780                    }
781                    Err(e) => {
782                        if row_idx < 3 {
783                            debug!(
784                                "QueryEngine: WHERE evaluation error for row {}: {}",
785                                row_idx, e
786                            );
787                        }
788                        // Propagate WHERE clause errors instead of silently ignoring them
789                        return Err(e);
790                    }
791                }
792            }
793
794            // Log regex cache statistics
795            let (compilations, cache_hits) = eval_context.get_stats();
796            if compilations > 0 || cache_hits > 0 {
797                debug!(
798                    "LIKE pattern cache: {} compilations, {} cache hits",
799                    compilations, cache_hits
800                );
801            }
802            visible_rows = filtered_rows;
803            let filter_duration = filter_start.elapsed();
804            info!(
805                "WHERE clause filtering: {} rows -> {} rows in {:?}",
806                total_rows,
807                visible_rows.len(),
808                filter_duration
809            );
810
811            plan.set_rows_out(visible_rows.len());
812            plan.add_detail(format!("Output: {} rows", visible_rows.len()));
813            plan.add_detail(format!(
814                "Filter time: {:.3}ms",
815                filter_duration.as_secs_f64() * 1000.0
816            ));
817            plan.end_step();
818        }
819
820        // Create initial DataView with filtered rows
821        let mut view = DataView::new(table.clone());
822        view = view.with_rows(visible_rows);
823
824        // Handle GROUP BY if present
825        if let Some(group_by_exprs) = &statement.group_by {
826            if !group_by_exprs.is_empty() {
827                debug!("QueryEngine: Processing GROUP BY: {:?}", group_by_exprs);
828
829                plan.begin_step(
830                    StepType::GroupBy,
831                    format!("GROUP BY {} expressions", group_by_exprs.len()),
832                );
833                plan.set_rows_in(view.row_count());
834                plan.add_detail(format!("Input: {} rows", view.row_count()));
835                for expr in group_by_exprs {
836                    plan.add_detail(format!("Group by: {:?}", expr));
837                }
838
839                let group_start = Instant::now();
840                view = self.apply_group_by(
841                    view,
842                    group_by_exprs,
843                    &statement.select_items,
844                    statement.having.as_ref(),
845                )?;
846
847                plan.set_rows_out(view.row_count());
848                plan.add_detail(format!("Output: {} groups", view.row_count()));
849                plan.add_detail(format!(
850                    "Group time: {:.3}ms",
851                    group_start.elapsed().as_secs_f64() * 1000.0
852                ));
853                plan.end_step();
854            }
855        } else {
856            // Apply column projection or computed expressions (SELECT clause) - do this AFTER filtering
857            if !statement.select_items.is_empty() {
858                // Check if we have ANY non-star items (not just the first one)
859                let has_non_star_items = statement
860                    .select_items
861                    .iter()
862                    .any(|item| !matches!(item, SelectItem::Star));
863
864                // Apply select items if:
865                // 1. We have computed expressions or explicit columns
866                // 2. OR we have a mix of star and other items (e.g., SELECT *, computed_col)
867                if has_non_star_items || statement.select_items.len() > 1 {
868                    view = self.apply_select_items(view, &statement.select_items)?;
869                }
870                // If it's just a single star, no projection needed
871            } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
872                // Fallback to legacy column projection for backward compatibility
873                let column_indices = self.resolve_column_indices(&table, &statement.columns)?;
874                view = view.with_columns(column_indices);
875            }
876        }
877
878        // Apply DISTINCT if specified
879        if statement.distinct {
880            plan.begin_step(StepType::Distinct, "Remove duplicate rows".to_string());
881            plan.set_rows_in(view.row_count());
882            plan.add_detail(format!("Input: {} rows", view.row_count()));
883
884            let distinct_start = Instant::now();
885            view = self.apply_distinct(view)?;
886
887            plan.set_rows_out(view.row_count());
888            plan.add_detail(format!("Output: {} unique rows", view.row_count()));
889            plan.add_detail(format!(
890                "Distinct time: {:.3}ms",
891                distinct_start.elapsed().as_secs_f64() * 1000.0
892            ));
893            plan.end_step();
894        }
895
896        // Apply ORDER BY sorting
897        if let Some(order_by_columns) = &statement.order_by {
898            if !order_by_columns.is_empty() {
899                plan.begin_step(
900                    StepType::Sort,
901                    format!("ORDER BY {} columns", order_by_columns.len()),
902                );
903                plan.set_rows_in(view.row_count());
904                for col in order_by_columns {
905                    plan.add_detail(format!("{} {:?}", col.column, col.direction));
906                }
907
908                let sort_start = Instant::now();
909                view = self.apply_multi_order_by(view, order_by_columns)?;
910
911                plan.add_detail(format!(
912                    "Sort time: {:.3}ms",
913                    sort_start.elapsed().as_secs_f64() * 1000.0
914                ));
915                plan.end_step();
916            }
917        }
918
919        // Apply LIMIT/OFFSET
920        if let Some(limit) = statement.limit {
921            let offset = statement.offset.unwrap_or(0);
922            plan.begin_step(StepType::Limit, format!("LIMIT {}", limit));
923            plan.set_rows_in(view.row_count());
924            if offset > 0 {
925                plan.add_detail(format!("OFFSET: {}", offset));
926            }
927            view = view.with_limit(limit, offset);
928            plan.set_rows_out(view.row_count());
929            plan.add_detail(format!("Output: {} rows", view.row_count()));
930            plan.end_step();
931        }
932
933        Ok(view)
934    }
935
936    /// Resolve column names to indices
937    fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
938        let mut indices = Vec::new();
939        let table_columns = table.column_names();
940
941        for col_name in columns {
942            let index = table_columns
943                .iter()
944                .position(|c| c.eq_ignore_ascii_case(col_name))
945                .ok_or_else(|| {
946                    let suggestion = self.find_similar_column(table, col_name);
947                    match suggestion {
948                        Some(similar) => anyhow::anyhow!(
949                            "Column '{}' not found. Did you mean '{}'?",
950                            col_name,
951                            similar
952                        ),
953                        None => anyhow::anyhow!("Column '{}' not found", col_name),
954                    }
955                })?;
956            indices.push(index);
957        }
958
959        Ok(indices)
960    }
961
962    /// Apply SELECT items (columns and computed expressions) to create new view
963    fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
964        debug!(
965            "QueryEngine::apply_select_items - items: {:?}",
966            select_items
967        );
968        debug!(
969            "QueryEngine::apply_select_items - input view has {} rows",
970            view.row_count()
971        );
972
973        // Check if this is an aggregate query:
974        // 1. At least one aggregate function exists
975        // 2. All other items are either aggregates or constants (aggregate-compatible)
976        let has_aggregates = select_items.iter().any(|item| match item {
977            SelectItem::Expression { expr, .. } => contains_aggregate(expr),
978            SelectItem::Column(_) => false,
979            SelectItem::Star => false,
980        });
981
982        let all_aggregate_compatible = select_items.iter().all(|item| match item {
983            SelectItem::Expression { expr, .. } => is_aggregate_compatible(expr),
984            SelectItem::Column(_) => false, // Columns are not aggregate-compatible
985            SelectItem::Star => false,      // Star is not aggregate-compatible
986        });
987
988        if has_aggregates && all_aggregate_compatible && view.row_count() > 0 {
989            // Special handling for aggregate queries with constants (no GROUP BY)
990            // These should produce exactly one row
991            debug!("QueryEngine::apply_select_items - detected aggregate query with constants");
992            return self.apply_aggregate_select(view, select_items);
993        }
994
995        // Check if we need to create computed columns
996        let has_computed_expressions = select_items
997            .iter()
998            .any(|item| matches!(item, SelectItem::Expression { .. }));
999
1000        debug!(
1001            "QueryEngine::apply_select_items - has_computed_expressions: {}",
1002            has_computed_expressions
1003        );
1004
1005        if !has_computed_expressions {
1006            // Simple case: only columns, use existing projection logic
1007            let column_indices = self.resolve_select_columns(view.source(), select_items)?;
1008            return Ok(view.with_columns(column_indices));
1009        }
1010
1011        // Complex case: we have computed expressions
1012        // IMPORTANT: We create a PROJECTED view, not a new table
1013        // This preserves the original DataTable reference
1014
1015        let source_table = view.source();
1016        let visible_rows = view.visible_row_indices();
1017
1018        // Create a temporary table just for the computed result view
1019        // But this table is only used for the current query result
1020        let mut computed_table = DataTable::new("query_result");
1021
1022        // First, expand any Star selectors to actual columns
1023        let mut expanded_items = Vec::new();
1024        for item in select_items {
1025            match item {
1026                SelectItem::Star => {
1027                    // Expand * to all columns from source table
1028                    for col_name in source_table.column_names() {
1029                        expanded_items.push(SelectItem::Column(col_name));
1030                    }
1031                }
1032                _ => expanded_items.push(item.clone()),
1033            }
1034        }
1035
1036        // Add columns based on expanded SelectItems, handling duplicates
1037        let mut column_name_counts: std::collections::HashMap<String, usize> =
1038            std::collections::HashMap::new();
1039
1040        for item in &expanded_items {
1041            let base_name = match item {
1042                SelectItem::Column(name) => name.clone(),
1043                SelectItem::Expression { alias, .. } => alias.clone(),
1044                SelectItem::Star => unreachable!("Star should have been expanded"),
1045            };
1046
1047            // Check if this column name has been used before
1048            let count = column_name_counts.entry(base_name.clone()).or_insert(0);
1049            let column_name = if *count == 0 {
1050                // First occurrence, use the name as-is
1051                base_name.clone()
1052            } else {
1053                // Duplicate, append a suffix
1054                format!("{base_name}_{count}")
1055            };
1056            *count += 1;
1057
1058            computed_table.add_column(DataColumn::new(&column_name));
1059        }
1060
1061        // Calculate values for each row
1062        let mut evaluator =
1063            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
1064
1065        for &row_idx in visible_rows {
1066            let mut row_values = Vec::new();
1067
1068            for item in &expanded_items {
1069                let value = match item {
1070                    SelectItem::Column(col_name) => {
1071                        // Simple column reference
1072                        let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
1073                            let suggestion = self.find_similar_column(source_table, col_name);
1074                            match suggestion {
1075                                Some(similar) => anyhow::anyhow!(
1076                                    "Column '{}' not found. Did you mean '{}'?",
1077                                    col_name,
1078                                    similar
1079                                ),
1080                                None => anyhow::anyhow!("Column '{}' not found", col_name),
1081                            }
1082                        })?;
1083                        let row = source_table
1084                            .get_row(row_idx)
1085                            .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
1086                        row.get(col_idx)
1087                            .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
1088                            .clone()
1089                    }
1090                    SelectItem::Expression { expr, .. } => {
1091                        // Computed expression
1092                        evaluator.evaluate(expr, row_idx)?
1093                    }
1094                    SelectItem::Star => unreachable!("Star should have been expanded"),
1095                };
1096                row_values.push(value);
1097            }
1098
1099            computed_table
1100                .add_row(DataRow::new(row_values))
1101                .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
1102        }
1103
1104        // Return a view of the computed result
1105        // This is a temporary view for this query only
1106        Ok(DataView::new(Arc::new(computed_table)))
1107    }
1108
1109    /// Apply aggregate-only SELECT (no GROUP BY - produces single row)
1110    fn apply_aggregate_select(
1111        &self,
1112        view: DataView,
1113        select_items: &[SelectItem],
1114    ) -> Result<DataView> {
1115        debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
1116
1117        let source_table = view.source();
1118        let mut result_table = DataTable::new("aggregate_result");
1119
1120        // Add columns for each select item
1121        for item in select_items {
1122            let column_name = match item {
1123                SelectItem::Expression { alias, .. } => alias.clone(),
1124                _ => unreachable!("Should only have expressions in aggregate-only query"),
1125            };
1126            result_table.add_column(DataColumn::new(&column_name));
1127        }
1128
1129        // Create evaluator with visible rows from the view (for filtered aggregates)
1130        let visible_rows = view.visible_row_indices().to_vec();
1131        let mut evaluator =
1132            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
1133                .with_visible_rows(visible_rows);
1134
1135        // Evaluate each aggregate expression once (they handle all rows internally)
1136        let mut row_values = Vec::new();
1137        for item in select_items {
1138            match item {
1139                SelectItem::Expression { expr, .. } => {
1140                    // The evaluator will handle aggregates over all rows
1141                    // We pass row_index=0 but aggregates ignore it and process all rows
1142                    let value = evaluator.evaluate(expr, 0)?;
1143                    row_values.push(value);
1144                }
1145                _ => unreachable!("Should only have expressions in aggregate-only query"),
1146            }
1147        }
1148
1149        // Add the single result row
1150        result_table
1151            .add_row(DataRow::new(row_values))
1152            .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
1153
1154        Ok(DataView::new(Arc::new(result_table)))
1155    }
1156
1157    /// Resolve `SelectItem` columns to indices (for simple column projections only)
1158    fn resolve_select_columns(
1159        &self,
1160        table: &DataTable,
1161        select_items: &[SelectItem],
1162    ) -> Result<Vec<usize>> {
1163        let mut indices = Vec::new();
1164        let table_columns = table.column_names();
1165
1166        for item in select_items {
1167            match item {
1168                SelectItem::Column(col_name) => {
1169                    let index = table_columns
1170                        .iter()
1171                        .position(|c| c.eq_ignore_ascii_case(col_name))
1172                        .ok_or_else(|| {
1173                            let suggestion = self.find_similar_column(table, col_name);
1174                            match suggestion {
1175                                Some(similar) => anyhow::anyhow!(
1176                                    "Column '{}' not found. Did you mean '{}'?",
1177                                    col_name,
1178                                    similar
1179                                ),
1180                                None => anyhow::anyhow!("Column '{}' not found", col_name),
1181                            }
1182                        })?;
1183                    indices.push(index);
1184                }
1185                SelectItem::Star => {
1186                    // Expand * to all column indices
1187                    for i in 0..table_columns.len() {
1188                        indices.push(i);
1189                    }
1190                }
1191                SelectItem::Expression { .. } => {
1192                    return Err(anyhow::anyhow!(
1193                        "Computed expressions require new table creation"
1194                    ));
1195                }
1196            }
1197        }
1198
1199        Ok(indices)
1200    }
1201
1202    /// Apply DISTINCT to remove duplicate rows
1203    fn apply_distinct(&self, view: DataView) -> Result<DataView> {
1204        use std::collections::HashSet;
1205
1206        let source = view.source();
1207        let visible_cols = view.visible_column_indices();
1208        let visible_rows = view.visible_row_indices();
1209
1210        // Build a set to track unique rows
1211        let mut seen_rows = HashSet::new();
1212        let mut unique_row_indices = Vec::new();
1213
1214        for &row_idx in visible_rows {
1215            // Build a key representing this row's visible column values
1216            let mut row_key = Vec::new();
1217            for &col_idx in visible_cols {
1218                let value = source
1219                    .get_value(row_idx, col_idx)
1220                    .ok_or_else(|| anyhow!("Invalid cell reference"))?;
1221                // Convert value to a hashable representation
1222                row_key.push(format!("{:?}", value));
1223            }
1224
1225            // Check if we've seen this row before
1226            if seen_rows.insert(row_key) {
1227                // First time seeing this row combination
1228                unique_row_indices.push(row_idx);
1229            }
1230        }
1231
1232        // Create a new view with only unique rows
1233        Ok(view.with_rows(unique_row_indices))
1234    }
1235
1236    /// Apply multi-column ORDER BY sorting to the view
1237    fn apply_multi_order_by(
1238        &self,
1239        mut view: DataView,
1240        order_by_columns: &[OrderByColumn],
1241    ) -> Result<DataView> {
1242        // Build list of (source_column_index, ascending) tuples
1243        let mut sort_columns = Vec::new();
1244
1245        for order_col in order_by_columns {
1246            // Try to find the column index
1247            // Check in the current view's source table (this handles both regular columns and computed columns)
1248            // This is especially important after GROUP BY where we have a new result table with aggregate aliases
1249            let col_index = view
1250                .source()
1251                .get_column_index(&order_col.column)
1252                .ok_or_else(|| {
1253                    // If not found, provide helpful error with suggestions
1254                    let suggestion = self.find_similar_column(view.source(), &order_col.column);
1255                    match suggestion {
1256                        Some(similar) => anyhow::anyhow!(
1257                            "Column '{}' not found. Did you mean '{}'?",
1258                            order_col.column,
1259                            similar
1260                        ),
1261                        None => {
1262                            // Also list available columns for debugging
1263                            let available_cols = view.source().column_names().join(", ");
1264                            anyhow::anyhow!(
1265                                "Column '{}' not found. Available columns: {}",
1266                                order_col.column,
1267                                available_cols
1268                            )
1269                        }
1270                    }
1271                })?;
1272
1273            let ascending = matches!(order_col.direction, SortDirection::Asc);
1274            sort_columns.push((col_index, ascending));
1275        }
1276
1277        // Apply multi-column sorting
1278        view.apply_multi_sort(&sort_columns)?;
1279        Ok(view)
1280    }
1281
1282    /// Apply GROUP BY to the view with optional HAVING clause
1283    fn apply_group_by(
1284        &self,
1285        view: DataView,
1286        group_by_exprs: &[SqlExpression],
1287        select_items: &[SelectItem],
1288        having: Option<&SqlExpression>,
1289    ) -> Result<DataView> {
1290        // Use the new expression-based GROUP BY implementation
1291        self.apply_group_by_expressions(
1292            view,
1293            group_by_exprs,
1294            select_items,
1295            having,
1296            self.case_insensitive,
1297            self.date_notation.clone(),
1298        )
1299    }
1300}
1301
1302#[cfg(test)]
1303mod tests {
1304    use super::*;
1305    use crate::data::datatable::{DataColumn, DataRow, DataValue};
1306
1307    fn create_test_table() -> Arc<DataTable> {
1308        let mut table = DataTable::new("test");
1309
1310        // Add columns
1311        table.add_column(DataColumn::new("id"));
1312        table.add_column(DataColumn::new("name"));
1313        table.add_column(DataColumn::new("age"));
1314
1315        // Add rows
1316        table
1317            .add_row(DataRow::new(vec![
1318                DataValue::Integer(1),
1319                DataValue::String("Alice".to_string()),
1320                DataValue::Integer(30),
1321            ]))
1322            .unwrap();
1323
1324        table
1325            .add_row(DataRow::new(vec![
1326                DataValue::Integer(2),
1327                DataValue::String("Bob".to_string()),
1328                DataValue::Integer(25),
1329            ]))
1330            .unwrap();
1331
1332        table
1333            .add_row(DataRow::new(vec![
1334                DataValue::Integer(3),
1335                DataValue::String("Charlie".to_string()),
1336                DataValue::Integer(35),
1337            ]))
1338            .unwrap();
1339
1340        Arc::new(table)
1341    }
1342
1343    #[test]
1344    fn test_select_all() {
1345        let table = create_test_table();
1346        let engine = QueryEngine::new();
1347
1348        let view = engine
1349            .execute(table.clone(), "SELECT * FROM users")
1350            .unwrap();
1351        assert_eq!(view.row_count(), 3);
1352        assert_eq!(view.column_count(), 3);
1353    }
1354
1355    #[test]
1356    fn test_select_columns() {
1357        let table = create_test_table();
1358        let engine = QueryEngine::new();
1359
1360        let view = engine
1361            .execute(table.clone(), "SELECT name, age FROM users")
1362            .unwrap();
1363        assert_eq!(view.row_count(), 3);
1364        assert_eq!(view.column_count(), 2);
1365    }
1366
1367    #[test]
1368    fn test_select_with_limit() {
1369        let table = create_test_table();
1370        let engine = QueryEngine::new();
1371
1372        let view = engine
1373            .execute(table.clone(), "SELECT * FROM users LIMIT 2")
1374            .unwrap();
1375        assert_eq!(view.row_count(), 2);
1376    }
1377
1378    #[test]
1379    fn test_type_coercion_contains() {
1380        // Initialize tracing for debug output
1381        let _ = tracing_subscriber::fmt()
1382            .with_max_level(tracing::Level::DEBUG)
1383            .try_init();
1384
1385        let mut table = DataTable::new("test");
1386        table.add_column(DataColumn::new("id"));
1387        table.add_column(DataColumn::new("status"));
1388        table.add_column(DataColumn::new("price"));
1389
1390        // Add test data with mixed types
1391        table
1392            .add_row(DataRow::new(vec![
1393                DataValue::Integer(1),
1394                DataValue::String("Pending".to_string()),
1395                DataValue::Float(99.99),
1396            ]))
1397            .unwrap();
1398
1399        table
1400            .add_row(DataRow::new(vec![
1401                DataValue::Integer(2),
1402                DataValue::String("Confirmed".to_string()),
1403                DataValue::Float(150.50),
1404            ]))
1405            .unwrap();
1406
1407        table
1408            .add_row(DataRow::new(vec![
1409                DataValue::Integer(3),
1410                DataValue::String("Pending".to_string()),
1411                DataValue::Float(75.00),
1412            ]))
1413            .unwrap();
1414
1415        let table = Arc::new(table);
1416        let engine = QueryEngine::new();
1417
1418        println!("\n=== Testing WHERE clause with Contains ===");
1419        println!("Table has {} rows", table.row_count());
1420        for i in 0..table.row_count() {
1421            let status = table.get_value(i, 1);
1422            println!("Row {i}: status = {status:?}");
1423        }
1424
1425        // Test 1: Basic string contains (should work)
1426        println!("\n--- Test 1: status.Contains('pend') ---");
1427        let result = engine.execute(
1428            table.clone(),
1429            "SELECT * FROM test WHERE status.Contains('pend')",
1430        );
1431        match result {
1432            Ok(view) => {
1433                println!("SUCCESS: Found {} matching rows", view.row_count());
1434                assert_eq!(view.row_count(), 2); // Should find both Pending rows
1435            }
1436            Err(e) => {
1437                panic!("Query failed: {e}");
1438            }
1439        }
1440
1441        // Test 2: Numeric contains (should work with type coercion)
1442        println!("\n--- Test 2: price.Contains('9') ---");
1443        let result = engine.execute(
1444            table.clone(),
1445            "SELECT * FROM test WHERE price.Contains('9')",
1446        );
1447        match result {
1448            Ok(view) => {
1449                println!(
1450                    "SUCCESS: Found {} matching rows with price containing '9'",
1451                    view.row_count()
1452                );
1453                // Should find 99.99 row
1454                assert!(view.row_count() >= 1);
1455            }
1456            Err(e) => {
1457                panic!("Numeric coercion query failed: {e}");
1458            }
1459        }
1460
1461        println!("\n=== All tests passed! ===");
1462    }
1463
1464    #[test]
1465    fn test_not_in_clause() {
1466        // Initialize tracing for debug output
1467        let _ = tracing_subscriber::fmt()
1468            .with_max_level(tracing::Level::DEBUG)
1469            .try_init();
1470
1471        let mut table = DataTable::new("test");
1472        table.add_column(DataColumn::new("id"));
1473        table.add_column(DataColumn::new("country"));
1474
1475        // Add test data
1476        table
1477            .add_row(DataRow::new(vec![
1478                DataValue::Integer(1),
1479                DataValue::String("CA".to_string()),
1480            ]))
1481            .unwrap();
1482
1483        table
1484            .add_row(DataRow::new(vec![
1485                DataValue::Integer(2),
1486                DataValue::String("US".to_string()),
1487            ]))
1488            .unwrap();
1489
1490        table
1491            .add_row(DataRow::new(vec![
1492                DataValue::Integer(3),
1493                DataValue::String("UK".to_string()),
1494            ]))
1495            .unwrap();
1496
1497        let table = Arc::new(table);
1498        let engine = QueryEngine::new();
1499
1500        println!("\n=== Testing NOT IN clause ===");
1501        println!("Table has {} rows", table.row_count());
1502        for i in 0..table.row_count() {
1503            let country = table.get_value(i, 1);
1504            println!("Row {i}: country = {country:?}");
1505        }
1506
1507        // Test NOT IN clause - should exclude CA, return US and UK (2 rows)
1508        println!("\n--- Test: country NOT IN ('CA') ---");
1509        let result = engine.execute(
1510            table.clone(),
1511            "SELECT * FROM test WHERE country NOT IN ('CA')",
1512        );
1513        match result {
1514            Ok(view) => {
1515                println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
1516                assert_eq!(view.row_count(), 2); // Should find US and UK
1517            }
1518            Err(e) => {
1519                panic!("NOT IN query failed: {e}");
1520            }
1521        }
1522
1523        println!("\n=== NOT IN test complete! ===");
1524    }
1525
1526    #[test]
1527    fn test_case_insensitive_in_and_not_in() {
1528        // Initialize tracing for debug output
1529        let _ = tracing_subscriber::fmt()
1530            .with_max_level(tracing::Level::DEBUG)
1531            .try_init();
1532
1533        let mut table = DataTable::new("test");
1534        table.add_column(DataColumn::new("id"));
1535        table.add_column(DataColumn::new("country"));
1536
1537        // Add test data with mixed case
1538        table
1539            .add_row(DataRow::new(vec![
1540                DataValue::Integer(1),
1541                DataValue::String("CA".to_string()), // uppercase
1542            ]))
1543            .unwrap();
1544
1545        table
1546            .add_row(DataRow::new(vec![
1547                DataValue::Integer(2),
1548                DataValue::String("us".to_string()), // lowercase
1549            ]))
1550            .unwrap();
1551
1552        table
1553            .add_row(DataRow::new(vec![
1554                DataValue::Integer(3),
1555                DataValue::String("UK".to_string()), // uppercase
1556            ]))
1557            .unwrap();
1558
1559        let table = Arc::new(table);
1560
1561        println!("\n=== Testing Case-Insensitive IN clause ===");
1562        println!("Table has {} rows", table.row_count());
1563        for i in 0..table.row_count() {
1564            let country = table.get_value(i, 1);
1565            println!("Row {i}: country = {country:?}");
1566        }
1567
1568        // Test case-insensitive IN - should match 'CA' with 'ca'
1569        println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
1570        let engine = QueryEngine::with_case_insensitive(true);
1571        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1572        match result {
1573            Ok(view) => {
1574                println!(
1575                    "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
1576                    view.row_count()
1577                );
1578                assert_eq!(view.row_count(), 1); // Should find CA row
1579            }
1580            Err(e) => {
1581                panic!("Case-insensitive IN query failed: {e}");
1582            }
1583        }
1584
1585        // Test case-insensitive NOT IN - should exclude 'CA' when searching for 'ca'
1586        println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
1587        let result = engine.execute(
1588            table.clone(),
1589            "SELECT * FROM test WHERE country NOT IN ('ca')",
1590        );
1591        match result {
1592            Ok(view) => {
1593                println!(
1594                    "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
1595                    view.row_count()
1596                );
1597                assert_eq!(view.row_count(), 2); // Should find us and UK rows
1598            }
1599            Err(e) => {
1600                panic!("Case-insensitive NOT IN query failed: {e}");
1601            }
1602        }
1603
1604        // Test case-sensitive (default) - should NOT match 'CA' with 'ca'
1605        println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
1606        let engine_case_sensitive = QueryEngine::new(); // defaults to case_insensitive=false
1607        let result = engine_case_sensitive
1608            .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1609        match result {
1610            Ok(view) => {
1611                println!(
1612                    "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
1613                    view.row_count()
1614                );
1615                assert_eq!(view.row_count(), 0); // Should find no rows (CA != ca)
1616            }
1617            Err(e) => {
1618                panic!("Case-sensitive IN query failed: {e}");
1619            }
1620        }
1621
1622        println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
1623    }
1624
1625    #[test]
1626    #[ignore = "Parentheses in WHERE clause not yet implemented"]
1627    fn test_parentheses_in_where_clause() {
1628        // Initialize tracing for debug output
1629        let _ = tracing_subscriber::fmt()
1630            .with_max_level(tracing::Level::DEBUG)
1631            .try_init();
1632
1633        let mut table = DataTable::new("test");
1634        table.add_column(DataColumn::new("id"));
1635        table.add_column(DataColumn::new("status"));
1636        table.add_column(DataColumn::new("priority"));
1637
1638        // Add test data
1639        table
1640            .add_row(DataRow::new(vec![
1641                DataValue::Integer(1),
1642                DataValue::String("Pending".to_string()),
1643                DataValue::String("High".to_string()),
1644            ]))
1645            .unwrap();
1646
1647        table
1648            .add_row(DataRow::new(vec![
1649                DataValue::Integer(2),
1650                DataValue::String("Complete".to_string()),
1651                DataValue::String("High".to_string()),
1652            ]))
1653            .unwrap();
1654
1655        table
1656            .add_row(DataRow::new(vec![
1657                DataValue::Integer(3),
1658                DataValue::String("Pending".to_string()),
1659                DataValue::String("Low".to_string()),
1660            ]))
1661            .unwrap();
1662
1663        table
1664            .add_row(DataRow::new(vec![
1665                DataValue::Integer(4),
1666                DataValue::String("Complete".to_string()),
1667                DataValue::String("Low".to_string()),
1668            ]))
1669            .unwrap();
1670
1671        let table = Arc::new(table);
1672        let engine = QueryEngine::new();
1673
1674        println!("\n=== Testing Parentheses in WHERE clause ===");
1675        println!("Table has {} rows", table.row_count());
1676        for i in 0..table.row_count() {
1677            let status = table.get_value(i, 1);
1678            let priority = table.get_value(i, 2);
1679            println!("Row {i}: status = {status:?}, priority = {priority:?}");
1680        }
1681
1682        // Test OR with parentheses - should get (Pending AND High) OR (Complete AND Low)
1683        println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
1684        let result = engine.execute(
1685            table.clone(),
1686            "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
1687        );
1688        match result {
1689            Ok(view) => {
1690                println!(
1691                    "SUCCESS: Found {} rows with parenthetical logic",
1692                    view.row_count()
1693                );
1694                assert_eq!(view.row_count(), 2); // Should find rows 1 and 4
1695            }
1696            Err(e) => {
1697                panic!("Parentheses query failed: {e}");
1698            }
1699        }
1700
1701        println!("\n=== Parentheses test complete! ===");
1702    }
1703
1704    #[test]
1705    #[ignore = "Numeric type coercion needs fixing"]
1706    fn test_numeric_type_coercion() {
1707        // Initialize tracing for debug output
1708        let _ = tracing_subscriber::fmt()
1709            .with_max_level(tracing::Level::DEBUG)
1710            .try_init();
1711
1712        let mut table = DataTable::new("test");
1713        table.add_column(DataColumn::new("id"));
1714        table.add_column(DataColumn::new("price"));
1715        table.add_column(DataColumn::new("quantity"));
1716
1717        // Add test data with different numeric types
1718        table
1719            .add_row(DataRow::new(vec![
1720                DataValue::Integer(1),
1721                DataValue::Float(99.50), // Contains '.'
1722                DataValue::Integer(100),
1723            ]))
1724            .unwrap();
1725
1726        table
1727            .add_row(DataRow::new(vec![
1728                DataValue::Integer(2),
1729                DataValue::Float(150.0), // Contains '.' and '0'
1730                DataValue::Integer(200),
1731            ]))
1732            .unwrap();
1733
1734        table
1735            .add_row(DataRow::new(vec![
1736                DataValue::Integer(3),
1737                DataValue::Integer(75), // No decimal point
1738                DataValue::Integer(50),
1739            ]))
1740            .unwrap();
1741
1742        let table = Arc::new(table);
1743        let engine = QueryEngine::new();
1744
1745        println!("\n=== Testing Numeric Type Coercion ===");
1746        println!("Table has {} rows", table.row_count());
1747        for i in 0..table.row_count() {
1748            let price = table.get_value(i, 1);
1749            let quantity = table.get_value(i, 2);
1750            println!("Row {i}: price = {price:?}, quantity = {quantity:?}");
1751        }
1752
1753        // Test Contains on float values - should find rows with decimal points
1754        println!("\n--- Test: price.Contains('.') ---");
1755        let result = engine.execute(
1756            table.clone(),
1757            "SELECT * FROM test WHERE price.Contains('.')",
1758        );
1759        match result {
1760            Ok(view) => {
1761                println!(
1762                    "SUCCESS: Found {} rows with decimal points in price",
1763                    view.row_count()
1764                );
1765                assert_eq!(view.row_count(), 2); // Should find 99.50 and 150.0
1766            }
1767            Err(e) => {
1768                panic!("Numeric Contains query failed: {e}");
1769            }
1770        }
1771
1772        // Test Contains on integer values converted to string
1773        println!("\n--- Test: quantity.Contains('0') ---");
1774        let result = engine.execute(
1775            table.clone(),
1776            "SELECT * FROM test WHERE quantity.Contains('0')",
1777        );
1778        match result {
1779            Ok(view) => {
1780                println!(
1781                    "SUCCESS: Found {} rows with '0' in quantity",
1782                    view.row_count()
1783                );
1784                assert_eq!(view.row_count(), 2); // Should find 100 and 200
1785            }
1786            Err(e) => {
1787                panic!("Integer Contains query failed: {e}");
1788            }
1789        }
1790
1791        println!("\n=== Numeric type coercion test complete! ===");
1792    }
1793
1794    #[test]
1795    fn test_datetime_comparisons() {
1796        // Initialize tracing for debug output
1797        let _ = tracing_subscriber::fmt()
1798            .with_max_level(tracing::Level::DEBUG)
1799            .try_init();
1800
1801        let mut table = DataTable::new("test");
1802        table.add_column(DataColumn::new("id"));
1803        table.add_column(DataColumn::new("created_date"));
1804
1805        // Add test data with date strings (as they would come from CSV)
1806        table
1807            .add_row(DataRow::new(vec![
1808                DataValue::Integer(1),
1809                DataValue::String("2024-12-15".to_string()),
1810            ]))
1811            .unwrap();
1812
1813        table
1814            .add_row(DataRow::new(vec![
1815                DataValue::Integer(2),
1816                DataValue::String("2025-01-15".to_string()),
1817            ]))
1818            .unwrap();
1819
1820        table
1821            .add_row(DataRow::new(vec![
1822                DataValue::Integer(3),
1823                DataValue::String("2025-02-15".to_string()),
1824            ]))
1825            .unwrap();
1826
1827        let table = Arc::new(table);
1828        let engine = QueryEngine::new();
1829
1830        println!("\n=== Testing DateTime Comparisons ===");
1831        println!("Table has {} rows", table.row_count());
1832        for i in 0..table.row_count() {
1833            let date = table.get_value(i, 1);
1834            println!("Row {i}: created_date = {date:?}");
1835        }
1836
1837        // Test DateTime constructor comparison - should find dates after 2025-01-01
1838        println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1839        let result = engine.execute(
1840            table.clone(),
1841            "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1842        );
1843        match result {
1844            Ok(view) => {
1845                println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1846                assert_eq!(view.row_count(), 2); // Should find 2025-01-15 and 2025-02-15
1847            }
1848            Err(e) => {
1849                panic!("DateTime comparison query failed: {e}");
1850            }
1851        }
1852
1853        println!("\n=== DateTime comparison test complete! ===");
1854    }
1855
1856    #[test]
1857    fn test_not_with_method_calls() {
1858        // Initialize tracing for debug output
1859        let _ = tracing_subscriber::fmt()
1860            .with_max_level(tracing::Level::DEBUG)
1861            .try_init();
1862
1863        let mut table = DataTable::new("test");
1864        table.add_column(DataColumn::new("id"));
1865        table.add_column(DataColumn::new("status"));
1866
1867        // Add test data
1868        table
1869            .add_row(DataRow::new(vec![
1870                DataValue::Integer(1),
1871                DataValue::String("Pending Review".to_string()),
1872            ]))
1873            .unwrap();
1874
1875        table
1876            .add_row(DataRow::new(vec![
1877                DataValue::Integer(2),
1878                DataValue::String("Complete".to_string()),
1879            ]))
1880            .unwrap();
1881
1882        table
1883            .add_row(DataRow::new(vec![
1884                DataValue::Integer(3),
1885                DataValue::String("Pending Approval".to_string()),
1886            ]))
1887            .unwrap();
1888
1889        let table = Arc::new(table);
1890        let engine = QueryEngine::with_case_insensitive(true);
1891
1892        println!("\n=== Testing NOT with Method Calls ===");
1893        println!("Table has {} rows", table.row_count());
1894        for i in 0..table.row_count() {
1895            let status = table.get_value(i, 1);
1896            println!("Row {i}: status = {status:?}");
1897        }
1898
1899        // Test NOT with Contains - should exclude rows containing "pend"
1900        println!("\n--- Test: NOT status.Contains('pend') ---");
1901        let result = engine.execute(
1902            table.clone(),
1903            "SELECT * FROM test WHERE NOT status.Contains('pend')",
1904        );
1905        match result {
1906            Ok(view) => {
1907                println!(
1908                    "SUCCESS: Found {} rows NOT containing 'pend'",
1909                    view.row_count()
1910                );
1911                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1912            }
1913            Err(e) => {
1914                panic!("NOT Contains query failed: {e}");
1915            }
1916        }
1917
1918        // Test NOT with StartsWith
1919        println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1920        let result = engine.execute(
1921            table.clone(),
1922            "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1923        );
1924        match result {
1925            Ok(view) => {
1926                println!(
1927                    "SUCCESS: Found {} rows NOT starting with 'Pending'",
1928                    view.row_count()
1929                );
1930                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1931            }
1932            Err(e) => {
1933                panic!("NOT StartsWith query failed: {e}");
1934            }
1935        }
1936
1937        println!("\n=== NOT with method calls test complete! ===");
1938    }
1939
1940    #[test]
1941    #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1942    fn test_complex_logical_expressions() {
1943        // Initialize tracing for debug output
1944        let _ = tracing_subscriber::fmt()
1945            .with_max_level(tracing::Level::DEBUG)
1946            .try_init();
1947
1948        let mut table = DataTable::new("test");
1949        table.add_column(DataColumn::new("id"));
1950        table.add_column(DataColumn::new("status"));
1951        table.add_column(DataColumn::new("priority"));
1952        table.add_column(DataColumn::new("assigned"));
1953
1954        // Add comprehensive test data
1955        table
1956            .add_row(DataRow::new(vec![
1957                DataValue::Integer(1),
1958                DataValue::String("Pending".to_string()),
1959                DataValue::String("High".to_string()),
1960                DataValue::String("John".to_string()),
1961            ]))
1962            .unwrap();
1963
1964        table
1965            .add_row(DataRow::new(vec![
1966                DataValue::Integer(2),
1967                DataValue::String("Complete".to_string()),
1968                DataValue::String("High".to_string()),
1969                DataValue::String("Jane".to_string()),
1970            ]))
1971            .unwrap();
1972
1973        table
1974            .add_row(DataRow::new(vec![
1975                DataValue::Integer(3),
1976                DataValue::String("Pending".to_string()),
1977                DataValue::String("Low".to_string()),
1978                DataValue::String("John".to_string()),
1979            ]))
1980            .unwrap();
1981
1982        table
1983            .add_row(DataRow::new(vec![
1984                DataValue::Integer(4),
1985                DataValue::String("In Progress".to_string()),
1986                DataValue::String("Medium".to_string()),
1987                DataValue::String("Jane".to_string()),
1988            ]))
1989            .unwrap();
1990
1991        let table = Arc::new(table);
1992        let engine = QueryEngine::new();
1993
1994        println!("\n=== Testing Complex Logical Expressions ===");
1995        println!("Table has {} rows", table.row_count());
1996        for i in 0..table.row_count() {
1997            let status = table.get_value(i, 1);
1998            let priority = table.get_value(i, 2);
1999            let assigned = table.get_value(i, 3);
2000            println!(
2001                "Row {i}: status = {status:?}, priority = {priority:?}, assigned = {assigned:?}"
2002            );
2003        }
2004
2005        // Test complex AND/OR logic
2006        println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
2007        let result = engine.execute(
2008            table.clone(),
2009            "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
2010        );
2011        match result {
2012            Ok(view) => {
2013                println!(
2014                    "SUCCESS: Found {} rows with complex logic",
2015                    view.row_count()
2016                );
2017                assert_eq!(view.row_count(), 2); // Should find rows 1 and 3 (both Pending, one High priority, both assigned to John)
2018            }
2019            Err(e) => {
2020                panic!("Complex logic query failed: {e}");
2021            }
2022        }
2023
2024        // Test NOT with complex expressions
2025        println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
2026        let result = engine.execute(
2027            table.clone(),
2028            "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
2029        );
2030        match result {
2031            Ok(view) => {
2032                println!(
2033                    "SUCCESS: Found {} rows with NOT complex logic",
2034                    view.row_count()
2035                );
2036                assert_eq!(view.row_count(), 2); // Should find rows 1 (Pending+High) and 4 (In Progress+Medium)
2037            }
2038            Err(e) => {
2039                panic!("NOT complex logic query failed: {e}");
2040            }
2041        }
2042
2043        println!("\n=== Complex logical expressions test complete! ===");
2044    }
2045
2046    #[test]
2047    fn test_mixed_data_types_and_edge_cases() {
2048        // Initialize tracing for debug output
2049        let _ = tracing_subscriber::fmt()
2050            .with_max_level(tracing::Level::DEBUG)
2051            .try_init();
2052
2053        let mut table = DataTable::new("test");
2054        table.add_column(DataColumn::new("id"));
2055        table.add_column(DataColumn::new("value"));
2056        table.add_column(DataColumn::new("nullable_field"));
2057
2058        // Add test data with mixed types and edge cases
2059        table
2060            .add_row(DataRow::new(vec![
2061                DataValue::Integer(1),
2062                DataValue::String("123.45".to_string()),
2063                DataValue::String("present".to_string()),
2064            ]))
2065            .unwrap();
2066
2067        table
2068            .add_row(DataRow::new(vec![
2069                DataValue::Integer(2),
2070                DataValue::Float(678.90),
2071                DataValue::Null,
2072            ]))
2073            .unwrap();
2074
2075        table
2076            .add_row(DataRow::new(vec![
2077                DataValue::Integer(3),
2078                DataValue::Boolean(true),
2079                DataValue::String("also present".to_string()),
2080            ]))
2081            .unwrap();
2082
2083        table
2084            .add_row(DataRow::new(vec![
2085                DataValue::Integer(4),
2086                DataValue::String("false".to_string()),
2087                DataValue::Null,
2088            ]))
2089            .unwrap();
2090
2091        let table = Arc::new(table);
2092        let engine = QueryEngine::new();
2093
2094        println!("\n=== Testing Mixed Data Types and Edge Cases ===");
2095        println!("Table has {} rows", table.row_count());
2096        for i in 0..table.row_count() {
2097            let value = table.get_value(i, 1);
2098            let nullable = table.get_value(i, 2);
2099            println!("Row {i}: value = {value:?}, nullable_field = {nullable:?}");
2100        }
2101
2102        // Test type coercion with boolean Contains
2103        println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
2104        let result = engine.execute(
2105            table.clone(),
2106            "SELECT * FROM test WHERE value.Contains('true')",
2107        );
2108        match result {
2109            Ok(view) => {
2110                println!(
2111                    "SUCCESS: Found {} rows with boolean coercion",
2112                    view.row_count()
2113                );
2114                assert_eq!(view.row_count(), 1); // Should find the boolean true row
2115            }
2116            Err(e) => {
2117                panic!("Boolean coercion query failed: {e}");
2118            }
2119        }
2120
2121        // Test multiple IN values with mixed types
2122        println!("\n--- Test: id IN (1, 3) ---");
2123        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
2124        match result {
2125            Ok(view) => {
2126                println!("SUCCESS: Found {} rows with IN clause", view.row_count());
2127                assert_eq!(view.row_count(), 2); // Should find rows with id 1 and 3
2128            }
2129            Err(e) => {
2130                panic!("Multiple IN values query failed: {e}");
2131            }
2132        }
2133
2134        println!("\n=== Mixed data types test complete! ===");
2135    }
2136
2137    /// Test that aggregate-only queries return exactly one row (regression test)
2138    #[test]
2139    fn test_aggregate_only_single_row() {
2140        let table = create_test_stock_data();
2141        let engine = QueryEngine::new();
2142
2143        // Test query with multiple aggregates - should return exactly 1 row
2144        let result = engine
2145            .execute(
2146                table.clone(),
2147                "SELECT COUNT(*), MIN(close), MAX(close), AVG(close) FROM stock",
2148            )
2149            .expect("Query should succeed");
2150
2151        assert_eq!(
2152            result.row_count(),
2153            1,
2154            "Aggregate-only query should return exactly 1 row"
2155        );
2156        assert_eq!(result.column_count(), 4, "Should have 4 aggregate columns");
2157
2158        // Verify the actual values are correct
2159        let source = result.source();
2160        let row = source.get_row(0).expect("Should have first row");
2161
2162        // COUNT(*) should be 5 (total rows)
2163        assert_eq!(row.values[0], DataValue::Integer(5));
2164
2165        // MIN should be 99.5
2166        assert_eq!(row.values[1], DataValue::Float(99.5));
2167
2168        // MAX should be 105.0
2169        assert_eq!(row.values[2], DataValue::Float(105.0));
2170
2171        // AVG should be approximately 102.4
2172        if let DataValue::Float(avg) = &row.values[3] {
2173            assert!(
2174                (avg - 102.4).abs() < 0.01,
2175                "Average should be approximately 102.4, got {}",
2176                avg
2177            );
2178        } else {
2179            panic!("AVG should return a Float value");
2180        }
2181    }
2182
2183    /// Test single aggregate function returns single row
2184    #[test]
2185    fn test_single_aggregate_single_row() {
2186        let table = create_test_stock_data();
2187        let engine = QueryEngine::new();
2188
2189        let result = engine
2190            .execute(table.clone(), "SELECT COUNT(*) FROM stock")
2191            .expect("Query should succeed");
2192
2193        assert_eq!(
2194            result.row_count(),
2195            1,
2196            "Single aggregate query should return exactly 1 row"
2197        );
2198        assert_eq!(result.column_count(), 1, "Should have 1 column");
2199
2200        let source = result.source();
2201        let row = source.get_row(0).expect("Should have first row");
2202        assert_eq!(row.values[0], DataValue::Integer(5));
2203    }
2204
2205    /// Test aggregate with WHERE clause filtering
2206    #[test]
2207    fn test_aggregate_with_where_single_row() {
2208        let table = create_test_stock_data();
2209        let engine = QueryEngine::new();
2210
2211        // Filter to only high-value stocks (>= 103.0) and aggregate
2212        let result = engine
2213            .execute(
2214                table.clone(),
2215                "SELECT COUNT(*), MIN(close), MAX(close) FROM stock WHERE close >= 103.0",
2216            )
2217            .expect("Query should succeed");
2218
2219        assert_eq!(
2220            result.row_count(),
2221            1,
2222            "Filtered aggregate query should return exactly 1 row"
2223        );
2224        assert_eq!(result.column_count(), 3, "Should have 3 aggregate columns");
2225
2226        let source = result.source();
2227        let row = source.get_row(0).expect("Should have first row");
2228
2229        // Should find 2 rows (103.5 and 105.0)
2230        assert_eq!(row.values[0], DataValue::Integer(2));
2231        assert_eq!(row.values[1], DataValue::Float(103.5)); // MIN
2232        assert_eq!(row.values[2], DataValue::Float(105.0)); // MAX
2233    }
2234
2235    #[test]
2236    fn test_not_in_parsing() {
2237        use crate::sql::recursive_parser::Parser;
2238
2239        let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
2240        println!("\n=== Testing NOT IN parsing ===");
2241        println!("Parsing query: {query}");
2242
2243        let mut parser = Parser::new(query);
2244        match parser.parse() {
2245            Ok(statement) => {
2246                println!("Parsed statement: {statement:#?}");
2247                if let Some(where_clause) = statement.where_clause {
2248                    println!("WHERE conditions: {:#?}", where_clause.conditions);
2249                    if let Some(first_condition) = where_clause.conditions.first() {
2250                        println!("First condition expression: {:#?}", first_condition.expr);
2251                    }
2252                }
2253            }
2254            Err(e) => {
2255                panic!("Parse error: {e}");
2256            }
2257        }
2258    }
2259
2260    /// Create test stock data for aggregate testing
2261    fn create_test_stock_data() -> Arc<DataTable> {
2262        let mut table = DataTable::new("stock");
2263
2264        table.add_column(DataColumn::new("symbol"));
2265        table.add_column(DataColumn::new("close"));
2266        table.add_column(DataColumn::new("volume"));
2267
2268        // Add 5 rows of test data
2269        let test_data = vec![
2270            ("AAPL", 99.5, 1000),
2271            ("AAPL", 101.2, 1500),
2272            ("AAPL", 103.5, 2000),
2273            ("AAPL", 105.0, 1200),
2274            ("AAPL", 102.8, 1800),
2275        ];
2276
2277        for (symbol, close, volume) in test_data {
2278            table
2279                .add_row(DataRow::new(vec![
2280                    DataValue::String(symbol.to_string()),
2281                    DataValue::Float(close),
2282                    DataValue::Integer(volume),
2283                ]))
2284                .expect("Should add row successfully");
2285        }
2286
2287        Arc::new(table)
2288    }
2289}
2290
2291#[cfg(test)]
2292#[path = "query_engine_tests.rs"]
2293mod query_engine_tests;