sql_cli/data/
query_engine.rs

1use anyhow::{anyhow, Result};
2use fxhash::{FxHashMap, FxHashSet};
3use std::cmp::min;
4use std::collections::HashMap;
5use std::sync::Arc;
6use std::time::Instant;
7use tracing::{debug, info};
8
9use crate::config::config::BehaviorConfig;
10use crate::config::global::get_date_notation;
11use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
12use crate::data::data_view::DataView;
13use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
14use crate::data::evaluation_context::EvaluationContext;
15use crate::data::group_by_expressions::GroupByExpressions;
16use crate::data::hash_join::HashJoinExecutor;
17use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
18use crate::data::subquery_executor::SubqueryExecutor;
19use crate::data::virtual_table_generator::VirtualTableGenerator;
20use crate::execution_plan::{ExecutionPlan, ExecutionPlanBuilder, StepType};
21use crate::sql::aggregates::{contains_aggregate, is_aggregate_compatible};
22use crate::sql::parser::ast::TableSource;
23use crate::sql::recursive_parser::{
24    CTEType, OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection, SqlExpression,
25    TableFunction,
26};
27
28/// Query engine that executes SQL directly on `DataTable`
29#[derive(Clone)]
30pub struct QueryEngine {
31    case_insensitive: bool,
32    date_notation: String,
33    behavior_config: Option<BehaviorConfig>,
34}
35
36impl Default for QueryEngine {
37    fn default() -> Self {
38        Self::new()
39    }
40}
41
42impl QueryEngine {
43    #[must_use]
44    pub fn new() -> Self {
45        Self {
46            case_insensitive: false,
47            date_notation: get_date_notation(),
48            behavior_config: None,
49        }
50    }
51
52    #[must_use]
53    pub fn with_behavior_config(config: BehaviorConfig) -> Self {
54        let case_insensitive = config.case_insensitive_default;
55        // Use get_date_notation() to respect environment variable override
56        let date_notation = get_date_notation();
57        Self {
58            case_insensitive,
59            date_notation,
60            behavior_config: Some(config),
61        }
62    }
63
64    #[must_use]
65    pub fn with_date_notation(_date_notation: String) -> Self {
66        Self {
67            case_insensitive: false,
68            date_notation: get_date_notation(), // Always use the global function
69            behavior_config: None,
70        }
71    }
72
73    #[must_use]
74    pub fn with_case_insensitive(case_insensitive: bool) -> Self {
75        Self {
76            case_insensitive,
77            date_notation: get_date_notation(),
78            behavior_config: None,
79        }
80    }
81
82    #[must_use]
83    pub fn with_case_insensitive_and_date_notation(
84        case_insensitive: bool,
85        _date_notation: String, // Keep parameter for compatibility but use get_date_notation()
86    ) -> Self {
87        Self {
88            case_insensitive,
89            date_notation: get_date_notation(), // Always use the global function
90            behavior_config: None,
91        }
92    }
93
94    /// Find a column name similar to the given name using edit distance
95    fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
96        let columns = table.column_names();
97        let mut best_match: Option<(String, usize)> = None;
98
99        for col in columns {
100            let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
101            // Only suggest if distance is small (likely a typo)
102            // Allow up to 3 edits for longer names
103            let max_distance = if name.len() > 10 { 3 } else { 2 };
104            if distance <= max_distance {
105                match &best_match {
106                    None => best_match = Some((col, distance)),
107                    Some((_, best_dist)) if distance < *best_dist => {
108                        best_match = Some((col, distance));
109                    }
110                    _ => {}
111                }
112            }
113        }
114
115        best_match.map(|(name, _)| name)
116    }
117
118    /// Calculate Levenshtein edit distance between two strings
119    fn edit_distance(&self, s1: &str, s2: &str) -> usize {
120        let len1 = s1.len();
121        let len2 = s2.len();
122        let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
123
124        for i in 0..=len1 {
125            matrix[i][0] = i;
126        }
127        for j in 0..=len2 {
128            matrix[0][j] = j;
129        }
130
131        for (i, c1) in s1.chars().enumerate() {
132            for (j, c2) in s2.chars().enumerate() {
133                let cost = usize::from(c1 != c2);
134                matrix[i + 1][j + 1] = std::cmp::min(
135                    matrix[i][j + 1] + 1, // deletion
136                    std::cmp::min(
137                        matrix[i + 1][j] + 1, // insertion
138                        matrix[i][j] + cost,  // substitution
139                    ),
140                );
141            }
142        }
143
144        matrix[len1][len2]
145    }
146
147    /// Execute a SQL query on a `DataTable` and return a `DataView` (for backward compatibility)
148    pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
149        let (view, _plan) = self.execute_with_plan(table, sql)?;
150        Ok(view)
151    }
152
153    /// Execute a parsed SelectStatement on a `DataTable` and return a `DataView`
154    pub fn execute_statement(
155        &self,
156        table: Arc<DataTable>,
157        statement: SelectStatement,
158    ) -> Result<DataView> {
159        // First process CTEs to build context
160        let mut cte_context = HashMap::new();
161        for cte in &statement.ctes {
162            debug!("QueryEngine: Pre-processing CTE '{}'...", cte.name);
163            // Execute the CTE based on its type
164            let cte_result = match &cte.cte_type {
165                CTEType::Standard(query) => {
166                    // Execute the CTE query (it might reference earlier CTEs)
167                    self.build_view_with_context(table.clone(), query.clone(), &mut cte_context)?
168                }
169                CTEType::Web(web_spec) => {
170                    // Fetch data from URL
171                    use crate::web::http_fetcher::WebDataFetcher;
172
173                    let fetcher = WebDataFetcher::new()?;
174                    let data_table = fetcher.fetch(web_spec, &cte.name)?;
175
176                    // Convert DataTable to DataView
177                    DataView::new(Arc::new(data_table))
178                }
179            };
180            // Store the result in the context for later use
181            cte_context.insert(cte.name.clone(), Arc::new(cte_result));
182            debug!(
183                "QueryEngine: CTE '{}' pre-processed, stored in context",
184                cte.name
185            );
186        }
187
188        // Now process subqueries with CTE context available
189        let mut subquery_executor =
190            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
191        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
192
193        // Build the view with the same CTE context
194        self.build_view_with_context(table, processed_statement, &mut cte_context)
195    }
196
197    /// Execute a statement with provided CTE context (for subqueries)
198    pub fn execute_statement_with_cte_context(
199        &self,
200        table: Arc<DataTable>,
201        statement: SelectStatement,
202        cte_context: &HashMap<String, Arc<DataView>>,
203    ) -> Result<DataView> {
204        // Clone the context so we can add any CTEs from this statement
205        let mut local_context = cte_context.clone();
206
207        // Process any CTEs in this statement (they might be nested)
208        for cte in &statement.ctes {
209            debug!("QueryEngine: Processing nested CTE '{}'...", cte.name);
210            let cte_result = match &cte.cte_type {
211                CTEType::Standard(query) => {
212                    self.build_view_with_context(table.clone(), query.clone(), &mut local_context)?
213                }
214                CTEType::Web(web_spec) => {
215                    // Fetch data from URL
216                    use crate::web::http_fetcher::WebDataFetcher;
217
218                    let fetcher = WebDataFetcher::new()?;
219                    let data_table = fetcher.fetch(web_spec, &cte.name)?;
220
221                    // Convert DataTable to DataView
222                    DataView::new(Arc::new(data_table))
223                }
224            };
225            local_context.insert(cte.name.clone(), Arc::new(cte_result));
226        }
227
228        // Process subqueries with the complete context
229        let mut subquery_executor =
230            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), local_context.clone());
231        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
232
233        // Build the view
234        self.build_view_with_context(table, processed_statement, &mut local_context)
235    }
236
237    /// Execute a query and return both the result and the execution plan
238    pub fn execute_with_plan(
239        &self,
240        table: Arc<DataTable>,
241        sql: &str,
242    ) -> Result<(DataView, ExecutionPlan)> {
243        let mut plan_builder = ExecutionPlanBuilder::new();
244        let start_time = Instant::now();
245
246        // Parse the SQL query
247        plan_builder.begin_step(StepType::Parse, "Parse SQL query".to_string());
248        plan_builder.add_detail(format!("Query: {}", sql));
249        let mut parser = Parser::new(sql);
250        let statement = parser
251            .parse()
252            .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
253        plan_builder.add_detail(format!("Parsed successfully"));
254        if let Some(from) = &statement.from_table {
255            plan_builder.add_detail(format!("FROM: {}", from));
256        }
257        if statement.where_clause.is_some() {
258            plan_builder.add_detail("WHERE clause present".to_string());
259        }
260        plan_builder.end_step();
261
262        // First process CTEs to build context
263        let mut cte_context = HashMap::new();
264
265        if !statement.ctes.is_empty() {
266            plan_builder.begin_step(
267                StepType::CTE,
268                format!("Process {} CTEs", statement.ctes.len()),
269            );
270
271            for cte in &statement.ctes {
272                let cte_start = Instant::now();
273                plan_builder.begin_step(StepType::CTE, format!("CTE '{}'", cte.name));
274
275                let cte_result = match &cte.cte_type {
276                    CTEType::Standard(query) => {
277                        // Add CTE query details
278                        if let Some(from) = &query.from_table {
279                            plan_builder.add_detail(format!("Source: {}", from));
280                        }
281                        if query.where_clause.is_some() {
282                            plan_builder.add_detail("Has WHERE clause".to_string());
283                        }
284                        if query.group_by.is_some() {
285                            plan_builder.add_detail("Has GROUP BY".to_string());
286                        }
287
288                        debug!(
289                            "QueryEngine: Processing CTE '{}' with existing context: {:?}",
290                            cte.name,
291                            cte_context.keys().collect::<Vec<_>>()
292                        );
293
294                        // Process subqueries in the CTE's query FIRST
295                        // This allows the subqueries to see all previously defined CTEs
296                        let mut subquery_executor = SubqueryExecutor::with_cte_context(
297                            self.clone(),
298                            table.clone(),
299                            cte_context.clone(),
300                        );
301                        let processed_query = subquery_executor.execute_subqueries(query)?;
302
303                        self.build_view_with_context(
304                            table.clone(),
305                            processed_query,
306                            &mut cte_context,
307                        )?
308                    }
309                    CTEType::Web(web_spec) => {
310                        plan_builder.add_detail(format!("URL: {}", web_spec.url));
311                        if let Some(format) = &web_spec.format {
312                            plan_builder.add_detail(format!("Format: {:?}", format));
313                        }
314                        if let Some(cache) = web_spec.cache_seconds {
315                            plan_builder.add_detail(format!("Cache: {} seconds", cache));
316                        }
317
318                        // Fetch data from URL
319                        use crate::web::http_fetcher::WebDataFetcher;
320
321                        let fetcher = WebDataFetcher::new()?;
322                        let data_table = fetcher.fetch(web_spec, &cte.name)?;
323
324                        // Convert DataTable to DataView
325                        DataView::new(Arc::new(data_table))
326                    }
327                };
328
329                // Record CTE statistics
330                plan_builder.set_rows_out(cte_result.row_count());
331                plan_builder.add_detail(format!(
332                    "Result: {} rows, {} columns",
333                    cte_result.row_count(),
334                    cte_result.column_count()
335                ));
336                plan_builder.add_detail(format!(
337                    "Execution time: {:.3}ms",
338                    cte_start.elapsed().as_secs_f64() * 1000.0
339                ));
340
341                debug!(
342                    "QueryEngine: Storing CTE '{}' in context with {} rows",
343                    cte.name,
344                    cte_result.row_count()
345                );
346                cte_context.insert(cte.name.clone(), Arc::new(cte_result));
347                plan_builder.end_step();
348            }
349
350            plan_builder.add_detail(format!(
351                "All {} CTEs cached in context",
352                statement.ctes.len()
353            ));
354            plan_builder.end_step();
355        }
356
357        // Process subqueries in the statement with CTE context
358        plan_builder.begin_step(StepType::Subquery, "Process subqueries".to_string());
359        let mut subquery_executor =
360            SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
361
362        // Check if there are subqueries to process
363        let has_subqueries = statement.where_clause.as_ref().map_or(false, |w| {
364            // This is a simplified check - in reality we'd need to walk the AST
365            format!("{:?}", w).contains("Subquery")
366        });
367
368        if has_subqueries {
369            plan_builder.add_detail("Evaluating subqueries in WHERE clause".to_string());
370        }
371
372        let processed_statement = subquery_executor.execute_subqueries(&statement)?;
373
374        if has_subqueries {
375            plan_builder.add_detail("Subqueries replaced with materialized values".to_string());
376        } else {
377            plan_builder.add_detail("No subqueries to process".to_string());
378        }
379
380        plan_builder.end_step();
381        let result = self.build_view_with_context_and_plan(
382            table,
383            processed_statement,
384            &mut cte_context,
385            &mut plan_builder,
386        )?;
387
388        let total_duration = start_time.elapsed();
389        info!(
390            "Query execution complete: total={:?}, rows={}",
391            total_duration,
392            result.row_count()
393        );
394
395        let plan = plan_builder.build();
396        Ok((result, plan))
397    }
398
399    /// Build a `DataView` from a parsed SQL statement
400    fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
401        let mut cte_context = HashMap::new();
402        self.build_view_with_context(table, statement, &mut cte_context)
403    }
404
405    /// Build a DataView from a SelectStatement with CTE context
406    fn build_view_with_context(
407        &self,
408        table: Arc<DataTable>,
409        statement: SelectStatement,
410        cte_context: &mut HashMap<String, Arc<DataView>>,
411    ) -> Result<DataView> {
412        let mut dummy_plan = ExecutionPlanBuilder::new();
413        self.build_view_with_context_and_plan(table, statement, cte_context, &mut dummy_plan)
414    }
415
416    /// Build a DataView from a SelectStatement with CTE context and execution plan tracking
417    fn build_view_with_context_and_plan(
418        &self,
419        table: Arc<DataTable>,
420        statement: SelectStatement,
421        cte_context: &mut HashMap<String, Arc<DataView>>,
422        plan: &mut ExecutionPlanBuilder,
423    ) -> Result<DataView> {
424        // First, process any CTEs that aren't already in the context
425        for cte in &statement.ctes {
426            // Skip if already processed (e.g., by execute_select for WEB CTEs)
427            if cte_context.contains_key(&cte.name) {
428                debug!(
429                    "QueryEngine: CTE '{}' already in context, skipping",
430                    cte.name
431                );
432                continue;
433            }
434
435            debug!("QueryEngine: Processing CTE '{}'...", cte.name);
436            debug!(
437                "QueryEngine: Available CTEs for '{}': {:?}",
438                cte.name,
439                cte_context.keys().collect::<Vec<_>>()
440            );
441
442            // Execute the CTE query (it might reference earlier CTEs)
443            let cte_result = match &cte.cte_type {
444                CTEType::Standard(query) => {
445                    self.build_view_with_context(table.clone(), query.clone(), cte_context)?
446                }
447                CTEType::Web(_web_spec) => {
448                    // Web CTEs should have been processed earlier in execute_select
449                    return Err(anyhow!(
450                        "Web CTEs should be processed in execute_select method"
451                    ));
452                }
453            };
454
455            // Store the result in the context for later use
456            cte_context.insert(cte.name.clone(), Arc::new(cte_result));
457            debug!(
458                "QueryEngine: CTE '{}' processed, stored in context",
459                cte.name
460            );
461        }
462
463        // Determine the source table for the main query
464        let source_table = if let Some(ref table_func) = statement.from_function {
465            // Handle table functions like RANGE()
466            debug!("QueryEngine: Processing table function...");
467            match table_func {
468                TableFunction::Range { start, end, step } => {
469                    // Evaluate expressions to get numeric values
470                    let mut evaluator =
471                        ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
472
473                    // Create a dummy row for evaluating constant expressions
474                    let dummy_row = 0;
475
476                    let start_val = evaluator.evaluate(start, dummy_row)?;
477                    let end_val = evaluator.evaluate(end, dummy_row)?;
478                    let step_val = if let Some(step_expr) = step {
479                        Some(evaluator.evaluate(step_expr, dummy_row)?)
480                    } else {
481                        None
482                    };
483
484                    // Convert DataValues to integers
485                    let start_int = match start_val {
486                        DataValue::Integer(i) => i,
487                        DataValue::Float(f) => f as i64,
488                        _ => return Err(anyhow!("RANGE start must be numeric")),
489                    };
490
491                    let end_int = match end_val {
492                        DataValue::Integer(i) => i,
493                        DataValue::Float(f) => f as i64,
494                        _ => return Err(anyhow!("RANGE end must be numeric")),
495                    };
496
497                    let step_int = if let Some(step) = step_val {
498                        match step {
499                            DataValue::Integer(i) => Some(i),
500                            DataValue::Float(f) => Some(f as i64),
501                            _ => return Err(anyhow!("RANGE step must be numeric")),
502                        }
503                    } else {
504                        None
505                    };
506
507                    // Generate the virtual table
508                    VirtualTableGenerator::generate_range(start_int, end_int, step_int, None)?
509                }
510                TableFunction::Split { text, delimiter } => {
511                    // Evaluate expressions to get string values
512                    let mut evaluator =
513                        ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
514
515                    // Create a dummy row for evaluating constant expressions
516                    let dummy_row = 0;
517
518                    let text_val = evaluator.evaluate(text, dummy_row)?;
519                    let delimiter_val = if let Some(delim_expr) = delimiter {
520                        Some(evaluator.evaluate(delim_expr, dummy_row)?)
521                    } else {
522                        None
523                    };
524
525                    // Convert DataValues to strings
526                    let text_str = match text_val {
527                        DataValue::String(s) => s,
528                        DataValue::Null => return Err(anyhow!("SPLIT text cannot be NULL")),
529                        _ => text_val.to_string(),
530                    };
531
532                    let delimiter_str = if let Some(delim) = delimiter_val {
533                        match delim {
534                            DataValue::String(s) => Some(s),
535                            DataValue::Null => None,
536                            _ => Some(delim.to_string()),
537                        }
538                    } else {
539                        None
540                    };
541
542                    // Generate the virtual table from split string
543                    VirtualTableGenerator::generate_split(
544                        &text_str,
545                        delimiter_str.as_deref(),
546                        None,
547                    )?
548                }
549                TableFunction::Generator { name, args } => {
550                    // Use the generator registry to create the table
551                    use crate::sql::generators::GeneratorRegistry;
552
553                    // Create generator registry (could be cached in QueryEngine)
554                    let registry = GeneratorRegistry::new();
555
556                    if let Some(generator) = registry.get(name) {
557                        // Evaluate arguments
558                        let mut evaluator = ArithmeticEvaluator::with_date_notation(
559                            &table,
560                            self.date_notation.clone(),
561                        );
562                        let dummy_row = 0;
563
564                        let mut evaluated_args = Vec::new();
565                        for arg in args {
566                            evaluated_args.push(evaluator.evaluate(arg, dummy_row)?);
567                        }
568
569                        // Generate the table
570                        generator.generate(evaluated_args)?
571                    } else {
572                        return Err(anyhow!("Unknown generator function: {}", name));
573                    }
574                }
575            }
576        } else if let Some(ref subquery) = statement.from_subquery {
577            // Execute the subquery and use its result as the source
578            debug!("QueryEngine: Processing FROM subquery...");
579            let subquery_result =
580                self.build_view_with_context(table.clone(), *subquery.clone(), cte_context)?;
581
582            // Convert the DataView to a DataTable for use as source
583            // This materializes the subquery result
584            let materialized = self.materialize_view(subquery_result)?;
585            Arc::new(materialized)
586        } else if let Some(ref table_name) = statement.from_table {
587            // Check if this references a CTE
588            if let Some(cte_view) = cte_context.get(table_name) {
589                debug!("QueryEngine: Using CTE '{}' as source table", table_name);
590                // Materialize the CTE view as a table
591                let materialized = self.materialize_view((**cte_view).clone())?;
592                Arc::new(materialized)
593            } else {
594                // Regular table reference - use the provided table
595                table.clone()
596            }
597        } else {
598            // No FROM clause - use the provided table
599            table.clone()
600        };
601
602        // Process JOINs if present
603        let final_table = if !statement.joins.is_empty() {
604            plan.begin_step(
605                StepType::Join,
606                format!("Process {} JOINs", statement.joins.len()),
607            );
608            plan.set_rows_in(source_table.row_count());
609
610            let join_executor = HashJoinExecutor::new(self.case_insensitive);
611            let mut current_table = source_table;
612
613            for (idx, join_clause) in statement.joins.iter().enumerate() {
614                let join_start = Instant::now();
615                plan.begin_step(StepType::Join, format!("JOIN #{}", idx + 1));
616                plan.add_detail(format!("Type: {:?}", join_clause.join_type));
617                plan.add_detail(format!("Left table: {} rows", current_table.row_count()));
618                plan.add_detail(format!(
619                    "Executing {:?} JOIN on {}",
620                    join_clause.join_type, join_clause.condition.left_column
621                ));
622
623                // Resolve the right table for the join
624                let right_table = match &join_clause.table {
625                    TableSource::Table(name) => {
626                        // Check if it's a CTE reference
627                        if let Some(cte_view) = cte_context.get(name) {
628                            let materialized = self.materialize_view((**cte_view).clone())?;
629                            Arc::new(materialized)
630                        } else {
631                            // For now, we need the actual table data
632                            // In a real implementation, this would load from file
633                            return Err(anyhow!("Cannot resolve table '{}' for JOIN", name));
634                        }
635                    }
636                    TableSource::DerivedTable { query, alias: _ } => {
637                        // Execute the subquery
638                        let subquery_result = self.build_view_with_context(
639                            table.clone(),
640                            *query.clone(),
641                            cte_context,
642                        )?;
643                        let materialized = self.materialize_view(subquery_result)?;
644                        Arc::new(materialized)
645                    }
646                };
647
648                // Execute the join
649                let joined = join_executor.execute_join(
650                    current_table.clone(),
651                    join_clause,
652                    right_table.clone(),
653                )?;
654
655                plan.add_detail(format!("Right table: {} rows", right_table.row_count()));
656                plan.set_rows_out(joined.row_count());
657                plan.add_detail(format!("Result: {} rows", joined.row_count()));
658                plan.add_detail(format!(
659                    "Join time: {:.3}ms",
660                    join_start.elapsed().as_secs_f64() * 1000.0
661                ));
662                plan.end_step();
663
664                current_table = Arc::new(joined);
665            }
666
667            plan.set_rows_out(current_table.row_count());
668            plan.add_detail(format!(
669                "Final result after all joins: {} rows",
670                current_table.row_count()
671            ));
672            plan.end_step();
673            current_table
674        } else {
675            source_table
676        };
677
678        // Continue with the existing build_view logic but using final_table
679        self.build_view_internal_with_plan(final_table, statement, plan)
680    }
681
682    /// Materialize a DataView into a new DataTable
683    fn materialize_view(&self, view: DataView) -> Result<DataTable> {
684        let source = view.source();
685        let mut result_table = DataTable::new("derived");
686
687        // Get the visible columns from the view
688        let visible_cols = view.visible_column_indices().to_vec();
689
690        // Copy column definitions
691        for col_idx in &visible_cols {
692            let col = &source.columns[*col_idx];
693            let new_col = DataColumn {
694                name: col.name.clone(),
695                data_type: col.data_type.clone(),
696                nullable: col.nullable,
697                unique_values: col.unique_values,
698                null_count: col.null_count,
699                metadata: col.metadata.clone(),
700            };
701            result_table.add_column(new_col);
702        }
703
704        // Copy visible rows
705        for row_idx in view.visible_row_indices() {
706            let source_row = &source.rows[*row_idx];
707            let mut new_row = DataRow { values: Vec::new() };
708
709            for col_idx in &visible_cols {
710                new_row.values.push(source_row.values[*col_idx].clone());
711            }
712
713            result_table.add_row(new_row);
714        }
715
716        Ok(result_table)
717    }
718
719    fn build_view_internal(
720        &self,
721        table: Arc<DataTable>,
722        statement: SelectStatement,
723    ) -> Result<DataView> {
724        let mut dummy_plan = ExecutionPlanBuilder::new();
725        self.build_view_internal_with_plan(table, statement, &mut dummy_plan)
726    }
727
728    fn build_view_internal_with_plan(
729        &self,
730        table: Arc<DataTable>,
731        statement: SelectStatement,
732        plan: &mut ExecutionPlanBuilder,
733    ) -> Result<DataView> {
734        debug!(
735            "QueryEngine::build_view - select_items: {:?}",
736            statement.select_items
737        );
738        debug!(
739            "QueryEngine::build_view - where_clause: {:?}",
740            statement.where_clause
741        );
742
743        // Start with all rows visible
744        let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
745
746        // Apply WHERE clause filtering using recursive evaluator
747        if let Some(where_clause) = &statement.where_clause {
748            let total_rows = table.row_count();
749            debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
750            debug!("QueryEngine: WHERE clause = {:?}", where_clause);
751
752            plan.begin_step(StepType::Filter, "WHERE clause filtering".to_string());
753            plan.set_rows_in(total_rows);
754            plan.add_detail(format!("Input: {} rows", total_rows));
755
756            // Add details about WHERE conditions
757            for condition in &where_clause.conditions {
758                plan.add_detail(format!("Condition: {:?}", condition.expr));
759            }
760
761            let filter_start = Instant::now();
762            // Create an evaluation context for caching compiled regexes
763            let mut eval_context = EvaluationContext::new(self.case_insensitive);
764
765            // Filter visible rows based on WHERE clause
766            let mut filtered_rows = Vec::new();
767            for row_idx in visible_rows {
768                // Only log for first few rows to avoid performance impact
769                if row_idx < 3 {
770                    debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
771                }
772                let mut evaluator =
773                    RecursiveWhereEvaluator::with_context(&table, &mut eval_context);
774                match evaluator.evaluate(where_clause, row_idx) {
775                    Ok(result) => {
776                        if row_idx < 3 {
777                            debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
778                        }
779                        if result {
780                            filtered_rows.push(row_idx);
781                        }
782                    }
783                    Err(e) => {
784                        if row_idx < 3 {
785                            debug!(
786                                "QueryEngine: WHERE evaluation error for row {}: {}",
787                                row_idx, e
788                            );
789                        }
790                        // Propagate WHERE clause errors instead of silently ignoring them
791                        return Err(e);
792                    }
793                }
794            }
795
796            // Log regex cache statistics
797            let (compilations, cache_hits) = eval_context.get_stats();
798            if compilations > 0 || cache_hits > 0 {
799                debug!(
800                    "LIKE pattern cache: {} compilations, {} cache hits",
801                    compilations, cache_hits
802                );
803            }
804            visible_rows = filtered_rows;
805            let filter_duration = filter_start.elapsed();
806            info!(
807                "WHERE clause filtering: {} rows -> {} rows in {:?}",
808                total_rows,
809                visible_rows.len(),
810                filter_duration
811            );
812
813            plan.set_rows_out(visible_rows.len());
814            plan.add_detail(format!("Output: {} rows", visible_rows.len()));
815            plan.add_detail(format!(
816                "Filter time: {:.3}ms",
817                filter_duration.as_secs_f64() * 1000.0
818            ));
819            plan.end_step();
820        }
821
822        // Create initial DataView with filtered rows
823        let mut view = DataView::new(table.clone());
824        view = view.with_rows(visible_rows);
825
826        // Handle GROUP BY if present
827        if let Some(group_by_exprs) = &statement.group_by {
828            if !group_by_exprs.is_empty() {
829                debug!("QueryEngine: Processing GROUP BY: {:?}", group_by_exprs);
830
831                plan.begin_step(
832                    StepType::GroupBy,
833                    format!("GROUP BY {} expressions", group_by_exprs.len()),
834                );
835                plan.set_rows_in(view.row_count());
836                plan.add_detail(format!("Input: {} rows", view.row_count()));
837                for expr in group_by_exprs {
838                    plan.add_detail(format!("Group by: {:?}", expr));
839                }
840
841                let group_start = Instant::now();
842                view = self.apply_group_by(
843                    view,
844                    group_by_exprs,
845                    &statement.select_items,
846                    statement.having.as_ref(),
847                )?;
848
849                plan.set_rows_out(view.row_count());
850                plan.add_detail(format!("Output: {} groups", view.row_count()));
851                plan.add_detail(format!(
852                    "Group time: {:.3}ms",
853                    group_start.elapsed().as_secs_f64() * 1000.0
854                ));
855                plan.end_step();
856            }
857        } else {
858            // Apply column projection or computed expressions (SELECT clause) - do this AFTER filtering
859            if !statement.select_items.is_empty() {
860                // Check if we have ANY non-star items (not just the first one)
861                let has_non_star_items = statement
862                    .select_items
863                    .iter()
864                    .any(|item| !matches!(item, SelectItem::Star));
865
866                // Apply select items if:
867                // 1. We have computed expressions or explicit columns
868                // 2. OR we have a mix of star and other items (e.g., SELECT *, computed_col)
869                if has_non_star_items || statement.select_items.len() > 1 {
870                    view = self.apply_select_items(view, &statement.select_items)?;
871                }
872                // If it's just a single star, no projection needed
873            } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
874                // Fallback to legacy column projection for backward compatibility
875                let column_indices = self.resolve_column_indices(&table, &statement.columns)?;
876                view = view.with_columns(column_indices);
877            }
878        }
879
880        // Apply DISTINCT if specified
881        if statement.distinct {
882            plan.begin_step(StepType::Distinct, "Remove duplicate rows".to_string());
883            plan.set_rows_in(view.row_count());
884            plan.add_detail(format!("Input: {} rows", view.row_count()));
885
886            let distinct_start = Instant::now();
887            view = self.apply_distinct(view)?;
888
889            plan.set_rows_out(view.row_count());
890            plan.add_detail(format!("Output: {} unique rows", view.row_count()));
891            plan.add_detail(format!(
892                "Distinct time: {:.3}ms",
893                distinct_start.elapsed().as_secs_f64() * 1000.0
894            ));
895            plan.end_step();
896        }
897
898        // Apply ORDER BY sorting
899        if let Some(order_by_columns) = &statement.order_by {
900            if !order_by_columns.is_empty() {
901                plan.begin_step(
902                    StepType::Sort,
903                    format!("ORDER BY {} columns", order_by_columns.len()),
904                );
905                plan.set_rows_in(view.row_count());
906                for col in order_by_columns {
907                    plan.add_detail(format!("{} {:?}", col.column, col.direction));
908                }
909
910                let sort_start = Instant::now();
911                view = self.apply_multi_order_by(view, order_by_columns)?;
912
913                plan.add_detail(format!(
914                    "Sort time: {:.3}ms",
915                    sort_start.elapsed().as_secs_f64() * 1000.0
916                ));
917                plan.end_step();
918            }
919        }
920
921        // Apply LIMIT/OFFSET
922        if let Some(limit) = statement.limit {
923            let offset = statement.offset.unwrap_or(0);
924            plan.begin_step(StepType::Limit, format!("LIMIT {}", limit));
925            plan.set_rows_in(view.row_count());
926            if offset > 0 {
927                plan.add_detail(format!("OFFSET: {}", offset));
928            }
929            view = view.with_limit(limit, offset);
930            plan.set_rows_out(view.row_count());
931            plan.add_detail(format!("Output: {} rows", view.row_count()));
932            plan.end_step();
933        }
934
935        Ok(view)
936    }
937
938    /// Resolve column names to indices
939    fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
940        let mut indices = Vec::new();
941        let table_columns = table.column_names();
942
943        for col_name in columns {
944            let index = table_columns
945                .iter()
946                .position(|c| c.eq_ignore_ascii_case(col_name))
947                .ok_or_else(|| {
948                    let suggestion = self.find_similar_column(table, col_name);
949                    match suggestion {
950                        Some(similar) => anyhow::anyhow!(
951                            "Column '{}' not found. Did you mean '{}'?",
952                            col_name,
953                            similar
954                        ),
955                        None => anyhow::anyhow!("Column '{}' not found", col_name),
956                    }
957                })?;
958            indices.push(index);
959        }
960
961        Ok(indices)
962    }
963
964    /// Apply SELECT items (columns and computed expressions) to create new view
965    fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
966        debug!(
967            "QueryEngine::apply_select_items - items: {:?}",
968            select_items
969        );
970        debug!(
971            "QueryEngine::apply_select_items - input view has {} rows",
972            view.row_count()
973        );
974
975        // Check if this is an aggregate query:
976        // 1. At least one aggregate function exists
977        // 2. All other items are either aggregates or constants (aggregate-compatible)
978        let has_aggregates = select_items.iter().any(|item| match item {
979            SelectItem::Expression { expr, .. } => contains_aggregate(expr),
980            SelectItem::Column(_) => false,
981            SelectItem::Star => false,
982        });
983
984        let all_aggregate_compatible = select_items.iter().all(|item| match item {
985            SelectItem::Expression { expr, .. } => is_aggregate_compatible(expr),
986            SelectItem::Column(_) => false, // Columns are not aggregate-compatible
987            SelectItem::Star => false,      // Star is not aggregate-compatible
988        });
989
990        if has_aggregates && all_aggregate_compatible && view.row_count() > 0 {
991            // Special handling for aggregate queries with constants (no GROUP BY)
992            // These should produce exactly one row
993            debug!("QueryEngine::apply_select_items - detected aggregate query with constants");
994            return self.apply_aggregate_select(view, select_items);
995        }
996
997        // Check if we need to create computed columns
998        let has_computed_expressions = select_items
999            .iter()
1000            .any(|item| matches!(item, SelectItem::Expression { .. }));
1001
1002        debug!(
1003            "QueryEngine::apply_select_items - has_computed_expressions: {}",
1004            has_computed_expressions
1005        );
1006
1007        if !has_computed_expressions {
1008            // Simple case: only columns, use existing projection logic
1009            let column_indices = self.resolve_select_columns(view.source(), select_items)?;
1010            return Ok(view.with_columns(column_indices));
1011        }
1012
1013        // Complex case: we have computed expressions
1014        // IMPORTANT: We create a PROJECTED view, not a new table
1015        // This preserves the original DataTable reference
1016
1017        let source_table = view.source();
1018        let visible_rows = view.visible_row_indices();
1019
1020        // Create a temporary table just for the computed result view
1021        // But this table is only used for the current query result
1022        let mut computed_table = DataTable::new("query_result");
1023
1024        // First, expand any Star selectors to actual columns
1025        let mut expanded_items = Vec::new();
1026        for item in select_items {
1027            match item {
1028                SelectItem::Star => {
1029                    // Expand * to all columns from source table
1030                    for col_name in source_table.column_names() {
1031                        expanded_items.push(SelectItem::Column(col_name));
1032                    }
1033                }
1034                _ => expanded_items.push(item.clone()),
1035            }
1036        }
1037
1038        // Add columns based on expanded SelectItems, handling duplicates
1039        let mut column_name_counts: std::collections::HashMap<String, usize> =
1040            std::collections::HashMap::new();
1041
1042        for item in &expanded_items {
1043            let base_name = match item {
1044                SelectItem::Column(name) => name.clone(),
1045                SelectItem::Expression { alias, .. } => alias.clone(),
1046                SelectItem::Star => unreachable!("Star should have been expanded"),
1047            };
1048
1049            // Check if this column name has been used before
1050            let count = column_name_counts.entry(base_name.clone()).or_insert(0);
1051            let column_name = if *count == 0 {
1052                // First occurrence, use the name as-is
1053                base_name.clone()
1054            } else {
1055                // Duplicate, append a suffix
1056                format!("{base_name}_{count}")
1057            };
1058            *count += 1;
1059
1060            computed_table.add_column(DataColumn::new(&column_name));
1061        }
1062
1063        // Calculate values for each row
1064        let mut evaluator =
1065            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
1066
1067        for &row_idx in visible_rows {
1068            let mut row_values = Vec::new();
1069
1070            for item in &expanded_items {
1071                let value = match item {
1072                    SelectItem::Column(col_name) => {
1073                        // Simple column reference
1074                        let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
1075                            let suggestion = self.find_similar_column(source_table, col_name);
1076                            match suggestion {
1077                                Some(similar) => anyhow::anyhow!(
1078                                    "Column '{}' not found. Did you mean '{}'?",
1079                                    col_name,
1080                                    similar
1081                                ),
1082                                None => anyhow::anyhow!("Column '{}' not found", col_name),
1083                            }
1084                        })?;
1085                        let row = source_table
1086                            .get_row(row_idx)
1087                            .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
1088                        row.get(col_idx)
1089                            .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
1090                            .clone()
1091                    }
1092                    SelectItem::Expression { expr, .. } => {
1093                        // Computed expression
1094                        evaluator.evaluate(expr, row_idx)?
1095                    }
1096                    SelectItem::Star => unreachable!("Star should have been expanded"),
1097                };
1098                row_values.push(value);
1099            }
1100
1101            computed_table
1102                .add_row(DataRow::new(row_values))
1103                .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
1104        }
1105
1106        // Return a view of the computed result
1107        // This is a temporary view for this query only
1108        Ok(DataView::new(Arc::new(computed_table)))
1109    }
1110
1111    /// Apply aggregate-only SELECT (no GROUP BY - produces single row)
1112    fn apply_aggregate_select(
1113        &self,
1114        view: DataView,
1115        select_items: &[SelectItem],
1116    ) -> Result<DataView> {
1117        debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
1118
1119        let source_table = view.source();
1120        let mut result_table = DataTable::new("aggregate_result");
1121
1122        // Add columns for each select item
1123        for item in select_items {
1124            let column_name = match item {
1125                SelectItem::Expression { alias, .. } => alias.clone(),
1126                _ => unreachable!("Should only have expressions in aggregate-only query"),
1127            };
1128            result_table.add_column(DataColumn::new(&column_name));
1129        }
1130
1131        // Create evaluator with visible rows from the view (for filtered aggregates)
1132        let visible_rows = view.visible_row_indices().to_vec();
1133        let mut evaluator =
1134            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
1135                .with_visible_rows(visible_rows);
1136
1137        // Evaluate each aggregate expression once (they handle all rows internally)
1138        let mut row_values = Vec::new();
1139        for item in select_items {
1140            match item {
1141                SelectItem::Expression { expr, .. } => {
1142                    // The evaluator will handle aggregates over all rows
1143                    // We pass row_index=0 but aggregates ignore it and process all rows
1144                    let value = evaluator.evaluate(expr, 0)?;
1145                    row_values.push(value);
1146                }
1147                _ => unreachable!("Should only have expressions in aggregate-only query"),
1148            }
1149        }
1150
1151        // Add the single result row
1152        result_table
1153            .add_row(DataRow::new(row_values))
1154            .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
1155
1156        Ok(DataView::new(Arc::new(result_table)))
1157    }
1158
1159    /// Resolve `SelectItem` columns to indices (for simple column projections only)
1160    fn resolve_select_columns(
1161        &self,
1162        table: &DataTable,
1163        select_items: &[SelectItem],
1164    ) -> Result<Vec<usize>> {
1165        let mut indices = Vec::new();
1166        let table_columns = table.column_names();
1167
1168        for item in select_items {
1169            match item {
1170                SelectItem::Column(col_name) => {
1171                    let index = table_columns
1172                        .iter()
1173                        .position(|c| c.eq_ignore_ascii_case(col_name))
1174                        .ok_or_else(|| {
1175                            let suggestion = self.find_similar_column(table, col_name);
1176                            match suggestion {
1177                                Some(similar) => anyhow::anyhow!(
1178                                    "Column '{}' not found. Did you mean '{}'?",
1179                                    col_name,
1180                                    similar
1181                                ),
1182                                None => anyhow::anyhow!("Column '{}' not found", col_name),
1183                            }
1184                        })?;
1185                    indices.push(index);
1186                }
1187                SelectItem::Star => {
1188                    // Expand * to all column indices
1189                    for i in 0..table_columns.len() {
1190                        indices.push(i);
1191                    }
1192                }
1193                SelectItem::Expression { .. } => {
1194                    return Err(anyhow::anyhow!(
1195                        "Computed expressions require new table creation"
1196                    ));
1197                }
1198            }
1199        }
1200
1201        Ok(indices)
1202    }
1203
1204    /// Apply DISTINCT to remove duplicate rows
1205    fn apply_distinct(&self, view: DataView) -> Result<DataView> {
1206        use std::collections::HashSet;
1207
1208        let source = view.source();
1209        let visible_cols = view.visible_column_indices();
1210        let visible_rows = view.visible_row_indices();
1211
1212        // Build a set to track unique rows
1213        let mut seen_rows = HashSet::new();
1214        let mut unique_row_indices = Vec::new();
1215
1216        for &row_idx in visible_rows {
1217            // Build a key representing this row's visible column values
1218            let mut row_key = Vec::new();
1219            for &col_idx in visible_cols {
1220                let value = source
1221                    .get_value(row_idx, col_idx)
1222                    .ok_or_else(|| anyhow!("Invalid cell reference"))?;
1223                // Convert value to a hashable representation
1224                row_key.push(format!("{:?}", value));
1225            }
1226
1227            // Check if we've seen this row before
1228            if seen_rows.insert(row_key) {
1229                // First time seeing this row combination
1230                unique_row_indices.push(row_idx);
1231            }
1232        }
1233
1234        // Create a new view with only unique rows
1235        Ok(view.with_rows(unique_row_indices))
1236    }
1237
1238    /// Apply multi-column ORDER BY sorting to the view
1239    fn apply_multi_order_by(
1240        &self,
1241        mut view: DataView,
1242        order_by_columns: &[OrderByColumn],
1243    ) -> Result<DataView> {
1244        // Build list of (source_column_index, ascending) tuples
1245        let mut sort_columns = Vec::new();
1246
1247        for order_col in order_by_columns {
1248            // Try to find the column index
1249            // Check in the current view's source table (this handles both regular columns and computed columns)
1250            // This is especially important after GROUP BY where we have a new result table with aggregate aliases
1251            let col_index = view
1252                .source()
1253                .get_column_index(&order_col.column)
1254                .ok_or_else(|| {
1255                    // If not found, provide helpful error with suggestions
1256                    let suggestion = self.find_similar_column(view.source(), &order_col.column);
1257                    match suggestion {
1258                        Some(similar) => anyhow::anyhow!(
1259                            "Column '{}' not found. Did you mean '{}'?",
1260                            order_col.column,
1261                            similar
1262                        ),
1263                        None => {
1264                            // Also list available columns for debugging
1265                            let available_cols = view.source().column_names().join(", ");
1266                            anyhow::anyhow!(
1267                                "Column '{}' not found. Available columns: {}",
1268                                order_col.column,
1269                                available_cols
1270                            )
1271                        }
1272                    }
1273                })?;
1274
1275            let ascending = matches!(order_col.direction, SortDirection::Asc);
1276            sort_columns.push((col_index, ascending));
1277        }
1278
1279        // Apply multi-column sorting
1280        view.apply_multi_sort(&sort_columns)?;
1281        Ok(view)
1282    }
1283
1284    /// Apply GROUP BY to the view with optional HAVING clause
1285    fn apply_group_by(
1286        &self,
1287        view: DataView,
1288        group_by_exprs: &[SqlExpression],
1289        select_items: &[SelectItem],
1290        having: Option<&SqlExpression>,
1291    ) -> Result<DataView> {
1292        // Use the new expression-based GROUP BY implementation
1293        self.apply_group_by_expressions(
1294            view,
1295            group_by_exprs,
1296            select_items,
1297            having,
1298            self.case_insensitive,
1299            self.date_notation.clone(),
1300        )
1301    }
1302
1303    /// Estimate the cardinality (number of unique groups) for GROUP BY operations
1304    /// This helps pre-size hash tables for better performance
1305    pub fn estimate_group_cardinality(
1306        &self,
1307        view: &DataView,
1308        group_by_exprs: &[SqlExpression],
1309    ) -> usize {
1310        // If we have few rows, just return the row count as upper bound
1311        let row_count = view.get_visible_rows().len();
1312        if row_count <= 100 {
1313            return row_count;
1314        }
1315
1316        // Sample first 1000 rows or 10% of data, whichever is smaller
1317        let sample_size = min(1000, row_count / 10).max(100);
1318        let mut seen = FxHashSet::default();
1319
1320        let visible_rows = view.get_visible_rows();
1321        for (i, &row_idx) in visible_rows.iter().enumerate() {
1322            if i >= sample_size {
1323                break;
1324            }
1325
1326            // Evaluate GROUP BY expressions for this row
1327            let mut key_values = Vec::new();
1328            for expr in group_by_exprs {
1329                let mut evaluator = ArithmeticEvaluator::new(view.source());
1330                let value = evaluator.evaluate(expr, row_idx).unwrap_or(DataValue::Null);
1331                key_values.push(value);
1332            }
1333
1334            seen.insert(key_values);
1335        }
1336
1337        // Estimate total cardinality based on sample
1338        let sample_cardinality = seen.len();
1339        let estimated = (sample_cardinality * row_count) / sample_size;
1340
1341        // Cap at row count and ensure minimum of sample cardinality
1342        estimated.min(row_count).max(sample_cardinality)
1343    }
1344}
1345
1346#[cfg(test)]
1347mod tests {
1348    use super::*;
1349    use crate::data::datatable::{DataColumn, DataRow, DataValue};
1350
1351    fn create_test_table() -> Arc<DataTable> {
1352        let mut table = DataTable::new("test");
1353
1354        // Add columns
1355        table.add_column(DataColumn::new("id"));
1356        table.add_column(DataColumn::new("name"));
1357        table.add_column(DataColumn::new("age"));
1358
1359        // Add rows
1360        table
1361            .add_row(DataRow::new(vec![
1362                DataValue::Integer(1),
1363                DataValue::String("Alice".to_string()),
1364                DataValue::Integer(30),
1365            ]))
1366            .unwrap();
1367
1368        table
1369            .add_row(DataRow::new(vec![
1370                DataValue::Integer(2),
1371                DataValue::String("Bob".to_string()),
1372                DataValue::Integer(25),
1373            ]))
1374            .unwrap();
1375
1376        table
1377            .add_row(DataRow::new(vec![
1378                DataValue::Integer(3),
1379                DataValue::String("Charlie".to_string()),
1380                DataValue::Integer(35),
1381            ]))
1382            .unwrap();
1383
1384        Arc::new(table)
1385    }
1386
1387    #[test]
1388    fn test_select_all() {
1389        let table = create_test_table();
1390        let engine = QueryEngine::new();
1391
1392        let view = engine
1393            .execute(table.clone(), "SELECT * FROM users")
1394            .unwrap();
1395        assert_eq!(view.row_count(), 3);
1396        assert_eq!(view.column_count(), 3);
1397    }
1398
1399    #[test]
1400    fn test_select_columns() {
1401        let table = create_test_table();
1402        let engine = QueryEngine::new();
1403
1404        let view = engine
1405            .execute(table.clone(), "SELECT name, age FROM users")
1406            .unwrap();
1407        assert_eq!(view.row_count(), 3);
1408        assert_eq!(view.column_count(), 2);
1409    }
1410
1411    #[test]
1412    fn test_select_with_limit() {
1413        let table = create_test_table();
1414        let engine = QueryEngine::new();
1415
1416        let view = engine
1417            .execute(table.clone(), "SELECT * FROM users LIMIT 2")
1418            .unwrap();
1419        assert_eq!(view.row_count(), 2);
1420    }
1421
1422    #[test]
1423    fn test_type_coercion_contains() {
1424        // Initialize tracing for debug output
1425        let _ = tracing_subscriber::fmt()
1426            .with_max_level(tracing::Level::DEBUG)
1427            .try_init();
1428
1429        let mut table = DataTable::new("test");
1430        table.add_column(DataColumn::new("id"));
1431        table.add_column(DataColumn::new("status"));
1432        table.add_column(DataColumn::new("price"));
1433
1434        // Add test data with mixed types
1435        table
1436            .add_row(DataRow::new(vec![
1437                DataValue::Integer(1),
1438                DataValue::String("Pending".to_string()),
1439                DataValue::Float(99.99),
1440            ]))
1441            .unwrap();
1442
1443        table
1444            .add_row(DataRow::new(vec![
1445                DataValue::Integer(2),
1446                DataValue::String("Confirmed".to_string()),
1447                DataValue::Float(150.50),
1448            ]))
1449            .unwrap();
1450
1451        table
1452            .add_row(DataRow::new(vec![
1453                DataValue::Integer(3),
1454                DataValue::String("Pending".to_string()),
1455                DataValue::Float(75.00),
1456            ]))
1457            .unwrap();
1458
1459        let table = Arc::new(table);
1460        let engine = QueryEngine::new();
1461
1462        println!("\n=== Testing WHERE clause with Contains ===");
1463        println!("Table has {} rows", table.row_count());
1464        for i in 0..table.row_count() {
1465            let status = table.get_value(i, 1);
1466            println!("Row {i}: status = {status:?}");
1467        }
1468
1469        // Test 1: Basic string contains (should work)
1470        println!("\n--- Test 1: status.Contains('pend') ---");
1471        let result = engine.execute(
1472            table.clone(),
1473            "SELECT * FROM test WHERE status.Contains('pend')",
1474        );
1475        match result {
1476            Ok(view) => {
1477                println!("SUCCESS: Found {} matching rows", view.row_count());
1478                assert_eq!(view.row_count(), 2); // Should find both Pending rows
1479            }
1480            Err(e) => {
1481                panic!("Query failed: {e}");
1482            }
1483        }
1484
1485        // Test 2: Numeric contains (should work with type coercion)
1486        println!("\n--- Test 2: price.Contains('9') ---");
1487        let result = engine.execute(
1488            table.clone(),
1489            "SELECT * FROM test WHERE price.Contains('9')",
1490        );
1491        match result {
1492            Ok(view) => {
1493                println!(
1494                    "SUCCESS: Found {} matching rows with price containing '9'",
1495                    view.row_count()
1496                );
1497                // Should find 99.99 row
1498                assert!(view.row_count() >= 1);
1499            }
1500            Err(e) => {
1501                panic!("Numeric coercion query failed: {e}");
1502            }
1503        }
1504
1505        println!("\n=== All tests passed! ===");
1506    }
1507
1508    #[test]
1509    fn test_not_in_clause() {
1510        // Initialize tracing for debug output
1511        let _ = tracing_subscriber::fmt()
1512            .with_max_level(tracing::Level::DEBUG)
1513            .try_init();
1514
1515        let mut table = DataTable::new("test");
1516        table.add_column(DataColumn::new("id"));
1517        table.add_column(DataColumn::new("country"));
1518
1519        // Add test data
1520        table
1521            .add_row(DataRow::new(vec![
1522                DataValue::Integer(1),
1523                DataValue::String("CA".to_string()),
1524            ]))
1525            .unwrap();
1526
1527        table
1528            .add_row(DataRow::new(vec![
1529                DataValue::Integer(2),
1530                DataValue::String("US".to_string()),
1531            ]))
1532            .unwrap();
1533
1534        table
1535            .add_row(DataRow::new(vec![
1536                DataValue::Integer(3),
1537                DataValue::String("UK".to_string()),
1538            ]))
1539            .unwrap();
1540
1541        let table = Arc::new(table);
1542        let engine = QueryEngine::new();
1543
1544        println!("\n=== Testing NOT IN clause ===");
1545        println!("Table has {} rows", table.row_count());
1546        for i in 0..table.row_count() {
1547            let country = table.get_value(i, 1);
1548            println!("Row {i}: country = {country:?}");
1549        }
1550
1551        // Test NOT IN clause - should exclude CA, return US and UK (2 rows)
1552        println!("\n--- Test: country NOT IN ('CA') ---");
1553        let result = engine.execute(
1554            table.clone(),
1555            "SELECT * FROM test WHERE country NOT IN ('CA')",
1556        );
1557        match result {
1558            Ok(view) => {
1559                println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
1560                assert_eq!(view.row_count(), 2); // Should find US and UK
1561            }
1562            Err(e) => {
1563                panic!("NOT IN query failed: {e}");
1564            }
1565        }
1566
1567        println!("\n=== NOT IN test complete! ===");
1568    }
1569
1570    #[test]
1571    fn test_case_insensitive_in_and_not_in() {
1572        // Initialize tracing for debug output
1573        let _ = tracing_subscriber::fmt()
1574            .with_max_level(tracing::Level::DEBUG)
1575            .try_init();
1576
1577        let mut table = DataTable::new("test");
1578        table.add_column(DataColumn::new("id"));
1579        table.add_column(DataColumn::new("country"));
1580
1581        // Add test data with mixed case
1582        table
1583            .add_row(DataRow::new(vec![
1584                DataValue::Integer(1),
1585                DataValue::String("CA".to_string()), // uppercase
1586            ]))
1587            .unwrap();
1588
1589        table
1590            .add_row(DataRow::new(vec![
1591                DataValue::Integer(2),
1592                DataValue::String("us".to_string()), // lowercase
1593            ]))
1594            .unwrap();
1595
1596        table
1597            .add_row(DataRow::new(vec![
1598                DataValue::Integer(3),
1599                DataValue::String("UK".to_string()), // uppercase
1600            ]))
1601            .unwrap();
1602
1603        let table = Arc::new(table);
1604
1605        println!("\n=== Testing Case-Insensitive IN clause ===");
1606        println!("Table has {} rows", table.row_count());
1607        for i in 0..table.row_count() {
1608            let country = table.get_value(i, 1);
1609            println!("Row {i}: country = {country:?}");
1610        }
1611
1612        // Test case-insensitive IN - should match 'CA' with 'ca'
1613        println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
1614        let engine = QueryEngine::with_case_insensitive(true);
1615        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1616        match result {
1617            Ok(view) => {
1618                println!(
1619                    "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
1620                    view.row_count()
1621                );
1622                assert_eq!(view.row_count(), 1); // Should find CA row
1623            }
1624            Err(e) => {
1625                panic!("Case-insensitive IN query failed: {e}");
1626            }
1627        }
1628
1629        // Test case-insensitive NOT IN - should exclude 'CA' when searching for 'ca'
1630        println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
1631        let result = engine.execute(
1632            table.clone(),
1633            "SELECT * FROM test WHERE country NOT IN ('ca')",
1634        );
1635        match result {
1636            Ok(view) => {
1637                println!(
1638                    "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
1639                    view.row_count()
1640                );
1641                assert_eq!(view.row_count(), 2); // Should find us and UK rows
1642            }
1643            Err(e) => {
1644                panic!("Case-insensitive NOT IN query failed: {e}");
1645            }
1646        }
1647
1648        // Test case-sensitive (default) - should NOT match 'CA' with 'ca'
1649        println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
1650        let engine_case_sensitive = QueryEngine::new(); // defaults to case_insensitive=false
1651        let result = engine_case_sensitive
1652            .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1653        match result {
1654            Ok(view) => {
1655                println!(
1656                    "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
1657                    view.row_count()
1658                );
1659                assert_eq!(view.row_count(), 0); // Should find no rows (CA != ca)
1660            }
1661            Err(e) => {
1662                panic!("Case-sensitive IN query failed: {e}");
1663            }
1664        }
1665
1666        println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
1667    }
1668
1669    #[test]
1670    #[ignore = "Parentheses in WHERE clause not yet implemented"]
1671    fn test_parentheses_in_where_clause() {
1672        // Initialize tracing for debug output
1673        let _ = tracing_subscriber::fmt()
1674            .with_max_level(tracing::Level::DEBUG)
1675            .try_init();
1676
1677        let mut table = DataTable::new("test");
1678        table.add_column(DataColumn::new("id"));
1679        table.add_column(DataColumn::new("status"));
1680        table.add_column(DataColumn::new("priority"));
1681
1682        // Add test data
1683        table
1684            .add_row(DataRow::new(vec![
1685                DataValue::Integer(1),
1686                DataValue::String("Pending".to_string()),
1687                DataValue::String("High".to_string()),
1688            ]))
1689            .unwrap();
1690
1691        table
1692            .add_row(DataRow::new(vec![
1693                DataValue::Integer(2),
1694                DataValue::String("Complete".to_string()),
1695                DataValue::String("High".to_string()),
1696            ]))
1697            .unwrap();
1698
1699        table
1700            .add_row(DataRow::new(vec![
1701                DataValue::Integer(3),
1702                DataValue::String("Pending".to_string()),
1703                DataValue::String("Low".to_string()),
1704            ]))
1705            .unwrap();
1706
1707        table
1708            .add_row(DataRow::new(vec![
1709                DataValue::Integer(4),
1710                DataValue::String("Complete".to_string()),
1711                DataValue::String("Low".to_string()),
1712            ]))
1713            .unwrap();
1714
1715        let table = Arc::new(table);
1716        let engine = QueryEngine::new();
1717
1718        println!("\n=== Testing Parentheses in WHERE clause ===");
1719        println!("Table has {} rows", table.row_count());
1720        for i in 0..table.row_count() {
1721            let status = table.get_value(i, 1);
1722            let priority = table.get_value(i, 2);
1723            println!("Row {i}: status = {status:?}, priority = {priority:?}");
1724        }
1725
1726        // Test OR with parentheses - should get (Pending AND High) OR (Complete AND Low)
1727        println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
1728        let result = engine.execute(
1729            table.clone(),
1730            "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
1731        );
1732        match result {
1733            Ok(view) => {
1734                println!(
1735                    "SUCCESS: Found {} rows with parenthetical logic",
1736                    view.row_count()
1737                );
1738                assert_eq!(view.row_count(), 2); // Should find rows 1 and 4
1739            }
1740            Err(e) => {
1741                panic!("Parentheses query failed: {e}");
1742            }
1743        }
1744
1745        println!("\n=== Parentheses test complete! ===");
1746    }
1747
1748    #[test]
1749    #[ignore = "Numeric type coercion needs fixing"]
1750    fn test_numeric_type_coercion() {
1751        // Initialize tracing for debug output
1752        let _ = tracing_subscriber::fmt()
1753            .with_max_level(tracing::Level::DEBUG)
1754            .try_init();
1755
1756        let mut table = DataTable::new("test");
1757        table.add_column(DataColumn::new("id"));
1758        table.add_column(DataColumn::new("price"));
1759        table.add_column(DataColumn::new("quantity"));
1760
1761        // Add test data with different numeric types
1762        table
1763            .add_row(DataRow::new(vec![
1764                DataValue::Integer(1),
1765                DataValue::Float(99.50), // Contains '.'
1766                DataValue::Integer(100),
1767            ]))
1768            .unwrap();
1769
1770        table
1771            .add_row(DataRow::new(vec![
1772                DataValue::Integer(2),
1773                DataValue::Float(150.0), // Contains '.' and '0'
1774                DataValue::Integer(200),
1775            ]))
1776            .unwrap();
1777
1778        table
1779            .add_row(DataRow::new(vec![
1780                DataValue::Integer(3),
1781                DataValue::Integer(75), // No decimal point
1782                DataValue::Integer(50),
1783            ]))
1784            .unwrap();
1785
1786        let table = Arc::new(table);
1787        let engine = QueryEngine::new();
1788
1789        println!("\n=== Testing Numeric Type Coercion ===");
1790        println!("Table has {} rows", table.row_count());
1791        for i in 0..table.row_count() {
1792            let price = table.get_value(i, 1);
1793            let quantity = table.get_value(i, 2);
1794            println!("Row {i}: price = {price:?}, quantity = {quantity:?}");
1795        }
1796
1797        // Test Contains on float values - should find rows with decimal points
1798        println!("\n--- Test: price.Contains('.') ---");
1799        let result = engine.execute(
1800            table.clone(),
1801            "SELECT * FROM test WHERE price.Contains('.')",
1802        );
1803        match result {
1804            Ok(view) => {
1805                println!(
1806                    "SUCCESS: Found {} rows with decimal points in price",
1807                    view.row_count()
1808                );
1809                assert_eq!(view.row_count(), 2); // Should find 99.50 and 150.0
1810            }
1811            Err(e) => {
1812                panic!("Numeric Contains query failed: {e}");
1813            }
1814        }
1815
1816        // Test Contains on integer values converted to string
1817        println!("\n--- Test: quantity.Contains('0') ---");
1818        let result = engine.execute(
1819            table.clone(),
1820            "SELECT * FROM test WHERE quantity.Contains('0')",
1821        );
1822        match result {
1823            Ok(view) => {
1824                println!(
1825                    "SUCCESS: Found {} rows with '0' in quantity",
1826                    view.row_count()
1827                );
1828                assert_eq!(view.row_count(), 2); // Should find 100 and 200
1829            }
1830            Err(e) => {
1831                panic!("Integer Contains query failed: {e}");
1832            }
1833        }
1834
1835        println!("\n=== Numeric type coercion test complete! ===");
1836    }
1837
1838    #[test]
1839    fn test_datetime_comparisons() {
1840        // Initialize tracing for debug output
1841        let _ = tracing_subscriber::fmt()
1842            .with_max_level(tracing::Level::DEBUG)
1843            .try_init();
1844
1845        let mut table = DataTable::new("test");
1846        table.add_column(DataColumn::new("id"));
1847        table.add_column(DataColumn::new("created_date"));
1848
1849        // Add test data with date strings (as they would come from CSV)
1850        table
1851            .add_row(DataRow::new(vec![
1852                DataValue::Integer(1),
1853                DataValue::String("2024-12-15".to_string()),
1854            ]))
1855            .unwrap();
1856
1857        table
1858            .add_row(DataRow::new(vec![
1859                DataValue::Integer(2),
1860                DataValue::String("2025-01-15".to_string()),
1861            ]))
1862            .unwrap();
1863
1864        table
1865            .add_row(DataRow::new(vec![
1866                DataValue::Integer(3),
1867                DataValue::String("2025-02-15".to_string()),
1868            ]))
1869            .unwrap();
1870
1871        let table = Arc::new(table);
1872        let engine = QueryEngine::new();
1873
1874        println!("\n=== Testing DateTime Comparisons ===");
1875        println!("Table has {} rows", table.row_count());
1876        for i in 0..table.row_count() {
1877            let date = table.get_value(i, 1);
1878            println!("Row {i}: created_date = {date:?}");
1879        }
1880
1881        // Test DateTime constructor comparison - should find dates after 2025-01-01
1882        println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1883        let result = engine.execute(
1884            table.clone(),
1885            "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1886        );
1887        match result {
1888            Ok(view) => {
1889                println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1890                assert_eq!(view.row_count(), 2); // Should find 2025-01-15 and 2025-02-15
1891            }
1892            Err(e) => {
1893                panic!("DateTime comparison query failed: {e}");
1894            }
1895        }
1896
1897        println!("\n=== DateTime comparison test complete! ===");
1898    }
1899
1900    #[test]
1901    fn test_not_with_method_calls() {
1902        // Initialize tracing for debug output
1903        let _ = tracing_subscriber::fmt()
1904            .with_max_level(tracing::Level::DEBUG)
1905            .try_init();
1906
1907        let mut table = DataTable::new("test");
1908        table.add_column(DataColumn::new("id"));
1909        table.add_column(DataColumn::new("status"));
1910
1911        // Add test data
1912        table
1913            .add_row(DataRow::new(vec![
1914                DataValue::Integer(1),
1915                DataValue::String("Pending Review".to_string()),
1916            ]))
1917            .unwrap();
1918
1919        table
1920            .add_row(DataRow::new(vec![
1921                DataValue::Integer(2),
1922                DataValue::String("Complete".to_string()),
1923            ]))
1924            .unwrap();
1925
1926        table
1927            .add_row(DataRow::new(vec![
1928                DataValue::Integer(3),
1929                DataValue::String("Pending Approval".to_string()),
1930            ]))
1931            .unwrap();
1932
1933        let table = Arc::new(table);
1934        let engine = QueryEngine::with_case_insensitive(true);
1935
1936        println!("\n=== Testing NOT with Method Calls ===");
1937        println!("Table has {} rows", table.row_count());
1938        for i in 0..table.row_count() {
1939            let status = table.get_value(i, 1);
1940            println!("Row {i}: status = {status:?}");
1941        }
1942
1943        // Test NOT with Contains - should exclude rows containing "pend"
1944        println!("\n--- Test: NOT status.Contains('pend') ---");
1945        let result = engine.execute(
1946            table.clone(),
1947            "SELECT * FROM test WHERE NOT status.Contains('pend')",
1948        );
1949        match result {
1950            Ok(view) => {
1951                println!(
1952                    "SUCCESS: Found {} rows NOT containing 'pend'",
1953                    view.row_count()
1954                );
1955                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1956            }
1957            Err(e) => {
1958                panic!("NOT Contains query failed: {e}");
1959            }
1960        }
1961
1962        // Test NOT with StartsWith
1963        println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1964        let result = engine.execute(
1965            table.clone(),
1966            "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1967        );
1968        match result {
1969            Ok(view) => {
1970                println!(
1971                    "SUCCESS: Found {} rows NOT starting with 'Pending'",
1972                    view.row_count()
1973                );
1974                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1975            }
1976            Err(e) => {
1977                panic!("NOT StartsWith query failed: {e}");
1978            }
1979        }
1980
1981        println!("\n=== NOT with method calls test complete! ===");
1982    }
1983
1984    #[test]
1985    #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1986    fn test_complex_logical_expressions() {
1987        // Initialize tracing for debug output
1988        let _ = tracing_subscriber::fmt()
1989            .with_max_level(tracing::Level::DEBUG)
1990            .try_init();
1991
1992        let mut table = DataTable::new("test");
1993        table.add_column(DataColumn::new("id"));
1994        table.add_column(DataColumn::new("status"));
1995        table.add_column(DataColumn::new("priority"));
1996        table.add_column(DataColumn::new("assigned"));
1997
1998        // Add comprehensive test data
1999        table
2000            .add_row(DataRow::new(vec![
2001                DataValue::Integer(1),
2002                DataValue::String("Pending".to_string()),
2003                DataValue::String("High".to_string()),
2004                DataValue::String("John".to_string()),
2005            ]))
2006            .unwrap();
2007
2008        table
2009            .add_row(DataRow::new(vec![
2010                DataValue::Integer(2),
2011                DataValue::String("Complete".to_string()),
2012                DataValue::String("High".to_string()),
2013                DataValue::String("Jane".to_string()),
2014            ]))
2015            .unwrap();
2016
2017        table
2018            .add_row(DataRow::new(vec![
2019                DataValue::Integer(3),
2020                DataValue::String("Pending".to_string()),
2021                DataValue::String("Low".to_string()),
2022                DataValue::String("John".to_string()),
2023            ]))
2024            .unwrap();
2025
2026        table
2027            .add_row(DataRow::new(vec![
2028                DataValue::Integer(4),
2029                DataValue::String("In Progress".to_string()),
2030                DataValue::String("Medium".to_string()),
2031                DataValue::String("Jane".to_string()),
2032            ]))
2033            .unwrap();
2034
2035        let table = Arc::new(table);
2036        let engine = QueryEngine::new();
2037
2038        println!("\n=== Testing Complex Logical Expressions ===");
2039        println!("Table has {} rows", table.row_count());
2040        for i in 0..table.row_count() {
2041            let status = table.get_value(i, 1);
2042            let priority = table.get_value(i, 2);
2043            let assigned = table.get_value(i, 3);
2044            println!(
2045                "Row {i}: status = {status:?}, priority = {priority:?}, assigned = {assigned:?}"
2046            );
2047        }
2048
2049        // Test complex AND/OR logic
2050        println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
2051        let result = engine.execute(
2052            table.clone(),
2053            "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
2054        );
2055        match result {
2056            Ok(view) => {
2057                println!(
2058                    "SUCCESS: Found {} rows with complex logic",
2059                    view.row_count()
2060                );
2061                assert_eq!(view.row_count(), 2); // Should find rows 1 and 3 (both Pending, one High priority, both assigned to John)
2062            }
2063            Err(e) => {
2064                panic!("Complex logic query failed: {e}");
2065            }
2066        }
2067
2068        // Test NOT with complex expressions
2069        println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
2070        let result = engine.execute(
2071            table.clone(),
2072            "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
2073        );
2074        match result {
2075            Ok(view) => {
2076                println!(
2077                    "SUCCESS: Found {} rows with NOT complex logic",
2078                    view.row_count()
2079                );
2080                assert_eq!(view.row_count(), 2); // Should find rows 1 (Pending+High) and 4 (In Progress+Medium)
2081            }
2082            Err(e) => {
2083                panic!("NOT complex logic query failed: {e}");
2084            }
2085        }
2086
2087        println!("\n=== Complex logical expressions test complete! ===");
2088    }
2089
2090    #[test]
2091    fn test_mixed_data_types_and_edge_cases() {
2092        // Initialize tracing for debug output
2093        let _ = tracing_subscriber::fmt()
2094            .with_max_level(tracing::Level::DEBUG)
2095            .try_init();
2096
2097        let mut table = DataTable::new("test");
2098        table.add_column(DataColumn::new("id"));
2099        table.add_column(DataColumn::new("value"));
2100        table.add_column(DataColumn::new("nullable_field"));
2101
2102        // Add test data with mixed types and edge cases
2103        table
2104            .add_row(DataRow::new(vec![
2105                DataValue::Integer(1),
2106                DataValue::String("123.45".to_string()),
2107                DataValue::String("present".to_string()),
2108            ]))
2109            .unwrap();
2110
2111        table
2112            .add_row(DataRow::new(vec![
2113                DataValue::Integer(2),
2114                DataValue::Float(678.90),
2115                DataValue::Null,
2116            ]))
2117            .unwrap();
2118
2119        table
2120            .add_row(DataRow::new(vec![
2121                DataValue::Integer(3),
2122                DataValue::Boolean(true),
2123                DataValue::String("also present".to_string()),
2124            ]))
2125            .unwrap();
2126
2127        table
2128            .add_row(DataRow::new(vec![
2129                DataValue::Integer(4),
2130                DataValue::String("false".to_string()),
2131                DataValue::Null,
2132            ]))
2133            .unwrap();
2134
2135        let table = Arc::new(table);
2136        let engine = QueryEngine::new();
2137
2138        println!("\n=== Testing Mixed Data Types and Edge Cases ===");
2139        println!("Table has {} rows", table.row_count());
2140        for i in 0..table.row_count() {
2141            let value = table.get_value(i, 1);
2142            let nullable = table.get_value(i, 2);
2143            println!("Row {i}: value = {value:?}, nullable_field = {nullable:?}");
2144        }
2145
2146        // Test type coercion with boolean Contains
2147        println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
2148        let result = engine.execute(
2149            table.clone(),
2150            "SELECT * FROM test WHERE value.Contains('true')",
2151        );
2152        match result {
2153            Ok(view) => {
2154                println!(
2155                    "SUCCESS: Found {} rows with boolean coercion",
2156                    view.row_count()
2157                );
2158                assert_eq!(view.row_count(), 1); // Should find the boolean true row
2159            }
2160            Err(e) => {
2161                panic!("Boolean coercion query failed: {e}");
2162            }
2163        }
2164
2165        // Test multiple IN values with mixed types
2166        println!("\n--- Test: id IN (1, 3) ---");
2167        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
2168        match result {
2169            Ok(view) => {
2170                println!("SUCCESS: Found {} rows with IN clause", view.row_count());
2171                assert_eq!(view.row_count(), 2); // Should find rows with id 1 and 3
2172            }
2173            Err(e) => {
2174                panic!("Multiple IN values query failed: {e}");
2175            }
2176        }
2177
2178        println!("\n=== Mixed data types test complete! ===");
2179    }
2180
2181    /// Test that aggregate-only queries return exactly one row (regression test)
2182    #[test]
2183    fn test_aggregate_only_single_row() {
2184        let table = create_test_stock_data();
2185        let engine = QueryEngine::new();
2186
2187        // Test query with multiple aggregates - should return exactly 1 row
2188        let result = engine
2189            .execute(
2190                table.clone(),
2191                "SELECT COUNT(*), MIN(close), MAX(close), AVG(close) FROM stock",
2192            )
2193            .expect("Query should succeed");
2194
2195        assert_eq!(
2196            result.row_count(),
2197            1,
2198            "Aggregate-only query should return exactly 1 row"
2199        );
2200        assert_eq!(result.column_count(), 4, "Should have 4 aggregate columns");
2201
2202        // Verify the actual values are correct
2203        let source = result.source();
2204        let row = source.get_row(0).expect("Should have first row");
2205
2206        // COUNT(*) should be 5 (total rows)
2207        assert_eq!(row.values[0], DataValue::Integer(5));
2208
2209        // MIN should be 99.5
2210        assert_eq!(row.values[1], DataValue::Float(99.5));
2211
2212        // MAX should be 105.0
2213        assert_eq!(row.values[2], DataValue::Float(105.0));
2214
2215        // AVG should be approximately 102.4
2216        if let DataValue::Float(avg) = &row.values[3] {
2217            assert!(
2218                (avg - 102.4).abs() < 0.01,
2219                "Average should be approximately 102.4, got {}",
2220                avg
2221            );
2222        } else {
2223            panic!("AVG should return a Float value");
2224        }
2225    }
2226
2227    /// Test single aggregate function returns single row
2228    #[test]
2229    fn test_single_aggregate_single_row() {
2230        let table = create_test_stock_data();
2231        let engine = QueryEngine::new();
2232
2233        let result = engine
2234            .execute(table.clone(), "SELECT COUNT(*) FROM stock")
2235            .expect("Query should succeed");
2236
2237        assert_eq!(
2238            result.row_count(),
2239            1,
2240            "Single aggregate query should return exactly 1 row"
2241        );
2242        assert_eq!(result.column_count(), 1, "Should have 1 column");
2243
2244        let source = result.source();
2245        let row = source.get_row(0).expect("Should have first row");
2246        assert_eq!(row.values[0], DataValue::Integer(5));
2247    }
2248
2249    /// Test aggregate with WHERE clause filtering
2250    #[test]
2251    fn test_aggregate_with_where_single_row() {
2252        let table = create_test_stock_data();
2253        let engine = QueryEngine::new();
2254
2255        // Filter to only high-value stocks (>= 103.0) and aggregate
2256        let result = engine
2257            .execute(
2258                table.clone(),
2259                "SELECT COUNT(*), MIN(close), MAX(close) FROM stock WHERE close >= 103.0",
2260            )
2261            .expect("Query should succeed");
2262
2263        assert_eq!(
2264            result.row_count(),
2265            1,
2266            "Filtered aggregate query should return exactly 1 row"
2267        );
2268        assert_eq!(result.column_count(), 3, "Should have 3 aggregate columns");
2269
2270        let source = result.source();
2271        let row = source.get_row(0).expect("Should have first row");
2272
2273        // Should find 2 rows (103.5 and 105.0)
2274        assert_eq!(row.values[0], DataValue::Integer(2));
2275        assert_eq!(row.values[1], DataValue::Float(103.5)); // MIN
2276        assert_eq!(row.values[2], DataValue::Float(105.0)); // MAX
2277    }
2278
2279    #[test]
2280    fn test_not_in_parsing() {
2281        use crate::sql::recursive_parser::Parser;
2282
2283        let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
2284        println!("\n=== Testing NOT IN parsing ===");
2285        println!("Parsing query: {query}");
2286
2287        let mut parser = Parser::new(query);
2288        match parser.parse() {
2289            Ok(statement) => {
2290                println!("Parsed statement: {statement:#?}");
2291                if let Some(where_clause) = statement.where_clause {
2292                    println!("WHERE conditions: {:#?}", where_clause.conditions);
2293                    if let Some(first_condition) = where_clause.conditions.first() {
2294                        println!("First condition expression: {:#?}", first_condition.expr);
2295                    }
2296                }
2297            }
2298            Err(e) => {
2299                panic!("Parse error: {e}");
2300            }
2301        }
2302    }
2303
2304    /// Create test stock data for aggregate testing
2305    fn create_test_stock_data() -> Arc<DataTable> {
2306        let mut table = DataTable::new("stock");
2307
2308        table.add_column(DataColumn::new("symbol"));
2309        table.add_column(DataColumn::new("close"));
2310        table.add_column(DataColumn::new("volume"));
2311
2312        // Add 5 rows of test data
2313        let test_data = vec![
2314            ("AAPL", 99.5, 1000),
2315            ("AAPL", 101.2, 1500),
2316            ("AAPL", 103.5, 2000),
2317            ("AAPL", 105.0, 1200),
2318            ("AAPL", 102.8, 1800),
2319        ];
2320
2321        for (symbol, close, volume) in test_data {
2322            table
2323                .add_row(DataRow::new(vec![
2324                    DataValue::String(symbol.to_string()),
2325                    DataValue::Float(close),
2326                    DataValue::Integer(volume),
2327                ]))
2328                .expect("Should add row successfully");
2329        }
2330
2331        Arc::new(table)
2332    }
2333}
2334
2335#[cfg(test)]
2336#[path = "query_engine_tests.rs"]
2337mod query_engine_tests;