sql_cli/data/
query_engine.rs

1use anyhow::{anyhow, Result};
2use std::collections::HashMap;
3use std::sync::Arc;
4use std::time::Instant;
5use tracing::{debug, info};
6
7use crate::config::config::BehaviorConfig;
8use crate::config::global::get_date_notation;
9use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
10use crate::data::data_view::DataView;
11use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
12use crate::data::hash_join::HashJoinExecutor;
13use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
14use crate::data::virtual_table_generator::VirtualTableGenerator;
15use crate::execution_plan::{ExecutionPlan, ExecutionPlanBuilder, StepType};
16use crate::sql::aggregates::contains_aggregate;
17use crate::sql::parser::ast::TableSource;
18use crate::sql::recursive_parser::{
19    OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection, SqlExpression, TableFunction,
20};
21
22/// Query engine that executes SQL directly on `DataTable`
23pub struct QueryEngine {
24    case_insensitive: bool,
25    date_notation: String,
26    behavior_config: Option<BehaviorConfig>,
27}
28
29impl Default for QueryEngine {
30    fn default() -> Self {
31        Self::new()
32    }
33}
34
35impl QueryEngine {
36    #[must_use]
37    pub fn new() -> Self {
38        Self {
39            case_insensitive: false,
40            date_notation: get_date_notation(),
41            behavior_config: None,
42        }
43    }
44
45    #[must_use]
46    pub fn with_behavior_config(config: BehaviorConfig) -> Self {
47        let case_insensitive = config.case_insensitive_default;
48        // Use get_date_notation() to respect environment variable override
49        let date_notation = get_date_notation();
50        Self {
51            case_insensitive,
52            date_notation,
53            behavior_config: Some(config),
54        }
55    }
56
57    #[must_use]
58    pub fn with_date_notation(_date_notation: String) -> Self {
59        Self {
60            case_insensitive: false,
61            date_notation: get_date_notation(), // Always use the global function
62            behavior_config: None,
63        }
64    }
65
66    #[must_use]
67    pub fn with_case_insensitive(case_insensitive: bool) -> Self {
68        Self {
69            case_insensitive,
70            date_notation: get_date_notation(),
71            behavior_config: None,
72        }
73    }
74
75    #[must_use]
76    pub fn with_case_insensitive_and_date_notation(
77        case_insensitive: bool,
78        _date_notation: String, // Keep parameter for compatibility but use get_date_notation()
79    ) -> Self {
80        Self {
81            case_insensitive,
82            date_notation: get_date_notation(), // Always use the global function
83            behavior_config: None,
84        }
85    }
86
87    /// Find a column name similar to the given name using edit distance
88    fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
89        let columns = table.column_names();
90        let mut best_match: Option<(String, usize)> = None;
91
92        for col in columns {
93            let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
94            // Only suggest if distance is small (likely a typo)
95            // Allow up to 3 edits for longer names
96            let max_distance = if name.len() > 10 { 3 } else { 2 };
97            if distance <= max_distance {
98                match &best_match {
99                    None => best_match = Some((col, distance)),
100                    Some((_, best_dist)) if distance < *best_dist => {
101                        best_match = Some((col, distance));
102                    }
103                    _ => {}
104                }
105            }
106        }
107
108        best_match.map(|(name, _)| name)
109    }
110
111    /// Calculate Levenshtein edit distance between two strings
112    fn edit_distance(&self, s1: &str, s2: &str) -> usize {
113        let len1 = s1.len();
114        let len2 = s2.len();
115        let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
116
117        for i in 0..=len1 {
118            matrix[i][0] = i;
119        }
120        for j in 0..=len2 {
121            matrix[0][j] = j;
122        }
123
124        for (i, c1) in s1.chars().enumerate() {
125            for (j, c2) in s2.chars().enumerate() {
126                let cost = usize::from(c1 != c2);
127                matrix[i + 1][j + 1] = std::cmp::min(
128                    matrix[i][j + 1] + 1, // deletion
129                    std::cmp::min(
130                        matrix[i + 1][j] + 1, // insertion
131                        matrix[i][j] + cost,  // substitution
132                    ),
133                );
134            }
135        }
136
137        matrix[len1][len2]
138    }
139
140    /// Execute a SQL query on a `DataTable` and return a `DataView` (for backward compatibility)
141    pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
142        let (view, _plan) = self.execute_with_plan(table, sql)?;
143        Ok(view)
144    }
145
146    /// Execute a query and return both the result and the execution plan
147    pub fn execute_with_plan(
148        &self,
149        table: Arc<DataTable>,
150        sql: &str,
151    ) -> Result<(DataView, ExecutionPlan)> {
152        let mut plan_builder = ExecutionPlanBuilder::new();
153        let start_time = Instant::now();
154
155        // Parse the SQL query
156        plan_builder.begin_step(StepType::Parse, "Parse SQL query".to_string());
157        plan_builder.add_detail(format!("Query: {}", sql));
158        let mut parser = Parser::new(sql);
159        let statement = parser
160            .parse()
161            .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
162        plan_builder.add_detail(format!("Parsed successfully"));
163        if let Some(from) = &statement.from_table {
164            plan_builder.add_detail(format!("FROM: {}", from));
165        }
166        if statement.where_clause.is_some() {
167            plan_builder.add_detail("WHERE clause present".to_string());
168        }
169        plan_builder.end_step();
170
171        // Convert SelectStatement to DataView operations
172        // Create an empty context for CTEs
173        let mut cte_context = HashMap::new();
174        let result = self.build_view_with_context_and_plan(
175            table,
176            statement,
177            &mut cte_context,
178            &mut plan_builder,
179        )?;
180
181        let total_duration = start_time.elapsed();
182        info!(
183            "Query execution complete: total={:?}, rows={}",
184            total_duration,
185            result.row_count()
186        );
187
188        let plan = plan_builder.build();
189        Ok((result, plan))
190    }
191
192    /// Build a `DataView` from a parsed SQL statement
193    fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
194        let mut cte_context = HashMap::new();
195        self.build_view_with_context(table, statement, &mut cte_context)
196    }
197
198    /// Build a DataView from a SelectStatement with CTE context
199    fn build_view_with_context(
200        &self,
201        table: Arc<DataTable>,
202        statement: SelectStatement,
203        cte_context: &mut HashMap<String, Arc<DataView>>,
204    ) -> Result<DataView> {
205        let mut dummy_plan = ExecutionPlanBuilder::new();
206        self.build_view_with_context_and_plan(table, statement, cte_context, &mut dummy_plan)
207    }
208
209    /// Build a DataView from a SelectStatement with CTE context and execution plan tracking
210    fn build_view_with_context_and_plan(
211        &self,
212        table: Arc<DataTable>,
213        statement: SelectStatement,
214        cte_context: &mut HashMap<String, Arc<DataView>>,
215        plan: &mut ExecutionPlanBuilder,
216    ) -> Result<DataView> {
217        // First, process any CTEs
218        for cte in &statement.ctes {
219            debug!("QueryEngine: Processing CTE '{}'...", cte.name);
220            // Execute the CTE query (it might reference earlier CTEs)
221            let cte_result =
222                self.build_view_with_context(table.clone(), cte.query.clone(), cte_context)?;
223
224            // Store the result in the context for later use
225            cte_context.insert(cte.name.clone(), Arc::new(cte_result));
226            debug!(
227                "QueryEngine: CTE '{}' processed, stored in context",
228                cte.name
229            );
230        }
231
232        // Determine the source table for the main query
233        let source_table = if let Some(ref table_func) = statement.from_function {
234            // Handle table functions like RANGE()
235            debug!("QueryEngine: Processing table function...");
236            match table_func {
237                TableFunction::Range { start, end, step } => {
238                    // Evaluate expressions to get numeric values
239                    let mut evaluator =
240                        ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
241
242                    // Create a dummy row for evaluating constant expressions
243                    let dummy_row = 0;
244
245                    let start_val = evaluator.evaluate(start, dummy_row)?;
246                    let end_val = evaluator.evaluate(end, dummy_row)?;
247                    let step_val = if let Some(step_expr) = step {
248                        Some(evaluator.evaluate(step_expr, dummy_row)?)
249                    } else {
250                        None
251                    };
252
253                    // Convert DataValues to integers
254                    let start_int = match start_val {
255                        DataValue::Integer(i) => i,
256                        DataValue::Float(f) => f as i64,
257                        _ => return Err(anyhow!("RANGE start must be numeric")),
258                    };
259
260                    let end_int = match end_val {
261                        DataValue::Integer(i) => i,
262                        DataValue::Float(f) => f as i64,
263                        _ => return Err(anyhow!("RANGE end must be numeric")),
264                    };
265
266                    let step_int = if let Some(step) = step_val {
267                        match step {
268                            DataValue::Integer(i) => Some(i),
269                            DataValue::Float(f) => Some(f as i64),
270                            _ => return Err(anyhow!("RANGE step must be numeric")),
271                        }
272                    } else {
273                        None
274                    };
275
276                    // Generate the virtual table
277                    VirtualTableGenerator::generate_range(start_int, end_int, step_int, None)?
278                }
279            }
280        } else if let Some(ref subquery) = statement.from_subquery {
281            // Execute the subquery and use its result as the source
282            debug!("QueryEngine: Processing FROM subquery...");
283            let subquery_result =
284                self.build_view_with_context(table.clone(), *subquery.clone(), cte_context)?;
285
286            // Convert the DataView to a DataTable for use as source
287            // This materializes the subquery result
288            let materialized = self.materialize_view(subquery_result)?;
289            Arc::new(materialized)
290        } else if let Some(ref table_name) = statement.from_table {
291            // Check if this references a CTE
292            if let Some(cte_view) = cte_context.get(table_name) {
293                debug!("QueryEngine: Using CTE '{}' as source table", table_name);
294                // Materialize the CTE view as a table
295                let materialized = self.materialize_view((**cte_view).clone())?;
296                Arc::new(materialized)
297            } else {
298                // Regular table reference - use the provided table
299                table.clone()
300            }
301        } else {
302            // No FROM clause - use the provided table
303            table.clone()
304        };
305
306        // Process JOINs if present
307        let final_table = if !statement.joins.is_empty() {
308            plan.begin_step(StepType::Filter, "Process JOINs".to_string());
309            plan.add_detail(format!(
310                "{} JOIN clause(s) to process",
311                statement.joins.len()
312            ));
313
314            let join_executor = HashJoinExecutor::new(self.case_insensitive);
315            let mut current_table = source_table;
316
317            for join_clause in &statement.joins {
318                plan.add_detail(format!(
319                    "Executing {:?} JOIN on {}",
320                    join_clause.join_type, join_clause.condition.left_column
321                ));
322
323                // Resolve the right table for the join
324                let right_table = match &join_clause.table {
325                    TableSource::Table(name) => {
326                        // Check if it's a CTE reference
327                        if let Some(cte_view) = cte_context.get(name) {
328                            let materialized = self.materialize_view((**cte_view).clone())?;
329                            Arc::new(materialized)
330                        } else {
331                            // For now, we need the actual table data
332                            // In a real implementation, this would load from file
333                            return Err(anyhow!("Cannot resolve table '{}' for JOIN", name));
334                        }
335                    }
336                    TableSource::DerivedTable { query, alias: _ } => {
337                        // Execute the subquery
338                        let subquery_result = self.build_view_with_context(
339                            table.clone(),
340                            *query.clone(),
341                            cte_context,
342                        )?;
343                        let materialized = self.materialize_view(subquery_result)?;
344                        Arc::new(materialized)
345                    }
346                };
347
348                // Execute the join
349                let joined =
350                    join_executor.execute_join(current_table.clone(), join_clause, right_table)?;
351
352                plan.add_detail(format!("JOIN produced {} rows", joined.row_count()));
353                current_table = Arc::new(joined);
354            }
355
356            plan.end_step();
357            current_table
358        } else {
359            source_table
360        };
361
362        // Continue with the existing build_view logic but using final_table
363        self.build_view_internal(final_table, statement)
364    }
365
366    /// Materialize a DataView into a new DataTable
367    fn materialize_view(&self, view: DataView) -> Result<DataTable> {
368        let source = view.source();
369        let mut result_table = DataTable::new("derived");
370
371        // Get the visible columns from the view
372        let visible_cols = view.visible_column_indices().to_vec();
373
374        // Copy column definitions
375        for col_idx in &visible_cols {
376            let col = &source.columns[*col_idx];
377            let new_col = DataColumn {
378                name: col.name.clone(),
379                data_type: col.data_type.clone(),
380                nullable: col.nullable,
381                unique_values: col.unique_values,
382                null_count: col.null_count,
383                metadata: col.metadata.clone(),
384            };
385            result_table.add_column(new_col);
386        }
387
388        // Copy visible rows
389        for row_idx in view.visible_row_indices() {
390            let source_row = &source.rows[*row_idx];
391            let mut new_row = DataRow { values: Vec::new() };
392
393            for col_idx in &visible_cols {
394                new_row.values.push(source_row.values[*col_idx].clone());
395            }
396
397            result_table.add_row(new_row);
398        }
399
400        Ok(result_table)
401    }
402
403    fn build_view_internal(
404        &self,
405        table: Arc<DataTable>,
406        statement: SelectStatement,
407    ) -> Result<DataView> {
408        debug!(
409            "QueryEngine::build_view - select_items: {:?}",
410            statement.select_items
411        );
412        debug!(
413            "QueryEngine::build_view - where_clause: {:?}",
414            statement.where_clause
415        );
416
417        // Start with all rows visible
418        let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
419
420        // Apply WHERE clause filtering using recursive evaluator
421        if let Some(where_clause) = &statement.where_clause {
422            let total_rows = table.row_count();
423            debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
424            debug!("QueryEngine: WHERE clause = {:?}", where_clause);
425
426            let filter_start = Instant::now();
427            // Filter visible rows based on WHERE clause
428            let mut filtered_rows = Vec::new();
429            for row_idx in visible_rows {
430                // Only log for first few rows to avoid performance impact
431                if row_idx < 3 {
432                    debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
433                }
434                let evaluator = RecursiveWhereEvaluator::with_config(
435                    &table,
436                    self.case_insensitive,
437                    self.date_notation.clone(),
438                );
439                match evaluator.evaluate(where_clause, row_idx) {
440                    Ok(result) => {
441                        if row_idx < 3 {
442                            debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
443                        }
444                        if result {
445                            filtered_rows.push(row_idx);
446                        }
447                    }
448                    Err(e) => {
449                        if row_idx < 3 {
450                            debug!(
451                                "QueryEngine: WHERE evaluation error for row {}: {}",
452                                row_idx, e
453                            );
454                        }
455                        // Propagate WHERE clause errors instead of silently ignoring them
456                        return Err(e);
457                    }
458                }
459            }
460            visible_rows = filtered_rows;
461            let filter_duration = filter_start.elapsed();
462            info!(
463                "WHERE clause filtering: {} rows -> {} rows in {:?}",
464                total_rows,
465                visible_rows.len(),
466                filter_duration
467            );
468
469            // Debug log moved to info level above with timing
470        }
471
472        // Create initial DataView with filtered rows
473        let mut view = DataView::new(table.clone());
474        view = view.with_rows(visible_rows);
475
476        // Handle GROUP BY if present
477        if let Some(group_by_columns) = &statement.group_by {
478            if !group_by_columns.is_empty() {
479                debug!("QueryEngine: Processing GROUP BY: {:?}", group_by_columns);
480                view = self.apply_group_by(
481                    view,
482                    group_by_columns,
483                    &statement.select_items,
484                    statement.having.as_ref(),
485                )?;
486            }
487        } else {
488            // Apply column projection or computed expressions (SELECT clause) - do this AFTER filtering
489            if !statement.select_items.is_empty() {
490                // Check if we have ANY non-star items (not just the first one)
491                let has_non_star_items = statement
492                    .select_items
493                    .iter()
494                    .any(|item| !matches!(item, SelectItem::Star));
495
496                // Apply select items if:
497                // 1. We have computed expressions or explicit columns
498                // 2. OR we have a mix of star and other items (e.g., SELECT *, computed_col)
499                if has_non_star_items || statement.select_items.len() > 1 {
500                    view = self.apply_select_items(view, &statement.select_items)?;
501                }
502                // If it's just a single star, no projection needed
503            } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
504                // Fallback to legacy column projection for backward compatibility
505                let column_indices = self.resolve_column_indices(&table, &statement.columns)?;
506                view = view.with_columns(column_indices);
507            }
508        }
509
510        // Apply DISTINCT if specified
511        if statement.distinct {
512            view = self.apply_distinct(view)?;
513        }
514
515        // Apply ORDER BY sorting
516        if let Some(order_by_columns) = &statement.order_by {
517            if !order_by_columns.is_empty() {
518                view = self.apply_multi_order_by(view, order_by_columns)?;
519            }
520        }
521
522        // Apply LIMIT/OFFSET
523        if let Some(limit) = statement.limit {
524            let offset = statement.offset.unwrap_or(0);
525            view = view.with_limit(limit, offset);
526        }
527
528        Ok(view)
529    }
530
531    /// Resolve column names to indices
532    fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
533        let mut indices = Vec::new();
534        let table_columns = table.column_names();
535
536        for col_name in columns {
537            let index = table_columns
538                .iter()
539                .position(|c| c.eq_ignore_ascii_case(col_name))
540                .ok_or_else(|| {
541                    let suggestion = self.find_similar_column(table, col_name);
542                    match suggestion {
543                        Some(similar) => anyhow::anyhow!(
544                            "Column '{}' not found. Did you mean '{}'?",
545                            col_name,
546                            similar
547                        ),
548                        None => anyhow::anyhow!("Column '{}' not found", col_name),
549                    }
550                })?;
551            indices.push(index);
552        }
553
554        Ok(indices)
555    }
556
557    /// Apply SELECT items (columns and computed expressions) to create new view
558    fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
559        debug!(
560            "QueryEngine::apply_select_items - items: {:?}",
561            select_items
562        );
563        debug!(
564            "QueryEngine::apply_select_items - input view has {} rows",
565            view.row_count()
566        );
567
568        // Check if ALL select items are aggregate functions (no GROUP BY)
569        let all_aggregates = select_items.iter().all(|item| match item {
570            SelectItem::Expression { expr, .. } => contains_aggregate(expr),
571            SelectItem::Column(_) => false,
572            SelectItem::Star => false,
573        });
574
575        if all_aggregates && view.row_count() > 0 {
576            // Special handling for aggregate-only queries (no GROUP BY)
577            // These should produce exactly one row
578            return self.apply_aggregate_select(view, select_items);
579        }
580
581        // Check if we need to create computed columns
582        let has_computed_expressions = select_items
583            .iter()
584            .any(|item| matches!(item, SelectItem::Expression { .. }));
585
586        debug!(
587            "QueryEngine::apply_select_items - has_computed_expressions: {}",
588            has_computed_expressions
589        );
590
591        if !has_computed_expressions {
592            // Simple case: only columns, use existing projection logic
593            let column_indices = self.resolve_select_columns(view.source(), select_items)?;
594            return Ok(view.with_columns(column_indices));
595        }
596
597        // Complex case: we have computed expressions
598        // IMPORTANT: We create a PROJECTED view, not a new table
599        // This preserves the original DataTable reference
600
601        let source_table = view.source();
602        let visible_rows = view.visible_row_indices();
603
604        // Create a temporary table just for the computed result view
605        // But this table is only used for the current query result
606        let mut computed_table = DataTable::new("query_result");
607
608        // First, expand any Star selectors to actual columns
609        let mut expanded_items = Vec::new();
610        for item in select_items {
611            match item {
612                SelectItem::Star => {
613                    // Expand * to all columns from source table
614                    for col_name in source_table.column_names() {
615                        expanded_items.push(SelectItem::Column(col_name));
616                    }
617                }
618                _ => expanded_items.push(item.clone()),
619            }
620        }
621
622        // Add columns based on expanded SelectItems, handling duplicates
623        let mut column_name_counts: std::collections::HashMap<String, usize> =
624            std::collections::HashMap::new();
625
626        for item in &expanded_items {
627            let base_name = match item {
628                SelectItem::Column(name) => name.clone(),
629                SelectItem::Expression { alias, .. } => alias.clone(),
630                SelectItem::Star => unreachable!("Star should have been expanded"),
631            };
632
633            // Check if this column name has been used before
634            let count = column_name_counts.entry(base_name.clone()).or_insert(0);
635            let column_name = if *count == 0 {
636                // First occurrence, use the name as-is
637                base_name.clone()
638            } else {
639                // Duplicate, append a suffix
640                format!("{base_name}_{count}")
641            };
642            *count += 1;
643
644            computed_table.add_column(DataColumn::new(&column_name));
645        }
646
647        // Calculate values for each row
648        let mut evaluator =
649            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
650
651        for &row_idx in visible_rows {
652            let mut row_values = Vec::new();
653
654            for item in &expanded_items {
655                let value = match item {
656                    SelectItem::Column(col_name) => {
657                        // Simple column reference
658                        let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
659                            let suggestion = self.find_similar_column(source_table, col_name);
660                            match suggestion {
661                                Some(similar) => anyhow::anyhow!(
662                                    "Column '{}' not found. Did you mean '{}'?",
663                                    col_name,
664                                    similar
665                                ),
666                                None => anyhow::anyhow!("Column '{}' not found", col_name),
667                            }
668                        })?;
669                        let row = source_table
670                            .get_row(row_idx)
671                            .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
672                        row.get(col_idx)
673                            .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
674                            .clone()
675                    }
676                    SelectItem::Expression { expr, .. } => {
677                        // Computed expression
678                        evaluator.evaluate(expr, row_idx)?
679                    }
680                    SelectItem::Star => unreachable!("Star should have been expanded"),
681                };
682                row_values.push(value);
683            }
684
685            computed_table
686                .add_row(DataRow::new(row_values))
687                .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
688        }
689
690        // Return a view of the computed result
691        // This is a temporary view for this query only
692        Ok(DataView::new(Arc::new(computed_table)))
693    }
694
695    /// Apply aggregate-only SELECT (no GROUP BY - produces single row)
696    fn apply_aggregate_select(
697        &self,
698        view: DataView,
699        select_items: &[SelectItem],
700    ) -> Result<DataView> {
701        debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
702
703        let source_table = view.source();
704        let mut result_table = DataTable::new("aggregate_result");
705
706        // Add columns for each select item
707        for item in select_items {
708            let column_name = match item {
709                SelectItem::Expression { alias, .. } => alias.clone(),
710                _ => unreachable!("Should only have expressions in aggregate-only query"),
711            };
712            result_table.add_column(DataColumn::new(&column_name));
713        }
714
715        // Create evaluator with visible rows from the view (for filtered aggregates)
716        let visible_rows = view.visible_row_indices().to_vec();
717        let mut evaluator =
718            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
719                .with_visible_rows(visible_rows);
720
721        // Evaluate each aggregate expression once (they handle all rows internally)
722        let mut row_values = Vec::new();
723        for item in select_items {
724            match item {
725                SelectItem::Expression { expr, .. } => {
726                    // The evaluator will handle aggregates over all rows
727                    // We pass row_index=0 but aggregates ignore it and process all rows
728                    let value = evaluator.evaluate(expr, 0)?;
729                    row_values.push(value);
730                }
731                _ => unreachable!("Should only have expressions in aggregate-only query"),
732            }
733        }
734
735        // Add the single result row
736        result_table
737            .add_row(DataRow::new(row_values))
738            .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
739
740        Ok(DataView::new(Arc::new(result_table)))
741    }
742
743    /// Resolve `SelectItem` columns to indices (for simple column projections only)
744    fn resolve_select_columns(
745        &self,
746        table: &DataTable,
747        select_items: &[SelectItem],
748    ) -> Result<Vec<usize>> {
749        let mut indices = Vec::new();
750        let table_columns = table.column_names();
751
752        for item in select_items {
753            match item {
754                SelectItem::Column(col_name) => {
755                    let index = table_columns
756                        .iter()
757                        .position(|c| c.eq_ignore_ascii_case(col_name))
758                        .ok_or_else(|| {
759                            let suggestion = self.find_similar_column(table, col_name);
760                            match suggestion {
761                                Some(similar) => anyhow::anyhow!(
762                                    "Column '{}' not found. Did you mean '{}'?",
763                                    col_name,
764                                    similar
765                                ),
766                                None => anyhow::anyhow!("Column '{}' not found", col_name),
767                            }
768                        })?;
769                    indices.push(index);
770                }
771                SelectItem::Star => {
772                    // Expand * to all column indices
773                    for i in 0..table_columns.len() {
774                        indices.push(i);
775                    }
776                }
777                SelectItem::Expression { .. } => {
778                    return Err(anyhow::anyhow!(
779                        "Computed expressions require new table creation"
780                    ));
781                }
782            }
783        }
784
785        Ok(indices)
786    }
787
788    /// Apply DISTINCT to remove duplicate rows
789    fn apply_distinct(&self, view: DataView) -> Result<DataView> {
790        use std::collections::HashSet;
791
792        let source = view.source();
793        let visible_cols = view.visible_column_indices();
794        let visible_rows = view.visible_row_indices();
795
796        // Build a set to track unique rows
797        let mut seen_rows = HashSet::new();
798        let mut unique_row_indices = Vec::new();
799
800        for &row_idx in visible_rows {
801            // Build a key representing this row's visible column values
802            let mut row_key = Vec::new();
803            for &col_idx in visible_cols {
804                let value = source
805                    .get_value(row_idx, col_idx)
806                    .ok_or_else(|| anyhow!("Invalid cell reference"))?;
807                // Convert value to a hashable representation
808                row_key.push(format!("{:?}", value));
809            }
810
811            // Check if we've seen this row before
812            if seen_rows.insert(row_key) {
813                // First time seeing this row combination
814                unique_row_indices.push(row_idx);
815            }
816        }
817
818        // Create a new view with only unique rows
819        Ok(view.with_rows(unique_row_indices))
820    }
821
822    /// Apply multi-column ORDER BY sorting to the view
823    fn apply_multi_order_by(
824        &self,
825        mut view: DataView,
826        order_by_columns: &[OrderByColumn],
827    ) -> Result<DataView> {
828        // Build list of (source_column_index, ascending) tuples
829        let mut sort_columns = Vec::new();
830
831        for order_col in order_by_columns {
832            // Try to find the column index
833            // Check in the current view's source table (this handles both regular columns and computed columns)
834            // This is especially important after GROUP BY where we have a new result table with aggregate aliases
835            let col_index = view
836                .source()
837                .get_column_index(&order_col.column)
838                .ok_or_else(|| {
839                    // If not found, provide helpful error with suggestions
840                    let suggestion = self.find_similar_column(view.source(), &order_col.column);
841                    match suggestion {
842                        Some(similar) => anyhow::anyhow!(
843                            "Column '{}' not found. Did you mean '{}'?",
844                            order_col.column,
845                            similar
846                        ),
847                        None => {
848                            // Also list available columns for debugging
849                            let available_cols = view.source().column_names().join(", ");
850                            anyhow::anyhow!(
851                                "Column '{}' not found. Available columns: {}",
852                                order_col.column,
853                                available_cols
854                            )
855                        }
856                    }
857                })?;
858
859            let ascending = matches!(order_col.direction, SortDirection::Asc);
860            sort_columns.push((col_index, ascending));
861        }
862
863        // Apply multi-column sorting
864        view.apply_multi_sort(&sort_columns)?;
865        Ok(view)
866    }
867
868    /// Apply GROUP BY to the view with optional HAVING clause
869    fn apply_group_by(
870        &self,
871        view: DataView,
872        group_by_columns: &[String],
873        select_items: &[SelectItem],
874        having: Option<&SqlExpression>,
875    ) -> Result<DataView> {
876        debug!(
877            "QueryEngine::apply_group_by - grouping by: {:?}",
878            group_by_columns
879        );
880
881        // Group the data using DataView's group_by method
882        let groups = view.group_by(group_by_columns)?;
883        debug!(
884            "QueryEngine::apply_group_by - created {} groups",
885            groups.len()
886        );
887
888        // Create a result table for the grouped data
889        let mut result_table = DataTable::new("grouped_result");
890
891        // Determine which columns to include in the result
892        // 1. GROUP BY columns always come first
893        // 2. Then aggregate functions
894
895        // Add GROUP BY columns to result
896        for col_name in group_by_columns {
897            result_table.add_column(DataColumn::new(col_name));
898        }
899
900        // Process SELECT items to find aggregates and their aliases
901        let mut aggregate_columns = Vec::new();
902        for item in select_items {
903            match item {
904                SelectItem::Expression { expr, alias } => {
905                    if contains_aggregate(expr) {
906                        // Expression always has an alias in the AST
907                        result_table.add_column(DataColumn::new(alias));
908                        aggregate_columns.push((expr.clone(), alias.clone()));
909                    }
910                }
911                SelectItem::Column(col_name) => {
912                    // Non-aggregate columns must be in GROUP BY
913                    if !group_by_columns.contains(col_name) {
914                        return Err(anyhow!(
915                            "Column '{}' must appear in GROUP BY clause or be used in an aggregate function",
916                            col_name
917                        ));
918                    }
919                }
920                SelectItem::Star => {
921                    // For GROUP BY queries, * is not really meaningful
922                    // We'll just include the GROUP BY columns which we already added
923                }
924            }
925        }
926
927        // Process each group
928        for (group_key, group_view) in groups {
929            let mut row_values = Vec::new();
930            let mut aggregate_values = std::collections::HashMap::new();
931
932            // Add GROUP BY column values from the key
933            for (i, value) in group_key.0.iter().enumerate() {
934                row_values.push(value.clone());
935                // Store GROUP BY columns for HAVING evaluation
936                if i < group_by_columns.len() {
937                    aggregate_values.insert(group_by_columns[i].clone(), value.clone());
938                }
939            }
940
941            // Calculate aggregate values for this group
942            for (expr, col_name) in &aggregate_columns {
943                // Set the visible rows for this group so aggregates work correctly
944                let group_rows = group_view.get_visible_rows();
945                let mut evaluator = ArithmeticEvaluator::new(group_view.source())
946                    .with_visible_rows(group_rows.clone());
947
948                // For aggregates, we need to evaluate across all rows in the group
949                let value = if group_view.row_count() > 0 && !group_rows.is_empty() {
950                    // Evaluate the aggregate expression
951                    // The arithmetic evaluator will handle the aggregate across visible rows
952                    evaluator
953                        .evaluate(expr, group_rows[0])
954                        .unwrap_or(DataValue::Null)
955                } else {
956                    DataValue::Null
957                };
958
959                // Store aggregate value for HAVING evaluation
960                aggregate_values.insert(col_name.clone(), value.clone());
961                row_values.push(value);
962            }
963
964            // Evaluate HAVING clause if present
965            if let Some(having_expr) = having {
966                // Create a temporary table with one row containing the aggregate values
967                let mut temp_table = DataTable::new("having_eval");
968                for col_name in aggregate_values.keys() {
969                    temp_table.add_column(DataColumn::new(col_name));
970                }
971
972                let temp_row_values: Vec<DataValue> = aggregate_values.values().cloned().collect();
973                temp_table
974                    .add_row(DataRow::new(temp_row_values))
975                    .map_err(|e| anyhow!("Failed to create temp table for HAVING: {}", e))?;
976
977                // Evaluate HAVING expression on the aggregate values
978                let mut evaluator = ArithmeticEvaluator::new(&temp_table);
979                let having_result = evaluator
980                    .evaluate(having_expr, 0)
981                    .unwrap_or(DataValue::Boolean(false));
982
983                // Skip this group if HAVING condition is not met
984                match having_result {
985                    DataValue::Boolean(false) => continue,
986                    DataValue::Null => continue,
987                    _ => {} // Include the row
988                }
989            }
990
991            // Add the row to the result table
992            result_table
993                .add_row(DataRow::new(row_values))
994                .map_err(|e| anyhow!("Failed to add row to result table: {}", e))?;
995        }
996
997        // Return a DataView of the grouped result
998        Ok(DataView::new(Arc::new(result_table)))
999    }
1000}
1001
1002#[cfg(test)]
1003mod tests {
1004    use super::*;
1005    use crate::data::datatable::{DataColumn, DataRow, DataValue};
1006
1007    fn create_test_table() -> Arc<DataTable> {
1008        let mut table = DataTable::new("test");
1009
1010        // Add columns
1011        table.add_column(DataColumn::new("id"));
1012        table.add_column(DataColumn::new("name"));
1013        table.add_column(DataColumn::new("age"));
1014
1015        // Add rows
1016        table
1017            .add_row(DataRow::new(vec![
1018                DataValue::Integer(1),
1019                DataValue::String("Alice".to_string()),
1020                DataValue::Integer(30),
1021            ]))
1022            .unwrap();
1023
1024        table
1025            .add_row(DataRow::new(vec![
1026                DataValue::Integer(2),
1027                DataValue::String("Bob".to_string()),
1028                DataValue::Integer(25),
1029            ]))
1030            .unwrap();
1031
1032        table
1033            .add_row(DataRow::new(vec![
1034                DataValue::Integer(3),
1035                DataValue::String("Charlie".to_string()),
1036                DataValue::Integer(35),
1037            ]))
1038            .unwrap();
1039
1040        Arc::new(table)
1041    }
1042
1043    #[test]
1044    fn test_select_all() {
1045        let table = create_test_table();
1046        let engine = QueryEngine::new();
1047
1048        let view = engine
1049            .execute(table.clone(), "SELECT * FROM users")
1050            .unwrap();
1051        assert_eq!(view.row_count(), 3);
1052        assert_eq!(view.column_count(), 3);
1053    }
1054
1055    #[test]
1056    fn test_select_columns() {
1057        let table = create_test_table();
1058        let engine = QueryEngine::new();
1059
1060        let view = engine
1061            .execute(table.clone(), "SELECT name, age FROM users")
1062            .unwrap();
1063        assert_eq!(view.row_count(), 3);
1064        assert_eq!(view.column_count(), 2);
1065    }
1066
1067    #[test]
1068    fn test_select_with_limit() {
1069        let table = create_test_table();
1070        let engine = QueryEngine::new();
1071
1072        let view = engine
1073            .execute(table.clone(), "SELECT * FROM users LIMIT 2")
1074            .unwrap();
1075        assert_eq!(view.row_count(), 2);
1076    }
1077
1078    #[test]
1079    fn test_type_coercion_contains() {
1080        // Initialize tracing for debug output
1081        let _ = tracing_subscriber::fmt()
1082            .with_max_level(tracing::Level::DEBUG)
1083            .try_init();
1084
1085        let mut table = DataTable::new("test");
1086        table.add_column(DataColumn::new("id"));
1087        table.add_column(DataColumn::new("status"));
1088        table.add_column(DataColumn::new("price"));
1089
1090        // Add test data with mixed types
1091        table
1092            .add_row(DataRow::new(vec![
1093                DataValue::Integer(1),
1094                DataValue::String("Pending".to_string()),
1095                DataValue::Float(99.99),
1096            ]))
1097            .unwrap();
1098
1099        table
1100            .add_row(DataRow::new(vec![
1101                DataValue::Integer(2),
1102                DataValue::String("Confirmed".to_string()),
1103                DataValue::Float(150.50),
1104            ]))
1105            .unwrap();
1106
1107        table
1108            .add_row(DataRow::new(vec![
1109                DataValue::Integer(3),
1110                DataValue::String("Pending".to_string()),
1111                DataValue::Float(75.00),
1112            ]))
1113            .unwrap();
1114
1115        let table = Arc::new(table);
1116        let engine = QueryEngine::new();
1117
1118        println!("\n=== Testing WHERE clause with Contains ===");
1119        println!("Table has {} rows", table.row_count());
1120        for i in 0..table.row_count() {
1121            let status = table.get_value(i, 1);
1122            println!("Row {i}: status = {status:?}");
1123        }
1124
1125        // Test 1: Basic string contains (should work)
1126        println!("\n--- Test 1: status.Contains('pend') ---");
1127        let result = engine.execute(
1128            table.clone(),
1129            "SELECT * FROM test WHERE status.Contains('pend')",
1130        );
1131        match result {
1132            Ok(view) => {
1133                println!("SUCCESS: Found {} matching rows", view.row_count());
1134                assert_eq!(view.row_count(), 2); // Should find both Pending rows
1135            }
1136            Err(e) => {
1137                panic!("Query failed: {e}");
1138            }
1139        }
1140
1141        // Test 2: Numeric contains (should work with type coercion)
1142        println!("\n--- Test 2: price.Contains('9') ---");
1143        let result = engine.execute(
1144            table.clone(),
1145            "SELECT * FROM test WHERE price.Contains('9')",
1146        );
1147        match result {
1148            Ok(view) => {
1149                println!(
1150                    "SUCCESS: Found {} matching rows with price containing '9'",
1151                    view.row_count()
1152                );
1153                // Should find 99.99 row
1154                assert!(view.row_count() >= 1);
1155            }
1156            Err(e) => {
1157                panic!("Numeric coercion query failed: {e}");
1158            }
1159        }
1160
1161        println!("\n=== All tests passed! ===");
1162    }
1163
1164    #[test]
1165    fn test_not_in_clause() {
1166        // Initialize tracing for debug output
1167        let _ = tracing_subscriber::fmt()
1168            .with_max_level(tracing::Level::DEBUG)
1169            .try_init();
1170
1171        let mut table = DataTable::new("test");
1172        table.add_column(DataColumn::new("id"));
1173        table.add_column(DataColumn::new("country"));
1174
1175        // Add test data
1176        table
1177            .add_row(DataRow::new(vec![
1178                DataValue::Integer(1),
1179                DataValue::String("CA".to_string()),
1180            ]))
1181            .unwrap();
1182
1183        table
1184            .add_row(DataRow::new(vec![
1185                DataValue::Integer(2),
1186                DataValue::String("US".to_string()),
1187            ]))
1188            .unwrap();
1189
1190        table
1191            .add_row(DataRow::new(vec![
1192                DataValue::Integer(3),
1193                DataValue::String("UK".to_string()),
1194            ]))
1195            .unwrap();
1196
1197        let table = Arc::new(table);
1198        let engine = QueryEngine::new();
1199
1200        println!("\n=== Testing NOT IN clause ===");
1201        println!("Table has {} rows", table.row_count());
1202        for i in 0..table.row_count() {
1203            let country = table.get_value(i, 1);
1204            println!("Row {i}: country = {country:?}");
1205        }
1206
1207        // Test NOT IN clause - should exclude CA, return US and UK (2 rows)
1208        println!("\n--- Test: country NOT IN ('CA') ---");
1209        let result = engine.execute(
1210            table.clone(),
1211            "SELECT * FROM test WHERE country NOT IN ('CA')",
1212        );
1213        match result {
1214            Ok(view) => {
1215                println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
1216                assert_eq!(view.row_count(), 2); // Should find US and UK
1217            }
1218            Err(e) => {
1219                panic!("NOT IN query failed: {e}");
1220            }
1221        }
1222
1223        println!("\n=== NOT IN test complete! ===");
1224    }
1225
1226    #[test]
1227    fn test_case_insensitive_in_and_not_in() {
1228        // Initialize tracing for debug output
1229        let _ = tracing_subscriber::fmt()
1230            .with_max_level(tracing::Level::DEBUG)
1231            .try_init();
1232
1233        let mut table = DataTable::new("test");
1234        table.add_column(DataColumn::new("id"));
1235        table.add_column(DataColumn::new("country"));
1236
1237        // Add test data with mixed case
1238        table
1239            .add_row(DataRow::new(vec![
1240                DataValue::Integer(1),
1241                DataValue::String("CA".to_string()), // uppercase
1242            ]))
1243            .unwrap();
1244
1245        table
1246            .add_row(DataRow::new(vec![
1247                DataValue::Integer(2),
1248                DataValue::String("us".to_string()), // lowercase
1249            ]))
1250            .unwrap();
1251
1252        table
1253            .add_row(DataRow::new(vec![
1254                DataValue::Integer(3),
1255                DataValue::String("UK".to_string()), // uppercase
1256            ]))
1257            .unwrap();
1258
1259        let table = Arc::new(table);
1260
1261        println!("\n=== Testing Case-Insensitive IN clause ===");
1262        println!("Table has {} rows", table.row_count());
1263        for i in 0..table.row_count() {
1264            let country = table.get_value(i, 1);
1265            println!("Row {i}: country = {country:?}");
1266        }
1267
1268        // Test case-insensitive IN - should match 'CA' with 'ca'
1269        println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
1270        let engine = QueryEngine::with_case_insensitive(true);
1271        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1272        match result {
1273            Ok(view) => {
1274                println!(
1275                    "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
1276                    view.row_count()
1277                );
1278                assert_eq!(view.row_count(), 1); // Should find CA row
1279            }
1280            Err(e) => {
1281                panic!("Case-insensitive IN query failed: {e}");
1282            }
1283        }
1284
1285        // Test case-insensitive NOT IN - should exclude 'CA' when searching for 'ca'
1286        println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
1287        let result = engine.execute(
1288            table.clone(),
1289            "SELECT * FROM test WHERE country NOT IN ('ca')",
1290        );
1291        match result {
1292            Ok(view) => {
1293                println!(
1294                    "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
1295                    view.row_count()
1296                );
1297                assert_eq!(view.row_count(), 2); // Should find us and UK rows
1298            }
1299            Err(e) => {
1300                panic!("Case-insensitive NOT IN query failed: {e}");
1301            }
1302        }
1303
1304        // Test case-sensitive (default) - should NOT match 'CA' with 'ca'
1305        println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
1306        let engine_case_sensitive = QueryEngine::new(); // defaults to case_insensitive=false
1307        let result = engine_case_sensitive
1308            .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1309        match result {
1310            Ok(view) => {
1311                println!(
1312                    "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
1313                    view.row_count()
1314                );
1315                assert_eq!(view.row_count(), 0); // Should find no rows (CA != ca)
1316            }
1317            Err(e) => {
1318                panic!("Case-sensitive IN query failed: {e}");
1319            }
1320        }
1321
1322        println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
1323    }
1324
1325    #[test]
1326    #[ignore = "Parentheses in WHERE clause not yet implemented"]
1327    fn test_parentheses_in_where_clause() {
1328        // Initialize tracing for debug output
1329        let _ = tracing_subscriber::fmt()
1330            .with_max_level(tracing::Level::DEBUG)
1331            .try_init();
1332
1333        let mut table = DataTable::new("test");
1334        table.add_column(DataColumn::new("id"));
1335        table.add_column(DataColumn::new("status"));
1336        table.add_column(DataColumn::new("priority"));
1337
1338        // Add test data
1339        table
1340            .add_row(DataRow::new(vec![
1341                DataValue::Integer(1),
1342                DataValue::String("Pending".to_string()),
1343                DataValue::String("High".to_string()),
1344            ]))
1345            .unwrap();
1346
1347        table
1348            .add_row(DataRow::new(vec![
1349                DataValue::Integer(2),
1350                DataValue::String("Complete".to_string()),
1351                DataValue::String("High".to_string()),
1352            ]))
1353            .unwrap();
1354
1355        table
1356            .add_row(DataRow::new(vec![
1357                DataValue::Integer(3),
1358                DataValue::String("Pending".to_string()),
1359                DataValue::String("Low".to_string()),
1360            ]))
1361            .unwrap();
1362
1363        table
1364            .add_row(DataRow::new(vec![
1365                DataValue::Integer(4),
1366                DataValue::String("Complete".to_string()),
1367                DataValue::String("Low".to_string()),
1368            ]))
1369            .unwrap();
1370
1371        let table = Arc::new(table);
1372        let engine = QueryEngine::new();
1373
1374        println!("\n=== Testing Parentheses in WHERE clause ===");
1375        println!("Table has {} rows", table.row_count());
1376        for i in 0..table.row_count() {
1377            let status = table.get_value(i, 1);
1378            let priority = table.get_value(i, 2);
1379            println!("Row {i}: status = {status:?}, priority = {priority:?}");
1380        }
1381
1382        // Test OR with parentheses - should get (Pending AND High) OR (Complete AND Low)
1383        println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
1384        let result = engine.execute(
1385            table.clone(),
1386            "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
1387        );
1388        match result {
1389            Ok(view) => {
1390                println!(
1391                    "SUCCESS: Found {} rows with parenthetical logic",
1392                    view.row_count()
1393                );
1394                assert_eq!(view.row_count(), 2); // Should find rows 1 and 4
1395            }
1396            Err(e) => {
1397                panic!("Parentheses query failed: {e}");
1398            }
1399        }
1400
1401        println!("\n=== Parentheses test complete! ===");
1402    }
1403
1404    #[test]
1405    #[ignore = "Numeric type coercion needs fixing"]
1406    fn test_numeric_type_coercion() {
1407        // Initialize tracing for debug output
1408        let _ = tracing_subscriber::fmt()
1409            .with_max_level(tracing::Level::DEBUG)
1410            .try_init();
1411
1412        let mut table = DataTable::new("test");
1413        table.add_column(DataColumn::new("id"));
1414        table.add_column(DataColumn::new("price"));
1415        table.add_column(DataColumn::new("quantity"));
1416
1417        // Add test data with different numeric types
1418        table
1419            .add_row(DataRow::new(vec![
1420                DataValue::Integer(1),
1421                DataValue::Float(99.50), // Contains '.'
1422                DataValue::Integer(100),
1423            ]))
1424            .unwrap();
1425
1426        table
1427            .add_row(DataRow::new(vec![
1428                DataValue::Integer(2),
1429                DataValue::Float(150.0), // Contains '.' and '0'
1430                DataValue::Integer(200),
1431            ]))
1432            .unwrap();
1433
1434        table
1435            .add_row(DataRow::new(vec![
1436                DataValue::Integer(3),
1437                DataValue::Integer(75), // No decimal point
1438                DataValue::Integer(50),
1439            ]))
1440            .unwrap();
1441
1442        let table = Arc::new(table);
1443        let engine = QueryEngine::new();
1444
1445        println!("\n=== Testing Numeric Type Coercion ===");
1446        println!("Table has {} rows", table.row_count());
1447        for i in 0..table.row_count() {
1448            let price = table.get_value(i, 1);
1449            let quantity = table.get_value(i, 2);
1450            println!("Row {i}: price = {price:?}, quantity = {quantity:?}");
1451        }
1452
1453        // Test Contains on float values - should find rows with decimal points
1454        println!("\n--- Test: price.Contains('.') ---");
1455        let result = engine.execute(
1456            table.clone(),
1457            "SELECT * FROM test WHERE price.Contains('.')",
1458        );
1459        match result {
1460            Ok(view) => {
1461                println!(
1462                    "SUCCESS: Found {} rows with decimal points in price",
1463                    view.row_count()
1464                );
1465                assert_eq!(view.row_count(), 2); // Should find 99.50 and 150.0
1466            }
1467            Err(e) => {
1468                panic!("Numeric Contains query failed: {e}");
1469            }
1470        }
1471
1472        // Test Contains on integer values converted to string
1473        println!("\n--- Test: quantity.Contains('0') ---");
1474        let result = engine.execute(
1475            table.clone(),
1476            "SELECT * FROM test WHERE quantity.Contains('0')",
1477        );
1478        match result {
1479            Ok(view) => {
1480                println!(
1481                    "SUCCESS: Found {} rows with '0' in quantity",
1482                    view.row_count()
1483                );
1484                assert_eq!(view.row_count(), 2); // Should find 100 and 200
1485            }
1486            Err(e) => {
1487                panic!("Integer Contains query failed: {e}");
1488            }
1489        }
1490
1491        println!("\n=== Numeric type coercion test complete! ===");
1492    }
1493
1494    #[test]
1495    fn test_datetime_comparisons() {
1496        // Initialize tracing for debug output
1497        let _ = tracing_subscriber::fmt()
1498            .with_max_level(tracing::Level::DEBUG)
1499            .try_init();
1500
1501        let mut table = DataTable::new("test");
1502        table.add_column(DataColumn::new("id"));
1503        table.add_column(DataColumn::new("created_date"));
1504
1505        // Add test data with date strings (as they would come from CSV)
1506        table
1507            .add_row(DataRow::new(vec![
1508                DataValue::Integer(1),
1509                DataValue::String("2024-12-15".to_string()),
1510            ]))
1511            .unwrap();
1512
1513        table
1514            .add_row(DataRow::new(vec![
1515                DataValue::Integer(2),
1516                DataValue::String("2025-01-15".to_string()),
1517            ]))
1518            .unwrap();
1519
1520        table
1521            .add_row(DataRow::new(vec![
1522                DataValue::Integer(3),
1523                DataValue::String("2025-02-15".to_string()),
1524            ]))
1525            .unwrap();
1526
1527        let table = Arc::new(table);
1528        let engine = QueryEngine::new();
1529
1530        println!("\n=== Testing DateTime Comparisons ===");
1531        println!("Table has {} rows", table.row_count());
1532        for i in 0..table.row_count() {
1533            let date = table.get_value(i, 1);
1534            println!("Row {i}: created_date = {date:?}");
1535        }
1536
1537        // Test DateTime constructor comparison - should find dates after 2025-01-01
1538        println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1539        let result = engine.execute(
1540            table.clone(),
1541            "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1542        );
1543        match result {
1544            Ok(view) => {
1545                println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1546                assert_eq!(view.row_count(), 2); // Should find 2025-01-15 and 2025-02-15
1547            }
1548            Err(e) => {
1549                panic!("DateTime comparison query failed: {e}");
1550            }
1551        }
1552
1553        println!("\n=== DateTime comparison test complete! ===");
1554    }
1555
1556    #[test]
1557    fn test_not_with_method_calls() {
1558        // Initialize tracing for debug output
1559        let _ = tracing_subscriber::fmt()
1560            .with_max_level(tracing::Level::DEBUG)
1561            .try_init();
1562
1563        let mut table = DataTable::new("test");
1564        table.add_column(DataColumn::new("id"));
1565        table.add_column(DataColumn::new("status"));
1566
1567        // Add test data
1568        table
1569            .add_row(DataRow::new(vec![
1570                DataValue::Integer(1),
1571                DataValue::String("Pending Review".to_string()),
1572            ]))
1573            .unwrap();
1574
1575        table
1576            .add_row(DataRow::new(vec![
1577                DataValue::Integer(2),
1578                DataValue::String("Complete".to_string()),
1579            ]))
1580            .unwrap();
1581
1582        table
1583            .add_row(DataRow::new(vec![
1584                DataValue::Integer(3),
1585                DataValue::String("Pending Approval".to_string()),
1586            ]))
1587            .unwrap();
1588
1589        let table = Arc::new(table);
1590        let engine = QueryEngine::with_case_insensitive(true);
1591
1592        println!("\n=== Testing NOT with Method Calls ===");
1593        println!("Table has {} rows", table.row_count());
1594        for i in 0..table.row_count() {
1595            let status = table.get_value(i, 1);
1596            println!("Row {i}: status = {status:?}");
1597        }
1598
1599        // Test NOT with Contains - should exclude rows containing "pend"
1600        println!("\n--- Test: NOT status.Contains('pend') ---");
1601        let result = engine.execute(
1602            table.clone(),
1603            "SELECT * FROM test WHERE NOT status.Contains('pend')",
1604        );
1605        match result {
1606            Ok(view) => {
1607                println!(
1608                    "SUCCESS: Found {} rows NOT containing 'pend'",
1609                    view.row_count()
1610                );
1611                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1612            }
1613            Err(e) => {
1614                panic!("NOT Contains query failed: {e}");
1615            }
1616        }
1617
1618        // Test NOT with StartsWith
1619        println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1620        let result = engine.execute(
1621            table.clone(),
1622            "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1623        );
1624        match result {
1625            Ok(view) => {
1626                println!(
1627                    "SUCCESS: Found {} rows NOT starting with 'Pending'",
1628                    view.row_count()
1629                );
1630                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1631            }
1632            Err(e) => {
1633                panic!("NOT StartsWith query failed: {e}");
1634            }
1635        }
1636
1637        println!("\n=== NOT with method calls test complete! ===");
1638    }
1639
1640    #[test]
1641    #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1642    fn test_complex_logical_expressions() {
1643        // Initialize tracing for debug output
1644        let _ = tracing_subscriber::fmt()
1645            .with_max_level(tracing::Level::DEBUG)
1646            .try_init();
1647
1648        let mut table = DataTable::new("test");
1649        table.add_column(DataColumn::new("id"));
1650        table.add_column(DataColumn::new("status"));
1651        table.add_column(DataColumn::new("priority"));
1652        table.add_column(DataColumn::new("assigned"));
1653
1654        // Add comprehensive test data
1655        table
1656            .add_row(DataRow::new(vec![
1657                DataValue::Integer(1),
1658                DataValue::String("Pending".to_string()),
1659                DataValue::String("High".to_string()),
1660                DataValue::String("John".to_string()),
1661            ]))
1662            .unwrap();
1663
1664        table
1665            .add_row(DataRow::new(vec![
1666                DataValue::Integer(2),
1667                DataValue::String("Complete".to_string()),
1668                DataValue::String("High".to_string()),
1669                DataValue::String("Jane".to_string()),
1670            ]))
1671            .unwrap();
1672
1673        table
1674            .add_row(DataRow::new(vec![
1675                DataValue::Integer(3),
1676                DataValue::String("Pending".to_string()),
1677                DataValue::String("Low".to_string()),
1678                DataValue::String("John".to_string()),
1679            ]))
1680            .unwrap();
1681
1682        table
1683            .add_row(DataRow::new(vec![
1684                DataValue::Integer(4),
1685                DataValue::String("In Progress".to_string()),
1686                DataValue::String("Medium".to_string()),
1687                DataValue::String("Jane".to_string()),
1688            ]))
1689            .unwrap();
1690
1691        let table = Arc::new(table);
1692        let engine = QueryEngine::new();
1693
1694        println!("\n=== Testing Complex Logical Expressions ===");
1695        println!("Table has {} rows", table.row_count());
1696        for i in 0..table.row_count() {
1697            let status = table.get_value(i, 1);
1698            let priority = table.get_value(i, 2);
1699            let assigned = table.get_value(i, 3);
1700            println!(
1701                "Row {i}: status = {status:?}, priority = {priority:?}, assigned = {assigned:?}"
1702            );
1703        }
1704
1705        // Test complex AND/OR logic
1706        println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
1707        let result = engine.execute(
1708            table.clone(),
1709            "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
1710        );
1711        match result {
1712            Ok(view) => {
1713                println!(
1714                    "SUCCESS: Found {} rows with complex logic",
1715                    view.row_count()
1716                );
1717                assert_eq!(view.row_count(), 2); // Should find rows 1 and 3 (both Pending, one High priority, both assigned to John)
1718            }
1719            Err(e) => {
1720                panic!("Complex logic query failed: {e}");
1721            }
1722        }
1723
1724        // Test NOT with complex expressions
1725        println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
1726        let result = engine.execute(
1727            table.clone(),
1728            "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
1729        );
1730        match result {
1731            Ok(view) => {
1732                println!(
1733                    "SUCCESS: Found {} rows with NOT complex logic",
1734                    view.row_count()
1735                );
1736                assert_eq!(view.row_count(), 2); // Should find rows 1 (Pending+High) and 4 (In Progress+Medium)
1737            }
1738            Err(e) => {
1739                panic!("NOT complex logic query failed: {e}");
1740            }
1741        }
1742
1743        println!("\n=== Complex logical expressions test complete! ===");
1744    }
1745
1746    #[test]
1747    fn test_mixed_data_types_and_edge_cases() {
1748        // Initialize tracing for debug output
1749        let _ = tracing_subscriber::fmt()
1750            .with_max_level(tracing::Level::DEBUG)
1751            .try_init();
1752
1753        let mut table = DataTable::new("test");
1754        table.add_column(DataColumn::new("id"));
1755        table.add_column(DataColumn::new("value"));
1756        table.add_column(DataColumn::new("nullable_field"));
1757
1758        // Add test data with mixed types and edge cases
1759        table
1760            .add_row(DataRow::new(vec![
1761                DataValue::Integer(1),
1762                DataValue::String("123.45".to_string()),
1763                DataValue::String("present".to_string()),
1764            ]))
1765            .unwrap();
1766
1767        table
1768            .add_row(DataRow::new(vec![
1769                DataValue::Integer(2),
1770                DataValue::Float(678.90),
1771                DataValue::Null,
1772            ]))
1773            .unwrap();
1774
1775        table
1776            .add_row(DataRow::new(vec![
1777                DataValue::Integer(3),
1778                DataValue::Boolean(true),
1779                DataValue::String("also present".to_string()),
1780            ]))
1781            .unwrap();
1782
1783        table
1784            .add_row(DataRow::new(vec![
1785                DataValue::Integer(4),
1786                DataValue::String("false".to_string()),
1787                DataValue::Null,
1788            ]))
1789            .unwrap();
1790
1791        let table = Arc::new(table);
1792        let engine = QueryEngine::new();
1793
1794        println!("\n=== Testing Mixed Data Types and Edge Cases ===");
1795        println!("Table has {} rows", table.row_count());
1796        for i in 0..table.row_count() {
1797            let value = table.get_value(i, 1);
1798            let nullable = table.get_value(i, 2);
1799            println!("Row {i}: value = {value:?}, nullable_field = {nullable:?}");
1800        }
1801
1802        // Test type coercion with boolean Contains
1803        println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
1804        let result = engine.execute(
1805            table.clone(),
1806            "SELECT * FROM test WHERE value.Contains('true')",
1807        );
1808        match result {
1809            Ok(view) => {
1810                println!(
1811                    "SUCCESS: Found {} rows with boolean coercion",
1812                    view.row_count()
1813                );
1814                assert_eq!(view.row_count(), 1); // Should find the boolean true row
1815            }
1816            Err(e) => {
1817                panic!("Boolean coercion query failed: {e}");
1818            }
1819        }
1820
1821        // Test multiple IN values with mixed types
1822        println!("\n--- Test: id IN (1, 3) ---");
1823        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
1824        match result {
1825            Ok(view) => {
1826                println!("SUCCESS: Found {} rows with IN clause", view.row_count());
1827                assert_eq!(view.row_count(), 2); // Should find rows with id 1 and 3
1828            }
1829            Err(e) => {
1830                panic!("Multiple IN values query failed: {e}");
1831            }
1832        }
1833
1834        println!("\n=== Mixed data types test complete! ===");
1835    }
1836
1837    #[test]
1838    fn test_not_in_parsing() {
1839        use crate::sql::recursive_parser::Parser;
1840
1841        let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
1842        println!("\n=== Testing NOT IN parsing ===");
1843        println!("Parsing query: {query}");
1844
1845        let mut parser = Parser::new(query);
1846        match parser.parse() {
1847            Ok(statement) => {
1848                println!("Parsed statement: {statement:#?}");
1849                if let Some(where_clause) = statement.where_clause {
1850                    println!("WHERE conditions: {:#?}", where_clause.conditions);
1851                    if let Some(first_condition) = where_clause.conditions.first() {
1852                        println!("First condition expression: {:#?}", first_condition.expr);
1853                    }
1854                }
1855            }
1856            Err(e) => {
1857                panic!("Parse error: {e}");
1858            }
1859        }
1860    }
1861}
1862
1863#[cfg(test)]
1864#[path = "query_engine_tests.rs"]
1865mod query_engine_tests;