sql_cli/data/
query_engine.rs

1use anyhow::{anyhow, Result};
2use std::collections::HashMap;
3use std::sync::Arc;
4use std::time::Instant;
5use tracing::{debug, info};
6
7use crate::config::config::BehaviorConfig;
8use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
9use crate::data::data_view::DataView;
10use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
11use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
12use crate::sql::aggregates::contains_aggregate;
13use crate::sql::recursive_parser::{
14    OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection, SqlExpression, CTE,
15};
16
17/// Query engine that executes SQL directly on `DataTable`
18pub struct QueryEngine {
19    case_insensitive: bool,
20    date_notation: String,
21    behavior_config: Option<BehaviorConfig>,
22}
23
24impl Default for QueryEngine {
25    fn default() -> Self {
26        Self::new()
27    }
28}
29
30impl QueryEngine {
31    #[must_use]
32    pub fn new() -> Self {
33        Self {
34            case_insensitive: false,
35            date_notation: "us".to_string(),
36            behavior_config: None,
37        }
38    }
39
40    #[must_use]
41    pub fn with_behavior_config(config: BehaviorConfig) -> Self {
42        let case_insensitive = config.case_insensitive_default;
43        let date_notation = config.default_date_notation.clone();
44        Self {
45            case_insensitive,
46            date_notation,
47            behavior_config: Some(config),
48        }
49    }
50
51    #[must_use]
52    pub fn with_date_notation(date_notation: String) -> Self {
53        Self {
54            case_insensitive: false,
55            date_notation,
56            behavior_config: None,
57        }
58    }
59
60    #[must_use]
61    pub fn with_case_insensitive(case_insensitive: bool) -> Self {
62        Self {
63            case_insensitive,
64            date_notation: "us".to_string(),
65            behavior_config: None,
66        }
67    }
68
69    #[must_use]
70    pub fn with_case_insensitive_and_date_notation(
71        case_insensitive: bool,
72        date_notation: String,
73    ) -> Self {
74        Self {
75            case_insensitive,
76            date_notation,
77            behavior_config: None,
78        }
79    }
80
81    /// Find a column name similar to the given name using edit distance
82    fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
83        let columns = table.column_names();
84        let mut best_match: Option<(String, usize)> = None;
85
86        for col in columns {
87            let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
88            // Only suggest if distance is small (likely a typo)
89            // Allow up to 3 edits for longer names
90            let max_distance = if name.len() > 10 { 3 } else { 2 };
91            if distance <= max_distance {
92                match &best_match {
93                    None => best_match = Some((col, distance)),
94                    Some((_, best_dist)) if distance < *best_dist => {
95                        best_match = Some((col, distance));
96                    }
97                    _ => {}
98                }
99            }
100        }
101
102        best_match.map(|(name, _)| name)
103    }
104
105    /// Calculate Levenshtein edit distance between two strings
106    fn edit_distance(&self, s1: &str, s2: &str) -> usize {
107        let len1 = s1.len();
108        let len2 = s2.len();
109        let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
110
111        for i in 0..=len1 {
112            matrix[i][0] = i;
113        }
114        for j in 0..=len2 {
115            matrix[0][j] = j;
116        }
117
118        for (i, c1) in s1.chars().enumerate() {
119            for (j, c2) in s2.chars().enumerate() {
120                let cost = usize::from(c1 != c2);
121                matrix[i + 1][j + 1] = std::cmp::min(
122                    matrix[i][j + 1] + 1, // deletion
123                    std::cmp::min(
124                        matrix[i + 1][j] + 1, // insertion
125                        matrix[i][j] + cost,  // substitution
126                    ),
127                );
128            }
129        }
130
131        matrix[len1][len2]
132    }
133
134    /// Execute a SQL query on a `DataTable` and return a `DataView` (for backward compatibility)
135    pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
136        let start_time = Instant::now();
137
138        // Parse the SQL query
139        let parse_start = Instant::now();
140        let mut parser = Parser::new(sql);
141        let statement = parser
142            .parse()
143            .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
144        let parse_duration = parse_start.elapsed();
145
146        // Convert SelectStatement to DataView operations
147        let build_start = Instant::now();
148        // Create an empty context for CTEs
149        let mut cte_context = HashMap::new();
150        let result = self.build_view_with_context(table, statement, &mut cte_context)?;
151        let build_duration = build_start.elapsed();
152
153        let total_duration = start_time.elapsed();
154        info!(
155            "Query execution complete: parse={:?}, build={:?}, total={:?}, rows={}",
156            parse_duration,
157            build_duration,
158            total_duration,
159            result.row_count()
160        );
161
162        Ok(result)
163    }
164
165    /// Build a `DataView` from a parsed SQL statement
166    fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
167        let mut cte_context = HashMap::new();
168        self.build_view_with_context(table, statement, &mut cte_context)
169    }
170
171    /// Build a DataView from a SelectStatement with CTE context
172    fn build_view_with_context(
173        &self,
174        table: Arc<DataTable>,
175        statement: SelectStatement,
176        cte_context: &mut HashMap<String, Arc<DataView>>,
177    ) -> Result<DataView> {
178        // First, process any CTEs
179        for cte in &statement.ctes {
180            debug!("QueryEngine: Processing CTE '{}'...", cte.name);
181            // Execute the CTE query (it might reference earlier CTEs)
182            let cte_result =
183                self.build_view_with_context(table.clone(), cte.query.clone(), cte_context)?;
184
185            // Store the result in the context for later use
186            cte_context.insert(cte.name.clone(), Arc::new(cte_result));
187            debug!(
188                "QueryEngine: CTE '{}' processed, stored in context",
189                cte.name
190            );
191        }
192
193        // Determine the source table for the main query
194        let source_table = if let Some(ref subquery) = statement.from_subquery {
195            // Execute the subquery and use its result as the source
196            debug!("QueryEngine: Processing FROM subquery...");
197            let subquery_result =
198                self.build_view_with_context(table.clone(), *subquery.clone(), cte_context)?;
199
200            // Convert the DataView to a DataTable for use as source
201            // This materializes the subquery result
202            let materialized = self.materialize_view(subquery_result)?;
203            Arc::new(materialized)
204        } else if let Some(ref table_name) = statement.from_table {
205            // Check if this references a CTE
206            if let Some(cte_view) = cte_context.get(table_name) {
207                debug!("QueryEngine: Using CTE '{}' as source table", table_name);
208                // Materialize the CTE view as a table
209                let materialized = self.materialize_view((**cte_view).clone())?;
210                Arc::new(materialized)
211            } else {
212                // Regular table reference - use the provided table
213                table.clone()
214            }
215        } else {
216            // No FROM clause - use the provided table
217            table.clone()
218        };
219
220        // Continue with the existing build_view logic but using source_table
221        self.build_view_internal(source_table, statement)
222    }
223
224    /// Materialize a DataView into a new DataTable
225    fn materialize_view(&self, view: DataView) -> Result<DataTable> {
226        let source = view.source();
227        let mut result_table = DataTable::new("derived");
228
229        // Get the visible columns from the view
230        let visible_cols = view.visible_column_indices().to_vec();
231
232        // Copy column definitions
233        for col_idx in &visible_cols {
234            let col = &source.columns[*col_idx];
235            let new_col = DataColumn {
236                name: col.name.clone(),
237                data_type: col.data_type.clone(),
238                nullable: col.nullable,
239                unique_values: col.unique_values,
240                null_count: col.null_count,
241                metadata: col.metadata.clone(),
242            };
243            result_table.add_column(new_col);
244        }
245
246        // Copy visible rows
247        for row_idx in view.visible_row_indices() {
248            let source_row = &source.rows[*row_idx];
249            let mut new_row = DataRow { values: Vec::new() };
250
251            for col_idx in &visible_cols {
252                new_row.values.push(source_row.values[*col_idx].clone());
253            }
254
255            result_table.add_row(new_row);
256        }
257
258        Ok(result_table)
259    }
260
261    fn build_view_internal(
262        &self,
263        table: Arc<DataTable>,
264        statement: SelectStatement,
265    ) -> Result<DataView> {
266        debug!(
267            "QueryEngine::build_view - select_items: {:?}",
268            statement.select_items
269        );
270        debug!(
271            "QueryEngine::build_view - where_clause: {:?}",
272            statement.where_clause
273        );
274
275        // Start with all rows visible
276        let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
277
278        // Apply WHERE clause filtering using recursive evaluator
279        if let Some(where_clause) = &statement.where_clause {
280            let total_rows = table.row_count();
281            debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
282            debug!("QueryEngine: WHERE clause = {:?}", where_clause);
283
284            let filter_start = Instant::now();
285            // Filter visible rows based on WHERE clause
286            let mut filtered_rows = Vec::new();
287            for row_idx in visible_rows {
288                // Only log for first few rows to avoid performance impact
289                if row_idx < 3 {
290                    debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
291                }
292                let evaluator = RecursiveWhereEvaluator::with_config(
293                    &table,
294                    self.case_insensitive,
295                    self.date_notation.clone(),
296                );
297                match evaluator.evaluate(where_clause, row_idx) {
298                    Ok(result) => {
299                        if row_idx < 3 {
300                            debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
301                        }
302                        if result {
303                            filtered_rows.push(row_idx);
304                        }
305                    }
306                    Err(e) => {
307                        if row_idx < 3 {
308                            debug!(
309                                "QueryEngine: WHERE evaluation error for row {}: {}",
310                                row_idx, e
311                            );
312                        }
313                        // Propagate WHERE clause errors instead of silently ignoring them
314                        return Err(e);
315                    }
316                }
317            }
318            visible_rows = filtered_rows;
319            let filter_duration = filter_start.elapsed();
320            info!(
321                "WHERE clause filtering: {} rows -> {} rows in {:?}",
322                total_rows,
323                visible_rows.len(),
324                filter_duration
325            );
326
327            // Debug log moved to info level above with timing
328        }
329
330        // Create initial DataView with filtered rows
331        let mut view = DataView::new(table.clone());
332        view = view.with_rows(visible_rows);
333
334        // Handle GROUP BY if present
335        if let Some(group_by_columns) = &statement.group_by {
336            if !group_by_columns.is_empty() {
337                debug!("QueryEngine: Processing GROUP BY: {:?}", group_by_columns);
338                view = self.apply_group_by(
339                    view,
340                    group_by_columns,
341                    &statement.select_items,
342                    statement.having.as_ref(),
343                )?;
344            }
345        } else {
346            // Apply column projection or computed expressions (SELECT clause) - do this AFTER filtering
347            if !statement.select_items.is_empty() {
348                // Check if we have ANY non-star items (not just the first one)
349                let has_non_star_items = statement
350                    .select_items
351                    .iter()
352                    .any(|item| !matches!(item, SelectItem::Star));
353
354                // Apply select items if:
355                // 1. We have computed expressions or explicit columns
356                // 2. OR we have a mix of star and other items (e.g., SELECT *, computed_col)
357                if has_non_star_items || statement.select_items.len() > 1 {
358                    view = self.apply_select_items(view, &statement.select_items)?;
359                }
360                // If it's just a single star, no projection needed
361            } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
362                // Fallback to legacy column projection for backward compatibility
363                let column_indices = self.resolve_column_indices(&table, &statement.columns)?;
364                view = view.with_columns(column_indices);
365            }
366        }
367
368        // Apply ORDER BY sorting
369        if let Some(order_by_columns) = &statement.order_by {
370            if !order_by_columns.is_empty() {
371                view = self.apply_multi_order_by(view, order_by_columns)?;
372            }
373        }
374
375        // Apply LIMIT/OFFSET
376        if let Some(limit) = statement.limit {
377            let offset = statement.offset.unwrap_or(0);
378            view = view.with_limit(limit, offset);
379        }
380
381        Ok(view)
382    }
383
384    /// Resolve column names to indices
385    fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
386        let mut indices = Vec::new();
387        let table_columns = table.column_names();
388
389        for col_name in columns {
390            let index = table_columns
391                .iter()
392                .position(|c| c.eq_ignore_ascii_case(col_name))
393                .ok_or_else(|| {
394                    let suggestion = self.find_similar_column(table, col_name);
395                    match suggestion {
396                        Some(similar) => anyhow::anyhow!(
397                            "Column '{}' not found. Did you mean '{}'?",
398                            col_name,
399                            similar
400                        ),
401                        None => anyhow::anyhow!("Column '{}' not found", col_name),
402                    }
403                })?;
404            indices.push(index);
405        }
406
407        Ok(indices)
408    }
409
410    /// Apply SELECT items (columns and computed expressions) to create new view
411    fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
412        debug!(
413            "QueryEngine::apply_select_items - items: {:?}",
414            select_items
415        );
416        debug!(
417            "QueryEngine::apply_select_items - input view has {} rows",
418            view.row_count()
419        );
420
421        // Check if ALL select items are aggregate functions (no GROUP BY)
422        let all_aggregates = select_items.iter().all(|item| match item {
423            SelectItem::Expression { expr, .. } => contains_aggregate(expr),
424            SelectItem::Column(_) => false,
425            SelectItem::Star => false,
426        });
427
428        if all_aggregates && view.row_count() > 0 {
429            // Special handling for aggregate-only queries (no GROUP BY)
430            // These should produce exactly one row
431            return self.apply_aggregate_select(view, select_items);
432        }
433
434        // Check if we need to create computed columns
435        let has_computed_expressions = select_items
436            .iter()
437            .any(|item| matches!(item, SelectItem::Expression { .. }));
438
439        debug!(
440            "QueryEngine::apply_select_items - has_computed_expressions: {}",
441            has_computed_expressions
442        );
443
444        if !has_computed_expressions {
445            // Simple case: only columns, use existing projection logic
446            let column_indices = self.resolve_select_columns(view.source(), select_items)?;
447            return Ok(view.with_columns(column_indices));
448        }
449
450        // Complex case: we have computed expressions
451        // IMPORTANT: We create a PROJECTED view, not a new table
452        // This preserves the original DataTable reference
453
454        let source_table = view.source();
455        let visible_rows = view.visible_row_indices();
456
457        // Create a temporary table just for the computed result view
458        // But this table is only used for the current query result
459        let mut computed_table = DataTable::new("query_result");
460
461        // First, expand any Star selectors to actual columns
462        let mut expanded_items = Vec::new();
463        for item in select_items {
464            match item {
465                SelectItem::Star => {
466                    // Expand * to all columns from source table
467                    for col_name in source_table.column_names() {
468                        expanded_items.push(SelectItem::Column(col_name));
469                    }
470                }
471                _ => expanded_items.push(item.clone()),
472            }
473        }
474
475        // Add columns based on expanded SelectItems, handling duplicates
476        let mut column_name_counts: std::collections::HashMap<String, usize> =
477            std::collections::HashMap::new();
478
479        for item in &expanded_items {
480            let base_name = match item {
481                SelectItem::Column(name) => name.clone(),
482                SelectItem::Expression { alias, .. } => alias.clone(),
483                SelectItem::Star => unreachable!("Star should have been expanded"),
484            };
485
486            // Check if this column name has been used before
487            let count = column_name_counts.entry(base_name.clone()).or_insert(0);
488            let column_name = if *count == 0 {
489                // First occurrence, use the name as-is
490                base_name.clone()
491            } else {
492                // Duplicate, append a suffix
493                format!("{base_name}_{count}")
494            };
495            *count += 1;
496
497            computed_table.add_column(DataColumn::new(&column_name));
498        }
499
500        // Calculate values for each row
501        let mut evaluator =
502            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
503
504        for &row_idx in visible_rows {
505            let mut row_values = Vec::new();
506
507            for item in &expanded_items {
508                let value = match item {
509                    SelectItem::Column(col_name) => {
510                        // Simple column reference
511                        let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
512                            let suggestion = self.find_similar_column(source_table, col_name);
513                            match suggestion {
514                                Some(similar) => anyhow::anyhow!(
515                                    "Column '{}' not found. Did you mean '{}'?",
516                                    col_name,
517                                    similar
518                                ),
519                                None => anyhow::anyhow!("Column '{}' not found", col_name),
520                            }
521                        })?;
522                        let row = source_table
523                            .get_row(row_idx)
524                            .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
525                        row.get(col_idx)
526                            .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
527                            .clone()
528                    }
529                    SelectItem::Expression { expr, .. } => {
530                        // Computed expression
531                        evaluator.evaluate(expr, row_idx)?
532                    }
533                    SelectItem::Star => unreachable!("Star should have been expanded"),
534                };
535                row_values.push(value);
536            }
537
538            computed_table
539                .add_row(DataRow::new(row_values))
540                .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
541        }
542
543        // Return a view of the computed result
544        // This is a temporary view for this query only
545        Ok(DataView::new(Arc::new(computed_table)))
546    }
547
548    /// Apply aggregate-only SELECT (no GROUP BY - produces single row)
549    fn apply_aggregate_select(
550        &self,
551        view: DataView,
552        select_items: &[SelectItem],
553    ) -> Result<DataView> {
554        debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
555
556        let source_table = view.source();
557        let mut result_table = DataTable::new("aggregate_result");
558
559        // Add columns for each select item
560        for item in select_items {
561            let column_name = match item {
562                SelectItem::Expression { alias, .. } => alias.clone(),
563                _ => unreachable!("Should only have expressions in aggregate-only query"),
564            };
565            result_table.add_column(DataColumn::new(&column_name));
566        }
567
568        // Create evaluator with visible rows from the view (for filtered aggregates)
569        let visible_rows = view.visible_row_indices().to_vec();
570        let mut evaluator =
571            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
572                .with_visible_rows(visible_rows);
573
574        // Evaluate each aggregate expression once (they handle all rows internally)
575        let mut row_values = Vec::new();
576        for item in select_items {
577            match item {
578                SelectItem::Expression { expr, .. } => {
579                    // The evaluator will handle aggregates over all rows
580                    // We pass row_index=0 but aggregates ignore it and process all rows
581                    let value = evaluator.evaluate(expr, 0)?;
582                    row_values.push(value);
583                }
584                _ => unreachable!("Should only have expressions in aggregate-only query"),
585            }
586        }
587
588        // Add the single result row
589        result_table
590            .add_row(DataRow::new(row_values))
591            .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
592
593        Ok(DataView::new(Arc::new(result_table)))
594    }
595
596    /// Resolve `SelectItem` columns to indices (for simple column projections only)
597    fn resolve_select_columns(
598        &self,
599        table: &DataTable,
600        select_items: &[SelectItem],
601    ) -> Result<Vec<usize>> {
602        let mut indices = Vec::new();
603        let table_columns = table.column_names();
604
605        for item in select_items {
606            match item {
607                SelectItem::Column(col_name) => {
608                    let index = table_columns
609                        .iter()
610                        .position(|c| c.eq_ignore_ascii_case(col_name))
611                        .ok_or_else(|| {
612                            let suggestion = self.find_similar_column(table, col_name);
613                            match suggestion {
614                                Some(similar) => anyhow::anyhow!(
615                                    "Column '{}' not found. Did you mean '{}'?",
616                                    col_name,
617                                    similar
618                                ),
619                                None => anyhow::anyhow!("Column '{}' not found", col_name),
620                            }
621                        })?;
622                    indices.push(index);
623                }
624                SelectItem::Star => {
625                    // Expand * to all column indices
626                    for i in 0..table_columns.len() {
627                        indices.push(i);
628                    }
629                }
630                SelectItem::Expression { .. } => {
631                    return Err(anyhow::anyhow!(
632                        "Computed expressions require new table creation"
633                    ));
634                }
635            }
636        }
637
638        Ok(indices)
639    }
640
641    /// Apply multi-column ORDER BY sorting to the view
642    fn apply_multi_order_by(
643        &self,
644        mut view: DataView,
645        order_by_columns: &[OrderByColumn],
646    ) -> Result<DataView> {
647        // Build list of (source_column_index, ascending) tuples
648        let mut sort_columns = Vec::new();
649
650        for order_col in order_by_columns {
651            // Try to find the column index
652            // First check in the current view's source (this handles both regular columns and computed columns)
653            let col_index = view
654                .source()
655                .get_column_index(&order_col.column)
656                .ok_or_else(|| {
657                    // If not found, provide helpful error with suggestions
658                    let suggestion = self.find_similar_column(view.source(), &order_col.column);
659                    match suggestion {
660                        Some(similar) => anyhow::anyhow!(
661                            "Column '{}' not found. Did you mean '{}'?",
662                            order_col.column,
663                            similar
664                        ),
665                        None => {
666                            // Also list available columns for debugging
667                            let available_cols = view.source().column_names().join(", ");
668                            anyhow::anyhow!(
669                                "Column '{}' not found. Available columns: {}",
670                                order_col.column,
671                                available_cols
672                            )
673                        }
674                    }
675                })?;
676
677            let ascending = matches!(order_col.direction, SortDirection::Asc);
678            sort_columns.push((col_index, ascending));
679        }
680
681        // Apply multi-column sorting
682        view.apply_multi_sort(&sort_columns)?;
683        Ok(view)
684    }
685
686    /// Apply GROUP BY to the view with optional HAVING clause
687    fn apply_group_by(
688        &self,
689        view: DataView,
690        group_by_columns: &[String],
691        select_items: &[SelectItem],
692        having: Option<&SqlExpression>,
693    ) -> Result<DataView> {
694        debug!(
695            "QueryEngine::apply_group_by - grouping by: {:?}",
696            group_by_columns
697        );
698
699        // Group the data using DataView's group_by method
700        let groups = view.group_by(group_by_columns)?;
701        debug!(
702            "QueryEngine::apply_group_by - created {} groups",
703            groups.len()
704        );
705
706        // Create a result table for the grouped data
707        let mut result_table = DataTable::new("grouped_result");
708
709        // Determine which columns to include in the result
710        // 1. GROUP BY columns always come first
711        // 2. Then aggregate functions
712
713        // Add GROUP BY columns to result
714        for col_name in group_by_columns {
715            result_table.add_column(DataColumn::new(col_name));
716        }
717
718        // Process SELECT items to find aggregates and their aliases
719        let mut aggregate_columns = Vec::new();
720        for item in select_items {
721            match item {
722                SelectItem::Expression { expr, alias } => {
723                    if contains_aggregate(expr) {
724                        // Expression always has an alias in the AST
725                        result_table.add_column(DataColumn::new(alias));
726                        aggregate_columns.push((expr.clone(), alias.clone()));
727                    }
728                }
729                SelectItem::Column(col_name) => {
730                    // Non-aggregate columns must be in GROUP BY
731                    if !group_by_columns.contains(col_name) {
732                        return Err(anyhow!(
733                            "Column '{}' must appear in GROUP BY clause or be used in an aggregate function",
734                            col_name
735                        ));
736                    }
737                }
738                SelectItem::Star => {
739                    // For GROUP BY queries, * is not really meaningful
740                    // We'll just include the GROUP BY columns which we already added
741                }
742            }
743        }
744
745        // Process each group
746        for (group_key, group_view) in groups {
747            let mut row_values = Vec::new();
748            let mut aggregate_values = std::collections::HashMap::new();
749
750            // Add GROUP BY column values from the key
751            for (i, value) in group_key.0.iter().enumerate() {
752                row_values.push(value.clone());
753                // Store GROUP BY columns for HAVING evaluation
754                if i < group_by_columns.len() {
755                    aggregate_values.insert(group_by_columns[i].clone(), value.clone());
756                }
757            }
758
759            // Calculate aggregate values for this group
760            for (expr, col_name) in &aggregate_columns {
761                // Set the visible rows for this group so aggregates work correctly
762                let group_rows = group_view.get_visible_rows();
763                let mut evaluator = ArithmeticEvaluator::new(group_view.source())
764                    .with_visible_rows(group_rows.clone());
765
766                // For aggregates, we need to evaluate across all rows in the group
767                let value = if group_view.row_count() > 0 && !group_rows.is_empty() {
768                    // Evaluate the aggregate expression
769                    // The arithmetic evaluator will handle the aggregate across visible rows
770                    evaluator
771                        .evaluate(expr, group_rows[0])
772                        .unwrap_or(DataValue::Null)
773                } else {
774                    DataValue::Null
775                };
776
777                // Store aggregate value for HAVING evaluation
778                aggregate_values.insert(col_name.clone(), value.clone());
779                row_values.push(value);
780            }
781
782            // Evaluate HAVING clause if present
783            if let Some(having_expr) = having {
784                // Create a temporary table with one row containing the aggregate values
785                let mut temp_table = DataTable::new("having_eval");
786                for col_name in aggregate_values.keys() {
787                    temp_table.add_column(DataColumn::new(col_name));
788                }
789
790                let temp_row_values: Vec<DataValue> = aggregate_values.values().cloned().collect();
791                temp_table
792                    .add_row(DataRow::new(temp_row_values))
793                    .map_err(|e| anyhow!("Failed to create temp table for HAVING: {}", e))?;
794
795                // Evaluate HAVING expression on the aggregate values
796                let mut evaluator = ArithmeticEvaluator::new(&temp_table);
797                let having_result = evaluator
798                    .evaluate(having_expr, 0)
799                    .unwrap_or(DataValue::Boolean(false));
800
801                // Skip this group if HAVING condition is not met
802                match having_result {
803                    DataValue::Boolean(false) => continue,
804                    DataValue::Null => continue,
805                    _ => {} // Include the row
806                }
807            }
808
809            // Add the row to the result table
810            result_table
811                .add_row(DataRow::new(row_values))
812                .map_err(|e| anyhow!("Failed to add row to result table: {}", e))?;
813        }
814
815        // Return a DataView of the grouped result
816        Ok(DataView::new(Arc::new(result_table)))
817    }
818}
819
820#[cfg(test)]
821mod tests {
822    use super::*;
823    use crate::data::datatable::{DataColumn, DataRow, DataValue};
824
825    fn create_test_table() -> Arc<DataTable> {
826        let mut table = DataTable::new("test");
827
828        // Add columns
829        table.add_column(DataColumn::new("id"));
830        table.add_column(DataColumn::new("name"));
831        table.add_column(DataColumn::new("age"));
832
833        // Add rows
834        table
835            .add_row(DataRow::new(vec![
836                DataValue::Integer(1),
837                DataValue::String("Alice".to_string()),
838                DataValue::Integer(30),
839            ]))
840            .unwrap();
841
842        table
843            .add_row(DataRow::new(vec![
844                DataValue::Integer(2),
845                DataValue::String("Bob".to_string()),
846                DataValue::Integer(25),
847            ]))
848            .unwrap();
849
850        table
851            .add_row(DataRow::new(vec![
852                DataValue::Integer(3),
853                DataValue::String("Charlie".to_string()),
854                DataValue::Integer(35),
855            ]))
856            .unwrap();
857
858        Arc::new(table)
859    }
860
861    #[test]
862    fn test_select_all() {
863        let table = create_test_table();
864        let engine = QueryEngine::new();
865
866        let view = engine
867            .execute(table.clone(), "SELECT * FROM users")
868            .unwrap();
869        assert_eq!(view.row_count(), 3);
870        assert_eq!(view.column_count(), 3);
871    }
872
873    #[test]
874    fn test_select_columns() {
875        let table = create_test_table();
876        let engine = QueryEngine::new();
877
878        let view = engine
879            .execute(table.clone(), "SELECT name, age FROM users")
880            .unwrap();
881        assert_eq!(view.row_count(), 3);
882        assert_eq!(view.column_count(), 2);
883    }
884
885    #[test]
886    fn test_select_with_limit() {
887        let table = create_test_table();
888        let engine = QueryEngine::new();
889
890        let view = engine
891            .execute(table.clone(), "SELECT * FROM users LIMIT 2")
892            .unwrap();
893        assert_eq!(view.row_count(), 2);
894    }
895
896    #[test]
897    fn test_type_coercion_contains() {
898        // Initialize tracing for debug output
899        let _ = tracing_subscriber::fmt()
900            .with_max_level(tracing::Level::DEBUG)
901            .try_init();
902
903        let mut table = DataTable::new("test");
904        table.add_column(DataColumn::new("id"));
905        table.add_column(DataColumn::new("status"));
906        table.add_column(DataColumn::new("price"));
907
908        // Add test data with mixed types
909        table
910            .add_row(DataRow::new(vec![
911                DataValue::Integer(1),
912                DataValue::String("Pending".to_string()),
913                DataValue::Float(99.99),
914            ]))
915            .unwrap();
916
917        table
918            .add_row(DataRow::new(vec![
919                DataValue::Integer(2),
920                DataValue::String("Confirmed".to_string()),
921                DataValue::Float(150.50),
922            ]))
923            .unwrap();
924
925        table
926            .add_row(DataRow::new(vec![
927                DataValue::Integer(3),
928                DataValue::String("Pending".to_string()),
929                DataValue::Float(75.00),
930            ]))
931            .unwrap();
932
933        let table = Arc::new(table);
934        let engine = QueryEngine::new();
935
936        println!("\n=== Testing WHERE clause with Contains ===");
937        println!("Table has {} rows", table.row_count());
938        for i in 0..table.row_count() {
939            let status = table.get_value(i, 1);
940            println!("Row {i}: status = {status:?}");
941        }
942
943        // Test 1: Basic string contains (should work)
944        println!("\n--- Test 1: status.Contains('pend') ---");
945        let result = engine.execute(
946            table.clone(),
947            "SELECT * FROM test WHERE status.Contains('pend')",
948        );
949        match result {
950            Ok(view) => {
951                println!("SUCCESS: Found {} matching rows", view.row_count());
952                assert_eq!(view.row_count(), 2); // Should find both Pending rows
953            }
954            Err(e) => {
955                panic!("Query failed: {e}");
956            }
957        }
958
959        // Test 2: Numeric contains (should work with type coercion)
960        println!("\n--- Test 2: price.Contains('9') ---");
961        let result = engine.execute(
962            table.clone(),
963            "SELECT * FROM test WHERE price.Contains('9')",
964        );
965        match result {
966            Ok(view) => {
967                println!(
968                    "SUCCESS: Found {} matching rows with price containing '9'",
969                    view.row_count()
970                );
971                // Should find 99.99 row
972                assert!(view.row_count() >= 1);
973            }
974            Err(e) => {
975                panic!("Numeric coercion query failed: {e}");
976            }
977        }
978
979        println!("\n=== All tests passed! ===");
980    }
981
982    #[test]
983    fn test_not_in_clause() {
984        // Initialize tracing for debug output
985        let _ = tracing_subscriber::fmt()
986            .with_max_level(tracing::Level::DEBUG)
987            .try_init();
988
989        let mut table = DataTable::new("test");
990        table.add_column(DataColumn::new("id"));
991        table.add_column(DataColumn::new("country"));
992
993        // Add test data
994        table
995            .add_row(DataRow::new(vec![
996                DataValue::Integer(1),
997                DataValue::String("CA".to_string()),
998            ]))
999            .unwrap();
1000
1001        table
1002            .add_row(DataRow::new(vec![
1003                DataValue::Integer(2),
1004                DataValue::String("US".to_string()),
1005            ]))
1006            .unwrap();
1007
1008        table
1009            .add_row(DataRow::new(vec![
1010                DataValue::Integer(3),
1011                DataValue::String("UK".to_string()),
1012            ]))
1013            .unwrap();
1014
1015        let table = Arc::new(table);
1016        let engine = QueryEngine::new();
1017
1018        println!("\n=== Testing NOT IN clause ===");
1019        println!("Table has {} rows", table.row_count());
1020        for i in 0..table.row_count() {
1021            let country = table.get_value(i, 1);
1022            println!("Row {i}: country = {country:?}");
1023        }
1024
1025        // Test NOT IN clause - should exclude CA, return US and UK (2 rows)
1026        println!("\n--- Test: country NOT IN ('CA') ---");
1027        let result = engine.execute(
1028            table.clone(),
1029            "SELECT * FROM test WHERE country NOT IN ('CA')",
1030        );
1031        match result {
1032            Ok(view) => {
1033                println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
1034                assert_eq!(view.row_count(), 2); // Should find US and UK
1035            }
1036            Err(e) => {
1037                panic!("NOT IN query failed: {e}");
1038            }
1039        }
1040
1041        println!("\n=== NOT IN test complete! ===");
1042    }
1043
1044    #[test]
1045    fn test_case_insensitive_in_and_not_in() {
1046        // Initialize tracing for debug output
1047        let _ = tracing_subscriber::fmt()
1048            .with_max_level(tracing::Level::DEBUG)
1049            .try_init();
1050
1051        let mut table = DataTable::new("test");
1052        table.add_column(DataColumn::new("id"));
1053        table.add_column(DataColumn::new("country"));
1054
1055        // Add test data with mixed case
1056        table
1057            .add_row(DataRow::new(vec![
1058                DataValue::Integer(1),
1059                DataValue::String("CA".to_string()), // uppercase
1060            ]))
1061            .unwrap();
1062
1063        table
1064            .add_row(DataRow::new(vec![
1065                DataValue::Integer(2),
1066                DataValue::String("us".to_string()), // lowercase
1067            ]))
1068            .unwrap();
1069
1070        table
1071            .add_row(DataRow::new(vec![
1072                DataValue::Integer(3),
1073                DataValue::String("UK".to_string()), // uppercase
1074            ]))
1075            .unwrap();
1076
1077        let table = Arc::new(table);
1078
1079        println!("\n=== Testing Case-Insensitive IN clause ===");
1080        println!("Table has {} rows", table.row_count());
1081        for i in 0..table.row_count() {
1082            let country = table.get_value(i, 1);
1083            println!("Row {i}: country = {country:?}");
1084        }
1085
1086        // Test case-insensitive IN - should match 'CA' with 'ca'
1087        println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
1088        let engine = QueryEngine::with_case_insensitive(true);
1089        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1090        match result {
1091            Ok(view) => {
1092                println!(
1093                    "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
1094                    view.row_count()
1095                );
1096                assert_eq!(view.row_count(), 1); // Should find CA row
1097            }
1098            Err(e) => {
1099                panic!("Case-insensitive IN query failed: {e}");
1100            }
1101        }
1102
1103        // Test case-insensitive NOT IN - should exclude 'CA' when searching for 'ca'
1104        println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
1105        let result = engine.execute(
1106            table.clone(),
1107            "SELECT * FROM test WHERE country NOT IN ('ca')",
1108        );
1109        match result {
1110            Ok(view) => {
1111                println!(
1112                    "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
1113                    view.row_count()
1114                );
1115                assert_eq!(view.row_count(), 2); // Should find us and UK rows
1116            }
1117            Err(e) => {
1118                panic!("Case-insensitive NOT IN query failed: {e}");
1119            }
1120        }
1121
1122        // Test case-sensitive (default) - should NOT match 'CA' with 'ca'
1123        println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
1124        let engine_case_sensitive = QueryEngine::new(); // defaults to case_insensitive=false
1125        let result = engine_case_sensitive
1126            .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1127        match result {
1128            Ok(view) => {
1129                println!(
1130                    "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
1131                    view.row_count()
1132                );
1133                assert_eq!(view.row_count(), 0); // Should find no rows (CA != ca)
1134            }
1135            Err(e) => {
1136                panic!("Case-sensitive IN query failed: {e}");
1137            }
1138        }
1139
1140        println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
1141    }
1142
1143    #[test]
1144    #[ignore = "Parentheses in WHERE clause not yet implemented"]
1145    fn test_parentheses_in_where_clause() {
1146        // Initialize tracing for debug output
1147        let _ = tracing_subscriber::fmt()
1148            .with_max_level(tracing::Level::DEBUG)
1149            .try_init();
1150
1151        let mut table = DataTable::new("test");
1152        table.add_column(DataColumn::new("id"));
1153        table.add_column(DataColumn::new("status"));
1154        table.add_column(DataColumn::new("priority"));
1155
1156        // Add test data
1157        table
1158            .add_row(DataRow::new(vec![
1159                DataValue::Integer(1),
1160                DataValue::String("Pending".to_string()),
1161                DataValue::String("High".to_string()),
1162            ]))
1163            .unwrap();
1164
1165        table
1166            .add_row(DataRow::new(vec![
1167                DataValue::Integer(2),
1168                DataValue::String("Complete".to_string()),
1169                DataValue::String("High".to_string()),
1170            ]))
1171            .unwrap();
1172
1173        table
1174            .add_row(DataRow::new(vec![
1175                DataValue::Integer(3),
1176                DataValue::String("Pending".to_string()),
1177                DataValue::String("Low".to_string()),
1178            ]))
1179            .unwrap();
1180
1181        table
1182            .add_row(DataRow::new(vec![
1183                DataValue::Integer(4),
1184                DataValue::String("Complete".to_string()),
1185                DataValue::String("Low".to_string()),
1186            ]))
1187            .unwrap();
1188
1189        let table = Arc::new(table);
1190        let engine = QueryEngine::new();
1191
1192        println!("\n=== Testing Parentheses in WHERE clause ===");
1193        println!("Table has {} rows", table.row_count());
1194        for i in 0..table.row_count() {
1195            let status = table.get_value(i, 1);
1196            let priority = table.get_value(i, 2);
1197            println!("Row {i}: status = {status:?}, priority = {priority:?}");
1198        }
1199
1200        // Test OR with parentheses - should get (Pending AND High) OR (Complete AND Low)
1201        println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
1202        let result = engine.execute(
1203            table.clone(),
1204            "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
1205        );
1206        match result {
1207            Ok(view) => {
1208                println!(
1209                    "SUCCESS: Found {} rows with parenthetical logic",
1210                    view.row_count()
1211                );
1212                assert_eq!(view.row_count(), 2); // Should find rows 1 and 4
1213            }
1214            Err(e) => {
1215                panic!("Parentheses query failed: {e}");
1216            }
1217        }
1218
1219        println!("\n=== Parentheses test complete! ===");
1220    }
1221
1222    #[test]
1223    #[ignore = "Numeric type coercion needs fixing"]
1224    fn test_numeric_type_coercion() {
1225        // Initialize tracing for debug output
1226        let _ = tracing_subscriber::fmt()
1227            .with_max_level(tracing::Level::DEBUG)
1228            .try_init();
1229
1230        let mut table = DataTable::new("test");
1231        table.add_column(DataColumn::new("id"));
1232        table.add_column(DataColumn::new("price"));
1233        table.add_column(DataColumn::new("quantity"));
1234
1235        // Add test data with different numeric types
1236        table
1237            .add_row(DataRow::new(vec![
1238                DataValue::Integer(1),
1239                DataValue::Float(99.50), // Contains '.'
1240                DataValue::Integer(100),
1241            ]))
1242            .unwrap();
1243
1244        table
1245            .add_row(DataRow::new(vec![
1246                DataValue::Integer(2),
1247                DataValue::Float(150.0), // Contains '.' and '0'
1248                DataValue::Integer(200),
1249            ]))
1250            .unwrap();
1251
1252        table
1253            .add_row(DataRow::new(vec![
1254                DataValue::Integer(3),
1255                DataValue::Integer(75), // No decimal point
1256                DataValue::Integer(50),
1257            ]))
1258            .unwrap();
1259
1260        let table = Arc::new(table);
1261        let engine = QueryEngine::new();
1262
1263        println!("\n=== Testing Numeric Type Coercion ===");
1264        println!("Table has {} rows", table.row_count());
1265        for i in 0..table.row_count() {
1266            let price = table.get_value(i, 1);
1267            let quantity = table.get_value(i, 2);
1268            println!("Row {i}: price = {price:?}, quantity = {quantity:?}");
1269        }
1270
1271        // Test Contains on float values - should find rows with decimal points
1272        println!("\n--- Test: price.Contains('.') ---");
1273        let result = engine.execute(
1274            table.clone(),
1275            "SELECT * FROM test WHERE price.Contains('.')",
1276        );
1277        match result {
1278            Ok(view) => {
1279                println!(
1280                    "SUCCESS: Found {} rows with decimal points in price",
1281                    view.row_count()
1282                );
1283                assert_eq!(view.row_count(), 2); // Should find 99.50 and 150.0
1284            }
1285            Err(e) => {
1286                panic!("Numeric Contains query failed: {e}");
1287            }
1288        }
1289
1290        // Test Contains on integer values converted to string
1291        println!("\n--- Test: quantity.Contains('0') ---");
1292        let result = engine.execute(
1293            table.clone(),
1294            "SELECT * FROM test WHERE quantity.Contains('0')",
1295        );
1296        match result {
1297            Ok(view) => {
1298                println!(
1299                    "SUCCESS: Found {} rows with '0' in quantity",
1300                    view.row_count()
1301                );
1302                assert_eq!(view.row_count(), 2); // Should find 100 and 200
1303            }
1304            Err(e) => {
1305                panic!("Integer Contains query failed: {e}");
1306            }
1307        }
1308
1309        println!("\n=== Numeric type coercion test complete! ===");
1310    }
1311
1312    #[test]
1313    fn test_datetime_comparisons() {
1314        // Initialize tracing for debug output
1315        let _ = tracing_subscriber::fmt()
1316            .with_max_level(tracing::Level::DEBUG)
1317            .try_init();
1318
1319        let mut table = DataTable::new("test");
1320        table.add_column(DataColumn::new("id"));
1321        table.add_column(DataColumn::new("created_date"));
1322
1323        // Add test data with date strings (as they would come from CSV)
1324        table
1325            .add_row(DataRow::new(vec![
1326                DataValue::Integer(1),
1327                DataValue::String("2024-12-15".to_string()),
1328            ]))
1329            .unwrap();
1330
1331        table
1332            .add_row(DataRow::new(vec![
1333                DataValue::Integer(2),
1334                DataValue::String("2025-01-15".to_string()),
1335            ]))
1336            .unwrap();
1337
1338        table
1339            .add_row(DataRow::new(vec![
1340                DataValue::Integer(3),
1341                DataValue::String("2025-02-15".to_string()),
1342            ]))
1343            .unwrap();
1344
1345        let table = Arc::new(table);
1346        let engine = QueryEngine::new();
1347
1348        println!("\n=== Testing DateTime Comparisons ===");
1349        println!("Table has {} rows", table.row_count());
1350        for i in 0..table.row_count() {
1351            let date = table.get_value(i, 1);
1352            println!("Row {i}: created_date = {date:?}");
1353        }
1354
1355        // Test DateTime constructor comparison - should find dates after 2025-01-01
1356        println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1357        let result = engine.execute(
1358            table.clone(),
1359            "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1360        );
1361        match result {
1362            Ok(view) => {
1363                println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1364                assert_eq!(view.row_count(), 2); // Should find 2025-01-15 and 2025-02-15
1365            }
1366            Err(e) => {
1367                panic!("DateTime comparison query failed: {e}");
1368            }
1369        }
1370
1371        println!("\n=== DateTime comparison test complete! ===");
1372    }
1373
1374    #[test]
1375    fn test_not_with_method_calls() {
1376        // Initialize tracing for debug output
1377        let _ = tracing_subscriber::fmt()
1378            .with_max_level(tracing::Level::DEBUG)
1379            .try_init();
1380
1381        let mut table = DataTable::new("test");
1382        table.add_column(DataColumn::new("id"));
1383        table.add_column(DataColumn::new("status"));
1384
1385        // Add test data
1386        table
1387            .add_row(DataRow::new(vec![
1388                DataValue::Integer(1),
1389                DataValue::String("Pending Review".to_string()),
1390            ]))
1391            .unwrap();
1392
1393        table
1394            .add_row(DataRow::new(vec![
1395                DataValue::Integer(2),
1396                DataValue::String("Complete".to_string()),
1397            ]))
1398            .unwrap();
1399
1400        table
1401            .add_row(DataRow::new(vec![
1402                DataValue::Integer(3),
1403                DataValue::String("Pending Approval".to_string()),
1404            ]))
1405            .unwrap();
1406
1407        let table = Arc::new(table);
1408        let engine = QueryEngine::with_case_insensitive(true);
1409
1410        println!("\n=== Testing NOT with Method Calls ===");
1411        println!("Table has {} rows", table.row_count());
1412        for i in 0..table.row_count() {
1413            let status = table.get_value(i, 1);
1414            println!("Row {i}: status = {status:?}");
1415        }
1416
1417        // Test NOT with Contains - should exclude rows containing "pend"
1418        println!("\n--- Test: NOT status.Contains('pend') ---");
1419        let result = engine.execute(
1420            table.clone(),
1421            "SELECT * FROM test WHERE NOT status.Contains('pend')",
1422        );
1423        match result {
1424            Ok(view) => {
1425                println!(
1426                    "SUCCESS: Found {} rows NOT containing 'pend'",
1427                    view.row_count()
1428                );
1429                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1430            }
1431            Err(e) => {
1432                panic!("NOT Contains query failed: {e}");
1433            }
1434        }
1435
1436        // Test NOT with StartsWith
1437        println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1438        let result = engine.execute(
1439            table.clone(),
1440            "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1441        );
1442        match result {
1443            Ok(view) => {
1444                println!(
1445                    "SUCCESS: Found {} rows NOT starting with 'Pending'",
1446                    view.row_count()
1447                );
1448                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1449            }
1450            Err(e) => {
1451                panic!("NOT StartsWith query failed: {e}");
1452            }
1453        }
1454
1455        println!("\n=== NOT with method calls test complete! ===");
1456    }
1457
1458    #[test]
1459    #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1460    fn test_complex_logical_expressions() {
1461        // Initialize tracing for debug output
1462        let _ = tracing_subscriber::fmt()
1463            .with_max_level(tracing::Level::DEBUG)
1464            .try_init();
1465
1466        let mut table = DataTable::new("test");
1467        table.add_column(DataColumn::new("id"));
1468        table.add_column(DataColumn::new("status"));
1469        table.add_column(DataColumn::new("priority"));
1470        table.add_column(DataColumn::new("assigned"));
1471
1472        // Add comprehensive test data
1473        table
1474            .add_row(DataRow::new(vec![
1475                DataValue::Integer(1),
1476                DataValue::String("Pending".to_string()),
1477                DataValue::String("High".to_string()),
1478                DataValue::String("John".to_string()),
1479            ]))
1480            .unwrap();
1481
1482        table
1483            .add_row(DataRow::new(vec![
1484                DataValue::Integer(2),
1485                DataValue::String("Complete".to_string()),
1486                DataValue::String("High".to_string()),
1487                DataValue::String("Jane".to_string()),
1488            ]))
1489            .unwrap();
1490
1491        table
1492            .add_row(DataRow::new(vec![
1493                DataValue::Integer(3),
1494                DataValue::String("Pending".to_string()),
1495                DataValue::String("Low".to_string()),
1496                DataValue::String("John".to_string()),
1497            ]))
1498            .unwrap();
1499
1500        table
1501            .add_row(DataRow::new(vec![
1502                DataValue::Integer(4),
1503                DataValue::String("In Progress".to_string()),
1504                DataValue::String("Medium".to_string()),
1505                DataValue::String("Jane".to_string()),
1506            ]))
1507            .unwrap();
1508
1509        let table = Arc::new(table);
1510        let engine = QueryEngine::new();
1511
1512        println!("\n=== Testing Complex Logical Expressions ===");
1513        println!("Table has {} rows", table.row_count());
1514        for i in 0..table.row_count() {
1515            let status = table.get_value(i, 1);
1516            let priority = table.get_value(i, 2);
1517            let assigned = table.get_value(i, 3);
1518            println!(
1519                "Row {i}: status = {status:?}, priority = {priority:?}, assigned = {assigned:?}"
1520            );
1521        }
1522
1523        // Test complex AND/OR logic
1524        println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
1525        let result = engine.execute(
1526            table.clone(),
1527            "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
1528        );
1529        match result {
1530            Ok(view) => {
1531                println!(
1532                    "SUCCESS: Found {} rows with complex logic",
1533                    view.row_count()
1534                );
1535                assert_eq!(view.row_count(), 2); // Should find rows 1 and 3 (both Pending, one High priority, both assigned to John)
1536            }
1537            Err(e) => {
1538                panic!("Complex logic query failed: {e}");
1539            }
1540        }
1541
1542        // Test NOT with complex expressions
1543        println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
1544        let result = engine.execute(
1545            table.clone(),
1546            "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
1547        );
1548        match result {
1549            Ok(view) => {
1550                println!(
1551                    "SUCCESS: Found {} rows with NOT complex logic",
1552                    view.row_count()
1553                );
1554                assert_eq!(view.row_count(), 2); // Should find rows 1 (Pending+High) and 4 (In Progress+Medium)
1555            }
1556            Err(e) => {
1557                panic!("NOT complex logic query failed: {e}");
1558            }
1559        }
1560
1561        println!("\n=== Complex logical expressions test complete! ===");
1562    }
1563
1564    #[test]
1565    fn test_mixed_data_types_and_edge_cases() {
1566        // Initialize tracing for debug output
1567        let _ = tracing_subscriber::fmt()
1568            .with_max_level(tracing::Level::DEBUG)
1569            .try_init();
1570
1571        let mut table = DataTable::new("test");
1572        table.add_column(DataColumn::new("id"));
1573        table.add_column(DataColumn::new("value"));
1574        table.add_column(DataColumn::new("nullable_field"));
1575
1576        // Add test data with mixed types and edge cases
1577        table
1578            .add_row(DataRow::new(vec![
1579                DataValue::Integer(1),
1580                DataValue::String("123.45".to_string()),
1581                DataValue::String("present".to_string()),
1582            ]))
1583            .unwrap();
1584
1585        table
1586            .add_row(DataRow::new(vec![
1587                DataValue::Integer(2),
1588                DataValue::Float(678.90),
1589                DataValue::Null,
1590            ]))
1591            .unwrap();
1592
1593        table
1594            .add_row(DataRow::new(vec![
1595                DataValue::Integer(3),
1596                DataValue::Boolean(true),
1597                DataValue::String("also present".to_string()),
1598            ]))
1599            .unwrap();
1600
1601        table
1602            .add_row(DataRow::new(vec![
1603                DataValue::Integer(4),
1604                DataValue::String("false".to_string()),
1605                DataValue::Null,
1606            ]))
1607            .unwrap();
1608
1609        let table = Arc::new(table);
1610        let engine = QueryEngine::new();
1611
1612        println!("\n=== Testing Mixed Data Types and Edge Cases ===");
1613        println!("Table has {} rows", table.row_count());
1614        for i in 0..table.row_count() {
1615            let value = table.get_value(i, 1);
1616            let nullable = table.get_value(i, 2);
1617            println!("Row {i}: value = {value:?}, nullable_field = {nullable:?}");
1618        }
1619
1620        // Test type coercion with boolean Contains
1621        println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
1622        let result = engine.execute(
1623            table.clone(),
1624            "SELECT * FROM test WHERE value.Contains('true')",
1625        );
1626        match result {
1627            Ok(view) => {
1628                println!(
1629                    "SUCCESS: Found {} rows with boolean coercion",
1630                    view.row_count()
1631                );
1632                assert_eq!(view.row_count(), 1); // Should find the boolean true row
1633            }
1634            Err(e) => {
1635                panic!("Boolean coercion query failed: {e}");
1636            }
1637        }
1638
1639        // Test multiple IN values with mixed types
1640        println!("\n--- Test: id IN (1, 3) ---");
1641        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
1642        match result {
1643            Ok(view) => {
1644                println!("SUCCESS: Found {} rows with IN clause", view.row_count());
1645                assert_eq!(view.row_count(), 2); // Should find rows with id 1 and 3
1646            }
1647            Err(e) => {
1648                panic!("Multiple IN values query failed: {e}");
1649            }
1650        }
1651
1652        println!("\n=== Mixed data types test complete! ===");
1653    }
1654
1655    #[test]
1656    fn test_not_in_parsing() {
1657        use crate::sql::recursive_parser::Parser;
1658
1659        let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
1660        println!("\n=== Testing NOT IN parsing ===");
1661        println!("Parsing query: {query}");
1662
1663        let mut parser = Parser::new(query);
1664        match parser.parse() {
1665            Ok(statement) => {
1666                println!("Parsed statement: {statement:#?}");
1667                if let Some(where_clause) = statement.where_clause {
1668                    println!("WHERE conditions: {:#?}", where_clause.conditions);
1669                    if let Some(first_condition) = where_clause.conditions.first() {
1670                        println!("First condition expression: {:#?}", first_condition.expr);
1671                    }
1672                }
1673            }
1674            Err(e) => {
1675                panic!("Parse error: {e}");
1676            }
1677        }
1678    }
1679}
1680
1681#[cfg(test)]
1682#[path = "query_engine_tests.rs"]
1683mod query_engine_tests;