sql_cli/data/
query_engine.rs

1use anyhow::{anyhow, Result};
2use std::collections::HashMap;
3use std::sync::Arc;
4use std::time::Instant;
5use tracing::{debug, info};
6
7use crate::config::config::BehaviorConfig;
8use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
9use crate::data::data_view::DataView;
10use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
11use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
12use crate::data::virtual_table_generator::VirtualTableGenerator;
13use crate::sql::aggregates::contains_aggregate;
14use crate::sql::recursive_parser::{
15    OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection, SqlExpression, TableFunction,
16};
17
18/// Query engine that executes SQL directly on `DataTable`
19pub struct QueryEngine {
20    case_insensitive: bool,
21    date_notation: String,
22    behavior_config: Option<BehaviorConfig>,
23}
24
25impl Default for QueryEngine {
26    fn default() -> Self {
27        Self::new()
28    }
29}
30
31impl QueryEngine {
32    #[must_use]
33    pub fn new() -> Self {
34        Self {
35            case_insensitive: false,
36            date_notation: "us".to_string(),
37            behavior_config: None,
38        }
39    }
40
41    #[must_use]
42    pub fn with_behavior_config(config: BehaviorConfig) -> Self {
43        let case_insensitive = config.case_insensitive_default;
44        let date_notation = config.default_date_notation.clone();
45        Self {
46            case_insensitive,
47            date_notation,
48            behavior_config: Some(config),
49        }
50    }
51
52    #[must_use]
53    pub fn with_date_notation(date_notation: String) -> Self {
54        Self {
55            case_insensitive: false,
56            date_notation,
57            behavior_config: None,
58        }
59    }
60
61    #[must_use]
62    pub fn with_case_insensitive(case_insensitive: bool) -> Self {
63        Self {
64            case_insensitive,
65            date_notation: "us".to_string(),
66            behavior_config: None,
67        }
68    }
69
70    #[must_use]
71    pub fn with_case_insensitive_and_date_notation(
72        case_insensitive: bool,
73        date_notation: String,
74    ) -> Self {
75        Self {
76            case_insensitive,
77            date_notation,
78            behavior_config: None,
79        }
80    }
81
82    /// Find a column name similar to the given name using edit distance
83    fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
84        let columns = table.column_names();
85        let mut best_match: Option<(String, usize)> = None;
86
87        for col in columns {
88            let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
89            // Only suggest if distance is small (likely a typo)
90            // Allow up to 3 edits for longer names
91            let max_distance = if name.len() > 10 { 3 } else { 2 };
92            if distance <= max_distance {
93                match &best_match {
94                    None => best_match = Some((col, distance)),
95                    Some((_, best_dist)) if distance < *best_dist => {
96                        best_match = Some((col, distance));
97                    }
98                    _ => {}
99                }
100            }
101        }
102
103        best_match.map(|(name, _)| name)
104    }
105
106    /// Calculate Levenshtein edit distance between two strings
107    fn edit_distance(&self, s1: &str, s2: &str) -> usize {
108        let len1 = s1.len();
109        let len2 = s2.len();
110        let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
111
112        for i in 0..=len1 {
113            matrix[i][0] = i;
114        }
115        for j in 0..=len2 {
116            matrix[0][j] = j;
117        }
118
119        for (i, c1) in s1.chars().enumerate() {
120            for (j, c2) in s2.chars().enumerate() {
121                let cost = usize::from(c1 != c2);
122                matrix[i + 1][j + 1] = std::cmp::min(
123                    matrix[i][j + 1] + 1, // deletion
124                    std::cmp::min(
125                        matrix[i + 1][j] + 1, // insertion
126                        matrix[i][j] + cost,  // substitution
127                    ),
128                );
129            }
130        }
131
132        matrix[len1][len2]
133    }
134
135    /// Execute a SQL query on a `DataTable` and return a `DataView` (for backward compatibility)
136    pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
137        let start_time = Instant::now();
138
139        // Parse the SQL query
140        let parse_start = Instant::now();
141        let mut parser = Parser::new(sql);
142        let statement = parser
143            .parse()
144            .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
145        let parse_duration = parse_start.elapsed();
146
147        // Convert SelectStatement to DataView operations
148        let build_start = Instant::now();
149        // Create an empty context for CTEs
150        let mut cte_context = HashMap::new();
151        let result = self.build_view_with_context(table, statement, &mut cte_context)?;
152        let build_duration = build_start.elapsed();
153
154        let total_duration = start_time.elapsed();
155        info!(
156            "Query execution complete: parse={:?}, build={:?}, total={:?}, rows={}",
157            parse_duration,
158            build_duration,
159            total_duration,
160            result.row_count()
161        );
162
163        Ok(result)
164    }
165
166    /// Build a `DataView` from a parsed SQL statement
167    fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
168        let mut cte_context = HashMap::new();
169        self.build_view_with_context(table, statement, &mut cte_context)
170    }
171
172    /// Build a DataView from a SelectStatement with CTE context
173    fn build_view_with_context(
174        &self,
175        table: Arc<DataTable>,
176        statement: SelectStatement,
177        cte_context: &mut HashMap<String, Arc<DataView>>,
178    ) -> Result<DataView> {
179        // First, process any CTEs
180        for cte in &statement.ctes {
181            debug!("QueryEngine: Processing CTE '{}'...", cte.name);
182            // Execute the CTE query (it might reference earlier CTEs)
183            let cte_result =
184                self.build_view_with_context(table.clone(), cte.query.clone(), cte_context)?;
185
186            // Store the result in the context for later use
187            cte_context.insert(cte.name.clone(), Arc::new(cte_result));
188            debug!(
189                "QueryEngine: CTE '{}' processed, stored in context",
190                cte.name
191            );
192        }
193
194        // Determine the source table for the main query
195        let source_table = if let Some(ref table_func) = statement.from_function {
196            // Handle table functions like RANGE()
197            debug!("QueryEngine: Processing table function...");
198            match table_func {
199                TableFunction::Range { start, end, step } => {
200                    // Evaluate expressions to get numeric values
201                    let mut evaluator =
202                        ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
203
204                    // Create a dummy row for evaluating constant expressions
205                    let dummy_row = 0;
206
207                    let start_val = evaluator.evaluate(start, dummy_row)?;
208                    let end_val = evaluator.evaluate(end, dummy_row)?;
209                    let step_val = if let Some(step_expr) = step {
210                        Some(evaluator.evaluate(step_expr, dummy_row)?)
211                    } else {
212                        None
213                    };
214
215                    // Convert DataValues to integers
216                    let start_int = match start_val {
217                        DataValue::Integer(i) => i,
218                        DataValue::Float(f) => f as i64,
219                        _ => return Err(anyhow!("RANGE start must be numeric")),
220                    };
221
222                    let end_int = match end_val {
223                        DataValue::Integer(i) => i,
224                        DataValue::Float(f) => f as i64,
225                        _ => return Err(anyhow!("RANGE end must be numeric")),
226                    };
227
228                    let step_int = if let Some(step) = step_val {
229                        match step {
230                            DataValue::Integer(i) => Some(i),
231                            DataValue::Float(f) => Some(f as i64),
232                            _ => return Err(anyhow!("RANGE step must be numeric")),
233                        }
234                    } else {
235                        None
236                    };
237
238                    // Generate the virtual table
239                    VirtualTableGenerator::generate_range(start_int, end_int, step_int, None)?
240                }
241            }
242        } else if let Some(ref subquery) = statement.from_subquery {
243            // Execute the subquery and use its result as the source
244            debug!("QueryEngine: Processing FROM subquery...");
245            let subquery_result =
246                self.build_view_with_context(table.clone(), *subquery.clone(), cte_context)?;
247
248            // Convert the DataView to a DataTable for use as source
249            // This materializes the subquery result
250            let materialized = self.materialize_view(subquery_result)?;
251            Arc::new(materialized)
252        } else if let Some(ref table_name) = statement.from_table {
253            // Check if this references a CTE
254            if let Some(cte_view) = cte_context.get(table_name) {
255                debug!("QueryEngine: Using CTE '{}' as source table", table_name);
256                // Materialize the CTE view as a table
257                let materialized = self.materialize_view((**cte_view).clone())?;
258                Arc::new(materialized)
259            } else {
260                // Regular table reference - use the provided table
261                table.clone()
262            }
263        } else {
264            // No FROM clause - use the provided table
265            table.clone()
266        };
267
268        // Continue with the existing build_view logic but using source_table
269        self.build_view_internal(source_table, statement)
270    }
271
272    /// Materialize a DataView into a new DataTable
273    fn materialize_view(&self, view: DataView) -> Result<DataTable> {
274        let source = view.source();
275        let mut result_table = DataTable::new("derived");
276
277        // Get the visible columns from the view
278        let visible_cols = view.visible_column_indices().to_vec();
279
280        // Copy column definitions
281        for col_idx in &visible_cols {
282            let col = &source.columns[*col_idx];
283            let new_col = DataColumn {
284                name: col.name.clone(),
285                data_type: col.data_type.clone(),
286                nullable: col.nullable,
287                unique_values: col.unique_values,
288                null_count: col.null_count,
289                metadata: col.metadata.clone(),
290            };
291            result_table.add_column(new_col);
292        }
293
294        // Copy visible rows
295        for row_idx in view.visible_row_indices() {
296            let source_row = &source.rows[*row_idx];
297            let mut new_row = DataRow { values: Vec::new() };
298
299            for col_idx in &visible_cols {
300                new_row.values.push(source_row.values[*col_idx].clone());
301            }
302
303            result_table.add_row(new_row);
304        }
305
306        Ok(result_table)
307    }
308
309    fn build_view_internal(
310        &self,
311        table: Arc<DataTable>,
312        statement: SelectStatement,
313    ) -> Result<DataView> {
314        debug!(
315            "QueryEngine::build_view - select_items: {:?}",
316            statement.select_items
317        );
318        debug!(
319            "QueryEngine::build_view - where_clause: {:?}",
320            statement.where_clause
321        );
322
323        // Start with all rows visible
324        let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
325
326        // Apply WHERE clause filtering using recursive evaluator
327        if let Some(where_clause) = &statement.where_clause {
328            let total_rows = table.row_count();
329            debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
330            debug!("QueryEngine: WHERE clause = {:?}", where_clause);
331
332            let filter_start = Instant::now();
333            // Filter visible rows based on WHERE clause
334            let mut filtered_rows = Vec::new();
335            for row_idx in visible_rows {
336                // Only log for first few rows to avoid performance impact
337                if row_idx < 3 {
338                    debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
339                }
340                let evaluator = RecursiveWhereEvaluator::with_config(
341                    &table,
342                    self.case_insensitive,
343                    self.date_notation.clone(),
344                );
345                match evaluator.evaluate(where_clause, row_idx) {
346                    Ok(result) => {
347                        if row_idx < 3 {
348                            debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
349                        }
350                        if result {
351                            filtered_rows.push(row_idx);
352                        }
353                    }
354                    Err(e) => {
355                        if row_idx < 3 {
356                            debug!(
357                                "QueryEngine: WHERE evaluation error for row {}: {}",
358                                row_idx, e
359                            );
360                        }
361                        // Propagate WHERE clause errors instead of silently ignoring them
362                        return Err(e);
363                    }
364                }
365            }
366            visible_rows = filtered_rows;
367            let filter_duration = filter_start.elapsed();
368            info!(
369                "WHERE clause filtering: {} rows -> {} rows in {:?}",
370                total_rows,
371                visible_rows.len(),
372                filter_duration
373            );
374
375            // Debug log moved to info level above with timing
376        }
377
378        // Create initial DataView with filtered rows
379        let mut view = DataView::new(table.clone());
380        view = view.with_rows(visible_rows);
381
382        // Handle GROUP BY if present
383        if let Some(group_by_columns) = &statement.group_by {
384            if !group_by_columns.is_empty() {
385                debug!("QueryEngine: Processing GROUP BY: {:?}", group_by_columns);
386                view = self.apply_group_by(
387                    view,
388                    group_by_columns,
389                    &statement.select_items,
390                    statement.having.as_ref(),
391                )?;
392            }
393        } else {
394            // Apply column projection or computed expressions (SELECT clause) - do this AFTER filtering
395            if !statement.select_items.is_empty() {
396                // Check if we have ANY non-star items (not just the first one)
397                let has_non_star_items = statement
398                    .select_items
399                    .iter()
400                    .any(|item| !matches!(item, SelectItem::Star));
401
402                // Apply select items if:
403                // 1. We have computed expressions or explicit columns
404                // 2. OR we have a mix of star and other items (e.g., SELECT *, computed_col)
405                if has_non_star_items || statement.select_items.len() > 1 {
406                    view = self.apply_select_items(view, &statement.select_items)?;
407                }
408                // If it's just a single star, no projection needed
409            } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
410                // Fallback to legacy column projection for backward compatibility
411                let column_indices = self.resolve_column_indices(&table, &statement.columns)?;
412                view = view.with_columns(column_indices);
413            }
414        }
415
416        // Apply DISTINCT if specified
417        if statement.distinct {
418            view = self.apply_distinct(view)?;
419        }
420
421        // Apply ORDER BY sorting
422        if let Some(order_by_columns) = &statement.order_by {
423            if !order_by_columns.is_empty() {
424                view = self.apply_multi_order_by(view, order_by_columns)?;
425            }
426        }
427
428        // Apply LIMIT/OFFSET
429        if let Some(limit) = statement.limit {
430            let offset = statement.offset.unwrap_or(0);
431            view = view.with_limit(limit, offset);
432        }
433
434        Ok(view)
435    }
436
437    /// Resolve column names to indices
438    fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
439        let mut indices = Vec::new();
440        let table_columns = table.column_names();
441
442        for col_name in columns {
443            let index = table_columns
444                .iter()
445                .position(|c| c.eq_ignore_ascii_case(col_name))
446                .ok_or_else(|| {
447                    let suggestion = self.find_similar_column(table, col_name);
448                    match suggestion {
449                        Some(similar) => anyhow::anyhow!(
450                            "Column '{}' not found. Did you mean '{}'?",
451                            col_name,
452                            similar
453                        ),
454                        None => anyhow::anyhow!("Column '{}' not found", col_name),
455                    }
456                })?;
457            indices.push(index);
458        }
459
460        Ok(indices)
461    }
462
463    /// Apply SELECT items (columns and computed expressions) to create new view
464    fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
465        debug!(
466            "QueryEngine::apply_select_items - items: {:?}",
467            select_items
468        );
469        debug!(
470            "QueryEngine::apply_select_items - input view has {} rows",
471            view.row_count()
472        );
473
474        // Check if ALL select items are aggregate functions (no GROUP BY)
475        let all_aggregates = select_items.iter().all(|item| match item {
476            SelectItem::Expression { expr, .. } => contains_aggregate(expr),
477            SelectItem::Column(_) => false,
478            SelectItem::Star => false,
479        });
480
481        if all_aggregates && view.row_count() > 0 {
482            // Special handling for aggregate-only queries (no GROUP BY)
483            // These should produce exactly one row
484            return self.apply_aggregate_select(view, select_items);
485        }
486
487        // Check if we need to create computed columns
488        let has_computed_expressions = select_items
489            .iter()
490            .any(|item| matches!(item, SelectItem::Expression { .. }));
491
492        debug!(
493            "QueryEngine::apply_select_items - has_computed_expressions: {}",
494            has_computed_expressions
495        );
496
497        if !has_computed_expressions {
498            // Simple case: only columns, use existing projection logic
499            let column_indices = self.resolve_select_columns(view.source(), select_items)?;
500            return Ok(view.with_columns(column_indices));
501        }
502
503        // Complex case: we have computed expressions
504        // IMPORTANT: We create a PROJECTED view, not a new table
505        // This preserves the original DataTable reference
506
507        let source_table = view.source();
508        let visible_rows = view.visible_row_indices();
509
510        // Create a temporary table just for the computed result view
511        // But this table is only used for the current query result
512        let mut computed_table = DataTable::new("query_result");
513
514        // First, expand any Star selectors to actual columns
515        let mut expanded_items = Vec::new();
516        for item in select_items {
517            match item {
518                SelectItem::Star => {
519                    // Expand * to all columns from source table
520                    for col_name in source_table.column_names() {
521                        expanded_items.push(SelectItem::Column(col_name));
522                    }
523                }
524                _ => expanded_items.push(item.clone()),
525            }
526        }
527
528        // Add columns based on expanded SelectItems, handling duplicates
529        let mut column_name_counts: std::collections::HashMap<String, usize> =
530            std::collections::HashMap::new();
531
532        for item in &expanded_items {
533            let base_name = match item {
534                SelectItem::Column(name) => name.clone(),
535                SelectItem::Expression { alias, .. } => alias.clone(),
536                SelectItem::Star => unreachable!("Star should have been expanded"),
537            };
538
539            // Check if this column name has been used before
540            let count = column_name_counts.entry(base_name.clone()).or_insert(0);
541            let column_name = if *count == 0 {
542                // First occurrence, use the name as-is
543                base_name.clone()
544            } else {
545                // Duplicate, append a suffix
546                format!("{base_name}_{count}")
547            };
548            *count += 1;
549
550            computed_table.add_column(DataColumn::new(&column_name));
551        }
552
553        // Calculate values for each row
554        let mut evaluator =
555            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
556
557        for &row_idx in visible_rows {
558            let mut row_values = Vec::new();
559
560            for item in &expanded_items {
561                let value = match item {
562                    SelectItem::Column(col_name) => {
563                        // Simple column reference
564                        let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
565                            let suggestion = self.find_similar_column(source_table, col_name);
566                            match suggestion {
567                                Some(similar) => anyhow::anyhow!(
568                                    "Column '{}' not found. Did you mean '{}'?",
569                                    col_name,
570                                    similar
571                                ),
572                                None => anyhow::anyhow!("Column '{}' not found", col_name),
573                            }
574                        })?;
575                        let row = source_table
576                            .get_row(row_idx)
577                            .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
578                        row.get(col_idx)
579                            .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
580                            .clone()
581                    }
582                    SelectItem::Expression { expr, .. } => {
583                        // Computed expression
584                        evaluator.evaluate(expr, row_idx)?
585                    }
586                    SelectItem::Star => unreachable!("Star should have been expanded"),
587                };
588                row_values.push(value);
589            }
590
591            computed_table
592                .add_row(DataRow::new(row_values))
593                .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
594        }
595
596        // Return a view of the computed result
597        // This is a temporary view for this query only
598        Ok(DataView::new(Arc::new(computed_table)))
599    }
600
601    /// Apply aggregate-only SELECT (no GROUP BY - produces single row)
602    fn apply_aggregate_select(
603        &self,
604        view: DataView,
605        select_items: &[SelectItem],
606    ) -> Result<DataView> {
607        debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
608
609        let source_table = view.source();
610        let mut result_table = DataTable::new("aggregate_result");
611
612        // Add columns for each select item
613        for item in select_items {
614            let column_name = match item {
615                SelectItem::Expression { alias, .. } => alias.clone(),
616                _ => unreachable!("Should only have expressions in aggregate-only query"),
617            };
618            result_table.add_column(DataColumn::new(&column_name));
619        }
620
621        // Create evaluator with visible rows from the view (for filtered aggregates)
622        let visible_rows = view.visible_row_indices().to_vec();
623        let mut evaluator =
624            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
625                .with_visible_rows(visible_rows);
626
627        // Evaluate each aggregate expression once (they handle all rows internally)
628        let mut row_values = Vec::new();
629        for item in select_items {
630            match item {
631                SelectItem::Expression { expr, .. } => {
632                    // The evaluator will handle aggregates over all rows
633                    // We pass row_index=0 but aggregates ignore it and process all rows
634                    let value = evaluator.evaluate(expr, 0)?;
635                    row_values.push(value);
636                }
637                _ => unreachable!("Should only have expressions in aggregate-only query"),
638            }
639        }
640
641        // Add the single result row
642        result_table
643            .add_row(DataRow::new(row_values))
644            .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
645
646        Ok(DataView::new(Arc::new(result_table)))
647    }
648
649    /// Resolve `SelectItem` columns to indices (for simple column projections only)
650    fn resolve_select_columns(
651        &self,
652        table: &DataTable,
653        select_items: &[SelectItem],
654    ) -> Result<Vec<usize>> {
655        let mut indices = Vec::new();
656        let table_columns = table.column_names();
657
658        for item in select_items {
659            match item {
660                SelectItem::Column(col_name) => {
661                    let index = table_columns
662                        .iter()
663                        .position(|c| c.eq_ignore_ascii_case(col_name))
664                        .ok_or_else(|| {
665                            let suggestion = self.find_similar_column(table, col_name);
666                            match suggestion {
667                                Some(similar) => anyhow::anyhow!(
668                                    "Column '{}' not found. Did you mean '{}'?",
669                                    col_name,
670                                    similar
671                                ),
672                                None => anyhow::anyhow!("Column '{}' not found", col_name),
673                            }
674                        })?;
675                    indices.push(index);
676                }
677                SelectItem::Star => {
678                    // Expand * to all column indices
679                    for i in 0..table_columns.len() {
680                        indices.push(i);
681                    }
682                }
683                SelectItem::Expression { .. } => {
684                    return Err(anyhow::anyhow!(
685                        "Computed expressions require new table creation"
686                    ));
687                }
688            }
689        }
690
691        Ok(indices)
692    }
693
694    /// Apply DISTINCT to remove duplicate rows
695    fn apply_distinct(&self, view: DataView) -> Result<DataView> {
696        use std::collections::HashSet;
697
698        let source = view.source();
699        let visible_cols = view.visible_column_indices();
700        let visible_rows = view.visible_row_indices();
701
702        // Build a set to track unique rows
703        let mut seen_rows = HashSet::new();
704        let mut unique_row_indices = Vec::new();
705
706        for &row_idx in visible_rows {
707            // Build a key representing this row's visible column values
708            let mut row_key = Vec::new();
709            for &col_idx in visible_cols {
710                let value = source
711                    .get_value(row_idx, col_idx)
712                    .ok_or_else(|| anyhow!("Invalid cell reference"))?;
713                // Convert value to a hashable representation
714                row_key.push(format!("{:?}", value));
715            }
716
717            // Check if we've seen this row before
718            if seen_rows.insert(row_key) {
719                // First time seeing this row combination
720                unique_row_indices.push(row_idx);
721            }
722        }
723
724        // Create a new view with only unique rows
725        Ok(view.with_rows(unique_row_indices))
726    }
727
728    /// Apply multi-column ORDER BY sorting to the view
729    fn apply_multi_order_by(
730        &self,
731        mut view: DataView,
732        order_by_columns: &[OrderByColumn],
733    ) -> Result<DataView> {
734        // Build list of (source_column_index, ascending) tuples
735        let mut sort_columns = Vec::new();
736
737        for order_col in order_by_columns {
738            // Try to find the column index
739            // First check in the current view's source (this handles both regular columns and computed columns)
740            let col_index = view
741                .source()
742                .get_column_index(&order_col.column)
743                .ok_or_else(|| {
744                    // If not found, provide helpful error with suggestions
745                    let suggestion = self.find_similar_column(view.source(), &order_col.column);
746                    match suggestion {
747                        Some(similar) => anyhow::anyhow!(
748                            "Column '{}' not found. Did you mean '{}'?",
749                            order_col.column,
750                            similar
751                        ),
752                        None => {
753                            // Also list available columns for debugging
754                            let available_cols = view.source().column_names().join(", ");
755                            anyhow::anyhow!(
756                                "Column '{}' not found. Available columns: {}",
757                                order_col.column,
758                                available_cols
759                            )
760                        }
761                    }
762                })?;
763
764            let ascending = matches!(order_col.direction, SortDirection::Asc);
765            sort_columns.push((col_index, ascending));
766        }
767
768        // Apply multi-column sorting
769        view.apply_multi_sort(&sort_columns)?;
770        Ok(view)
771    }
772
773    /// Apply GROUP BY to the view with optional HAVING clause
774    fn apply_group_by(
775        &self,
776        view: DataView,
777        group_by_columns: &[String],
778        select_items: &[SelectItem],
779        having: Option<&SqlExpression>,
780    ) -> Result<DataView> {
781        debug!(
782            "QueryEngine::apply_group_by - grouping by: {:?}",
783            group_by_columns
784        );
785
786        // Group the data using DataView's group_by method
787        let groups = view.group_by(group_by_columns)?;
788        debug!(
789            "QueryEngine::apply_group_by - created {} groups",
790            groups.len()
791        );
792
793        // Create a result table for the grouped data
794        let mut result_table = DataTable::new("grouped_result");
795
796        // Determine which columns to include in the result
797        // 1. GROUP BY columns always come first
798        // 2. Then aggregate functions
799
800        // Add GROUP BY columns to result
801        for col_name in group_by_columns {
802            result_table.add_column(DataColumn::new(col_name));
803        }
804
805        // Process SELECT items to find aggregates and their aliases
806        let mut aggregate_columns = Vec::new();
807        for item in select_items {
808            match item {
809                SelectItem::Expression { expr, alias } => {
810                    if contains_aggregate(expr) {
811                        // Expression always has an alias in the AST
812                        result_table.add_column(DataColumn::new(alias));
813                        aggregate_columns.push((expr.clone(), alias.clone()));
814                    }
815                }
816                SelectItem::Column(col_name) => {
817                    // Non-aggregate columns must be in GROUP BY
818                    if !group_by_columns.contains(col_name) {
819                        return Err(anyhow!(
820                            "Column '{}' must appear in GROUP BY clause or be used in an aggregate function",
821                            col_name
822                        ));
823                    }
824                }
825                SelectItem::Star => {
826                    // For GROUP BY queries, * is not really meaningful
827                    // We'll just include the GROUP BY columns which we already added
828                }
829            }
830        }
831
832        // Process each group
833        for (group_key, group_view) in groups {
834            let mut row_values = Vec::new();
835            let mut aggregate_values = std::collections::HashMap::new();
836
837            // Add GROUP BY column values from the key
838            for (i, value) in group_key.0.iter().enumerate() {
839                row_values.push(value.clone());
840                // Store GROUP BY columns for HAVING evaluation
841                if i < group_by_columns.len() {
842                    aggregate_values.insert(group_by_columns[i].clone(), value.clone());
843                }
844            }
845
846            // Calculate aggregate values for this group
847            for (expr, col_name) in &aggregate_columns {
848                // Set the visible rows for this group so aggregates work correctly
849                let group_rows = group_view.get_visible_rows();
850                let mut evaluator = ArithmeticEvaluator::new(group_view.source())
851                    .with_visible_rows(group_rows.clone());
852
853                // For aggregates, we need to evaluate across all rows in the group
854                let value = if group_view.row_count() > 0 && !group_rows.is_empty() {
855                    // Evaluate the aggregate expression
856                    // The arithmetic evaluator will handle the aggregate across visible rows
857                    evaluator
858                        .evaluate(expr, group_rows[0])
859                        .unwrap_or(DataValue::Null)
860                } else {
861                    DataValue::Null
862                };
863
864                // Store aggregate value for HAVING evaluation
865                aggregate_values.insert(col_name.clone(), value.clone());
866                row_values.push(value);
867            }
868
869            // Evaluate HAVING clause if present
870            if let Some(having_expr) = having {
871                // Create a temporary table with one row containing the aggregate values
872                let mut temp_table = DataTable::new("having_eval");
873                for col_name in aggregate_values.keys() {
874                    temp_table.add_column(DataColumn::new(col_name));
875                }
876
877                let temp_row_values: Vec<DataValue> = aggregate_values.values().cloned().collect();
878                temp_table
879                    .add_row(DataRow::new(temp_row_values))
880                    .map_err(|e| anyhow!("Failed to create temp table for HAVING: {}", e))?;
881
882                // Evaluate HAVING expression on the aggregate values
883                let mut evaluator = ArithmeticEvaluator::new(&temp_table);
884                let having_result = evaluator
885                    .evaluate(having_expr, 0)
886                    .unwrap_or(DataValue::Boolean(false));
887
888                // Skip this group if HAVING condition is not met
889                match having_result {
890                    DataValue::Boolean(false) => continue,
891                    DataValue::Null => continue,
892                    _ => {} // Include the row
893                }
894            }
895
896            // Add the row to the result table
897            result_table
898                .add_row(DataRow::new(row_values))
899                .map_err(|e| anyhow!("Failed to add row to result table: {}", e))?;
900        }
901
902        // Return a DataView of the grouped result
903        Ok(DataView::new(Arc::new(result_table)))
904    }
905}
906
907#[cfg(test)]
908mod tests {
909    use super::*;
910    use crate::data::datatable::{DataColumn, DataRow, DataValue};
911
912    fn create_test_table() -> Arc<DataTable> {
913        let mut table = DataTable::new("test");
914
915        // Add columns
916        table.add_column(DataColumn::new("id"));
917        table.add_column(DataColumn::new("name"));
918        table.add_column(DataColumn::new("age"));
919
920        // Add rows
921        table
922            .add_row(DataRow::new(vec![
923                DataValue::Integer(1),
924                DataValue::String("Alice".to_string()),
925                DataValue::Integer(30),
926            ]))
927            .unwrap();
928
929        table
930            .add_row(DataRow::new(vec![
931                DataValue::Integer(2),
932                DataValue::String("Bob".to_string()),
933                DataValue::Integer(25),
934            ]))
935            .unwrap();
936
937        table
938            .add_row(DataRow::new(vec![
939                DataValue::Integer(3),
940                DataValue::String("Charlie".to_string()),
941                DataValue::Integer(35),
942            ]))
943            .unwrap();
944
945        Arc::new(table)
946    }
947
948    #[test]
949    fn test_select_all() {
950        let table = create_test_table();
951        let engine = QueryEngine::new();
952
953        let view = engine
954            .execute(table.clone(), "SELECT * FROM users")
955            .unwrap();
956        assert_eq!(view.row_count(), 3);
957        assert_eq!(view.column_count(), 3);
958    }
959
960    #[test]
961    fn test_select_columns() {
962        let table = create_test_table();
963        let engine = QueryEngine::new();
964
965        let view = engine
966            .execute(table.clone(), "SELECT name, age FROM users")
967            .unwrap();
968        assert_eq!(view.row_count(), 3);
969        assert_eq!(view.column_count(), 2);
970    }
971
972    #[test]
973    fn test_select_with_limit() {
974        let table = create_test_table();
975        let engine = QueryEngine::new();
976
977        let view = engine
978            .execute(table.clone(), "SELECT * FROM users LIMIT 2")
979            .unwrap();
980        assert_eq!(view.row_count(), 2);
981    }
982
983    #[test]
984    fn test_type_coercion_contains() {
985        // Initialize tracing for debug output
986        let _ = tracing_subscriber::fmt()
987            .with_max_level(tracing::Level::DEBUG)
988            .try_init();
989
990        let mut table = DataTable::new("test");
991        table.add_column(DataColumn::new("id"));
992        table.add_column(DataColumn::new("status"));
993        table.add_column(DataColumn::new("price"));
994
995        // Add test data with mixed types
996        table
997            .add_row(DataRow::new(vec![
998                DataValue::Integer(1),
999                DataValue::String("Pending".to_string()),
1000                DataValue::Float(99.99),
1001            ]))
1002            .unwrap();
1003
1004        table
1005            .add_row(DataRow::new(vec![
1006                DataValue::Integer(2),
1007                DataValue::String("Confirmed".to_string()),
1008                DataValue::Float(150.50),
1009            ]))
1010            .unwrap();
1011
1012        table
1013            .add_row(DataRow::new(vec![
1014                DataValue::Integer(3),
1015                DataValue::String("Pending".to_string()),
1016                DataValue::Float(75.00),
1017            ]))
1018            .unwrap();
1019
1020        let table = Arc::new(table);
1021        let engine = QueryEngine::new();
1022
1023        println!("\n=== Testing WHERE clause with Contains ===");
1024        println!("Table has {} rows", table.row_count());
1025        for i in 0..table.row_count() {
1026            let status = table.get_value(i, 1);
1027            println!("Row {i}: status = {status:?}");
1028        }
1029
1030        // Test 1: Basic string contains (should work)
1031        println!("\n--- Test 1: status.Contains('pend') ---");
1032        let result = engine.execute(
1033            table.clone(),
1034            "SELECT * FROM test WHERE status.Contains('pend')",
1035        );
1036        match result {
1037            Ok(view) => {
1038                println!("SUCCESS: Found {} matching rows", view.row_count());
1039                assert_eq!(view.row_count(), 2); // Should find both Pending rows
1040            }
1041            Err(e) => {
1042                panic!("Query failed: {e}");
1043            }
1044        }
1045
1046        // Test 2: Numeric contains (should work with type coercion)
1047        println!("\n--- Test 2: price.Contains('9') ---");
1048        let result = engine.execute(
1049            table.clone(),
1050            "SELECT * FROM test WHERE price.Contains('9')",
1051        );
1052        match result {
1053            Ok(view) => {
1054                println!(
1055                    "SUCCESS: Found {} matching rows with price containing '9'",
1056                    view.row_count()
1057                );
1058                // Should find 99.99 row
1059                assert!(view.row_count() >= 1);
1060            }
1061            Err(e) => {
1062                panic!("Numeric coercion query failed: {e}");
1063            }
1064        }
1065
1066        println!("\n=== All tests passed! ===");
1067    }
1068
1069    #[test]
1070    fn test_not_in_clause() {
1071        // Initialize tracing for debug output
1072        let _ = tracing_subscriber::fmt()
1073            .with_max_level(tracing::Level::DEBUG)
1074            .try_init();
1075
1076        let mut table = DataTable::new("test");
1077        table.add_column(DataColumn::new("id"));
1078        table.add_column(DataColumn::new("country"));
1079
1080        // Add test data
1081        table
1082            .add_row(DataRow::new(vec![
1083                DataValue::Integer(1),
1084                DataValue::String("CA".to_string()),
1085            ]))
1086            .unwrap();
1087
1088        table
1089            .add_row(DataRow::new(vec![
1090                DataValue::Integer(2),
1091                DataValue::String("US".to_string()),
1092            ]))
1093            .unwrap();
1094
1095        table
1096            .add_row(DataRow::new(vec![
1097                DataValue::Integer(3),
1098                DataValue::String("UK".to_string()),
1099            ]))
1100            .unwrap();
1101
1102        let table = Arc::new(table);
1103        let engine = QueryEngine::new();
1104
1105        println!("\n=== Testing NOT IN clause ===");
1106        println!("Table has {} rows", table.row_count());
1107        for i in 0..table.row_count() {
1108            let country = table.get_value(i, 1);
1109            println!("Row {i}: country = {country:?}");
1110        }
1111
1112        // Test NOT IN clause - should exclude CA, return US and UK (2 rows)
1113        println!("\n--- Test: country NOT IN ('CA') ---");
1114        let result = engine.execute(
1115            table.clone(),
1116            "SELECT * FROM test WHERE country NOT IN ('CA')",
1117        );
1118        match result {
1119            Ok(view) => {
1120                println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
1121                assert_eq!(view.row_count(), 2); // Should find US and UK
1122            }
1123            Err(e) => {
1124                panic!("NOT IN query failed: {e}");
1125            }
1126        }
1127
1128        println!("\n=== NOT IN test complete! ===");
1129    }
1130
1131    #[test]
1132    fn test_case_insensitive_in_and_not_in() {
1133        // Initialize tracing for debug output
1134        let _ = tracing_subscriber::fmt()
1135            .with_max_level(tracing::Level::DEBUG)
1136            .try_init();
1137
1138        let mut table = DataTable::new("test");
1139        table.add_column(DataColumn::new("id"));
1140        table.add_column(DataColumn::new("country"));
1141
1142        // Add test data with mixed case
1143        table
1144            .add_row(DataRow::new(vec![
1145                DataValue::Integer(1),
1146                DataValue::String("CA".to_string()), // uppercase
1147            ]))
1148            .unwrap();
1149
1150        table
1151            .add_row(DataRow::new(vec![
1152                DataValue::Integer(2),
1153                DataValue::String("us".to_string()), // lowercase
1154            ]))
1155            .unwrap();
1156
1157        table
1158            .add_row(DataRow::new(vec![
1159                DataValue::Integer(3),
1160                DataValue::String("UK".to_string()), // uppercase
1161            ]))
1162            .unwrap();
1163
1164        let table = Arc::new(table);
1165
1166        println!("\n=== Testing Case-Insensitive IN clause ===");
1167        println!("Table has {} rows", table.row_count());
1168        for i in 0..table.row_count() {
1169            let country = table.get_value(i, 1);
1170            println!("Row {i}: country = {country:?}");
1171        }
1172
1173        // Test case-insensitive IN - should match 'CA' with 'ca'
1174        println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
1175        let engine = QueryEngine::with_case_insensitive(true);
1176        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1177        match result {
1178            Ok(view) => {
1179                println!(
1180                    "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
1181                    view.row_count()
1182                );
1183                assert_eq!(view.row_count(), 1); // Should find CA row
1184            }
1185            Err(e) => {
1186                panic!("Case-insensitive IN query failed: {e}");
1187            }
1188        }
1189
1190        // Test case-insensitive NOT IN - should exclude 'CA' when searching for 'ca'
1191        println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
1192        let result = engine.execute(
1193            table.clone(),
1194            "SELECT * FROM test WHERE country NOT IN ('ca')",
1195        );
1196        match result {
1197            Ok(view) => {
1198                println!(
1199                    "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
1200                    view.row_count()
1201                );
1202                assert_eq!(view.row_count(), 2); // Should find us and UK rows
1203            }
1204            Err(e) => {
1205                panic!("Case-insensitive NOT IN query failed: {e}");
1206            }
1207        }
1208
1209        // Test case-sensitive (default) - should NOT match 'CA' with 'ca'
1210        println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
1211        let engine_case_sensitive = QueryEngine::new(); // defaults to case_insensitive=false
1212        let result = engine_case_sensitive
1213            .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1214        match result {
1215            Ok(view) => {
1216                println!(
1217                    "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
1218                    view.row_count()
1219                );
1220                assert_eq!(view.row_count(), 0); // Should find no rows (CA != ca)
1221            }
1222            Err(e) => {
1223                panic!("Case-sensitive IN query failed: {e}");
1224            }
1225        }
1226
1227        println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
1228    }
1229
1230    #[test]
1231    #[ignore = "Parentheses in WHERE clause not yet implemented"]
1232    fn test_parentheses_in_where_clause() {
1233        // Initialize tracing for debug output
1234        let _ = tracing_subscriber::fmt()
1235            .with_max_level(tracing::Level::DEBUG)
1236            .try_init();
1237
1238        let mut table = DataTable::new("test");
1239        table.add_column(DataColumn::new("id"));
1240        table.add_column(DataColumn::new("status"));
1241        table.add_column(DataColumn::new("priority"));
1242
1243        // Add test data
1244        table
1245            .add_row(DataRow::new(vec![
1246                DataValue::Integer(1),
1247                DataValue::String("Pending".to_string()),
1248                DataValue::String("High".to_string()),
1249            ]))
1250            .unwrap();
1251
1252        table
1253            .add_row(DataRow::new(vec![
1254                DataValue::Integer(2),
1255                DataValue::String("Complete".to_string()),
1256                DataValue::String("High".to_string()),
1257            ]))
1258            .unwrap();
1259
1260        table
1261            .add_row(DataRow::new(vec![
1262                DataValue::Integer(3),
1263                DataValue::String("Pending".to_string()),
1264                DataValue::String("Low".to_string()),
1265            ]))
1266            .unwrap();
1267
1268        table
1269            .add_row(DataRow::new(vec![
1270                DataValue::Integer(4),
1271                DataValue::String("Complete".to_string()),
1272                DataValue::String("Low".to_string()),
1273            ]))
1274            .unwrap();
1275
1276        let table = Arc::new(table);
1277        let engine = QueryEngine::new();
1278
1279        println!("\n=== Testing Parentheses in WHERE clause ===");
1280        println!("Table has {} rows", table.row_count());
1281        for i in 0..table.row_count() {
1282            let status = table.get_value(i, 1);
1283            let priority = table.get_value(i, 2);
1284            println!("Row {i}: status = {status:?}, priority = {priority:?}");
1285        }
1286
1287        // Test OR with parentheses - should get (Pending AND High) OR (Complete AND Low)
1288        println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
1289        let result = engine.execute(
1290            table.clone(),
1291            "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
1292        );
1293        match result {
1294            Ok(view) => {
1295                println!(
1296                    "SUCCESS: Found {} rows with parenthetical logic",
1297                    view.row_count()
1298                );
1299                assert_eq!(view.row_count(), 2); // Should find rows 1 and 4
1300            }
1301            Err(e) => {
1302                panic!("Parentheses query failed: {e}");
1303            }
1304        }
1305
1306        println!("\n=== Parentheses test complete! ===");
1307    }
1308
1309    #[test]
1310    #[ignore = "Numeric type coercion needs fixing"]
1311    fn test_numeric_type_coercion() {
1312        // Initialize tracing for debug output
1313        let _ = tracing_subscriber::fmt()
1314            .with_max_level(tracing::Level::DEBUG)
1315            .try_init();
1316
1317        let mut table = DataTable::new("test");
1318        table.add_column(DataColumn::new("id"));
1319        table.add_column(DataColumn::new("price"));
1320        table.add_column(DataColumn::new("quantity"));
1321
1322        // Add test data with different numeric types
1323        table
1324            .add_row(DataRow::new(vec![
1325                DataValue::Integer(1),
1326                DataValue::Float(99.50), // Contains '.'
1327                DataValue::Integer(100),
1328            ]))
1329            .unwrap();
1330
1331        table
1332            .add_row(DataRow::new(vec![
1333                DataValue::Integer(2),
1334                DataValue::Float(150.0), // Contains '.' and '0'
1335                DataValue::Integer(200),
1336            ]))
1337            .unwrap();
1338
1339        table
1340            .add_row(DataRow::new(vec![
1341                DataValue::Integer(3),
1342                DataValue::Integer(75), // No decimal point
1343                DataValue::Integer(50),
1344            ]))
1345            .unwrap();
1346
1347        let table = Arc::new(table);
1348        let engine = QueryEngine::new();
1349
1350        println!("\n=== Testing Numeric Type Coercion ===");
1351        println!("Table has {} rows", table.row_count());
1352        for i in 0..table.row_count() {
1353            let price = table.get_value(i, 1);
1354            let quantity = table.get_value(i, 2);
1355            println!("Row {i}: price = {price:?}, quantity = {quantity:?}");
1356        }
1357
1358        // Test Contains on float values - should find rows with decimal points
1359        println!("\n--- Test: price.Contains('.') ---");
1360        let result = engine.execute(
1361            table.clone(),
1362            "SELECT * FROM test WHERE price.Contains('.')",
1363        );
1364        match result {
1365            Ok(view) => {
1366                println!(
1367                    "SUCCESS: Found {} rows with decimal points in price",
1368                    view.row_count()
1369                );
1370                assert_eq!(view.row_count(), 2); // Should find 99.50 and 150.0
1371            }
1372            Err(e) => {
1373                panic!("Numeric Contains query failed: {e}");
1374            }
1375        }
1376
1377        // Test Contains on integer values converted to string
1378        println!("\n--- Test: quantity.Contains('0') ---");
1379        let result = engine.execute(
1380            table.clone(),
1381            "SELECT * FROM test WHERE quantity.Contains('0')",
1382        );
1383        match result {
1384            Ok(view) => {
1385                println!(
1386                    "SUCCESS: Found {} rows with '0' in quantity",
1387                    view.row_count()
1388                );
1389                assert_eq!(view.row_count(), 2); // Should find 100 and 200
1390            }
1391            Err(e) => {
1392                panic!("Integer Contains query failed: {e}");
1393            }
1394        }
1395
1396        println!("\n=== Numeric type coercion test complete! ===");
1397    }
1398
1399    #[test]
1400    fn test_datetime_comparisons() {
1401        // Initialize tracing for debug output
1402        let _ = tracing_subscriber::fmt()
1403            .with_max_level(tracing::Level::DEBUG)
1404            .try_init();
1405
1406        let mut table = DataTable::new("test");
1407        table.add_column(DataColumn::new("id"));
1408        table.add_column(DataColumn::new("created_date"));
1409
1410        // Add test data with date strings (as they would come from CSV)
1411        table
1412            .add_row(DataRow::new(vec![
1413                DataValue::Integer(1),
1414                DataValue::String("2024-12-15".to_string()),
1415            ]))
1416            .unwrap();
1417
1418        table
1419            .add_row(DataRow::new(vec![
1420                DataValue::Integer(2),
1421                DataValue::String("2025-01-15".to_string()),
1422            ]))
1423            .unwrap();
1424
1425        table
1426            .add_row(DataRow::new(vec![
1427                DataValue::Integer(3),
1428                DataValue::String("2025-02-15".to_string()),
1429            ]))
1430            .unwrap();
1431
1432        let table = Arc::new(table);
1433        let engine = QueryEngine::new();
1434
1435        println!("\n=== Testing DateTime Comparisons ===");
1436        println!("Table has {} rows", table.row_count());
1437        for i in 0..table.row_count() {
1438            let date = table.get_value(i, 1);
1439            println!("Row {i}: created_date = {date:?}");
1440        }
1441
1442        // Test DateTime constructor comparison - should find dates after 2025-01-01
1443        println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1444        let result = engine.execute(
1445            table.clone(),
1446            "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1447        );
1448        match result {
1449            Ok(view) => {
1450                println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1451                assert_eq!(view.row_count(), 2); // Should find 2025-01-15 and 2025-02-15
1452            }
1453            Err(e) => {
1454                panic!("DateTime comparison query failed: {e}");
1455            }
1456        }
1457
1458        println!("\n=== DateTime comparison test complete! ===");
1459    }
1460
1461    #[test]
1462    fn test_not_with_method_calls() {
1463        // Initialize tracing for debug output
1464        let _ = tracing_subscriber::fmt()
1465            .with_max_level(tracing::Level::DEBUG)
1466            .try_init();
1467
1468        let mut table = DataTable::new("test");
1469        table.add_column(DataColumn::new("id"));
1470        table.add_column(DataColumn::new("status"));
1471
1472        // Add test data
1473        table
1474            .add_row(DataRow::new(vec![
1475                DataValue::Integer(1),
1476                DataValue::String("Pending Review".to_string()),
1477            ]))
1478            .unwrap();
1479
1480        table
1481            .add_row(DataRow::new(vec![
1482                DataValue::Integer(2),
1483                DataValue::String("Complete".to_string()),
1484            ]))
1485            .unwrap();
1486
1487        table
1488            .add_row(DataRow::new(vec![
1489                DataValue::Integer(3),
1490                DataValue::String("Pending Approval".to_string()),
1491            ]))
1492            .unwrap();
1493
1494        let table = Arc::new(table);
1495        let engine = QueryEngine::with_case_insensitive(true);
1496
1497        println!("\n=== Testing NOT with Method Calls ===");
1498        println!("Table has {} rows", table.row_count());
1499        for i in 0..table.row_count() {
1500            let status = table.get_value(i, 1);
1501            println!("Row {i}: status = {status:?}");
1502        }
1503
1504        // Test NOT with Contains - should exclude rows containing "pend"
1505        println!("\n--- Test: NOT status.Contains('pend') ---");
1506        let result = engine.execute(
1507            table.clone(),
1508            "SELECT * FROM test WHERE NOT status.Contains('pend')",
1509        );
1510        match result {
1511            Ok(view) => {
1512                println!(
1513                    "SUCCESS: Found {} rows NOT containing 'pend'",
1514                    view.row_count()
1515                );
1516                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1517            }
1518            Err(e) => {
1519                panic!("NOT Contains query failed: {e}");
1520            }
1521        }
1522
1523        // Test NOT with StartsWith
1524        println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1525        let result = engine.execute(
1526            table.clone(),
1527            "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1528        );
1529        match result {
1530            Ok(view) => {
1531                println!(
1532                    "SUCCESS: Found {} rows NOT starting with 'Pending'",
1533                    view.row_count()
1534                );
1535                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1536            }
1537            Err(e) => {
1538                panic!("NOT StartsWith query failed: {e}");
1539            }
1540        }
1541
1542        println!("\n=== NOT with method calls test complete! ===");
1543    }
1544
1545    #[test]
1546    #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1547    fn test_complex_logical_expressions() {
1548        // Initialize tracing for debug output
1549        let _ = tracing_subscriber::fmt()
1550            .with_max_level(tracing::Level::DEBUG)
1551            .try_init();
1552
1553        let mut table = DataTable::new("test");
1554        table.add_column(DataColumn::new("id"));
1555        table.add_column(DataColumn::new("status"));
1556        table.add_column(DataColumn::new("priority"));
1557        table.add_column(DataColumn::new("assigned"));
1558
1559        // Add comprehensive test data
1560        table
1561            .add_row(DataRow::new(vec![
1562                DataValue::Integer(1),
1563                DataValue::String("Pending".to_string()),
1564                DataValue::String("High".to_string()),
1565                DataValue::String("John".to_string()),
1566            ]))
1567            .unwrap();
1568
1569        table
1570            .add_row(DataRow::new(vec![
1571                DataValue::Integer(2),
1572                DataValue::String("Complete".to_string()),
1573                DataValue::String("High".to_string()),
1574                DataValue::String("Jane".to_string()),
1575            ]))
1576            .unwrap();
1577
1578        table
1579            .add_row(DataRow::new(vec![
1580                DataValue::Integer(3),
1581                DataValue::String("Pending".to_string()),
1582                DataValue::String("Low".to_string()),
1583                DataValue::String("John".to_string()),
1584            ]))
1585            .unwrap();
1586
1587        table
1588            .add_row(DataRow::new(vec![
1589                DataValue::Integer(4),
1590                DataValue::String("In Progress".to_string()),
1591                DataValue::String("Medium".to_string()),
1592                DataValue::String("Jane".to_string()),
1593            ]))
1594            .unwrap();
1595
1596        let table = Arc::new(table);
1597        let engine = QueryEngine::new();
1598
1599        println!("\n=== Testing Complex Logical Expressions ===");
1600        println!("Table has {} rows", table.row_count());
1601        for i in 0..table.row_count() {
1602            let status = table.get_value(i, 1);
1603            let priority = table.get_value(i, 2);
1604            let assigned = table.get_value(i, 3);
1605            println!(
1606                "Row {i}: status = {status:?}, priority = {priority:?}, assigned = {assigned:?}"
1607            );
1608        }
1609
1610        // Test complex AND/OR logic
1611        println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
1612        let result = engine.execute(
1613            table.clone(),
1614            "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
1615        );
1616        match result {
1617            Ok(view) => {
1618                println!(
1619                    "SUCCESS: Found {} rows with complex logic",
1620                    view.row_count()
1621                );
1622                assert_eq!(view.row_count(), 2); // Should find rows 1 and 3 (both Pending, one High priority, both assigned to John)
1623            }
1624            Err(e) => {
1625                panic!("Complex logic query failed: {e}");
1626            }
1627        }
1628
1629        // Test NOT with complex expressions
1630        println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
1631        let result = engine.execute(
1632            table.clone(),
1633            "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
1634        );
1635        match result {
1636            Ok(view) => {
1637                println!(
1638                    "SUCCESS: Found {} rows with NOT complex logic",
1639                    view.row_count()
1640                );
1641                assert_eq!(view.row_count(), 2); // Should find rows 1 (Pending+High) and 4 (In Progress+Medium)
1642            }
1643            Err(e) => {
1644                panic!("NOT complex logic query failed: {e}");
1645            }
1646        }
1647
1648        println!("\n=== Complex logical expressions test complete! ===");
1649    }
1650
1651    #[test]
1652    fn test_mixed_data_types_and_edge_cases() {
1653        // Initialize tracing for debug output
1654        let _ = tracing_subscriber::fmt()
1655            .with_max_level(tracing::Level::DEBUG)
1656            .try_init();
1657
1658        let mut table = DataTable::new("test");
1659        table.add_column(DataColumn::new("id"));
1660        table.add_column(DataColumn::new("value"));
1661        table.add_column(DataColumn::new("nullable_field"));
1662
1663        // Add test data with mixed types and edge cases
1664        table
1665            .add_row(DataRow::new(vec![
1666                DataValue::Integer(1),
1667                DataValue::String("123.45".to_string()),
1668                DataValue::String("present".to_string()),
1669            ]))
1670            .unwrap();
1671
1672        table
1673            .add_row(DataRow::new(vec![
1674                DataValue::Integer(2),
1675                DataValue::Float(678.90),
1676                DataValue::Null,
1677            ]))
1678            .unwrap();
1679
1680        table
1681            .add_row(DataRow::new(vec![
1682                DataValue::Integer(3),
1683                DataValue::Boolean(true),
1684                DataValue::String("also present".to_string()),
1685            ]))
1686            .unwrap();
1687
1688        table
1689            .add_row(DataRow::new(vec![
1690                DataValue::Integer(4),
1691                DataValue::String("false".to_string()),
1692                DataValue::Null,
1693            ]))
1694            .unwrap();
1695
1696        let table = Arc::new(table);
1697        let engine = QueryEngine::new();
1698
1699        println!("\n=== Testing Mixed Data Types and Edge Cases ===");
1700        println!("Table has {} rows", table.row_count());
1701        for i in 0..table.row_count() {
1702            let value = table.get_value(i, 1);
1703            let nullable = table.get_value(i, 2);
1704            println!("Row {i}: value = {value:?}, nullable_field = {nullable:?}");
1705        }
1706
1707        // Test type coercion with boolean Contains
1708        println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
1709        let result = engine.execute(
1710            table.clone(),
1711            "SELECT * FROM test WHERE value.Contains('true')",
1712        );
1713        match result {
1714            Ok(view) => {
1715                println!(
1716                    "SUCCESS: Found {} rows with boolean coercion",
1717                    view.row_count()
1718                );
1719                assert_eq!(view.row_count(), 1); // Should find the boolean true row
1720            }
1721            Err(e) => {
1722                panic!("Boolean coercion query failed: {e}");
1723            }
1724        }
1725
1726        // Test multiple IN values with mixed types
1727        println!("\n--- Test: id IN (1, 3) ---");
1728        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
1729        match result {
1730            Ok(view) => {
1731                println!("SUCCESS: Found {} rows with IN clause", view.row_count());
1732                assert_eq!(view.row_count(), 2); // Should find rows with id 1 and 3
1733            }
1734            Err(e) => {
1735                panic!("Multiple IN values query failed: {e}");
1736            }
1737        }
1738
1739        println!("\n=== Mixed data types test complete! ===");
1740    }
1741
1742    #[test]
1743    fn test_not_in_parsing() {
1744        use crate::sql::recursive_parser::Parser;
1745
1746        let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
1747        println!("\n=== Testing NOT IN parsing ===");
1748        println!("Parsing query: {query}");
1749
1750        let mut parser = Parser::new(query);
1751        match parser.parse() {
1752            Ok(statement) => {
1753                println!("Parsed statement: {statement:#?}");
1754                if let Some(where_clause) = statement.where_clause {
1755                    println!("WHERE conditions: {:#?}", where_clause.conditions);
1756                    if let Some(first_condition) = where_clause.conditions.first() {
1757                        println!("First condition expression: {:#?}", first_condition.expr);
1758                    }
1759                }
1760            }
1761            Err(e) => {
1762                panic!("Parse error: {e}");
1763            }
1764        }
1765    }
1766}
1767
1768#[cfg(test)]
1769#[path = "query_engine_tests.rs"]
1770mod query_engine_tests;