sql_cli/data/
query_engine.rs

1use anyhow::{anyhow, Result};
2use std::collections::HashMap;
3use std::sync::Arc;
4use std::time::Instant;
5use tracing::{debug, info};
6
7use crate::config::config::BehaviorConfig;
8use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
9use crate::data::data_view::DataView;
10use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
11use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
12use crate::data::virtual_table_generator::VirtualTableGenerator;
13use crate::sql::aggregates::contains_aggregate;
14use crate::sql::recursive_parser::{
15    OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection, SqlExpression, TableFunction,
16};
17
18/// Query engine that executes SQL directly on `DataTable`
19pub struct QueryEngine {
20    case_insensitive: bool,
21    date_notation: String,
22    behavior_config: Option<BehaviorConfig>,
23}
24
25impl Default for QueryEngine {
26    fn default() -> Self {
27        Self::new()
28    }
29}
30
31impl QueryEngine {
32    #[must_use]
33    pub fn new() -> Self {
34        Self {
35            case_insensitive: false,
36            date_notation: "us".to_string(),
37            behavior_config: None,
38        }
39    }
40
41    #[must_use]
42    pub fn with_behavior_config(config: BehaviorConfig) -> Self {
43        let case_insensitive = config.case_insensitive_default;
44        let date_notation = config.default_date_notation.clone();
45        Self {
46            case_insensitive,
47            date_notation,
48            behavior_config: Some(config),
49        }
50    }
51
52    #[must_use]
53    pub fn with_date_notation(date_notation: String) -> Self {
54        Self {
55            case_insensitive: false,
56            date_notation,
57            behavior_config: None,
58        }
59    }
60
61    #[must_use]
62    pub fn with_case_insensitive(case_insensitive: bool) -> Self {
63        Self {
64            case_insensitive,
65            date_notation: "us".to_string(),
66            behavior_config: None,
67        }
68    }
69
70    #[must_use]
71    pub fn with_case_insensitive_and_date_notation(
72        case_insensitive: bool,
73        date_notation: String,
74    ) -> Self {
75        Self {
76            case_insensitive,
77            date_notation,
78            behavior_config: None,
79        }
80    }
81
82    /// Find a column name similar to the given name using edit distance
83    fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
84        let columns = table.column_names();
85        let mut best_match: Option<(String, usize)> = None;
86
87        for col in columns {
88            let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
89            // Only suggest if distance is small (likely a typo)
90            // Allow up to 3 edits for longer names
91            let max_distance = if name.len() > 10 { 3 } else { 2 };
92            if distance <= max_distance {
93                match &best_match {
94                    None => best_match = Some((col, distance)),
95                    Some((_, best_dist)) if distance < *best_dist => {
96                        best_match = Some((col, distance));
97                    }
98                    _ => {}
99                }
100            }
101        }
102
103        best_match.map(|(name, _)| name)
104    }
105
106    /// Calculate Levenshtein edit distance between two strings
107    fn edit_distance(&self, s1: &str, s2: &str) -> usize {
108        let len1 = s1.len();
109        let len2 = s2.len();
110        let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
111
112        for i in 0..=len1 {
113            matrix[i][0] = i;
114        }
115        for j in 0..=len2 {
116            matrix[0][j] = j;
117        }
118
119        for (i, c1) in s1.chars().enumerate() {
120            for (j, c2) in s2.chars().enumerate() {
121                let cost = usize::from(c1 != c2);
122                matrix[i + 1][j + 1] = std::cmp::min(
123                    matrix[i][j + 1] + 1, // deletion
124                    std::cmp::min(
125                        matrix[i + 1][j] + 1, // insertion
126                        matrix[i][j] + cost,  // substitution
127                    ),
128                );
129            }
130        }
131
132        matrix[len1][len2]
133    }
134
135    /// Execute a SQL query on a `DataTable` and return a `DataView` (for backward compatibility)
136    pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
137        let start_time = Instant::now();
138
139        // Parse the SQL query
140        let parse_start = Instant::now();
141        let mut parser = Parser::new(sql);
142        let statement = parser
143            .parse()
144            .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
145        let parse_duration = parse_start.elapsed();
146
147        // Convert SelectStatement to DataView operations
148        let build_start = Instant::now();
149        // Create an empty context for CTEs
150        let mut cte_context = HashMap::new();
151        let result = self.build_view_with_context(table, statement, &mut cte_context)?;
152        let build_duration = build_start.elapsed();
153
154        let total_duration = start_time.elapsed();
155        info!(
156            "Query execution complete: parse={:?}, build={:?}, total={:?}, rows={}",
157            parse_duration,
158            build_duration,
159            total_duration,
160            result.row_count()
161        );
162
163        Ok(result)
164    }
165
166    /// Build a `DataView` from a parsed SQL statement
167    fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
168        let mut cte_context = HashMap::new();
169        self.build_view_with_context(table, statement, &mut cte_context)
170    }
171
172    /// Build a DataView from a SelectStatement with CTE context
173    fn build_view_with_context(
174        &self,
175        table: Arc<DataTable>,
176        statement: SelectStatement,
177        cte_context: &mut HashMap<String, Arc<DataView>>,
178    ) -> Result<DataView> {
179        // First, process any CTEs
180        for cte in &statement.ctes {
181            debug!("QueryEngine: Processing CTE '{}'...", cte.name);
182            // Execute the CTE query (it might reference earlier CTEs)
183            let cte_result =
184                self.build_view_with_context(table.clone(), cte.query.clone(), cte_context)?;
185
186            // Store the result in the context for later use
187            cte_context.insert(cte.name.clone(), Arc::new(cte_result));
188            debug!(
189                "QueryEngine: CTE '{}' processed, stored in context",
190                cte.name
191            );
192        }
193
194        // Determine the source table for the main query
195        let source_table = if let Some(ref table_func) = statement.from_function {
196            // Handle table functions like RANGE()
197            debug!("QueryEngine: Processing table function...");
198            match table_func {
199                TableFunction::Range { start, end, step } => {
200                    // Evaluate expressions to get numeric values
201                    let mut evaluator =
202                        ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
203
204                    // Create a dummy row for evaluating constant expressions
205                    let dummy_row = 0;
206
207                    let start_val = evaluator.evaluate(start, dummy_row)?;
208                    let end_val = evaluator.evaluate(end, dummy_row)?;
209                    let step_val = if let Some(step_expr) = step {
210                        Some(evaluator.evaluate(step_expr, dummy_row)?)
211                    } else {
212                        None
213                    };
214
215                    // Convert DataValues to integers
216                    let start_int = match start_val {
217                        DataValue::Integer(i) => i,
218                        DataValue::Float(f) => f as i64,
219                        _ => return Err(anyhow!("RANGE start must be numeric")),
220                    };
221
222                    let end_int = match end_val {
223                        DataValue::Integer(i) => i,
224                        DataValue::Float(f) => f as i64,
225                        _ => return Err(anyhow!("RANGE end must be numeric")),
226                    };
227
228                    let step_int = if let Some(step) = step_val {
229                        match step {
230                            DataValue::Integer(i) => Some(i),
231                            DataValue::Float(f) => Some(f as i64),
232                            _ => return Err(anyhow!("RANGE step must be numeric")),
233                        }
234                    } else {
235                        None
236                    };
237
238                    // Generate the virtual table
239                    VirtualTableGenerator::generate_range(start_int, end_int, step_int, None)?
240                }
241            }
242        } else if let Some(ref subquery) = statement.from_subquery {
243            // Execute the subquery and use its result as the source
244            debug!("QueryEngine: Processing FROM subquery...");
245            let subquery_result =
246                self.build_view_with_context(table.clone(), *subquery.clone(), cte_context)?;
247
248            // Convert the DataView to a DataTable for use as source
249            // This materializes the subquery result
250            let materialized = self.materialize_view(subquery_result)?;
251            Arc::new(materialized)
252        } else if let Some(ref table_name) = statement.from_table {
253            // Check if this references a CTE
254            if let Some(cte_view) = cte_context.get(table_name) {
255                debug!("QueryEngine: Using CTE '{}' as source table", table_name);
256                // Materialize the CTE view as a table
257                let materialized = self.materialize_view((**cte_view).clone())?;
258                Arc::new(materialized)
259            } else {
260                // Regular table reference - use the provided table
261                table.clone()
262            }
263        } else {
264            // No FROM clause - use the provided table
265            table.clone()
266        };
267
268        // Continue with the existing build_view logic but using source_table
269        self.build_view_internal(source_table, statement)
270    }
271
272    /// Materialize a DataView into a new DataTable
273    fn materialize_view(&self, view: DataView) -> Result<DataTable> {
274        let source = view.source();
275        let mut result_table = DataTable::new("derived");
276
277        // Get the visible columns from the view
278        let visible_cols = view.visible_column_indices().to_vec();
279
280        // Copy column definitions
281        for col_idx in &visible_cols {
282            let col = &source.columns[*col_idx];
283            let new_col = DataColumn {
284                name: col.name.clone(),
285                data_type: col.data_type.clone(),
286                nullable: col.nullable,
287                unique_values: col.unique_values,
288                null_count: col.null_count,
289                metadata: col.metadata.clone(),
290            };
291            result_table.add_column(new_col);
292        }
293
294        // Copy visible rows
295        for row_idx in view.visible_row_indices() {
296            let source_row = &source.rows[*row_idx];
297            let mut new_row = DataRow { values: Vec::new() };
298
299            for col_idx in &visible_cols {
300                new_row.values.push(source_row.values[*col_idx].clone());
301            }
302
303            result_table.add_row(new_row);
304        }
305
306        Ok(result_table)
307    }
308
309    fn build_view_internal(
310        &self,
311        table: Arc<DataTable>,
312        statement: SelectStatement,
313    ) -> Result<DataView> {
314        debug!(
315            "QueryEngine::build_view - select_items: {:?}",
316            statement.select_items
317        );
318        debug!(
319            "QueryEngine::build_view - where_clause: {:?}",
320            statement.where_clause
321        );
322
323        // Start with all rows visible
324        let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
325
326        // Apply WHERE clause filtering using recursive evaluator
327        if let Some(where_clause) = &statement.where_clause {
328            let total_rows = table.row_count();
329            debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
330            debug!("QueryEngine: WHERE clause = {:?}", where_clause);
331
332            let filter_start = Instant::now();
333            // Filter visible rows based on WHERE clause
334            let mut filtered_rows = Vec::new();
335            for row_idx in visible_rows {
336                // Only log for first few rows to avoid performance impact
337                if row_idx < 3 {
338                    debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
339                }
340                let evaluator = RecursiveWhereEvaluator::with_config(
341                    &table,
342                    self.case_insensitive,
343                    self.date_notation.clone(),
344                );
345                match evaluator.evaluate(where_clause, row_idx) {
346                    Ok(result) => {
347                        if row_idx < 3 {
348                            debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
349                        }
350                        if result {
351                            filtered_rows.push(row_idx);
352                        }
353                    }
354                    Err(e) => {
355                        if row_idx < 3 {
356                            debug!(
357                                "QueryEngine: WHERE evaluation error for row {}: {}",
358                                row_idx, e
359                            );
360                        }
361                        // Propagate WHERE clause errors instead of silently ignoring them
362                        return Err(e);
363                    }
364                }
365            }
366            visible_rows = filtered_rows;
367            let filter_duration = filter_start.elapsed();
368            info!(
369                "WHERE clause filtering: {} rows -> {} rows in {:?}",
370                total_rows,
371                visible_rows.len(),
372                filter_duration
373            );
374
375            // Debug log moved to info level above with timing
376        }
377
378        // Create initial DataView with filtered rows
379        let mut view = DataView::new(table.clone());
380        view = view.with_rows(visible_rows);
381
382        // Handle GROUP BY if present
383        if let Some(group_by_columns) = &statement.group_by {
384            if !group_by_columns.is_empty() {
385                debug!("QueryEngine: Processing GROUP BY: {:?}", group_by_columns);
386                view = self.apply_group_by(
387                    view,
388                    group_by_columns,
389                    &statement.select_items,
390                    statement.having.as_ref(),
391                )?;
392            }
393        } else {
394            // Apply column projection or computed expressions (SELECT clause) - do this AFTER filtering
395            if !statement.select_items.is_empty() {
396                // Check if we have ANY non-star items (not just the first one)
397                let has_non_star_items = statement
398                    .select_items
399                    .iter()
400                    .any(|item| !matches!(item, SelectItem::Star));
401
402                // Apply select items if:
403                // 1. We have computed expressions or explicit columns
404                // 2. OR we have a mix of star and other items (e.g., SELECT *, computed_col)
405                if has_non_star_items || statement.select_items.len() > 1 {
406                    view = self.apply_select_items(view, &statement.select_items)?;
407                }
408                // If it's just a single star, no projection needed
409            } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
410                // Fallback to legacy column projection for backward compatibility
411                let column_indices = self.resolve_column_indices(&table, &statement.columns)?;
412                view = view.with_columns(column_indices);
413            }
414        }
415
416        // Apply DISTINCT if specified
417        if statement.distinct {
418            view = self.apply_distinct(view)?;
419        }
420
421        // Apply ORDER BY sorting
422        if let Some(order_by_columns) = &statement.order_by {
423            if !order_by_columns.is_empty() {
424                view = self.apply_multi_order_by(view, order_by_columns)?;
425            }
426        }
427
428        // Apply LIMIT/OFFSET
429        if let Some(limit) = statement.limit {
430            let offset = statement.offset.unwrap_or(0);
431            view = view.with_limit(limit, offset);
432        }
433
434        Ok(view)
435    }
436
437    /// Resolve column names to indices
438    fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
439        let mut indices = Vec::new();
440        let table_columns = table.column_names();
441
442        for col_name in columns {
443            let index = table_columns
444                .iter()
445                .position(|c| c.eq_ignore_ascii_case(col_name))
446                .ok_or_else(|| {
447                    let suggestion = self.find_similar_column(table, col_name);
448                    match suggestion {
449                        Some(similar) => anyhow::anyhow!(
450                            "Column '{}' not found. Did you mean '{}'?",
451                            col_name,
452                            similar
453                        ),
454                        None => anyhow::anyhow!("Column '{}' not found", col_name),
455                    }
456                })?;
457            indices.push(index);
458        }
459
460        Ok(indices)
461    }
462
463    /// Apply SELECT items (columns and computed expressions) to create new view
464    fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
465        debug!(
466            "QueryEngine::apply_select_items - items: {:?}",
467            select_items
468        );
469        debug!(
470            "QueryEngine::apply_select_items - input view has {} rows",
471            view.row_count()
472        );
473
474        // Check if ALL select items are aggregate functions (no GROUP BY)
475        let all_aggregates = select_items.iter().all(|item| match item {
476            SelectItem::Expression { expr, .. } => contains_aggregate(expr),
477            SelectItem::Column(_) => false,
478            SelectItem::Star => false,
479        });
480
481        if all_aggregates && view.row_count() > 0 {
482            // Special handling for aggregate-only queries (no GROUP BY)
483            // These should produce exactly one row
484            return self.apply_aggregate_select(view, select_items);
485        }
486
487        // Check if we need to create computed columns
488        let has_computed_expressions = select_items
489            .iter()
490            .any(|item| matches!(item, SelectItem::Expression { .. }));
491
492        debug!(
493            "QueryEngine::apply_select_items - has_computed_expressions: {}",
494            has_computed_expressions
495        );
496
497        if !has_computed_expressions {
498            // Simple case: only columns, use existing projection logic
499            let column_indices = self.resolve_select_columns(view.source(), select_items)?;
500            return Ok(view.with_columns(column_indices));
501        }
502
503        // Complex case: we have computed expressions
504        // IMPORTANT: We create a PROJECTED view, not a new table
505        // This preserves the original DataTable reference
506
507        let source_table = view.source();
508        let visible_rows = view.visible_row_indices();
509
510        // Create a temporary table just for the computed result view
511        // But this table is only used for the current query result
512        let mut computed_table = DataTable::new("query_result");
513
514        // First, expand any Star selectors to actual columns
515        let mut expanded_items = Vec::new();
516        for item in select_items {
517            match item {
518                SelectItem::Star => {
519                    // Expand * to all columns from source table
520                    for col_name in source_table.column_names() {
521                        expanded_items.push(SelectItem::Column(col_name));
522                    }
523                }
524                _ => expanded_items.push(item.clone()),
525            }
526        }
527
528        // Add columns based on expanded SelectItems, handling duplicates
529        let mut column_name_counts: std::collections::HashMap<String, usize> =
530            std::collections::HashMap::new();
531
532        for item in &expanded_items {
533            let base_name = match item {
534                SelectItem::Column(name) => name.clone(),
535                SelectItem::Expression { alias, .. } => alias.clone(),
536                SelectItem::Star => unreachable!("Star should have been expanded"),
537            };
538
539            // Check if this column name has been used before
540            let count = column_name_counts.entry(base_name.clone()).or_insert(0);
541            let column_name = if *count == 0 {
542                // First occurrence, use the name as-is
543                base_name.clone()
544            } else {
545                // Duplicate, append a suffix
546                format!("{base_name}_{count}")
547            };
548            *count += 1;
549
550            computed_table.add_column(DataColumn::new(&column_name));
551        }
552
553        // Calculate values for each row
554        let mut evaluator =
555            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
556
557        for &row_idx in visible_rows {
558            let mut row_values = Vec::new();
559
560            for item in &expanded_items {
561                let value = match item {
562                    SelectItem::Column(col_name) => {
563                        // Simple column reference
564                        let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
565                            let suggestion = self.find_similar_column(source_table, col_name);
566                            match suggestion {
567                                Some(similar) => anyhow::anyhow!(
568                                    "Column '{}' not found. Did you mean '{}'?",
569                                    col_name,
570                                    similar
571                                ),
572                                None => anyhow::anyhow!("Column '{}' not found", col_name),
573                            }
574                        })?;
575                        let row = source_table
576                            .get_row(row_idx)
577                            .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
578                        row.get(col_idx)
579                            .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
580                            .clone()
581                    }
582                    SelectItem::Expression { expr, .. } => {
583                        // Computed expression
584                        evaluator.evaluate(expr, row_idx)?
585                    }
586                    SelectItem::Star => unreachable!("Star should have been expanded"),
587                };
588                row_values.push(value);
589            }
590
591            computed_table
592                .add_row(DataRow::new(row_values))
593                .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
594        }
595
596        // Return a view of the computed result
597        // This is a temporary view for this query only
598        Ok(DataView::new(Arc::new(computed_table)))
599    }
600
601    /// Apply aggregate-only SELECT (no GROUP BY - produces single row)
602    fn apply_aggregate_select(
603        &self,
604        view: DataView,
605        select_items: &[SelectItem],
606    ) -> Result<DataView> {
607        debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
608
609        let source_table = view.source();
610        let mut result_table = DataTable::new("aggregate_result");
611
612        // Add columns for each select item
613        for item in select_items {
614            let column_name = match item {
615                SelectItem::Expression { alias, .. } => alias.clone(),
616                _ => unreachable!("Should only have expressions in aggregate-only query"),
617            };
618            result_table.add_column(DataColumn::new(&column_name));
619        }
620
621        // Create evaluator with visible rows from the view (for filtered aggregates)
622        let visible_rows = view.visible_row_indices().to_vec();
623        let mut evaluator =
624            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
625                .with_visible_rows(visible_rows);
626
627        // Evaluate each aggregate expression once (they handle all rows internally)
628        let mut row_values = Vec::new();
629        for item in select_items {
630            match item {
631                SelectItem::Expression { expr, .. } => {
632                    // The evaluator will handle aggregates over all rows
633                    // We pass row_index=0 but aggregates ignore it and process all rows
634                    let value = evaluator.evaluate(expr, 0)?;
635                    row_values.push(value);
636                }
637                _ => unreachable!("Should only have expressions in aggregate-only query"),
638            }
639        }
640
641        // Add the single result row
642        result_table
643            .add_row(DataRow::new(row_values))
644            .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
645
646        Ok(DataView::new(Arc::new(result_table)))
647    }
648
649    /// Resolve `SelectItem` columns to indices (for simple column projections only)
650    fn resolve_select_columns(
651        &self,
652        table: &DataTable,
653        select_items: &[SelectItem],
654    ) -> Result<Vec<usize>> {
655        let mut indices = Vec::new();
656        let table_columns = table.column_names();
657
658        for item in select_items {
659            match item {
660                SelectItem::Column(col_name) => {
661                    let index = table_columns
662                        .iter()
663                        .position(|c| c.eq_ignore_ascii_case(col_name))
664                        .ok_or_else(|| {
665                            let suggestion = self.find_similar_column(table, col_name);
666                            match suggestion {
667                                Some(similar) => anyhow::anyhow!(
668                                    "Column '{}' not found. Did you mean '{}'?",
669                                    col_name,
670                                    similar
671                                ),
672                                None => anyhow::anyhow!("Column '{}' not found", col_name),
673                            }
674                        })?;
675                    indices.push(index);
676                }
677                SelectItem::Star => {
678                    // Expand * to all column indices
679                    for i in 0..table_columns.len() {
680                        indices.push(i);
681                    }
682                }
683                SelectItem::Expression { .. } => {
684                    return Err(anyhow::anyhow!(
685                        "Computed expressions require new table creation"
686                    ));
687                }
688            }
689        }
690
691        Ok(indices)
692    }
693
694    /// Apply DISTINCT to remove duplicate rows
695    fn apply_distinct(&self, view: DataView) -> Result<DataView> {
696        use std::collections::HashSet;
697
698        let source = view.source();
699        let visible_cols = view.visible_column_indices();
700        let visible_rows = view.visible_row_indices();
701
702        // Build a set to track unique rows
703        let mut seen_rows = HashSet::new();
704        let mut unique_row_indices = Vec::new();
705
706        for &row_idx in visible_rows {
707            // Build a key representing this row's visible column values
708            let mut row_key = Vec::new();
709            for &col_idx in visible_cols {
710                let value = source
711                    .get_value(row_idx, col_idx)
712                    .ok_or_else(|| anyhow!("Invalid cell reference"))?;
713                // Convert value to a hashable representation
714                row_key.push(format!("{:?}", value));
715            }
716
717            // Check if we've seen this row before
718            if seen_rows.insert(row_key) {
719                // First time seeing this row combination
720                unique_row_indices.push(row_idx);
721            }
722        }
723
724        // Create a new view with only unique rows
725        Ok(view.with_rows(unique_row_indices))
726    }
727
728    /// Apply multi-column ORDER BY sorting to the view
729    fn apply_multi_order_by(
730        &self,
731        mut view: DataView,
732        order_by_columns: &[OrderByColumn],
733    ) -> Result<DataView> {
734        // Build list of (source_column_index, ascending) tuples
735        let mut sort_columns = Vec::new();
736
737        for order_col in order_by_columns {
738            // Try to find the column index
739            // Check in the current view's source table (this handles both regular columns and computed columns)
740            // This is especially important after GROUP BY where we have a new result table with aggregate aliases
741            let col_index = view
742                .source()
743                .get_column_index(&order_col.column)
744                .ok_or_else(|| {
745                    // If not found, provide helpful error with suggestions
746                    let suggestion = self.find_similar_column(view.source(), &order_col.column);
747                    match suggestion {
748                        Some(similar) => anyhow::anyhow!(
749                            "Column '{}' not found. Did you mean '{}'?",
750                            order_col.column,
751                            similar
752                        ),
753                        None => {
754                            // Also list available columns for debugging
755                            let available_cols = view.source().column_names().join(", ");
756                            anyhow::anyhow!(
757                                "Column '{}' not found. Available columns: {}",
758                                order_col.column,
759                                available_cols
760                            )
761                        }
762                    }
763                })?;
764
765            let ascending = matches!(order_col.direction, SortDirection::Asc);
766            sort_columns.push((col_index, ascending));
767        }
768
769        // Apply multi-column sorting
770        view.apply_multi_sort(&sort_columns)?;
771        Ok(view)
772    }
773
774    /// Apply GROUP BY to the view with optional HAVING clause
775    fn apply_group_by(
776        &self,
777        view: DataView,
778        group_by_columns: &[String],
779        select_items: &[SelectItem],
780        having: Option<&SqlExpression>,
781    ) -> Result<DataView> {
782        debug!(
783            "QueryEngine::apply_group_by - grouping by: {:?}",
784            group_by_columns
785        );
786
787        // Group the data using DataView's group_by method
788        let groups = view.group_by(group_by_columns)?;
789        debug!(
790            "QueryEngine::apply_group_by - created {} groups",
791            groups.len()
792        );
793
794        // Create a result table for the grouped data
795        let mut result_table = DataTable::new("grouped_result");
796
797        // Determine which columns to include in the result
798        // 1. GROUP BY columns always come first
799        // 2. Then aggregate functions
800
801        // Add GROUP BY columns to result
802        for col_name in group_by_columns {
803            result_table.add_column(DataColumn::new(col_name));
804        }
805
806        // Process SELECT items to find aggregates and their aliases
807        let mut aggregate_columns = Vec::new();
808        for item in select_items {
809            match item {
810                SelectItem::Expression { expr, alias } => {
811                    if contains_aggregate(expr) {
812                        // Expression always has an alias in the AST
813                        result_table.add_column(DataColumn::new(alias));
814                        aggregate_columns.push((expr.clone(), alias.clone()));
815                    }
816                }
817                SelectItem::Column(col_name) => {
818                    // Non-aggregate columns must be in GROUP BY
819                    if !group_by_columns.contains(col_name) {
820                        return Err(anyhow!(
821                            "Column '{}' must appear in GROUP BY clause or be used in an aggregate function",
822                            col_name
823                        ));
824                    }
825                }
826                SelectItem::Star => {
827                    // For GROUP BY queries, * is not really meaningful
828                    // We'll just include the GROUP BY columns which we already added
829                }
830            }
831        }
832
833        // Process each group
834        for (group_key, group_view) in groups {
835            let mut row_values = Vec::new();
836            let mut aggregate_values = std::collections::HashMap::new();
837
838            // Add GROUP BY column values from the key
839            for (i, value) in group_key.0.iter().enumerate() {
840                row_values.push(value.clone());
841                // Store GROUP BY columns for HAVING evaluation
842                if i < group_by_columns.len() {
843                    aggregate_values.insert(group_by_columns[i].clone(), value.clone());
844                }
845            }
846
847            // Calculate aggregate values for this group
848            for (expr, col_name) in &aggregate_columns {
849                // Set the visible rows for this group so aggregates work correctly
850                let group_rows = group_view.get_visible_rows();
851                let mut evaluator = ArithmeticEvaluator::new(group_view.source())
852                    .with_visible_rows(group_rows.clone());
853
854                // For aggregates, we need to evaluate across all rows in the group
855                let value = if group_view.row_count() > 0 && !group_rows.is_empty() {
856                    // Evaluate the aggregate expression
857                    // The arithmetic evaluator will handle the aggregate across visible rows
858                    evaluator
859                        .evaluate(expr, group_rows[0])
860                        .unwrap_or(DataValue::Null)
861                } else {
862                    DataValue::Null
863                };
864
865                // Store aggregate value for HAVING evaluation
866                aggregate_values.insert(col_name.clone(), value.clone());
867                row_values.push(value);
868            }
869
870            // Evaluate HAVING clause if present
871            if let Some(having_expr) = having {
872                // Create a temporary table with one row containing the aggregate values
873                let mut temp_table = DataTable::new("having_eval");
874                for col_name in aggregate_values.keys() {
875                    temp_table.add_column(DataColumn::new(col_name));
876                }
877
878                let temp_row_values: Vec<DataValue> = aggregate_values.values().cloned().collect();
879                temp_table
880                    .add_row(DataRow::new(temp_row_values))
881                    .map_err(|e| anyhow!("Failed to create temp table for HAVING: {}", e))?;
882
883                // Evaluate HAVING expression on the aggregate values
884                let mut evaluator = ArithmeticEvaluator::new(&temp_table);
885                let having_result = evaluator
886                    .evaluate(having_expr, 0)
887                    .unwrap_or(DataValue::Boolean(false));
888
889                // Skip this group if HAVING condition is not met
890                match having_result {
891                    DataValue::Boolean(false) => continue,
892                    DataValue::Null => continue,
893                    _ => {} // Include the row
894                }
895            }
896
897            // Add the row to the result table
898            result_table
899                .add_row(DataRow::new(row_values))
900                .map_err(|e| anyhow!("Failed to add row to result table: {}", e))?;
901        }
902
903        // Return a DataView of the grouped result
904        Ok(DataView::new(Arc::new(result_table)))
905    }
906}
907
908#[cfg(test)]
909mod tests {
910    use super::*;
911    use crate::data::datatable::{DataColumn, DataRow, DataValue};
912
913    fn create_test_table() -> Arc<DataTable> {
914        let mut table = DataTable::new("test");
915
916        // Add columns
917        table.add_column(DataColumn::new("id"));
918        table.add_column(DataColumn::new("name"));
919        table.add_column(DataColumn::new("age"));
920
921        // Add rows
922        table
923            .add_row(DataRow::new(vec![
924                DataValue::Integer(1),
925                DataValue::String("Alice".to_string()),
926                DataValue::Integer(30),
927            ]))
928            .unwrap();
929
930        table
931            .add_row(DataRow::new(vec![
932                DataValue::Integer(2),
933                DataValue::String("Bob".to_string()),
934                DataValue::Integer(25),
935            ]))
936            .unwrap();
937
938        table
939            .add_row(DataRow::new(vec![
940                DataValue::Integer(3),
941                DataValue::String("Charlie".to_string()),
942                DataValue::Integer(35),
943            ]))
944            .unwrap();
945
946        Arc::new(table)
947    }
948
949    #[test]
950    fn test_select_all() {
951        let table = create_test_table();
952        let engine = QueryEngine::new();
953
954        let view = engine
955            .execute(table.clone(), "SELECT * FROM users")
956            .unwrap();
957        assert_eq!(view.row_count(), 3);
958        assert_eq!(view.column_count(), 3);
959    }
960
961    #[test]
962    fn test_select_columns() {
963        let table = create_test_table();
964        let engine = QueryEngine::new();
965
966        let view = engine
967            .execute(table.clone(), "SELECT name, age FROM users")
968            .unwrap();
969        assert_eq!(view.row_count(), 3);
970        assert_eq!(view.column_count(), 2);
971    }
972
973    #[test]
974    fn test_select_with_limit() {
975        let table = create_test_table();
976        let engine = QueryEngine::new();
977
978        let view = engine
979            .execute(table.clone(), "SELECT * FROM users LIMIT 2")
980            .unwrap();
981        assert_eq!(view.row_count(), 2);
982    }
983
984    #[test]
985    fn test_type_coercion_contains() {
986        // Initialize tracing for debug output
987        let _ = tracing_subscriber::fmt()
988            .with_max_level(tracing::Level::DEBUG)
989            .try_init();
990
991        let mut table = DataTable::new("test");
992        table.add_column(DataColumn::new("id"));
993        table.add_column(DataColumn::new("status"));
994        table.add_column(DataColumn::new("price"));
995
996        // Add test data with mixed types
997        table
998            .add_row(DataRow::new(vec![
999                DataValue::Integer(1),
1000                DataValue::String("Pending".to_string()),
1001                DataValue::Float(99.99),
1002            ]))
1003            .unwrap();
1004
1005        table
1006            .add_row(DataRow::new(vec![
1007                DataValue::Integer(2),
1008                DataValue::String("Confirmed".to_string()),
1009                DataValue::Float(150.50),
1010            ]))
1011            .unwrap();
1012
1013        table
1014            .add_row(DataRow::new(vec![
1015                DataValue::Integer(3),
1016                DataValue::String("Pending".to_string()),
1017                DataValue::Float(75.00),
1018            ]))
1019            .unwrap();
1020
1021        let table = Arc::new(table);
1022        let engine = QueryEngine::new();
1023
1024        println!("\n=== Testing WHERE clause with Contains ===");
1025        println!("Table has {} rows", table.row_count());
1026        for i in 0..table.row_count() {
1027            let status = table.get_value(i, 1);
1028            println!("Row {i}: status = {status:?}");
1029        }
1030
1031        // Test 1: Basic string contains (should work)
1032        println!("\n--- Test 1: status.Contains('pend') ---");
1033        let result = engine.execute(
1034            table.clone(),
1035            "SELECT * FROM test WHERE status.Contains('pend')",
1036        );
1037        match result {
1038            Ok(view) => {
1039                println!("SUCCESS: Found {} matching rows", view.row_count());
1040                assert_eq!(view.row_count(), 2); // Should find both Pending rows
1041            }
1042            Err(e) => {
1043                panic!("Query failed: {e}");
1044            }
1045        }
1046
1047        // Test 2: Numeric contains (should work with type coercion)
1048        println!("\n--- Test 2: price.Contains('9') ---");
1049        let result = engine.execute(
1050            table.clone(),
1051            "SELECT * FROM test WHERE price.Contains('9')",
1052        );
1053        match result {
1054            Ok(view) => {
1055                println!(
1056                    "SUCCESS: Found {} matching rows with price containing '9'",
1057                    view.row_count()
1058                );
1059                // Should find 99.99 row
1060                assert!(view.row_count() >= 1);
1061            }
1062            Err(e) => {
1063                panic!("Numeric coercion query failed: {e}");
1064            }
1065        }
1066
1067        println!("\n=== All tests passed! ===");
1068    }
1069
1070    #[test]
1071    fn test_not_in_clause() {
1072        // Initialize tracing for debug output
1073        let _ = tracing_subscriber::fmt()
1074            .with_max_level(tracing::Level::DEBUG)
1075            .try_init();
1076
1077        let mut table = DataTable::new("test");
1078        table.add_column(DataColumn::new("id"));
1079        table.add_column(DataColumn::new("country"));
1080
1081        // Add test data
1082        table
1083            .add_row(DataRow::new(vec![
1084                DataValue::Integer(1),
1085                DataValue::String("CA".to_string()),
1086            ]))
1087            .unwrap();
1088
1089        table
1090            .add_row(DataRow::new(vec![
1091                DataValue::Integer(2),
1092                DataValue::String("US".to_string()),
1093            ]))
1094            .unwrap();
1095
1096        table
1097            .add_row(DataRow::new(vec![
1098                DataValue::Integer(3),
1099                DataValue::String("UK".to_string()),
1100            ]))
1101            .unwrap();
1102
1103        let table = Arc::new(table);
1104        let engine = QueryEngine::new();
1105
1106        println!("\n=== Testing NOT IN clause ===");
1107        println!("Table has {} rows", table.row_count());
1108        for i in 0..table.row_count() {
1109            let country = table.get_value(i, 1);
1110            println!("Row {i}: country = {country:?}");
1111        }
1112
1113        // Test NOT IN clause - should exclude CA, return US and UK (2 rows)
1114        println!("\n--- Test: country NOT IN ('CA') ---");
1115        let result = engine.execute(
1116            table.clone(),
1117            "SELECT * FROM test WHERE country NOT IN ('CA')",
1118        );
1119        match result {
1120            Ok(view) => {
1121                println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
1122                assert_eq!(view.row_count(), 2); // Should find US and UK
1123            }
1124            Err(e) => {
1125                panic!("NOT IN query failed: {e}");
1126            }
1127        }
1128
1129        println!("\n=== NOT IN test complete! ===");
1130    }
1131
1132    #[test]
1133    fn test_case_insensitive_in_and_not_in() {
1134        // Initialize tracing for debug output
1135        let _ = tracing_subscriber::fmt()
1136            .with_max_level(tracing::Level::DEBUG)
1137            .try_init();
1138
1139        let mut table = DataTable::new("test");
1140        table.add_column(DataColumn::new("id"));
1141        table.add_column(DataColumn::new("country"));
1142
1143        // Add test data with mixed case
1144        table
1145            .add_row(DataRow::new(vec![
1146                DataValue::Integer(1),
1147                DataValue::String("CA".to_string()), // uppercase
1148            ]))
1149            .unwrap();
1150
1151        table
1152            .add_row(DataRow::new(vec![
1153                DataValue::Integer(2),
1154                DataValue::String("us".to_string()), // lowercase
1155            ]))
1156            .unwrap();
1157
1158        table
1159            .add_row(DataRow::new(vec![
1160                DataValue::Integer(3),
1161                DataValue::String("UK".to_string()), // uppercase
1162            ]))
1163            .unwrap();
1164
1165        let table = Arc::new(table);
1166
1167        println!("\n=== Testing Case-Insensitive IN clause ===");
1168        println!("Table has {} rows", table.row_count());
1169        for i in 0..table.row_count() {
1170            let country = table.get_value(i, 1);
1171            println!("Row {i}: country = {country:?}");
1172        }
1173
1174        // Test case-insensitive IN - should match 'CA' with 'ca'
1175        println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
1176        let engine = QueryEngine::with_case_insensitive(true);
1177        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1178        match result {
1179            Ok(view) => {
1180                println!(
1181                    "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
1182                    view.row_count()
1183                );
1184                assert_eq!(view.row_count(), 1); // Should find CA row
1185            }
1186            Err(e) => {
1187                panic!("Case-insensitive IN query failed: {e}");
1188            }
1189        }
1190
1191        // Test case-insensitive NOT IN - should exclude 'CA' when searching for 'ca'
1192        println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
1193        let result = engine.execute(
1194            table.clone(),
1195            "SELECT * FROM test WHERE country NOT IN ('ca')",
1196        );
1197        match result {
1198            Ok(view) => {
1199                println!(
1200                    "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
1201                    view.row_count()
1202                );
1203                assert_eq!(view.row_count(), 2); // Should find us and UK rows
1204            }
1205            Err(e) => {
1206                panic!("Case-insensitive NOT IN query failed: {e}");
1207            }
1208        }
1209
1210        // Test case-sensitive (default) - should NOT match 'CA' with 'ca'
1211        println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
1212        let engine_case_sensitive = QueryEngine::new(); // defaults to case_insensitive=false
1213        let result = engine_case_sensitive
1214            .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1215        match result {
1216            Ok(view) => {
1217                println!(
1218                    "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
1219                    view.row_count()
1220                );
1221                assert_eq!(view.row_count(), 0); // Should find no rows (CA != ca)
1222            }
1223            Err(e) => {
1224                panic!("Case-sensitive IN query failed: {e}");
1225            }
1226        }
1227
1228        println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
1229    }
1230
1231    #[test]
1232    #[ignore = "Parentheses in WHERE clause not yet implemented"]
1233    fn test_parentheses_in_where_clause() {
1234        // Initialize tracing for debug output
1235        let _ = tracing_subscriber::fmt()
1236            .with_max_level(tracing::Level::DEBUG)
1237            .try_init();
1238
1239        let mut table = DataTable::new("test");
1240        table.add_column(DataColumn::new("id"));
1241        table.add_column(DataColumn::new("status"));
1242        table.add_column(DataColumn::new("priority"));
1243
1244        // Add test data
1245        table
1246            .add_row(DataRow::new(vec![
1247                DataValue::Integer(1),
1248                DataValue::String("Pending".to_string()),
1249                DataValue::String("High".to_string()),
1250            ]))
1251            .unwrap();
1252
1253        table
1254            .add_row(DataRow::new(vec![
1255                DataValue::Integer(2),
1256                DataValue::String("Complete".to_string()),
1257                DataValue::String("High".to_string()),
1258            ]))
1259            .unwrap();
1260
1261        table
1262            .add_row(DataRow::new(vec![
1263                DataValue::Integer(3),
1264                DataValue::String("Pending".to_string()),
1265                DataValue::String("Low".to_string()),
1266            ]))
1267            .unwrap();
1268
1269        table
1270            .add_row(DataRow::new(vec![
1271                DataValue::Integer(4),
1272                DataValue::String("Complete".to_string()),
1273                DataValue::String("Low".to_string()),
1274            ]))
1275            .unwrap();
1276
1277        let table = Arc::new(table);
1278        let engine = QueryEngine::new();
1279
1280        println!("\n=== Testing Parentheses in WHERE clause ===");
1281        println!("Table has {} rows", table.row_count());
1282        for i in 0..table.row_count() {
1283            let status = table.get_value(i, 1);
1284            let priority = table.get_value(i, 2);
1285            println!("Row {i}: status = {status:?}, priority = {priority:?}");
1286        }
1287
1288        // Test OR with parentheses - should get (Pending AND High) OR (Complete AND Low)
1289        println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
1290        let result = engine.execute(
1291            table.clone(),
1292            "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
1293        );
1294        match result {
1295            Ok(view) => {
1296                println!(
1297                    "SUCCESS: Found {} rows with parenthetical logic",
1298                    view.row_count()
1299                );
1300                assert_eq!(view.row_count(), 2); // Should find rows 1 and 4
1301            }
1302            Err(e) => {
1303                panic!("Parentheses query failed: {e}");
1304            }
1305        }
1306
1307        println!("\n=== Parentheses test complete! ===");
1308    }
1309
1310    #[test]
1311    #[ignore = "Numeric type coercion needs fixing"]
1312    fn test_numeric_type_coercion() {
1313        // Initialize tracing for debug output
1314        let _ = tracing_subscriber::fmt()
1315            .with_max_level(tracing::Level::DEBUG)
1316            .try_init();
1317
1318        let mut table = DataTable::new("test");
1319        table.add_column(DataColumn::new("id"));
1320        table.add_column(DataColumn::new("price"));
1321        table.add_column(DataColumn::new("quantity"));
1322
1323        // Add test data with different numeric types
1324        table
1325            .add_row(DataRow::new(vec![
1326                DataValue::Integer(1),
1327                DataValue::Float(99.50), // Contains '.'
1328                DataValue::Integer(100),
1329            ]))
1330            .unwrap();
1331
1332        table
1333            .add_row(DataRow::new(vec![
1334                DataValue::Integer(2),
1335                DataValue::Float(150.0), // Contains '.' and '0'
1336                DataValue::Integer(200),
1337            ]))
1338            .unwrap();
1339
1340        table
1341            .add_row(DataRow::new(vec![
1342                DataValue::Integer(3),
1343                DataValue::Integer(75), // No decimal point
1344                DataValue::Integer(50),
1345            ]))
1346            .unwrap();
1347
1348        let table = Arc::new(table);
1349        let engine = QueryEngine::new();
1350
1351        println!("\n=== Testing Numeric Type Coercion ===");
1352        println!("Table has {} rows", table.row_count());
1353        for i in 0..table.row_count() {
1354            let price = table.get_value(i, 1);
1355            let quantity = table.get_value(i, 2);
1356            println!("Row {i}: price = {price:?}, quantity = {quantity:?}");
1357        }
1358
1359        // Test Contains on float values - should find rows with decimal points
1360        println!("\n--- Test: price.Contains('.') ---");
1361        let result = engine.execute(
1362            table.clone(),
1363            "SELECT * FROM test WHERE price.Contains('.')",
1364        );
1365        match result {
1366            Ok(view) => {
1367                println!(
1368                    "SUCCESS: Found {} rows with decimal points in price",
1369                    view.row_count()
1370                );
1371                assert_eq!(view.row_count(), 2); // Should find 99.50 and 150.0
1372            }
1373            Err(e) => {
1374                panic!("Numeric Contains query failed: {e}");
1375            }
1376        }
1377
1378        // Test Contains on integer values converted to string
1379        println!("\n--- Test: quantity.Contains('0') ---");
1380        let result = engine.execute(
1381            table.clone(),
1382            "SELECT * FROM test WHERE quantity.Contains('0')",
1383        );
1384        match result {
1385            Ok(view) => {
1386                println!(
1387                    "SUCCESS: Found {} rows with '0' in quantity",
1388                    view.row_count()
1389                );
1390                assert_eq!(view.row_count(), 2); // Should find 100 and 200
1391            }
1392            Err(e) => {
1393                panic!("Integer Contains query failed: {e}");
1394            }
1395        }
1396
1397        println!("\n=== Numeric type coercion test complete! ===");
1398    }
1399
1400    #[test]
1401    fn test_datetime_comparisons() {
1402        // Initialize tracing for debug output
1403        let _ = tracing_subscriber::fmt()
1404            .with_max_level(tracing::Level::DEBUG)
1405            .try_init();
1406
1407        let mut table = DataTable::new("test");
1408        table.add_column(DataColumn::new("id"));
1409        table.add_column(DataColumn::new("created_date"));
1410
1411        // Add test data with date strings (as they would come from CSV)
1412        table
1413            .add_row(DataRow::new(vec![
1414                DataValue::Integer(1),
1415                DataValue::String("2024-12-15".to_string()),
1416            ]))
1417            .unwrap();
1418
1419        table
1420            .add_row(DataRow::new(vec![
1421                DataValue::Integer(2),
1422                DataValue::String("2025-01-15".to_string()),
1423            ]))
1424            .unwrap();
1425
1426        table
1427            .add_row(DataRow::new(vec![
1428                DataValue::Integer(3),
1429                DataValue::String("2025-02-15".to_string()),
1430            ]))
1431            .unwrap();
1432
1433        let table = Arc::new(table);
1434        let engine = QueryEngine::new();
1435
1436        println!("\n=== Testing DateTime Comparisons ===");
1437        println!("Table has {} rows", table.row_count());
1438        for i in 0..table.row_count() {
1439            let date = table.get_value(i, 1);
1440            println!("Row {i}: created_date = {date:?}");
1441        }
1442
1443        // Test DateTime constructor comparison - should find dates after 2025-01-01
1444        println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1445        let result = engine.execute(
1446            table.clone(),
1447            "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1448        );
1449        match result {
1450            Ok(view) => {
1451                println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1452                assert_eq!(view.row_count(), 2); // Should find 2025-01-15 and 2025-02-15
1453            }
1454            Err(e) => {
1455                panic!("DateTime comparison query failed: {e}");
1456            }
1457        }
1458
1459        println!("\n=== DateTime comparison test complete! ===");
1460    }
1461
1462    #[test]
1463    fn test_not_with_method_calls() {
1464        // Initialize tracing for debug output
1465        let _ = tracing_subscriber::fmt()
1466            .with_max_level(tracing::Level::DEBUG)
1467            .try_init();
1468
1469        let mut table = DataTable::new("test");
1470        table.add_column(DataColumn::new("id"));
1471        table.add_column(DataColumn::new("status"));
1472
1473        // Add test data
1474        table
1475            .add_row(DataRow::new(vec![
1476                DataValue::Integer(1),
1477                DataValue::String("Pending Review".to_string()),
1478            ]))
1479            .unwrap();
1480
1481        table
1482            .add_row(DataRow::new(vec![
1483                DataValue::Integer(2),
1484                DataValue::String("Complete".to_string()),
1485            ]))
1486            .unwrap();
1487
1488        table
1489            .add_row(DataRow::new(vec![
1490                DataValue::Integer(3),
1491                DataValue::String("Pending Approval".to_string()),
1492            ]))
1493            .unwrap();
1494
1495        let table = Arc::new(table);
1496        let engine = QueryEngine::with_case_insensitive(true);
1497
1498        println!("\n=== Testing NOT with Method Calls ===");
1499        println!("Table has {} rows", table.row_count());
1500        for i in 0..table.row_count() {
1501            let status = table.get_value(i, 1);
1502            println!("Row {i}: status = {status:?}");
1503        }
1504
1505        // Test NOT with Contains - should exclude rows containing "pend"
1506        println!("\n--- Test: NOT status.Contains('pend') ---");
1507        let result = engine.execute(
1508            table.clone(),
1509            "SELECT * FROM test WHERE NOT status.Contains('pend')",
1510        );
1511        match result {
1512            Ok(view) => {
1513                println!(
1514                    "SUCCESS: Found {} rows NOT containing 'pend'",
1515                    view.row_count()
1516                );
1517                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1518            }
1519            Err(e) => {
1520                panic!("NOT Contains query failed: {e}");
1521            }
1522        }
1523
1524        // Test NOT with StartsWith
1525        println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1526        let result = engine.execute(
1527            table.clone(),
1528            "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1529        );
1530        match result {
1531            Ok(view) => {
1532                println!(
1533                    "SUCCESS: Found {} rows NOT starting with 'Pending'",
1534                    view.row_count()
1535                );
1536                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1537            }
1538            Err(e) => {
1539                panic!("NOT StartsWith query failed: {e}");
1540            }
1541        }
1542
1543        println!("\n=== NOT with method calls test complete! ===");
1544    }
1545
1546    #[test]
1547    #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1548    fn test_complex_logical_expressions() {
1549        // Initialize tracing for debug output
1550        let _ = tracing_subscriber::fmt()
1551            .with_max_level(tracing::Level::DEBUG)
1552            .try_init();
1553
1554        let mut table = DataTable::new("test");
1555        table.add_column(DataColumn::new("id"));
1556        table.add_column(DataColumn::new("status"));
1557        table.add_column(DataColumn::new("priority"));
1558        table.add_column(DataColumn::new("assigned"));
1559
1560        // Add comprehensive test data
1561        table
1562            .add_row(DataRow::new(vec![
1563                DataValue::Integer(1),
1564                DataValue::String("Pending".to_string()),
1565                DataValue::String("High".to_string()),
1566                DataValue::String("John".to_string()),
1567            ]))
1568            .unwrap();
1569
1570        table
1571            .add_row(DataRow::new(vec![
1572                DataValue::Integer(2),
1573                DataValue::String("Complete".to_string()),
1574                DataValue::String("High".to_string()),
1575                DataValue::String("Jane".to_string()),
1576            ]))
1577            .unwrap();
1578
1579        table
1580            .add_row(DataRow::new(vec![
1581                DataValue::Integer(3),
1582                DataValue::String("Pending".to_string()),
1583                DataValue::String("Low".to_string()),
1584                DataValue::String("John".to_string()),
1585            ]))
1586            .unwrap();
1587
1588        table
1589            .add_row(DataRow::new(vec![
1590                DataValue::Integer(4),
1591                DataValue::String("In Progress".to_string()),
1592                DataValue::String("Medium".to_string()),
1593                DataValue::String("Jane".to_string()),
1594            ]))
1595            .unwrap();
1596
1597        let table = Arc::new(table);
1598        let engine = QueryEngine::new();
1599
1600        println!("\n=== Testing Complex Logical Expressions ===");
1601        println!("Table has {} rows", table.row_count());
1602        for i in 0..table.row_count() {
1603            let status = table.get_value(i, 1);
1604            let priority = table.get_value(i, 2);
1605            let assigned = table.get_value(i, 3);
1606            println!(
1607                "Row {i}: status = {status:?}, priority = {priority:?}, assigned = {assigned:?}"
1608            );
1609        }
1610
1611        // Test complex AND/OR logic
1612        println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
1613        let result = engine.execute(
1614            table.clone(),
1615            "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
1616        );
1617        match result {
1618            Ok(view) => {
1619                println!(
1620                    "SUCCESS: Found {} rows with complex logic",
1621                    view.row_count()
1622                );
1623                assert_eq!(view.row_count(), 2); // Should find rows 1 and 3 (both Pending, one High priority, both assigned to John)
1624            }
1625            Err(e) => {
1626                panic!("Complex logic query failed: {e}");
1627            }
1628        }
1629
1630        // Test NOT with complex expressions
1631        println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
1632        let result = engine.execute(
1633            table.clone(),
1634            "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
1635        );
1636        match result {
1637            Ok(view) => {
1638                println!(
1639                    "SUCCESS: Found {} rows with NOT complex logic",
1640                    view.row_count()
1641                );
1642                assert_eq!(view.row_count(), 2); // Should find rows 1 (Pending+High) and 4 (In Progress+Medium)
1643            }
1644            Err(e) => {
1645                panic!("NOT complex logic query failed: {e}");
1646            }
1647        }
1648
1649        println!("\n=== Complex logical expressions test complete! ===");
1650    }
1651
1652    #[test]
1653    fn test_mixed_data_types_and_edge_cases() {
1654        // Initialize tracing for debug output
1655        let _ = tracing_subscriber::fmt()
1656            .with_max_level(tracing::Level::DEBUG)
1657            .try_init();
1658
1659        let mut table = DataTable::new("test");
1660        table.add_column(DataColumn::new("id"));
1661        table.add_column(DataColumn::new("value"));
1662        table.add_column(DataColumn::new("nullable_field"));
1663
1664        // Add test data with mixed types and edge cases
1665        table
1666            .add_row(DataRow::new(vec![
1667                DataValue::Integer(1),
1668                DataValue::String("123.45".to_string()),
1669                DataValue::String("present".to_string()),
1670            ]))
1671            .unwrap();
1672
1673        table
1674            .add_row(DataRow::new(vec![
1675                DataValue::Integer(2),
1676                DataValue::Float(678.90),
1677                DataValue::Null,
1678            ]))
1679            .unwrap();
1680
1681        table
1682            .add_row(DataRow::new(vec![
1683                DataValue::Integer(3),
1684                DataValue::Boolean(true),
1685                DataValue::String("also present".to_string()),
1686            ]))
1687            .unwrap();
1688
1689        table
1690            .add_row(DataRow::new(vec![
1691                DataValue::Integer(4),
1692                DataValue::String("false".to_string()),
1693                DataValue::Null,
1694            ]))
1695            .unwrap();
1696
1697        let table = Arc::new(table);
1698        let engine = QueryEngine::new();
1699
1700        println!("\n=== Testing Mixed Data Types and Edge Cases ===");
1701        println!("Table has {} rows", table.row_count());
1702        for i in 0..table.row_count() {
1703            let value = table.get_value(i, 1);
1704            let nullable = table.get_value(i, 2);
1705            println!("Row {i}: value = {value:?}, nullable_field = {nullable:?}");
1706        }
1707
1708        // Test type coercion with boolean Contains
1709        println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
1710        let result = engine.execute(
1711            table.clone(),
1712            "SELECT * FROM test WHERE value.Contains('true')",
1713        );
1714        match result {
1715            Ok(view) => {
1716                println!(
1717                    "SUCCESS: Found {} rows with boolean coercion",
1718                    view.row_count()
1719                );
1720                assert_eq!(view.row_count(), 1); // Should find the boolean true row
1721            }
1722            Err(e) => {
1723                panic!("Boolean coercion query failed: {e}");
1724            }
1725        }
1726
1727        // Test multiple IN values with mixed types
1728        println!("\n--- Test: id IN (1, 3) ---");
1729        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
1730        match result {
1731            Ok(view) => {
1732                println!("SUCCESS: Found {} rows with IN clause", view.row_count());
1733                assert_eq!(view.row_count(), 2); // Should find rows with id 1 and 3
1734            }
1735            Err(e) => {
1736                panic!("Multiple IN values query failed: {e}");
1737            }
1738        }
1739
1740        println!("\n=== Mixed data types test complete! ===");
1741    }
1742
1743    #[test]
1744    fn test_not_in_parsing() {
1745        use crate::sql::recursive_parser::Parser;
1746
1747        let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
1748        println!("\n=== Testing NOT IN parsing ===");
1749        println!("Parsing query: {query}");
1750
1751        let mut parser = Parser::new(query);
1752        match parser.parse() {
1753            Ok(statement) => {
1754                println!("Parsed statement: {statement:#?}");
1755                if let Some(where_clause) = statement.where_clause {
1756                    println!("WHERE conditions: {:#?}", where_clause.conditions);
1757                    if let Some(first_condition) = where_clause.conditions.first() {
1758                        println!("First condition expression: {:#?}", first_condition.expr);
1759                    }
1760                }
1761            }
1762            Err(e) => {
1763                panic!("Parse error: {e}");
1764            }
1765        }
1766    }
1767}
1768
1769#[cfg(test)]
1770#[path = "query_engine_tests.rs"]
1771mod query_engine_tests;