sql_cli/data/
query_engine.rs

1use anyhow::Result;
2use std::sync::Arc;
3use std::time::Instant;
4use tracing::{debug, info};
5
6use crate::config::config::BehaviorConfig;
7use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
8use crate::data::data_view::DataView;
9use crate::data::datatable::{DataColumn, DataRow, DataTable};
10use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
11use crate::sql::aggregates::contains_aggregate;
12use crate::sql::recursive_parser::{
13    OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection,
14};
15
16/// Query engine that executes SQL directly on DataTable
17pub struct QueryEngine {
18    case_insensitive: bool,
19    date_notation: String,
20    behavior_config: Option<BehaviorConfig>,
21}
22
23impl QueryEngine {
24    pub fn new() -> Self {
25        Self {
26            case_insensitive: false,
27            date_notation: "us".to_string(),
28            behavior_config: None,
29        }
30    }
31
32    pub fn with_behavior_config(config: BehaviorConfig) -> Self {
33        let case_insensitive = config.case_insensitive_default;
34        let date_notation = config.default_date_notation.clone();
35        Self {
36            case_insensitive,
37            date_notation,
38            behavior_config: Some(config),
39        }
40    }
41
42    pub fn with_date_notation(date_notation: String) -> Self {
43        Self {
44            case_insensitive: false,
45            date_notation,
46            behavior_config: None,
47        }
48    }
49
50    pub fn with_case_insensitive(case_insensitive: bool) -> Self {
51        Self {
52            case_insensitive,
53            date_notation: "us".to_string(),
54            behavior_config: None,
55        }
56    }
57
58    pub fn with_case_insensitive_and_date_notation(
59        case_insensitive: bool,
60        date_notation: String,
61    ) -> Self {
62        Self {
63            case_insensitive,
64            date_notation,
65            behavior_config: None,
66        }
67    }
68
69    /// Find a column name similar to the given name using edit distance
70    fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
71        let columns = table.column_names();
72        let mut best_match: Option<(String, usize)> = None;
73
74        for col in columns {
75            let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
76            // Only suggest if distance is small (likely a typo)
77            // Allow up to 3 edits for longer names
78            let max_distance = if name.len() > 10 { 3 } else { 2 };
79            if distance <= max_distance {
80                match &best_match {
81                    None => best_match = Some((col, distance)),
82                    Some((_, best_dist)) if distance < *best_dist => {
83                        best_match = Some((col, distance));
84                    }
85                    _ => {}
86                }
87            }
88        }
89
90        best_match.map(|(name, _)| name)
91    }
92
93    /// Calculate Levenshtein edit distance between two strings
94    fn edit_distance(&self, s1: &str, s2: &str) -> usize {
95        let len1 = s1.len();
96        let len2 = s2.len();
97        let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
98
99        for i in 0..=len1 {
100            matrix[i][0] = i;
101        }
102        for j in 0..=len2 {
103            matrix[0][j] = j;
104        }
105
106        for (i, c1) in s1.chars().enumerate() {
107            for (j, c2) in s2.chars().enumerate() {
108                let cost = if c1 == c2 { 0 } else { 1 };
109                matrix[i + 1][j + 1] = std::cmp::min(
110                    matrix[i][j + 1] + 1, // deletion
111                    std::cmp::min(
112                        matrix[i + 1][j] + 1, // insertion
113                        matrix[i][j] + cost,  // substitution
114                    ),
115                );
116            }
117        }
118
119        matrix[len1][len2]
120    }
121
122    /// Execute a SQL query on a DataTable and return a DataView (for backward compatibility)
123    pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
124        let start_time = Instant::now();
125
126        // Parse the SQL query
127        let parse_start = Instant::now();
128        let mut parser = Parser::new(sql);
129        let statement = parser
130            .parse()
131            .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
132        let parse_duration = parse_start.elapsed();
133
134        // Convert SelectStatement to DataView operations
135        let build_start = Instant::now();
136        let result = self.build_view(table, statement)?;
137        let build_duration = build_start.elapsed();
138
139        let total_duration = start_time.elapsed();
140        info!(
141            "Query execution complete: parse={:?}, build={:?}, total={:?}, rows={}",
142            parse_duration,
143            build_duration,
144            total_duration,
145            result.row_count()
146        );
147
148        Ok(result)
149    }
150
151    /// Build a DataView from a parsed SQL statement
152    fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
153        debug!(
154            "QueryEngine::build_view - select_items: {:?}",
155            statement.select_items
156        );
157        debug!(
158            "QueryEngine::build_view - where_clause: {:?}",
159            statement.where_clause
160        );
161
162        // Start with all rows visible
163        let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
164
165        // Apply WHERE clause filtering using recursive evaluator
166        if let Some(where_clause) = &statement.where_clause {
167            let total_rows = table.row_count();
168            debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
169            debug!("QueryEngine: WHERE clause = {:?}", where_clause);
170
171            let filter_start = Instant::now();
172            // Filter visible rows based on WHERE clause
173            let mut filtered_rows = Vec::new();
174            for row_idx in visible_rows {
175                // Only log for first few rows to avoid performance impact
176                if row_idx < 3 {
177                    debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
178                }
179                let evaluator = RecursiveWhereEvaluator::with_config(
180                    &*table,
181                    self.case_insensitive,
182                    self.date_notation.clone(),
183                );
184                match evaluator.evaluate(where_clause, row_idx) {
185                    Ok(result) => {
186                        if row_idx < 3 {
187                            debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
188                        }
189                        if result {
190                            filtered_rows.push(row_idx);
191                        }
192                    }
193                    Err(e) => {
194                        if row_idx < 3 {
195                            debug!(
196                                "QueryEngine: WHERE evaluation error for row {}: {}",
197                                row_idx, e
198                            );
199                        }
200                        // Propagate WHERE clause errors instead of silently ignoring them
201                        return Err(e);
202                    }
203                }
204            }
205            visible_rows = filtered_rows;
206            let filter_duration = filter_start.elapsed();
207            info!(
208                "WHERE clause filtering: {} rows -> {} rows in {:?}",
209                total_rows,
210                visible_rows.len(),
211                filter_duration
212            );
213
214            // Debug log moved to info level above with timing
215        }
216
217        // Create initial DataView with filtered rows
218        let mut view = DataView::new(table.clone());
219        view = view.with_rows(visible_rows);
220
221        // Apply column projection or computed expressions (SELECT clause) - do this AFTER filtering
222        if !statement.select_items.is_empty() {
223            // Check if we have ANY non-star items (not just the first one)
224            let has_non_star_items = statement
225                .select_items
226                .iter()
227                .any(|item| !matches!(item, SelectItem::Star));
228
229            // Apply select items if:
230            // 1. We have computed expressions or explicit columns
231            // 2. OR we have a mix of star and other items (e.g., SELECT *, computed_col)
232            if has_non_star_items || statement.select_items.len() > 1 {
233                view = self.apply_select_items(view, &statement.select_items)?;
234            }
235            // If it's just a single star, no projection needed
236        } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
237            // Fallback to legacy column projection for backward compatibility
238            let column_indices = self.resolve_column_indices(&*table, &statement.columns)?;
239            view = view.with_columns(column_indices);
240        }
241
242        // Apply ORDER BY sorting
243        if let Some(order_by_columns) = &statement.order_by {
244            if !order_by_columns.is_empty() {
245                view = self.apply_multi_order_by(view, order_by_columns)?;
246            }
247        }
248
249        // Apply LIMIT/OFFSET
250        if let Some(limit) = statement.limit {
251            let offset = statement.offset.unwrap_or(0);
252            view = view.with_limit(limit, offset);
253        }
254
255        Ok(view)
256    }
257
258    /// Resolve column names to indices
259    fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
260        let mut indices = Vec::new();
261        let table_columns = table.column_names();
262
263        for col_name in columns {
264            let index = table_columns
265                .iter()
266                .position(|c| c.eq_ignore_ascii_case(col_name))
267                .ok_or_else(|| {
268                    let suggestion = self.find_similar_column(table, col_name);
269                    match suggestion {
270                        Some(similar) => anyhow::anyhow!(
271                            "Column '{}' not found. Did you mean '{}'?",
272                            col_name,
273                            similar
274                        ),
275                        None => anyhow::anyhow!("Column '{}' not found", col_name),
276                    }
277                })?;
278            indices.push(index);
279        }
280
281        Ok(indices)
282    }
283
284    /// Apply SELECT items (columns and computed expressions) to create new view
285    fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
286        debug!(
287            "QueryEngine::apply_select_items - items: {:?}",
288            select_items
289        );
290        debug!(
291            "QueryEngine::apply_select_items - input view has {} rows",
292            view.row_count()
293        );
294
295        // Check if ALL select items are aggregate functions (no GROUP BY)
296        let all_aggregates = select_items.iter().all(|item| match item {
297            SelectItem::Expression { expr, .. } => contains_aggregate(expr),
298            SelectItem::Column(_) => false,
299            SelectItem::Star => false,
300        });
301
302        if all_aggregates && view.row_count() > 0 {
303            // Special handling for aggregate-only queries (no GROUP BY)
304            // These should produce exactly one row
305            return self.apply_aggregate_select(view, select_items);
306        }
307
308        // Check if we need to create computed columns
309        let has_computed_expressions = select_items
310            .iter()
311            .any(|item| matches!(item, SelectItem::Expression { .. }));
312
313        debug!(
314            "QueryEngine::apply_select_items - has_computed_expressions: {}",
315            has_computed_expressions
316        );
317
318        if !has_computed_expressions {
319            // Simple case: only columns, use existing projection logic
320            let column_indices = self.resolve_select_columns(view.source(), select_items)?;
321            return Ok(view.with_columns(column_indices));
322        }
323
324        // Complex case: we have computed expressions
325        // IMPORTANT: We create a PROJECTED view, not a new table
326        // This preserves the original DataTable reference
327
328        let source_table = view.source();
329        let visible_rows = view.visible_row_indices();
330
331        // Create a temporary table just for the computed result view
332        // But this table is only used for the current query result
333        let mut computed_table = DataTable::new("query_result");
334
335        // First, expand any Star selectors to actual columns
336        let mut expanded_items = Vec::new();
337        for item in select_items {
338            match item {
339                SelectItem::Star => {
340                    // Expand * to all columns from source table
341                    for col_name in source_table.column_names() {
342                        expanded_items.push(SelectItem::Column(col_name));
343                    }
344                }
345                _ => expanded_items.push(item.clone()),
346            }
347        }
348
349        // Add columns based on expanded SelectItems, handling duplicates
350        let mut column_name_counts: std::collections::HashMap<String, usize> =
351            std::collections::HashMap::new();
352
353        for item in &expanded_items {
354            let base_name = match item {
355                SelectItem::Column(name) => name.clone(),
356                SelectItem::Expression { alias, .. } => alias.clone(),
357                SelectItem::Star => unreachable!("Star should have been expanded"),
358            };
359
360            // Check if this column name has been used before
361            let count = column_name_counts.entry(base_name.clone()).or_insert(0);
362            let column_name = if *count == 0 {
363                // First occurrence, use the name as-is
364                base_name.clone()
365            } else {
366                // Duplicate, append a suffix
367                format!("{}_{}", base_name, count)
368            };
369            *count += 1;
370
371            computed_table.add_column(DataColumn::new(&column_name));
372        }
373
374        // Calculate values for each row
375        let evaluator =
376            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
377
378        for &row_idx in visible_rows {
379            let mut row_values = Vec::new();
380
381            for item in &expanded_items {
382                let value = match item {
383                    SelectItem::Column(col_name) => {
384                        // Simple column reference
385                        let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
386                            let suggestion = self.find_similar_column(source_table, col_name);
387                            match suggestion {
388                                Some(similar) => anyhow::anyhow!(
389                                    "Column '{}' not found. Did you mean '{}'?",
390                                    col_name,
391                                    similar
392                                ),
393                                None => anyhow::anyhow!("Column '{}' not found", col_name),
394                            }
395                        })?;
396                        let row = source_table
397                            .get_row(row_idx)
398                            .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
399                        row.get(col_idx)
400                            .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
401                            .clone()
402                    }
403                    SelectItem::Expression { expr, .. } => {
404                        // Computed expression
405                        evaluator.evaluate(expr, row_idx)?
406                    }
407                    SelectItem::Star => unreachable!("Star should have been expanded"),
408                };
409                row_values.push(value);
410            }
411
412            computed_table
413                .add_row(DataRow::new(row_values))
414                .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
415        }
416
417        // Return a view of the computed result
418        // This is a temporary view for this query only
419        Ok(DataView::new(Arc::new(computed_table)))
420    }
421
422    /// Apply aggregate-only SELECT (no GROUP BY - produces single row)
423    fn apply_aggregate_select(
424        &self,
425        view: DataView,
426        select_items: &[SelectItem],
427    ) -> Result<DataView> {
428        debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
429
430        let source_table = view.source();
431        let mut result_table = DataTable::new("aggregate_result");
432
433        // Add columns for each select item
434        for item in select_items {
435            let column_name = match item {
436                SelectItem::Expression { alias, .. } => alias.clone(),
437                _ => unreachable!("Should only have expressions in aggregate-only query"),
438            };
439            result_table.add_column(DataColumn::new(&column_name));
440        }
441
442        // Create evaluator with visible rows from the view (for filtered aggregates)
443        let visible_rows = view.visible_row_indices().to_vec();
444        let evaluator =
445            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
446                .with_visible_rows(visible_rows);
447
448        // Evaluate each aggregate expression once (they handle all rows internally)
449        let mut row_values = Vec::new();
450        for item in select_items {
451            match item {
452                SelectItem::Expression { expr, .. } => {
453                    // The evaluator will handle aggregates over all rows
454                    // We pass row_index=0 but aggregates ignore it and process all rows
455                    let value = evaluator.evaluate(expr, 0)?;
456                    row_values.push(value);
457                }
458                _ => unreachable!("Should only have expressions in aggregate-only query"),
459            }
460        }
461
462        // Add the single result row
463        result_table
464            .add_row(DataRow::new(row_values))
465            .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
466
467        Ok(DataView::new(Arc::new(result_table)))
468    }
469
470    /// Resolve SelectItem columns to indices (for simple column projections only)
471    fn resolve_select_columns(
472        &self,
473        table: &DataTable,
474        select_items: &[SelectItem],
475    ) -> Result<Vec<usize>> {
476        let mut indices = Vec::new();
477        let table_columns = table.column_names();
478
479        for item in select_items {
480            match item {
481                SelectItem::Column(col_name) => {
482                    let index = table_columns
483                        .iter()
484                        .position(|c| c.eq_ignore_ascii_case(col_name))
485                        .ok_or_else(|| {
486                            let suggestion = self.find_similar_column(table, col_name);
487                            match suggestion {
488                                Some(similar) => anyhow::anyhow!(
489                                    "Column '{}' not found. Did you mean '{}'?",
490                                    col_name,
491                                    similar
492                                ),
493                                None => anyhow::anyhow!("Column '{}' not found", col_name),
494                            }
495                        })?;
496                    indices.push(index);
497                }
498                SelectItem::Star => {
499                    // Expand * to all column indices
500                    for i in 0..table_columns.len() {
501                        indices.push(i);
502                    }
503                }
504                SelectItem::Expression { .. } => {
505                    return Err(anyhow::anyhow!(
506                        "Computed expressions require new table creation"
507                    ));
508                }
509            }
510        }
511
512        Ok(indices)
513    }
514
515    /// Apply multi-column ORDER BY sorting to the view
516    fn apply_multi_order_by(
517        &self,
518        mut view: DataView,
519        order_by_columns: &[OrderByColumn],
520    ) -> Result<DataView> {
521        // Build list of (source_column_index, ascending) tuples
522        let mut sort_columns = Vec::new();
523
524        for order_col in order_by_columns {
525            // Get column index
526            let col_index = view
527                .source()
528                .get_column_index(&order_col.column)
529                .ok_or_else(|| {
530                    let suggestion = self.find_similar_column(view.source(), &order_col.column);
531                    match suggestion {
532                        Some(similar) => anyhow::anyhow!(
533                            "Column '{}' not found. Did you mean '{}'?",
534                            order_col.column,
535                            similar
536                        ),
537                        None => anyhow::anyhow!("Column '{}' not found", order_col.column),
538                    }
539                })?;
540
541            let ascending = matches!(order_col.direction, SortDirection::Asc);
542            sort_columns.push((col_index, ascending));
543        }
544
545        // Apply multi-column sorting
546        view.apply_multi_sort(&sort_columns)?;
547        Ok(view)
548    }
549}
550
551#[cfg(test)]
552mod tests {
553    use super::*;
554    use crate::data::datatable::{DataColumn, DataRow, DataValue};
555
556    fn create_test_table() -> Arc<DataTable> {
557        let mut table = DataTable::new("test");
558
559        // Add columns
560        table.add_column(DataColumn::new("id"));
561        table.add_column(DataColumn::new("name"));
562        table.add_column(DataColumn::new("age"));
563
564        // Add rows
565        table
566            .add_row(DataRow::new(vec![
567                DataValue::Integer(1),
568                DataValue::String("Alice".to_string()),
569                DataValue::Integer(30),
570            ]))
571            .unwrap();
572
573        table
574            .add_row(DataRow::new(vec![
575                DataValue::Integer(2),
576                DataValue::String("Bob".to_string()),
577                DataValue::Integer(25),
578            ]))
579            .unwrap();
580
581        table
582            .add_row(DataRow::new(vec![
583                DataValue::Integer(3),
584                DataValue::String("Charlie".to_string()),
585                DataValue::Integer(35),
586            ]))
587            .unwrap();
588
589        Arc::new(table)
590    }
591
592    #[test]
593    fn test_select_all() {
594        let table = create_test_table();
595        let engine = QueryEngine::new();
596
597        let view = engine
598            .execute(table.clone(), "SELECT * FROM users")
599            .unwrap();
600        assert_eq!(view.row_count(), 3);
601        assert_eq!(view.column_count(), 3);
602    }
603
604    #[test]
605    fn test_select_columns() {
606        let table = create_test_table();
607        let engine = QueryEngine::new();
608
609        let view = engine
610            .execute(table.clone(), "SELECT name, age FROM users")
611            .unwrap();
612        assert_eq!(view.row_count(), 3);
613        assert_eq!(view.column_count(), 2);
614    }
615
616    #[test]
617    fn test_select_with_limit() {
618        let table = create_test_table();
619        let engine = QueryEngine::new();
620
621        let view = engine
622            .execute(table.clone(), "SELECT * FROM users LIMIT 2")
623            .unwrap();
624        assert_eq!(view.row_count(), 2);
625    }
626
627    #[test]
628    fn test_type_coercion_contains() {
629        // Initialize tracing for debug output
630        let _ = tracing_subscriber::fmt()
631            .with_max_level(tracing::Level::DEBUG)
632            .try_init();
633
634        let mut table = DataTable::new("test");
635        table.add_column(DataColumn::new("id"));
636        table.add_column(DataColumn::new("status"));
637        table.add_column(DataColumn::new("price"));
638
639        // Add test data with mixed types
640        table
641            .add_row(DataRow::new(vec![
642                DataValue::Integer(1),
643                DataValue::String("Pending".to_string()),
644                DataValue::Float(99.99),
645            ]))
646            .unwrap();
647
648        table
649            .add_row(DataRow::new(vec![
650                DataValue::Integer(2),
651                DataValue::String("Confirmed".to_string()),
652                DataValue::Float(150.50),
653            ]))
654            .unwrap();
655
656        table
657            .add_row(DataRow::new(vec![
658                DataValue::Integer(3),
659                DataValue::String("Pending".to_string()),
660                DataValue::Float(75.00),
661            ]))
662            .unwrap();
663
664        let table = Arc::new(table);
665        let engine = QueryEngine::new();
666
667        println!("\n=== Testing WHERE clause with Contains ===");
668        println!("Table has {} rows", table.row_count());
669        for i in 0..table.row_count() {
670            let status = table.get_value(i, 1);
671            println!("Row {}: status = {:?}", i, status);
672        }
673
674        // Test 1: Basic string contains (should work)
675        println!("\n--- Test 1: status.Contains('pend') ---");
676        let result = engine.execute(
677            table.clone(),
678            "SELECT * FROM test WHERE status.Contains('pend')",
679        );
680        match result {
681            Ok(view) => {
682                println!("SUCCESS: Found {} matching rows", view.row_count());
683                assert_eq!(view.row_count(), 2); // Should find both Pending rows
684            }
685            Err(e) => {
686                panic!("Query failed: {}", e);
687            }
688        }
689
690        // Test 2: Numeric contains (should work with type coercion)
691        println!("\n--- Test 2: price.Contains('9') ---");
692        let result = engine.execute(
693            table.clone(),
694            "SELECT * FROM test WHERE price.Contains('9')",
695        );
696        match result {
697            Ok(view) => {
698                println!(
699                    "SUCCESS: Found {} matching rows with price containing '9'",
700                    view.row_count()
701                );
702                // Should find 99.99 row
703                assert!(view.row_count() >= 1);
704            }
705            Err(e) => {
706                panic!("Numeric coercion query failed: {}", e);
707            }
708        }
709
710        println!("\n=== All tests passed! ===");
711    }
712
713    #[test]
714    fn test_not_in_clause() {
715        // Initialize tracing for debug output
716        let _ = tracing_subscriber::fmt()
717            .with_max_level(tracing::Level::DEBUG)
718            .try_init();
719
720        let mut table = DataTable::new("test");
721        table.add_column(DataColumn::new("id"));
722        table.add_column(DataColumn::new("country"));
723
724        // Add test data
725        table
726            .add_row(DataRow::new(vec![
727                DataValue::Integer(1),
728                DataValue::String("CA".to_string()),
729            ]))
730            .unwrap();
731
732        table
733            .add_row(DataRow::new(vec![
734                DataValue::Integer(2),
735                DataValue::String("US".to_string()),
736            ]))
737            .unwrap();
738
739        table
740            .add_row(DataRow::new(vec![
741                DataValue::Integer(3),
742                DataValue::String("UK".to_string()),
743            ]))
744            .unwrap();
745
746        let table = Arc::new(table);
747        let engine = QueryEngine::new();
748
749        println!("\n=== Testing NOT IN clause ===");
750        println!("Table has {} rows", table.row_count());
751        for i in 0..table.row_count() {
752            let country = table.get_value(i, 1);
753            println!("Row {}: country = {:?}", i, country);
754        }
755
756        // Test NOT IN clause - should exclude CA, return US and UK (2 rows)
757        println!("\n--- Test: country NOT IN ('CA') ---");
758        let result = engine.execute(
759            table.clone(),
760            "SELECT * FROM test WHERE country NOT IN ('CA')",
761        );
762        match result {
763            Ok(view) => {
764                println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
765                assert_eq!(view.row_count(), 2); // Should find US and UK
766            }
767            Err(e) => {
768                panic!("NOT IN query failed: {}", e);
769            }
770        }
771
772        println!("\n=== NOT IN test complete! ===");
773    }
774
775    #[test]
776    fn test_case_insensitive_in_and_not_in() {
777        // Initialize tracing for debug output
778        let _ = tracing_subscriber::fmt()
779            .with_max_level(tracing::Level::DEBUG)
780            .try_init();
781
782        let mut table = DataTable::new("test");
783        table.add_column(DataColumn::new("id"));
784        table.add_column(DataColumn::new("country"));
785
786        // Add test data with mixed case
787        table
788            .add_row(DataRow::new(vec![
789                DataValue::Integer(1),
790                DataValue::String("CA".to_string()), // uppercase
791            ]))
792            .unwrap();
793
794        table
795            .add_row(DataRow::new(vec![
796                DataValue::Integer(2),
797                DataValue::String("us".to_string()), // lowercase
798            ]))
799            .unwrap();
800
801        table
802            .add_row(DataRow::new(vec![
803                DataValue::Integer(3),
804                DataValue::String("UK".to_string()), // uppercase
805            ]))
806            .unwrap();
807
808        let table = Arc::new(table);
809
810        println!("\n=== Testing Case-Insensitive IN clause ===");
811        println!("Table has {} rows", table.row_count());
812        for i in 0..table.row_count() {
813            let country = table.get_value(i, 1);
814            println!("Row {}: country = {:?}", i, country);
815        }
816
817        // Test case-insensitive IN - should match 'CA' with 'ca'
818        println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
819        let engine = QueryEngine::with_case_insensitive(true);
820        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
821        match result {
822            Ok(view) => {
823                println!(
824                    "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
825                    view.row_count()
826                );
827                assert_eq!(view.row_count(), 1); // Should find CA row
828            }
829            Err(e) => {
830                panic!("Case-insensitive IN query failed: {}", e);
831            }
832        }
833
834        // Test case-insensitive NOT IN - should exclude 'CA' when searching for 'ca'
835        println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
836        let result = engine.execute(
837            table.clone(),
838            "SELECT * FROM test WHERE country NOT IN ('ca')",
839        );
840        match result {
841            Ok(view) => {
842                println!(
843                    "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
844                    view.row_count()
845                );
846                assert_eq!(view.row_count(), 2); // Should find us and UK rows
847            }
848            Err(e) => {
849                panic!("Case-insensitive NOT IN query failed: {}", e);
850            }
851        }
852
853        // Test case-sensitive (default) - should NOT match 'CA' with 'ca'
854        println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
855        let engine_case_sensitive = QueryEngine::new(); // defaults to case_insensitive=false
856        let result = engine_case_sensitive
857            .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
858        match result {
859            Ok(view) => {
860                println!(
861                    "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
862                    view.row_count()
863                );
864                assert_eq!(view.row_count(), 0); // Should find no rows (CA != ca)
865            }
866            Err(e) => {
867                panic!("Case-sensitive IN query failed: {}", e);
868            }
869        }
870
871        println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
872    }
873
874    #[test]
875    #[ignore = "Parentheses in WHERE clause not yet implemented"]
876    fn test_parentheses_in_where_clause() {
877        // Initialize tracing for debug output
878        let _ = tracing_subscriber::fmt()
879            .with_max_level(tracing::Level::DEBUG)
880            .try_init();
881
882        let mut table = DataTable::new("test");
883        table.add_column(DataColumn::new("id"));
884        table.add_column(DataColumn::new("status"));
885        table.add_column(DataColumn::new("priority"));
886
887        // Add test data
888        table
889            .add_row(DataRow::new(vec![
890                DataValue::Integer(1),
891                DataValue::String("Pending".to_string()),
892                DataValue::String("High".to_string()),
893            ]))
894            .unwrap();
895
896        table
897            .add_row(DataRow::new(vec![
898                DataValue::Integer(2),
899                DataValue::String("Complete".to_string()),
900                DataValue::String("High".to_string()),
901            ]))
902            .unwrap();
903
904        table
905            .add_row(DataRow::new(vec![
906                DataValue::Integer(3),
907                DataValue::String("Pending".to_string()),
908                DataValue::String("Low".to_string()),
909            ]))
910            .unwrap();
911
912        table
913            .add_row(DataRow::new(vec![
914                DataValue::Integer(4),
915                DataValue::String("Complete".to_string()),
916                DataValue::String("Low".to_string()),
917            ]))
918            .unwrap();
919
920        let table = Arc::new(table);
921        let engine = QueryEngine::new();
922
923        println!("\n=== Testing Parentheses in WHERE clause ===");
924        println!("Table has {} rows", table.row_count());
925        for i in 0..table.row_count() {
926            let status = table.get_value(i, 1);
927            let priority = table.get_value(i, 2);
928            println!(
929                "Row {}: status = {:?}, priority = {:?}",
930                i, status, priority
931            );
932        }
933
934        // Test OR with parentheses - should get (Pending AND High) OR (Complete AND Low)
935        println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
936        let result = engine.execute(
937            table.clone(),
938            "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
939        );
940        match result {
941            Ok(view) => {
942                println!(
943                    "SUCCESS: Found {} rows with parenthetical logic",
944                    view.row_count()
945                );
946                assert_eq!(view.row_count(), 2); // Should find rows 1 and 4
947            }
948            Err(e) => {
949                panic!("Parentheses query failed: {}", e);
950            }
951        }
952
953        println!("\n=== Parentheses test complete! ===");
954    }
955
956    #[test]
957    #[ignore = "Numeric type coercion needs fixing"]
958    fn test_numeric_type_coercion() {
959        // Initialize tracing for debug output
960        let _ = tracing_subscriber::fmt()
961            .with_max_level(tracing::Level::DEBUG)
962            .try_init();
963
964        let mut table = DataTable::new("test");
965        table.add_column(DataColumn::new("id"));
966        table.add_column(DataColumn::new("price"));
967        table.add_column(DataColumn::new("quantity"));
968
969        // Add test data with different numeric types
970        table
971            .add_row(DataRow::new(vec![
972                DataValue::Integer(1),
973                DataValue::Float(99.50), // Contains '.'
974                DataValue::Integer(100),
975            ]))
976            .unwrap();
977
978        table
979            .add_row(DataRow::new(vec![
980                DataValue::Integer(2),
981                DataValue::Float(150.0), // Contains '.' and '0'
982                DataValue::Integer(200),
983            ]))
984            .unwrap();
985
986        table
987            .add_row(DataRow::new(vec![
988                DataValue::Integer(3),
989                DataValue::Integer(75), // No decimal point
990                DataValue::Integer(50),
991            ]))
992            .unwrap();
993
994        let table = Arc::new(table);
995        let engine = QueryEngine::new();
996
997        println!("\n=== Testing Numeric Type Coercion ===");
998        println!("Table has {} rows", table.row_count());
999        for i in 0..table.row_count() {
1000            let price = table.get_value(i, 1);
1001            let quantity = table.get_value(i, 2);
1002            println!("Row {}: price = {:?}, quantity = {:?}", i, price, quantity);
1003        }
1004
1005        // Test Contains on float values - should find rows with decimal points
1006        println!("\n--- Test: price.Contains('.') ---");
1007        let result = engine.execute(
1008            table.clone(),
1009            "SELECT * FROM test WHERE price.Contains('.')",
1010        );
1011        match result {
1012            Ok(view) => {
1013                println!(
1014                    "SUCCESS: Found {} rows with decimal points in price",
1015                    view.row_count()
1016                );
1017                assert_eq!(view.row_count(), 2); // Should find 99.50 and 150.0
1018            }
1019            Err(e) => {
1020                panic!("Numeric Contains query failed: {}", e);
1021            }
1022        }
1023
1024        // Test Contains on integer values converted to string
1025        println!("\n--- Test: quantity.Contains('0') ---");
1026        let result = engine.execute(
1027            table.clone(),
1028            "SELECT * FROM test WHERE quantity.Contains('0')",
1029        );
1030        match result {
1031            Ok(view) => {
1032                println!(
1033                    "SUCCESS: Found {} rows with '0' in quantity",
1034                    view.row_count()
1035                );
1036                assert_eq!(view.row_count(), 2); // Should find 100 and 200
1037            }
1038            Err(e) => {
1039                panic!("Integer Contains query failed: {}", e);
1040            }
1041        }
1042
1043        println!("\n=== Numeric type coercion test complete! ===");
1044    }
1045
1046    #[test]
1047    fn test_datetime_comparisons() {
1048        // Initialize tracing for debug output
1049        let _ = tracing_subscriber::fmt()
1050            .with_max_level(tracing::Level::DEBUG)
1051            .try_init();
1052
1053        let mut table = DataTable::new("test");
1054        table.add_column(DataColumn::new("id"));
1055        table.add_column(DataColumn::new("created_date"));
1056
1057        // Add test data with date strings (as they would come from CSV)
1058        table
1059            .add_row(DataRow::new(vec![
1060                DataValue::Integer(1),
1061                DataValue::String("2024-12-15".to_string()),
1062            ]))
1063            .unwrap();
1064
1065        table
1066            .add_row(DataRow::new(vec![
1067                DataValue::Integer(2),
1068                DataValue::String("2025-01-15".to_string()),
1069            ]))
1070            .unwrap();
1071
1072        table
1073            .add_row(DataRow::new(vec![
1074                DataValue::Integer(3),
1075                DataValue::String("2025-02-15".to_string()),
1076            ]))
1077            .unwrap();
1078
1079        let table = Arc::new(table);
1080        let engine = QueryEngine::new();
1081
1082        println!("\n=== Testing DateTime Comparisons ===");
1083        println!("Table has {} rows", table.row_count());
1084        for i in 0..table.row_count() {
1085            let date = table.get_value(i, 1);
1086            println!("Row {}: created_date = {:?}", i, date);
1087        }
1088
1089        // Test DateTime constructor comparison - should find dates after 2025-01-01
1090        println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1091        let result = engine.execute(
1092            table.clone(),
1093            "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1094        );
1095        match result {
1096            Ok(view) => {
1097                println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1098                assert_eq!(view.row_count(), 2); // Should find 2025-01-15 and 2025-02-15
1099            }
1100            Err(e) => {
1101                panic!("DateTime comparison query failed: {}", e);
1102            }
1103        }
1104
1105        println!("\n=== DateTime comparison test complete! ===");
1106    }
1107
1108    #[test]
1109    fn test_not_with_method_calls() {
1110        // Initialize tracing for debug output
1111        let _ = tracing_subscriber::fmt()
1112            .with_max_level(tracing::Level::DEBUG)
1113            .try_init();
1114
1115        let mut table = DataTable::new("test");
1116        table.add_column(DataColumn::new("id"));
1117        table.add_column(DataColumn::new("status"));
1118
1119        // Add test data
1120        table
1121            .add_row(DataRow::new(vec![
1122                DataValue::Integer(1),
1123                DataValue::String("Pending Review".to_string()),
1124            ]))
1125            .unwrap();
1126
1127        table
1128            .add_row(DataRow::new(vec![
1129                DataValue::Integer(2),
1130                DataValue::String("Complete".to_string()),
1131            ]))
1132            .unwrap();
1133
1134        table
1135            .add_row(DataRow::new(vec![
1136                DataValue::Integer(3),
1137                DataValue::String("Pending Approval".to_string()),
1138            ]))
1139            .unwrap();
1140
1141        let table = Arc::new(table);
1142        let engine = QueryEngine::with_case_insensitive(true);
1143
1144        println!("\n=== Testing NOT with Method Calls ===");
1145        println!("Table has {} rows", table.row_count());
1146        for i in 0..table.row_count() {
1147            let status = table.get_value(i, 1);
1148            println!("Row {}: status = {:?}", i, status);
1149        }
1150
1151        // Test NOT with Contains - should exclude rows containing "pend"
1152        println!("\n--- Test: NOT status.Contains('pend') ---");
1153        let result = engine.execute(
1154            table.clone(),
1155            "SELECT * FROM test WHERE NOT status.Contains('pend')",
1156        );
1157        match result {
1158            Ok(view) => {
1159                println!(
1160                    "SUCCESS: Found {} rows NOT containing 'pend'",
1161                    view.row_count()
1162                );
1163                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1164            }
1165            Err(e) => {
1166                panic!("NOT Contains query failed: {}", e);
1167            }
1168        }
1169
1170        // Test NOT with StartsWith
1171        println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1172        let result = engine.execute(
1173            table.clone(),
1174            "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1175        );
1176        match result {
1177            Ok(view) => {
1178                println!(
1179                    "SUCCESS: Found {} rows NOT starting with 'Pending'",
1180                    view.row_count()
1181                );
1182                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1183            }
1184            Err(e) => {
1185                panic!("NOT StartsWith query failed: {}", e);
1186            }
1187        }
1188
1189        println!("\n=== NOT with method calls test complete! ===");
1190    }
1191
1192    #[test]
1193    #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1194    fn test_complex_logical_expressions() {
1195        // Initialize tracing for debug output
1196        let _ = tracing_subscriber::fmt()
1197            .with_max_level(tracing::Level::DEBUG)
1198            .try_init();
1199
1200        let mut table = DataTable::new("test");
1201        table.add_column(DataColumn::new("id"));
1202        table.add_column(DataColumn::new("status"));
1203        table.add_column(DataColumn::new("priority"));
1204        table.add_column(DataColumn::new("assigned"));
1205
1206        // Add comprehensive test data
1207        table
1208            .add_row(DataRow::new(vec![
1209                DataValue::Integer(1),
1210                DataValue::String("Pending".to_string()),
1211                DataValue::String("High".to_string()),
1212                DataValue::String("John".to_string()),
1213            ]))
1214            .unwrap();
1215
1216        table
1217            .add_row(DataRow::new(vec![
1218                DataValue::Integer(2),
1219                DataValue::String("Complete".to_string()),
1220                DataValue::String("High".to_string()),
1221                DataValue::String("Jane".to_string()),
1222            ]))
1223            .unwrap();
1224
1225        table
1226            .add_row(DataRow::new(vec![
1227                DataValue::Integer(3),
1228                DataValue::String("Pending".to_string()),
1229                DataValue::String("Low".to_string()),
1230                DataValue::String("John".to_string()),
1231            ]))
1232            .unwrap();
1233
1234        table
1235            .add_row(DataRow::new(vec![
1236                DataValue::Integer(4),
1237                DataValue::String("In Progress".to_string()),
1238                DataValue::String("Medium".to_string()),
1239                DataValue::String("Jane".to_string()),
1240            ]))
1241            .unwrap();
1242
1243        let table = Arc::new(table);
1244        let engine = QueryEngine::new();
1245
1246        println!("\n=== Testing Complex Logical Expressions ===");
1247        println!("Table has {} rows", table.row_count());
1248        for i in 0..table.row_count() {
1249            let status = table.get_value(i, 1);
1250            let priority = table.get_value(i, 2);
1251            let assigned = table.get_value(i, 3);
1252            println!(
1253                "Row {}: status = {:?}, priority = {:?}, assigned = {:?}",
1254                i, status, priority, assigned
1255            );
1256        }
1257
1258        // Test complex AND/OR logic
1259        println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
1260        let result = engine.execute(
1261            table.clone(),
1262            "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
1263        );
1264        match result {
1265            Ok(view) => {
1266                println!(
1267                    "SUCCESS: Found {} rows with complex logic",
1268                    view.row_count()
1269                );
1270                assert_eq!(view.row_count(), 2); // Should find rows 1 and 3 (both Pending, one High priority, both assigned to John)
1271            }
1272            Err(e) => {
1273                panic!("Complex logic query failed: {}", e);
1274            }
1275        }
1276
1277        // Test NOT with complex expressions
1278        println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
1279        let result = engine.execute(
1280            table.clone(),
1281            "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
1282        );
1283        match result {
1284            Ok(view) => {
1285                println!(
1286                    "SUCCESS: Found {} rows with NOT complex logic",
1287                    view.row_count()
1288                );
1289                assert_eq!(view.row_count(), 2); // Should find rows 1 (Pending+High) and 4 (In Progress+Medium)
1290            }
1291            Err(e) => {
1292                panic!("NOT complex logic query failed: {}", e);
1293            }
1294        }
1295
1296        println!("\n=== Complex logical expressions test complete! ===");
1297    }
1298
1299    #[test]
1300    fn test_mixed_data_types_and_edge_cases() {
1301        // Initialize tracing for debug output
1302        let _ = tracing_subscriber::fmt()
1303            .with_max_level(tracing::Level::DEBUG)
1304            .try_init();
1305
1306        let mut table = DataTable::new("test");
1307        table.add_column(DataColumn::new("id"));
1308        table.add_column(DataColumn::new("value"));
1309        table.add_column(DataColumn::new("nullable_field"));
1310
1311        // Add test data with mixed types and edge cases
1312        table
1313            .add_row(DataRow::new(vec![
1314                DataValue::Integer(1),
1315                DataValue::String("123.45".to_string()),
1316                DataValue::String("present".to_string()),
1317            ]))
1318            .unwrap();
1319
1320        table
1321            .add_row(DataRow::new(vec![
1322                DataValue::Integer(2),
1323                DataValue::Float(678.90),
1324                DataValue::Null,
1325            ]))
1326            .unwrap();
1327
1328        table
1329            .add_row(DataRow::new(vec![
1330                DataValue::Integer(3),
1331                DataValue::Boolean(true),
1332                DataValue::String("also present".to_string()),
1333            ]))
1334            .unwrap();
1335
1336        table
1337            .add_row(DataRow::new(vec![
1338                DataValue::Integer(4),
1339                DataValue::String("false".to_string()),
1340                DataValue::Null,
1341            ]))
1342            .unwrap();
1343
1344        let table = Arc::new(table);
1345        let engine = QueryEngine::new();
1346
1347        println!("\n=== Testing Mixed Data Types and Edge Cases ===");
1348        println!("Table has {} rows", table.row_count());
1349        for i in 0..table.row_count() {
1350            let value = table.get_value(i, 1);
1351            let nullable = table.get_value(i, 2);
1352            println!(
1353                "Row {}: value = {:?}, nullable_field = {:?}",
1354                i, value, nullable
1355            );
1356        }
1357
1358        // Test type coercion with boolean Contains
1359        println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
1360        let result = engine.execute(
1361            table.clone(),
1362            "SELECT * FROM test WHERE value.Contains('true')",
1363        );
1364        match result {
1365            Ok(view) => {
1366                println!(
1367                    "SUCCESS: Found {} rows with boolean coercion",
1368                    view.row_count()
1369                );
1370                assert_eq!(view.row_count(), 1); // Should find the boolean true row
1371            }
1372            Err(e) => {
1373                panic!("Boolean coercion query failed: {}", e);
1374            }
1375        }
1376
1377        // Test multiple IN values with mixed types
1378        println!("\n--- Test: id IN (1, 3) ---");
1379        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
1380        match result {
1381            Ok(view) => {
1382                println!("SUCCESS: Found {} rows with IN clause", view.row_count());
1383                assert_eq!(view.row_count(), 2); // Should find rows with id 1 and 3
1384            }
1385            Err(e) => {
1386                panic!("Multiple IN values query failed: {}", e);
1387            }
1388        }
1389
1390        println!("\n=== Mixed data types test complete! ===");
1391    }
1392
1393    #[test]
1394    fn test_not_in_parsing() {
1395        use crate::sql::recursive_parser::Parser;
1396
1397        let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
1398        println!("\n=== Testing NOT IN parsing ===");
1399        println!("Parsing query: {}", query);
1400
1401        let mut parser = Parser::new(query);
1402        match parser.parse() {
1403            Ok(statement) => {
1404                println!("Parsed statement: {:#?}", statement);
1405                if let Some(where_clause) = statement.where_clause {
1406                    println!("WHERE conditions: {:#?}", where_clause.conditions);
1407                    if let Some(first_condition) = where_clause.conditions.first() {
1408                        println!("First condition expression: {:#?}", first_condition.expr);
1409                    }
1410                }
1411            }
1412            Err(e) => {
1413                panic!("Parse error: {}", e);
1414            }
1415        }
1416    }
1417}
1418
1419#[cfg(test)]
1420#[path = "query_engine_tests.rs"]
1421mod query_engine_tests;