sql_cli/data/
query_engine.rs

1use anyhow::Result;
2use std::sync::Arc;
3use std::time::Instant;
4use tracing::{debug, info};
5
6use crate::config::config::BehaviorConfig;
7use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
8use crate::data::data_view::DataView;
9use crate::data::datatable::{DataColumn, DataRow, DataTable};
10use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
11use crate::sql::recursive_parser::{
12    OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection,
13};
14
15/// Query engine that executes SQL directly on DataTable
16pub struct QueryEngine {
17    case_insensitive: bool,
18    date_notation: String,
19    behavior_config: Option<BehaviorConfig>,
20}
21
22impl QueryEngine {
23    pub fn new() -> Self {
24        Self {
25            case_insensitive: false,
26            date_notation: "us".to_string(),
27            behavior_config: None,
28        }
29    }
30
31    pub fn with_behavior_config(config: BehaviorConfig) -> Self {
32        let case_insensitive = config.case_insensitive_default;
33        let date_notation = config.default_date_notation.clone();
34        Self {
35            case_insensitive,
36            date_notation,
37            behavior_config: Some(config),
38        }
39    }
40
41    pub fn with_date_notation(date_notation: String) -> Self {
42        Self {
43            case_insensitive: false,
44            date_notation,
45            behavior_config: None,
46        }
47    }
48
49    pub fn with_case_insensitive(case_insensitive: bool) -> Self {
50        Self {
51            case_insensitive,
52            date_notation: "us".to_string(),
53            behavior_config: None,
54        }
55    }
56
57    pub fn with_case_insensitive_and_date_notation(
58        case_insensitive: bool,
59        date_notation: String,
60    ) -> Self {
61        Self {
62            case_insensitive,
63            date_notation,
64            behavior_config: None,
65        }
66    }
67
68    /// Find a column name similar to the given name using edit distance
69    fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
70        let columns = table.column_names();
71        let mut best_match: Option<(String, usize)> = None;
72
73        for col in columns {
74            let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
75            // Only suggest if distance is small (likely a typo)
76            // Allow up to 3 edits for longer names
77            let max_distance = if name.len() > 10 { 3 } else { 2 };
78            if distance <= max_distance {
79                match &best_match {
80                    None => best_match = Some((col, distance)),
81                    Some((_, best_dist)) if distance < *best_dist => {
82                        best_match = Some((col, distance));
83                    }
84                    _ => {}
85                }
86            }
87        }
88
89        best_match.map(|(name, _)| name)
90    }
91
92    /// Calculate Levenshtein edit distance between two strings
93    fn edit_distance(&self, s1: &str, s2: &str) -> usize {
94        let len1 = s1.len();
95        let len2 = s2.len();
96        let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
97
98        for i in 0..=len1 {
99            matrix[i][0] = i;
100        }
101        for j in 0..=len2 {
102            matrix[0][j] = j;
103        }
104
105        for (i, c1) in s1.chars().enumerate() {
106            for (j, c2) in s2.chars().enumerate() {
107                let cost = if c1 == c2 { 0 } else { 1 };
108                matrix[i + 1][j + 1] = std::cmp::min(
109                    matrix[i][j + 1] + 1, // deletion
110                    std::cmp::min(
111                        matrix[i + 1][j] + 1, // insertion
112                        matrix[i][j] + cost,  // substitution
113                    ),
114                );
115            }
116        }
117
118        matrix[len1][len2]
119    }
120
121    /// Execute a SQL query on a DataTable and return a DataView (for backward compatibility)
122    pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
123        let start_time = Instant::now();
124
125        // Parse the SQL query
126        let parse_start = Instant::now();
127        let mut parser = Parser::new(sql);
128        let statement = parser
129            .parse()
130            .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
131        let parse_duration = parse_start.elapsed();
132
133        // Convert SelectStatement to DataView operations
134        let build_start = Instant::now();
135        let result = self.build_view(table, statement)?;
136        let build_duration = build_start.elapsed();
137
138        let total_duration = start_time.elapsed();
139        info!(
140            "Query execution complete: parse={:?}, build={:?}, total={:?}, rows={}",
141            parse_duration,
142            build_duration,
143            total_duration,
144            result.row_count()
145        );
146
147        Ok(result)
148    }
149
150    /// Build a DataView from a parsed SQL statement
151    fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
152        debug!(
153            "QueryEngine::build_view - select_items: {:?}",
154            statement.select_items
155        );
156        debug!(
157            "QueryEngine::build_view - where_clause: {:?}",
158            statement.where_clause
159        );
160
161        // Start with all rows visible
162        let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
163
164        // Apply WHERE clause filtering using recursive evaluator
165        if let Some(where_clause) = &statement.where_clause {
166            let total_rows = table.row_count();
167            debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
168            debug!("QueryEngine: WHERE clause = {:?}", where_clause);
169
170            let filter_start = Instant::now();
171            // Filter visible rows based on WHERE clause
172            let mut filtered_rows = Vec::new();
173            for row_idx in visible_rows {
174                // Only log for first few rows to avoid performance impact
175                if row_idx < 3 {
176                    debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
177                }
178                let evaluator = RecursiveWhereEvaluator::with_config(
179                    &*table,
180                    self.case_insensitive,
181                    self.date_notation.clone(),
182                );
183                match evaluator.evaluate(where_clause, row_idx) {
184                    Ok(result) => {
185                        if row_idx < 3 {
186                            debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
187                        }
188                        if result {
189                            filtered_rows.push(row_idx);
190                        }
191                    }
192                    Err(e) => {
193                        if row_idx < 3 {
194                            debug!(
195                                "QueryEngine: WHERE evaluation error for row {}: {}",
196                                row_idx, e
197                            );
198                        }
199                        // Propagate WHERE clause errors instead of silently ignoring them
200                        return Err(e);
201                    }
202                }
203            }
204            visible_rows = filtered_rows;
205            let filter_duration = filter_start.elapsed();
206            info!(
207                "WHERE clause filtering: {} rows -> {} rows in {:?}",
208                total_rows,
209                visible_rows.len(),
210                filter_duration
211            );
212
213            // Debug log moved to info level above with timing
214        }
215
216        // Create initial DataView with filtered rows
217        let mut view = DataView::new(table.clone());
218        view = view.with_rows(visible_rows);
219
220        // Apply column projection or computed expressions (SELECT clause) - do this AFTER filtering
221        if !statement.select_items.is_empty() {
222            // Check if we have ANY non-star items (not just the first one)
223            let has_non_star_items = statement
224                .select_items
225                .iter()
226                .any(|item| !matches!(item, SelectItem::Star));
227
228            // Apply select items if:
229            // 1. We have computed expressions or explicit columns
230            // 2. OR we have a mix of star and other items (e.g., SELECT *, computed_col)
231            if has_non_star_items || statement.select_items.len() > 1 {
232                view = self.apply_select_items(view, &statement.select_items)?;
233            }
234            // If it's just a single star, no projection needed
235        } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
236            // Fallback to legacy column projection for backward compatibility
237            let column_indices = self.resolve_column_indices(&*table, &statement.columns)?;
238            view = view.with_columns(column_indices);
239        }
240
241        // Apply ORDER BY sorting
242        if let Some(order_by_columns) = &statement.order_by {
243            if !order_by_columns.is_empty() {
244                view = self.apply_multi_order_by(view, order_by_columns)?;
245            }
246        }
247
248        // Apply LIMIT/OFFSET
249        if let Some(limit) = statement.limit {
250            let offset = statement.offset.unwrap_or(0);
251            view = view.with_limit(limit, offset);
252        }
253
254        Ok(view)
255    }
256
257    /// Resolve column names to indices
258    fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
259        let mut indices = Vec::new();
260        let table_columns = table.column_names();
261
262        for col_name in columns {
263            let index = table_columns
264                .iter()
265                .position(|c| c.eq_ignore_ascii_case(col_name))
266                .ok_or_else(|| {
267                    let suggestion = self.find_similar_column(table, col_name);
268                    match suggestion {
269                        Some(similar) => anyhow::anyhow!(
270                            "Column '{}' not found. Did you mean '{}'?",
271                            col_name,
272                            similar
273                        ),
274                        None => anyhow::anyhow!("Column '{}' not found", col_name),
275                    }
276                })?;
277            indices.push(index);
278        }
279
280        Ok(indices)
281    }
282
283    /// Apply SELECT items (columns and computed expressions) to create new view
284    fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
285        debug!(
286            "QueryEngine::apply_select_items - items: {:?}",
287            select_items
288        );
289        debug!(
290            "QueryEngine::apply_select_items - input view has {} rows",
291            view.row_count()
292        );
293
294        // Check if we need to create computed columns
295        let has_computed_expressions = select_items
296            .iter()
297            .any(|item| matches!(item, SelectItem::Expression { .. }));
298
299        debug!(
300            "QueryEngine::apply_select_items - has_computed_expressions: {}",
301            has_computed_expressions
302        );
303
304        if !has_computed_expressions {
305            // Simple case: only columns, use existing projection logic
306            let column_indices = self.resolve_select_columns(view.source(), select_items)?;
307            return Ok(view.with_columns(column_indices));
308        }
309
310        // Complex case: we have computed expressions
311        // IMPORTANT: We create a PROJECTED view, not a new table
312        // This preserves the original DataTable reference
313
314        let source_table = view.source();
315        let visible_rows = view.visible_row_indices();
316
317        // Create a temporary table just for the computed result view
318        // But this table is only used for the current query result
319        let mut computed_table = DataTable::new("query_result");
320
321        // First, expand any Star selectors to actual columns
322        let mut expanded_items = Vec::new();
323        for item in select_items {
324            match item {
325                SelectItem::Star => {
326                    // Expand * to all columns from source table
327                    for col_name in source_table.column_names() {
328                        expanded_items.push(SelectItem::Column(col_name));
329                    }
330                }
331                _ => expanded_items.push(item.clone()),
332            }
333        }
334
335        // Add columns based on expanded SelectItems, handling duplicates
336        let mut column_name_counts: std::collections::HashMap<String, usize> =
337            std::collections::HashMap::new();
338
339        for item in &expanded_items {
340            let base_name = match item {
341                SelectItem::Column(name) => name.clone(),
342                SelectItem::Expression { alias, .. } => alias.clone(),
343                SelectItem::Star => unreachable!("Star should have been expanded"),
344            };
345
346            // Check if this column name has been used before
347            let count = column_name_counts.entry(base_name.clone()).or_insert(0);
348            let column_name = if *count == 0 {
349                // First occurrence, use the name as-is
350                base_name.clone()
351            } else {
352                // Duplicate, append a suffix
353                format!("{}_{}", base_name, count)
354            };
355            *count += 1;
356
357            computed_table.add_column(DataColumn::new(&column_name));
358        }
359
360        // Calculate values for each row
361        let evaluator =
362            ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
363
364        for &row_idx in visible_rows {
365            let mut row_values = Vec::new();
366
367            for item in &expanded_items {
368                let value = match item {
369                    SelectItem::Column(col_name) => {
370                        // Simple column reference
371                        let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
372                            let suggestion = self.find_similar_column(source_table, col_name);
373                            match suggestion {
374                                Some(similar) => anyhow::anyhow!(
375                                    "Column '{}' not found. Did you mean '{}'?",
376                                    col_name,
377                                    similar
378                                ),
379                                None => anyhow::anyhow!("Column '{}' not found", col_name),
380                            }
381                        })?;
382                        let row = source_table
383                            .get_row(row_idx)
384                            .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
385                        row.get(col_idx)
386                            .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
387                            .clone()
388                    }
389                    SelectItem::Expression { expr, .. } => {
390                        // Computed expression
391                        evaluator.evaluate(expr, row_idx)?
392                    }
393                    SelectItem::Star => unreachable!("Star should have been expanded"),
394                };
395                row_values.push(value);
396            }
397
398            computed_table
399                .add_row(DataRow::new(row_values))
400                .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
401        }
402
403        // Return a view of the computed result
404        // This is a temporary view for this query only
405        Ok(DataView::new(Arc::new(computed_table)))
406    }
407
408    /// Resolve SelectItem columns to indices (for simple column projections only)
409    fn resolve_select_columns(
410        &self,
411        table: &DataTable,
412        select_items: &[SelectItem],
413    ) -> Result<Vec<usize>> {
414        let mut indices = Vec::new();
415        let table_columns = table.column_names();
416
417        for item in select_items {
418            match item {
419                SelectItem::Column(col_name) => {
420                    let index = table_columns
421                        .iter()
422                        .position(|c| c.eq_ignore_ascii_case(col_name))
423                        .ok_or_else(|| {
424                            let suggestion = self.find_similar_column(table, col_name);
425                            match suggestion {
426                                Some(similar) => anyhow::anyhow!(
427                                    "Column '{}' not found. Did you mean '{}'?",
428                                    col_name,
429                                    similar
430                                ),
431                                None => anyhow::anyhow!("Column '{}' not found", col_name),
432                            }
433                        })?;
434                    indices.push(index);
435                }
436                SelectItem::Star => {
437                    // Expand * to all column indices
438                    for i in 0..table_columns.len() {
439                        indices.push(i);
440                    }
441                }
442                SelectItem::Expression { .. } => {
443                    return Err(anyhow::anyhow!(
444                        "Computed expressions require new table creation"
445                    ));
446                }
447            }
448        }
449
450        Ok(indices)
451    }
452
453    /// Apply multi-column ORDER BY sorting to the view
454    fn apply_multi_order_by(
455        &self,
456        mut view: DataView,
457        order_by_columns: &[OrderByColumn],
458    ) -> Result<DataView> {
459        // Build list of (source_column_index, ascending) tuples
460        let mut sort_columns = Vec::new();
461
462        for order_col in order_by_columns {
463            // Get column index
464            let col_index = view
465                .source()
466                .get_column_index(&order_col.column)
467                .ok_or_else(|| {
468                    let suggestion = self.find_similar_column(view.source(), &order_col.column);
469                    match suggestion {
470                        Some(similar) => anyhow::anyhow!(
471                            "Column '{}' not found. Did you mean '{}'?",
472                            order_col.column,
473                            similar
474                        ),
475                        None => anyhow::anyhow!("Column '{}' not found", order_col.column),
476                    }
477                })?;
478
479            let ascending = matches!(order_col.direction, SortDirection::Asc);
480            sort_columns.push((col_index, ascending));
481        }
482
483        // Apply multi-column sorting
484        view.apply_multi_sort(&sort_columns)?;
485        Ok(view)
486    }
487}
488
489#[cfg(test)]
490mod tests {
491    use super::*;
492    use crate::data::datatable::{DataColumn, DataRow, DataValue};
493
494    fn create_test_table() -> Arc<DataTable> {
495        let mut table = DataTable::new("test");
496
497        // Add columns
498        table.add_column(DataColumn::new("id"));
499        table.add_column(DataColumn::new("name"));
500        table.add_column(DataColumn::new("age"));
501
502        // Add rows
503        table
504            .add_row(DataRow::new(vec![
505                DataValue::Integer(1),
506                DataValue::String("Alice".to_string()),
507                DataValue::Integer(30),
508            ]))
509            .unwrap();
510
511        table
512            .add_row(DataRow::new(vec![
513                DataValue::Integer(2),
514                DataValue::String("Bob".to_string()),
515                DataValue::Integer(25),
516            ]))
517            .unwrap();
518
519        table
520            .add_row(DataRow::new(vec![
521                DataValue::Integer(3),
522                DataValue::String("Charlie".to_string()),
523                DataValue::Integer(35),
524            ]))
525            .unwrap();
526
527        Arc::new(table)
528    }
529
530    #[test]
531    fn test_select_all() {
532        let table = create_test_table();
533        let engine = QueryEngine::new();
534
535        let view = engine
536            .execute(table.clone(), "SELECT * FROM users")
537            .unwrap();
538        assert_eq!(view.row_count(), 3);
539        assert_eq!(view.column_count(), 3);
540    }
541
542    #[test]
543    fn test_select_columns() {
544        let table = create_test_table();
545        let engine = QueryEngine::new();
546
547        let view = engine
548            .execute(table.clone(), "SELECT name, age FROM users")
549            .unwrap();
550        assert_eq!(view.row_count(), 3);
551        assert_eq!(view.column_count(), 2);
552    }
553
554    #[test]
555    fn test_select_with_limit() {
556        let table = create_test_table();
557        let engine = QueryEngine::new();
558
559        let view = engine
560            .execute(table.clone(), "SELECT * FROM users LIMIT 2")
561            .unwrap();
562        assert_eq!(view.row_count(), 2);
563    }
564
565    #[test]
566    fn test_type_coercion_contains() {
567        // Initialize tracing for debug output
568        let _ = tracing_subscriber::fmt()
569            .with_max_level(tracing::Level::DEBUG)
570            .try_init();
571
572        let mut table = DataTable::new("test");
573        table.add_column(DataColumn::new("id"));
574        table.add_column(DataColumn::new("status"));
575        table.add_column(DataColumn::new("price"));
576
577        // Add test data with mixed types
578        table
579            .add_row(DataRow::new(vec![
580                DataValue::Integer(1),
581                DataValue::String("Pending".to_string()),
582                DataValue::Float(99.99),
583            ]))
584            .unwrap();
585
586        table
587            .add_row(DataRow::new(vec![
588                DataValue::Integer(2),
589                DataValue::String("Confirmed".to_string()),
590                DataValue::Float(150.50),
591            ]))
592            .unwrap();
593
594        table
595            .add_row(DataRow::new(vec![
596                DataValue::Integer(3),
597                DataValue::String("Pending".to_string()),
598                DataValue::Float(75.00),
599            ]))
600            .unwrap();
601
602        let table = Arc::new(table);
603        let engine = QueryEngine::new();
604
605        println!("\n=== Testing WHERE clause with Contains ===");
606        println!("Table has {} rows", table.row_count());
607        for i in 0..table.row_count() {
608            let status = table.get_value(i, 1);
609            println!("Row {}: status = {:?}", i, status);
610        }
611
612        // Test 1: Basic string contains (should work)
613        println!("\n--- Test 1: status.Contains('pend') ---");
614        let result = engine.execute(
615            table.clone(),
616            "SELECT * FROM test WHERE status.Contains('pend')",
617        );
618        match result {
619            Ok(view) => {
620                println!("SUCCESS: Found {} matching rows", view.row_count());
621                assert_eq!(view.row_count(), 2); // Should find both Pending rows
622            }
623            Err(e) => {
624                panic!("Query failed: {}", e);
625            }
626        }
627
628        // Test 2: Numeric contains (should work with type coercion)
629        println!("\n--- Test 2: price.Contains('9') ---");
630        let result = engine.execute(
631            table.clone(),
632            "SELECT * FROM test WHERE price.Contains('9')",
633        );
634        match result {
635            Ok(view) => {
636                println!(
637                    "SUCCESS: Found {} matching rows with price containing '9'",
638                    view.row_count()
639                );
640                // Should find 99.99 row
641                assert!(view.row_count() >= 1);
642            }
643            Err(e) => {
644                panic!("Numeric coercion query failed: {}", e);
645            }
646        }
647
648        println!("\n=== All tests passed! ===");
649    }
650
651    #[test]
652    fn test_not_in_clause() {
653        // Initialize tracing for debug output
654        let _ = tracing_subscriber::fmt()
655            .with_max_level(tracing::Level::DEBUG)
656            .try_init();
657
658        let mut table = DataTable::new("test");
659        table.add_column(DataColumn::new("id"));
660        table.add_column(DataColumn::new("country"));
661
662        // Add test data
663        table
664            .add_row(DataRow::new(vec![
665                DataValue::Integer(1),
666                DataValue::String("CA".to_string()),
667            ]))
668            .unwrap();
669
670        table
671            .add_row(DataRow::new(vec![
672                DataValue::Integer(2),
673                DataValue::String("US".to_string()),
674            ]))
675            .unwrap();
676
677        table
678            .add_row(DataRow::new(vec![
679                DataValue::Integer(3),
680                DataValue::String("UK".to_string()),
681            ]))
682            .unwrap();
683
684        let table = Arc::new(table);
685        let engine = QueryEngine::new();
686
687        println!("\n=== Testing NOT IN clause ===");
688        println!("Table has {} rows", table.row_count());
689        for i in 0..table.row_count() {
690            let country = table.get_value(i, 1);
691            println!("Row {}: country = {:?}", i, country);
692        }
693
694        // Test NOT IN clause - should exclude CA, return US and UK (2 rows)
695        println!("\n--- Test: country NOT IN ('CA') ---");
696        let result = engine.execute(
697            table.clone(),
698            "SELECT * FROM test WHERE country NOT IN ('CA')",
699        );
700        match result {
701            Ok(view) => {
702                println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
703                assert_eq!(view.row_count(), 2); // Should find US and UK
704            }
705            Err(e) => {
706                panic!("NOT IN query failed: {}", e);
707            }
708        }
709
710        println!("\n=== NOT IN test complete! ===");
711    }
712
713    #[test]
714    fn test_case_insensitive_in_and_not_in() {
715        // Initialize tracing for debug output
716        let _ = tracing_subscriber::fmt()
717            .with_max_level(tracing::Level::DEBUG)
718            .try_init();
719
720        let mut table = DataTable::new("test");
721        table.add_column(DataColumn::new("id"));
722        table.add_column(DataColumn::new("country"));
723
724        // Add test data with mixed case
725        table
726            .add_row(DataRow::new(vec![
727                DataValue::Integer(1),
728                DataValue::String("CA".to_string()), // uppercase
729            ]))
730            .unwrap();
731
732        table
733            .add_row(DataRow::new(vec![
734                DataValue::Integer(2),
735                DataValue::String("us".to_string()), // lowercase
736            ]))
737            .unwrap();
738
739        table
740            .add_row(DataRow::new(vec![
741                DataValue::Integer(3),
742                DataValue::String("UK".to_string()), // uppercase
743            ]))
744            .unwrap();
745
746        let table = Arc::new(table);
747
748        println!("\n=== Testing Case-Insensitive IN clause ===");
749        println!("Table has {} rows", table.row_count());
750        for i in 0..table.row_count() {
751            let country = table.get_value(i, 1);
752            println!("Row {}: country = {:?}", i, country);
753        }
754
755        // Test case-insensitive IN - should match 'CA' with 'ca'
756        println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
757        let engine = QueryEngine::with_case_insensitive(true);
758        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
759        match result {
760            Ok(view) => {
761                println!(
762                    "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
763                    view.row_count()
764                );
765                assert_eq!(view.row_count(), 1); // Should find CA row
766            }
767            Err(e) => {
768                panic!("Case-insensitive IN query failed: {}", e);
769            }
770        }
771
772        // Test case-insensitive NOT IN - should exclude 'CA' when searching for 'ca'
773        println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
774        let result = engine.execute(
775            table.clone(),
776            "SELECT * FROM test WHERE country NOT IN ('ca')",
777        );
778        match result {
779            Ok(view) => {
780                println!(
781                    "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
782                    view.row_count()
783                );
784                assert_eq!(view.row_count(), 2); // Should find us and UK rows
785            }
786            Err(e) => {
787                panic!("Case-insensitive NOT IN query failed: {}", e);
788            }
789        }
790
791        // Test case-sensitive (default) - should NOT match 'CA' with 'ca'
792        println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
793        let engine_case_sensitive = QueryEngine::new(); // defaults to case_insensitive=false
794        let result = engine_case_sensitive
795            .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
796        match result {
797            Ok(view) => {
798                println!(
799                    "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
800                    view.row_count()
801                );
802                assert_eq!(view.row_count(), 0); // Should find no rows (CA != ca)
803            }
804            Err(e) => {
805                panic!("Case-sensitive IN query failed: {}", e);
806            }
807        }
808
809        println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
810    }
811
812    #[test]
813    #[ignore = "Parentheses in WHERE clause not yet implemented"]
814    fn test_parentheses_in_where_clause() {
815        // Initialize tracing for debug output
816        let _ = tracing_subscriber::fmt()
817            .with_max_level(tracing::Level::DEBUG)
818            .try_init();
819
820        let mut table = DataTable::new("test");
821        table.add_column(DataColumn::new("id"));
822        table.add_column(DataColumn::new("status"));
823        table.add_column(DataColumn::new("priority"));
824
825        // Add test data
826        table
827            .add_row(DataRow::new(vec![
828                DataValue::Integer(1),
829                DataValue::String("Pending".to_string()),
830                DataValue::String("High".to_string()),
831            ]))
832            .unwrap();
833
834        table
835            .add_row(DataRow::new(vec![
836                DataValue::Integer(2),
837                DataValue::String("Complete".to_string()),
838                DataValue::String("High".to_string()),
839            ]))
840            .unwrap();
841
842        table
843            .add_row(DataRow::new(vec![
844                DataValue::Integer(3),
845                DataValue::String("Pending".to_string()),
846                DataValue::String("Low".to_string()),
847            ]))
848            .unwrap();
849
850        table
851            .add_row(DataRow::new(vec![
852                DataValue::Integer(4),
853                DataValue::String("Complete".to_string()),
854                DataValue::String("Low".to_string()),
855            ]))
856            .unwrap();
857
858        let table = Arc::new(table);
859        let engine = QueryEngine::new();
860
861        println!("\n=== Testing Parentheses in WHERE clause ===");
862        println!("Table has {} rows", table.row_count());
863        for i in 0..table.row_count() {
864            let status = table.get_value(i, 1);
865            let priority = table.get_value(i, 2);
866            println!(
867                "Row {}: status = {:?}, priority = {:?}",
868                i, status, priority
869            );
870        }
871
872        // Test OR with parentheses - should get (Pending AND High) OR (Complete AND Low)
873        println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
874        let result = engine.execute(
875            table.clone(),
876            "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
877        );
878        match result {
879            Ok(view) => {
880                println!(
881                    "SUCCESS: Found {} rows with parenthetical logic",
882                    view.row_count()
883                );
884                assert_eq!(view.row_count(), 2); // Should find rows 1 and 4
885            }
886            Err(e) => {
887                panic!("Parentheses query failed: {}", e);
888            }
889        }
890
891        println!("\n=== Parentheses test complete! ===");
892    }
893
894    #[test]
895    #[ignore = "Numeric type coercion needs fixing"]
896    fn test_numeric_type_coercion() {
897        // Initialize tracing for debug output
898        let _ = tracing_subscriber::fmt()
899            .with_max_level(tracing::Level::DEBUG)
900            .try_init();
901
902        let mut table = DataTable::new("test");
903        table.add_column(DataColumn::new("id"));
904        table.add_column(DataColumn::new("price"));
905        table.add_column(DataColumn::new("quantity"));
906
907        // Add test data with different numeric types
908        table
909            .add_row(DataRow::new(vec![
910                DataValue::Integer(1),
911                DataValue::Float(99.50), // Contains '.'
912                DataValue::Integer(100),
913            ]))
914            .unwrap();
915
916        table
917            .add_row(DataRow::new(vec![
918                DataValue::Integer(2),
919                DataValue::Float(150.0), // Contains '.' and '0'
920                DataValue::Integer(200),
921            ]))
922            .unwrap();
923
924        table
925            .add_row(DataRow::new(vec![
926                DataValue::Integer(3),
927                DataValue::Integer(75), // No decimal point
928                DataValue::Integer(50),
929            ]))
930            .unwrap();
931
932        let table = Arc::new(table);
933        let engine = QueryEngine::new();
934
935        println!("\n=== Testing Numeric Type Coercion ===");
936        println!("Table has {} rows", table.row_count());
937        for i in 0..table.row_count() {
938            let price = table.get_value(i, 1);
939            let quantity = table.get_value(i, 2);
940            println!("Row {}: price = {:?}, quantity = {:?}", i, price, quantity);
941        }
942
943        // Test Contains on float values - should find rows with decimal points
944        println!("\n--- Test: price.Contains('.') ---");
945        let result = engine.execute(
946            table.clone(),
947            "SELECT * FROM test WHERE price.Contains('.')",
948        );
949        match result {
950            Ok(view) => {
951                println!(
952                    "SUCCESS: Found {} rows with decimal points in price",
953                    view.row_count()
954                );
955                assert_eq!(view.row_count(), 2); // Should find 99.50 and 150.0
956            }
957            Err(e) => {
958                panic!("Numeric Contains query failed: {}", e);
959            }
960        }
961
962        // Test Contains on integer values converted to string
963        println!("\n--- Test: quantity.Contains('0') ---");
964        let result = engine.execute(
965            table.clone(),
966            "SELECT * FROM test WHERE quantity.Contains('0')",
967        );
968        match result {
969            Ok(view) => {
970                println!(
971                    "SUCCESS: Found {} rows with '0' in quantity",
972                    view.row_count()
973                );
974                assert_eq!(view.row_count(), 2); // Should find 100 and 200
975            }
976            Err(e) => {
977                panic!("Integer Contains query failed: {}", e);
978            }
979        }
980
981        println!("\n=== Numeric type coercion test complete! ===");
982    }
983
984    #[test]
985    fn test_datetime_comparisons() {
986        // Initialize tracing for debug output
987        let _ = tracing_subscriber::fmt()
988            .with_max_level(tracing::Level::DEBUG)
989            .try_init();
990
991        let mut table = DataTable::new("test");
992        table.add_column(DataColumn::new("id"));
993        table.add_column(DataColumn::new("created_date"));
994
995        // Add test data with date strings (as they would come from CSV)
996        table
997            .add_row(DataRow::new(vec![
998                DataValue::Integer(1),
999                DataValue::String("2024-12-15".to_string()),
1000            ]))
1001            .unwrap();
1002
1003        table
1004            .add_row(DataRow::new(vec![
1005                DataValue::Integer(2),
1006                DataValue::String("2025-01-15".to_string()),
1007            ]))
1008            .unwrap();
1009
1010        table
1011            .add_row(DataRow::new(vec![
1012                DataValue::Integer(3),
1013                DataValue::String("2025-02-15".to_string()),
1014            ]))
1015            .unwrap();
1016
1017        let table = Arc::new(table);
1018        let engine = QueryEngine::new();
1019
1020        println!("\n=== Testing DateTime Comparisons ===");
1021        println!("Table has {} rows", table.row_count());
1022        for i in 0..table.row_count() {
1023            let date = table.get_value(i, 1);
1024            println!("Row {}: created_date = {:?}", i, date);
1025        }
1026
1027        // Test DateTime constructor comparison - should find dates after 2025-01-01
1028        println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1029        let result = engine.execute(
1030            table.clone(),
1031            "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1032        );
1033        match result {
1034            Ok(view) => {
1035                println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1036                assert_eq!(view.row_count(), 2); // Should find 2025-01-15 and 2025-02-15
1037            }
1038            Err(e) => {
1039                panic!("DateTime comparison query failed: {}", e);
1040            }
1041        }
1042
1043        println!("\n=== DateTime comparison test complete! ===");
1044    }
1045
1046    #[test]
1047    fn test_not_with_method_calls() {
1048        // Initialize tracing for debug output
1049        let _ = tracing_subscriber::fmt()
1050            .with_max_level(tracing::Level::DEBUG)
1051            .try_init();
1052
1053        let mut table = DataTable::new("test");
1054        table.add_column(DataColumn::new("id"));
1055        table.add_column(DataColumn::new("status"));
1056
1057        // Add test data
1058        table
1059            .add_row(DataRow::new(vec![
1060                DataValue::Integer(1),
1061                DataValue::String("Pending Review".to_string()),
1062            ]))
1063            .unwrap();
1064
1065        table
1066            .add_row(DataRow::new(vec![
1067                DataValue::Integer(2),
1068                DataValue::String("Complete".to_string()),
1069            ]))
1070            .unwrap();
1071
1072        table
1073            .add_row(DataRow::new(vec![
1074                DataValue::Integer(3),
1075                DataValue::String("Pending Approval".to_string()),
1076            ]))
1077            .unwrap();
1078
1079        let table = Arc::new(table);
1080        let engine = QueryEngine::with_case_insensitive(true);
1081
1082        println!("\n=== Testing NOT with Method Calls ===");
1083        println!("Table has {} rows", table.row_count());
1084        for i in 0..table.row_count() {
1085            let status = table.get_value(i, 1);
1086            println!("Row {}: status = {:?}", i, status);
1087        }
1088
1089        // Test NOT with Contains - should exclude rows containing "pend"
1090        println!("\n--- Test: NOT status.Contains('pend') ---");
1091        let result = engine.execute(
1092            table.clone(),
1093            "SELECT * FROM test WHERE NOT status.Contains('pend')",
1094        );
1095        match result {
1096            Ok(view) => {
1097                println!(
1098                    "SUCCESS: Found {} rows NOT containing 'pend'",
1099                    view.row_count()
1100                );
1101                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1102            }
1103            Err(e) => {
1104                panic!("NOT Contains query failed: {}", e);
1105            }
1106        }
1107
1108        // Test NOT with StartsWith
1109        println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1110        let result = engine.execute(
1111            table.clone(),
1112            "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1113        );
1114        match result {
1115            Ok(view) => {
1116                println!(
1117                    "SUCCESS: Found {} rows NOT starting with 'Pending'",
1118                    view.row_count()
1119                );
1120                assert_eq!(view.row_count(), 1); // Should find only "Complete"
1121            }
1122            Err(e) => {
1123                panic!("NOT StartsWith query failed: {}", e);
1124            }
1125        }
1126
1127        println!("\n=== NOT with method calls test complete! ===");
1128    }
1129
1130    #[test]
1131    #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1132    fn test_complex_logical_expressions() {
1133        // Initialize tracing for debug output
1134        let _ = tracing_subscriber::fmt()
1135            .with_max_level(tracing::Level::DEBUG)
1136            .try_init();
1137
1138        let mut table = DataTable::new("test");
1139        table.add_column(DataColumn::new("id"));
1140        table.add_column(DataColumn::new("status"));
1141        table.add_column(DataColumn::new("priority"));
1142        table.add_column(DataColumn::new("assigned"));
1143
1144        // Add comprehensive test data
1145        table
1146            .add_row(DataRow::new(vec![
1147                DataValue::Integer(1),
1148                DataValue::String("Pending".to_string()),
1149                DataValue::String("High".to_string()),
1150                DataValue::String("John".to_string()),
1151            ]))
1152            .unwrap();
1153
1154        table
1155            .add_row(DataRow::new(vec![
1156                DataValue::Integer(2),
1157                DataValue::String("Complete".to_string()),
1158                DataValue::String("High".to_string()),
1159                DataValue::String("Jane".to_string()),
1160            ]))
1161            .unwrap();
1162
1163        table
1164            .add_row(DataRow::new(vec![
1165                DataValue::Integer(3),
1166                DataValue::String("Pending".to_string()),
1167                DataValue::String("Low".to_string()),
1168                DataValue::String("John".to_string()),
1169            ]))
1170            .unwrap();
1171
1172        table
1173            .add_row(DataRow::new(vec![
1174                DataValue::Integer(4),
1175                DataValue::String("In Progress".to_string()),
1176                DataValue::String("Medium".to_string()),
1177                DataValue::String("Jane".to_string()),
1178            ]))
1179            .unwrap();
1180
1181        let table = Arc::new(table);
1182        let engine = QueryEngine::new();
1183
1184        println!("\n=== Testing Complex Logical Expressions ===");
1185        println!("Table has {} rows", table.row_count());
1186        for i in 0..table.row_count() {
1187            let status = table.get_value(i, 1);
1188            let priority = table.get_value(i, 2);
1189            let assigned = table.get_value(i, 3);
1190            println!(
1191                "Row {}: status = {:?}, priority = {:?}, assigned = {:?}",
1192                i, status, priority, assigned
1193            );
1194        }
1195
1196        // Test complex AND/OR logic
1197        println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
1198        let result = engine.execute(
1199            table.clone(),
1200            "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
1201        );
1202        match result {
1203            Ok(view) => {
1204                println!(
1205                    "SUCCESS: Found {} rows with complex logic",
1206                    view.row_count()
1207                );
1208                assert_eq!(view.row_count(), 2); // Should find rows 1 and 3 (both Pending, one High priority, both assigned to John)
1209            }
1210            Err(e) => {
1211                panic!("Complex logic query failed: {}", e);
1212            }
1213        }
1214
1215        // Test NOT with complex expressions
1216        println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
1217        let result = engine.execute(
1218            table.clone(),
1219            "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
1220        );
1221        match result {
1222            Ok(view) => {
1223                println!(
1224                    "SUCCESS: Found {} rows with NOT complex logic",
1225                    view.row_count()
1226                );
1227                assert_eq!(view.row_count(), 2); // Should find rows 1 (Pending+High) and 4 (In Progress+Medium)
1228            }
1229            Err(e) => {
1230                panic!("NOT complex logic query failed: {}", e);
1231            }
1232        }
1233
1234        println!("\n=== Complex logical expressions test complete! ===");
1235    }
1236
1237    #[test]
1238    fn test_mixed_data_types_and_edge_cases() {
1239        // Initialize tracing for debug output
1240        let _ = tracing_subscriber::fmt()
1241            .with_max_level(tracing::Level::DEBUG)
1242            .try_init();
1243
1244        let mut table = DataTable::new("test");
1245        table.add_column(DataColumn::new("id"));
1246        table.add_column(DataColumn::new("value"));
1247        table.add_column(DataColumn::new("nullable_field"));
1248
1249        // Add test data with mixed types and edge cases
1250        table
1251            .add_row(DataRow::new(vec![
1252                DataValue::Integer(1),
1253                DataValue::String("123.45".to_string()),
1254                DataValue::String("present".to_string()),
1255            ]))
1256            .unwrap();
1257
1258        table
1259            .add_row(DataRow::new(vec![
1260                DataValue::Integer(2),
1261                DataValue::Float(678.90),
1262                DataValue::Null,
1263            ]))
1264            .unwrap();
1265
1266        table
1267            .add_row(DataRow::new(vec![
1268                DataValue::Integer(3),
1269                DataValue::Boolean(true),
1270                DataValue::String("also present".to_string()),
1271            ]))
1272            .unwrap();
1273
1274        table
1275            .add_row(DataRow::new(vec![
1276                DataValue::Integer(4),
1277                DataValue::String("false".to_string()),
1278                DataValue::Null,
1279            ]))
1280            .unwrap();
1281
1282        let table = Arc::new(table);
1283        let engine = QueryEngine::new();
1284
1285        println!("\n=== Testing Mixed Data Types and Edge Cases ===");
1286        println!("Table has {} rows", table.row_count());
1287        for i in 0..table.row_count() {
1288            let value = table.get_value(i, 1);
1289            let nullable = table.get_value(i, 2);
1290            println!(
1291                "Row {}: value = {:?}, nullable_field = {:?}",
1292                i, value, nullable
1293            );
1294        }
1295
1296        // Test type coercion with boolean Contains
1297        println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
1298        let result = engine.execute(
1299            table.clone(),
1300            "SELECT * FROM test WHERE value.Contains('true')",
1301        );
1302        match result {
1303            Ok(view) => {
1304                println!(
1305                    "SUCCESS: Found {} rows with boolean coercion",
1306                    view.row_count()
1307                );
1308                assert_eq!(view.row_count(), 1); // Should find the boolean true row
1309            }
1310            Err(e) => {
1311                panic!("Boolean coercion query failed: {}", e);
1312            }
1313        }
1314
1315        // Test multiple IN values with mixed types
1316        println!("\n--- Test: id IN (1, 3) ---");
1317        let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
1318        match result {
1319            Ok(view) => {
1320                println!("SUCCESS: Found {} rows with IN clause", view.row_count());
1321                assert_eq!(view.row_count(), 2); // Should find rows with id 1 and 3
1322            }
1323            Err(e) => {
1324                panic!("Multiple IN values query failed: {}", e);
1325            }
1326        }
1327
1328        println!("\n=== Mixed data types test complete! ===");
1329    }
1330
1331    #[test]
1332    fn test_not_in_parsing() {
1333        use crate::sql::recursive_parser::Parser;
1334
1335        let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
1336        println!("\n=== Testing NOT IN parsing ===");
1337        println!("Parsing query: {}", query);
1338
1339        let mut parser = Parser::new(query);
1340        match parser.parse() {
1341            Ok(statement) => {
1342                println!("Parsed statement: {:#?}", statement);
1343                if let Some(where_clause) = statement.where_clause {
1344                    println!("WHERE conditions: {:#?}", where_clause.conditions);
1345                    if let Some(first_condition) = where_clause.conditions.first() {
1346                        println!("First condition expression: {:#?}", first_condition.expr);
1347                    }
1348                }
1349            }
1350            Err(e) => {
1351                panic!("Parse error: {}", e);
1352            }
1353        }
1354    }
1355}
1356
1357#[cfg(test)]
1358#[path = "query_engine_tests.rs"]
1359mod query_engine_tests;