1use anyhow::{anyhow, Result};
2use std::collections::HashMap;
3use std::sync::Arc;
4use std::time::Instant;
5use tracing::{debug, info};
6
7use crate::config::config::BehaviorConfig;
8use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
9use crate::data::data_view::DataView;
10use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
11use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
12use crate::sql::aggregates::contains_aggregate;
13use crate::sql::recursive_parser::{
14 OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection, SqlExpression, CTE,
15};
16
17pub struct QueryEngine {
19 case_insensitive: bool,
20 date_notation: String,
21 behavior_config: Option<BehaviorConfig>,
22}
23
24impl Default for QueryEngine {
25 fn default() -> Self {
26 Self::new()
27 }
28}
29
30impl QueryEngine {
31 #[must_use]
32 pub fn new() -> Self {
33 Self {
34 case_insensitive: false,
35 date_notation: "us".to_string(),
36 behavior_config: None,
37 }
38 }
39
40 #[must_use]
41 pub fn with_behavior_config(config: BehaviorConfig) -> Self {
42 let case_insensitive = config.case_insensitive_default;
43 let date_notation = config.default_date_notation.clone();
44 Self {
45 case_insensitive,
46 date_notation,
47 behavior_config: Some(config),
48 }
49 }
50
51 #[must_use]
52 pub fn with_date_notation(date_notation: String) -> Self {
53 Self {
54 case_insensitive: false,
55 date_notation,
56 behavior_config: None,
57 }
58 }
59
60 #[must_use]
61 pub fn with_case_insensitive(case_insensitive: bool) -> Self {
62 Self {
63 case_insensitive,
64 date_notation: "us".to_string(),
65 behavior_config: None,
66 }
67 }
68
69 #[must_use]
70 pub fn with_case_insensitive_and_date_notation(
71 case_insensitive: bool,
72 date_notation: String,
73 ) -> Self {
74 Self {
75 case_insensitive,
76 date_notation,
77 behavior_config: None,
78 }
79 }
80
81 fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
83 let columns = table.column_names();
84 let mut best_match: Option<(String, usize)> = None;
85
86 for col in columns {
87 let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
88 let max_distance = if name.len() > 10 { 3 } else { 2 };
91 if distance <= max_distance {
92 match &best_match {
93 None => best_match = Some((col, distance)),
94 Some((_, best_dist)) if distance < *best_dist => {
95 best_match = Some((col, distance));
96 }
97 _ => {}
98 }
99 }
100 }
101
102 best_match.map(|(name, _)| name)
103 }
104
105 fn edit_distance(&self, s1: &str, s2: &str) -> usize {
107 let len1 = s1.len();
108 let len2 = s2.len();
109 let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
110
111 for i in 0..=len1 {
112 matrix[i][0] = i;
113 }
114 for j in 0..=len2 {
115 matrix[0][j] = j;
116 }
117
118 for (i, c1) in s1.chars().enumerate() {
119 for (j, c2) in s2.chars().enumerate() {
120 let cost = usize::from(c1 != c2);
121 matrix[i + 1][j + 1] = std::cmp::min(
122 matrix[i][j + 1] + 1, std::cmp::min(
124 matrix[i + 1][j] + 1, matrix[i][j] + cost, ),
127 );
128 }
129 }
130
131 matrix[len1][len2]
132 }
133
134 pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
136 let start_time = Instant::now();
137
138 let parse_start = Instant::now();
140 let mut parser = Parser::new(sql);
141 let statement = parser
142 .parse()
143 .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
144 let parse_duration = parse_start.elapsed();
145
146 let build_start = Instant::now();
148 let mut cte_context = HashMap::new();
150 let result = self.build_view_with_context(table, statement, &mut cte_context)?;
151 let build_duration = build_start.elapsed();
152
153 let total_duration = start_time.elapsed();
154 info!(
155 "Query execution complete: parse={:?}, build={:?}, total={:?}, rows={}",
156 parse_duration,
157 build_duration,
158 total_duration,
159 result.row_count()
160 );
161
162 Ok(result)
163 }
164
165 fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
167 let mut cte_context = HashMap::new();
168 self.build_view_with_context(table, statement, &mut cte_context)
169 }
170
171 fn build_view_with_context(
173 &self,
174 table: Arc<DataTable>,
175 statement: SelectStatement,
176 cte_context: &mut HashMap<String, Arc<DataView>>,
177 ) -> Result<DataView> {
178 for cte in &statement.ctes {
180 debug!("QueryEngine: Processing CTE '{}'...", cte.name);
181 let cte_result =
183 self.build_view_with_context(table.clone(), cte.query.clone(), cte_context)?;
184
185 cte_context.insert(cte.name.clone(), Arc::new(cte_result));
187 debug!(
188 "QueryEngine: CTE '{}' processed, stored in context",
189 cte.name
190 );
191 }
192
193 let source_table = if let Some(ref subquery) = statement.from_subquery {
195 debug!("QueryEngine: Processing FROM subquery...");
197 let subquery_result =
198 self.build_view_with_context(table.clone(), *subquery.clone(), cte_context)?;
199
200 let materialized = self.materialize_view(subquery_result)?;
203 Arc::new(materialized)
204 } else if let Some(ref table_name) = statement.from_table {
205 if let Some(cte_view) = cte_context.get(table_name) {
207 debug!("QueryEngine: Using CTE '{}' as source table", table_name);
208 let materialized = self.materialize_view((**cte_view).clone())?;
210 Arc::new(materialized)
211 } else {
212 table.clone()
214 }
215 } else {
216 table.clone()
218 };
219
220 self.build_view_internal(source_table, statement)
222 }
223
224 fn materialize_view(&self, view: DataView) -> Result<DataTable> {
226 let source = view.source();
227 let mut result_table = DataTable::new("derived");
228
229 let visible_cols = view.visible_column_indices().to_vec();
231
232 for col_idx in &visible_cols {
234 let col = &source.columns[*col_idx];
235 let new_col = DataColumn {
236 name: col.name.clone(),
237 data_type: col.data_type.clone(),
238 nullable: col.nullable,
239 unique_values: col.unique_values,
240 null_count: col.null_count,
241 metadata: col.metadata.clone(),
242 };
243 result_table.add_column(new_col);
244 }
245
246 for row_idx in view.visible_row_indices() {
248 let source_row = &source.rows[*row_idx];
249 let mut new_row = DataRow { values: Vec::new() };
250
251 for col_idx in &visible_cols {
252 new_row.values.push(source_row.values[*col_idx].clone());
253 }
254
255 result_table.add_row(new_row);
256 }
257
258 Ok(result_table)
259 }
260
261 fn build_view_internal(
262 &self,
263 table: Arc<DataTable>,
264 statement: SelectStatement,
265 ) -> Result<DataView> {
266 debug!(
267 "QueryEngine::build_view - select_items: {:?}",
268 statement.select_items
269 );
270 debug!(
271 "QueryEngine::build_view - where_clause: {:?}",
272 statement.where_clause
273 );
274
275 let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
277
278 if let Some(where_clause) = &statement.where_clause {
280 let total_rows = table.row_count();
281 debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
282 debug!("QueryEngine: WHERE clause = {:?}", where_clause);
283
284 let filter_start = Instant::now();
285 let mut filtered_rows = Vec::new();
287 for row_idx in visible_rows {
288 if row_idx < 3 {
290 debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
291 }
292 let evaluator = RecursiveWhereEvaluator::with_config(
293 &table,
294 self.case_insensitive,
295 self.date_notation.clone(),
296 );
297 match evaluator.evaluate(where_clause, row_idx) {
298 Ok(result) => {
299 if row_idx < 3 {
300 debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
301 }
302 if result {
303 filtered_rows.push(row_idx);
304 }
305 }
306 Err(e) => {
307 if row_idx < 3 {
308 debug!(
309 "QueryEngine: WHERE evaluation error for row {}: {}",
310 row_idx, e
311 );
312 }
313 return Err(e);
315 }
316 }
317 }
318 visible_rows = filtered_rows;
319 let filter_duration = filter_start.elapsed();
320 info!(
321 "WHERE clause filtering: {} rows -> {} rows in {:?}",
322 total_rows,
323 visible_rows.len(),
324 filter_duration
325 );
326
327 }
329
330 let mut view = DataView::new(table.clone());
332 view = view.with_rows(visible_rows);
333
334 if let Some(group_by_columns) = &statement.group_by {
336 if !group_by_columns.is_empty() {
337 debug!("QueryEngine: Processing GROUP BY: {:?}", group_by_columns);
338 view = self.apply_group_by(
339 view,
340 group_by_columns,
341 &statement.select_items,
342 statement.having.as_ref(),
343 )?;
344 }
345 } else {
346 if !statement.select_items.is_empty() {
348 let has_non_star_items = statement
350 .select_items
351 .iter()
352 .any(|item| !matches!(item, SelectItem::Star));
353
354 if has_non_star_items || statement.select_items.len() > 1 {
358 view = self.apply_select_items(view, &statement.select_items)?;
359 }
360 } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
362 let column_indices = self.resolve_column_indices(&table, &statement.columns)?;
364 view = view.with_columns(column_indices);
365 }
366 }
367
368 if let Some(order_by_columns) = &statement.order_by {
370 if !order_by_columns.is_empty() {
371 view = self.apply_multi_order_by(view, order_by_columns)?;
372 }
373 }
374
375 if let Some(limit) = statement.limit {
377 let offset = statement.offset.unwrap_or(0);
378 view = view.with_limit(limit, offset);
379 }
380
381 Ok(view)
382 }
383
384 fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
386 let mut indices = Vec::new();
387 let table_columns = table.column_names();
388
389 for col_name in columns {
390 let index = table_columns
391 .iter()
392 .position(|c| c.eq_ignore_ascii_case(col_name))
393 .ok_or_else(|| {
394 let suggestion = self.find_similar_column(table, col_name);
395 match suggestion {
396 Some(similar) => anyhow::anyhow!(
397 "Column '{}' not found. Did you mean '{}'?",
398 col_name,
399 similar
400 ),
401 None => anyhow::anyhow!("Column '{}' not found", col_name),
402 }
403 })?;
404 indices.push(index);
405 }
406
407 Ok(indices)
408 }
409
410 fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
412 debug!(
413 "QueryEngine::apply_select_items - items: {:?}",
414 select_items
415 );
416 debug!(
417 "QueryEngine::apply_select_items - input view has {} rows",
418 view.row_count()
419 );
420
421 let all_aggregates = select_items.iter().all(|item| match item {
423 SelectItem::Expression { expr, .. } => contains_aggregate(expr),
424 SelectItem::Column(_) => false,
425 SelectItem::Star => false,
426 });
427
428 if all_aggregates && view.row_count() > 0 {
429 return self.apply_aggregate_select(view, select_items);
432 }
433
434 let has_computed_expressions = select_items
436 .iter()
437 .any(|item| matches!(item, SelectItem::Expression { .. }));
438
439 debug!(
440 "QueryEngine::apply_select_items - has_computed_expressions: {}",
441 has_computed_expressions
442 );
443
444 if !has_computed_expressions {
445 let column_indices = self.resolve_select_columns(view.source(), select_items)?;
447 return Ok(view.with_columns(column_indices));
448 }
449
450 let source_table = view.source();
455 let visible_rows = view.visible_row_indices();
456
457 let mut computed_table = DataTable::new("query_result");
460
461 let mut expanded_items = Vec::new();
463 for item in select_items {
464 match item {
465 SelectItem::Star => {
466 for col_name in source_table.column_names() {
468 expanded_items.push(SelectItem::Column(col_name));
469 }
470 }
471 _ => expanded_items.push(item.clone()),
472 }
473 }
474
475 let mut column_name_counts: std::collections::HashMap<String, usize> =
477 std::collections::HashMap::new();
478
479 for item in &expanded_items {
480 let base_name = match item {
481 SelectItem::Column(name) => name.clone(),
482 SelectItem::Expression { alias, .. } => alias.clone(),
483 SelectItem::Star => unreachable!("Star should have been expanded"),
484 };
485
486 let count = column_name_counts.entry(base_name.clone()).or_insert(0);
488 let column_name = if *count == 0 {
489 base_name.clone()
491 } else {
492 format!("{base_name}_{count}")
494 };
495 *count += 1;
496
497 computed_table.add_column(DataColumn::new(&column_name));
498 }
499
500 let mut evaluator =
502 ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
503
504 for &row_idx in visible_rows {
505 let mut row_values = Vec::new();
506
507 for item in &expanded_items {
508 let value = match item {
509 SelectItem::Column(col_name) => {
510 let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
512 let suggestion = self.find_similar_column(source_table, col_name);
513 match suggestion {
514 Some(similar) => anyhow::anyhow!(
515 "Column '{}' not found. Did you mean '{}'?",
516 col_name,
517 similar
518 ),
519 None => anyhow::anyhow!("Column '{}' not found", col_name),
520 }
521 })?;
522 let row = source_table
523 .get_row(row_idx)
524 .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
525 row.get(col_idx)
526 .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
527 .clone()
528 }
529 SelectItem::Expression { expr, .. } => {
530 evaluator.evaluate(expr, row_idx)?
532 }
533 SelectItem::Star => unreachable!("Star should have been expanded"),
534 };
535 row_values.push(value);
536 }
537
538 computed_table
539 .add_row(DataRow::new(row_values))
540 .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
541 }
542
543 Ok(DataView::new(Arc::new(computed_table)))
546 }
547
548 fn apply_aggregate_select(
550 &self,
551 view: DataView,
552 select_items: &[SelectItem],
553 ) -> Result<DataView> {
554 debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
555
556 let source_table = view.source();
557 let mut result_table = DataTable::new("aggregate_result");
558
559 for item in select_items {
561 let column_name = match item {
562 SelectItem::Expression { alias, .. } => alias.clone(),
563 _ => unreachable!("Should only have expressions in aggregate-only query"),
564 };
565 result_table.add_column(DataColumn::new(&column_name));
566 }
567
568 let visible_rows = view.visible_row_indices().to_vec();
570 let mut evaluator =
571 ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
572 .with_visible_rows(visible_rows);
573
574 let mut row_values = Vec::new();
576 for item in select_items {
577 match item {
578 SelectItem::Expression { expr, .. } => {
579 let value = evaluator.evaluate(expr, 0)?;
582 row_values.push(value);
583 }
584 _ => unreachable!("Should only have expressions in aggregate-only query"),
585 }
586 }
587
588 result_table
590 .add_row(DataRow::new(row_values))
591 .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
592
593 Ok(DataView::new(Arc::new(result_table)))
594 }
595
596 fn resolve_select_columns(
598 &self,
599 table: &DataTable,
600 select_items: &[SelectItem],
601 ) -> Result<Vec<usize>> {
602 let mut indices = Vec::new();
603 let table_columns = table.column_names();
604
605 for item in select_items {
606 match item {
607 SelectItem::Column(col_name) => {
608 let index = table_columns
609 .iter()
610 .position(|c| c.eq_ignore_ascii_case(col_name))
611 .ok_or_else(|| {
612 let suggestion = self.find_similar_column(table, col_name);
613 match suggestion {
614 Some(similar) => anyhow::anyhow!(
615 "Column '{}' not found. Did you mean '{}'?",
616 col_name,
617 similar
618 ),
619 None => anyhow::anyhow!("Column '{}' not found", col_name),
620 }
621 })?;
622 indices.push(index);
623 }
624 SelectItem::Star => {
625 for i in 0..table_columns.len() {
627 indices.push(i);
628 }
629 }
630 SelectItem::Expression { .. } => {
631 return Err(anyhow::anyhow!(
632 "Computed expressions require new table creation"
633 ));
634 }
635 }
636 }
637
638 Ok(indices)
639 }
640
641 fn apply_multi_order_by(
643 &self,
644 mut view: DataView,
645 order_by_columns: &[OrderByColumn],
646 ) -> Result<DataView> {
647 let mut sort_columns = Vec::new();
649
650 for order_col in order_by_columns {
651 let col_index = view
654 .source()
655 .get_column_index(&order_col.column)
656 .ok_or_else(|| {
657 let suggestion = self.find_similar_column(view.source(), &order_col.column);
659 match suggestion {
660 Some(similar) => anyhow::anyhow!(
661 "Column '{}' not found. Did you mean '{}'?",
662 order_col.column,
663 similar
664 ),
665 None => {
666 let available_cols = view.source().column_names().join(", ");
668 anyhow::anyhow!(
669 "Column '{}' not found. Available columns: {}",
670 order_col.column,
671 available_cols
672 )
673 }
674 }
675 })?;
676
677 let ascending = matches!(order_col.direction, SortDirection::Asc);
678 sort_columns.push((col_index, ascending));
679 }
680
681 view.apply_multi_sort(&sort_columns)?;
683 Ok(view)
684 }
685
686 fn apply_group_by(
688 &self,
689 view: DataView,
690 group_by_columns: &[String],
691 select_items: &[SelectItem],
692 having: Option<&SqlExpression>,
693 ) -> Result<DataView> {
694 debug!(
695 "QueryEngine::apply_group_by - grouping by: {:?}",
696 group_by_columns
697 );
698
699 let groups = view.group_by(group_by_columns)?;
701 debug!(
702 "QueryEngine::apply_group_by - created {} groups",
703 groups.len()
704 );
705
706 let mut result_table = DataTable::new("grouped_result");
708
709 for col_name in group_by_columns {
715 result_table.add_column(DataColumn::new(col_name));
716 }
717
718 let mut aggregate_columns = Vec::new();
720 for item in select_items {
721 match item {
722 SelectItem::Expression { expr, alias } => {
723 if contains_aggregate(expr) {
724 result_table.add_column(DataColumn::new(alias));
726 aggregate_columns.push((expr.clone(), alias.clone()));
727 }
728 }
729 SelectItem::Column(col_name) => {
730 if !group_by_columns.contains(col_name) {
732 return Err(anyhow!(
733 "Column '{}' must appear in GROUP BY clause or be used in an aggregate function",
734 col_name
735 ));
736 }
737 }
738 SelectItem::Star => {
739 }
742 }
743 }
744
745 for (group_key, group_view) in groups {
747 let mut row_values = Vec::new();
748 let mut aggregate_values = std::collections::HashMap::new();
749
750 for (i, value) in group_key.0.iter().enumerate() {
752 row_values.push(value.clone());
753 if i < group_by_columns.len() {
755 aggregate_values.insert(group_by_columns[i].clone(), value.clone());
756 }
757 }
758
759 for (expr, col_name) in &aggregate_columns {
761 let group_rows = group_view.get_visible_rows();
763 let mut evaluator = ArithmeticEvaluator::new(group_view.source())
764 .with_visible_rows(group_rows.clone());
765
766 let value = if group_view.row_count() > 0 && !group_rows.is_empty() {
768 evaluator
771 .evaluate(expr, group_rows[0])
772 .unwrap_or(DataValue::Null)
773 } else {
774 DataValue::Null
775 };
776
777 aggregate_values.insert(col_name.clone(), value.clone());
779 row_values.push(value);
780 }
781
782 if let Some(having_expr) = having {
784 let mut temp_table = DataTable::new("having_eval");
786 for col_name in aggregate_values.keys() {
787 temp_table.add_column(DataColumn::new(col_name));
788 }
789
790 let temp_row_values: Vec<DataValue> = aggregate_values.values().cloned().collect();
791 temp_table
792 .add_row(DataRow::new(temp_row_values))
793 .map_err(|e| anyhow!("Failed to create temp table for HAVING: {}", e))?;
794
795 let mut evaluator = ArithmeticEvaluator::new(&temp_table);
797 let having_result = evaluator
798 .evaluate(having_expr, 0)
799 .unwrap_or(DataValue::Boolean(false));
800
801 match having_result {
803 DataValue::Boolean(false) => continue,
804 DataValue::Null => continue,
805 _ => {} }
807 }
808
809 result_table
811 .add_row(DataRow::new(row_values))
812 .map_err(|e| anyhow!("Failed to add row to result table: {}", e))?;
813 }
814
815 Ok(DataView::new(Arc::new(result_table)))
817 }
818}
819
820#[cfg(test)]
821mod tests {
822 use super::*;
823 use crate::data::datatable::{DataColumn, DataRow, DataValue};
824
825 fn create_test_table() -> Arc<DataTable> {
826 let mut table = DataTable::new("test");
827
828 table.add_column(DataColumn::new("id"));
830 table.add_column(DataColumn::new("name"));
831 table.add_column(DataColumn::new("age"));
832
833 table
835 .add_row(DataRow::new(vec![
836 DataValue::Integer(1),
837 DataValue::String("Alice".to_string()),
838 DataValue::Integer(30),
839 ]))
840 .unwrap();
841
842 table
843 .add_row(DataRow::new(vec![
844 DataValue::Integer(2),
845 DataValue::String("Bob".to_string()),
846 DataValue::Integer(25),
847 ]))
848 .unwrap();
849
850 table
851 .add_row(DataRow::new(vec![
852 DataValue::Integer(3),
853 DataValue::String("Charlie".to_string()),
854 DataValue::Integer(35),
855 ]))
856 .unwrap();
857
858 Arc::new(table)
859 }
860
861 #[test]
862 fn test_select_all() {
863 let table = create_test_table();
864 let engine = QueryEngine::new();
865
866 let view = engine
867 .execute(table.clone(), "SELECT * FROM users")
868 .unwrap();
869 assert_eq!(view.row_count(), 3);
870 assert_eq!(view.column_count(), 3);
871 }
872
873 #[test]
874 fn test_select_columns() {
875 let table = create_test_table();
876 let engine = QueryEngine::new();
877
878 let view = engine
879 .execute(table.clone(), "SELECT name, age FROM users")
880 .unwrap();
881 assert_eq!(view.row_count(), 3);
882 assert_eq!(view.column_count(), 2);
883 }
884
885 #[test]
886 fn test_select_with_limit() {
887 let table = create_test_table();
888 let engine = QueryEngine::new();
889
890 let view = engine
891 .execute(table.clone(), "SELECT * FROM users LIMIT 2")
892 .unwrap();
893 assert_eq!(view.row_count(), 2);
894 }
895
896 #[test]
897 fn test_type_coercion_contains() {
898 let _ = tracing_subscriber::fmt()
900 .with_max_level(tracing::Level::DEBUG)
901 .try_init();
902
903 let mut table = DataTable::new("test");
904 table.add_column(DataColumn::new("id"));
905 table.add_column(DataColumn::new("status"));
906 table.add_column(DataColumn::new("price"));
907
908 table
910 .add_row(DataRow::new(vec![
911 DataValue::Integer(1),
912 DataValue::String("Pending".to_string()),
913 DataValue::Float(99.99),
914 ]))
915 .unwrap();
916
917 table
918 .add_row(DataRow::new(vec![
919 DataValue::Integer(2),
920 DataValue::String("Confirmed".to_string()),
921 DataValue::Float(150.50),
922 ]))
923 .unwrap();
924
925 table
926 .add_row(DataRow::new(vec![
927 DataValue::Integer(3),
928 DataValue::String("Pending".to_string()),
929 DataValue::Float(75.00),
930 ]))
931 .unwrap();
932
933 let table = Arc::new(table);
934 let engine = QueryEngine::new();
935
936 println!("\n=== Testing WHERE clause with Contains ===");
937 println!("Table has {} rows", table.row_count());
938 for i in 0..table.row_count() {
939 let status = table.get_value(i, 1);
940 println!("Row {i}: status = {status:?}");
941 }
942
943 println!("\n--- Test 1: status.Contains('pend') ---");
945 let result = engine.execute(
946 table.clone(),
947 "SELECT * FROM test WHERE status.Contains('pend')",
948 );
949 match result {
950 Ok(view) => {
951 println!("SUCCESS: Found {} matching rows", view.row_count());
952 assert_eq!(view.row_count(), 2); }
954 Err(e) => {
955 panic!("Query failed: {e}");
956 }
957 }
958
959 println!("\n--- Test 2: price.Contains('9') ---");
961 let result = engine.execute(
962 table.clone(),
963 "SELECT * FROM test WHERE price.Contains('9')",
964 );
965 match result {
966 Ok(view) => {
967 println!(
968 "SUCCESS: Found {} matching rows with price containing '9'",
969 view.row_count()
970 );
971 assert!(view.row_count() >= 1);
973 }
974 Err(e) => {
975 panic!("Numeric coercion query failed: {e}");
976 }
977 }
978
979 println!("\n=== All tests passed! ===");
980 }
981
982 #[test]
983 fn test_not_in_clause() {
984 let _ = tracing_subscriber::fmt()
986 .with_max_level(tracing::Level::DEBUG)
987 .try_init();
988
989 let mut table = DataTable::new("test");
990 table.add_column(DataColumn::new("id"));
991 table.add_column(DataColumn::new("country"));
992
993 table
995 .add_row(DataRow::new(vec![
996 DataValue::Integer(1),
997 DataValue::String("CA".to_string()),
998 ]))
999 .unwrap();
1000
1001 table
1002 .add_row(DataRow::new(vec![
1003 DataValue::Integer(2),
1004 DataValue::String("US".to_string()),
1005 ]))
1006 .unwrap();
1007
1008 table
1009 .add_row(DataRow::new(vec![
1010 DataValue::Integer(3),
1011 DataValue::String("UK".to_string()),
1012 ]))
1013 .unwrap();
1014
1015 let table = Arc::new(table);
1016 let engine = QueryEngine::new();
1017
1018 println!("\n=== Testing NOT IN clause ===");
1019 println!("Table has {} rows", table.row_count());
1020 for i in 0..table.row_count() {
1021 let country = table.get_value(i, 1);
1022 println!("Row {i}: country = {country:?}");
1023 }
1024
1025 println!("\n--- Test: country NOT IN ('CA') ---");
1027 let result = engine.execute(
1028 table.clone(),
1029 "SELECT * FROM test WHERE country NOT IN ('CA')",
1030 );
1031 match result {
1032 Ok(view) => {
1033 println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
1034 assert_eq!(view.row_count(), 2); }
1036 Err(e) => {
1037 panic!("NOT IN query failed: {e}");
1038 }
1039 }
1040
1041 println!("\n=== NOT IN test complete! ===");
1042 }
1043
1044 #[test]
1045 fn test_case_insensitive_in_and_not_in() {
1046 let _ = tracing_subscriber::fmt()
1048 .with_max_level(tracing::Level::DEBUG)
1049 .try_init();
1050
1051 let mut table = DataTable::new("test");
1052 table.add_column(DataColumn::new("id"));
1053 table.add_column(DataColumn::new("country"));
1054
1055 table
1057 .add_row(DataRow::new(vec![
1058 DataValue::Integer(1),
1059 DataValue::String("CA".to_string()), ]))
1061 .unwrap();
1062
1063 table
1064 .add_row(DataRow::new(vec![
1065 DataValue::Integer(2),
1066 DataValue::String("us".to_string()), ]))
1068 .unwrap();
1069
1070 table
1071 .add_row(DataRow::new(vec![
1072 DataValue::Integer(3),
1073 DataValue::String("UK".to_string()), ]))
1075 .unwrap();
1076
1077 let table = Arc::new(table);
1078
1079 println!("\n=== Testing Case-Insensitive IN clause ===");
1080 println!("Table has {} rows", table.row_count());
1081 for i in 0..table.row_count() {
1082 let country = table.get_value(i, 1);
1083 println!("Row {i}: country = {country:?}");
1084 }
1085
1086 println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
1088 let engine = QueryEngine::with_case_insensitive(true);
1089 let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1090 match result {
1091 Ok(view) => {
1092 println!(
1093 "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
1094 view.row_count()
1095 );
1096 assert_eq!(view.row_count(), 1); }
1098 Err(e) => {
1099 panic!("Case-insensitive IN query failed: {e}");
1100 }
1101 }
1102
1103 println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
1105 let result = engine.execute(
1106 table.clone(),
1107 "SELECT * FROM test WHERE country NOT IN ('ca')",
1108 );
1109 match result {
1110 Ok(view) => {
1111 println!(
1112 "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
1113 view.row_count()
1114 );
1115 assert_eq!(view.row_count(), 2); }
1117 Err(e) => {
1118 panic!("Case-insensitive NOT IN query failed: {e}");
1119 }
1120 }
1121
1122 println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
1124 let engine_case_sensitive = QueryEngine::new(); let result = engine_case_sensitive
1126 .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1127 match result {
1128 Ok(view) => {
1129 println!(
1130 "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
1131 view.row_count()
1132 );
1133 assert_eq!(view.row_count(), 0); }
1135 Err(e) => {
1136 panic!("Case-sensitive IN query failed: {e}");
1137 }
1138 }
1139
1140 println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
1141 }
1142
1143 #[test]
1144 #[ignore = "Parentheses in WHERE clause not yet implemented"]
1145 fn test_parentheses_in_where_clause() {
1146 let _ = tracing_subscriber::fmt()
1148 .with_max_level(tracing::Level::DEBUG)
1149 .try_init();
1150
1151 let mut table = DataTable::new("test");
1152 table.add_column(DataColumn::new("id"));
1153 table.add_column(DataColumn::new("status"));
1154 table.add_column(DataColumn::new("priority"));
1155
1156 table
1158 .add_row(DataRow::new(vec![
1159 DataValue::Integer(1),
1160 DataValue::String("Pending".to_string()),
1161 DataValue::String("High".to_string()),
1162 ]))
1163 .unwrap();
1164
1165 table
1166 .add_row(DataRow::new(vec![
1167 DataValue::Integer(2),
1168 DataValue::String("Complete".to_string()),
1169 DataValue::String("High".to_string()),
1170 ]))
1171 .unwrap();
1172
1173 table
1174 .add_row(DataRow::new(vec![
1175 DataValue::Integer(3),
1176 DataValue::String("Pending".to_string()),
1177 DataValue::String("Low".to_string()),
1178 ]))
1179 .unwrap();
1180
1181 table
1182 .add_row(DataRow::new(vec![
1183 DataValue::Integer(4),
1184 DataValue::String("Complete".to_string()),
1185 DataValue::String("Low".to_string()),
1186 ]))
1187 .unwrap();
1188
1189 let table = Arc::new(table);
1190 let engine = QueryEngine::new();
1191
1192 println!("\n=== Testing Parentheses in WHERE clause ===");
1193 println!("Table has {} rows", table.row_count());
1194 for i in 0..table.row_count() {
1195 let status = table.get_value(i, 1);
1196 let priority = table.get_value(i, 2);
1197 println!("Row {i}: status = {status:?}, priority = {priority:?}");
1198 }
1199
1200 println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
1202 let result = engine.execute(
1203 table.clone(),
1204 "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
1205 );
1206 match result {
1207 Ok(view) => {
1208 println!(
1209 "SUCCESS: Found {} rows with parenthetical logic",
1210 view.row_count()
1211 );
1212 assert_eq!(view.row_count(), 2); }
1214 Err(e) => {
1215 panic!("Parentheses query failed: {e}");
1216 }
1217 }
1218
1219 println!("\n=== Parentheses test complete! ===");
1220 }
1221
1222 #[test]
1223 #[ignore = "Numeric type coercion needs fixing"]
1224 fn test_numeric_type_coercion() {
1225 let _ = tracing_subscriber::fmt()
1227 .with_max_level(tracing::Level::DEBUG)
1228 .try_init();
1229
1230 let mut table = DataTable::new("test");
1231 table.add_column(DataColumn::new("id"));
1232 table.add_column(DataColumn::new("price"));
1233 table.add_column(DataColumn::new("quantity"));
1234
1235 table
1237 .add_row(DataRow::new(vec![
1238 DataValue::Integer(1),
1239 DataValue::Float(99.50), DataValue::Integer(100),
1241 ]))
1242 .unwrap();
1243
1244 table
1245 .add_row(DataRow::new(vec![
1246 DataValue::Integer(2),
1247 DataValue::Float(150.0), DataValue::Integer(200),
1249 ]))
1250 .unwrap();
1251
1252 table
1253 .add_row(DataRow::new(vec![
1254 DataValue::Integer(3),
1255 DataValue::Integer(75), DataValue::Integer(50),
1257 ]))
1258 .unwrap();
1259
1260 let table = Arc::new(table);
1261 let engine = QueryEngine::new();
1262
1263 println!("\n=== Testing Numeric Type Coercion ===");
1264 println!("Table has {} rows", table.row_count());
1265 for i in 0..table.row_count() {
1266 let price = table.get_value(i, 1);
1267 let quantity = table.get_value(i, 2);
1268 println!("Row {i}: price = {price:?}, quantity = {quantity:?}");
1269 }
1270
1271 println!("\n--- Test: price.Contains('.') ---");
1273 let result = engine.execute(
1274 table.clone(),
1275 "SELECT * FROM test WHERE price.Contains('.')",
1276 );
1277 match result {
1278 Ok(view) => {
1279 println!(
1280 "SUCCESS: Found {} rows with decimal points in price",
1281 view.row_count()
1282 );
1283 assert_eq!(view.row_count(), 2); }
1285 Err(e) => {
1286 panic!("Numeric Contains query failed: {e}");
1287 }
1288 }
1289
1290 println!("\n--- Test: quantity.Contains('0') ---");
1292 let result = engine.execute(
1293 table.clone(),
1294 "SELECT * FROM test WHERE quantity.Contains('0')",
1295 );
1296 match result {
1297 Ok(view) => {
1298 println!(
1299 "SUCCESS: Found {} rows with '0' in quantity",
1300 view.row_count()
1301 );
1302 assert_eq!(view.row_count(), 2); }
1304 Err(e) => {
1305 panic!("Integer Contains query failed: {e}");
1306 }
1307 }
1308
1309 println!("\n=== Numeric type coercion test complete! ===");
1310 }
1311
1312 #[test]
1313 fn test_datetime_comparisons() {
1314 let _ = tracing_subscriber::fmt()
1316 .with_max_level(tracing::Level::DEBUG)
1317 .try_init();
1318
1319 let mut table = DataTable::new("test");
1320 table.add_column(DataColumn::new("id"));
1321 table.add_column(DataColumn::new("created_date"));
1322
1323 table
1325 .add_row(DataRow::new(vec![
1326 DataValue::Integer(1),
1327 DataValue::String("2024-12-15".to_string()),
1328 ]))
1329 .unwrap();
1330
1331 table
1332 .add_row(DataRow::new(vec![
1333 DataValue::Integer(2),
1334 DataValue::String("2025-01-15".to_string()),
1335 ]))
1336 .unwrap();
1337
1338 table
1339 .add_row(DataRow::new(vec![
1340 DataValue::Integer(3),
1341 DataValue::String("2025-02-15".to_string()),
1342 ]))
1343 .unwrap();
1344
1345 let table = Arc::new(table);
1346 let engine = QueryEngine::new();
1347
1348 println!("\n=== Testing DateTime Comparisons ===");
1349 println!("Table has {} rows", table.row_count());
1350 for i in 0..table.row_count() {
1351 let date = table.get_value(i, 1);
1352 println!("Row {i}: created_date = {date:?}");
1353 }
1354
1355 println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1357 let result = engine.execute(
1358 table.clone(),
1359 "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1360 );
1361 match result {
1362 Ok(view) => {
1363 println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1364 assert_eq!(view.row_count(), 2); }
1366 Err(e) => {
1367 panic!("DateTime comparison query failed: {e}");
1368 }
1369 }
1370
1371 println!("\n=== DateTime comparison test complete! ===");
1372 }
1373
1374 #[test]
1375 fn test_not_with_method_calls() {
1376 let _ = tracing_subscriber::fmt()
1378 .with_max_level(tracing::Level::DEBUG)
1379 .try_init();
1380
1381 let mut table = DataTable::new("test");
1382 table.add_column(DataColumn::new("id"));
1383 table.add_column(DataColumn::new("status"));
1384
1385 table
1387 .add_row(DataRow::new(vec![
1388 DataValue::Integer(1),
1389 DataValue::String("Pending Review".to_string()),
1390 ]))
1391 .unwrap();
1392
1393 table
1394 .add_row(DataRow::new(vec![
1395 DataValue::Integer(2),
1396 DataValue::String("Complete".to_string()),
1397 ]))
1398 .unwrap();
1399
1400 table
1401 .add_row(DataRow::new(vec![
1402 DataValue::Integer(3),
1403 DataValue::String("Pending Approval".to_string()),
1404 ]))
1405 .unwrap();
1406
1407 let table = Arc::new(table);
1408 let engine = QueryEngine::with_case_insensitive(true);
1409
1410 println!("\n=== Testing NOT with Method Calls ===");
1411 println!("Table has {} rows", table.row_count());
1412 for i in 0..table.row_count() {
1413 let status = table.get_value(i, 1);
1414 println!("Row {i}: status = {status:?}");
1415 }
1416
1417 println!("\n--- Test: NOT status.Contains('pend') ---");
1419 let result = engine.execute(
1420 table.clone(),
1421 "SELECT * FROM test WHERE NOT status.Contains('pend')",
1422 );
1423 match result {
1424 Ok(view) => {
1425 println!(
1426 "SUCCESS: Found {} rows NOT containing 'pend'",
1427 view.row_count()
1428 );
1429 assert_eq!(view.row_count(), 1); }
1431 Err(e) => {
1432 panic!("NOT Contains query failed: {e}");
1433 }
1434 }
1435
1436 println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1438 let result = engine.execute(
1439 table.clone(),
1440 "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1441 );
1442 match result {
1443 Ok(view) => {
1444 println!(
1445 "SUCCESS: Found {} rows NOT starting with 'Pending'",
1446 view.row_count()
1447 );
1448 assert_eq!(view.row_count(), 1); }
1450 Err(e) => {
1451 panic!("NOT StartsWith query failed: {e}");
1452 }
1453 }
1454
1455 println!("\n=== NOT with method calls test complete! ===");
1456 }
1457
1458 #[test]
1459 #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1460 fn test_complex_logical_expressions() {
1461 let _ = tracing_subscriber::fmt()
1463 .with_max_level(tracing::Level::DEBUG)
1464 .try_init();
1465
1466 let mut table = DataTable::new("test");
1467 table.add_column(DataColumn::new("id"));
1468 table.add_column(DataColumn::new("status"));
1469 table.add_column(DataColumn::new("priority"));
1470 table.add_column(DataColumn::new("assigned"));
1471
1472 table
1474 .add_row(DataRow::new(vec![
1475 DataValue::Integer(1),
1476 DataValue::String("Pending".to_string()),
1477 DataValue::String("High".to_string()),
1478 DataValue::String("John".to_string()),
1479 ]))
1480 .unwrap();
1481
1482 table
1483 .add_row(DataRow::new(vec![
1484 DataValue::Integer(2),
1485 DataValue::String("Complete".to_string()),
1486 DataValue::String("High".to_string()),
1487 DataValue::String("Jane".to_string()),
1488 ]))
1489 .unwrap();
1490
1491 table
1492 .add_row(DataRow::new(vec![
1493 DataValue::Integer(3),
1494 DataValue::String("Pending".to_string()),
1495 DataValue::String("Low".to_string()),
1496 DataValue::String("John".to_string()),
1497 ]))
1498 .unwrap();
1499
1500 table
1501 .add_row(DataRow::new(vec![
1502 DataValue::Integer(4),
1503 DataValue::String("In Progress".to_string()),
1504 DataValue::String("Medium".to_string()),
1505 DataValue::String("Jane".to_string()),
1506 ]))
1507 .unwrap();
1508
1509 let table = Arc::new(table);
1510 let engine = QueryEngine::new();
1511
1512 println!("\n=== Testing Complex Logical Expressions ===");
1513 println!("Table has {} rows", table.row_count());
1514 for i in 0..table.row_count() {
1515 let status = table.get_value(i, 1);
1516 let priority = table.get_value(i, 2);
1517 let assigned = table.get_value(i, 3);
1518 println!(
1519 "Row {i}: status = {status:?}, priority = {priority:?}, assigned = {assigned:?}"
1520 );
1521 }
1522
1523 println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
1525 let result = engine.execute(
1526 table.clone(),
1527 "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
1528 );
1529 match result {
1530 Ok(view) => {
1531 println!(
1532 "SUCCESS: Found {} rows with complex logic",
1533 view.row_count()
1534 );
1535 assert_eq!(view.row_count(), 2); }
1537 Err(e) => {
1538 panic!("Complex logic query failed: {e}");
1539 }
1540 }
1541
1542 println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
1544 let result = engine.execute(
1545 table.clone(),
1546 "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
1547 );
1548 match result {
1549 Ok(view) => {
1550 println!(
1551 "SUCCESS: Found {} rows with NOT complex logic",
1552 view.row_count()
1553 );
1554 assert_eq!(view.row_count(), 2); }
1556 Err(e) => {
1557 panic!("NOT complex logic query failed: {e}");
1558 }
1559 }
1560
1561 println!("\n=== Complex logical expressions test complete! ===");
1562 }
1563
1564 #[test]
1565 fn test_mixed_data_types_and_edge_cases() {
1566 let _ = tracing_subscriber::fmt()
1568 .with_max_level(tracing::Level::DEBUG)
1569 .try_init();
1570
1571 let mut table = DataTable::new("test");
1572 table.add_column(DataColumn::new("id"));
1573 table.add_column(DataColumn::new("value"));
1574 table.add_column(DataColumn::new("nullable_field"));
1575
1576 table
1578 .add_row(DataRow::new(vec![
1579 DataValue::Integer(1),
1580 DataValue::String("123.45".to_string()),
1581 DataValue::String("present".to_string()),
1582 ]))
1583 .unwrap();
1584
1585 table
1586 .add_row(DataRow::new(vec![
1587 DataValue::Integer(2),
1588 DataValue::Float(678.90),
1589 DataValue::Null,
1590 ]))
1591 .unwrap();
1592
1593 table
1594 .add_row(DataRow::new(vec![
1595 DataValue::Integer(3),
1596 DataValue::Boolean(true),
1597 DataValue::String("also present".to_string()),
1598 ]))
1599 .unwrap();
1600
1601 table
1602 .add_row(DataRow::new(vec![
1603 DataValue::Integer(4),
1604 DataValue::String("false".to_string()),
1605 DataValue::Null,
1606 ]))
1607 .unwrap();
1608
1609 let table = Arc::new(table);
1610 let engine = QueryEngine::new();
1611
1612 println!("\n=== Testing Mixed Data Types and Edge Cases ===");
1613 println!("Table has {} rows", table.row_count());
1614 for i in 0..table.row_count() {
1615 let value = table.get_value(i, 1);
1616 let nullable = table.get_value(i, 2);
1617 println!("Row {i}: value = {value:?}, nullable_field = {nullable:?}");
1618 }
1619
1620 println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
1622 let result = engine.execute(
1623 table.clone(),
1624 "SELECT * FROM test WHERE value.Contains('true')",
1625 );
1626 match result {
1627 Ok(view) => {
1628 println!(
1629 "SUCCESS: Found {} rows with boolean coercion",
1630 view.row_count()
1631 );
1632 assert_eq!(view.row_count(), 1); }
1634 Err(e) => {
1635 panic!("Boolean coercion query failed: {e}");
1636 }
1637 }
1638
1639 println!("\n--- Test: id IN (1, 3) ---");
1641 let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
1642 match result {
1643 Ok(view) => {
1644 println!("SUCCESS: Found {} rows with IN clause", view.row_count());
1645 assert_eq!(view.row_count(), 2); }
1647 Err(e) => {
1648 panic!("Multiple IN values query failed: {e}");
1649 }
1650 }
1651
1652 println!("\n=== Mixed data types test complete! ===");
1653 }
1654
1655 #[test]
1656 fn test_not_in_parsing() {
1657 use crate::sql::recursive_parser::Parser;
1658
1659 let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
1660 println!("\n=== Testing NOT IN parsing ===");
1661 println!("Parsing query: {query}");
1662
1663 let mut parser = Parser::new(query);
1664 match parser.parse() {
1665 Ok(statement) => {
1666 println!("Parsed statement: {statement:#?}");
1667 if let Some(where_clause) = statement.where_clause {
1668 println!("WHERE conditions: {:#?}", where_clause.conditions);
1669 if let Some(first_condition) = where_clause.conditions.first() {
1670 println!("First condition expression: {:#?}", first_condition.expr);
1671 }
1672 }
1673 }
1674 Err(e) => {
1675 panic!("Parse error: {e}");
1676 }
1677 }
1678 }
1679}
1680
1681#[cfg(test)]
1682#[path = "query_engine_tests.rs"]
1683mod query_engine_tests;