1use anyhow::{anyhow, Result};
2use std::collections::HashMap;
3use std::sync::Arc;
4use std::time::Instant;
5use tracing::{debug, info};
6
7use crate::config::config::BehaviorConfig;
8use crate::config::global::get_date_notation;
9use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
10use crate::data::data_view::DataView;
11use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
12use crate::data::group_by_expressions::GroupByExpressions;
13use crate::data::hash_join::HashJoinExecutor;
14use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
15use crate::data::subquery_executor::SubqueryExecutor;
16use crate::data::virtual_table_generator::VirtualTableGenerator;
17use crate::execution_plan::{ExecutionPlan, ExecutionPlanBuilder, StepType};
18use crate::sql::aggregates::contains_aggregate;
19use crate::sql::parser::ast::TableSource;
20use crate::sql::recursive_parser::{
21 CTEType, OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection, SqlExpression,
22 TableFunction,
23};
24
25#[derive(Clone)]
27pub struct QueryEngine {
28 case_insensitive: bool,
29 date_notation: String,
30 behavior_config: Option<BehaviorConfig>,
31}
32
33impl Default for QueryEngine {
34 fn default() -> Self {
35 Self::new()
36 }
37}
38
39impl QueryEngine {
40 #[must_use]
41 pub fn new() -> Self {
42 Self {
43 case_insensitive: false,
44 date_notation: get_date_notation(),
45 behavior_config: None,
46 }
47 }
48
49 #[must_use]
50 pub fn with_behavior_config(config: BehaviorConfig) -> Self {
51 let case_insensitive = config.case_insensitive_default;
52 let date_notation = get_date_notation();
54 Self {
55 case_insensitive,
56 date_notation,
57 behavior_config: Some(config),
58 }
59 }
60
61 #[must_use]
62 pub fn with_date_notation(_date_notation: String) -> Self {
63 Self {
64 case_insensitive: false,
65 date_notation: get_date_notation(), behavior_config: None,
67 }
68 }
69
70 #[must_use]
71 pub fn with_case_insensitive(case_insensitive: bool) -> Self {
72 Self {
73 case_insensitive,
74 date_notation: get_date_notation(),
75 behavior_config: None,
76 }
77 }
78
79 #[must_use]
80 pub fn with_case_insensitive_and_date_notation(
81 case_insensitive: bool,
82 _date_notation: String, ) -> Self {
84 Self {
85 case_insensitive,
86 date_notation: get_date_notation(), behavior_config: None,
88 }
89 }
90
91 fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
93 let columns = table.column_names();
94 let mut best_match: Option<(String, usize)> = None;
95
96 for col in columns {
97 let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
98 let max_distance = if name.len() > 10 { 3 } else { 2 };
101 if distance <= max_distance {
102 match &best_match {
103 None => best_match = Some((col, distance)),
104 Some((_, best_dist)) if distance < *best_dist => {
105 best_match = Some((col, distance));
106 }
107 _ => {}
108 }
109 }
110 }
111
112 best_match.map(|(name, _)| name)
113 }
114
115 fn edit_distance(&self, s1: &str, s2: &str) -> usize {
117 let len1 = s1.len();
118 let len2 = s2.len();
119 let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
120
121 for i in 0..=len1 {
122 matrix[i][0] = i;
123 }
124 for j in 0..=len2 {
125 matrix[0][j] = j;
126 }
127
128 for (i, c1) in s1.chars().enumerate() {
129 for (j, c2) in s2.chars().enumerate() {
130 let cost = usize::from(c1 != c2);
131 matrix[i + 1][j + 1] = std::cmp::min(
132 matrix[i][j + 1] + 1, std::cmp::min(
134 matrix[i + 1][j] + 1, matrix[i][j] + cost, ),
137 );
138 }
139 }
140
141 matrix[len1][len2]
142 }
143
144 pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
146 let (view, _plan) = self.execute_with_plan(table, sql)?;
147 Ok(view)
148 }
149
150 pub fn execute_statement(
152 &self,
153 table: Arc<DataTable>,
154 statement: SelectStatement,
155 ) -> Result<DataView> {
156 let mut cte_context = HashMap::new();
158 for cte in &statement.ctes {
159 debug!("QueryEngine: Pre-processing CTE '{}'...", cte.name);
160 let cte_result = match &cte.cte_type {
162 CTEType::Standard(query) => {
163 self.build_view_with_context(table.clone(), query.clone(), &mut cte_context)?
165 }
166 CTEType::Web(web_spec) => {
167 use crate::web::http_fetcher::WebDataFetcher;
169
170 let fetcher = WebDataFetcher::new()?;
171 let data_table = fetcher.fetch(web_spec, &cte.name)?;
172
173 DataView::new(Arc::new(data_table))
175 }
176 };
177 cte_context.insert(cte.name.clone(), Arc::new(cte_result));
179 debug!(
180 "QueryEngine: CTE '{}' pre-processed, stored in context",
181 cte.name
182 );
183 }
184
185 let mut subquery_executor =
187 SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
188 let processed_statement = subquery_executor.execute_subqueries(&statement)?;
189
190 self.build_view_with_context(table, processed_statement, &mut cte_context)
192 }
193
194 pub fn execute_statement_with_cte_context(
196 &self,
197 table: Arc<DataTable>,
198 statement: SelectStatement,
199 cte_context: &HashMap<String, Arc<DataView>>,
200 ) -> Result<DataView> {
201 let mut local_context = cte_context.clone();
203
204 for cte in &statement.ctes {
206 debug!("QueryEngine: Processing nested CTE '{}'...", cte.name);
207 let cte_result = match &cte.cte_type {
208 CTEType::Standard(query) => {
209 self.build_view_with_context(table.clone(), query.clone(), &mut local_context)?
210 }
211 CTEType::Web(web_spec) => {
212 use crate::web::http_fetcher::WebDataFetcher;
214
215 let fetcher = WebDataFetcher::new()?;
216 let data_table = fetcher.fetch(web_spec, &cte.name)?;
217
218 DataView::new(Arc::new(data_table))
220 }
221 };
222 local_context.insert(cte.name.clone(), Arc::new(cte_result));
223 }
224
225 let mut subquery_executor =
227 SubqueryExecutor::with_cte_context(self.clone(), table.clone(), local_context.clone());
228 let processed_statement = subquery_executor.execute_subqueries(&statement)?;
229
230 self.build_view_with_context(table, processed_statement, &mut local_context)
232 }
233
234 pub fn execute_with_plan(
236 &self,
237 table: Arc<DataTable>,
238 sql: &str,
239 ) -> Result<(DataView, ExecutionPlan)> {
240 let mut plan_builder = ExecutionPlanBuilder::new();
241 let start_time = Instant::now();
242
243 plan_builder.begin_step(StepType::Parse, "Parse SQL query".to_string());
245 plan_builder.add_detail(format!("Query: {}", sql));
246 let mut parser = Parser::new(sql);
247 let statement = parser
248 .parse()
249 .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
250 plan_builder.add_detail(format!("Parsed successfully"));
251 if let Some(from) = &statement.from_table {
252 plan_builder.add_detail(format!("FROM: {}", from));
253 }
254 if statement.where_clause.is_some() {
255 plan_builder.add_detail("WHERE clause present".to_string());
256 }
257 plan_builder.end_step();
258
259 let mut cte_context = HashMap::new();
261
262 if !statement.ctes.is_empty() {
263 plan_builder.begin_step(
264 StepType::CTE,
265 format!("Process {} CTEs", statement.ctes.len()),
266 );
267
268 for cte in &statement.ctes {
269 let cte_start = Instant::now();
270 plan_builder.begin_step(StepType::CTE, format!("CTE '{}'", cte.name));
271
272 let cte_result = match &cte.cte_type {
273 CTEType::Standard(query) => {
274 if let Some(from) = &query.from_table {
276 plan_builder.add_detail(format!("Source: {}", from));
277 }
278 if query.where_clause.is_some() {
279 plan_builder.add_detail("Has WHERE clause".to_string());
280 }
281 if query.group_by.is_some() {
282 plan_builder.add_detail("Has GROUP BY".to_string());
283 }
284
285 self.build_view_with_context(
286 table.clone(),
287 query.clone(),
288 &mut cte_context,
289 )?
290 }
291 CTEType::Web(web_spec) => {
292 plan_builder.add_detail(format!("URL: {}", web_spec.url));
293 if let Some(format) = &web_spec.format {
294 plan_builder.add_detail(format!("Format: {:?}", format));
295 }
296 if let Some(cache) = web_spec.cache_seconds {
297 plan_builder.add_detail(format!("Cache: {} seconds", cache));
298 }
299
300 use crate::web::http_fetcher::WebDataFetcher;
302
303 let fetcher = WebDataFetcher::new()?;
304 let data_table = fetcher.fetch(web_spec, &cte.name)?;
305
306 DataView::new(Arc::new(data_table))
308 }
309 };
310
311 plan_builder.set_rows_out(cte_result.row_count());
313 plan_builder.add_detail(format!(
314 "Result: {} rows, {} columns",
315 cte_result.row_count(),
316 cte_result.column_count()
317 ));
318 plan_builder.add_detail(format!(
319 "Execution time: {:.3}ms",
320 cte_start.elapsed().as_secs_f64() * 1000.0
321 ));
322
323 cte_context.insert(cte.name.clone(), Arc::new(cte_result));
324 plan_builder.end_step();
325 }
326
327 plan_builder.add_detail(format!(
328 "All {} CTEs cached in context",
329 statement.ctes.len()
330 ));
331 plan_builder.end_step();
332 }
333
334 plan_builder.begin_step(StepType::Subquery, "Process subqueries".to_string());
336 let mut subquery_executor =
337 SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
338
339 let has_subqueries = statement.where_clause.as_ref().map_or(false, |w| {
341 format!("{:?}", w).contains("Subquery")
343 });
344
345 if has_subqueries {
346 plan_builder.add_detail("Evaluating subqueries in WHERE clause".to_string());
347 }
348
349 let processed_statement = subquery_executor.execute_subqueries(&statement)?;
350
351 if has_subqueries {
352 plan_builder.add_detail("Subqueries replaced with materialized values".to_string());
353 } else {
354 plan_builder.add_detail("No subqueries to process".to_string());
355 }
356
357 plan_builder.end_step();
358 let result = self.build_view_with_context_and_plan(
359 table,
360 processed_statement,
361 &mut cte_context,
362 &mut plan_builder,
363 )?;
364
365 let total_duration = start_time.elapsed();
366 info!(
367 "Query execution complete: total={:?}, rows={}",
368 total_duration,
369 result.row_count()
370 );
371
372 let plan = plan_builder.build();
373 Ok((result, plan))
374 }
375
376 fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
378 let mut cte_context = HashMap::new();
379 self.build_view_with_context(table, statement, &mut cte_context)
380 }
381
382 fn build_view_with_context(
384 &self,
385 table: Arc<DataTable>,
386 statement: SelectStatement,
387 cte_context: &mut HashMap<String, Arc<DataView>>,
388 ) -> Result<DataView> {
389 let mut dummy_plan = ExecutionPlanBuilder::new();
390 self.build_view_with_context_and_plan(table, statement, cte_context, &mut dummy_plan)
391 }
392
393 fn build_view_with_context_and_plan(
395 &self,
396 table: Arc<DataTable>,
397 statement: SelectStatement,
398 cte_context: &mut HashMap<String, Arc<DataView>>,
399 plan: &mut ExecutionPlanBuilder,
400 ) -> Result<DataView> {
401 for cte in &statement.ctes {
403 if cte_context.contains_key(&cte.name) {
405 debug!(
406 "QueryEngine: CTE '{}' already in context, skipping",
407 cte.name
408 );
409 continue;
410 }
411
412 debug!("QueryEngine: Processing CTE '{}'...", cte.name);
413 let cte_result = match &cte.cte_type {
415 CTEType::Standard(query) => {
416 self.build_view_with_context(table.clone(), query.clone(), cte_context)?
417 }
418 CTEType::Web(_web_spec) => {
419 return Err(anyhow!(
421 "Web CTEs should be processed in execute_select method"
422 ));
423 }
424 };
425
426 cte_context.insert(cte.name.clone(), Arc::new(cte_result));
428 debug!(
429 "QueryEngine: CTE '{}' processed, stored in context",
430 cte.name
431 );
432 }
433
434 let source_table = if let Some(ref table_func) = statement.from_function {
436 debug!("QueryEngine: Processing table function...");
438 match table_func {
439 TableFunction::Range { start, end, step } => {
440 let mut evaluator =
442 ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
443
444 let dummy_row = 0;
446
447 let start_val = evaluator.evaluate(start, dummy_row)?;
448 let end_val = evaluator.evaluate(end, dummy_row)?;
449 let step_val = if let Some(step_expr) = step {
450 Some(evaluator.evaluate(step_expr, dummy_row)?)
451 } else {
452 None
453 };
454
455 let start_int = match start_val {
457 DataValue::Integer(i) => i,
458 DataValue::Float(f) => f as i64,
459 _ => return Err(anyhow!("RANGE start must be numeric")),
460 };
461
462 let end_int = match end_val {
463 DataValue::Integer(i) => i,
464 DataValue::Float(f) => f as i64,
465 _ => return Err(anyhow!("RANGE end must be numeric")),
466 };
467
468 let step_int = if let Some(step) = step_val {
469 match step {
470 DataValue::Integer(i) => Some(i),
471 DataValue::Float(f) => Some(f as i64),
472 _ => return Err(anyhow!("RANGE step must be numeric")),
473 }
474 } else {
475 None
476 };
477
478 VirtualTableGenerator::generate_range(start_int, end_int, step_int, None)?
480 }
481 }
482 } else if let Some(ref subquery) = statement.from_subquery {
483 debug!("QueryEngine: Processing FROM subquery...");
485 let subquery_result =
486 self.build_view_with_context(table.clone(), *subquery.clone(), cte_context)?;
487
488 let materialized = self.materialize_view(subquery_result)?;
491 Arc::new(materialized)
492 } else if let Some(ref table_name) = statement.from_table {
493 if let Some(cte_view) = cte_context.get(table_name) {
495 debug!("QueryEngine: Using CTE '{}' as source table", table_name);
496 let materialized = self.materialize_view((**cte_view).clone())?;
498 Arc::new(materialized)
499 } else {
500 table.clone()
502 }
503 } else {
504 table.clone()
506 };
507
508 let final_table = if !statement.joins.is_empty() {
510 plan.begin_step(
511 StepType::Join,
512 format!("Process {} JOINs", statement.joins.len()),
513 );
514 plan.set_rows_in(source_table.row_count());
515
516 let join_executor = HashJoinExecutor::new(self.case_insensitive);
517 let mut current_table = source_table;
518
519 for (idx, join_clause) in statement.joins.iter().enumerate() {
520 let join_start = Instant::now();
521 plan.begin_step(StepType::Join, format!("JOIN #{}", idx + 1));
522 plan.add_detail(format!("Type: {:?}", join_clause.join_type));
523 plan.add_detail(format!("Left table: {} rows", current_table.row_count()));
524 plan.add_detail(format!(
525 "Executing {:?} JOIN on {}",
526 join_clause.join_type, join_clause.condition.left_column
527 ));
528
529 let right_table = match &join_clause.table {
531 TableSource::Table(name) => {
532 if let Some(cte_view) = cte_context.get(name) {
534 let materialized = self.materialize_view((**cte_view).clone())?;
535 Arc::new(materialized)
536 } else {
537 return Err(anyhow!("Cannot resolve table '{}' for JOIN", name));
540 }
541 }
542 TableSource::DerivedTable { query, alias: _ } => {
543 let subquery_result = self.build_view_with_context(
545 table.clone(),
546 *query.clone(),
547 cte_context,
548 )?;
549 let materialized = self.materialize_view(subquery_result)?;
550 Arc::new(materialized)
551 }
552 };
553
554 let joined = join_executor.execute_join(
556 current_table.clone(),
557 join_clause,
558 right_table.clone(),
559 )?;
560
561 plan.add_detail(format!("Right table: {} rows", right_table.row_count()));
562 plan.set_rows_out(joined.row_count());
563 plan.add_detail(format!("Result: {} rows", joined.row_count()));
564 plan.add_detail(format!(
565 "Join time: {:.3}ms",
566 join_start.elapsed().as_secs_f64() * 1000.0
567 ));
568 plan.end_step();
569
570 current_table = Arc::new(joined);
571 }
572
573 plan.set_rows_out(current_table.row_count());
574 plan.add_detail(format!(
575 "Final result after all joins: {} rows",
576 current_table.row_count()
577 ));
578 plan.end_step();
579 current_table
580 } else {
581 source_table
582 };
583
584 self.build_view_internal_with_plan(final_table, statement, plan)
586 }
587
588 fn materialize_view(&self, view: DataView) -> Result<DataTable> {
590 let source = view.source();
591 let mut result_table = DataTable::new("derived");
592
593 let visible_cols = view.visible_column_indices().to_vec();
595
596 for col_idx in &visible_cols {
598 let col = &source.columns[*col_idx];
599 let new_col = DataColumn {
600 name: col.name.clone(),
601 data_type: col.data_type.clone(),
602 nullable: col.nullable,
603 unique_values: col.unique_values,
604 null_count: col.null_count,
605 metadata: col.metadata.clone(),
606 };
607 result_table.add_column(new_col);
608 }
609
610 for row_idx in view.visible_row_indices() {
612 let source_row = &source.rows[*row_idx];
613 let mut new_row = DataRow { values: Vec::new() };
614
615 for col_idx in &visible_cols {
616 new_row.values.push(source_row.values[*col_idx].clone());
617 }
618
619 result_table.add_row(new_row);
620 }
621
622 Ok(result_table)
623 }
624
625 fn build_view_internal(
626 &self,
627 table: Arc<DataTable>,
628 statement: SelectStatement,
629 ) -> Result<DataView> {
630 let mut dummy_plan = ExecutionPlanBuilder::new();
631 self.build_view_internal_with_plan(table, statement, &mut dummy_plan)
632 }
633
634 fn build_view_internal_with_plan(
635 &self,
636 table: Arc<DataTable>,
637 statement: SelectStatement,
638 plan: &mut ExecutionPlanBuilder,
639 ) -> Result<DataView> {
640 debug!(
641 "QueryEngine::build_view - select_items: {:?}",
642 statement.select_items
643 );
644 debug!(
645 "QueryEngine::build_view - where_clause: {:?}",
646 statement.where_clause
647 );
648
649 let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
651
652 if let Some(where_clause) = &statement.where_clause {
654 let total_rows = table.row_count();
655 debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
656 debug!("QueryEngine: WHERE clause = {:?}", where_clause);
657
658 plan.begin_step(StepType::Filter, "WHERE clause filtering".to_string());
659 plan.set_rows_in(total_rows);
660 plan.add_detail(format!("Input: {} rows", total_rows));
661
662 for condition in &where_clause.conditions {
664 plan.add_detail(format!("Condition: {:?}", condition.expr));
665 }
666
667 let filter_start = Instant::now();
668 let mut filtered_rows = Vec::new();
670 for row_idx in visible_rows {
671 if row_idx < 3 {
673 debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
674 }
675 let evaluator = RecursiveWhereEvaluator::with_config(
676 &table,
677 self.case_insensitive,
678 self.date_notation.clone(),
679 );
680 match evaluator.evaluate(where_clause, row_idx) {
681 Ok(result) => {
682 if row_idx < 3 {
683 debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
684 }
685 if result {
686 filtered_rows.push(row_idx);
687 }
688 }
689 Err(e) => {
690 if row_idx < 3 {
691 debug!(
692 "QueryEngine: WHERE evaluation error for row {}: {}",
693 row_idx, e
694 );
695 }
696 return Err(e);
698 }
699 }
700 }
701 visible_rows = filtered_rows;
702 let filter_duration = filter_start.elapsed();
703 info!(
704 "WHERE clause filtering: {} rows -> {} rows in {:?}",
705 total_rows,
706 visible_rows.len(),
707 filter_duration
708 );
709
710 plan.set_rows_out(visible_rows.len());
711 plan.add_detail(format!("Output: {} rows", visible_rows.len()));
712 plan.add_detail(format!(
713 "Filter time: {:.3}ms",
714 filter_duration.as_secs_f64() * 1000.0
715 ));
716 plan.end_step();
717 }
718
719 let mut view = DataView::new(table.clone());
721 view = view.with_rows(visible_rows);
722
723 if let Some(group_by_exprs) = &statement.group_by {
725 if !group_by_exprs.is_empty() {
726 debug!("QueryEngine: Processing GROUP BY: {:?}", group_by_exprs);
727
728 plan.begin_step(
729 StepType::GroupBy,
730 format!("GROUP BY {} expressions", group_by_exprs.len()),
731 );
732 plan.set_rows_in(view.row_count());
733 plan.add_detail(format!("Input: {} rows", view.row_count()));
734 for expr in group_by_exprs {
735 plan.add_detail(format!("Group by: {:?}", expr));
736 }
737
738 let group_start = Instant::now();
739 view = self.apply_group_by(
740 view,
741 group_by_exprs,
742 &statement.select_items,
743 statement.having.as_ref(),
744 )?;
745
746 plan.set_rows_out(view.row_count());
747 plan.add_detail(format!("Output: {} groups", view.row_count()));
748 plan.add_detail(format!(
749 "Group time: {:.3}ms",
750 group_start.elapsed().as_secs_f64() * 1000.0
751 ));
752 plan.end_step();
753 }
754 } else {
755 if !statement.select_items.is_empty() {
757 let has_non_star_items = statement
759 .select_items
760 .iter()
761 .any(|item| !matches!(item, SelectItem::Star));
762
763 if has_non_star_items || statement.select_items.len() > 1 {
767 view = self.apply_select_items(view, &statement.select_items)?;
768 }
769 } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
771 let column_indices = self.resolve_column_indices(&table, &statement.columns)?;
773 view = view.with_columns(column_indices);
774 }
775 }
776
777 if statement.distinct {
779 plan.begin_step(StepType::Distinct, "Remove duplicate rows".to_string());
780 plan.set_rows_in(view.row_count());
781 plan.add_detail(format!("Input: {} rows", view.row_count()));
782
783 let distinct_start = Instant::now();
784 view = self.apply_distinct(view)?;
785
786 plan.set_rows_out(view.row_count());
787 plan.add_detail(format!("Output: {} unique rows", view.row_count()));
788 plan.add_detail(format!(
789 "Distinct time: {:.3}ms",
790 distinct_start.elapsed().as_secs_f64() * 1000.0
791 ));
792 plan.end_step();
793 }
794
795 if let Some(order_by_columns) = &statement.order_by {
797 if !order_by_columns.is_empty() {
798 plan.begin_step(
799 StepType::Sort,
800 format!("ORDER BY {} columns", order_by_columns.len()),
801 );
802 plan.set_rows_in(view.row_count());
803 for col in order_by_columns {
804 plan.add_detail(format!("{} {:?}", col.column, col.direction));
805 }
806
807 let sort_start = Instant::now();
808 view = self.apply_multi_order_by(view, order_by_columns)?;
809
810 plan.add_detail(format!(
811 "Sort time: {:.3}ms",
812 sort_start.elapsed().as_secs_f64() * 1000.0
813 ));
814 plan.end_step();
815 }
816 }
817
818 if let Some(limit) = statement.limit {
820 let offset = statement.offset.unwrap_or(0);
821 plan.begin_step(StepType::Limit, format!("LIMIT {}", limit));
822 plan.set_rows_in(view.row_count());
823 if offset > 0 {
824 plan.add_detail(format!("OFFSET: {}", offset));
825 }
826 view = view.with_limit(limit, offset);
827 plan.set_rows_out(view.row_count());
828 plan.add_detail(format!("Output: {} rows", view.row_count()));
829 plan.end_step();
830 }
831
832 Ok(view)
833 }
834
835 fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
837 let mut indices = Vec::new();
838 let table_columns = table.column_names();
839
840 for col_name in columns {
841 let index = table_columns
842 .iter()
843 .position(|c| c.eq_ignore_ascii_case(col_name))
844 .ok_or_else(|| {
845 let suggestion = self.find_similar_column(table, col_name);
846 match suggestion {
847 Some(similar) => anyhow::anyhow!(
848 "Column '{}' not found. Did you mean '{}'?",
849 col_name,
850 similar
851 ),
852 None => anyhow::anyhow!("Column '{}' not found", col_name),
853 }
854 })?;
855 indices.push(index);
856 }
857
858 Ok(indices)
859 }
860
861 fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
863 debug!(
864 "QueryEngine::apply_select_items - items: {:?}",
865 select_items
866 );
867 debug!(
868 "QueryEngine::apply_select_items - input view has {} rows",
869 view.row_count()
870 );
871
872 let all_aggregates = select_items.iter().all(|item| match item {
874 SelectItem::Expression { expr, .. } => contains_aggregate(expr),
875 SelectItem::Column(_) => false,
876 SelectItem::Star => false,
877 });
878
879 if all_aggregates && view.row_count() > 0 {
880 return self.apply_aggregate_select(view, select_items);
883 }
884
885 let has_computed_expressions = select_items
887 .iter()
888 .any(|item| matches!(item, SelectItem::Expression { .. }));
889
890 debug!(
891 "QueryEngine::apply_select_items - has_computed_expressions: {}",
892 has_computed_expressions
893 );
894
895 if !has_computed_expressions {
896 let column_indices = self.resolve_select_columns(view.source(), select_items)?;
898 return Ok(view.with_columns(column_indices));
899 }
900
901 let source_table = view.source();
906 let visible_rows = view.visible_row_indices();
907
908 let mut computed_table = DataTable::new("query_result");
911
912 let mut expanded_items = Vec::new();
914 for item in select_items {
915 match item {
916 SelectItem::Star => {
917 for col_name in source_table.column_names() {
919 expanded_items.push(SelectItem::Column(col_name));
920 }
921 }
922 _ => expanded_items.push(item.clone()),
923 }
924 }
925
926 let mut column_name_counts: std::collections::HashMap<String, usize> =
928 std::collections::HashMap::new();
929
930 for item in &expanded_items {
931 let base_name = match item {
932 SelectItem::Column(name) => name.clone(),
933 SelectItem::Expression { alias, .. } => alias.clone(),
934 SelectItem::Star => unreachable!("Star should have been expanded"),
935 };
936
937 let count = column_name_counts.entry(base_name.clone()).or_insert(0);
939 let column_name = if *count == 0 {
940 base_name.clone()
942 } else {
943 format!("{base_name}_{count}")
945 };
946 *count += 1;
947
948 computed_table.add_column(DataColumn::new(&column_name));
949 }
950
951 let mut evaluator =
953 ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
954
955 for &row_idx in visible_rows {
956 let mut row_values = Vec::new();
957
958 for item in &expanded_items {
959 let value = match item {
960 SelectItem::Column(col_name) => {
961 let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
963 let suggestion = self.find_similar_column(source_table, col_name);
964 match suggestion {
965 Some(similar) => anyhow::anyhow!(
966 "Column '{}' not found. Did you mean '{}'?",
967 col_name,
968 similar
969 ),
970 None => anyhow::anyhow!("Column '{}' not found", col_name),
971 }
972 })?;
973 let row = source_table
974 .get_row(row_idx)
975 .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
976 row.get(col_idx)
977 .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
978 .clone()
979 }
980 SelectItem::Expression { expr, .. } => {
981 evaluator.evaluate(expr, row_idx)?
983 }
984 SelectItem::Star => unreachable!("Star should have been expanded"),
985 };
986 row_values.push(value);
987 }
988
989 computed_table
990 .add_row(DataRow::new(row_values))
991 .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
992 }
993
994 Ok(DataView::new(Arc::new(computed_table)))
997 }
998
999 fn apply_aggregate_select(
1001 &self,
1002 view: DataView,
1003 select_items: &[SelectItem],
1004 ) -> Result<DataView> {
1005 debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
1006
1007 let source_table = view.source();
1008 let mut result_table = DataTable::new("aggregate_result");
1009
1010 for item in select_items {
1012 let column_name = match item {
1013 SelectItem::Expression { alias, .. } => alias.clone(),
1014 _ => unreachable!("Should only have expressions in aggregate-only query"),
1015 };
1016 result_table.add_column(DataColumn::new(&column_name));
1017 }
1018
1019 let visible_rows = view.visible_row_indices().to_vec();
1021 let mut evaluator =
1022 ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
1023 .with_visible_rows(visible_rows);
1024
1025 let mut row_values = Vec::new();
1027 for item in select_items {
1028 match item {
1029 SelectItem::Expression { expr, .. } => {
1030 let value = evaluator.evaluate(expr, 0)?;
1033 row_values.push(value);
1034 }
1035 _ => unreachable!("Should only have expressions in aggregate-only query"),
1036 }
1037 }
1038
1039 result_table
1041 .add_row(DataRow::new(row_values))
1042 .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
1043
1044 Ok(DataView::new(Arc::new(result_table)))
1045 }
1046
1047 fn resolve_select_columns(
1049 &self,
1050 table: &DataTable,
1051 select_items: &[SelectItem],
1052 ) -> Result<Vec<usize>> {
1053 let mut indices = Vec::new();
1054 let table_columns = table.column_names();
1055
1056 for item in select_items {
1057 match item {
1058 SelectItem::Column(col_name) => {
1059 let index = table_columns
1060 .iter()
1061 .position(|c| c.eq_ignore_ascii_case(col_name))
1062 .ok_or_else(|| {
1063 let suggestion = self.find_similar_column(table, col_name);
1064 match suggestion {
1065 Some(similar) => anyhow::anyhow!(
1066 "Column '{}' not found. Did you mean '{}'?",
1067 col_name,
1068 similar
1069 ),
1070 None => anyhow::anyhow!("Column '{}' not found", col_name),
1071 }
1072 })?;
1073 indices.push(index);
1074 }
1075 SelectItem::Star => {
1076 for i in 0..table_columns.len() {
1078 indices.push(i);
1079 }
1080 }
1081 SelectItem::Expression { .. } => {
1082 return Err(anyhow::anyhow!(
1083 "Computed expressions require new table creation"
1084 ));
1085 }
1086 }
1087 }
1088
1089 Ok(indices)
1090 }
1091
1092 fn apply_distinct(&self, view: DataView) -> Result<DataView> {
1094 use std::collections::HashSet;
1095
1096 let source = view.source();
1097 let visible_cols = view.visible_column_indices();
1098 let visible_rows = view.visible_row_indices();
1099
1100 let mut seen_rows = HashSet::new();
1102 let mut unique_row_indices = Vec::new();
1103
1104 for &row_idx in visible_rows {
1105 let mut row_key = Vec::new();
1107 for &col_idx in visible_cols {
1108 let value = source
1109 .get_value(row_idx, col_idx)
1110 .ok_or_else(|| anyhow!("Invalid cell reference"))?;
1111 row_key.push(format!("{:?}", value));
1113 }
1114
1115 if seen_rows.insert(row_key) {
1117 unique_row_indices.push(row_idx);
1119 }
1120 }
1121
1122 Ok(view.with_rows(unique_row_indices))
1124 }
1125
1126 fn apply_multi_order_by(
1128 &self,
1129 mut view: DataView,
1130 order_by_columns: &[OrderByColumn],
1131 ) -> Result<DataView> {
1132 let mut sort_columns = Vec::new();
1134
1135 for order_col in order_by_columns {
1136 let col_index = view
1140 .source()
1141 .get_column_index(&order_col.column)
1142 .ok_or_else(|| {
1143 let suggestion = self.find_similar_column(view.source(), &order_col.column);
1145 match suggestion {
1146 Some(similar) => anyhow::anyhow!(
1147 "Column '{}' not found. Did you mean '{}'?",
1148 order_col.column,
1149 similar
1150 ),
1151 None => {
1152 let available_cols = view.source().column_names().join(", ");
1154 anyhow::anyhow!(
1155 "Column '{}' not found. Available columns: {}",
1156 order_col.column,
1157 available_cols
1158 )
1159 }
1160 }
1161 })?;
1162
1163 let ascending = matches!(order_col.direction, SortDirection::Asc);
1164 sort_columns.push((col_index, ascending));
1165 }
1166
1167 view.apply_multi_sort(&sort_columns)?;
1169 Ok(view)
1170 }
1171
1172 fn apply_group_by(
1174 &self,
1175 view: DataView,
1176 group_by_exprs: &[SqlExpression],
1177 select_items: &[SelectItem],
1178 having: Option<&SqlExpression>,
1179 ) -> Result<DataView> {
1180 self.apply_group_by_expressions(
1182 view,
1183 group_by_exprs,
1184 select_items,
1185 having,
1186 self.case_insensitive,
1187 self.date_notation.clone(),
1188 )
1189 }
1190}
1191
1192#[cfg(test)]
1193mod tests {
1194 use super::*;
1195 use crate::data::datatable::{DataColumn, DataRow, DataValue};
1196
1197 fn create_test_table() -> Arc<DataTable> {
1198 let mut table = DataTable::new("test");
1199
1200 table.add_column(DataColumn::new("id"));
1202 table.add_column(DataColumn::new("name"));
1203 table.add_column(DataColumn::new("age"));
1204
1205 table
1207 .add_row(DataRow::new(vec![
1208 DataValue::Integer(1),
1209 DataValue::String("Alice".to_string()),
1210 DataValue::Integer(30),
1211 ]))
1212 .unwrap();
1213
1214 table
1215 .add_row(DataRow::new(vec![
1216 DataValue::Integer(2),
1217 DataValue::String("Bob".to_string()),
1218 DataValue::Integer(25),
1219 ]))
1220 .unwrap();
1221
1222 table
1223 .add_row(DataRow::new(vec![
1224 DataValue::Integer(3),
1225 DataValue::String("Charlie".to_string()),
1226 DataValue::Integer(35),
1227 ]))
1228 .unwrap();
1229
1230 Arc::new(table)
1231 }
1232
1233 #[test]
1234 fn test_select_all() {
1235 let table = create_test_table();
1236 let engine = QueryEngine::new();
1237
1238 let view = engine
1239 .execute(table.clone(), "SELECT * FROM users")
1240 .unwrap();
1241 assert_eq!(view.row_count(), 3);
1242 assert_eq!(view.column_count(), 3);
1243 }
1244
1245 #[test]
1246 fn test_select_columns() {
1247 let table = create_test_table();
1248 let engine = QueryEngine::new();
1249
1250 let view = engine
1251 .execute(table.clone(), "SELECT name, age FROM users")
1252 .unwrap();
1253 assert_eq!(view.row_count(), 3);
1254 assert_eq!(view.column_count(), 2);
1255 }
1256
1257 #[test]
1258 fn test_select_with_limit() {
1259 let table = create_test_table();
1260 let engine = QueryEngine::new();
1261
1262 let view = engine
1263 .execute(table.clone(), "SELECT * FROM users LIMIT 2")
1264 .unwrap();
1265 assert_eq!(view.row_count(), 2);
1266 }
1267
1268 #[test]
1269 fn test_type_coercion_contains() {
1270 let _ = tracing_subscriber::fmt()
1272 .with_max_level(tracing::Level::DEBUG)
1273 .try_init();
1274
1275 let mut table = DataTable::new("test");
1276 table.add_column(DataColumn::new("id"));
1277 table.add_column(DataColumn::new("status"));
1278 table.add_column(DataColumn::new("price"));
1279
1280 table
1282 .add_row(DataRow::new(vec![
1283 DataValue::Integer(1),
1284 DataValue::String("Pending".to_string()),
1285 DataValue::Float(99.99),
1286 ]))
1287 .unwrap();
1288
1289 table
1290 .add_row(DataRow::new(vec![
1291 DataValue::Integer(2),
1292 DataValue::String("Confirmed".to_string()),
1293 DataValue::Float(150.50),
1294 ]))
1295 .unwrap();
1296
1297 table
1298 .add_row(DataRow::new(vec![
1299 DataValue::Integer(3),
1300 DataValue::String("Pending".to_string()),
1301 DataValue::Float(75.00),
1302 ]))
1303 .unwrap();
1304
1305 let table = Arc::new(table);
1306 let engine = QueryEngine::new();
1307
1308 println!("\n=== Testing WHERE clause with Contains ===");
1309 println!("Table has {} rows", table.row_count());
1310 for i in 0..table.row_count() {
1311 let status = table.get_value(i, 1);
1312 println!("Row {i}: status = {status:?}");
1313 }
1314
1315 println!("\n--- Test 1: status.Contains('pend') ---");
1317 let result = engine.execute(
1318 table.clone(),
1319 "SELECT * FROM test WHERE status.Contains('pend')",
1320 );
1321 match result {
1322 Ok(view) => {
1323 println!("SUCCESS: Found {} matching rows", view.row_count());
1324 assert_eq!(view.row_count(), 2); }
1326 Err(e) => {
1327 panic!("Query failed: {e}");
1328 }
1329 }
1330
1331 println!("\n--- Test 2: price.Contains('9') ---");
1333 let result = engine.execute(
1334 table.clone(),
1335 "SELECT * FROM test WHERE price.Contains('9')",
1336 );
1337 match result {
1338 Ok(view) => {
1339 println!(
1340 "SUCCESS: Found {} matching rows with price containing '9'",
1341 view.row_count()
1342 );
1343 assert!(view.row_count() >= 1);
1345 }
1346 Err(e) => {
1347 panic!("Numeric coercion query failed: {e}");
1348 }
1349 }
1350
1351 println!("\n=== All tests passed! ===");
1352 }
1353
1354 #[test]
1355 fn test_not_in_clause() {
1356 let _ = tracing_subscriber::fmt()
1358 .with_max_level(tracing::Level::DEBUG)
1359 .try_init();
1360
1361 let mut table = DataTable::new("test");
1362 table.add_column(DataColumn::new("id"));
1363 table.add_column(DataColumn::new("country"));
1364
1365 table
1367 .add_row(DataRow::new(vec![
1368 DataValue::Integer(1),
1369 DataValue::String("CA".to_string()),
1370 ]))
1371 .unwrap();
1372
1373 table
1374 .add_row(DataRow::new(vec![
1375 DataValue::Integer(2),
1376 DataValue::String("US".to_string()),
1377 ]))
1378 .unwrap();
1379
1380 table
1381 .add_row(DataRow::new(vec![
1382 DataValue::Integer(3),
1383 DataValue::String("UK".to_string()),
1384 ]))
1385 .unwrap();
1386
1387 let table = Arc::new(table);
1388 let engine = QueryEngine::new();
1389
1390 println!("\n=== Testing NOT IN clause ===");
1391 println!("Table has {} rows", table.row_count());
1392 for i in 0..table.row_count() {
1393 let country = table.get_value(i, 1);
1394 println!("Row {i}: country = {country:?}");
1395 }
1396
1397 println!("\n--- Test: country NOT IN ('CA') ---");
1399 let result = engine.execute(
1400 table.clone(),
1401 "SELECT * FROM test WHERE country NOT IN ('CA')",
1402 );
1403 match result {
1404 Ok(view) => {
1405 println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
1406 assert_eq!(view.row_count(), 2); }
1408 Err(e) => {
1409 panic!("NOT IN query failed: {e}");
1410 }
1411 }
1412
1413 println!("\n=== NOT IN test complete! ===");
1414 }
1415
1416 #[test]
1417 fn test_case_insensitive_in_and_not_in() {
1418 let _ = tracing_subscriber::fmt()
1420 .with_max_level(tracing::Level::DEBUG)
1421 .try_init();
1422
1423 let mut table = DataTable::new("test");
1424 table.add_column(DataColumn::new("id"));
1425 table.add_column(DataColumn::new("country"));
1426
1427 table
1429 .add_row(DataRow::new(vec![
1430 DataValue::Integer(1),
1431 DataValue::String("CA".to_string()), ]))
1433 .unwrap();
1434
1435 table
1436 .add_row(DataRow::new(vec![
1437 DataValue::Integer(2),
1438 DataValue::String("us".to_string()), ]))
1440 .unwrap();
1441
1442 table
1443 .add_row(DataRow::new(vec![
1444 DataValue::Integer(3),
1445 DataValue::String("UK".to_string()), ]))
1447 .unwrap();
1448
1449 let table = Arc::new(table);
1450
1451 println!("\n=== Testing Case-Insensitive IN clause ===");
1452 println!("Table has {} rows", table.row_count());
1453 for i in 0..table.row_count() {
1454 let country = table.get_value(i, 1);
1455 println!("Row {i}: country = {country:?}");
1456 }
1457
1458 println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
1460 let engine = QueryEngine::with_case_insensitive(true);
1461 let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1462 match result {
1463 Ok(view) => {
1464 println!(
1465 "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
1466 view.row_count()
1467 );
1468 assert_eq!(view.row_count(), 1); }
1470 Err(e) => {
1471 panic!("Case-insensitive IN query failed: {e}");
1472 }
1473 }
1474
1475 println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
1477 let result = engine.execute(
1478 table.clone(),
1479 "SELECT * FROM test WHERE country NOT IN ('ca')",
1480 );
1481 match result {
1482 Ok(view) => {
1483 println!(
1484 "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
1485 view.row_count()
1486 );
1487 assert_eq!(view.row_count(), 2); }
1489 Err(e) => {
1490 panic!("Case-insensitive NOT IN query failed: {e}");
1491 }
1492 }
1493
1494 println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
1496 let engine_case_sensitive = QueryEngine::new(); let result = engine_case_sensitive
1498 .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1499 match result {
1500 Ok(view) => {
1501 println!(
1502 "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
1503 view.row_count()
1504 );
1505 assert_eq!(view.row_count(), 0); }
1507 Err(e) => {
1508 panic!("Case-sensitive IN query failed: {e}");
1509 }
1510 }
1511
1512 println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
1513 }
1514
1515 #[test]
1516 #[ignore = "Parentheses in WHERE clause not yet implemented"]
1517 fn test_parentheses_in_where_clause() {
1518 let _ = tracing_subscriber::fmt()
1520 .with_max_level(tracing::Level::DEBUG)
1521 .try_init();
1522
1523 let mut table = DataTable::new("test");
1524 table.add_column(DataColumn::new("id"));
1525 table.add_column(DataColumn::new("status"));
1526 table.add_column(DataColumn::new("priority"));
1527
1528 table
1530 .add_row(DataRow::new(vec![
1531 DataValue::Integer(1),
1532 DataValue::String("Pending".to_string()),
1533 DataValue::String("High".to_string()),
1534 ]))
1535 .unwrap();
1536
1537 table
1538 .add_row(DataRow::new(vec![
1539 DataValue::Integer(2),
1540 DataValue::String("Complete".to_string()),
1541 DataValue::String("High".to_string()),
1542 ]))
1543 .unwrap();
1544
1545 table
1546 .add_row(DataRow::new(vec![
1547 DataValue::Integer(3),
1548 DataValue::String("Pending".to_string()),
1549 DataValue::String("Low".to_string()),
1550 ]))
1551 .unwrap();
1552
1553 table
1554 .add_row(DataRow::new(vec![
1555 DataValue::Integer(4),
1556 DataValue::String("Complete".to_string()),
1557 DataValue::String("Low".to_string()),
1558 ]))
1559 .unwrap();
1560
1561 let table = Arc::new(table);
1562 let engine = QueryEngine::new();
1563
1564 println!("\n=== Testing Parentheses in WHERE clause ===");
1565 println!("Table has {} rows", table.row_count());
1566 for i in 0..table.row_count() {
1567 let status = table.get_value(i, 1);
1568 let priority = table.get_value(i, 2);
1569 println!("Row {i}: status = {status:?}, priority = {priority:?}");
1570 }
1571
1572 println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
1574 let result = engine.execute(
1575 table.clone(),
1576 "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
1577 );
1578 match result {
1579 Ok(view) => {
1580 println!(
1581 "SUCCESS: Found {} rows with parenthetical logic",
1582 view.row_count()
1583 );
1584 assert_eq!(view.row_count(), 2); }
1586 Err(e) => {
1587 panic!("Parentheses query failed: {e}");
1588 }
1589 }
1590
1591 println!("\n=== Parentheses test complete! ===");
1592 }
1593
1594 #[test]
1595 #[ignore = "Numeric type coercion needs fixing"]
1596 fn test_numeric_type_coercion() {
1597 let _ = tracing_subscriber::fmt()
1599 .with_max_level(tracing::Level::DEBUG)
1600 .try_init();
1601
1602 let mut table = DataTable::new("test");
1603 table.add_column(DataColumn::new("id"));
1604 table.add_column(DataColumn::new("price"));
1605 table.add_column(DataColumn::new("quantity"));
1606
1607 table
1609 .add_row(DataRow::new(vec![
1610 DataValue::Integer(1),
1611 DataValue::Float(99.50), DataValue::Integer(100),
1613 ]))
1614 .unwrap();
1615
1616 table
1617 .add_row(DataRow::new(vec![
1618 DataValue::Integer(2),
1619 DataValue::Float(150.0), DataValue::Integer(200),
1621 ]))
1622 .unwrap();
1623
1624 table
1625 .add_row(DataRow::new(vec![
1626 DataValue::Integer(3),
1627 DataValue::Integer(75), DataValue::Integer(50),
1629 ]))
1630 .unwrap();
1631
1632 let table = Arc::new(table);
1633 let engine = QueryEngine::new();
1634
1635 println!("\n=== Testing Numeric Type Coercion ===");
1636 println!("Table has {} rows", table.row_count());
1637 for i in 0..table.row_count() {
1638 let price = table.get_value(i, 1);
1639 let quantity = table.get_value(i, 2);
1640 println!("Row {i}: price = {price:?}, quantity = {quantity:?}");
1641 }
1642
1643 println!("\n--- Test: price.Contains('.') ---");
1645 let result = engine.execute(
1646 table.clone(),
1647 "SELECT * FROM test WHERE price.Contains('.')",
1648 );
1649 match result {
1650 Ok(view) => {
1651 println!(
1652 "SUCCESS: Found {} rows with decimal points in price",
1653 view.row_count()
1654 );
1655 assert_eq!(view.row_count(), 2); }
1657 Err(e) => {
1658 panic!("Numeric Contains query failed: {e}");
1659 }
1660 }
1661
1662 println!("\n--- Test: quantity.Contains('0') ---");
1664 let result = engine.execute(
1665 table.clone(),
1666 "SELECT * FROM test WHERE quantity.Contains('0')",
1667 );
1668 match result {
1669 Ok(view) => {
1670 println!(
1671 "SUCCESS: Found {} rows with '0' in quantity",
1672 view.row_count()
1673 );
1674 assert_eq!(view.row_count(), 2); }
1676 Err(e) => {
1677 panic!("Integer Contains query failed: {e}");
1678 }
1679 }
1680
1681 println!("\n=== Numeric type coercion test complete! ===");
1682 }
1683
1684 #[test]
1685 fn test_datetime_comparisons() {
1686 let _ = tracing_subscriber::fmt()
1688 .with_max_level(tracing::Level::DEBUG)
1689 .try_init();
1690
1691 let mut table = DataTable::new("test");
1692 table.add_column(DataColumn::new("id"));
1693 table.add_column(DataColumn::new("created_date"));
1694
1695 table
1697 .add_row(DataRow::new(vec![
1698 DataValue::Integer(1),
1699 DataValue::String("2024-12-15".to_string()),
1700 ]))
1701 .unwrap();
1702
1703 table
1704 .add_row(DataRow::new(vec![
1705 DataValue::Integer(2),
1706 DataValue::String("2025-01-15".to_string()),
1707 ]))
1708 .unwrap();
1709
1710 table
1711 .add_row(DataRow::new(vec![
1712 DataValue::Integer(3),
1713 DataValue::String("2025-02-15".to_string()),
1714 ]))
1715 .unwrap();
1716
1717 let table = Arc::new(table);
1718 let engine = QueryEngine::new();
1719
1720 println!("\n=== Testing DateTime Comparisons ===");
1721 println!("Table has {} rows", table.row_count());
1722 for i in 0..table.row_count() {
1723 let date = table.get_value(i, 1);
1724 println!("Row {i}: created_date = {date:?}");
1725 }
1726
1727 println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1729 let result = engine.execute(
1730 table.clone(),
1731 "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1732 );
1733 match result {
1734 Ok(view) => {
1735 println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1736 assert_eq!(view.row_count(), 2); }
1738 Err(e) => {
1739 panic!("DateTime comparison query failed: {e}");
1740 }
1741 }
1742
1743 println!("\n=== DateTime comparison test complete! ===");
1744 }
1745
1746 #[test]
1747 fn test_not_with_method_calls() {
1748 let _ = tracing_subscriber::fmt()
1750 .with_max_level(tracing::Level::DEBUG)
1751 .try_init();
1752
1753 let mut table = DataTable::new("test");
1754 table.add_column(DataColumn::new("id"));
1755 table.add_column(DataColumn::new("status"));
1756
1757 table
1759 .add_row(DataRow::new(vec![
1760 DataValue::Integer(1),
1761 DataValue::String("Pending Review".to_string()),
1762 ]))
1763 .unwrap();
1764
1765 table
1766 .add_row(DataRow::new(vec![
1767 DataValue::Integer(2),
1768 DataValue::String("Complete".to_string()),
1769 ]))
1770 .unwrap();
1771
1772 table
1773 .add_row(DataRow::new(vec![
1774 DataValue::Integer(3),
1775 DataValue::String("Pending Approval".to_string()),
1776 ]))
1777 .unwrap();
1778
1779 let table = Arc::new(table);
1780 let engine = QueryEngine::with_case_insensitive(true);
1781
1782 println!("\n=== Testing NOT with Method Calls ===");
1783 println!("Table has {} rows", table.row_count());
1784 for i in 0..table.row_count() {
1785 let status = table.get_value(i, 1);
1786 println!("Row {i}: status = {status:?}");
1787 }
1788
1789 println!("\n--- Test: NOT status.Contains('pend') ---");
1791 let result = engine.execute(
1792 table.clone(),
1793 "SELECT * FROM test WHERE NOT status.Contains('pend')",
1794 );
1795 match result {
1796 Ok(view) => {
1797 println!(
1798 "SUCCESS: Found {} rows NOT containing 'pend'",
1799 view.row_count()
1800 );
1801 assert_eq!(view.row_count(), 1); }
1803 Err(e) => {
1804 panic!("NOT Contains query failed: {e}");
1805 }
1806 }
1807
1808 println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1810 let result = engine.execute(
1811 table.clone(),
1812 "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1813 );
1814 match result {
1815 Ok(view) => {
1816 println!(
1817 "SUCCESS: Found {} rows NOT starting with 'Pending'",
1818 view.row_count()
1819 );
1820 assert_eq!(view.row_count(), 1); }
1822 Err(e) => {
1823 panic!("NOT StartsWith query failed: {e}");
1824 }
1825 }
1826
1827 println!("\n=== NOT with method calls test complete! ===");
1828 }
1829
1830 #[test]
1831 #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1832 fn test_complex_logical_expressions() {
1833 let _ = tracing_subscriber::fmt()
1835 .with_max_level(tracing::Level::DEBUG)
1836 .try_init();
1837
1838 let mut table = DataTable::new("test");
1839 table.add_column(DataColumn::new("id"));
1840 table.add_column(DataColumn::new("status"));
1841 table.add_column(DataColumn::new("priority"));
1842 table.add_column(DataColumn::new("assigned"));
1843
1844 table
1846 .add_row(DataRow::new(vec![
1847 DataValue::Integer(1),
1848 DataValue::String("Pending".to_string()),
1849 DataValue::String("High".to_string()),
1850 DataValue::String("John".to_string()),
1851 ]))
1852 .unwrap();
1853
1854 table
1855 .add_row(DataRow::new(vec![
1856 DataValue::Integer(2),
1857 DataValue::String("Complete".to_string()),
1858 DataValue::String("High".to_string()),
1859 DataValue::String("Jane".to_string()),
1860 ]))
1861 .unwrap();
1862
1863 table
1864 .add_row(DataRow::new(vec![
1865 DataValue::Integer(3),
1866 DataValue::String("Pending".to_string()),
1867 DataValue::String("Low".to_string()),
1868 DataValue::String("John".to_string()),
1869 ]))
1870 .unwrap();
1871
1872 table
1873 .add_row(DataRow::new(vec![
1874 DataValue::Integer(4),
1875 DataValue::String("In Progress".to_string()),
1876 DataValue::String("Medium".to_string()),
1877 DataValue::String("Jane".to_string()),
1878 ]))
1879 .unwrap();
1880
1881 let table = Arc::new(table);
1882 let engine = QueryEngine::new();
1883
1884 println!("\n=== Testing Complex Logical Expressions ===");
1885 println!("Table has {} rows", table.row_count());
1886 for i in 0..table.row_count() {
1887 let status = table.get_value(i, 1);
1888 let priority = table.get_value(i, 2);
1889 let assigned = table.get_value(i, 3);
1890 println!(
1891 "Row {i}: status = {status:?}, priority = {priority:?}, assigned = {assigned:?}"
1892 );
1893 }
1894
1895 println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
1897 let result = engine.execute(
1898 table.clone(),
1899 "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
1900 );
1901 match result {
1902 Ok(view) => {
1903 println!(
1904 "SUCCESS: Found {} rows with complex logic",
1905 view.row_count()
1906 );
1907 assert_eq!(view.row_count(), 2); }
1909 Err(e) => {
1910 panic!("Complex logic query failed: {e}");
1911 }
1912 }
1913
1914 println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
1916 let result = engine.execute(
1917 table.clone(),
1918 "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
1919 );
1920 match result {
1921 Ok(view) => {
1922 println!(
1923 "SUCCESS: Found {} rows with NOT complex logic",
1924 view.row_count()
1925 );
1926 assert_eq!(view.row_count(), 2); }
1928 Err(e) => {
1929 panic!("NOT complex logic query failed: {e}");
1930 }
1931 }
1932
1933 println!("\n=== Complex logical expressions test complete! ===");
1934 }
1935
1936 #[test]
1937 fn test_mixed_data_types_and_edge_cases() {
1938 let _ = tracing_subscriber::fmt()
1940 .with_max_level(tracing::Level::DEBUG)
1941 .try_init();
1942
1943 let mut table = DataTable::new("test");
1944 table.add_column(DataColumn::new("id"));
1945 table.add_column(DataColumn::new("value"));
1946 table.add_column(DataColumn::new("nullable_field"));
1947
1948 table
1950 .add_row(DataRow::new(vec![
1951 DataValue::Integer(1),
1952 DataValue::String("123.45".to_string()),
1953 DataValue::String("present".to_string()),
1954 ]))
1955 .unwrap();
1956
1957 table
1958 .add_row(DataRow::new(vec![
1959 DataValue::Integer(2),
1960 DataValue::Float(678.90),
1961 DataValue::Null,
1962 ]))
1963 .unwrap();
1964
1965 table
1966 .add_row(DataRow::new(vec![
1967 DataValue::Integer(3),
1968 DataValue::Boolean(true),
1969 DataValue::String("also present".to_string()),
1970 ]))
1971 .unwrap();
1972
1973 table
1974 .add_row(DataRow::new(vec![
1975 DataValue::Integer(4),
1976 DataValue::String("false".to_string()),
1977 DataValue::Null,
1978 ]))
1979 .unwrap();
1980
1981 let table = Arc::new(table);
1982 let engine = QueryEngine::new();
1983
1984 println!("\n=== Testing Mixed Data Types and Edge Cases ===");
1985 println!("Table has {} rows", table.row_count());
1986 for i in 0..table.row_count() {
1987 let value = table.get_value(i, 1);
1988 let nullable = table.get_value(i, 2);
1989 println!("Row {i}: value = {value:?}, nullable_field = {nullable:?}");
1990 }
1991
1992 println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
1994 let result = engine.execute(
1995 table.clone(),
1996 "SELECT * FROM test WHERE value.Contains('true')",
1997 );
1998 match result {
1999 Ok(view) => {
2000 println!(
2001 "SUCCESS: Found {} rows with boolean coercion",
2002 view.row_count()
2003 );
2004 assert_eq!(view.row_count(), 1); }
2006 Err(e) => {
2007 panic!("Boolean coercion query failed: {e}");
2008 }
2009 }
2010
2011 println!("\n--- Test: id IN (1, 3) ---");
2013 let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
2014 match result {
2015 Ok(view) => {
2016 println!("SUCCESS: Found {} rows with IN clause", view.row_count());
2017 assert_eq!(view.row_count(), 2); }
2019 Err(e) => {
2020 panic!("Multiple IN values query failed: {e}");
2021 }
2022 }
2023
2024 println!("\n=== Mixed data types test complete! ===");
2025 }
2026
2027 #[test]
2028 fn test_not_in_parsing() {
2029 use crate::sql::recursive_parser::Parser;
2030
2031 let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
2032 println!("\n=== Testing NOT IN parsing ===");
2033 println!("Parsing query: {query}");
2034
2035 let mut parser = Parser::new(query);
2036 match parser.parse() {
2037 Ok(statement) => {
2038 println!("Parsed statement: {statement:#?}");
2039 if let Some(where_clause) = statement.where_clause {
2040 println!("WHERE conditions: {:#?}", where_clause.conditions);
2041 if let Some(first_condition) = where_clause.conditions.first() {
2042 println!("First condition expression: {:#?}", first_condition.expr);
2043 }
2044 }
2045 }
2046 Err(e) => {
2047 panic!("Parse error: {e}");
2048 }
2049 }
2050 }
2051}
2052
2053#[cfg(test)]
2054#[path = "query_engine_tests.rs"]
2055mod query_engine_tests;