1use anyhow::{anyhow, Result};
2use std::collections::HashMap;
3use std::sync::Arc;
4use std::time::Instant;
5use tracing::{debug, info};
6
7use crate::config::config::BehaviorConfig;
8use crate::config::global::get_date_notation;
9use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
10use crate::data::data_view::DataView;
11use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
12use crate::data::group_by_expressions::GroupByExpressions;
13use crate::data::hash_join::HashJoinExecutor;
14use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
15use crate::data::subquery_executor::SubqueryExecutor;
16use crate::data::virtual_table_generator::VirtualTableGenerator;
17use crate::execution_plan::{ExecutionPlan, ExecutionPlanBuilder, StepType};
18use crate::sql::aggregates::{contains_aggregate, is_aggregate_compatible};
19use crate::sql::parser::ast::TableSource;
20use crate::sql::recursive_parser::{
21 CTEType, OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection, SqlExpression,
22 TableFunction,
23};
24
25#[derive(Clone)]
27pub struct QueryEngine {
28 case_insensitive: bool,
29 date_notation: String,
30 behavior_config: Option<BehaviorConfig>,
31}
32
33impl Default for QueryEngine {
34 fn default() -> Self {
35 Self::new()
36 }
37}
38
39impl QueryEngine {
40 #[must_use]
41 pub fn new() -> Self {
42 Self {
43 case_insensitive: false,
44 date_notation: get_date_notation(),
45 behavior_config: None,
46 }
47 }
48
49 #[must_use]
50 pub fn with_behavior_config(config: BehaviorConfig) -> Self {
51 let case_insensitive = config.case_insensitive_default;
52 let date_notation = get_date_notation();
54 Self {
55 case_insensitive,
56 date_notation,
57 behavior_config: Some(config),
58 }
59 }
60
61 #[must_use]
62 pub fn with_date_notation(_date_notation: String) -> Self {
63 Self {
64 case_insensitive: false,
65 date_notation: get_date_notation(), behavior_config: None,
67 }
68 }
69
70 #[must_use]
71 pub fn with_case_insensitive(case_insensitive: bool) -> Self {
72 Self {
73 case_insensitive,
74 date_notation: get_date_notation(),
75 behavior_config: None,
76 }
77 }
78
79 #[must_use]
80 pub fn with_case_insensitive_and_date_notation(
81 case_insensitive: bool,
82 _date_notation: String, ) -> Self {
84 Self {
85 case_insensitive,
86 date_notation: get_date_notation(), behavior_config: None,
88 }
89 }
90
91 fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
93 let columns = table.column_names();
94 let mut best_match: Option<(String, usize)> = None;
95
96 for col in columns {
97 let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
98 let max_distance = if name.len() > 10 { 3 } else { 2 };
101 if distance <= max_distance {
102 match &best_match {
103 None => best_match = Some((col, distance)),
104 Some((_, best_dist)) if distance < *best_dist => {
105 best_match = Some((col, distance));
106 }
107 _ => {}
108 }
109 }
110 }
111
112 best_match.map(|(name, _)| name)
113 }
114
115 fn edit_distance(&self, s1: &str, s2: &str) -> usize {
117 let len1 = s1.len();
118 let len2 = s2.len();
119 let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
120
121 for i in 0..=len1 {
122 matrix[i][0] = i;
123 }
124 for j in 0..=len2 {
125 matrix[0][j] = j;
126 }
127
128 for (i, c1) in s1.chars().enumerate() {
129 for (j, c2) in s2.chars().enumerate() {
130 let cost = usize::from(c1 != c2);
131 matrix[i + 1][j + 1] = std::cmp::min(
132 matrix[i][j + 1] + 1, std::cmp::min(
134 matrix[i + 1][j] + 1, matrix[i][j] + cost, ),
137 );
138 }
139 }
140
141 matrix[len1][len2]
142 }
143
144 pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
146 let (view, _plan) = self.execute_with_plan(table, sql)?;
147 Ok(view)
148 }
149
150 pub fn execute_statement(
152 &self,
153 table: Arc<DataTable>,
154 statement: SelectStatement,
155 ) -> Result<DataView> {
156 let mut cte_context = HashMap::new();
158 for cte in &statement.ctes {
159 debug!("QueryEngine: Pre-processing CTE '{}'...", cte.name);
160 let cte_result = match &cte.cte_type {
162 CTEType::Standard(query) => {
163 self.build_view_with_context(table.clone(), query.clone(), &mut cte_context)?
165 }
166 CTEType::Web(web_spec) => {
167 use crate::web::http_fetcher::WebDataFetcher;
169
170 let fetcher = WebDataFetcher::new()?;
171 let data_table = fetcher.fetch(web_spec, &cte.name)?;
172
173 DataView::new(Arc::new(data_table))
175 }
176 };
177 cte_context.insert(cte.name.clone(), Arc::new(cte_result));
179 debug!(
180 "QueryEngine: CTE '{}' pre-processed, stored in context",
181 cte.name
182 );
183 }
184
185 let mut subquery_executor =
187 SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
188 let processed_statement = subquery_executor.execute_subqueries(&statement)?;
189
190 self.build_view_with_context(table, processed_statement, &mut cte_context)
192 }
193
194 pub fn execute_statement_with_cte_context(
196 &self,
197 table: Arc<DataTable>,
198 statement: SelectStatement,
199 cte_context: &HashMap<String, Arc<DataView>>,
200 ) -> Result<DataView> {
201 let mut local_context = cte_context.clone();
203
204 for cte in &statement.ctes {
206 debug!("QueryEngine: Processing nested CTE '{}'...", cte.name);
207 let cte_result = match &cte.cte_type {
208 CTEType::Standard(query) => {
209 self.build_view_with_context(table.clone(), query.clone(), &mut local_context)?
210 }
211 CTEType::Web(web_spec) => {
212 use crate::web::http_fetcher::WebDataFetcher;
214
215 let fetcher = WebDataFetcher::new()?;
216 let data_table = fetcher.fetch(web_spec, &cte.name)?;
217
218 DataView::new(Arc::new(data_table))
220 }
221 };
222 local_context.insert(cte.name.clone(), Arc::new(cte_result));
223 }
224
225 let mut subquery_executor =
227 SubqueryExecutor::with_cte_context(self.clone(), table.clone(), local_context.clone());
228 let processed_statement = subquery_executor.execute_subqueries(&statement)?;
229
230 self.build_view_with_context(table, processed_statement, &mut local_context)
232 }
233
234 pub fn execute_with_plan(
236 &self,
237 table: Arc<DataTable>,
238 sql: &str,
239 ) -> Result<(DataView, ExecutionPlan)> {
240 let mut plan_builder = ExecutionPlanBuilder::new();
241 let start_time = Instant::now();
242
243 plan_builder.begin_step(StepType::Parse, "Parse SQL query".to_string());
245 plan_builder.add_detail(format!("Query: {}", sql));
246 let mut parser = Parser::new(sql);
247 let statement = parser
248 .parse()
249 .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
250 plan_builder.add_detail(format!("Parsed successfully"));
251 if let Some(from) = &statement.from_table {
252 plan_builder.add_detail(format!("FROM: {}", from));
253 }
254 if statement.where_clause.is_some() {
255 plan_builder.add_detail("WHERE clause present".to_string());
256 }
257 plan_builder.end_step();
258
259 let mut cte_context = HashMap::new();
261
262 if !statement.ctes.is_empty() {
263 plan_builder.begin_step(
264 StepType::CTE,
265 format!("Process {} CTEs", statement.ctes.len()),
266 );
267
268 for cte in &statement.ctes {
269 let cte_start = Instant::now();
270 plan_builder.begin_step(StepType::CTE, format!("CTE '{}'", cte.name));
271
272 let cte_result = match &cte.cte_type {
273 CTEType::Standard(query) => {
274 if let Some(from) = &query.from_table {
276 plan_builder.add_detail(format!("Source: {}", from));
277 }
278 if query.where_clause.is_some() {
279 plan_builder.add_detail("Has WHERE clause".to_string());
280 }
281 if query.group_by.is_some() {
282 plan_builder.add_detail("Has GROUP BY".to_string());
283 }
284
285 debug!(
286 "QueryEngine: Processing CTE '{}' with existing context: {:?}",
287 cte.name,
288 cte_context.keys().collect::<Vec<_>>()
289 );
290
291 let mut subquery_executor = SubqueryExecutor::with_cte_context(
294 self.clone(),
295 table.clone(),
296 cte_context.clone(),
297 );
298 let processed_query = subquery_executor.execute_subqueries(query)?;
299
300 self.build_view_with_context(
301 table.clone(),
302 processed_query,
303 &mut cte_context,
304 )?
305 }
306 CTEType::Web(web_spec) => {
307 plan_builder.add_detail(format!("URL: {}", web_spec.url));
308 if let Some(format) = &web_spec.format {
309 plan_builder.add_detail(format!("Format: {:?}", format));
310 }
311 if let Some(cache) = web_spec.cache_seconds {
312 plan_builder.add_detail(format!("Cache: {} seconds", cache));
313 }
314
315 use crate::web::http_fetcher::WebDataFetcher;
317
318 let fetcher = WebDataFetcher::new()?;
319 let data_table = fetcher.fetch(web_spec, &cte.name)?;
320
321 DataView::new(Arc::new(data_table))
323 }
324 };
325
326 plan_builder.set_rows_out(cte_result.row_count());
328 plan_builder.add_detail(format!(
329 "Result: {} rows, {} columns",
330 cte_result.row_count(),
331 cte_result.column_count()
332 ));
333 plan_builder.add_detail(format!(
334 "Execution time: {:.3}ms",
335 cte_start.elapsed().as_secs_f64() * 1000.0
336 ));
337
338 debug!(
339 "QueryEngine: Storing CTE '{}' in context with {} rows",
340 cte.name,
341 cte_result.row_count()
342 );
343 cte_context.insert(cte.name.clone(), Arc::new(cte_result));
344 plan_builder.end_step();
345 }
346
347 plan_builder.add_detail(format!(
348 "All {} CTEs cached in context",
349 statement.ctes.len()
350 ));
351 plan_builder.end_step();
352 }
353
354 plan_builder.begin_step(StepType::Subquery, "Process subqueries".to_string());
356 let mut subquery_executor =
357 SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
358
359 let has_subqueries = statement.where_clause.as_ref().map_or(false, |w| {
361 format!("{:?}", w).contains("Subquery")
363 });
364
365 if has_subqueries {
366 plan_builder.add_detail("Evaluating subqueries in WHERE clause".to_string());
367 }
368
369 let processed_statement = subquery_executor.execute_subqueries(&statement)?;
370
371 if has_subqueries {
372 plan_builder.add_detail("Subqueries replaced with materialized values".to_string());
373 } else {
374 plan_builder.add_detail("No subqueries to process".to_string());
375 }
376
377 plan_builder.end_step();
378 let result = self.build_view_with_context_and_plan(
379 table,
380 processed_statement,
381 &mut cte_context,
382 &mut plan_builder,
383 )?;
384
385 let total_duration = start_time.elapsed();
386 info!(
387 "Query execution complete: total={:?}, rows={}",
388 total_duration,
389 result.row_count()
390 );
391
392 let plan = plan_builder.build();
393 Ok((result, plan))
394 }
395
396 fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
398 let mut cte_context = HashMap::new();
399 self.build_view_with_context(table, statement, &mut cte_context)
400 }
401
402 fn build_view_with_context(
404 &self,
405 table: Arc<DataTable>,
406 statement: SelectStatement,
407 cte_context: &mut HashMap<String, Arc<DataView>>,
408 ) -> Result<DataView> {
409 let mut dummy_plan = ExecutionPlanBuilder::new();
410 self.build_view_with_context_and_plan(table, statement, cte_context, &mut dummy_plan)
411 }
412
413 fn build_view_with_context_and_plan(
415 &self,
416 table: Arc<DataTable>,
417 statement: SelectStatement,
418 cte_context: &mut HashMap<String, Arc<DataView>>,
419 plan: &mut ExecutionPlanBuilder,
420 ) -> Result<DataView> {
421 for cte in &statement.ctes {
423 if cte_context.contains_key(&cte.name) {
425 debug!(
426 "QueryEngine: CTE '{}' already in context, skipping",
427 cte.name
428 );
429 continue;
430 }
431
432 debug!("QueryEngine: Processing CTE '{}'...", cte.name);
433 debug!(
434 "QueryEngine: Available CTEs for '{}': {:?}",
435 cte.name,
436 cte_context.keys().collect::<Vec<_>>()
437 );
438
439 let cte_result = match &cte.cte_type {
441 CTEType::Standard(query) => {
442 self.build_view_with_context(table.clone(), query.clone(), cte_context)?
443 }
444 CTEType::Web(_web_spec) => {
445 return Err(anyhow!(
447 "Web CTEs should be processed in execute_select method"
448 ));
449 }
450 };
451
452 cte_context.insert(cte.name.clone(), Arc::new(cte_result));
454 debug!(
455 "QueryEngine: CTE '{}' processed, stored in context",
456 cte.name
457 );
458 }
459
460 let source_table = if let Some(ref table_func) = statement.from_function {
462 debug!("QueryEngine: Processing table function...");
464 match table_func {
465 TableFunction::Range { start, end, step } => {
466 let mut evaluator =
468 ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
469
470 let dummy_row = 0;
472
473 let start_val = evaluator.evaluate(start, dummy_row)?;
474 let end_val = evaluator.evaluate(end, dummy_row)?;
475 let step_val = if let Some(step_expr) = step {
476 Some(evaluator.evaluate(step_expr, dummy_row)?)
477 } else {
478 None
479 };
480
481 let start_int = match start_val {
483 DataValue::Integer(i) => i,
484 DataValue::Float(f) => f as i64,
485 _ => return Err(anyhow!("RANGE start must be numeric")),
486 };
487
488 let end_int = match end_val {
489 DataValue::Integer(i) => i,
490 DataValue::Float(f) => f as i64,
491 _ => return Err(anyhow!("RANGE end must be numeric")),
492 };
493
494 let step_int = if let Some(step) = step_val {
495 match step {
496 DataValue::Integer(i) => Some(i),
497 DataValue::Float(f) => Some(f as i64),
498 _ => return Err(anyhow!("RANGE step must be numeric")),
499 }
500 } else {
501 None
502 };
503
504 VirtualTableGenerator::generate_range(start_int, end_int, step_int, None)?
506 }
507 }
508 } else if let Some(ref subquery) = statement.from_subquery {
509 debug!("QueryEngine: Processing FROM subquery...");
511 let subquery_result =
512 self.build_view_with_context(table.clone(), *subquery.clone(), cte_context)?;
513
514 let materialized = self.materialize_view(subquery_result)?;
517 Arc::new(materialized)
518 } else if let Some(ref table_name) = statement.from_table {
519 if let Some(cte_view) = cte_context.get(table_name) {
521 debug!("QueryEngine: Using CTE '{}' as source table", table_name);
522 let materialized = self.materialize_view((**cte_view).clone())?;
524 Arc::new(materialized)
525 } else {
526 table.clone()
528 }
529 } else {
530 table.clone()
532 };
533
534 let final_table = if !statement.joins.is_empty() {
536 plan.begin_step(
537 StepType::Join,
538 format!("Process {} JOINs", statement.joins.len()),
539 );
540 plan.set_rows_in(source_table.row_count());
541
542 let join_executor = HashJoinExecutor::new(self.case_insensitive);
543 let mut current_table = source_table;
544
545 for (idx, join_clause) in statement.joins.iter().enumerate() {
546 let join_start = Instant::now();
547 plan.begin_step(StepType::Join, format!("JOIN #{}", idx + 1));
548 plan.add_detail(format!("Type: {:?}", join_clause.join_type));
549 plan.add_detail(format!("Left table: {} rows", current_table.row_count()));
550 plan.add_detail(format!(
551 "Executing {:?} JOIN on {}",
552 join_clause.join_type, join_clause.condition.left_column
553 ));
554
555 let right_table = match &join_clause.table {
557 TableSource::Table(name) => {
558 if let Some(cte_view) = cte_context.get(name) {
560 let materialized = self.materialize_view((**cte_view).clone())?;
561 Arc::new(materialized)
562 } else {
563 return Err(anyhow!("Cannot resolve table '{}' for JOIN", name));
566 }
567 }
568 TableSource::DerivedTable { query, alias: _ } => {
569 let subquery_result = self.build_view_with_context(
571 table.clone(),
572 *query.clone(),
573 cte_context,
574 )?;
575 let materialized = self.materialize_view(subquery_result)?;
576 Arc::new(materialized)
577 }
578 };
579
580 let joined = join_executor.execute_join(
582 current_table.clone(),
583 join_clause,
584 right_table.clone(),
585 )?;
586
587 plan.add_detail(format!("Right table: {} rows", right_table.row_count()));
588 plan.set_rows_out(joined.row_count());
589 plan.add_detail(format!("Result: {} rows", joined.row_count()));
590 plan.add_detail(format!(
591 "Join time: {:.3}ms",
592 join_start.elapsed().as_secs_f64() * 1000.0
593 ));
594 plan.end_step();
595
596 current_table = Arc::new(joined);
597 }
598
599 plan.set_rows_out(current_table.row_count());
600 plan.add_detail(format!(
601 "Final result after all joins: {} rows",
602 current_table.row_count()
603 ));
604 plan.end_step();
605 current_table
606 } else {
607 source_table
608 };
609
610 self.build_view_internal_with_plan(final_table, statement, plan)
612 }
613
614 fn materialize_view(&self, view: DataView) -> Result<DataTable> {
616 let source = view.source();
617 let mut result_table = DataTable::new("derived");
618
619 let visible_cols = view.visible_column_indices().to_vec();
621
622 for col_idx in &visible_cols {
624 let col = &source.columns[*col_idx];
625 let new_col = DataColumn {
626 name: col.name.clone(),
627 data_type: col.data_type.clone(),
628 nullable: col.nullable,
629 unique_values: col.unique_values,
630 null_count: col.null_count,
631 metadata: col.metadata.clone(),
632 };
633 result_table.add_column(new_col);
634 }
635
636 for row_idx in view.visible_row_indices() {
638 let source_row = &source.rows[*row_idx];
639 let mut new_row = DataRow { values: Vec::new() };
640
641 for col_idx in &visible_cols {
642 new_row.values.push(source_row.values[*col_idx].clone());
643 }
644
645 result_table.add_row(new_row);
646 }
647
648 Ok(result_table)
649 }
650
651 fn build_view_internal(
652 &self,
653 table: Arc<DataTable>,
654 statement: SelectStatement,
655 ) -> Result<DataView> {
656 let mut dummy_plan = ExecutionPlanBuilder::new();
657 self.build_view_internal_with_plan(table, statement, &mut dummy_plan)
658 }
659
660 fn build_view_internal_with_plan(
661 &self,
662 table: Arc<DataTable>,
663 statement: SelectStatement,
664 plan: &mut ExecutionPlanBuilder,
665 ) -> Result<DataView> {
666 debug!(
667 "QueryEngine::build_view - select_items: {:?}",
668 statement.select_items
669 );
670 debug!(
671 "QueryEngine::build_view - where_clause: {:?}",
672 statement.where_clause
673 );
674
675 let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
677
678 if let Some(where_clause) = &statement.where_clause {
680 let total_rows = table.row_count();
681 debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
682 debug!("QueryEngine: WHERE clause = {:?}", where_clause);
683
684 plan.begin_step(StepType::Filter, "WHERE clause filtering".to_string());
685 plan.set_rows_in(total_rows);
686 plan.add_detail(format!("Input: {} rows", total_rows));
687
688 for condition in &where_clause.conditions {
690 plan.add_detail(format!("Condition: {:?}", condition.expr));
691 }
692
693 let filter_start = Instant::now();
694 let mut filtered_rows = Vec::new();
696 for row_idx in visible_rows {
697 if row_idx < 3 {
699 debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
700 }
701 let evaluator = RecursiveWhereEvaluator::with_config(
702 &table,
703 self.case_insensitive,
704 self.date_notation.clone(),
705 );
706 match evaluator.evaluate(where_clause, row_idx) {
707 Ok(result) => {
708 if row_idx < 3 {
709 debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
710 }
711 if result {
712 filtered_rows.push(row_idx);
713 }
714 }
715 Err(e) => {
716 if row_idx < 3 {
717 debug!(
718 "QueryEngine: WHERE evaluation error for row {}: {}",
719 row_idx, e
720 );
721 }
722 return Err(e);
724 }
725 }
726 }
727 visible_rows = filtered_rows;
728 let filter_duration = filter_start.elapsed();
729 info!(
730 "WHERE clause filtering: {} rows -> {} rows in {:?}",
731 total_rows,
732 visible_rows.len(),
733 filter_duration
734 );
735
736 plan.set_rows_out(visible_rows.len());
737 plan.add_detail(format!("Output: {} rows", visible_rows.len()));
738 plan.add_detail(format!(
739 "Filter time: {:.3}ms",
740 filter_duration.as_secs_f64() * 1000.0
741 ));
742 plan.end_step();
743 }
744
745 let mut view = DataView::new(table.clone());
747 view = view.with_rows(visible_rows);
748
749 if let Some(group_by_exprs) = &statement.group_by {
751 if !group_by_exprs.is_empty() {
752 debug!("QueryEngine: Processing GROUP BY: {:?}", group_by_exprs);
753
754 plan.begin_step(
755 StepType::GroupBy,
756 format!("GROUP BY {} expressions", group_by_exprs.len()),
757 );
758 plan.set_rows_in(view.row_count());
759 plan.add_detail(format!("Input: {} rows", view.row_count()));
760 for expr in group_by_exprs {
761 plan.add_detail(format!("Group by: {:?}", expr));
762 }
763
764 let group_start = Instant::now();
765 view = self.apply_group_by(
766 view,
767 group_by_exprs,
768 &statement.select_items,
769 statement.having.as_ref(),
770 )?;
771
772 plan.set_rows_out(view.row_count());
773 plan.add_detail(format!("Output: {} groups", view.row_count()));
774 plan.add_detail(format!(
775 "Group time: {:.3}ms",
776 group_start.elapsed().as_secs_f64() * 1000.0
777 ));
778 plan.end_step();
779 }
780 } else {
781 if !statement.select_items.is_empty() {
783 let has_non_star_items = statement
785 .select_items
786 .iter()
787 .any(|item| !matches!(item, SelectItem::Star));
788
789 if has_non_star_items || statement.select_items.len() > 1 {
793 view = self.apply_select_items(view, &statement.select_items)?;
794 }
795 } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
797 let column_indices = self.resolve_column_indices(&table, &statement.columns)?;
799 view = view.with_columns(column_indices);
800 }
801 }
802
803 if statement.distinct {
805 plan.begin_step(StepType::Distinct, "Remove duplicate rows".to_string());
806 plan.set_rows_in(view.row_count());
807 plan.add_detail(format!("Input: {} rows", view.row_count()));
808
809 let distinct_start = Instant::now();
810 view = self.apply_distinct(view)?;
811
812 plan.set_rows_out(view.row_count());
813 plan.add_detail(format!("Output: {} unique rows", view.row_count()));
814 plan.add_detail(format!(
815 "Distinct time: {:.3}ms",
816 distinct_start.elapsed().as_secs_f64() * 1000.0
817 ));
818 plan.end_step();
819 }
820
821 if let Some(order_by_columns) = &statement.order_by {
823 if !order_by_columns.is_empty() {
824 plan.begin_step(
825 StepType::Sort,
826 format!("ORDER BY {} columns", order_by_columns.len()),
827 );
828 plan.set_rows_in(view.row_count());
829 for col in order_by_columns {
830 plan.add_detail(format!("{} {:?}", col.column, col.direction));
831 }
832
833 let sort_start = Instant::now();
834 view = self.apply_multi_order_by(view, order_by_columns)?;
835
836 plan.add_detail(format!(
837 "Sort time: {:.3}ms",
838 sort_start.elapsed().as_secs_f64() * 1000.0
839 ));
840 plan.end_step();
841 }
842 }
843
844 if let Some(limit) = statement.limit {
846 let offset = statement.offset.unwrap_or(0);
847 plan.begin_step(StepType::Limit, format!("LIMIT {}", limit));
848 plan.set_rows_in(view.row_count());
849 if offset > 0 {
850 plan.add_detail(format!("OFFSET: {}", offset));
851 }
852 view = view.with_limit(limit, offset);
853 plan.set_rows_out(view.row_count());
854 plan.add_detail(format!("Output: {} rows", view.row_count()));
855 plan.end_step();
856 }
857
858 Ok(view)
859 }
860
861 fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
863 let mut indices = Vec::new();
864 let table_columns = table.column_names();
865
866 for col_name in columns {
867 let index = table_columns
868 .iter()
869 .position(|c| c.eq_ignore_ascii_case(col_name))
870 .ok_or_else(|| {
871 let suggestion = self.find_similar_column(table, col_name);
872 match suggestion {
873 Some(similar) => anyhow::anyhow!(
874 "Column '{}' not found. Did you mean '{}'?",
875 col_name,
876 similar
877 ),
878 None => anyhow::anyhow!("Column '{}' not found", col_name),
879 }
880 })?;
881 indices.push(index);
882 }
883
884 Ok(indices)
885 }
886
887 fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
889 debug!(
890 "QueryEngine::apply_select_items - items: {:?}",
891 select_items
892 );
893 debug!(
894 "QueryEngine::apply_select_items - input view has {} rows",
895 view.row_count()
896 );
897
898 let has_aggregates = select_items.iter().any(|item| match item {
902 SelectItem::Expression { expr, .. } => contains_aggregate(expr),
903 SelectItem::Column(_) => false,
904 SelectItem::Star => false,
905 });
906
907 let all_aggregate_compatible = select_items.iter().all(|item| match item {
908 SelectItem::Expression { expr, .. } => is_aggregate_compatible(expr),
909 SelectItem::Column(_) => false, SelectItem::Star => false, });
912
913 if has_aggregates && all_aggregate_compatible && view.row_count() > 0 {
914 debug!("QueryEngine::apply_select_items - detected aggregate query with constants");
917 return self.apply_aggregate_select(view, select_items);
918 }
919
920 let has_computed_expressions = select_items
922 .iter()
923 .any(|item| matches!(item, SelectItem::Expression { .. }));
924
925 debug!(
926 "QueryEngine::apply_select_items - has_computed_expressions: {}",
927 has_computed_expressions
928 );
929
930 if !has_computed_expressions {
931 let column_indices = self.resolve_select_columns(view.source(), select_items)?;
933 return Ok(view.with_columns(column_indices));
934 }
935
936 let source_table = view.source();
941 let visible_rows = view.visible_row_indices();
942
943 let mut computed_table = DataTable::new("query_result");
946
947 let mut expanded_items = Vec::new();
949 for item in select_items {
950 match item {
951 SelectItem::Star => {
952 for col_name in source_table.column_names() {
954 expanded_items.push(SelectItem::Column(col_name));
955 }
956 }
957 _ => expanded_items.push(item.clone()),
958 }
959 }
960
961 let mut column_name_counts: std::collections::HashMap<String, usize> =
963 std::collections::HashMap::new();
964
965 for item in &expanded_items {
966 let base_name = match item {
967 SelectItem::Column(name) => name.clone(),
968 SelectItem::Expression { alias, .. } => alias.clone(),
969 SelectItem::Star => unreachable!("Star should have been expanded"),
970 };
971
972 let count = column_name_counts.entry(base_name.clone()).or_insert(0);
974 let column_name = if *count == 0 {
975 base_name.clone()
977 } else {
978 format!("{base_name}_{count}")
980 };
981 *count += 1;
982
983 computed_table.add_column(DataColumn::new(&column_name));
984 }
985
986 let mut evaluator =
988 ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
989
990 for &row_idx in visible_rows {
991 let mut row_values = Vec::new();
992
993 for item in &expanded_items {
994 let value = match item {
995 SelectItem::Column(col_name) => {
996 let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
998 let suggestion = self.find_similar_column(source_table, col_name);
999 match suggestion {
1000 Some(similar) => anyhow::anyhow!(
1001 "Column '{}' not found. Did you mean '{}'?",
1002 col_name,
1003 similar
1004 ),
1005 None => anyhow::anyhow!("Column '{}' not found", col_name),
1006 }
1007 })?;
1008 let row = source_table
1009 .get_row(row_idx)
1010 .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
1011 row.get(col_idx)
1012 .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
1013 .clone()
1014 }
1015 SelectItem::Expression { expr, .. } => {
1016 evaluator.evaluate(expr, row_idx)?
1018 }
1019 SelectItem::Star => unreachable!("Star should have been expanded"),
1020 };
1021 row_values.push(value);
1022 }
1023
1024 computed_table
1025 .add_row(DataRow::new(row_values))
1026 .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
1027 }
1028
1029 Ok(DataView::new(Arc::new(computed_table)))
1032 }
1033
1034 fn apply_aggregate_select(
1036 &self,
1037 view: DataView,
1038 select_items: &[SelectItem],
1039 ) -> Result<DataView> {
1040 debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
1041
1042 let source_table = view.source();
1043 let mut result_table = DataTable::new("aggregate_result");
1044
1045 for item in select_items {
1047 let column_name = match item {
1048 SelectItem::Expression { alias, .. } => alias.clone(),
1049 _ => unreachable!("Should only have expressions in aggregate-only query"),
1050 };
1051 result_table.add_column(DataColumn::new(&column_name));
1052 }
1053
1054 let visible_rows = view.visible_row_indices().to_vec();
1056 let mut evaluator =
1057 ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
1058 .with_visible_rows(visible_rows);
1059
1060 let mut row_values = Vec::new();
1062 for item in select_items {
1063 match item {
1064 SelectItem::Expression { expr, .. } => {
1065 let value = evaluator.evaluate(expr, 0)?;
1068 row_values.push(value);
1069 }
1070 _ => unreachable!("Should only have expressions in aggregate-only query"),
1071 }
1072 }
1073
1074 result_table
1076 .add_row(DataRow::new(row_values))
1077 .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
1078
1079 Ok(DataView::new(Arc::new(result_table)))
1080 }
1081
1082 fn resolve_select_columns(
1084 &self,
1085 table: &DataTable,
1086 select_items: &[SelectItem],
1087 ) -> Result<Vec<usize>> {
1088 let mut indices = Vec::new();
1089 let table_columns = table.column_names();
1090
1091 for item in select_items {
1092 match item {
1093 SelectItem::Column(col_name) => {
1094 let index = table_columns
1095 .iter()
1096 .position(|c| c.eq_ignore_ascii_case(col_name))
1097 .ok_or_else(|| {
1098 let suggestion = self.find_similar_column(table, col_name);
1099 match suggestion {
1100 Some(similar) => anyhow::anyhow!(
1101 "Column '{}' not found. Did you mean '{}'?",
1102 col_name,
1103 similar
1104 ),
1105 None => anyhow::anyhow!("Column '{}' not found", col_name),
1106 }
1107 })?;
1108 indices.push(index);
1109 }
1110 SelectItem::Star => {
1111 for i in 0..table_columns.len() {
1113 indices.push(i);
1114 }
1115 }
1116 SelectItem::Expression { .. } => {
1117 return Err(anyhow::anyhow!(
1118 "Computed expressions require new table creation"
1119 ));
1120 }
1121 }
1122 }
1123
1124 Ok(indices)
1125 }
1126
1127 fn apply_distinct(&self, view: DataView) -> Result<DataView> {
1129 use std::collections::HashSet;
1130
1131 let source = view.source();
1132 let visible_cols = view.visible_column_indices();
1133 let visible_rows = view.visible_row_indices();
1134
1135 let mut seen_rows = HashSet::new();
1137 let mut unique_row_indices = Vec::new();
1138
1139 for &row_idx in visible_rows {
1140 let mut row_key = Vec::new();
1142 for &col_idx in visible_cols {
1143 let value = source
1144 .get_value(row_idx, col_idx)
1145 .ok_or_else(|| anyhow!("Invalid cell reference"))?;
1146 row_key.push(format!("{:?}", value));
1148 }
1149
1150 if seen_rows.insert(row_key) {
1152 unique_row_indices.push(row_idx);
1154 }
1155 }
1156
1157 Ok(view.with_rows(unique_row_indices))
1159 }
1160
1161 fn apply_multi_order_by(
1163 &self,
1164 mut view: DataView,
1165 order_by_columns: &[OrderByColumn],
1166 ) -> Result<DataView> {
1167 let mut sort_columns = Vec::new();
1169
1170 for order_col in order_by_columns {
1171 let col_index = view
1175 .source()
1176 .get_column_index(&order_col.column)
1177 .ok_or_else(|| {
1178 let suggestion = self.find_similar_column(view.source(), &order_col.column);
1180 match suggestion {
1181 Some(similar) => anyhow::anyhow!(
1182 "Column '{}' not found. Did you mean '{}'?",
1183 order_col.column,
1184 similar
1185 ),
1186 None => {
1187 let available_cols = view.source().column_names().join(", ");
1189 anyhow::anyhow!(
1190 "Column '{}' not found. Available columns: {}",
1191 order_col.column,
1192 available_cols
1193 )
1194 }
1195 }
1196 })?;
1197
1198 let ascending = matches!(order_col.direction, SortDirection::Asc);
1199 sort_columns.push((col_index, ascending));
1200 }
1201
1202 view.apply_multi_sort(&sort_columns)?;
1204 Ok(view)
1205 }
1206
1207 fn apply_group_by(
1209 &self,
1210 view: DataView,
1211 group_by_exprs: &[SqlExpression],
1212 select_items: &[SelectItem],
1213 having: Option<&SqlExpression>,
1214 ) -> Result<DataView> {
1215 self.apply_group_by_expressions(
1217 view,
1218 group_by_exprs,
1219 select_items,
1220 having,
1221 self.case_insensitive,
1222 self.date_notation.clone(),
1223 )
1224 }
1225}
1226
1227#[cfg(test)]
1228mod tests {
1229 use super::*;
1230 use crate::data::datatable::{DataColumn, DataRow, DataValue};
1231
1232 fn create_test_table() -> Arc<DataTable> {
1233 let mut table = DataTable::new("test");
1234
1235 table.add_column(DataColumn::new("id"));
1237 table.add_column(DataColumn::new("name"));
1238 table.add_column(DataColumn::new("age"));
1239
1240 table
1242 .add_row(DataRow::new(vec![
1243 DataValue::Integer(1),
1244 DataValue::String("Alice".to_string()),
1245 DataValue::Integer(30),
1246 ]))
1247 .unwrap();
1248
1249 table
1250 .add_row(DataRow::new(vec![
1251 DataValue::Integer(2),
1252 DataValue::String("Bob".to_string()),
1253 DataValue::Integer(25),
1254 ]))
1255 .unwrap();
1256
1257 table
1258 .add_row(DataRow::new(vec![
1259 DataValue::Integer(3),
1260 DataValue::String("Charlie".to_string()),
1261 DataValue::Integer(35),
1262 ]))
1263 .unwrap();
1264
1265 Arc::new(table)
1266 }
1267
1268 #[test]
1269 fn test_select_all() {
1270 let table = create_test_table();
1271 let engine = QueryEngine::new();
1272
1273 let view = engine
1274 .execute(table.clone(), "SELECT * FROM users")
1275 .unwrap();
1276 assert_eq!(view.row_count(), 3);
1277 assert_eq!(view.column_count(), 3);
1278 }
1279
1280 #[test]
1281 fn test_select_columns() {
1282 let table = create_test_table();
1283 let engine = QueryEngine::new();
1284
1285 let view = engine
1286 .execute(table.clone(), "SELECT name, age FROM users")
1287 .unwrap();
1288 assert_eq!(view.row_count(), 3);
1289 assert_eq!(view.column_count(), 2);
1290 }
1291
1292 #[test]
1293 fn test_select_with_limit() {
1294 let table = create_test_table();
1295 let engine = QueryEngine::new();
1296
1297 let view = engine
1298 .execute(table.clone(), "SELECT * FROM users LIMIT 2")
1299 .unwrap();
1300 assert_eq!(view.row_count(), 2);
1301 }
1302
1303 #[test]
1304 fn test_type_coercion_contains() {
1305 let _ = tracing_subscriber::fmt()
1307 .with_max_level(tracing::Level::DEBUG)
1308 .try_init();
1309
1310 let mut table = DataTable::new("test");
1311 table.add_column(DataColumn::new("id"));
1312 table.add_column(DataColumn::new("status"));
1313 table.add_column(DataColumn::new("price"));
1314
1315 table
1317 .add_row(DataRow::new(vec![
1318 DataValue::Integer(1),
1319 DataValue::String("Pending".to_string()),
1320 DataValue::Float(99.99),
1321 ]))
1322 .unwrap();
1323
1324 table
1325 .add_row(DataRow::new(vec![
1326 DataValue::Integer(2),
1327 DataValue::String("Confirmed".to_string()),
1328 DataValue::Float(150.50),
1329 ]))
1330 .unwrap();
1331
1332 table
1333 .add_row(DataRow::new(vec![
1334 DataValue::Integer(3),
1335 DataValue::String("Pending".to_string()),
1336 DataValue::Float(75.00),
1337 ]))
1338 .unwrap();
1339
1340 let table = Arc::new(table);
1341 let engine = QueryEngine::new();
1342
1343 println!("\n=== Testing WHERE clause with Contains ===");
1344 println!("Table has {} rows", table.row_count());
1345 for i in 0..table.row_count() {
1346 let status = table.get_value(i, 1);
1347 println!("Row {i}: status = {status:?}");
1348 }
1349
1350 println!("\n--- Test 1: status.Contains('pend') ---");
1352 let result = engine.execute(
1353 table.clone(),
1354 "SELECT * FROM test WHERE status.Contains('pend')",
1355 );
1356 match result {
1357 Ok(view) => {
1358 println!("SUCCESS: Found {} matching rows", view.row_count());
1359 assert_eq!(view.row_count(), 2); }
1361 Err(e) => {
1362 panic!("Query failed: {e}");
1363 }
1364 }
1365
1366 println!("\n--- Test 2: price.Contains('9') ---");
1368 let result = engine.execute(
1369 table.clone(),
1370 "SELECT * FROM test WHERE price.Contains('9')",
1371 );
1372 match result {
1373 Ok(view) => {
1374 println!(
1375 "SUCCESS: Found {} matching rows with price containing '9'",
1376 view.row_count()
1377 );
1378 assert!(view.row_count() >= 1);
1380 }
1381 Err(e) => {
1382 panic!("Numeric coercion query failed: {e}");
1383 }
1384 }
1385
1386 println!("\n=== All tests passed! ===");
1387 }
1388
1389 #[test]
1390 fn test_not_in_clause() {
1391 let _ = tracing_subscriber::fmt()
1393 .with_max_level(tracing::Level::DEBUG)
1394 .try_init();
1395
1396 let mut table = DataTable::new("test");
1397 table.add_column(DataColumn::new("id"));
1398 table.add_column(DataColumn::new("country"));
1399
1400 table
1402 .add_row(DataRow::new(vec![
1403 DataValue::Integer(1),
1404 DataValue::String("CA".to_string()),
1405 ]))
1406 .unwrap();
1407
1408 table
1409 .add_row(DataRow::new(vec![
1410 DataValue::Integer(2),
1411 DataValue::String("US".to_string()),
1412 ]))
1413 .unwrap();
1414
1415 table
1416 .add_row(DataRow::new(vec![
1417 DataValue::Integer(3),
1418 DataValue::String("UK".to_string()),
1419 ]))
1420 .unwrap();
1421
1422 let table = Arc::new(table);
1423 let engine = QueryEngine::new();
1424
1425 println!("\n=== Testing NOT IN clause ===");
1426 println!("Table has {} rows", table.row_count());
1427 for i in 0..table.row_count() {
1428 let country = table.get_value(i, 1);
1429 println!("Row {i}: country = {country:?}");
1430 }
1431
1432 println!("\n--- Test: country NOT IN ('CA') ---");
1434 let result = engine.execute(
1435 table.clone(),
1436 "SELECT * FROM test WHERE country NOT IN ('CA')",
1437 );
1438 match result {
1439 Ok(view) => {
1440 println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
1441 assert_eq!(view.row_count(), 2); }
1443 Err(e) => {
1444 panic!("NOT IN query failed: {e}");
1445 }
1446 }
1447
1448 println!("\n=== NOT IN test complete! ===");
1449 }
1450
1451 #[test]
1452 fn test_case_insensitive_in_and_not_in() {
1453 let _ = tracing_subscriber::fmt()
1455 .with_max_level(tracing::Level::DEBUG)
1456 .try_init();
1457
1458 let mut table = DataTable::new("test");
1459 table.add_column(DataColumn::new("id"));
1460 table.add_column(DataColumn::new("country"));
1461
1462 table
1464 .add_row(DataRow::new(vec![
1465 DataValue::Integer(1),
1466 DataValue::String("CA".to_string()), ]))
1468 .unwrap();
1469
1470 table
1471 .add_row(DataRow::new(vec![
1472 DataValue::Integer(2),
1473 DataValue::String("us".to_string()), ]))
1475 .unwrap();
1476
1477 table
1478 .add_row(DataRow::new(vec![
1479 DataValue::Integer(3),
1480 DataValue::String("UK".to_string()), ]))
1482 .unwrap();
1483
1484 let table = Arc::new(table);
1485
1486 println!("\n=== Testing Case-Insensitive IN clause ===");
1487 println!("Table has {} rows", table.row_count());
1488 for i in 0..table.row_count() {
1489 let country = table.get_value(i, 1);
1490 println!("Row {i}: country = {country:?}");
1491 }
1492
1493 println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
1495 let engine = QueryEngine::with_case_insensitive(true);
1496 let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1497 match result {
1498 Ok(view) => {
1499 println!(
1500 "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
1501 view.row_count()
1502 );
1503 assert_eq!(view.row_count(), 1); }
1505 Err(e) => {
1506 panic!("Case-insensitive IN query failed: {e}");
1507 }
1508 }
1509
1510 println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
1512 let result = engine.execute(
1513 table.clone(),
1514 "SELECT * FROM test WHERE country NOT IN ('ca')",
1515 );
1516 match result {
1517 Ok(view) => {
1518 println!(
1519 "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
1520 view.row_count()
1521 );
1522 assert_eq!(view.row_count(), 2); }
1524 Err(e) => {
1525 panic!("Case-insensitive NOT IN query failed: {e}");
1526 }
1527 }
1528
1529 println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
1531 let engine_case_sensitive = QueryEngine::new(); let result = engine_case_sensitive
1533 .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1534 match result {
1535 Ok(view) => {
1536 println!(
1537 "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
1538 view.row_count()
1539 );
1540 assert_eq!(view.row_count(), 0); }
1542 Err(e) => {
1543 panic!("Case-sensitive IN query failed: {e}");
1544 }
1545 }
1546
1547 println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
1548 }
1549
1550 #[test]
1551 #[ignore = "Parentheses in WHERE clause not yet implemented"]
1552 fn test_parentheses_in_where_clause() {
1553 let _ = tracing_subscriber::fmt()
1555 .with_max_level(tracing::Level::DEBUG)
1556 .try_init();
1557
1558 let mut table = DataTable::new("test");
1559 table.add_column(DataColumn::new("id"));
1560 table.add_column(DataColumn::new("status"));
1561 table.add_column(DataColumn::new("priority"));
1562
1563 table
1565 .add_row(DataRow::new(vec![
1566 DataValue::Integer(1),
1567 DataValue::String("Pending".to_string()),
1568 DataValue::String("High".to_string()),
1569 ]))
1570 .unwrap();
1571
1572 table
1573 .add_row(DataRow::new(vec![
1574 DataValue::Integer(2),
1575 DataValue::String("Complete".to_string()),
1576 DataValue::String("High".to_string()),
1577 ]))
1578 .unwrap();
1579
1580 table
1581 .add_row(DataRow::new(vec![
1582 DataValue::Integer(3),
1583 DataValue::String("Pending".to_string()),
1584 DataValue::String("Low".to_string()),
1585 ]))
1586 .unwrap();
1587
1588 table
1589 .add_row(DataRow::new(vec![
1590 DataValue::Integer(4),
1591 DataValue::String("Complete".to_string()),
1592 DataValue::String("Low".to_string()),
1593 ]))
1594 .unwrap();
1595
1596 let table = Arc::new(table);
1597 let engine = QueryEngine::new();
1598
1599 println!("\n=== Testing Parentheses in WHERE clause ===");
1600 println!("Table has {} rows", table.row_count());
1601 for i in 0..table.row_count() {
1602 let status = table.get_value(i, 1);
1603 let priority = table.get_value(i, 2);
1604 println!("Row {i}: status = {status:?}, priority = {priority:?}");
1605 }
1606
1607 println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
1609 let result = engine.execute(
1610 table.clone(),
1611 "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
1612 );
1613 match result {
1614 Ok(view) => {
1615 println!(
1616 "SUCCESS: Found {} rows with parenthetical logic",
1617 view.row_count()
1618 );
1619 assert_eq!(view.row_count(), 2); }
1621 Err(e) => {
1622 panic!("Parentheses query failed: {e}");
1623 }
1624 }
1625
1626 println!("\n=== Parentheses test complete! ===");
1627 }
1628
1629 #[test]
1630 #[ignore = "Numeric type coercion needs fixing"]
1631 fn test_numeric_type_coercion() {
1632 let _ = tracing_subscriber::fmt()
1634 .with_max_level(tracing::Level::DEBUG)
1635 .try_init();
1636
1637 let mut table = DataTable::new("test");
1638 table.add_column(DataColumn::new("id"));
1639 table.add_column(DataColumn::new("price"));
1640 table.add_column(DataColumn::new("quantity"));
1641
1642 table
1644 .add_row(DataRow::new(vec![
1645 DataValue::Integer(1),
1646 DataValue::Float(99.50), DataValue::Integer(100),
1648 ]))
1649 .unwrap();
1650
1651 table
1652 .add_row(DataRow::new(vec![
1653 DataValue::Integer(2),
1654 DataValue::Float(150.0), DataValue::Integer(200),
1656 ]))
1657 .unwrap();
1658
1659 table
1660 .add_row(DataRow::new(vec![
1661 DataValue::Integer(3),
1662 DataValue::Integer(75), DataValue::Integer(50),
1664 ]))
1665 .unwrap();
1666
1667 let table = Arc::new(table);
1668 let engine = QueryEngine::new();
1669
1670 println!("\n=== Testing Numeric Type Coercion ===");
1671 println!("Table has {} rows", table.row_count());
1672 for i in 0..table.row_count() {
1673 let price = table.get_value(i, 1);
1674 let quantity = table.get_value(i, 2);
1675 println!("Row {i}: price = {price:?}, quantity = {quantity:?}");
1676 }
1677
1678 println!("\n--- Test: price.Contains('.') ---");
1680 let result = engine.execute(
1681 table.clone(),
1682 "SELECT * FROM test WHERE price.Contains('.')",
1683 );
1684 match result {
1685 Ok(view) => {
1686 println!(
1687 "SUCCESS: Found {} rows with decimal points in price",
1688 view.row_count()
1689 );
1690 assert_eq!(view.row_count(), 2); }
1692 Err(e) => {
1693 panic!("Numeric Contains query failed: {e}");
1694 }
1695 }
1696
1697 println!("\n--- Test: quantity.Contains('0') ---");
1699 let result = engine.execute(
1700 table.clone(),
1701 "SELECT * FROM test WHERE quantity.Contains('0')",
1702 );
1703 match result {
1704 Ok(view) => {
1705 println!(
1706 "SUCCESS: Found {} rows with '0' in quantity",
1707 view.row_count()
1708 );
1709 assert_eq!(view.row_count(), 2); }
1711 Err(e) => {
1712 panic!("Integer Contains query failed: {e}");
1713 }
1714 }
1715
1716 println!("\n=== Numeric type coercion test complete! ===");
1717 }
1718
1719 #[test]
1720 fn test_datetime_comparisons() {
1721 let _ = tracing_subscriber::fmt()
1723 .with_max_level(tracing::Level::DEBUG)
1724 .try_init();
1725
1726 let mut table = DataTable::new("test");
1727 table.add_column(DataColumn::new("id"));
1728 table.add_column(DataColumn::new("created_date"));
1729
1730 table
1732 .add_row(DataRow::new(vec![
1733 DataValue::Integer(1),
1734 DataValue::String("2024-12-15".to_string()),
1735 ]))
1736 .unwrap();
1737
1738 table
1739 .add_row(DataRow::new(vec![
1740 DataValue::Integer(2),
1741 DataValue::String("2025-01-15".to_string()),
1742 ]))
1743 .unwrap();
1744
1745 table
1746 .add_row(DataRow::new(vec![
1747 DataValue::Integer(3),
1748 DataValue::String("2025-02-15".to_string()),
1749 ]))
1750 .unwrap();
1751
1752 let table = Arc::new(table);
1753 let engine = QueryEngine::new();
1754
1755 println!("\n=== Testing DateTime Comparisons ===");
1756 println!("Table has {} rows", table.row_count());
1757 for i in 0..table.row_count() {
1758 let date = table.get_value(i, 1);
1759 println!("Row {i}: created_date = {date:?}");
1760 }
1761
1762 println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1764 let result = engine.execute(
1765 table.clone(),
1766 "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1767 );
1768 match result {
1769 Ok(view) => {
1770 println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1771 assert_eq!(view.row_count(), 2); }
1773 Err(e) => {
1774 panic!("DateTime comparison query failed: {e}");
1775 }
1776 }
1777
1778 println!("\n=== DateTime comparison test complete! ===");
1779 }
1780
1781 #[test]
1782 fn test_not_with_method_calls() {
1783 let _ = tracing_subscriber::fmt()
1785 .with_max_level(tracing::Level::DEBUG)
1786 .try_init();
1787
1788 let mut table = DataTable::new("test");
1789 table.add_column(DataColumn::new("id"));
1790 table.add_column(DataColumn::new("status"));
1791
1792 table
1794 .add_row(DataRow::new(vec![
1795 DataValue::Integer(1),
1796 DataValue::String("Pending Review".to_string()),
1797 ]))
1798 .unwrap();
1799
1800 table
1801 .add_row(DataRow::new(vec![
1802 DataValue::Integer(2),
1803 DataValue::String("Complete".to_string()),
1804 ]))
1805 .unwrap();
1806
1807 table
1808 .add_row(DataRow::new(vec![
1809 DataValue::Integer(3),
1810 DataValue::String("Pending Approval".to_string()),
1811 ]))
1812 .unwrap();
1813
1814 let table = Arc::new(table);
1815 let engine = QueryEngine::with_case_insensitive(true);
1816
1817 println!("\n=== Testing NOT with Method Calls ===");
1818 println!("Table has {} rows", table.row_count());
1819 for i in 0..table.row_count() {
1820 let status = table.get_value(i, 1);
1821 println!("Row {i}: status = {status:?}");
1822 }
1823
1824 println!("\n--- Test: NOT status.Contains('pend') ---");
1826 let result = engine.execute(
1827 table.clone(),
1828 "SELECT * FROM test WHERE NOT status.Contains('pend')",
1829 );
1830 match result {
1831 Ok(view) => {
1832 println!(
1833 "SUCCESS: Found {} rows NOT containing 'pend'",
1834 view.row_count()
1835 );
1836 assert_eq!(view.row_count(), 1); }
1838 Err(e) => {
1839 panic!("NOT Contains query failed: {e}");
1840 }
1841 }
1842
1843 println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1845 let result = engine.execute(
1846 table.clone(),
1847 "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1848 );
1849 match result {
1850 Ok(view) => {
1851 println!(
1852 "SUCCESS: Found {} rows NOT starting with 'Pending'",
1853 view.row_count()
1854 );
1855 assert_eq!(view.row_count(), 1); }
1857 Err(e) => {
1858 panic!("NOT StartsWith query failed: {e}");
1859 }
1860 }
1861
1862 println!("\n=== NOT with method calls test complete! ===");
1863 }
1864
1865 #[test]
1866 #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1867 fn test_complex_logical_expressions() {
1868 let _ = tracing_subscriber::fmt()
1870 .with_max_level(tracing::Level::DEBUG)
1871 .try_init();
1872
1873 let mut table = DataTable::new("test");
1874 table.add_column(DataColumn::new("id"));
1875 table.add_column(DataColumn::new("status"));
1876 table.add_column(DataColumn::new("priority"));
1877 table.add_column(DataColumn::new("assigned"));
1878
1879 table
1881 .add_row(DataRow::new(vec![
1882 DataValue::Integer(1),
1883 DataValue::String("Pending".to_string()),
1884 DataValue::String("High".to_string()),
1885 DataValue::String("John".to_string()),
1886 ]))
1887 .unwrap();
1888
1889 table
1890 .add_row(DataRow::new(vec![
1891 DataValue::Integer(2),
1892 DataValue::String("Complete".to_string()),
1893 DataValue::String("High".to_string()),
1894 DataValue::String("Jane".to_string()),
1895 ]))
1896 .unwrap();
1897
1898 table
1899 .add_row(DataRow::new(vec![
1900 DataValue::Integer(3),
1901 DataValue::String("Pending".to_string()),
1902 DataValue::String("Low".to_string()),
1903 DataValue::String("John".to_string()),
1904 ]))
1905 .unwrap();
1906
1907 table
1908 .add_row(DataRow::new(vec![
1909 DataValue::Integer(4),
1910 DataValue::String("In Progress".to_string()),
1911 DataValue::String("Medium".to_string()),
1912 DataValue::String("Jane".to_string()),
1913 ]))
1914 .unwrap();
1915
1916 let table = Arc::new(table);
1917 let engine = QueryEngine::new();
1918
1919 println!("\n=== Testing Complex Logical Expressions ===");
1920 println!("Table has {} rows", table.row_count());
1921 for i in 0..table.row_count() {
1922 let status = table.get_value(i, 1);
1923 let priority = table.get_value(i, 2);
1924 let assigned = table.get_value(i, 3);
1925 println!(
1926 "Row {i}: status = {status:?}, priority = {priority:?}, assigned = {assigned:?}"
1927 );
1928 }
1929
1930 println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
1932 let result = engine.execute(
1933 table.clone(),
1934 "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
1935 );
1936 match result {
1937 Ok(view) => {
1938 println!(
1939 "SUCCESS: Found {} rows with complex logic",
1940 view.row_count()
1941 );
1942 assert_eq!(view.row_count(), 2); }
1944 Err(e) => {
1945 panic!("Complex logic query failed: {e}");
1946 }
1947 }
1948
1949 println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
1951 let result = engine.execute(
1952 table.clone(),
1953 "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
1954 );
1955 match result {
1956 Ok(view) => {
1957 println!(
1958 "SUCCESS: Found {} rows with NOT complex logic",
1959 view.row_count()
1960 );
1961 assert_eq!(view.row_count(), 2); }
1963 Err(e) => {
1964 panic!("NOT complex logic query failed: {e}");
1965 }
1966 }
1967
1968 println!("\n=== Complex logical expressions test complete! ===");
1969 }
1970
1971 #[test]
1972 fn test_mixed_data_types_and_edge_cases() {
1973 let _ = tracing_subscriber::fmt()
1975 .with_max_level(tracing::Level::DEBUG)
1976 .try_init();
1977
1978 let mut table = DataTable::new("test");
1979 table.add_column(DataColumn::new("id"));
1980 table.add_column(DataColumn::new("value"));
1981 table.add_column(DataColumn::new("nullable_field"));
1982
1983 table
1985 .add_row(DataRow::new(vec![
1986 DataValue::Integer(1),
1987 DataValue::String("123.45".to_string()),
1988 DataValue::String("present".to_string()),
1989 ]))
1990 .unwrap();
1991
1992 table
1993 .add_row(DataRow::new(vec![
1994 DataValue::Integer(2),
1995 DataValue::Float(678.90),
1996 DataValue::Null,
1997 ]))
1998 .unwrap();
1999
2000 table
2001 .add_row(DataRow::new(vec![
2002 DataValue::Integer(3),
2003 DataValue::Boolean(true),
2004 DataValue::String("also present".to_string()),
2005 ]))
2006 .unwrap();
2007
2008 table
2009 .add_row(DataRow::new(vec![
2010 DataValue::Integer(4),
2011 DataValue::String("false".to_string()),
2012 DataValue::Null,
2013 ]))
2014 .unwrap();
2015
2016 let table = Arc::new(table);
2017 let engine = QueryEngine::new();
2018
2019 println!("\n=== Testing Mixed Data Types and Edge Cases ===");
2020 println!("Table has {} rows", table.row_count());
2021 for i in 0..table.row_count() {
2022 let value = table.get_value(i, 1);
2023 let nullable = table.get_value(i, 2);
2024 println!("Row {i}: value = {value:?}, nullable_field = {nullable:?}");
2025 }
2026
2027 println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
2029 let result = engine.execute(
2030 table.clone(),
2031 "SELECT * FROM test WHERE value.Contains('true')",
2032 );
2033 match result {
2034 Ok(view) => {
2035 println!(
2036 "SUCCESS: Found {} rows with boolean coercion",
2037 view.row_count()
2038 );
2039 assert_eq!(view.row_count(), 1); }
2041 Err(e) => {
2042 panic!("Boolean coercion query failed: {e}");
2043 }
2044 }
2045
2046 println!("\n--- Test: id IN (1, 3) ---");
2048 let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
2049 match result {
2050 Ok(view) => {
2051 println!("SUCCESS: Found {} rows with IN clause", view.row_count());
2052 assert_eq!(view.row_count(), 2); }
2054 Err(e) => {
2055 panic!("Multiple IN values query failed: {e}");
2056 }
2057 }
2058
2059 println!("\n=== Mixed data types test complete! ===");
2060 }
2061
2062 #[test]
2064 fn test_aggregate_only_single_row() {
2065 let table = create_test_stock_data();
2066 let engine = QueryEngine::new();
2067
2068 let result = engine
2070 .execute(
2071 table.clone(),
2072 "SELECT COUNT(*), MIN(close), MAX(close), AVG(close) FROM stock",
2073 )
2074 .expect("Query should succeed");
2075
2076 assert_eq!(
2077 result.row_count(),
2078 1,
2079 "Aggregate-only query should return exactly 1 row"
2080 );
2081 assert_eq!(result.column_count(), 4, "Should have 4 aggregate columns");
2082
2083 let source = result.source();
2085 let row = source.get_row(0).expect("Should have first row");
2086
2087 assert_eq!(row.values[0], DataValue::Integer(5));
2089
2090 assert_eq!(row.values[1], DataValue::Float(99.5));
2092
2093 assert_eq!(row.values[2], DataValue::Float(105.0));
2095
2096 if let DataValue::Float(avg) = &row.values[3] {
2098 assert!(
2099 (avg - 102.4).abs() < 0.01,
2100 "Average should be approximately 102.4, got {}",
2101 avg
2102 );
2103 } else {
2104 panic!("AVG should return a Float value");
2105 }
2106 }
2107
2108 #[test]
2110 fn test_single_aggregate_single_row() {
2111 let table = create_test_stock_data();
2112 let engine = QueryEngine::new();
2113
2114 let result = engine
2115 .execute(table.clone(), "SELECT COUNT(*) FROM stock")
2116 .expect("Query should succeed");
2117
2118 assert_eq!(
2119 result.row_count(),
2120 1,
2121 "Single aggregate query should return exactly 1 row"
2122 );
2123 assert_eq!(result.column_count(), 1, "Should have 1 column");
2124
2125 let source = result.source();
2126 let row = source.get_row(0).expect("Should have first row");
2127 assert_eq!(row.values[0], DataValue::Integer(5));
2128 }
2129
2130 #[test]
2132 fn test_aggregate_with_where_single_row() {
2133 let table = create_test_stock_data();
2134 let engine = QueryEngine::new();
2135
2136 let result = engine
2138 .execute(
2139 table.clone(),
2140 "SELECT COUNT(*), MIN(close), MAX(close) FROM stock WHERE close >= 103.0",
2141 )
2142 .expect("Query should succeed");
2143
2144 assert_eq!(
2145 result.row_count(),
2146 1,
2147 "Filtered aggregate query should return exactly 1 row"
2148 );
2149 assert_eq!(result.column_count(), 3, "Should have 3 aggregate columns");
2150
2151 let source = result.source();
2152 let row = source.get_row(0).expect("Should have first row");
2153
2154 assert_eq!(row.values[0], DataValue::Integer(2));
2156 assert_eq!(row.values[1], DataValue::Float(103.5)); assert_eq!(row.values[2], DataValue::Float(105.0)); }
2159
2160 #[test]
2161 fn test_not_in_parsing() {
2162 use crate::sql::recursive_parser::Parser;
2163
2164 let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
2165 println!("\n=== Testing NOT IN parsing ===");
2166 println!("Parsing query: {query}");
2167
2168 let mut parser = Parser::new(query);
2169 match parser.parse() {
2170 Ok(statement) => {
2171 println!("Parsed statement: {statement:#?}");
2172 if let Some(where_clause) = statement.where_clause {
2173 println!("WHERE conditions: {:#?}", where_clause.conditions);
2174 if let Some(first_condition) = where_clause.conditions.first() {
2175 println!("First condition expression: {:#?}", first_condition.expr);
2176 }
2177 }
2178 }
2179 Err(e) => {
2180 panic!("Parse error: {e}");
2181 }
2182 }
2183 }
2184
2185 fn create_test_stock_data() -> Arc<DataTable> {
2187 let mut table = DataTable::new("stock");
2188
2189 table.add_column(DataColumn::new("symbol"));
2190 table.add_column(DataColumn::new("close"));
2191 table.add_column(DataColumn::new("volume"));
2192
2193 let test_data = vec![
2195 ("AAPL", 99.5, 1000),
2196 ("AAPL", 101.2, 1500),
2197 ("AAPL", 103.5, 2000),
2198 ("AAPL", 105.0, 1200),
2199 ("AAPL", 102.8, 1800),
2200 ];
2201
2202 for (symbol, close, volume) in test_data {
2203 table
2204 .add_row(DataRow::new(vec![
2205 DataValue::String(symbol.to_string()),
2206 DataValue::Float(close),
2207 DataValue::Integer(volume),
2208 ]))
2209 .expect("Should add row successfully");
2210 }
2211
2212 Arc::new(table)
2213 }
2214}
2215
2216#[cfg(test)]
2217#[path = "query_engine_tests.rs"]
2218mod query_engine_tests;