1use anyhow::{anyhow, Result};
2use fxhash::{FxHashMap, FxHashSet};
3use std::cmp::min;
4use std::collections::HashMap;
5use std::sync::Arc;
6use std::time::Instant;
7use tracing::{debug, info};
8
9use crate::config::config::BehaviorConfig;
10use crate::config::global::get_date_notation;
11use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
12use crate::data::data_view::DataView;
13use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
14use crate::data::evaluation_context::EvaluationContext;
15use crate::data::group_by_expressions::GroupByExpressions;
16use crate::data::hash_join::HashJoinExecutor;
17use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
18use crate::data::subquery_executor::SubqueryExecutor;
19use crate::data::virtual_table_generator::VirtualTableGenerator;
20use crate::execution_plan::{ExecutionPlan, ExecutionPlanBuilder, StepType};
21use crate::sql::aggregates::{contains_aggregate, is_aggregate_compatible};
22use crate::sql::parser::ast::TableSource;
23use crate::sql::recursive_parser::{
24 CTEType, OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection, SqlExpression,
25 TableFunction,
26};
27
28#[derive(Clone)]
30pub struct QueryEngine {
31 case_insensitive: bool,
32 date_notation: String,
33 behavior_config: Option<BehaviorConfig>,
34}
35
36impl Default for QueryEngine {
37 fn default() -> Self {
38 Self::new()
39 }
40}
41
42impl QueryEngine {
43 #[must_use]
44 pub fn new() -> Self {
45 Self {
46 case_insensitive: false,
47 date_notation: get_date_notation(),
48 behavior_config: None,
49 }
50 }
51
52 #[must_use]
53 pub fn with_behavior_config(config: BehaviorConfig) -> Self {
54 let case_insensitive = config.case_insensitive_default;
55 let date_notation = get_date_notation();
57 Self {
58 case_insensitive,
59 date_notation,
60 behavior_config: Some(config),
61 }
62 }
63
64 #[must_use]
65 pub fn with_date_notation(_date_notation: String) -> Self {
66 Self {
67 case_insensitive: false,
68 date_notation: get_date_notation(), behavior_config: None,
70 }
71 }
72
73 #[must_use]
74 pub fn with_case_insensitive(case_insensitive: bool) -> Self {
75 Self {
76 case_insensitive,
77 date_notation: get_date_notation(),
78 behavior_config: None,
79 }
80 }
81
82 #[must_use]
83 pub fn with_case_insensitive_and_date_notation(
84 case_insensitive: bool,
85 _date_notation: String, ) -> Self {
87 Self {
88 case_insensitive,
89 date_notation: get_date_notation(), behavior_config: None,
91 }
92 }
93
94 fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
96 let columns = table.column_names();
97 let mut best_match: Option<(String, usize)> = None;
98
99 for col in columns {
100 let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
101 let max_distance = if name.len() > 10 { 3 } else { 2 };
104 if distance <= max_distance {
105 match &best_match {
106 None => best_match = Some((col, distance)),
107 Some((_, best_dist)) if distance < *best_dist => {
108 best_match = Some((col, distance));
109 }
110 _ => {}
111 }
112 }
113 }
114
115 best_match.map(|(name, _)| name)
116 }
117
118 fn edit_distance(&self, s1: &str, s2: &str) -> usize {
120 let len1 = s1.len();
121 let len2 = s2.len();
122 let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
123
124 for i in 0..=len1 {
125 matrix[i][0] = i;
126 }
127 for j in 0..=len2 {
128 matrix[0][j] = j;
129 }
130
131 for (i, c1) in s1.chars().enumerate() {
132 for (j, c2) in s2.chars().enumerate() {
133 let cost = usize::from(c1 != c2);
134 matrix[i + 1][j + 1] = std::cmp::min(
135 matrix[i][j + 1] + 1, std::cmp::min(
137 matrix[i + 1][j] + 1, matrix[i][j] + cost, ),
140 );
141 }
142 }
143
144 matrix[len1][len2]
145 }
146
147 pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
149 let (view, _plan) = self.execute_with_plan(table, sql)?;
150 Ok(view)
151 }
152
153 pub fn execute_statement(
155 &self,
156 table: Arc<DataTable>,
157 statement: SelectStatement,
158 ) -> Result<DataView> {
159 let mut cte_context = HashMap::new();
161 for cte in &statement.ctes {
162 debug!("QueryEngine: Pre-processing CTE '{}'...", cte.name);
163 let cte_result = match &cte.cte_type {
165 CTEType::Standard(query) => {
166 self.build_view_with_context(table.clone(), query.clone(), &mut cte_context)?
168 }
169 CTEType::Web(web_spec) => {
170 use crate::web::http_fetcher::WebDataFetcher;
172
173 let fetcher = WebDataFetcher::new()?;
174 let data_table = fetcher.fetch(web_spec, &cte.name)?;
175
176 DataView::new(Arc::new(data_table))
178 }
179 };
180 cte_context.insert(cte.name.clone(), Arc::new(cte_result));
182 debug!(
183 "QueryEngine: CTE '{}' pre-processed, stored in context",
184 cte.name
185 );
186 }
187
188 let mut subquery_executor =
190 SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
191 let processed_statement = subquery_executor.execute_subqueries(&statement)?;
192
193 self.build_view_with_context(table, processed_statement, &mut cte_context)
195 }
196
197 pub fn execute_statement_with_cte_context(
199 &self,
200 table: Arc<DataTable>,
201 statement: SelectStatement,
202 cte_context: &HashMap<String, Arc<DataView>>,
203 ) -> Result<DataView> {
204 let mut local_context = cte_context.clone();
206
207 for cte in &statement.ctes {
209 debug!("QueryEngine: Processing nested CTE '{}'...", cte.name);
210 let cte_result = match &cte.cte_type {
211 CTEType::Standard(query) => {
212 self.build_view_with_context(table.clone(), query.clone(), &mut local_context)?
213 }
214 CTEType::Web(web_spec) => {
215 use crate::web::http_fetcher::WebDataFetcher;
217
218 let fetcher = WebDataFetcher::new()?;
219 let data_table = fetcher.fetch(web_spec, &cte.name)?;
220
221 DataView::new(Arc::new(data_table))
223 }
224 };
225 local_context.insert(cte.name.clone(), Arc::new(cte_result));
226 }
227
228 let mut subquery_executor =
230 SubqueryExecutor::with_cte_context(self.clone(), table.clone(), local_context.clone());
231 let processed_statement = subquery_executor.execute_subqueries(&statement)?;
232
233 self.build_view_with_context(table, processed_statement, &mut local_context)
235 }
236
237 pub fn execute_with_plan(
239 &self,
240 table: Arc<DataTable>,
241 sql: &str,
242 ) -> Result<(DataView, ExecutionPlan)> {
243 let mut plan_builder = ExecutionPlanBuilder::new();
244 let start_time = Instant::now();
245
246 plan_builder.begin_step(StepType::Parse, "Parse SQL query".to_string());
248 plan_builder.add_detail(format!("Query: {}", sql));
249 let mut parser = Parser::new(sql);
250 let statement = parser
251 .parse()
252 .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
253 plan_builder.add_detail(format!("Parsed successfully"));
254 if let Some(from) = &statement.from_table {
255 plan_builder.add_detail(format!("FROM: {}", from));
256 }
257 if statement.where_clause.is_some() {
258 plan_builder.add_detail("WHERE clause present".to_string());
259 }
260 plan_builder.end_step();
261
262 let mut cte_context = HashMap::new();
264
265 if !statement.ctes.is_empty() {
266 plan_builder.begin_step(
267 StepType::CTE,
268 format!("Process {} CTEs", statement.ctes.len()),
269 );
270
271 for cte in &statement.ctes {
272 let cte_start = Instant::now();
273 plan_builder.begin_step(StepType::CTE, format!("CTE '{}'", cte.name));
274
275 let cte_result = match &cte.cte_type {
276 CTEType::Standard(query) => {
277 if let Some(from) = &query.from_table {
279 plan_builder.add_detail(format!("Source: {}", from));
280 }
281 if query.where_clause.is_some() {
282 plan_builder.add_detail("Has WHERE clause".to_string());
283 }
284 if query.group_by.is_some() {
285 plan_builder.add_detail("Has GROUP BY".to_string());
286 }
287
288 debug!(
289 "QueryEngine: Processing CTE '{}' with existing context: {:?}",
290 cte.name,
291 cte_context.keys().collect::<Vec<_>>()
292 );
293
294 let mut subquery_executor = SubqueryExecutor::with_cte_context(
297 self.clone(),
298 table.clone(),
299 cte_context.clone(),
300 );
301 let processed_query = subquery_executor.execute_subqueries(query)?;
302
303 self.build_view_with_context(
304 table.clone(),
305 processed_query,
306 &mut cte_context,
307 )?
308 }
309 CTEType::Web(web_spec) => {
310 plan_builder.add_detail(format!("URL: {}", web_spec.url));
311 if let Some(format) = &web_spec.format {
312 plan_builder.add_detail(format!("Format: {:?}", format));
313 }
314 if let Some(cache) = web_spec.cache_seconds {
315 plan_builder.add_detail(format!("Cache: {} seconds", cache));
316 }
317
318 use crate::web::http_fetcher::WebDataFetcher;
320
321 let fetcher = WebDataFetcher::new()?;
322 let data_table = fetcher.fetch(web_spec, &cte.name)?;
323
324 DataView::new(Arc::new(data_table))
326 }
327 };
328
329 plan_builder.set_rows_out(cte_result.row_count());
331 plan_builder.add_detail(format!(
332 "Result: {} rows, {} columns",
333 cte_result.row_count(),
334 cte_result.column_count()
335 ));
336 plan_builder.add_detail(format!(
337 "Execution time: {:.3}ms",
338 cte_start.elapsed().as_secs_f64() * 1000.0
339 ));
340
341 debug!(
342 "QueryEngine: Storing CTE '{}' in context with {} rows",
343 cte.name,
344 cte_result.row_count()
345 );
346 cte_context.insert(cte.name.clone(), Arc::new(cte_result));
347 plan_builder.end_step();
348 }
349
350 plan_builder.add_detail(format!(
351 "All {} CTEs cached in context",
352 statement.ctes.len()
353 ));
354 plan_builder.end_step();
355 }
356
357 plan_builder.begin_step(StepType::Subquery, "Process subqueries".to_string());
359 let mut subquery_executor =
360 SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
361
362 let has_subqueries = statement.where_clause.as_ref().map_or(false, |w| {
364 format!("{:?}", w).contains("Subquery")
366 });
367
368 if has_subqueries {
369 plan_builder.add_detail("Evaluating subqueries in WHERE clause".to_string());
370 }
371
372 let processed_statement = subquery_executor.execute_subqueries(&statement)?;
373
374 if has_subqueries {
375 plan_builder.add_detail("Subqueries replaced with materialized values".to_string());
376 } else {
377 plan_builder.add_detail("No subqueries to process".to_string());
378 }
379
380 plan_builder.end_step();
381 let result = self.build_view_with_context_and_plan(
382 table,
383 processed_statement,
384 &mut cte_context,
385 &mut plan_builder,
386 )?;
387
388 let total_duration = start_time.elapsed();
389 info!(
390 "Query execution complete: total={:?}, rows={}",
391 total_duration,
392 result.row_count()
393 );
394
395 let plan = plan_builder.build();
396 Ok((result, plan))
397 }
398
399 fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
401 let mut cte_context = HashMap::new();
402 self.build_view_with_context(table, statement, &mut cte_context)
403 }
404
405 fn build_view_with_context(
407 &self,
408 table: Arc<DataTable>,
409 statement: SelectStatement,
410 cte_context: &mut HashMap<String, Arc<DataView>>,
411 ) -> Result<DataView> {
412 let mut dummy_plan = ExecutionPlanBuilder::new();
413 self.build_view_with_context_and_plan(table, statement, cte_context, &mut dummy_plan)
414 }
415
416 fn build_view_with_context_and_plan(
418 &self,
419 table: Arc<DataTable>,
420 statement: SelectStatement,
421 cte_context: &mut HashMap<String, Arc<DataView>>,
422 plan: &mut ExecutionPlanBuilder,
423 ) -> Result<DataView> {
424 for cte in &statement.ctes {
426 if cte_context.contains_key(&cte.name) {
428 debug!(
429 "QueryEngine: CTE '{}' already in context, skipping",
430 cte.name
431 );
432 continue;
433 }
434
435 debug!("QueryEngine: Processing CTE '{}'...", cte.name);
436 debug!(
437 "QueryEngine: Available CTEs for '{}': {:?}",
438 cte.name,
439 cte_context.keys().collect::<Vec<_>>()
440 );
441
442 let cte_result = match &cte.cte_type {
444 CTEType::Standard(query) => {
445 self.build_view_with_context(table.clone(), query.clone(), cte_context)?
446 }
447 CTEType::Web(_web_spec) => {
448 return Err(anyhow!(
450 "Web CTEs should be processed in execute_select method"
451 ));
452 }
453 };
454
455 cte_context.insert(cte.name.clone(), Arc::new(cte_result));
457 debug!(
458 "QueryEngine: CTE '{}' processed, stored in context",
459 cte.name
460 );
461 }
462
463 let source_table = if let Some(ref table_func) = statement.from_function {
465 debug!("QueryEngine: Processing table function...");
467 match table_func {
468 TableFunction::Range { start, end, step } => {
469 let mut evaluator =
471 ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
472
473 let dummy_row = 0;
475
476 let start_val = evaluator.evaluate(start, dummy_row)?;
477 let end_val = evaluator.evaluate(end, dummy_row)?;
478 let step_val = if let Some(step_expr) = step {
479 Some(evaluator.evaluate(step_expr, dummy_row)?)
480 } else {
481 None
482 };
483
484 let start_int = match start_val {
486 DataValue::Integer(i) => i,
487 DataValue::Float(f) => f as i64,
488 _ => return Err(anyhow!("RANGE start must be numeric")),
489 };
490
491 let end_int = match end_val {
492 DataValue::Integer(i) => i,
493 DataValue::Float(f) => f as i64,
494 _ => return Err(anyhow!("RANGE end must be numeric")),
495 };
496
497 let step_int = if let Some(step) = step_val {
498 match step {
499 DataValue::Integer(i) => Some(i),
500 DataValue::Float(f) => Some(f as i64),
501 _ => return Err(anyhow!("RANGE step must be numeric")),
502 }
503 } else {
504 None
505 };
506
507 VirtualTableGenerator::generate_range(start_int, end_int, step_int, None)?
509 }
510 TableFunction::Split { text, delimiter } => {
511 let mut evaluator =
513 ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
514
515 let dummy_row = 0;
517
518 let text_val = evaluator.evaluate(text, dummy_row)?;
519 let delimiter_val = if let Some(delim_expr) = delimiter {
520 Some(evaluator.evaluate(delim_expr, dummy_row)?)
521 } else {
522 None
523 };
524
525 let text_str = match text_val {
527 DataValue::String(s) => s,
528 DataValue::Null => return Err(anyhow!("SPLIT text cannot be NULL")),
529 _ => text_val.to_string(),
530 };
531
532 let delimiter_str = if let Some(delim) = delimiter_val {
533 match delim {
534 DataValue::String(s) => Some(s),
535 DataValue::Null => None,
536 _ => Some(delim.to_string()),
537 }
538 } else {
539 None
540 };
541
542 VirtualTableGenerator::generate_split(
544 &text_str,
545 delimiter_str.as_deref(),
546 None,
547 )?
548 }
549 TableFunction::Generator { name, args } => {
550 use crate::sql::generators::GeneratorRegistry;
552
553 let registry = GeneratorRegistry::new();
555
556 if let Some(generator) = registry.get(name) {
557 let mut evaluator = ArithmeticEvaluator::with_date_notation(
559 &table,
560 self.date_notation.clone(),
561 );
562 let dummy_row = 0;
563
564 let mut evaluated_args = Vec::new();
565 for arg in args {
566 evaluated_args.push(evaluator.evaluate(arg, dummy_row)?);
567 }
568
569 generator.generate(evaluated_args)?
571 } else {
572 return Err(anyhow!("Unknown generator function: {}", name));
573 }
574 }
575 }
576 } else if let Some(ref subquery) = statement.from_subquery {
577 debug!("QueryEngine: Processing FROM subquery...");
579 let subquery_result =
580 self.build_view_with_context(table.clone(), *subquery.clone(), cte_context)?;
581
582 let materialized = self.materialize_view(subquery_result)?;
585 Arc::new(materialized)
586 } else if let Some(ref table_name) = statement.from_table {
587 if let Some(cte_view) = cte_context.get(table_name) {
589 debug!("QueryEngine: Using CTE '{}' as source table", table_name);
590 let materialized = self.materialize_view((**cte_view).clone())?;
592 Arc::new(materialized)
593 } else {
594 table.clone()
596 }
597 } else {
598 table.clone()
600 };
601
602 let final_table = if !statement.joins.is_empty() {
604 plan.begin_step(
605 StepType::Join,
606 format!("Process {} JOINs", statement.joins.len()),
607 );
608 plan.set_rows_in(source_table.row_count());
609
610 let join_executor = HashJoinExecutor::new(self.case_insensitive);
611 let mut current_table = source_table;
612
613 for (idx, join_clause) in statement.joins.iter().enumerate() {
614 let join_start = Instant::now();
615 plan.begin_step(StepType::Join, format!("JOIN #{}", idx + 1));
616 plan.add_detail(format!("Type: {:?}", join_clause.join_type));
617 plan.add_detail(format!("Left table: {} rows", current_table.row_count()));
618 plan.add_detail(format!(
619 "Executing {:?} JOIN on {}",
620 join_clause.join_type, join_clause.condition.left_column
621 ));
622
623 let right_table = match &join_clause.table {
625 TableSource::Table(name) => {
626 if let Some(cte_view) = cte_context.get(name) {
628 let materialized = self.materialize_view((**cte_view).clone())?;
629 Arc::new(materialized)
630 } else {
631 return Err(anyhow!("Cannot resolve table '{}' for JOIN", name));
634 }
635 }
636 TableSource::DerivedTable { query, alias: _ } => {
637 let subquery_result = self.build_view_with_context(
639 table.clone(),
640 *query.clone(),
641 cte_context,
642 )?;
643 let materialized = self.materialize_view(subquery_result)?;
644 Arc::new(materialized)
645 }
646 };
647
648 let joined = join_executor.execute_join(
650 current_table.clone(),
651 join_clause,
652 right_table.clone(),
653 )?;
654
655 plan.add_detail(format!("Right table: {} rows", right_table.row_count()));
656 plan.set_rows_out(joined.row_count());
657 plan.add_detail(format!("Result: {} rows", joined.row_count()));
658 plan.add_detail(format!(
659 "Join time: {:.3}ms",
660 join_start.elapsed().as_secs_f64() * 1000.0
661 ));
662 plan.end_step();
663
664 current_table = Arc::new(joined);
665 }
666
667 plan.set_rows_out(current_table.row_count());
668 plan.add_detail(format!(
669 "Final result after all joins: {} rows",
670 current_table.row_count()
671 ));
672 plan.end_step();
673 current_table
674 } else {
675 source_table
676 };
677
678 self.build_view_internal_with_plan(final_table, statement, plan)
680 }
681
682 fn materialize_view(&self, view: DataView) -> Result<DataTable> {
684 let source = view.source();
685 let mut result_table = DataTable::new("derived");
686
687 let visible_cols = view.visible_column_indices().to_vec();
689
690 for col_idx in &visible_cols {
692 let col = &source.columns[*col_idx];
693 let new_col = DataColumn {
694 name: col.name.clone(),
695 data_type: col.data_type.clone(),
696 nullable: col.nullable,
697 unique_values: col.unique_values,
698 null_count: col.null_count,
699 metadata: col.metadata.clone(),
700 };
701 result_table.add_column(new_col);
702 }
703
704 for row_idx in view.visible_row_indices() {
706 let source_row = &source.rows[*row_idx];
707 let mut new_row = DataRow { values: Vec::new() };
708
709 for col_idx in &visible_cols {
710 new_row.values.push(source_row.values[*col_idx].clone());
711 }
712
713 result_table.add_row(new_row);
714 }
715
716 Ok(result_table)
717 }
718
719 fn build_view_internal(
720 &self,
721 table: Arc<DataTable>,
722 statement: SelectStatement,
723 ) -> Result<DataView> {
724 let mut dummy_plan = ExecutionPlanBuilder::new();
725 self.build_view_internal_with_plan(table, statement, &mut dummy_plan)
726 }
727
728 fn build_view_internal_with_plan(
729 &self,
730 table: Arc<DataTable>,
731 statement: SelectStatement,
732 plan: &mut ExecutionPlanBuilder,
733 ) -> Result<DataView> {
734 debug!(
735 "QueryEngine::build_view - select_items: {:?}",
736 statement.select_items
737 );
738 debug!(
739 "QueryEngine::build_view - where_clause: {:?}",
740 statement.where_clause
741 );
742
743 let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
745
746 if let Some(where_clause) = &statement.where_clause {
748 let total_rows = table.row_count();
749 debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
750 debug!("QueryEngine: WHERE clause = {:?}", where_clause);
751
752 plan.begin_step(StepType::Filter, "WHERE clause filtering".to_string());
753 plan.set_rows_in(total_rows);
754 plan.add_detail(format!("Input: {} rows", total_rows));
755
756 for condition in &where_clause.conditions {
758 plan.add_detail(format!("Condition: {:?}", condition.expr));
759 }
760
761 let filter_start = Instant::now();
762 let mut eval_context = EvaluationContext::new(self.case_insensitive);
764
765 let mut filtered_rows = Vec::new();
767 for row_idx in visible_rows {
768 if row_idx < 3 {
770 debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
771 }
772 let mut evaluator =
773 RecursiveWhereEvaluator::with_context(&table, &mut eval_context);
774 match evaluator.evaluate(where_clause, row_idx) {
775 Ok(result) => {
776 if row_idx < 3 {
777 debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
778 }
779 if result {
780 filtered_rows.push(row_idx);
781 }
782 }
783 Err(e) => {
784 if row_idx < 3 {
785 debug!(
786 "QueryEngine: WHERE evaluation error for row {}: {}",
787 row_idx, e
788 );
789 }
790 return Err(e);
792 }
793 }
794 }
795
796 let (compilations, cache_hits) = eval_context.get_stats();
798 if compilations > 0 || cache_hits > 0 {
799 debug!(
800 "LIKE pattern cache: {} compilations, {} cache hits",
801 compilations, cache_hits
802 );
803 }
804 visible_rows = filtered_rows;
805 let filter_duration = filter_start.elapsed();
806 info!(
807 "WHERE clause filtering: {} rows -> {} rows in {:?}",
808 total_rows,
809 visible_rows.len(),
810 filter_duration
811 );
812
813 plan.set_rows_out(visible_rows.len());
814 plan.add_detail(format!("Output: {} rows", visible_rows.len()));
815 plan.add_detail(format!(
816 "Filter time: {:.3}ms",
817 filter_duration.as_secs_f64() * 1000.0
818 ));
819 plan.end_step();
820 }
821
822 let mut view = DataView::new(table.clone());
824 view = view.with_rows(visible_rows);
825
826 if let Some(group_by_exprs) = &statement.group_by {
828 if !group_by_exprs.is_empty() {
829 debug!("QueryEngine: Processing GROUP BY: {:?}", group_by_exprs);
830
831 plan.begin_step(
832 StepType::GroupBy,
833 format!("GROUP BY {} expressions", group_by_exprs.len()),
834 );
835 plan.set_rows_in(view.row_count());
836 plan.add_detail(format!("Input: {} rows", view.row_count()));
837 for expr in group_by_exprs {
838 plan.add_detail(format!("Group by: {:?}", expr));
839 }
840
841 let group_start = Instant::now();
842 view = self.apply_group_by(
843 view,
844 group_by_exprs,
845 &statement.select_items,
846 statement.having.as_ref(),
847 )?;
848
849 plan.set_rows_out(view.row_count());
850 plan.add_detail(format!("Output: {} groups", view.row_count()));
851 plan.add_detail(format!(
852 "Group time: {:.3}ms",
853 group_start.elapsed().as_secs_f64() * 1000.0
854 ));
855 plan.end_step();
856 }
857 } else {
858 if !statement.select_items.is_empty() {
860 let has_non_star_items = statement
862 .select_items
863 .iter()
864 .any(|item| !matches!(item, SelectItem::Star));
865
866 if has_non_star_items || statement.select_items.len() > 1 {
870 view = self.apply_select_items(view, &statement.select_items)?;
871 }
872 } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
874 let column_indices = self.resolve_column_indices(&table, &statement.columns)?;
876 view = view.with_columns(column_indices);
877 }
878 }
879
880 if statement.distinct {
882 plan.begin_step(StepType::Distinct, "Remove duplicate rows".to_string());
883 plan.set_rows_in(view.row_count());
884 plan.add_detail(format!("Input: {} rows", view.row_count()));
885
886 let distinct_start = Instant::now();
887 view = self.apply_distinct(view)?;
888
889 plan.set_rows_out(view.row_count());
890 plan.add_detail(format!("Output: {} unique rows", view.row_count()));
891 plan.add_detail(format!(
892 "Distinct time: {:.3}ms",
893 distinct_start.elapsed().as_secs_f64() * 1000.0
894 ));
895 plan.end_step();
896 }
897
898 if let Some(order_by_columns) = &statement.order_by {
900 if !order_by_columns.is_empty() {
901 plan.begin_step(
902 StepType::Sort,
903 format!("ORDER BY {} columns", order_by_columns.len()),
904 );
905 plan.set_rows_in(view.row_count());
906 for col in order_by_columns {
907 plan.add_detail(format!("{} {:?}", col.column, col.direction));
908 }
909
910 let sort_start = Instant::now();
911 view = self.apply_multi_order_by(view, order_by_columns)?;
912
913 plan.add_detail(format!(
914 "Sort time: {:.3}ms",
915 sort_start.elapsed().as_secs_f64() * 1000.0
916 ));
917 plan.end_step();
918 }
919 }
920
921 if let Some(limit) = statement.limit {
923 let offset = statement.offset.unwrap_or(0);
924 plan.begin_step(StepType::Limit, format!("LIMIT {}", limit));
925 plan.set_rows_in(view.row_count());
926 if offset > 0 {
927 plan.add_detail(format!("OFFSET: {}", offset));
928 }
929 view = view.with_limit(limit, offset);
930 plan.set_rows_out(view.row_count());
931 plan.add_detail(format!("Output: {} rows", view.row_count()));
932 plan.end_step();
933 }
934
935 Ok(view)
936 }
937
938 fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
940 let mut indices = Vec::new();
941 let table_columns = table.column_names();
942
943 for col_name in columns {
944 let index = table_columns
945 .iter()
946 .position(|c| c.eq_ignore_ascii_case(col_name))
947 .ok_or_else(|| {
948 let suggestion = self.find_similar_column(table, col_name);
949 match suggestion {
950 Some(similar) => anyhow::anyhow!(
951 "Column '{}' not found. Did you mean '{}'?",
952 col_name,
953 similar
954 ),
955 None => anyhow::anyhow!("Column '{}' not found", col_name),
956 }
957 })?;
958 indices.push(index);
959 }
960
961 Ok(indices)
962 }
963
964 fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
966 debug!(
967 "QueryEngine::apply_select_items - items: {:?}",
968 select_items
969 );
970 debug!(
971 "QueryEngine::apply_select_items - input view has {} rows",
972 view.row_count()
973 );
974
975 let has_aggregates = select_items.iter().any(|item| match item {
979 SelectItem::Expression { expr, .. } => contains_aggregate(expr),
980 SelectItem::Column(_) => false,
981 SelectItem::Star => false,
982 });
983
984 let all_aggregate_compatible = select_items.iter().all(|item| match item {
985 SelectItem::Expression { expr, .. } => is_aggregate_compatible(expr),
986 SelectItem::Column(_) => false, SelectItem::Star => false, });
989
990 if has_aggregates && all_aggregate_compatible && view.row_count() > 0 {
991 debug!("QueryEngine::apply_select_items - detected aggregate query with constants");
994 return self.apply_aggregate_select(view, select_items);
995 }
996
997 let has_computed_expressions = select_items
999 .iter()
1000 .any(|item| matches!(item, SelectItem::Expression { .. }));
1001
1002 debug!(
1003 "QueryEngine::apply_select_items - has_computed_expressions: {}",
1004 has_computed_expressions
1005 );
1006
1007 if !has_computed_expressions {
1008 let column_indices = self.resolve_select_columns(view.source(), select_items)?;
1010 return Ok(view.with_columns(column_indices));
1011 }
1012
1013 let source_table = view.source();
1018 let visible_rows = view.visible_row_indices();
1019
1020 let mut computed_table = DataTable::new("query_result");
1023
1024 let mut expanded_items = Vec::new();
1026 for item in select_items {
1027 match item {
1028 SelectItem::Star => {
1029 for col_name in source_table.column_names() {
1031 expanded_items.push(SelectItem::Column(col_name));
1032 }
1033 }
1034 _ => expanded_items.push(item.clone()),
1035 }
1036 }
1037
1038 let mut column_name_counts: std::collections::HashMap<String, usize> =
1040 std::collections::HashMap::new();
1041
1042 for item in &expanded_items {
1043 let base_name = match item {
1044 SelectItem::Column(name) => name.clone(),
1045 SelectItem::Expression { alias, .. } => alias.clone(),
1046 SelectItem::Star => unreachable!("Star should have been expanded"),
1047 };
1048
1049 let count = column_name_counts.entry(base_name.clone()).or_insert(0);
1051 let column_name = if *count == 0 {
1052 base_name.clone()
1054 } else {
1055 format!("{base_name}_{count}")
1057 };
1058 *count += 1;
1059
1060 computed_table.add_column(DataColumn::new(&column_name));
1061 }
1062
1063 let mut evaluator =
1065 ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
1066
1067 for &row_idx in visible_rows {
1068 let mut row_values = Vec::new();
1069
1070 for item in &expanded_items {
1071 let value = match item {
1072 SelectItem::Column(col_name) => {
1073 let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
1075 let suggestion = self.find_similar_column(source_table, col_name);
1076 match suggestion {
1077 Some(similar) => anyhow::anyhow!(
1078 "Column '{}' not found. Did you mean '{}'?",
1079 col_name,
1080 similar
1081 ),
1082 None => anyhow::anyhow!("Column '{}' not found", col_name),
1083 }
1084 })?;
1085 let row = source_table
1086 .get_row(row_idx)
1087 .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
1088 row.get(col_idx)
1089 .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
1090 .clone()
1091 }
1092 SelectItem::Expression { expr, .. } => {
1093 evaluator.evaluate(expr, row_idx)?
1095 }
1096 SelectItem::Star => unreachable!("Star should have been expanded"),
1097 };
1098 row_values.push(value);
1099 }
1100
1101 computed_table
1102 .add_row(DataRow::new(row_values))
1103 .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
1104 }
1105
1106 Ok(DataView::new(Arc::new(computed_table)))
1109 }
1110
1111 fn apply_aggregate_select(
1113 &self,
1114 view: DataView,
1115 select_items: &[SelectItem],
1116 ) -> Result<DataView> {
1117 debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
1118
1119 let source_table = view.source();
1120 let mut result_table = DataTable::new("aggregate_result");
1121
1122 for item in select_items {
1124 let column_name = match item {
1125 SelectItem::Expression { alias, .. } => alias.clone(),
1126 _ => unreachable!("Should only have expressions in aggregate-only query"),
1127 };
1128 result_table.add_column(DataColumn::new(&column_name));
1129 }
1130
1131 let visible_rows = view.visible_row_indices().to_vec();
1133 let mut evaluator =
1134 ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
1135 .with_visible_rows(visible_rows);
1136
1137 let mut row_values = Vec::new();
1139 for item in select_items {
1140 match item {
1141 SelectItem::Expression { expr, .. } => {
1142 let value = evaluator.evaluate(expr, 0)?;
1145 row_values.push(value);
1146 }
1147 _ => unreachable!("Should only have expressions in aggregate-only query"),
1148 }
1149 }
1150
1151 result_table
1153 .add_row(DataRow::new(row_values))
1154 .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
1155
1156 Ok(DataView::new(Arc::new(result_table)))
1157 }
1158
1159 fn resolve_select_columns(
1161 &self,
1162 table: &DataTable,
1163 select_items: &[SelectItem],
1164 ) -> Result<Vec<usize>> {
1165 let mut indices = Vec::new();
1166 let table_columns = table.column_names();
1167
1168 for item in select_items {
1169 match item {
1170 SelectItem::Column(col_name) => {
1171 let index = table_columns
1172 .iter()
1173 .position(|c| c.eq_ignore_ascii_case(col_name))
1174 .ok_or_else(|| {
1175 let suggestion = self.find_similar_column(table, col_name);
1176 match suggestion {
1177 Some(similar) => anyhow::anyhow!(
1178 "Column '{}' not found. Did you mean '{}'?",
1179 col_name,
1180 similar
1181 ),
1182 None => anyhow::anyhow!("Column '{}' not found", col_name),
1183 }
1184 })?;
1185 indices.push(index);
1186 }
1187 SelectItem::Star => {
1188 for i in 0..table_columns.len() {
1190 indices.push(i);
1191 }
1192 }
1193 SelectItem::Expression { .. } => {
1194 return Err(anyhow::anyhow!(
1195 "Computed expressions require new table creation"
1196 ));
1197 }
1198 }
1199 }
1200
1201 Ok(indices)
1202 }
1203
1204 fn apply_distinct(&self, view: DataView) -> Result<DataView> {
1206 use std::collections::HashSet;
1207
1208 let source = view.source();
1209 let visible_cols = view.visible_column_indices();
1210 let visible_rows = view.visible_row_indices();
1211
1212 let mut seen_rows = HashSet::new();
1214 let mut unique_row_indices = Vec::new();
1215
1216 for &row_idx in visible_rows {
1217 let mut row_key = Vec::new();
1219 for &col_idx in visible_cols {
1220 let value = source
1221 .get_value(row_idx, col_idx)
1222 .ok_or_else(|| anyhow!("Invalid cell reference"))?;
1223 row_key.push(format!("{:?}", value));
1225 }
1226
1227 if seen_rows.insert(row_key) {
1229 unique_row_indices.push(row_idx);
1231 }
1232 }
1233
1234 Ok(view.with_rows(unique_row_indices))
1236 }
1237
1238 fn apply_multi_order_by(
1240 &self,
1241 mut view: DataView,
1242 order_by_columns: &[OrderByColumn],
1243 ) -> Result<DataView> {
1244 let mut sort_columns = Vec::new();
1246
1247 for order_col in order_by_columns {
1248 let col_index = view
1252 .source()
1253 .get_column_index(&order_col.column)
1254 .ok_or_else(|| {
1255 let suggestion = self.find_similar_column(view.source(), &order_col.column);
1257 match suggestion {
1258 Some(similar) => anyhow::anyhow!(
1259 "Column '{}' not found. Did you mean '{}'?",
1260 order_col.column,
1261 similar
1262 ),
1263 None => {
1264 let available_cols = view.source().column_names().join(", ");
1266 anyhow::anyhow!(
1267 "Column '{}' not found. Available columns: {}",
1268 order_col.column,
1269 available_cols
1270 )
1271 }
1272 }
1273 })?;
1274
1275 let ascending = matches!(order_col.direction, SortDirection::Asc);
1276 sort_columns.push((col_index, ascending));
1277 }
1278
1279 view.apply_multi_sort(&sort_columns)?;
1281 Ok(view)
1282 }
1283
1284 fn apply_group_by(
1286 &self,
1287 view: DataView,
1288 group_by_exprs: &[SqlExpression],
1289 select_items: &[SelectItem],
1290 having: Option<&SqlExpression>,
1291 ) -> Result<DataView> {
1292 self.apply_group_by_expressions(
1294 view,
1295 group_by_exprs,
1296 select_items,
1297 having,
1298 self.case_insensitive,
1299 self.date_notation.clone(),
1300 )
1301 }
1302
1303 pub fn estimate_group_cardinality(
1306 &self,
1307 view: &DataView,
1308 group_by_exprs: &[SqlExpression],
1309 ) -> usize {
1310 let row_count = view.get_visible_rows().len();
1312 if row_count <= 100 {
1313 return row_count;
1314 }
1315
1316 let sample_size = min(1000, row_count / 10).max(100);
1318 let mut seen = FxHashSet::default();
1319
1320 let visible_rows = view.get_visible_rows();
1321 for (i, &row_idx) in visible_rows.iter().enumerate() {
1322 if i >= sample_size {
1323 break;
1324 }
1325
1326 let mut key_values = Vec::new();
1328 for expr in group_by_exprs {
1329 let mut evaluator = ArithmeticEvaluator::new(view.source());
1330 let value = evaluator.evaluate(expr, row_idx).unwrap_or(DataValue::Null);
1331 key_values.push(value);
1332 }
1333
1334 seen.insert(key_values);
1335 }
1336
1337 let sample_cardinality = seen.len();
1339 let estimated = (sample_cardinality * row_count) / sample_size;
1340
1341 estimated.min(row_count).max(sample_cardinality)
1343 }
1344}
1345
1346#[cfg(test)]
1347mod tests {
1348 use super::*;
1349 use crate::data::datatable::{DataColumn, DataRow, DataValue};
1350
1351 fn create_test_table() -> Arc<DataTable> {
1352 let mut table = DataTable::new("test");
1353
1354 table.add_column(DataColumn::new("id"));
1356 table.add_column(DataColumn::new("name"));
1357 table.add_column(DataColumn::new("age"));
1358
1359 table
1361 .add_row(DataRow::new(vec![
1362 DataValue::Integer(1),
1363 DataValue::String("Alice".to_string()),
1364 DataValue::Integer(30),
1365 ]))
1366 .unwrap();
1367
1368 table
1369 .add_row(DataRow::new(vec![
1370 DataValue::Integer(2),
1371 DataValue::String("Bob".to_string()),
1372 DataValue::Integer(25),
1373 ]))
1374 .unwrap();
1375
1376 table
1377 .add_row(DataRow::new(vec![
1378 DataValue::Integer(3),
1379 DataValue::String("Charlie".to_string()),
1380 DataValue::Integer(35),
1381 ]))
1382 .unwrap();
1383
1384 Arc::new(table)
1385 }
1386
1387 #[test]
1388 fn test_select_all() {
1389 let table = create_test_table();
1390 let engine = QueryEngine::new();
1391
1392 let view = engine
1393 .execute(table.clone(), "SELECT * FROM users")
1394 .unwrap();
1395 assert_eq!(view.row_count(), 3);
1396 assert_eq!(view.column_count(), 3);
1397 }
1398
1399 #[test]
1400 fn test_select_columns() {
1401 let table = create_test_table();
1402 let engine = QueryEngine::new();
1403
1404 let view = engine
1405 .execute(table.clone(), "SELECT name, age FROM users")
1406 .unwrap();
1407 assert_eq!(view.row_count(), 3);
1408 assert_eq!(view.column_count(), 2);
1409 }
1410
1411 #[test]
1412 fn test_select_with_limit() {
1413 let table = create_test_table();
1414 let engine = QueryEngine::new();
1415
1416 let view = engine
1417 .execute(table.clone(), "SELECT * FROM users LIMIT 2")
1418 .unwrap();
1419 assert_eq!(view.row_count(), 2);
1420 }
1421
1422 #[test]
1423 fn test_type_coercion_contains() {
1424 let _ = tracing_subscriber::fmt()
1426 .with_max_level(tracing::Level::DEBUG)
1427 .try_init();
1428
1429 let mut table = DataTable::new("test");
1430 table.add_column(DataColumn::new("id"));
1431 table.add_column(DataColumn::new("status"));
1432 table.add_column(DataColumn::new("price"));
1433
1434 table
1436 .add_row(DataRow::new(vec![
1437 DataValue::Integer(1),
1438 DataValue::String("Pending".to_string()),
1439 DataValue::Float(99.99),
1440 ]))
1441 .unwrap();
1442
1443 table
1444 .add_row(DataRow::new(vec![
1445 DataValue::Integer(2),
1446 DataValue::String("Confirmed".to_string()),
1447 DataValue::Float(150.50),
1448 ]))
1449 .unwrap();
1450
1451 table
1452 .add_row(DataRow::new(vec![
1453 DataValue::Integer(3),
1454 DataValue::String("Pending".to_string()),
1455 DataValue::Float(75.00),
1456 ]))
1457 .unwrap();
1458
1459 let table = Arc::new(table);
1460 let engine = QueryEngine::new();
1461
1462 println!("\n=== Testing WHERE clause with Contains ===");
1463 println!("Table has {} rows", table.row_count());
1464 for i in 0..table.row_count() {
1465 let status = table.get_value(i, 1);
1466 println!("Row {i}: status = {status:?}");
1467 }
1468
1469 println!("\n--- Test 1: status.Contains('pend') ---");
1471 let result = engine.execute(
1472 table.clone(),
1473 "SELECT * FROM test WHERE status.Contains('pend')",
1474 );
1475 match result {
1476 Ok(view) => {
1477 println!("SUCCESS: Found {} matching rows", view.row_count());
1478 assert_eq!(view.row_count(), 2); }
1480 Err(e) => {
1481 panic!("Query failed: {e}");
1482 }
1483 }
1484
1485 println!("\n--- Test 2: price.Contains('9') ---");
1487 let result = engine.execute(
1488 table.clone(),
1489 "SELECT * FROM test WHERE price.Contains('9')",
1490 );
1491 match result {
1492 Ok(view) => {
1493 println!(
1494 "SUCCESS: Found {} matching rows with price containing '9'",
1495 view.row_count()
1496 );
1497 assert!(view.row_count() >= 1);
1499 }
1500 Err(e) => {
1501 panic!("Numeric coercion query failed: {e}");
1502 }
1503 }
1504
1505 println!("\n=== All tests passed! ===");
1506 }
1507
1508 #[test]
1509 fn test_not_in_clause() {
1510 let _ = tracing_subscriber::fmt()
1512 .with_max_level(tracing::Level::DEBUG)
1513 .try_init();
1514
1515 let mut table = DataTable::new("test");
1516 table.add_column(DataColumn::new("id"));
1517 table.add_column(DataColumn::new("country"));
1518
1519 table
1521 .add_row(DataRow::new(vec![
1522 DataValue::Integer(1),
1523 DataValue::String("CA".to_string()),
1524 ]))
1525 .unwrap();
1526
1527 table
1528 .add_row(DataRow::new(vec![
1529 DataValue::Integer(2),
1530 DataValue::String("US".to_string()),
1531 ]))
1532 .unwrap();
1533
1534 table
1535 .add_row(DataRow::new(vec![
1536 DataValue::Integer(3),
1537 DataValue::String("UK".to_string()),
1538 ]))
1539 .unwrap();
1540
1541 let table = Arc::new(table);
1542 let engine = QueryEngine::new();
1543
1544 println!("\n=== Testing NOT IN clause ===");
1545 println!("Table has {} rows", table.row_count());
1546 for i in 0..table.row_count() {
1547 let country = table.get_value(i, 1);
1548 println!("Row {i}: country = {country:?}");
1549 }
1550
1551 println!("\n--- Test: country NOT IN ('CA') ---");
1553 let result = engine.execute(
1554 table.clone(),
1555 "SELECT * FROM test WHERE country NOT IN ('CA')",
1556 );
1557 match result {
1558 Ok(view) => {
1559 println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
1560 assert_eq!(view.row_count(), 2); }
1562 Err(e) => {
1563 panic!("NOT IN query failed: {e}");
1564 }
1565 }
1566
1567 println!("\n=== NOT IN test complete! ===");
1568 }
1569
1570 #[test]
1571 fn test_case_insensitive_in_and_not_in() {
1572 let _ = tracing_subscriber::fmt()
1574 .with_max_level(tracing::Level::DEBUG)
1575 .try_init();
1576
1577 let mut table = DataTable::new("test");
1578 table.add_column(DataColumn::new("id"));
1579 table.add_column(DataColumn::new("country"));
1580
1581 table
1583 .add_row(DataRow::new(vec![
1584 DataValue::Integer(1),
1585 DataValue::String("CA".to_string()), ]))
1587 .unwrap();
1588
1589 table
1590 .add_row(DataRow::new(vec![
1591 DataValue::Integer(2),
1592 DataValue::String("us".to_string()), ]))
1594 .unwrap();
1595
1596 table
1597 .add_row(DataRow::new(vec![
1598 DataValue::Integer(3),
1599 DataValue::String("UK".to_string()), ]))
1601 .unwrap();
1602
1603 let table = Arc::new(table);
1604
1605 println!("\n=== Testing Case-Insensitive IN clause ===");
1606 println!("Table has {} rows", table.row_count());
1607 for i in 0..table.row_count() {
1608 let country = table.get_value(i, 1);
1609 println!("Row {i}: country = {country:?}");
1610 }
1611
1612 println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
1614 let engine = QueryEngine::with_case_insensitive(true);
1615 let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1616 match result {
1617 Ok(view) => {
1618 println!(
1619 "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
1620 view.row_count()
1621 );
1622 assert_eq!(view.row_count(), 1); }
1624 Err(e) => {
1625 panic!("Case-insensitive IN query failed: {e}");
1626 }
1627 }
1628
1629 println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
1631 let result = engine.execute(
1632 table.clone(),
1633 "SELECT * FROM test WHERE country NOT IN ('ca')",
1634 );
1635 match result {
1636 Ok(view) => {
1637 println!(
1638 "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
1639 view.row_count()
1640 );
1641 assert_eq!(view.row_count(), 2); }
1643 Err(e) => {
1644 panic!("Case-insensitive NOT IN query failed: {e}");
1645 }
1646 }
1647
1648 println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
1650 let engine_case_sensitive = QueryEngine::new(); let result = engine_case_sensitive
1652 .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1653 match result {
1654 Ok(view) => {
1655 println!(
1656 "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
1657 view.row_count()
1658 );
1659 assert_eq!(view.row_count(), 0); }
1661 Err(e) => {
1662 panic!("Case-sensitive IN query failed: {e}");
1663 }
1664 }
1665
1666 println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
1667 }
1668
1669 #[test]
1670 #[ignore = "Parentheses in WHERE clause not yet implemented"]
1671 fn test_parentheses_in_where_clause() {
1672 let _ = tracing_subscriber::fmt()
1674 .with_max_level(tracing::Level::DEBUG)
1675 .try_init();
1676
1677 let mut table = DataTable::new("test");
1678 table.add_column(DataColumn::new("id"));
1679 table.add_column(DataColumn::new("status"));
1680 table.add_column(DataColumn::new("priority"));
1681
1682 table
1684 .add_row(DataRow::new(vec![
1685 DataValue::Integer(1),
1686 DataValue::String("Pending".to_string()),
1687 DataValue::String("High".to_string()),
1688 ]))
1689 .unwrap();
1690
1691 table
1692 .add_row(DataRow::new(vec![
1693 DataValue::Integer(2),
1694 DataValue::String("Complete".to_string()),
1695 DataValue::String("High".to_string()),
1696 ]))
1697 .unwrap();
1698
1699 table
1700 .add_row(DataRow::new(vec![
1701 DataValue::Integer(3),
1702 DataValue::String("Pending".to_string()),
1703 DataValue::String("Low".to_string()),
1704 ]))
1705 .unwrap();
1706
1707 table
1708 .add_row(DataRow::new(vec![
1709 DataValue::Integer(4),
1710 DataValue::String("Complete".to_string()),
1711 DataValue::String("Low".to_string()),
1712 ]))
1713 .unwrap();
1714
1715 let table = Arc::new(table);
1716 let engine = QueryEngine::new();
1717
1718 println!("\n=== Testing Parentheses in WHERE clause ===");
1719 println!("Table has {} rows", table.row_count());
1720 for i in 0..table.row_count() {
1721 let status = table.get_value(i, 1);
1722 let priority = table.get_value(i, 2);
1723 println!("Row {i}: status = {status:?}, priority = {priority:?}");
1724 }
1725
1726 println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
1728 let result = engine.execute(
1729 table.clone(),
1730 "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
1731 );
1732 match result {
1733 Ok(view) => {
1734 println!(
1735 "SUCCESS: Found {} rows with parenthetical logic",
1736 view.row_count()
1737 );
1738 assert_eq!(view.row_count(), 2); }
1740 Err(e) => {
1741 panic!("Parentheses query failed: {e}");
1742 }
1743 }
1744
1745 println!("\n=== Parentheses test complete! ===");
1746 }
1747
1748 #[test]
1749 #[ignore = "Numeric type coercion needs fixing"]
1750 fn test_numeric_type_coercion() {
1751 let _ = tracing_subscriber::fmt()
1753 .with_max_level(tracing::Level::DEBUG)
1754 .try_init();
1755
1756 let mut table = DataTable::new("test");
1757 table.add_column(DataColumn::new("id"));
1758 table.add_column(DataColumn::new("price"));
1759 table.add_column(DataColumn::new("quantity"));
1760
1761 table
1763 .add_row(DataRow::new(vec![
1764 DataValue::Integer(1),
1765 DataValue::Float(99.50), DataValue::Integer(100),
1767 ]))
1768 .unwrap();
1769
1770 table
1771 .add_row(DataRow::new(vec![
1772 DataValue::Integer(2),
1773 DataValue::Float(150.0), DataValue::Integer(200),
1775 ]))
1776 .unwrap();
1777
1778 table
1779 .add_row(DataRow::new(vec![
1780 DataValue::Integer(3),
1781 DataValue::Integer(75), DataValue::Integer(50),
1783 ]))
1784 .unwrap();
1785
1786 let table = Arc::new(table);
1787 let engine = QueryEngine::new();
1788
1789 println!("\n=== Testing Numeric Type Coercion ===");
1790 println!("Table has {} rows", table.row_count());
1791 for i in 0..table.row_count() {
1792 let price = table.get_value(i, 1);
1793 let quantity = table.get_value(i, 2);
1794 println!("Row {i}: price = {price:?}, quantity = {quantity:?}");
1795 }
1796
1797 println!("\n--- Test: price.Contains('.') ---");
1799 let result = engine.execute(
1800 table.clone(),
1801 "SELECT * FROM test WHERE price.Contains('.')",
1802 );
1803 match result {
1804 Ok(view) => {
1805 println!(
1806 "SUCCESS: Found {} rows with decimal points in price",
1807 view.row_count()
1808 );
1809 assert_eq!(view.row_count(), 2); }
1811 Err(e) => {
1812 panic!("Numeric Contains query failed: {e}");
1813 }
1814 }
1815
1816 println!("\n--- Test: quantity.Contains('0') ---");
1818 let result = engine.execute(
1819 table.clone(),
1820 "SELECT * FROM test WHERE quantity.Contains('0')",
1821 );
1822 match result {
1823 Ok(view) => {
1824 println!(
1825 "SUCCESS: Found {} rows with '0' in quantity",
1826 view.row_count()
1827 );
1828 assert_eq!(view.row_count(), 2); }
1830 Err(e) => {
1831 panic!("Integer Contains query failed: {e}");
1832 }
1833 }
1834
1835 println!("\n=== Numeric type coercion test complete! ===");
1836 }
1837
1838 #[test]
1839 fn test_datetime_comparisons() {
1840 let _ = tracing_subscriber::fmt()
1842 .with_max_level(tracing::Level::DEBUG)
1843 .try_init();
1844
1845 let mut table = DataTable::new("test");
1846 table.add_column(DataColumn::new("id"));
1847 table.add_column(DataColumn::new("created_date"));
1848
1849 table
1851 .add_row(DataRow::new(vec![
1852 DataValue::Integer(1),
1853 DataValue::String("2024-12-15".to_string()),
1854 ]))
1855 .unwrap();
1856
1857 table
1858 .add_row(DataRow::new(vec![
1859 DataValue::Integer(2),
1860 DataValue::String("2025-01-15".to_string()),
1861 ]))
1862 .unwrap();
1863
1864 table
1865 .add_row(DataRow::new(vec![
1866 DataValue::Integer(3),
1867 DataValue::String("2025-02-15".to_string()),
1868 ]))
1869 .unwrap();
1870
1871 let table = Arc::new(table);
1872 let engine = QueryEngine::new();
1873
1874 println!("\n=== Testing DateTime Comparisons ===");
1875 println!("Table has {} rows", table.row_count());
1876 for i in 0..table.row_count() {
1877 let date = table.get_value(i, 1);
1878 println!("Row {i}: created_date = {date:?}");
1879 }
1880
1881 println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1883 let result = engine.execute(
1884 table.clone(),
1885 "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1886 );
1887 match result {
1888 Ok(view) => {
1889 println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1890 assert_eq!(view.row_count(), 2); }
1892 Err(e) => {
1893 panic!("DateTime comparison query failed: {e}");
1894 }
1895 }
1896
1897 println!("\n=== DateTime comparison test complete! ===");
1898 }
1899
1900 #[test]
1901 fn test_not_with_method_calls() {
1902 let _ = tracing_subscriber::fmt()
1904 .with_max_level(tracing::Level::DEBUG)
1905 .try_init();
1906
1907 let mut table = DataTable::new("test");
1908 table.add_column(DataColumn::new("id"));
1909 table.add_column(DataColumn::new("status"));
1910
1911 table
1913 .add_row(DataRow::new(vec![
1914 DataValue::Integer(1),
1915 DataValue::String("Pending Review".to_string()),
1916 ]))
1917 .unwrap();
1918
1919 table
1920 .add_row(DataRow::new(vec![
1921 DataValue::Integer(2),
1922 DataValue::String("Complete".to_string()),
1923 ]))
1924 .unwrap();
1925
1926 table
1927 .add_row(DataRow::new(vec![
1928 DataValue::Integer(3),
1929 DataValue::String("Pending Approval".to_string()),
1930 ]))
1931 .unwrap();
1932
1933 let table = Arc::new(table);
1934 let engine = QueryEngine::with_case_insensitive(true);
1935
1936 println!("\n=== Testing NOT with Method Calls ===");
1937 println!("Table has {} rows", table.row_count());
1938 for i in 0..table.row_count() {
1939 let status = table.get_value(i, 1);
1940 println!("Row {i}: status = {status:?}");
1941 }
1942
1943 println!("\n--- Test: NOT status.Contains('pend') ---");
1945 let result = engine.execute(
1946 table.clone(),
1947 "SELECT * FROM test WHERE NOT status.Contains('pend')",
1948 );
1949 match result {
1950 Ok(view) => {
1951 println!(
1952 "SUCCESS: Found {} rows NOT containing 'pend'",
1953 view.row_count()
1954 );
1955 assert_eq!(view.row_count(), 1); }
1957 Err(e) => {
1958 panic!("NOT Contains query failed: {e}");
1959 }
1960 }
1961
1962 println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1964 let result = engine.execute(
1965 table.clone(),
1966 "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1967 );
1968 match result {
1969 Ok(view) => {
1970 println!(
1971 "SUCCESS: Found {} rows NOT starting with 'Pending'",
1972 view.row_count()
1973 );
1974 assert_eq!(view.row_count(), 1); }
1976 Err(e) => {
1977 panic!("NOT StartsWith query failed: {e}");
1978 }
1979 }
1980
1981 println!("\n=== NOT with method calls test complete! ===");
1982 }
1983
1984 #[test]
1985 #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1986 fn test_complex_logical_expressions() {
1987 let _ = tracing_subscriber::fmt()
1989 .with_max_level(tracing::Level::DEBUG)
1990 .try_init();
1991
1992 let mut table = DataTable::new("test");
1993 table.add_column(DataColumn::new("id"));
1994 table.add_column(DataColumn::new("status"));
1995 table.add_column(DataColumn::new("priority"));
1996 table.add_column(DataColumn::new("assigned"));
1997
1998 table
2000 .add_row(DataRow::new(vec![
2001 DataValue::Integer(1),
2002 DataValue::String("Pending".to_string()),
2003 DataValue::String("High".to_string()),
2004 DataValue::String("John".to_string()),
2005 ]))
2006 .unwrap();
2007
2008 table
2009 .add_row(DataRow::new(vec![
2010 DataValue::Integer(2),
2011 DataValue::String("Complete".to_string()),
2012 DataValue::String("High".to_string()),
2013 DataValue::String("Jane".to_string()),
2014 ]))
2015 .unwrap();
2016
2017 table
2018 .add_row(DataRow::new(vec![
2019 DataValue::Integer(3),
2020 DataValue::String("Pending".to_string()),
2021 DataValue::String("Low".to_string()),
2022 DataValue::String("John".to_string()),
2023 ]))
2024 .unwrap();
2025
2026 table
2027 .add_row(DataRow::new(vec![
2028 DataValue::Integer(4),
2029 DataValue::String("In Progress".to_string()),
2030 DataValue::String("Medium".to_string()),
2031 DataValue::String("Jane".to_string()),
2032 ]))
2033 .unwrap();
2034
2035 let table = Arc::new(table);
2036 let engine = QueryEngine::new();
2037
2038 println!("\n=== Testing Complex Logical Expressions ===");
2039 println!("Table has {} rows", table.row_count());
2040 for i in 0..table.row_count() {
2041 let status = table.get_value(i, 1);
2042 let priority = table.get_value(i, 2);
2043 let assigned = table.get_value(i, 3);
2044 println!(
2045 "Row {i}: status = {status:?}, priority = {priority:?}, assigned = {assigned:?}"
2046 );
2047 }
2048
2049 println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
2051 let result = engine.execute(
2052 table.clone(),
2053 "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
2054 );
2055 match result {
2056 Ok(view) => {
2057 println!(
2058 "SUCCESS: Found {} rows with complex logic",
2059 view.row_count()
2060 );
2061 assert_eq!(view.row_count(), 2); }
2063 Err(e) => {
2064 panic!("Complex logic query failed: {e}");
2065 }
2066 }
2067
2068 println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
2070 let result = engine.execute(
2071 table.clone(),
2072 "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
2073 );
2074 match result {
2075 Ok(view) => {
2076 println!(
2077 "SUCCESS: Found {} rows with NOT complex logic",
2078 view.row_count()
2079 );
2080 assert_eq!(view.row_count(), 2); }
2082 Err(e) => {
2083 panic!("NOT complex logic query failed: {e}");
2084 }
2085 }
2086
2087 println!("\n=== Complex logical expressions test complete! ===");
2088 }
2089
2090 #[test]
2091 fn test_mixed_data_types_and_edge_cases() {
2092 let _ = tracing_subscriber::fmt()
2094 .with_max_level(tracing::Level::DEBUG)
2095 .try_init();
2096
2097 let mut table = DataTable::new("test");
2098 table.add_column(DataColumn::new("id"));
2099 table.add_column(DataColumn::new("value"));
2100 table.add_column(DataColumn::new("nullable_field"));
2101
2102 table
2104 .add_row(DataRow::new(vec![
2105 DataValue::Integer(1),
2106 DataValue::String("123.45".to_string()),
2107 DataValue::String("present".to_string()),
2108 ]))
2109 .unwrap();
2110
2111 table
2112 .add_row(DataRow::new(vec![
2113 DataValue::Integer(2),
2114 DataValue::Float(678.90),
2115 DataValue::Null,
2116 ]))
2117 .unwrap();
2118
2119 table
2120 .add_row(DataRow::new(vec![
2121 DataValue::Integer(3),
2122 DataValue::Boolean(true),
2123 DataValue::String("also present".to_string()),
2124 ]))
2125 .unwrap();
2126
2127 table
2128 .add_row(DataRow::new(vec![
2129 DataValue::Integer(4),
2130 DataValue::String("false".to_string()),
2131 DataValue::Null,
2132 ]))
2133 .unwrap();
2134
2135 let table = Arc::new(table);
2136 let engine = QueryEngine::new();
2137
2138 println!("\n=== Testing Mixed Data Types and Edge Cases ===");
2139 println!("Table has {} rows", table.row_count());
2140 for i in 0..table.row_count() {
2141 let value = table.get_value(i, 1);
2142 let nullable = table.get_value(i, 2);
2143 println!("Row {i}: value = {value:?}, nullable_field = {nullable:?}");
2144 }
2145
2146 println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
2148 let result = engine.execute(
2149 table.clone(),
2150 "SELECT * FROM test WHERE value.Contains('true')",
2151 );
2152 match result {
2153 Ok(view) => {
2154 println!(
2155 "SUCCESS: Found {} rows with boolean coercion",
2156 view.row_count()
2157 );
2158 assert_eq!(view.row_count(), 1); }
2160 Err(e) => {
2161 panic!("Boolean coercion query failed: {e}");
2162 }
2163 }
2164
2165 println!("\n--- Test: id IN (1, 3) ---");
2167 let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
2168 match result {
2169 Ok(view) => {
2170 println!("SUCCESS: Found {} rows with IN clause", view.row_count());
2171 assert_eq!(view.row_count(), 2); }
2173 Err(e) => {
2174 panic!("Multiple IN values query failed: {e}");
2175 }
2176 }
2177
2178 println!("\n=== Mixed data types test complete! ===");
2179 }
2180
2181 #[test]
2183 fn test_aggregate_only_single_row() {
2184 let table = create_test_stock_data();
2185 let engine = QueryEngine::new();
2186
2187 let result = engine
2189 .execute(
2190 table.clone(),
2191 "SELECT COUNT(*), MIN(close), MAX(close), AVG(close) FROM stock",
2192 )
2193 .expect("Query should succeed");
2194
2195 assert_eq!(
2196 result.row_count(),
2197 1,
2198 "Aggregate-only query should return exactly 1 row"
2199 );
2200 assert_eq!(result.column_count(), 4, "Should have 4 aggregate columns");
2201
2202 let source = result.source();
2204 let row = source.get_row(0).expect("Should have first row");
2205
2206 assert_eq!(row.values[0], DataValue::Integer(5));
2208
2209 assert_eq!(row.values[1], DataValue::Float(99.5));
2211
2212 assert_eq!(row.values[2], DataValue::Float(105.0));
2214
2215 if let DataValue::Float(avg) = &row.values[3] {
2217 assert!(
2218 (avg - 102.4).abs() < 0.01,
2219 "Average should be approximately 102.4, got {}",
2220 avg
2221 );
2222 } else {
2223 panic!("AVG should return a Float value");
2224 }
2225 }
2226
2227 #[test]
2229 fn test_single_aggregate_single_row() {
2230 let table = create_test_stock_data();
2231 let engine = QueryEngine::new();
2232
2233 let result = engine
2234 .execute(table.clone(), "SELECT COUNT(*) FROM stock")
2235 .expect("Query should succeed");
2236
2237 assert_eq!(
2238 result.row_count(),
2239 1,
2240 "Single aggregate query should return exactly 1 row"
2241 );
2242 assert_eq!(result.column_count(), 1, "Should have 1 column");
2243
2244 let source = result.source();
2245 let row = source.get_row(0).expect("Should have first row");
2246 assert_eq!(row.values[0], DataValue::Integer(5));
2247 }
2248
2249 #[test]
2251 fn test_aggregate_with_where_single_row() {
2252 let table = create_test_stock_data();
2253 let engine = QueryEngine::new();
2254
2255 let result = engine
2257 .execute(
2258 table.clone(),
2259 "SELECT COUNT(*), MIN(close), MAX(close) FROM stock WHERE close >= 103.0",
2260 )
2261 .expect("Query should succeed");
2262
2263 assert_eq!(
2264 result.row_count(),
2265 1,
2266 "Filtered aggregate query should return exactly 1 row"
2267 );
2268 assert_eq!(result.column_count(), 3, "Should have 3 aggregate columns");
2269
2270 let source = result.source();
2271 let row = source.get_row(0).expect("Should have first row");
2272
2273 assert_eq!(row.values[0], DataValue::Integer(2));
2275 assert_eq!(row.values[1], DataValue::Float(103.5)); assert_eq!(row.values[2], DataValue::Float(105.0)); }
2278
2279 #[test]
2280 fn test_not_in_parsing() {
2281 use crate::sql::recursive_parser::Parser;
2282
2283 let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
2284 println!("\n=== Testing NOT IN parsing ===");
2285 println!("Parsing query: {query}");
2286
2287 let mut parser = Parser::new(query);
2288 match parser.parse() {
2289 Ok(statement) => {
2290 println!("Parsed statement: {statement:#?}");
2291 if let Some(where_clause) = statement.where_clause {
2292 println!("WHERE conditions: {:#?}", where_clause.conditions);
2293 if let Some(first_condition) = where_clause.conditions.first() {
2294 println!("First condition expression: {:#?}", first_condition.expr);
2295 }
2296 }
2297 }
2298 Err(e) => {
2299 panic!("Parse error: {e}");
2300 }
2301 }
2302 }
2303
2304 fn create_test_stock_data() -> Arc<DataTable> {
2306 let mut table = DataTable::new("stock");
2307
2308 table.add_column(DataColumn::new("symbol"));
2309 table.add_column(DataColumn::new("close"));
2310 table.add_column(DataColumn::new("volume"));
2311
2312 let test_data = vec![
2314 ("AAPL", 99.5, 1000),
2315 ("AAPL", 101.2, 1500),
2316 ("AAPL", 103.5, 2000),
2317 ("AAPL", 105.0, 1200),
2318 ("AAPL", 102.8, 1800),
2319 ];
2320
2321 for (symbol, close, volume) in test_data {
2322 table
2323 .add_row(DataRow::new(vec![
2324 DataValue::String(symbol.to_string()),
2325 DataValue::Float(close),
2326 DataValue::Integer(volume),
2327 ]))
2328 .expect("Should add row successfully");
2329 }
2330
2331 Arc::new(table)
2332 }
2333}
2334
2335#[cfg(test)]
2336#[path = "query_engine_tests.rs"]
2337mod query_engine_tests;