1use anyhow::{anyhow, Result};
2use std::collections::HashMap;
3use std::sync::Arc;
4use std::time::Instant;
5use tracing::{debug, info};
6
7use crate::config::config::BehaviorConfig;
8use crate::config::global::get_date_notation;
9use crate::data::arithmetic_evaluator::ArithmeticEvaluator;
10use crate::data::data_view::DataView;
11use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
12use crate::data::evaluation_context::EvaluationContext;
13use crate::data::group_by_expressions::GroupByExpressions;
14use crate::data::hash_join::HashJoinExecutor;
15use crate::data::recursive_where_evaluator::RecursiveWhereEvaluator;
16use crate::data::subquery_executor::SubqueryExecutor;
17use crate::data::virtual_table_generator::VirtualTableGenerator;
18use crate::execution_plan::{ExecutionPlan, ExecutionPlanBuilder, StepType};
19use crate::sql::aggregates::{contains_aggregate, is_aggregate_compatible};
20use crate::sql::parser::ast::TableSource;
21use crate::sql::recursive_parser::{
22 CTEType, OrderByColumn, Parser, SelectItem, SelectStatement, SortDirection, SqlExpression,
23 TableFunction,
24};
25
26#[derive(Clone)]
28pub struct QueryEngine {
29 case_insensitive: bool,
30 date_notation: String,
31 behavior_config: Option<BehaviorConfig>,
32}
33
34impl Default for QueryEngine {
35 fn default() -> Self {
36 Self::new()
37 }
38}
39
40impl QueryEngine {
41 #[must_use]
42 pub fn new() -> Self {
43 Self {
44 case_insensitive: false,
45 date_notation: get_date_notation(),
46 behavior_config: None,
47 }
48 }
49
50 #[must_use]
51 pub fn with_behavior_config(config: BehaviorConfig) -> Self {
52 let case_insensitive = config.case_insensitive_default;
53 let date_notation = get_date_notation();
55 Self {
56 case_insensitive,
57 date_notation,
58 behavior_config: Some(config),
59 }
60 }
61
62 #[must_use]
63 pub fn with_date_notation(_date_notation: String) -> Self {
64 Self {
65 case_insensitive: false,
66 date_notation: get_date_notation(), behavior_config: None,
68 }
69 }
70
71 #[must_use]
72 pub fn with_case_insensitive(case_insensitive: bool) -> Self {
73 Self {
74 case_insensitive,
75 date_notation: get_date_notation(),
76 behavior_config: None,
77 }
78 }
79
80 #[must_use]
81 pub fn with_case_insensitive_and_date_notation(
82 case_insensitive: bool,
83 _date_notation: String, ) -> Self {
85 Self {
86 case_insensitive,
87 date_notation: get_date_notation(), behavior_config: None,
89 }
90 }
91
92 fn find_similar_column(&self, table: &DataTable, name: &str) -> Option<String> {
94 let columns = table.column_names();
95 let mut best_match: Option<(String, usize)> = None;
96
97 for col in columns {
98 let distance = self.edit_distance(&col.to_lowercase(), &name.to_lowercase());
99 let max_distance = if name.len() > 10 { 3 } else { 2 };
102 if distance <= max_distance {
103 match &best_match {
104 None => best_match = Some((col, distance)),
105 Some((_, best_dist)) if distance < *best_dist => {
106 best_match = Some((col, distance));
107 }
108 _ => {}
109 }
110 }
111 }
112
113 best_match.map(|(name, _)| name)
114 }
115
116 fn edit_distance(&self, s1: &str, s2: &str) -> usize {
118 let len1 = s1.len();
119 let len2 = s2.len();
120 let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
121
122 for i in 0..=len1 {
123 matrix[i][0] = i;
124 }
125 for j in 0..=len2 {
126 matrix[0][j] = j;
127 }
128
129 for (i, c1) in s1.chars().enumerate() {
130 for (j, c2) in s2.chars().enumerate() {
131 let cost = usize::from(c1 != c2);
132 matrix[i + 1][j + 1] = std::cmp::min(
133 matrix[i][j + 1] + 1, std::cmp::min(
135 matrix[i + 1][j] + 1, matrix[i][j] + cost, ),
138 );
139 }
140 }
141
142 matrix[len1][len2]
143 }
144
145 pub fn execute(&self, table: Arc<DataTable>, sql: &str) -> Result<DataView> {
147 let (view, _plan) = self.execute_with_plan(table, sql)?;
148 Ok(view)
149 }
150
151 pub fn execute_statement(
153 &self,
154 table: Arc<DataTable>,
155 statement: SelectStatement,
156 ) -> Result<DataView> {
157 let mut cte_context = HashMap::new();
159 for cte in &statement.ctes {
160 debug!("QueryEngine: Pre-processing CTE '{}'...", cte.name);
161 let cte_result = match &cte.cte_type {
163 CTEType::Standard(query) => {
164 self.build_view_with_context(table.clone(), query.clone(), &mut cte_context)?
166 }
167 CTEType::Web(web_spec) => {
168 use crate::web::http_fetcher::WebDataFetcher;
170
171 let fetcher = WebDataFetcher::new()?;
172 let data_table = fetcher.fetch(web_spec, &cte.name)?;
173
174 DataView::new(Arc::new(data_table))
176 }
177 };
178 cte_context.insert(cte.name.clone(), Arc::new(cte_result));
180 debug!(
181 "QueryEngine: CTE '{}' pre-processed, stored in context",
182 cte.name
183 );
184 }
185
186 let mut subquery_executor =
188 SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
189 let processed_statement = subquery_executor.execute_subqueries(&statement)?;
190
191 self.build_view_with_context(table, processed_statement, &mut cte_context)
193 }
194
195 pub fn execute_statement_with_cte_context(
197 &self,
198 table: Arc<DataTable>,
199 statement: SelectStatement,
200 cte_context: &HashMap<String, Arc<DataView>>,
201 ) -> Result<DataView> {
202 let mut local_context = cte_context.clone();
204
205 for cte in &statement.ctes {
207 debug!("QueryEngine: Processing nested CTE '{}'...", cte.name);
208 let cte_result = match &cte.cte_type {
209 CTEType::Standard(query) => {
210 self.build_view_with_context(table.clone(), query.clone(), &mut local_context)?
211 }
212 CTEType::Web(web_spec) => {
213 use crate::web::http_fetcher::WebDataFetcher;
215
216 let fetcher = WebDataFetcher::new()?;
217 let data_table = fetcher.fetch(web_spec, &cte.name)?;
218
219 DataView::new(Arc::new(data_table))
221 }
222 };
223 local_context.insert(cte.name.clone(), Arc::new(cte_result));
224 }
225
226 let mut subquery_executor =
228 SubqueryExecutor::with_cte_context(self.clone(), table.clone(), local_context.clone());
229 let processed_statement = subquery_executor.execute_subqueries(&statement)?;
230
231 self.build_view_with_context(table, processed_statement, &mut local_context)
233 }
234
235 pub fn execute_with_plan(
237 &self,
238 table: Arc<DataTable>,
239 sql: &str,
240 ) -> Result<(DataView, ExecutionPlan)> {
241 let mut plan_builder = ExecutionPlanBuilder::new();
242 let start_time = Instant::now();
243
244 plan_builder.begin_step(StepType::Parse, "Parse SQL query".to_string());
246 plan_builder.add_detail(format!("Query: {}", sql));
247 let mut parser = Parser::new(sql);
248 let statement = parser
249 .parse()
250 .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
251 plan_builder.add_detail(format!("Parsed successfully"));
252 if let Some(from) = &statement.from_table {
253 plan_builder.add_detail(format!("FROM: {}", from));
254 }
255 if statement.where_clause.is_some() {
256 plan_builder.add_detail("WHERE clause present".to_string());
257 }
258 plan_builder.end_step();
259
260 let mut cte_context = HashMap::new();
262
263 if !statement.ctes.is_empty() {
264 plan_builder.begin_step(
265 StepType::CTE,
266 format!("Process {} CTEs", statement.ctes.len()),
267 );
268
269 for cte in &statement.ctes {
270 let cte_start = Instant::now();
271 plan_builder.begin_step(StepType::CTE, format!("CTE '{}'", cte.name));
272
273 let cte_result = match &cte.cte_type {
274 CTEType::Standard(query) => {
275 if let Some(from) = &query.from_table {
277 plan_builder.add_detail(format!("Source: {}", from));
278 }
279 if query.where_clause.is_some() {
280 plan_builder.add_detail("Has WHERE clause".to_string());
281 }
282 if query.group_by.is_some() {
283 plan_builder.add_detail("Has GROUP BY".to_string());
284 }
285
286 debug!(
287 "QueryEngine: Processing CTE '{}' with existing context: {:?}",
288 cte.name,
289 cte_context.keys().collect::<Vec<_>>()
290 );
291
292 let mut subquery_executor = SubqueryExecutor::with_cte_context(
295 self.clone(),
296 table.clone(),
297 cte_context.clone(),
298 );
299 let processed_query = subquery_executor.execute_subqueries(query)?;
300
301 self.build_view_with_context(
302 table.clone(),
303 processed_query,
304 &mut cte_context,
305 )?
306 }
307 CTEType::Web(web_spec) => {
308 plan_builder.add_detail(format!("URL: {}", web_spec.url));
309 if let Some(format) = &web_spec.format {
310 plan_builder.add_detail(format!("Format: {:?}", format));
311 }
312 if let Some(cache) = web_spec.cache_seconds {
313 plan_builder.add_detail(format!("Cache: {} seconds", cache));
314 }
315
316 use crate::web::http_fetcher::WebDataFetcher;
318
319 let fetcher = WebDataFetcher::new()?;
320 let data_table = fetcher.fetch(web_spec, &cte.name)?;
321
322 DataView::new(Arc::new(data_table))
324 }
325 };
326
327 plan_builder.set_rows_out(cte_result.row_count());
329 plan_builder.add_detail(format!(
330 "Result: {} rows, {} columns",
331 cte_result.row_count(),
332 cte_result.column_count()
333 ));
334 plan_builder.add_detail(format!(
335 "Execution time: {:.3}ms",
336 cte_start.elapsed().as_secs_f64() * 1000.0
337 ));
338
339 debug!(
340 "QueryEngine: Storing CTE '{}' in context with {} rows",
341 cte.name,
342 cte_result.row_count()
343 );
344 cte_context.insert(cte.name.clone(), Arc::new(cte_result));
345 plan_builder.end_step();
346 }
347
348 plan_builder.add_detail(format!(
349 "All {} CTEs cached in context",
350 statement.ctes.len()
351 ));
352 plan_builder.end_step();
353 }
354
355 plan_builder.begin_step(StepType::Subquery, "Process subqueries".to_string());
357 let mut subquery_executor =
358 SubqueryExecutor::with_cte_context(self.clone(), table.clone(), cte_context.clone());
359
360 let has_subqueries = statement.where_clause.as_ref().map_or(false, |w| {
362 format!("{:?}", w).contains("Subquery")
364 });
365
366 if has_subqueries {
367 plan_builder.add_detail("Evaluating subqueries in WHERE clause".to_string());
368 }
369
370 let processed_statement = subquery_executor.execute_subqueries(&statement)?;
371
372 if has_subqueries {
373 plan_builder.add_detail("Subqueries replaced with materialized values".to_string());
374 } else {
375 plan_builder.add_detail("No subqueries to process".to_string());
376 }
377
378 plan_builder.end_step();
379 let result = self.build_view_with_context_and_plan(
380 table,
381 processed_statement,
382 &mut cte_context,
383 &mut plan_builder,
384 )?;
385
386 let total_duration = start_time.elapsed();
387 info!(
388 "Query execution complete: total={:?}, rows={}",
389 total_duration,
390 result.row_count()
391 );
392
393 let plan = plan_builder.build();
394 Ok((result, plan))
395 }
396
397 fn build_view(&self, table: Arc<DataTable>, statement: SelectStatement) -> Result<DataView> {
399 let mut cte_context = HashMap::new();
400 self.build_view_with_context(table, statement, &mut cte_context)
401 }
402
403 fn build_view_with_context(
405 &self,
406 table: Arc<DataTable>,
407 statement: SelectStatement,
408 cte_context: &mut HashMap<String, Arc<DataView>>,
409 ) -> Result<DataView> {
410 let mut dummy_plan = ExecutionPlanBuilder::new();
411 self.build_view_with_context_and_plan(table, statement, cte_context, &mut dummy_plan)
412 }
413
414 fn build_view_with_context_and_plan(
416 &self,
417 table: Arc<DataTable>,
418 statement: SelectStatement,
419 cte_context: &mut HashMap<String, Arc<DataView>>,
420 plan: &mut ExecutionPlanBuilder,
421 ) -> Result<DataView> {
422 for cte in &statement.ctes {
424 if cte_context.contains_key(&cte.name) {
426 debug!(
427 "QueryEngine: CTE '{}' already in context, skipping",
428 cte.name
429 );
430 continue;
431 }
432
433 debug!("QueryEngine: Processing CTE '{}'...", cte.name);
434 debug!(
435 "QueryEngine: Available CTEs for '{}': {:?}",
436 cte.name,
437 cte_context.keys().collect::<Vec<_>>()
438 );
439
440 let cte_result = match &cte.cte_type {
442 CTEType::Standard(query) => {
443 self.build_view_with_context(table.clone(), query.clone(), cte_context)?
444 }
445 CTEType::Web(_web_spec) => {
446 return Err(anyhow!(
448 "Web CTEs should be processed in execute_select method"
449 ));
450 }
451 };
452
453 cte_context.insert(cte.name.clone(), Arc::new(cte_result));
455 debug!(
456 "QueryEngine: CTE '{}' processed, stored in context",
457 cte.name
458 );
459 }
460
461 let source_table = if let Some(ref table_func) = statement.from_function {
463 debug!("QueryEngine: Processing table function...");
465 match table_func {
466 TableFunction::Range { start, end, step } => {
467 let mut evaluator =
469 ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
470
471 let dummy_row = 0;
473
474 let start_val = evaluator.evaluate(start, dummy_row)?;
475 let end_val = evaluator.evaluate(end, dummy_row)?;
476 let step_val = if let Some(step_expr) = step {
477 Some(evaluator.evaluate(step_expr, dummy_row)?)
478 } else {
479 None
480 };
481
482 let start_int = match start_val {
484 DataValue::Integer(i) => i,
485 DataValue::Float(f) => f as i64,
486 _ => return Err(anyhow!("RANGE start must be numeric")),
487 };
488
489 let end_int = match end_val {
490 DataValue::Integer(i) => i,
491 DataValue::Float(f) => f as i64,
492 _ => return Err(anyhow!("RANGE end must be numeric")),
493 };
494
495 let step_int = if let Some(step) = step_val {
496 match step {
497 DataValue::Integer(i) => Some(i),
498 DataValue::Float(f) => Some(f as i64),
499 _ => return Err(anyhow!("RANGE step must be numeric")),
500 }
501 } else {
502 None
503 };
504
505 VirtualTableGenerator::generate_range(start_int, end_int, step_int, None)?
507 }
508 TableFunction::Split { text, delimiter } => {
509 let mut evaluator =
511 ArithmeticEvaluator::with_date_notation(&table, self.date_notation.clone());
512
513 let dummy_row = 0;
515
516 let text_val = evaluator.evaluate(text, dummy_row)?;
517 let delimiter_val = if let Some(delim_expr) = delimiter {
518 Some(evaluator.evaluate(delim_expr, dummy_row)?)
519 } else {
520 None
521 };
522
523 let text_str = match text_val {
525 DataValue::String(s) => s,
526 DataValue::Null => return Err(anyhow!("SPLIT text cannot be NULL")),
527 _ => text_val.to_string(),
528 };
529
530 let delimiter_str = if let Some(delim) = delimiter_val {
531 match delim {
532 DataValue::String(s) => Some(s),
533 DataValue::Null => None,
534 _ => Some(delim.to_string()),
535 }
536 } else {
537 None
538 };
539
540 VirtualTableGenerator::generate_split(
542 &text_str,
543 delimiter_str.as_deref(),
544 None,
545 )?
546 }
547 TableFunction::Generator { name, args } => {
548 use crate::sql::generators::GeneratorRegistry;
550
551 let registry = GeneratorRegistry::new();
553
554 if let Some(generator) = registry.get(name) {
555 let mut evaluator = ArithmeticEvaluator::with_date_notation(
557 &table,
558 self.date_notation.clone(),
559 );
560 let dummy_row = 0;
561
562 let mut evaluated_args = Vec::new();
563 for arg in args {
564 evaluated_args.push(evaluator.evaluate(arg, dummy_row)?);
565 }
566
567 generator.generate(evaluated_args)?
569 } else {
570 return Err(anyhow!("Unknown generator function: {}", name));
571 }
572 }
573 }
574 } else if let Some(ref subquery) = statement.from_subquery {
575 debug!("QueryEngine: Processing FROM subquery...");
577 let subquery_result =
578 self.build_view_with_context(table.clone(), *subquery.clone(), cte_context)?;
579
580 let materialized = self.materialize_view(subquery_result)?;
583 Arc::new(materialized)
584 } else if let Some(ref table_name) = statement.from_table {
585 if let Some(cte_view) = cte_context.get(table_name) {
587 debug!("QueryEngine: Using CTE '{}' as source table", table_name);
588 let materialized = self.materialize_view((**cte_view).clone())?;
590 Arc::new(materialized)
591 } else {
592 table.clone()
594 }
595 } else {
596 table.clone()
598 };
599
600 let final_table = if !statement.joins.is_empty() {
602 plan.begin_step(
603 StepType::Join,
604 format!("Process {} JOINs", statement.joins.len()),
605 );
606 plan.set_rows_in(source_table.row_count());
607
608 let join_executor = HashJoinExecutor::new(self.case_insensitive);
609 let mut current_table = source_table;
610
611 for (idx, join_clause) in statement.joins.iter().enumerate() {
612 let join_start = Instant::now();
613 plan.begin_step(StepType::Join, format!("JOIN #{}", idx + 1));
614 plan.add_detail(format!("Type: {:?}", join_clause.join_type));
615 plan.add_detail(format!("Left table: {} rows", current_table.row_count()));
616 plan.add_detail(format!(
617 "Executing {:?} JOIN on {}",
618 join_clause.join_type, join_clause.condition.left_column
619 ));
620
621 let right_table = match &join_clause.table {
623 TableSource::Table(name) => {
624 if let Some(cte_view) = cte_context.get(name) {
626 let materialized = self.materialize_view((**cte_view).clone())?;
627 Arc::new(materialized)
628 } else {
629 return Err(anyhow!("Cannot resolve table '{}' for JOIN", name));
632 }
633 }
634 TableSource::DerivedTable { query, alias: _ } => {
635 let subquery_result = self.build_view_with_context(
637 table.clone(),
638 *query.clone(),
639 cte_context,
640 )?;
641 let materialized = self.materialize_view(subquery_result)?;
642 Arc::new(materialized)
643 }
644 };
645
646 let joined = join_executor.execute_join(
648 current_table.clone(),
649 join_clause,
650 right_table.clone(),
651 )?;
652
653 plan.add_detail(format!("Right table: {} rows", right_table.row_count()));
654 plan.set_rows_out(joined.row_count());
655 plan.add_detail(format!("Result: {} rows", joined.row_count()));
656 plan.add_detail(format!(
657 "Join time: {:.3}ms",
658 join_start.elapsed().as_secs_f64() * 1000.0
659 ));
660 plan.end_step();
661
662 current_table = Arc::new(joined);
663 }
664
665 plan.set_rows_out(current_table.row_count());
666 plan.add_detail(format!(
667 "Final result after all joins: {} rows",
668 current_table.row_count()
669 ));
670 plan.end_step();
671 current_table
672 } else {
673 source_table
674 };
675
676 self.build_view_internal_with_plan(final_table, statement, plan)
678 }
679
680 fn materialize_view(&self, view: DataView) -> Result<DataTable> {
682 let source = view.source();
683 let mut result_table = DataTable::new("derived");
684
685 let visible_cols = view.visible_column_indices().to_vec();
687
688 for col_idx in &visible_cols {
690 let col = &source.columns[*col_idx];
691 let new_col = DataColumn {
692 name: col.name.clone(),
693 data_type: col.data_type.clone(),
694 nullable: col.nullable,
695 unique_values: col.unique_values,
696 null_count: col.null_count,
697 metadata: col.metadata.clone(),
698 };
699 result_table.add_column(new_col);
700 }
701
702 for row_idx in view.visible_row_indices() {
704 let source_row = &source.rows[*row_idx];
705 let mut new_row = DataRow { values: Vec::new() };
706
707 for col_idx in &visible_cols {
708 new_row.values.push(source_row.values[*col_idx].clone());
709 }
710
711 result_table.add_row(new_row);
712 }
713
714 Ok(result_table)
715 }
716
717 fn build_view_internal(
718 &self,
719 table: Arc<DataTable>,
720 statement: SelectStatement,
721 ) -> Result<DataView> {
722 let mut dummy_plan = ExecutionPlanBuilder::new();
723 self.build_view_internal_with_plan(table, statement, &mut dummy_plan)
724 }
725
726 fn build_view_internal_with_plan(
727 &self,
728 table: Arc<DataTable>,
729 statement: SelectStatement,
730 plan: &mut ExecutionPlanBuilder,
731 ) -> Result<DataView> {
732 debug!(
733 "QueryEngine::build_view - select_items: {:?}",
734 statement.select_items
735 );
736 debug!(
737 "QueryEngine::build_view - where_clause: {:?}",
738 statement.where_clause
739 );
740
741 let mut visible_rows: Vec<usize> = (0..table.row_count()).collect();
743
744 if let Some(where_clause) = &statement.where_clause {
746 let total_rows = table.row_count();
747 debug!("QueryEngine: Applying WHERE clause to {} rows", total_rows);
748 debug!("QueryEngine: WHERE clause = {:?}", where_clause);
749
750 plan.begin_step(StepType::Filter, "WHERE clause filtering".to_string());
751 plan.set_rows_in(total_rows);
752 plan.add_detail(format!("Input: {} rows", total_rows));
753
754 for condition in &where_clause.conditions {
756 plan.add_detail(format!("Condition: {:?}", condition.expr));
757 }
758
759 let filter_start = Instant::now();
760 let mut eval_context = EvaluationContext::new(self.case_insensitive);
762
763 let mut filtered_rows = Vec::new();
765 for row_idx in visible_rows {
766 if row_idx < 3 {
768 debug!("QueryEngine: Evaluating WHERE clause for row {}", row_idx);
769 }
770 let mut evaluator =
771 RecursiveWhereEvaluator::with_context(&table, &mut eval_context);
772 match evaluator.evaluate(where_clause, row_idx) {
773 Ok(result) => {
774 if row_idx < 3 {
775 debug!("QueryEngine: Row {} WHERE result: {}", row_idx, result);
776 }
777 if result {
778 filtered_rows.push(row_idx);
779 }
780 }
781 Err(e) => {
782 if row_idx < 3 {
783 debug!(
784 "QueryEngine: WHERE evaluation error for row {}: {}",
785 row_idx, e
786 );
787 }
788 return Err(e);
790 }
791 }
792 }
793
794 let (compilations, cache_hits) = eval_context.get_stats();
796 if compilations > 0 || cache_hits > 0 {
797 debug!(
798 "LIKE pattern cache: {} compilations, {} cache hits",
799 compilations, cache_hits
800 );
801 }
802 visible_rows = filtered_rows;
803 let filter_duration = filter_start.elapsed();
804 info!(
805 "WHERE clause filtering: {} rows -> {} rows in {:?}",
806 total_rows,
807 visible_rows.len(),
808 filter_duration
809 );
810
811 plan.set_rows_out(visible_rows.len());
812 plan.add_detail(format!("Output: {} rows", visible_rows.len()));
813 plan.add_detail(format!(
814 "Filter time: {:.3}ms",
815 filter_duration.as_secs_f64() * 1000.0
816 ));
817 plan.end_step();
818 }
819
820 let mut view = DataView::new(table.clone());
822 view = view.with_rows(visible_rows);
823
824 if let Some(group_by_exprs) = &statement.group_by {
826 if !group_by_exprs.is_empty() {
827 debug!("QueryEngine: Processing GROUP BY: {:?}", group_by_exprs);
828
829 plan.begin_step(
830 StepType::GroupBy,
831 format!("GROUP BY {} expressions", group_by_exprs.len()),
832 );
833 plan.set_rows_in(view.row_count());
834 plan.add_detail(format!("Input: {} rows", view.row_count()));
835 for expr in group_by_exprs {
836 plan.add_detail(format!("Group by: {:?}", expr));
837 }
838
839 let group_start = Instant::now();
840 view = self.apply_group_by(
841 view,
842 group_by_exprs,
843 &statement.select_items,
844 statement.having.as_ref(),
845 )?;
846
847 plan.set_rows_out(view.row_count());
848 plan.add_detail(format!("Output: {} groups", view.row_count()));
849 plan.add_detail(format!(
850 "Group time: {:.3}ms",
851 group_start.elapsed().as_secs_f64() * 1000.0
852 ));
853 plan.end_step();
854 }
855 } else {
856 if !statement.select_items.is_empty() {
858 let has_non_star_items = statement
860 .select_items
861 .iter()
862 .any(|item| !matches!(item, SelectItem::Star));
863
864 if has_non_star_items || statement.select_items.len() > 1 {
868 view = self.apply_select_items(view, &statement.select_items)?;
869 }
870 } else if !statement.columns.is_empty() && statement.columns[0] != "*" {
872 let column_indices = self.resolve_column_indices(&table, &statement.columns)?;
874 view = view.with_columns(column_indices);
875 }
876 }
877
878 if statement.distinct {
880 plan.begin_step(StepType::Distinct, "Remove duplicate rows".to_string());
881 plan.set_rows_in(view.row_count());
882 plan.add_detail(format!("Input: {} rows", view.row_count()));
883
884 let distinct_start = Instant::now();
885 view = self.apply_distinct(view)?;
886
887 plan.set_rows_out(view.row_count());
888 plan.add_detail(format!("Output: {} unique rows", view.row_count()));
889 plan.add_detail(format!(
890 "Distinct time: {:.3}ms",
891 distinct_start.elapsed().as_secs_f64() * 1000.0
892 ));
893 plan.end_step();
894 }
895
896 if let Some(order_by_columns) = &statement.order_by {
898 if !order_by_columns.is_empty() {
899 plan.begin_step(
900 StepType::Sort,
901 format!("ORDER BY {} columns", order_by_columns.len()),
902 );
903 plan.set_rows_in(view.row_count());
904 for col in order_by_columns {
905 plan.add_detail(format!("{} {:?}", col.column, col.direction));
906 }
907
908 let sort_start = Instant::now();
909 view = self.apply_multi_order_by(view, order_by_columns)?;
910
911 plan.add_detail(format!(
912 "Sort time: {:.3}ms",
913 sort_start.elapsed().as_secs_f64() * 1000.0
914 ));
915 plan.end_step();
916 }
917 }
918
919 if let Some(limit) = statement.limit {
921 let offset = statement.offset.unwrap_or(0);
922 plan.begin_step(StepType::Limit, format!("LIMIT {}", limit));
923 plan.set_rows_in(view.row_count());
924 if offset > 0 {
925 plan.add_detail(format!("OFFSET: {}", offset));
926 }
927 view = view.with_limit(limit, offset);
928 plan.set_rows_out(view.row_count());
929 plan.add_detail(format!("Output: {} rows", view.row_count()));
930 plan.end_step();
931 }
932
933 Ok(view)
934 }
935
936 fn resolve_column_indices(&self, table: &DataTable, columns: &[String]) -> Result<Vec<usize>> {
938 let mut indices = Vec::new();
939 let table_columns = table.column_names();
940
941 for col_name in columns {
942 let index = table_columns
943 .iter()
944 .position(|c| c.eq_ignore_ascii_case(col_name))
945 .ok_or_else(|| {
946 let suggestion = self.find_similar_column(table, col_name);
947 match suggestion {
948 Some(similar) => anyhow::anyhow!(
949 "Column '{}' not found. Did you mean '{}'?",
950 col_name,
951 similar
952 ),
953 None => anyhow::anyhow!("Column '{}' not found", col_name),
954 }
955 })?;
956 indices.push(index);
957 }
958
959 Ok(indices)
960 }
961
962 fn apply_select_items(&self, view: DataView, select_items: &[SelectItem]) -> Result<DataView> {
964 debug!(
965 "QueryEngine::apply_select_items - items: {:?}",
966 select_items
967 );
968 debug!(
969 "QueryEngine::apply_select_items - input view has {} rows",
970 view.row_count()
971 );
972
973 let has_aggregates = select_items.iter().any(|item| match item {
977 SelectItem::Expression { expr, .. } => contains_aggregate(expr),
978 SelectItem::Column(_) => false,
979 SelectItem::Star => false,
980 });
981
982 let all_aggregate_compatible = select_items.iter().all(|item| match item {
983 SelectItem::Expression { expr, .. } => is_aggregate_compatible(expr),
984 SelectItem::Column(_) => false, SelectItem::Star => false, });
987
988 if has_aggregates && all_aggregate_compatible && view.row_count() > 0 {
989 debug!("QueryEngine::apply_select_items - detected aggregate query with constants");
992 return self.apply_aggregate_select(view, select_items);
993 }
994
995 let has_computed_expressions = select_items
997 .iter()
998 .any(|item| matches!(item, SelectItem::Expression { .. }));
999
1000 debug!(
1001 "QueryEngine::apply_select_items - has_computed_expressions: {}",
1002 has_computed_expressions
1003 );
1004
1005 if !has_computed_expressions {
1006 let column_indices = self.resolve_select_columns(view.source(), select_items)?;
1008 return Ok(view.with_columns(column_indices));
1009 }
1010
1011 let source_table = view.source();
1016 let visible_rows = view.visible_row_indices();
1017
1018 let mut computed_table = DataTable::new("query_result");
1021
1022 let mut expanded_items = Vec::new();
1024 for item in select_items {
1025 match item {
1026 SelectItem::Star => {
1027 for col_name in source_table.column_names() {
1029 expanded_items.push(SelectItem::Column(col_name));
1030 }
1031 }
1032 _ => expanded_items.push(item.clone()),
1033 }
1034 }
1035
1036 let mut column_name_counts: std::collections::HashMap<String, usize> =
1038 std::collections::HashMap::new();
1039
1040 for item in &expanded_items {
1041 let base_name = match item {
1042 SelectItem::Column(name) => name.clone(),
1043 SelectItem::Expression { alias, .. } => alias.clone(),
1044 SelectItem::Star => unreachable!("Star should have been expanded"),
1045 };
1046
1047 let count = column_name_counts.entry(base_name.clone()).or_insert(0);
1049 let column_name = if *count == 0 {
1050 base_name.clone()
1052 } else {
1053 format!("{base_name}_{count}")
1055 };
1056 *count += 1;
1057
1058 computed_table.add_column(DataColumn::new(&column_name));
1059 }
1060
1061 let mut evaluator =
1063 ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone());
1064
1065 for &row_idx in visible_rows {
1066 let mut row_values = Vec::new();
1067
1068 for item in &expanded_items {
1069 let value = match item {
1070 SelectItem::Column(col_name) => {
1071 let col_idx = source_table.get_column_index(col_name).ok_or_else(|| {
1073 let suggestion = self.find_similar_column(source_table, col_name);
1074 match suggestion {
1075 Some(similar) => anyhow::anyhow!(
1076 "Column '{}' not found. Did you mean '{}'?",
1077 col_name,
1078 similar
1079 ),
1080 None => anyhow::anyhow!("Column '{}' not found", col_name),
1081 }
1082 })?;
1083 let row = source_table
1084 .get_row(row_idx)
1085 .ok_or_else(|| anyhow::anyhow!("Row {} not found", row_idx))?;
1086 row.get(col_idx)
1087 .ok_or_else(|| anyhow::anyhow!("Column {} not found in row", col_idx))?
1088 .clone()
1089 }
1090 SelectItem::Expression { expr, .. } => {
1091 evaluator.evaluate(expr, row_idx)?
1093 }
1094 SelectItem::Star => unreachable!("Star should have been expanded"),
1095 };
1096 row_values.push(value);
1097 }
1098
1099 computed_table
1100 .add_row(DataRow::new(row_values))
1101 .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
1102 }
1103
1104 Ok(DataView::new(Arc::new(computed_table)))
1107 }
1108
1109 fn apply_aggregate_select(
1111 &self,
1112 view: DataView,
1113 select_items: &[SelectItem],
1114 ) -> Result<DataView> {
1115 debug!("QueryEngine::apply_aggregate_select - creating single row aggregate result");
1116
1117 let source_table = view.source();
1118 let mut result_table = DataTable::new("aggregate_result");
1119
1120 for item in select_items {
1122 let column_name = match item {
1123 SelectItem::Expression { alias, .. } => alias.clone(),
1124 _ => unreachable!("Should only have expressions in aggregate-only query"),
1125 };
1126 result_table.add_column(DataColumn::new(&column_name));
1127 }
1128
1129 let visible_rows = view.visible_row_indices().to_vec();
1131 let mut evaluator =
1132 ArithmeticEvaluator::with_date_notation(source_table, self.date_notation.clone())
1133 .with_visible_rows(visible_rows);
1134
1135 let mut row_values = Vec::new();
1137 for item in select_items {
1138 match item {
1139 SelectItem::Expression { expr, .. } => {
1140 let value = evaluator.evaluate(expr, 0)?;
1143 row_values.push(value);
1144 }
1145 _ => unreachable!("Should only have expressions in aggregate-only query"),
1146 }
1147 }
1148
1149 result_table
1151 .add_row(DataRow::new(row_values))
1152 .map_err(|e| anyhow::anyhow!("Failed to add aggregate result row: {}", e))?;
1153
1154 Ok(DataView::new(Arc::new(result_table)))
1155 }
1156
1157 fn resolve_select_columns(
1159 &self,
1160 table: &DataTable,
1161 select_items: &[SelectItem],
1162 ) -> Result<Vec<usize>> {
1163 let mut indices = Vec::new();
1164 let table_columns = table.column_names();
1165
1166 for item in select_items {
1167 match item {
1168 SelectItem::Column(col_name) => {
1169 let index = table_columns
1170 .iter()
1171 .position(|c| c.eq_ignore_ascii_case(col_name))
1172 .ok_or_else(|| {
1173 let suggestion = self.find_similar_column(table, col_name);
1174 match suggestion {
1175 Some(similar) => anyhow::anyhow!(
1176 "Column '{}' not found. Did you mean '{}'?",
1177 col_name,
1178 similar
1179 ),
1180 None => anyhow::anyhow!("Column '{}' not found", col_name),
1181 }
1182 })?;
1183 indices.push(index);
1184 }
1185 SelectItem::Star => {
1186 for i in 0..table_columns.len() {
1188 indices.push(i);
1189 }
1190 }
1191 SelectItem::Expression { .. } => {
1192 return Err(anyhow::anyhow!(
1193 "Computed expressions require new table creation"
1194 ));
1195 }
1196 }
1197 }
1198
1199 Ok(indices)
1200 }
1201
1202 fn apply_distinct(&self, view: DataView) -> Result<DataView> {
1204 use std::collections::HashSet;
1205
1206 let source = view.source();
1207 let visible_cols = view.visible_column_indices();
1208 let visible_rows = view.visible_row_indices();
1209
1210 let mut seen_rows = HashSet::new();
1212 let mut unique_row_indices = Vec::new();
1213
1214 for &row_idx in visible_rows {
1215 let mut row_key = Vec::new();
1217 for &col_idx in visible_cols {
1218 let value = source
1219 .get_value(row_idx, col_idx)
1220 .ok_or_else(|| anyhow!("Invalid cell reference"))?;
1221 row_key.push(format!("{:?}", value));
1223 }
1224
1225 if seen_rows.insert(row_key) {
1227 unique_row_indices.push(row_idx);
1229 }
1230 }
1231
1232 Ok(view.with_rows(unique_row_indices))
1234 }
1235
1236 fn apply_multi_order_by(
1238 &self,
1239 mut view: DataView,
1240 order_by_columns: &[OrderByColumn],
1241 ) -> Result<DataView> {
1242 let mut sort_columns = Vec::new();
1244
1245 for order_col in order_by_columns {
1246 let col_index = view
1250 .source()
1251 .get_column_index(&order_col.column)
1252 .ok_or_else(|| {
1253 let suggestion = self.find_similar_column(view.source(), &order_col.column);
1255 match suggestion {
1256 Some(similar) => anyhow::anyhow!(
1257 "Column '{}' not found. Did you mean '{}'?",
1258 order_col.column,
1259 similar
1260 ),
1261 None => {
1262 let available_cols = view.source().column_names().join(", ");
1264 anyhow::anyhow!(
1265 "Column '{}' not found. Available columns: {}",
1266 order_col.column,
1267 available_cols
1268 )
1269 }
1270 }
1271 })?;
1272
1273 let ascending = matches!(order_col.direction, SortDirection::Asc);
1274 sort_columns.push((col_index, ascending));
1275 }
1276
1277 view.apply_multi_sort(&sort_columns)?;
1279 Ok(view)
1280 }
1281
1282 fn apply_group_by(
1284 &self,
1285 view: DataView,
1286 group_by_exprs: &[SqlExpression],
1287 select_items: &[SelectItem],
1288 having: Option<&SqlExpression>,
1289 ) -> Result<DataView> {
1290 self.apply_group_by_expressions(
1292 view,
1293 group_by_exprs,
1294 select_items,
1295 having,
1296 self.case_insensitive,
1297 self.date_notation.clone(),
1298 )
1299 }
1300}
1301
1302#[cfg(test)]
1303mod tests {
1304 use super::*;
1305 use crate::data::datatable::{DataColumn, DataRow, DataValue};
1306
1307 fn create_test_table() -> Arc<DataTable> {
1308 let mut table = DataTable::new("test");
1309
1310 table.add_column(DataColumn::new("id"));
1312 table.add_column(DataColumn::new("name"));
1313 table.add_column(DataColumn::new("age"));
1314
1315 table
1317 .add_row(DataRow::new(vec![
1318 DataValue::Integer(1),
1319 DataValue::String("Alice".to_string()),
1320 DataValue::Integer(30),
1321 ]))
1322 .unwrap();
1323
1324 table
1325 .add_row(DataRow::new(vec![
1326 DataValue::Integer(2),
1327 DataValue::String("Bob".to_string()),
1328 DataValue::Integer(25),
1329 ]))
1330 .unwrap();
1331
1332 table
1333 .add_row(DataRow::new(vec![
1334 DataValue::Integer(3),
1335 DataValue::String("Charlie".to_string()),
1336 DataValue::Integer(35),
1337 ]))
1338 .unwrap();
1339
1340 Arc::new(table)
1341 }
1342
1343 #[test]
1344 fn test_select_all() {
1345 let table = create_test_table();
1346 let engine = QueryEngine::new();
1347
1348 let view = engine
1349 .execute(table.clone(), "SELECT * FROM users")
1350 .unwrap();
1351 assert_eq!(view.row_count(), 3);
1352 assert_eq!(view.column_count(), 3);
1353 }
1354
1355 #[test]
1356 fn test_select_columns() {
1357 let table = create_test_table();
1358 let engine = QueryEngine::new();
1359
1360 let view = engine
1361 .execute(table.clone(), "SELECT name, age FROM users")
1362 .unwrap();
1363 assert_eq!(view.row_count(), 3);
1364 assert_eq!(view.column_count(), 2);
1365 }
1366
1367 #[test]
1368 fn test_select_with_limit() {
1369 let table = create_test_table();
1370 let engine = QueryEngine::new();
1371
1372 let view = engine
1373 .execute(table.clone(), "SELECT * FROM users LIMIT 2")
1374 .unwrap();
1375 assert_eq!(view.row_count(), 2);
1376 }
1377
1378 #[test]
1379 fn test_type_coercion_contains() {
1380 let _ = tracing_subscriber::fmt()
1382 .with_max_level(tracing::Level::DEBUG)
1383 .try_init();
1384
1385 let mut table = DataTable::new("test");
1386 table.add_column(DataColumn::new("id"));
1387 table.add_column(DataColumn::new("status"));
1388 table.add_column(DataColumn::new("price"));
1389
1390 table
1392 .add_row(DataRow::new(vec![
1393 DataValue::Integer(1),
1394 DataValue::String("Pending".to_string()),
1395 DataValue::Float(99.99),
1396 ]))
1397 .unwrap();
1398
1399 table
1400 .add_row(DataRow::new(vec![
1401 DataValue::Integer(2),
1402 DataValue::String("Confirmed".to_string()),
1403 DataValue::Float(150.50),
1404 ]))
1405 .unwrap();
1406
1407 table
1408 .add_row(DataRow::new(vec![
1409 DataValue::Integer(3),
1410 DataValue::String("Pending".to_string()),
1411 DataValue::Float(75.00),
1412 ]))
1413 .unwrap();
1414
1415 let table = Arc::new(table);
1416 let engine = QueryEngine::new();
1417
1418 println!("\n=== Testing WHERE clause with Contains ===");
1419 println!("Table has {} rows", table.row_count());
1420 for i in 0..table.row_count() {
1421 let status = table.get_value(i, 1);
1422 println!("Row {i}: status = {status:?}");
1423 }
1424
1425 println!("\n--- Test 1: status.Contains('pend') ---");
1427 let result = engine.execute(
1428 table.clone(),
1429 "SELECT * FROM test WHERE status.Contains('pend')",
1430 );
1431 match result {
1432 Ok(view) => {
1433 println!("SUCCESS: Found {} matching rows", view.row_count());
1434 assert_eq!(view.row_count(), 2); }
1436 Err(e) => {
1437 panic!("Query failed: {e}");
1438 }
1439 }
1440
1441 println!("\n--- Test 2: price.Contains('9') ---");
1443 let result = engine.execute(
1444 table.clone(),
1445 "SELECT * FROM test WHERE price.Contains('9')",
1446 );
1447 match result {
1448 Ok(view) => {
1449 println!(
1450 "SUCCESS: Found {} matching rows with price containing '9'",
1451 view.row_count()
1452 );
1453 assert!(view.row_count() >= 1);
1455 }
1456 Err(e) => {
1457 panic!("Numeric coercion query failed: {e}");
1458 }
1459 }
1460
1461 println!("\n=== All tests passed! ===");
1462 }
1463
1464 #[test]
1465 fn test_not_in_clause() {
1466 let _ = tracing_subscriber::fmt()
1468 .with_max_level(tracing::Level::DEBUG)
1469 .try_init();
1470
1471 let mut table = DataTable::new("test");
1472 table.add_column(DataColumn::new("id"));
1473 table.add_column(DataColumn::new("country"));
1474
1475 table
1477 .add_row(DataRow::new(vec![
1478 DataValue::Integer(1),
1479 DataValue::String("CA".to_string()),
1480 ]))
1481 .unwrap();
1482
1483 table
1484 .add_row(DataRow::new(vec![
1485 DataValue::Integer(2),
1486 DataValue::String("US".to_string()),
1487 ]))
1488 .unwrap();
1489
1490 table
1491 .add_row(DataRow::new(vec![
1492 DataValue::Integer(3),
1493 DataValue::String("UK".to_string()),
1494 ]))
1495 .unwrap();
1496
1497 let table = Arc::new(table);
1498 let engine = QueryEngine::new();
1499
1500 println!("\n=== Testing NOT IN clause ===");
1501 println!("Table has {} rows", table.row_count());
1502 for i in 0..table.row_count() {
1503 let country = table.get_value(i, 1);
1504 println!("Row {i}: country = {country:?}");
1505 }
1506
1507 println!("\n--- Test: country NOT IN ('CA') ---");
1509 let result = engine.execute(
1510 table.clone(),
1511 "SELECT * FROM test WHERE country NOT IN ('CA')",
1512 );
1513 match result {
1514 Ok(view) => {
1515 println!("SUCCESS: Found {} rows not in ('CA')", view.row_count());
1516 assert_eq!(view.row_count(), 2); }
1518 Err(e) => {
1519 panic!("NOT IN query failed: {e}");
1520 }
1521 }
1522
1523 println!("\n=== NOT IN test complete! ===");
1524 }
1525
1526 #[test]
1527 fn test_case_insensitive_in_and_not_in() {
1528 let _ = tracing_subscriber::fmt()
1530 .with_max_level(tracing::Level::DEBUG)
1531 .try_init();
1532
1533 let mut table = DataTable::new("test");
1534 table.add_column(DataColumn::new("id"));
1535 table.add_column(DataColumn::new("country"));
1536
1537 table
1539 .add_row(DataRow::new(vec![
1540 DataValue::Integer(1),
1541 DataValue::String("CA".to_string()), ]))
1543 .unwrap();
1544
1545 table
1546 .add_row(DataRow::new(vec![
1547 DataValue::Integer(2),
1548 DataValue::String("us".to_string()), ]))
1550 .unwrap();
1551
1552 table
1553 .add_row(DataRow::new(vec![
1554 DataValue::Integer(3),
1555 DataValue::String("UK".to_string()), ]))
1557 .unwrap();
1558
1559 let table = Arc::new(table);
1560
1561 println!("\n=== Testing Case-Insensitive IN clause ===");
1562 println!("Table has {} rows", table.row_count());
1563 for i in 0..table.row_count() {
1564 let country = table.get_value(i, 1);
1565 println!("Row {i}: country = {country:?}");
1566 }
1567
1568 println!("\n--- Test: country IN ('ca') with case_insensitive=true ---");
1570 let engine = QueryEngine::with_case_insensitive(true);
1571 let result = engine.execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1572 match result {
1573 Ok(view) => {
1574 println!(
1575 "SUCCESS: Found {} rows matching 'ca' (case-insensitive)",
1576 view.row_count()
1577 );
1578 assert_eq!(view.row_count(), 1); }
1580 Err(e) => {
1581 panic!("Case-insensitive IN query failed: {e}");
1582 }
1583 }
1584
1585 println!("\n--- Test: country NOT IN ('ca') with case_insensitive=true ---");
1587 let result = engine.execute(
1588 table.clone(),
1589 "SELECT * FROM test WHERE country NOT IN ('ca')",
1590 );
1591 match result {
1592 Ok(view) => {
1593 println!(
1594 "SUCCESS: Found {} rows not matching 'ca' (case-insensitive)",
1595 view.row_count()
1596 );
1597 assert_eq!(view.row_count(), 2); }
1599 Err(e) => {
1600 panic!("Case-insensitive NOT IN query failed: {e}");
1601 }
1602 }
1603
1604 println!("\n--- Test: country IN ('ca') with case_insensitive=false ---");
1606 let engine_case_sensitive = QueryEngine::new(); let result = engine_case_sensitive
1608 .execute(table.clone(), "SELECT * FROM test WHERE country IN ('ca')");
1609 match result {
1610 Ok(view) => {
1611 println!(
1612 "SUCCESS: Found {} rows matching 'ca' (case-sensitive)",
1613 view.row_count()
1614 );
1615 assert_eq!(view.row_count(), 0); }
1617 Err(e) => {
1618 panic!("Case-sensitive IN query failed: {e}");
1619 }
1620 }
1621
1622 println!("\n=== Case-insensitive IN/NOT IN test complete! ===");
1623 }
1624
1625 #[test]
1626 #[ignore = "Parentheses in WHERE clause not yet implemented"]
1627 fn test_parentheses_in_where_clause() {
1628 let _ = tracing_subscriber::fmt()
1630 .with_max_level(tracing::Level::DEBUG)
1631 .try_init();
1632
1633 let mut table = DataTable::new("test");
1634 table.add_column(DataColumn::new("id"));
1635 table.add_column(DataColumn::new("status"));
1636 table.add_column(DataColumn::new("priority"));
1637
1638 table
1640 .add_row(DataRow::new(vec![
1641 DataValue::Integer(1),
1642 DataValue::String("Pending".to_string()),
1643 DataValue::String("High".to_string()),
1644 ]))
1645 .unwrap();
1646
1647 table
1648 .add_row(DataRow::new(vec![
1649 DataValue::Integer(2),
1650 DataValue::String("Complete".to_string()),
1651 DataValue::String("High".to_string()),
1652 ]))
1653 .unwrap();
1654
1655 table
1656 .add_row(DataRow::new(vec![
1657 DataValue::Integer(3),
1658 DataValue::String("Pending".to_string()),
1659 DataValue::String("Low".to_string()),
1660 ]))
1661 .unwrap();
1662
1663 table
1664 .add_row(DataRow::new(vec![
1665 DataValue::Integer(4),
1666 DataValue::String("Complete".to_string()),
1667 DataValue::String("Low".to_string()),
1668 ]))
1669 .unwrap();
1670
1671 let table = Arc::new(table);
1672 let engine = QueryEngine::new();
1673
1674 println!("\n=== Testing Parentheses in WHERE clause ===");
1675 println!("Table has {} rows", table.row_count());
1676 for i in 0..table.row_count() {
1677 let status = table.get_value(i, 1);
1678 let priority = table.get_value(i, 2);
1679 println!("Row {i}: status = {status:?}, priority = {priority:?}");
1680 }
1681
1682 println!("\n--- Test: (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low') ---");
1684 let result = engine.execute(
1685 table.clone(),
1686 "SELECT * FROM test WHERE (status = 'Pending' AND priority = 'High') OR (status = 'Complete' AND priority = 'Low')",
1687 );
1688 match result {
1689 Ok(view) => {
1690 println!(
1691 "SUCCESS: Found {} rows with parenthetical logic",
1692 view.row_count()
1693 );
1694 assert_eq!(view.row_count(), 2); }
1696 Err(e) => {
1697 panic!("Parentheses query failed: {e}");
1698 }
1699 }
1700
1701 println!("\n=== Parentheses test complete! ===");
1702 }
1703
1704 #[test]
1705 #[ignore = "Numeric type coercion needs fixing"]
1706 fn test_numeric_type_coercion() {
1707 let _ = tracing_subscriber::fmt()
1709 .with_max_level(tracing::Level::DEBUG)
1710 .try_init();
1711
1712 let mut table = DataTable::new("test");
1713 table.add_column(DataColumn::new("id"));
1714 table.add_column(DataColumn::new("price"));
1715 table.add_column(DataColumn::new("quantity"));
1716
1717 table
1719 .add_row(DataRow::new(vec![
1720 DataValue::Integer(1),
1721 DataValue::Float(99.50), DataValue::Integer(100),
1723 ]))
1724 .unwrap();
1725
1726 table
1727 .add_row(DataRow::new(vec![
1728 DataValue::Integer(2),
1729 DataValue::Float(150.0), DataValue::Integer(200),
1731 ]))
1732 .unwrap();
1733
1734 table
1735 .add_row(DataRow::new(vec![
1736 DataValue::Integer(3),
1737 DataValue::Integer(75), DataValue::Integer(50),
1739 ]))
1740 .unwrap();
1741
1742 let table = Arc::new(table);
1743 let engine = QueryEngine::new();
1744
1745 println!("\n=== Testing Numeric Type Coercion ===");
1746 println!("Table has {} rows", table.row_count());
1747 for i in 0..table.row_count() {
1748 let price = table.get_value(i, 1);
1749 let quantity = table.get_value(i, 2);
1750 println!("Row {i}: price = {price:?}, quantity = {quantity:?}");
1751 }
1752
1753 println!("\n--- Test: price.Contains('.') ---");
1755 let result = engine.execute(
1756 table.clone(),
1757 "SELECT * FROM test WHERE price.Contains('.')",
1758 );
1759 match result {
1760 Ok(view) => {
1761 println!(
1762 "SUCCESS: Found {} rows with decimal points in price",
1763 view.row_count()
1764 );
1765 assert_eq!(view.row_count(), 2); }
1767 Err(e) => {
1768 panic!("Numeric Contains query failed: {e}");
1769 }
1770 }
1771
1772 println!("\n--- Test: quantity.Contains('0') ---");
1774 let result = engine.execute(
1775 table.clone(),
1776 "SELECT * FROM test WHERE quantity.Contains('0')",
1777 );
1778 match result {
1779 Ok(view) => {
1780 println!(
1781 "SUCCESS: Found {} rows with '0' in quantity",
1782 view.row_count()
1783 );
1784 assert_eq!(view.row_count(), 2); }
1786 Err(e) => {
1787 panic!("Integer Contains query failed: {e}");
1788 }
1789 }
1790
1791 println!("\n=== Numeric type coercion test complete! ===");
1792 }
1793
1794 #[test]
1795 fn test_datetime_comparisons() {
1796 let _ = tracing_subscriber::fmt()
1798 .with_max_level(tracing::Level::DEBUG)
1799 .try_init();
1800
1801 let mut table = DataTable::new("test");
1802 table.add_column(DataColumn::new("id"));
1803 table.add_column(DataColumn::new("created_date"));
1804
1805 table
1807 .add_row(DataRow::new(vec![
1808 DataValue::Integer(1),
1809 DataValue::String("2024-12-15".to_string()),
1810 ]))
1811 .unwrap();
1812
1813 table
1814 .add_row(DataRow::new(vec![
1815 DataValue::Integer(2),
1816 DataValue::String("2025-01-15".to_string()),
1817 ]))
1818 .unwrap();
1819
1820 table
1821 .add_row(DataRow::new(vec![
1822 DataValue::Integer(3),
1823 DataValue::String("2025-02-15".to_string()),
1824 ]))
1825 .unwrap();
1826
1827 let table = Arc::new(table);
1828 let engine = QueryEngine::new();
1829
1830 println!("\n=== Testing DateTime Comparisons ===");
1831 println!("Table has {} rows", table.row_count());
1832 for i in 0..table.row_count() {
1833 let date = table.get_value(i, 1);
1834 println!("Row {i}: created_date = {date:?}");
1835 }
1836
1837 println!("\n--- Test: created_date > DateTime(2025,1,1) ---");
1839 let result = engine.execute(
1840 table.clone(),
1841 "SELECT * FROM test WHERE created_date > DateTime(2025,1,1)",
1842 );
1843 match result {
1844 Ok(view) => {
1845 println!("SUCCESS: Found {} rows after 2025-01-01", view.row_count());
1846 assert_eq!(view.row_count(), 2); }
1848 Err(e) => {
1849 panic!("DateTime comparison query failed: {e}");
1850 }
1851 }
1852
1853 println!("\n=== DateTime comparison test complete! ===");
1854 }
1855
1856 #[test]
1857 fn test_not_with_method_calls() {
1858 let _ = tracing_subscriber::fmt()
1860 .with_max_level(tracing::Level::DEBUG)
1861 .try_init();
1862
1863 let mut table = DataTable::new("test");
1864 table.add_column(DataColumn::new("id"));
1865 table.add_column(DataColumn::new("status"));
1866
1867 table
1869 .add_row(DataRow::new(vec![
1870 DataValue::Integer(1),
1871 DataValue::String("Pending Review".to_string()),
1872 ]))
1873 .unwrap();
1874
1875 table
1876 .add_row(DataRow::new(vec![
1877 DataValue::Integer(2),
1878 DataValue::String("Complete".to_string()),
1879 ]))
1880 .unwrap();
1881
1882 table
1883 .add_row(DataRow::new(vec![
1884 DataValue::Integer(3),
1885 DataValue::String("Pending Approval".to_string()),
1886 ]))
1887 .unwrap();
1888
1889 let table = Arc::new(table);
1890 let engine = QueryEngine::with_case_insensitive(true);
1891
1892 println!("\n=== Testing NOT with Method Calls ===");
1893 println!("Table has {} rows", table.row_count());
1894 for i in 0..table.row_count() {
1895 let status = table.get_value(i, 1);
1896 println!("Row {i}: status = {status:?}");
1897 }
1898
1899 println!("\n--- Test: NOT status.Contains('pend') ---");
1901 let result = engine.execute(
1902 table.clone(),
1903 "SELECT * FROM test WHERE NOT status.Contains('pend')",
1904 );
1905 match result {
1906 Ok(view) => {
1907 println!(
1908 "SUCCESS: Found {} rows NOT containing 'pend'",
1909 view.row_count()
1910 );
1911 assert_eq!(view.row_count(), 1); }
1913 Err(e) => {
1914 panic!("NOT Contains query failed: {e}");
1915 }
1916 }
1917
1918 println!("\n--- Test: NOT status.StartsWith('Pending') ---");
1920 let result = engine.execute(
1921 table.clone(),
1922 "SELECT * FROM test WHERE NOT status.StartsWith('Pending')",
1923 );
1924 match result {
1925 Ok(view) => {
1926 println!(
1927 "SUCCESS: Found {} rows NOT starting with 'Pending'",
1928 view.row_count()
1929 );
1930 assert_eq!(view.row_count(), 1); }
1932 Err(e) => {
1933 panic!("NOT StartsWith query failed: {e}");
1934 }
1935 }
1936
1937 println!("\n=== NOT with method calls test complete! ===");
1938 }
1939
1940 #[test]
1941 #[ignore = "Complex logical expressions with parentheses not yet implemented"]
1942 fn test_complex_logical_expressions() {
1943 let _ = tracing_subscriber::fmt()
1945 .with_max_level(tracing::Level::DEBUG)
1946 .try_init();
1947
1948 let mut table = DataTable::new("test");
1949 table.add_column(DataColumn::new("id"));
1950 table.add_column(DataColumn::new("status"));
1951 table.add_column(DataColumn::new("priority"));
1952 table.add_column(DataColumn::new("assigned"));
1953
1954 table
1956 .add_row(DataRow::new(vec![
1957 DataValue::Integer(1),
1958 DataValue::String("Pending".to_string()),
1959 DataValue::String("High".to_string()),
1960 DataValue::String("John".to_string()),
1961 ]))
1962 .unwrap();
1963
1964 table
1965 .add_row(DataRow::new(vec![
1966 DataValue::Integer(2),
1967 DataValue::String("Complete".to_string()),
1968 DataValue::String("High".to_string()),
1969 DataValue::String("Jane".to_string()),
1970 ]))
1971 .unwrap();
1972
1973 table
1974 .add_row(DataRow::new(vec![
1975 DataValue::Integer(3),
1976 DataValue::String("Pending".to_string()),
1977 DataValue::String("Low".to_string()),
1978 DataValue::String("John".to_string()),
1979 ]))
1980 .unwrap();
1981
1982 table
1983 .add_row(DataRow::new(vec![
1984 DataValue::Integer(4),
1985 DataValue::String("In Progress".to_string()),
1986 DataValue::String("Medium".to_string()),
1987 DataValue::String("Jane".to_string()),
1988 ]))
1989 .unwrap();
1990
1991 let table = Arc::new(table);
1992 let engine = QueryEngine::new();
1993
1994 println!("\n=== Testing Complex Logical Expressions ===");
1995 println!("Table has {} rows", table.row_count());
1996 for i in 0..table.row_count() {
1997 let status = table.get_value(i, 1);
1998 let priority = table.get_value(i, 2);
1999 let assigned = table.get_value(i, 3);
2000 println!(
2001 "Row {i}: status = {status:?}, priority = {priority:?}, assigned = {assigned:?}"
2002 );
2003 }
2004
2005 println!("\n--- Test: status = 'Pending' AND (priority = 'High' OR assigned = 'John') ---");
2007 let result = engine.execute(
2008 table.clone(),
2009 "SELECT * FROM test WHERE status = 'Pending' AND (priority = 'High' OR assigned = 'John')",
2010 );
2011 match result {
2012 Ok(view) => {
2013 println!(
2014 "SUCCESS: Found {} rows with complex logic",
2015 view.row_count()
2016 );
2017 assert_eq!(view.row_count(), 2); }
2019 Err(e) => {
2020 panic!("Complex logic query failed: {e}");
2021 }
2022 }
2023
2024 println!("\n--- Test: NOT (status.Contains('Complete') OR priority = 'Low') ---");
2026 let result = engine.execute(
2027 table.clone(),
2028 "SELECT * FROM test WHERE NOT (status.Contains('Complete') OR priority = 'Low')",
2029 );
2030 match result {
2031 Ok(view) => {
2032 println!(
2033 "SUCCESS: Found {} rows with NOT complex logic",
2034 view.row_count()
2035 );
2036 assert_eq!(view.row_count(), 2); }
2038 Err(e) => {
2039 panic!("NOT complex logic query failed: {e}");
2040 }
2041 }
2042
2043 println!("\n=== Complex logical expressions test complete! ===");
2044 }
2045
2046 #[test]
2047 fn test_mixed_data_types_and_edge_cases() {
2048 let _ = tracing_subscriber::fmt()
2050 .with_max_level(tracing::Level::DEBUG)
2051 .try_init();
2052
2053 let mut table = DataTable::new("test");
2054 table.add_column(DataColumn::new("id"));
2055 table.add_column(DataColumn::new("value"));
2056 table.add_column(DataColumn::new("nullable_field"));
2057
2058 table
2060 .add_row(DataRow::new(vec![
2061 DataValue::Integer(1),
2062 DataValue::String("123.45".to_string()),
2063 DataValue::String("present".to_string()),
2064 ]))
2065 .unwrap();
2066
2067 table
2068 .add_row(DataRow::new(vec![
2069 DataValue::Integer(2),
2070 DataValue::Float(678.90),
2071 DataValue::Null,
2072 ]))
2073 .unwrap();
2074
2075 table
2076 .add_row(DataRow::new(vec![
2077 DataValue::Integer(3),
2078 DataValue::Boolean(true),
2079 DataValue::String("also present".to_string()),
2080 ]))
2081 .unwrap();
2082
2083 table
2084 .add_row(DataRow::new(vec![
2085 DataValue::Integer(4),
2086 DataValue::String("false".to_string()),
2087 DataValue::Null,
2088 ]))
2089 .unwrap();
2090
2091 let table = Arc::new(table);
2092 let engine = QueryEngine::new();
2093
2094 println!("\n=== Testing Mixed Data Types and Edge Cases ===");
2095 println!("Table has {} rows", table.row_count());
2096 for i in 0..table.row_count() {
2097 let value = table.get_value(i, 1);
2098 let nullable = table.get_value(i, 2);
2099 println!("Row {i}: value = {value:?}, nullable_field = {nullable:?}");
2100 }
2101
2102 println!("\n--- Test: value.Contains('true') (boolean to string coercion) ---");
2104 let result = engine.execute(
2105 table.clone(),
2106 "SELECT * FROM test WHERE value.Contains('true')",
2107 );
2108 match result {
2109 Ok(view) => {
2110 println!(
2111 "SUCCESS: Found {} rows with boolean coercion",
2112 view.row_count()
2113 );
2114 assert_eq!(view.row_count(), 1); }
2116 Err(e) => {
2117 panic!("Boolean coercion query failed: {e}");
2118 }
2119 }
2120
2121 println!("\n--- Test: id IN (1, 3) ---");
2123 let result = engine.execute(table.clone(), "SELECT * FROM test WHERE id IN (1, 3)");
2124 match result {
2125 Ok(view) => {
2126 println!("SUCCESS: Found {} rows with IN clause", view.row_count());
2127 assert_eq!(view.row_count(), 2); }
2129 Err(e) => {
2130 panic!("Multiple IN values query failed: {e}");
2131 }
2132 }
2133
2134 println!("\n=== Mixed data types test complete! ===");
2135 }
2136
2137 #[test]
2139 fn test_aggregate_only_single_row() {
2140 let table = create_test_stock_data();
2141 let engine = QueryEngine::new();
2142
2143 let result = engine
2145 .execute(
2146 table.clone(),
2147 "SELECT COUNT(*), MIN(close), MAX(close), AVG(close) FROM stock",
2148 )
2149 .expect("Query should succeed");
2150
2151 assert_eq!(
2152 result.row_count(),
2153 1,
2154 "Aggregate-only query should return exactly 1 row"
2155 );
2156 assert_eq!(result.column_count(), 4, "Should have 4 aggregate columns");
2157
2158 let source = result.source();
2160 let row = source.get_row(0).expect("Should have first row");
2161
2162 assert_eq!(row.values[0], DataValue::Integer(5));
2164
2165 assert_eq!(row.values[1], DataValue::Float(99.5));
2167
2168 assert_eq!(row.values[2], DataValue::Float(105.0));
2170
2171 if let DataValue::Float(avg) = &row.values[3] {
2173 assert!(
2174 (avg - 102.4).abs() < 0.01,
2175 "Average should be approximately 102.4, got {}",
2176 avg
2177 );
2178 } else {
2179 panic!("AVG should return a Float value");
2180 }
2181 }
2182
2183 #[test]
2185 fn test_single_aggregate_single_row() {
2186 let table = create_test_stock_data();
2187 let engine = QueryEngine::new();
2188
2189 let result = engine
2190 .execute(table.clone(), "SELECT COUNT(*) FROM stock")
2191 .expect("Query should succeed");
2192
2193 assert_eq!(
2194 result.row_count(),
2195 1,
2196 "Single aggregate query should return exactly 1 row"
2197 );
2198 assert_eq!(result.column_count(), 1, "Should have 1 column");
2199
2200 let source = result.source();
2201 let row = source.get_row(0).expect("Should have first row");
2202 assert_eq!(row.values[0], DataValue::Integer(5));
2203 }
2204
2205 #[test]
2207 fn test_aggregate_with_where_single_row() {
2208 let table = create_test_stock_data();
2209 let engine = QueryEngine::new();
2210
2211 let result = engine
2213 .execute(
2214 table.clone(),
2215 "SELECT COUNT(*), MIN(close), MAX(close) FROM stock WHERE close >= 103.0",
2216 )
2217 .expect("Query should succeed");
2218
2219 assert_eq!(
2220 result.row_count(),
2221 1,
2222 "Filtered aggregate query should return exactly 1 row"
2223 );
2224 assert_eq!(result.column_count(), 3, "Should have 3 aggregate columns");
2225
2226 let source = result.source();
2227 let row = source.get_row(0).expect("Should have first row");
2228
2229 assert_eq!(row.values[0], DataValue::Integer(2));
2231 assert_eq!(row.values[1], DataValue::Float(103.5)); assert_eq!(row.values[2], DataValue::Float(105.0)); }
2234
2235 #[test]
2236 fn test_not_in_parsing() {
2237 use crate::sql::recursive_parser::Parser;
2238
2239 let query = "SELECT * FROM test WHERE country NOT IN ('CA')";
2240 println!("\n=== Testing NOT IN parsing ===");
2241 println!("Parsing query: {query}");
2242
2243 let mut parser = Parser::new(query);
2244 match parser.parse() {
2245 Ok(statement) => {
2246 println!("Parsed statement: {statement:#?}");
2247 if let Some(where_clause) = statement.where_clause {
2248 println!("WHERE conditions: {:#?}", where_clause.conditions);
2249 if let Some(first_condition) = where_clause.conditions.first() {
2250 println!("First condition expression: {:#?}", first_condition.expr);
2251 }
2252 }
2253 }
2254 Err(e) => {
2255 panic!("Parse error: {e}");
2256 }
2257 }
2258 }
2259
2260 fn create_test_stock_data() -> Arc<DataTable> {
2262 let mut table = DataTable::new("stock");
2263
2264 table.add_column(DataColumn::new("symbol"));
2265 table.add_column(DataColumn::new("close"));
2266 table.add_column(DataColumn::new("volume"));
2267
2268 let test_data = vec![
2270 ("AAPL", 99.5, 1000),
2271 ("AAPL", 101.2, 1500),
2272 ("AAPL", 103.5, 2000),
2273 ("AAPL", 105.0, 1200),
2274 ("AAPL", 102.8, 1800),
2275 ];
2276
2277 for (symbol, close, volume) in test_data {
2278 table
2279 .add_row(DataRow::new(vec![
2280 DataValue::String(symbol.to_string()),
2281 DataValue::Float(close),
2282 DataValue::Integer(volume),
2283 ]))
2284 .expect("Should add row successfully");
2285 }
2286
2287 Arc::new(table)
2288 }
2289}
2290
2291#[cfg(test)]
2292#[path = "query_engine_tests.rs"]
2293mod query_engine_tests;