1use arrow::array::{
18 Array, ArrayRef, BooleanArray, BooleanBuilder, Float32Array, Float64Array, Int8Array,
19 Int16Array, Int32Array, Int64Array, Int64Builder, LargeStringArray, RecordBatch, StringArray,
20 StructArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, new_null_array,
21};
22use arrow::compute::{
23 SortColumn, SortOptions, cast, concat_batches, filter_record_batch, lexsort_to_indices, take,
24};
25use arrow::datatypes::{DataType, Field, Float64Type, Int64Type, Schema};
26use llkv_aggregate::{AggregateAccumulator, AggregateKind, AggregateSpec, AggregateState};
27use llkv_column_map::gather::gather_indices_from_batches;
28use llkv_column_map::store::Projection as StoreProjection;
29use llkv_column_map::types::LogicalFieldId;
30use llkv_expr::SubqueryId;
31use llkv_expr::expr::{
32 AggregateCall, BinaryOp, CompareOp, Expr as LlkvExpr, Filter, Operator, ScalarExpr,
33};
34use llkv_expr::literal::Literal;
35use llkv_expr::typed_predicate::{
36 build_bool_predicate, build_fixed_width_predicate, build_var_width_predicate,
37};
38use llkv_join::cross_join_pair;
39use llkv_plan::{
40 AggregateExpr, AggregateFunction, CanonicalRow, CompoundOperator, CompoundQuantifier,
41 CompoundSelectComponent, CompoundSelectPlan, OrderByPlan, OrderSortType, OrderTarget,
42 PlanValue, SelectPlan, SelectProjection,
43};
44use llkv_result::Error;
45use llkv_storage::pager::Pager;
46use llkv_table::table::{
47 RowIdFilter, ScanOrderDirection, ScanOrderSpec, ScanOrderTransform, ScanProjection,
48 ScanStreamOptions,
49};
50use llkv_table::types::FieldId;
51use llkv_table::{NumericArray, NumericArrayMap, NumericKernels, ROW_ID_FIELD_ID};
52use rayon::prelude::*;
53use rustc_hash::{FxHashMap, FxHashSet};
54use simd_r_drive_entry_handle::EntryHandle;
55use std::convert::TryFrom;
56use std::fmt;
57use std::sync::Arc;
58use std::sync::atomic::Ordering;
59
60#[cfg(test)]
61use std::cell::RefCell;
62
63pub mod insert;
68pub mod translation;
69pub mod types;
70pub mod utils;
71
/// Convenience alias for fallible executor operations backed by [`llkv_result::Error`].
pub type ExecutorResult<T> = Result<T, Error>;
78
79pub use insert::{
80 build_array_for_column, normalize_insert_value_for_column, resolve_insert_columns,
81};
82pub use translation::{
83 build_projected_columns, build_wildcard_projections, full_table_scan_filter,
84 resolve_field_id_from_schema, schema_for_projections, translate_predicate,
85 translate_predicate_with, translate_scalar, translate_scalar_with,
86};
87pub use types::{
88 ExecutorColumn, ExecutorMultiColumnUnique, ExecutorRowBatch, ExecutorSchema, ExecutorTable,
89 ExecutorTableProvider,
90};
91pub use utils::current_time_micros;
92
/// Hashable, equality-comparable value used to bucket rows during GROUP BY.
///
/// NOTE(review): there is no float variant — presumably float group keys are
/// normalized or rejected elsewhere; confirm before relying on this.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
enum GroupKeyValue {
    // SQL NULL keys group together with other NULLs.
    Null,
    Int(i64),
    Bool(bool),
    String(String),
}
100
/// Scalar result produced by an aggregate computation.
#[derive(Clone, Debug, PartialEq)]
enum AggregateValue {
    Int64(i64),
    Float64(f64),
}

impl AggregateValue {
    /// Collapse to `i64`; float payloads are truncated toward zero
    /// (the semantics of Rust's `as` cast).
    fn to_i64(&self) -> i64 {
        match *self {
            Self::Int64(value) => value,
            Self::Float64(value) => value as i64,
        }
    }

    /// Collapse to `f64`; integer payloads convert losslessly only within
    /// the 53-bit mantissa range.
    #[allow(dead_code)]
    fn to_f64(&self) -> f64 {
        match *self {
            Self::Int64(value) => value as f64,
            Self::Float64(value) => value,
        }
    }
}
127
/// A single representative row for a group: the batch it lives in plus the
/// row's offset within that batch.
struct GroupState {
    batch: RecordBatch,
    row_idx: usize,
}
132
/// Bookkeeping for one GROUP BY bucket while aggregates are accumulated.
struct GroupAggregateState {
    // Batch/row used to materialize the group's key columns in the output.
    representative_batch_idx: usize,
    representative_row: usize,
    // Every (batch index, row index) location belonging to this group.
    row_locations: Vec<(usize, usize)>,
}
139
/// One column of the final projected output: its Arrow field plus where the
/// data comes from.
struct OutputColumn {
    field: Field,
    source: OutputSource,
}

/// Origin of an output column's values.
enum OutputSource {
    // Copied straight from the scanned table column at `index`.
    TableColumn { index: usize },
    // Produced by evaluating the computed projection at `projection_index`.
    Computed { projection_index: usize },
}
149
#[cfg(test)]
thread_local! {
    // Test-only, per-thread stack of human-readable query labels used to tag
    // diagnostics while a labelled query executes on this thread.
    static QUERY_LABEL_STACK: RefCell<Vec<String>> = const { RefCell::new(Vec::new()) };
}
158
/// RAII guard returned by [`push_query_label`]; in test builds, dropping it
/// pops the label that was pushed for the current thread.
pub struct QueryLogGuard {
    // Prevents construction outside this module.
    _private: (),
}
163
#[cfg(test)]
/// Push a query label onto the thread-local stack (test builds only).
/// The label is popped when the returned guard is dropped.
pub fn push_query_label(label: impl Into<String>) -> QueryLogGuard {
    QUERY_LABEL_STACK.with(|stack| stack.borrow_mut().push(label.into()));
    QueryLogGuard { _private: () }
}

#[cfg(not(test))]
/// No-op variant for non-test builds: returns a guard without recording
/// anything, so callers can label queries unconditionally.
#[inline]
pub fn push_query_label(_label: impl Into<String>) -> QueryLogGuard {
    QueryLogGuard { _private: () }
}
181
#[cfg(test)]
impl Drop for QueryLogGuard {
    fn drop(&mut self) {
        // Pop the label this guard pushed; a pop on an empty stack is
        // deliberately ignored.
        QUERY_LABEL_STACK.with(|stack| {
            let _ = stack.borrow_mut().pop();
        });
    }
}

#[cfg(not(test))]
impl Drop for QueryLogGuard {
    // Intentionally empty: kept so the guard type has a Drop impl (and thus
    // the same drop-ordering behavior) in both build configurations.
    #[inline]
    fn drop(&mut self) {
    }
}
198
#[cfg(test)]
/// Return the innermost query label for this thread, if any (test builds).
pub fn current_query_label() -> Option<String> {
    QUERY_LABEL_STACK.with(|stack| stack.borrow().last().cloned())
}

#[cfg(not(test))]
/// Non-test builds never record labels, so there is never one to report.
#[inline]
pub fn current_query_label() -> Option<String> {
    None
}
213
214fn try_extract_simple_column<F: AsRef<str>>(expr: &ScalarExpr<F>) -> Option<&str> {
227 match expr {
228 ScalarExpr::Column(name) => Some(name.as_ref()),
229 ScalarExpr::Binary { left, op, right } => {
231 match op {
233 BinaryOp::Add => {
234 if matches!(left.as_ref(), ScalarExpr::Literal(Literal::Integer(0))) {
236 return try_extract_simple_column(right);
237 }
238 if matches!(right.as_ref(), ScalarExpr::Literal(Literal::Integer(0))) {
239 return try_extract_simple_column(left);
240 }
241 }
242 BinaryOp::Multiply => {
245 if matches!(left.as_ref(), ScalarExpr::Literal(Literal::Integer(-1))) {
247 return try_extract_simple_column(right);
248 }
249 if matches!(right.as_ref(), ScalarExpr::Literal(Literal::Integer(-1))) {
250 return try_extract_simple_column(left);
251 }
252 if matches!(left.as_ref(), ScalarExpr::Literal(Literal::Integer(1))) {
254 return try_extract_simple_column(right);
255 }
256 if matches!(right.as_ref(), ScalarExpr::Literal(Literal::Integer(1))) {
257 return try_extract_simple_column(left);
258 }
259 }
260 _ => {}
261 }
262 None
263 }
264 _ => None,
265 }
266}
267
268fn plan_values_to_arrow_array(values: &[PlanValue]) -> ExecutorResult<ArrayRef> {
273 use arrow::array::{Float64Array, Int64Array, StringArray};
274
275 let mut value_type = None;
277 for v in values {
278 if !matches!(v, PlanValue::Null) {
279 value_type = Some(v);
280 break;
281 }
282 }
283
284 match value_type {
285 Some(PlanValue::Integer(_)) => {
286 let int_values: Vec<Option<i64>> = values
287 .iter()
288 .map(|v| match v {
289 PlanValue::Integer(i) => Some(*i),
290 PlanValue::Null => None,
291 _ => Some(0), })
293 .collect();
294 Ok(Arc::new(Int64Array::from(int_values)) as ArrayRef)
295 }
296 Some(PlanValue::Float(_)) => {
297 let float_values: Vec<Option<f64>> = values
298 .iter()
299 .map(|v| match v {
300 PlanValue::Float(f) => Some(*f),
301 PlanValue::Integer(i) => Some(*i as f64),
302 PlanValue::Null => None,
303 _ => Some(0.0), })
305 .collect();
306 Ok(Arc::new(Float64Array::from(float_values)) as ArrayRef)
307 }
308 Some(PlanValue::String(_)) => {
309 let string_values: Vec<Option<&str>> = values
310 .iter()
311 .map(|v| match v {
312 PlanValue::String(s) => Some(s.as_str()),
313 PlanValue::Null => None,
314 _ => Some(""), })
316 .collect();
317 Ok(Arc::new(StringArray::from(string_values)) as ArrayRef)
318 }
319 _ => {
320 let null_values: Vec<Option<i64>> = vec![None; values.len()];
322 Ok(Arc::new(Int64Array::from(null_values)) as ArrayRef)
323 }
324 }
325}
326
327fn resolve_column_name_to_index(
336 col_name: &str,
337 column_lookup_map: &FxHashMap<String, usize>,
338) -> Option<usize> {
339 let col_lower = col_name.to_ascii_lowercase();
340
341 if let Some(&idx) = column_lookup_map.get(&col_lower) {
343 return Some(idx);
344 }
345
346 let unqualified = col_name
349 .rsplit('.')
350 .next()
351 .unwrap_or(col_name)
352 .to_ascii_lowercase();
353 column_lookup_map
354 .iter()
355 .find(|(k, _)| k.ends_with(&format!(".{}", unqualified)) || k == &&unqualified)
356 .map(|(_, &idx)| idx)
357}
358
/// Executes `SelectPlan`s against tables resolved through an
/// [`ExecutorTableProvider`].
pub struct QueryExecutor<P>
where
    P: Pager<Blob = EntryHandle> + Send + Sync,
{
    // Source of executor tables, looked up by qualified name per query.
    provider: Arc<dyn ExecutorTableProvider<P>>,
}
366
367impl<P> QueryExecutor<P>
368where
369 P: Pager<Blob = EntryHandle> + Send + Sync + 'static,
370{
    /// Build an executor over the given table provider.
    pub fn new(provider: Arc<dyn ExecutorTableProvider<P>>) -> Self {
        Self { provider }
    }

    /// Execute a SELECT plan with no external row-id filter applied.
    pub fn execute_select(&self, plan: SelectPlan) -> ExecutorResult<SelectExecution<P>> {
        self.execute_select_with_filter(plan, None)
    }
378
379 pub fn execute_select_with_filter(
380 &self,
381 plan: SelectPlan,
382 row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
383 ) -> ExecutorResult<SelectExecution<P>> {
384 if plan.compound.is_some() {
385 return self.execute_compound_select(plan, row_filter);
386 }
387
388 if plan.tables.is_empty() {
390 return self.execute_select_without_table(plan);
391 }
392
393 if !plan.group_by.is_empty() {
394 if plan.tables.len() > 1 {
395 return self.execute_cross_product(plan);
396 }
397 let table_ref = &plan.tables[0];
398 let table = self.provider.get_table(&table_ref.qualified_name())?;
399 let display_name = table_ref.qualified_name();
400 return self.execute_group_by_single_table(table, display_name, plan, row_filter);
401 }
402
403 if plan.tables.len() > 1 {
405 return self.execute_cross_product(plan);
406 }
407
408 let table_ref = &plan.tables[0];
410 let table = self.provider.get_table(&table_ref.qualified_name())?;
411 let display_name = table_ref.qualified_name();
412
413 if !plan.aggregates.is_empty() {
414 self.execute_aggregates(table, display_name, plan, row_filter)
415 } else if self.has_computed_aggregates(&plan) {
416 self.execute_computed_aggregates(table, display_name, plan, row_filter)
418 } else {
419 self.execute_projection(table, display_name, plan, row_filter)
420 }
421 }
422
    /// Execute a compound SELECT: the initial component followed by a chain
    /// of UNION / EXCEPT / INTERSECT operations, with the shared ORDER BY
    /// applied to the combined result at the end.
    ///
    /// `distinct_cache` lazily holds the encoded-key set of `rows` so that
    /// consecutive DISTINCT set operations do not re-deduplicate the
    /// accumulated rows from scratch; `UNION ALL` invalidates it because
    /// duplicates become legal again.
    fn execute_compound_select(
        &self,
        plan: SelectPlan,
        row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
    ) -> ExecutorResult<SelectExecution<P>> {
        let order_by = plan.order_by.clone();
        let compound = plan.compound.expect("compound plan should be present");

        let CompoundSelectPlan {
            initial,
            operations,
        } = compound;

        // Materialize the left-most component; its schema anchors the rest.
        let initial_exec = self.execute_select_with_filter(*initial, row_filter.clone())?;
        let schema = initial_exec.schema();
        let mut rows = initial_exec.into_rows()?;
        let mut distinct_cache: Option<FxHashSet<Vec<u8>>> = None;

        for component in operations {
            let exec = self.execute_select_with_filter(component.plan, row_filter.clone())?;
            let other_schema = exec.schema();
            // Every component must be column-compatible with the first.
            ensure_schema_compatibility(schema.as_ref(), other_schema.as_ref())?;
            let other_rows = exec.into_rows()?;

            match (component.operator, component.quantifier) {
                (CompoundOperator::Union, CompoundQuantifier::All) => {
                    rows.extend(other_rows);
                    // Duplicates are now permitted; the cache no longer
                    // reflects `rows`.
                    distinct_cache = None;
                }
                (CompoundOperator::Union, CompoundQuantifier::Distinct) => {
                    ensure_distinct_rows(&mut rows, &mut distinct_cache);
                    let cache = distinct_cache
                        .as_mut()
                        .expect("distinct cache should be initialized");
                    // Only rows whose encoded key is new survive.
                    for row in other_rows {
                        let key = encode_row(&row);
                        if cache.insert(key) {
                            rows.push(row);
                        }
                    }
                }
                (CompoundOperator::Except, CompoundQuantifier::Distinct) => {
                    ensure_distinct_rows(&mut rows, &mut distinct_cache);
                    let cache = distinct_cache
                        .as_mut()
                        .expect("distinct cache should be initialized");
                    if rows.is_empty() {
                        continue;
                    }
                    let mut remove_keys = FxHashSet::default();
                    for row in other_rows {
                        remove_keys.insert(encode_row(&row));
                    }
                    if remove_keys.is_empty() {
                        continue;
                    }
                    // Drop matching rows and keep the cache in sync.
                    rows.retain(|row| {
                        let key = encode_row(row);
                        if remove_keys.contains(&key) {
                            cache.remove(&key);
                            false
                        } else {
                            true
                        }
                    });
                }
                (CompoundOperator::Except, CompoundQuantifier::All) => {
                    return Err(Error::InvalidArgumentError(
                        "EXCEPT ALL is not supported yet".into(),
                    ));
                }
                (CompoundOperator::Intersect, CompoundQuantifier::Distinct) => {
                    ensure_distinct_rows(&mut rows, &mut distinct_cache);
                    let mut right_keys = FxHashSet::default();
                    for row in other_rows {
                        right_keys.insert(encode_row(&row));
                    }
                    if right_keys.is_empty() {
                        // Intersection with an empty set is empty.
                        rows.clear();
                        distinct_cache = Some(FxHashSet::default());
                        continue;
                    }
                    // Rebuild rows/cache keeping only keys seen on the right.
                    let mut new_rows = Vec::new();
                    let mut new_cache = FxHashSet::default();
                    for row in rows.drain(..) {
                        let key = encode_row(&row);
                        if right_keys.contains(&key) && new_cache.insert(key) {
                            new_rows.push(row);
                        }
                    }
                    rows = new_rows;
                    distinct_cache = Some(new_cache);
                }
                (CompoundOperator::Intersect, CompoundQuantifier::All) => {
                    return Err(Error::InvalidArgumentError(
                        "INTERSECT ALL is not supported yet".into(),
                    ));
                }
            }
        }

        // Materialize and apply the shared ORDER BY once at the end.
        let mut batch = rows_to_record_batch(schema.clone(), &rows)?;
        if !order_by.is_empty() && batch.num_rows() > 0 {
            batch = sort_record_batch_with_order(&schema, &batch, &order_by)?;
        }

        Ok(SelectExecution::new_single_batch(
            String::new(),
            schema,
            batch,
        ))
    }
554
555 fn has_computed_aggregates(&self, plan: &SelectPlan) -> bool {
557 plan.projections.iter().any(|proj| {
558 if let SelectProjection::Computed { expr, .. } = proj {
559 Self::expr_contains_aggregate(expr)
560 } else {
561 false
562 }
563 })
564 }
565
566 fn predicate_contains_aggregate(expr: &llkv_expr::expr::Expr<String>) -> bool {
568 match expr {
569 llkv_expr::expr::Expr::And(exprs) | llkv_expr::expr::Expr::Or(exprs) => {
570 exprs.iter().any(Self::predicate_contains_aggregate)
571 }
572 llkv_expr::expr::Expr::Not(inner) => Self::predicate_contains_aggregate(inner),
573 llkv_expr::expr::Expr::Compare { left, right, .. } => {
574 Self::expr_contains_aggregate(left) || Self::expr_contains_aggregate(right)
575 }
576 llkv_expr::expr::Expr::InList { expr, list, .. } => {
577 Self::expr_contains_aggregate(expr)
578 || list.iter().any(|e| Self::expr_contains_aggregate(e))
579 }
580 llkv_expr::expr::Expr::IsNull { expr, .. } => Self::expr_contains_aggregate(expr),
581 llkv_expr::expr::Expr::Literal(_) => false,
582 llkv_expr::expr::Expr::Pred(_) => false,
583 llkv_expr::expr::Expr::Exists(_) => false,
584 }
585 }
586
587 fn expr_contains_aggregate(expr: &ScalarExpr<String>) -> bool {
589 match expr {
590 ScalarExpr::Aggregate(_) => true,
591 ScalarExpr::Binary { left, right, .. } => {
592 Self::expr_contains_aggregate(left) || Self::expr_contains_aggregate(right)
593 }
594 ScalarExpr::Compare { left, right, .. } => {
595 Self::expr_contains_aggregate(left) || Self::expr_contains_aggregate(right)
596 }
597 ScalarExpr::GetField { base, .. } => Self::expr_contains_aggregate(base),
598 ScalarExpr::Cast { expr, .. } => Self::expr_contains_aggregate(expr),
599 ScalarExpr::Not(expr) => Self::expr_contains_aggregate(expr),
600 ScalarExpr::IsNull { expr, .. } => Self::expr_contains_aggregate(expr),
601 ScalarExpr::Case {
602 operand,
603 branches,
604 else_expr,
605 } => {
606 operand
607 .as_deref()
608 .map(Self::expr_contains_aggregate)
609 .unwrap_or(false)
610 || branches.iter().any(|(when_expr, then_expr)| {
611 Self::expr_contains_aggregate(when_expr)
612 || Self::expr_contains_aggregate(then_expr)
613 })
614 || else_expr
615 .as_deref()
616 .map(Self::expr_contains_aggregate)
617 .unwrap_or(false)
618 }
619 ScalarExpr::Coalesce(items) => items.iter().any(Self::expr_contains_aggregate),
620 ScalarExpr::Column(_) | ScalarExpr::Literal(_) => false,
621 ScalarExpr::ScalarSubquery(_) => false,
622 }
623 }
624
    /// Decide EXISTS(...) for one outer row: bind the row's correlated column
    /// values into the subquery plan, execute it, and report whether any row
    /// was produced.
    ///
    /// Note: the subquery stream is consumed to completion even after the
    /// first row is observed; the stream callback offers no early exit here.
    fn evaluate_exists_subquery(
        &self,
        context: &mut CrossProductExpressionContext,
        subquery: &llkv_plan::FilterSubquery,
        batch: &RecordBatch,
        row_idx: usize,
    ) -> ExecutorResult<bool> {
        let bindings =
            collect_correlated_bindings(context, batch, row_idx, &subquery.correlated_columns)?;
        let bound_plan = bind_select_plan(&subquery.plan, &bindings)?;
        let execution = self.execute_select(bound_plan)?;
        let mut found = false;
        execution.stream(|inner_batch| {
            if inner_batch.num_rows() > 0 {
                found = true;
            }
            Ok(())
        })?;
        Ok(found)
    }
645
    /// Run a correlated scalar subquery for one outer row and return its
    /// single value as a [`Literal`].
    ///
    /// Errors if the subquery yields more than one column or more than one
    /// row; an empty result evaluates to SQL NULL.
    fn evaluate_scalar_subquery_literal(
        &self,
        context: &mut CrossProductExpressionContext,
        subquery: &llkv_plan::ScalarSubquery,
        batch: &RecordBatch,
        row_idx: usize,
    ) -> ExecutorResult<Literal> {
        // Substitute the outer row's correlated column values into the plan.
        let bindings =
            collect_correlated_bindings(context, batch, row_idx, &subquery.correlated_columns)?;
        let bound_plan = bind_select_plan(&subquery.plan, &bindings)?;
        let execution = self.execute_select(bound_plan)?;
        let mut rows_seen: usize = 0;
        let mut result: Option<Literal> = None;
        execution.stream(|inner_batch| {
            if inner_batch.num_columns() != 1 {
                return Err(Error::InvalidArgumentError(
                    "scalar subquery must return exactly one column".into(),
                ));
            }
            let column = inner_batch.column(0).clone();
            for idx in 0..inner_batch.num_rows() {
                // A second row anywhere in the stream is an error.
                if rows_seen >= 1 {
                    return Err(Error::InvalidArgumentError(
                        "scalar subquery produced more than one row".into(),
                    ));
                }
                rows_seen = rows_seen.saturating_add(1);
                result = Some(array_value_to_literal(&column, idx)?);
            }
            Ok(())
        })?;

        if rows_seen == 0 {
            // Empty result: the scalar subquery evaluates to NULL.
            Ok(Literal::Null)
        } else {
            result
                .ok_or_else(|| Error::Internal("scalar subquery evaluation missing result".into()))
        }
    }
685
686 fn evaluate_scalar_subquery_numeric(
687 &self,
688 context: &mut CrossProductExpressionContext,
689 subquery: &llkv_plan::ScalarSubquery,
690 batch: &RecordBatch,
691 ) -> ExecutorResult<NumericArray> {
692 let mut values: Vec<Option<f64>> = Vec::with_capacity(batch.num_rows());
693 let mut all_integer = true;
694
695 for row_idx in 0..batch.num_rows() {
696 let literal =
697 self.evaluate_scalar_subquery_literal(context, subquery, batch, row_idx)?;
698 match literal {
699 Literal::Null => values.push(None),
700 Literal::Integer(value) => {
701 let cast = i64::try_from(value).map_err(|_| {
702 Error::InvalidArgumentError(
703 "scalar subquery integer result exceeds supported range".into(),
704 )
705 })?;
706 values.push(Some(cast as f64));
707 }
708 Literal::Float(value) => {
709 all_integer = false;
710 values.push(Some(value));
711 }
712 Literal::Boolean(flag) => {
713 let numeric = if flag { 1.0 } else { 0.0 };
714 values.push(Some(numeric));
715 }
716 Literal::String(_) | Literal::Struct(_) => {
717 return Err(Error::InvalidArgumentError(
718 "scalar subquery produced non-numeric result in numeric context".into(),
719 ));
720 }
721 }
722 }
723
724 if all_integer {
725 let iter = values.into_iter().map(|opt| opt.map(|v| v as i64));
726 let array = Int64Array::from_iter(iter);
727 NumericArray::try_from_arrow(&(Arc::new(array) as ArrayRef))
728 } else {
729 let array = Float64Array::from_iter(values);
730 NumericArray::try_from_arrow(&(Arc::new(array) as ArrayRef))
731 }
732 }
733
    /// Evaluate a computed projection over `batch`, materializing any scalar
    /// subqueries it references.
    ///
    /// Each referenced subquery is evaluated into a numeric column, cached in
    /// the context under a freshly allocated synthetic field id, and the
    /// expression is rewritten to read from that id before evaluation.
    fn evaluate_projection_expression(
        &self,
        context: &mut CrossProductExpressionContext,
        expr: &ScalarExpr<String>,
        batch: &RecordBatch,
        scalar_lookup: &FxHashMap<SubqueryId, &llkv_plan::ScalarSubquery>,
    ) -> ExecutorResult<ArrayRef> {
        // Map column names in the expression to field ids in the combined schema.
        let translated = translate_scalar(expr, context.schema(), |name| {
            Error::InvalidArgumentError(format!(
                "column '{}' not found in cross product result",
                name
            ))
        })?;

        // Find every scalar subquery the expression mentions.
        let mut subquery_ids: FxHashSet<SubqueryId> = FxHashSet::default();
        collect_scalar_subquery_ids(&translated, &mut subquery_ids);

        // Materialize each subquery as a cached numthetic column keyed by a
        // synthetic field id, recording the id so the expression can be
        // rewritten to reference it.
        let mut mapping: FxHashMap<SubqueryId, FieldId> = FxHashMap::default();
        for subquery_id in subquery_ids {
            let info = scalar_lookup
                .get(&subquery_id)
                .ok_or_else(|| Error::Internal("missing scalar subquery metadata".into()))?;
            let field_id = context.allocate_synthetic_field_id()?;
            let numeric = self.evaluate_scalar_subquery_numeric(context, info, batch)?;
            context.numeric_cache.insert(field_id, numeric);
            mapping.insert(subquery_id, field_id);
        }

        let rewritten = rewrite_scalar_expr_for_subqueries(&translated, &mapping);
        context.evaluate_numeric(&rewritten, batch)
    }
765
766 fn execute_select_without_table(&self, plan: SelectPlan) -> ExecutorResult<SelectExecution<P>> {
768 use arrow::array::ArrayRef;
769 use arrow::datatypes::Field;
770
771 let mut fields = Vec::new();
773 let mut arrays: Vec<ArrayRef> = Vec::new();
774
775 for proj in &plan.projections {
776 match proj {
777 SelectProjection::Computed { expr, alias } => {
778 let (field_name, dtype, array) = match expr {
780 ScalarExpr::Literal(lit) => {
781 let (dtype, array) = Self::literal_to_array(lit)?;
782 (alias.clone(), dtype, array)
783 }
784 _ => {
785 return Err(Error::InvalidArgumentError(
786 "SELECT without FROM only supports literal expressions".into(),
787 ));
788 }
789 };
790
791 fields.push(Field::new(field_name, dtype, true));
792 arrays.push(array);
793 }
794 _ => {
795 return Err(Error::InvalidArgumentError(
796 "SELECT without FROM only supports computed projections".into(),
797 ));
798 }
799 }
800 }
801
802 let schema = Arc::new(Schema::new(fields));
803 let mut batch = RecordBatch::try_new(Arc::clone(&schema), arrays)
804 .map_err(|e| Error::Internal(format!("failed to create record batch: {}", e)))?;
805
806 if plan.distinct {
807 let mut state = DistinctState::default();
808 batch = match distinct_filter_batch(batch, &mut state)? {
809 Some(filtered) => filtered,
810 None => RecordBatch::new_empty(Arc::clone(&schema)),
811 };
812 }
813
814 let schema = batch.schema();
815
816 Ok(SelectExecution::new_single_batch(
817 String::new(), schema,
819 batch,
820 ))
821 }
822
823 fn literal_to_array(lit: &llkv_expr::literal::Literal) -> ExecutorResult<(DataType, ArrayRef)> {
825 use arrow::array::{
826 ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray, StructArray,
827 new_null_array,
828 };
829 use arrow::datatypes::{DataType, Field};
830 use llkv_expr::literal::Literal;
831
832 match lit {
833 Literal::Integer(v) => {
834 let val = i64::try_from(*v).unwrap_or(0);
835 Ok((
836 DataType::Int64,
837 Arc::new(Int64Array::from(vec![val])) as ArrayRef,
838 ))
839 }
840 Literal::Float(v) => Ok((
841 DataType::Float64,
842 Arc::new(Float64Array::from(vec![*v])) as ArrayRef,
843 )),
844 Literal::Boolean(v) => Ok((
845 DataType::Boolean,
846 Arc::new(BooleanArray::from(vec![*v])) as ArrayRef,
847 )),
848 Literal::String(v) => Ok((
849 DataType::Utf8,
850 Arc::new(StringArray::from(vec![v.clone()])) as ArrayRef,
851 )),
852 Literal::Null => Ok((DataType::Null, new_null_array(&DataType::Null, 1))),
853 Literal::Struct(struct_fields) => {
854 let mut inner_fields = Vec::new();
856 let mut inner_arrays = Vec::new();
857
858 for (field_name, field_lit) in struct_fields {
859 let (field_dtype, field_array) = Self::literal_to_array(field_lit)?;
860 inner_fields.push(Field::new(field_name.clone(), field_dtype, true));
861 inner_arrays.push(field_array);
862 }
863
864 let struct_array =
865 StructArray::try_new(inner_fields.clone().into(), inner_arrays, None).map_err(
866 |e| Error::Internal(format!("failed to create struct array: {}", e)),
867 )?;
868
869 Ok((
870 DataType::Struct(inner_fields.into()),
871 Arc::new(struct_array) as ArrayRef,
872 ))
873 }
874 }
875 }
876
    /// Execute a multi-table (>= 2) SELECT by joining or cross-joining the
    /// tables, then applying any residual filter, GROUP BY / aggregates,
    /// projections, and DISTINCT over the combined batches.
    ///
    /// Pipeline: resolve tables -> try hash join -> else llkv-join (explicit
    /// 2-table JOIN) or staged cross join -> residual filter with correlated
    /// EXISTS support -> dispatch to group-by/aggregate paths -> project ->
    /// DISTINCT -> single-batch result.
    fn execute_cross_product(&self, plan: SelectPlan) -> ExecutorResult<SelectExecution<P>> {
        use arrow::compute::concat_batches;

        if plan.tables.len() < 2 {
            return Err(Error::InvalidArgumentError(
                "cross product requires at least 2 tables".into(),
            ));
        }

        // Resolve every table handle up front; the display name is the
        // comma-joined list of qualified table names.
        let mut tables_with_handles = Vec::with_capacity(plan.tables.len());
        for table_ref in &plan.tables {
            let qualified_name = table_ref.qualified_name();
            let table = self.provider.get_table(&qualified_name)?;
            tables_with_handles.push((table_ref.clone(), table));
        }

        let display_name = tables_with_handles
            .iter()
            .map(|(table_ref, _)| table_ref.qualified_name())
            .collect::<Vec<_>>()
            .join(",");

        let mut remaining_filter = plan.filter.clone();

        // Prefer a hash join when a filter exists and no scalar subqueries
        // complicate predicate evaluation.
        let join_data = if plan.scalar_subqueries.is_empty() && remaining_filter.as_ref().is_some()
        {
            self.try_execute_hash_join(&plan, &tables_with_handles)?
        } else {
            None
        };

        let current = if let Some((joined, handled_all_predicates)) = join_data {
            // If the join consumed every predicate, skip the residual filter.
            if handled_all_predicates {
                remaining_filter = None;
            }
            joined
        } else {
            let has_joins = !plan.joins.is_empty();

            // Explicit two-table JOIN syntax goes through llkv-join.
            if has_joins && tables_with_handles.len() == 2 {
                use llkv_join::{JoinKey, JoinOptions, TableJoinExt};

                let (left_ref, left_table) = &tables_with_handles[0];
                let (right_ref, right_table) = &tables_with_handles[1];

                let join_type = plan
                    .joins
                    .first()
                    .map(|j| match j.join_type {
                        llkv_plan::JoinPlan::Inner => llkv_join::JoinType::Inner,
                        llkv_plan::JoinPlan::Left => llkv_join::JoinType::Left,
                        llkv_plan::JoinPlan::Right => llkv_join::JoinType::Right,
                        llkv_plan::JoinPlan::Full => llkv_join::JoinType::Full,
                    })
                    .unwrap_or(llkv_join::JoinType::Inner);

                tracing::debug!(
                    "Using llkv-join for {join_type:?} join between {} and {}",
                    left_ref.qualified_name(),
                    right_ref.qualified_name()
                );

                // No equi-join keys extracted here: the ON predicate is
                // applied later as a residual filter.
                let join_keys: Vec<JoinKey> = Vec::new();

                let mut result_batches = Vec::new();
                left_table.table.join_stream(
                    &right_table.table,
                    &join_keys,
                    &JoinOptions {
                        join_type,
                        ..Default::default()
                    },
                    |batch| {
                        result_batches.push(batch);
                    },
                )?;

                // Combined schema = left columns followed by right columns.
                let mut combined_fields = Vec::new();
                for col in &left_table.schema.columns {
                    combined_fields.push(Field::new(
                        col.name.clone(),
                        col.data_type.clone(),
                        col.nullable,
                    ));
                }
                for col in &right_table.schema.columns {
                    combined_fields.push(Field::new(
                        col.name.clone(),
                        col.data_type.clone(),
                        col.nullable,
                    ));
                }
                let combined_schema = Arc::new(Schema::new(combined_fields));

                let column_counts = vec![
                    left_table.schema.columns.len(),
                    right_table.schema.columns.len(),
                ];
                let table_indices = vec![0, 1];

                TableCrossProductData {
                    schema: combined_schema,
                    batches: result_batches,
                    column_counts,
                    table_indices,
                }
            } else {
                // General case: push single-table literal constraints down to
                // each scan, then cross join the staged per-table data.
                let constraint_map = if let Some(filter_wrapper) = remaining_filter.as_ref() {
                    extract_literal_pushdown_filters(
                        &filter_wrapper.predicate,
                        &tables_with_handles,
                    )
                } else {
                    vec![Vec::new(); tables_with_handles.len()]
                };

                let mut staged: Vec<TableCrossProductData> =
                    Vec::with_capacity(tables_with_handles.len());
                for (idx, (table_ref, table)) in tables_with_handles.iter().enumerate() {
                    let constraints = constraint_map.get(idx).map(|v| v.as_slice()).unwrap_or(&[]);
                    staged.push(collect_table_data(
                        idx,
                        table_ref,
                        table.as_ref(),
                        constraints,
                    )?);
                }
                cross_join_all(staged)?
            }
        };

        let TableCrossProductData {
            schema: combined_schema,
            batches: mut combined_batches,
            column_counts,
            table_indices,
        } = current;

        // Case-insensitive name -> column-index map over the combined schema.
        let column_lookup_map = build_cross_product_column_lookup(
            combined_schema.as_ref(),
            &plan.tables,
            &column_counts,
            &table_indices,
        );

        // Apply whatever filter the join path did not already handle,
        // evaluating correlated EXISTS subqueries per row as needed.
        if let Some(filter_wrapper) = remaining_filter.as_ref() {
            let mut filter_context = CrossProductExpressionContext::new(
                combined_schema.as_ref(),
                column_lookup_map.clone(),
            )?;
            let translated_filter = translate_predicate(
                filter_wrapper.predicate.clone(),
                filter_context.schema(),
                |name| {
                    Error::InvalidArgumentError(format!(
                        "column '{}' not found in cross product result",
                        name
                    ))
                },
            )?;

            let subquery_lookup: FxHashMap<llkv_expr::SubqueryId, &llkv_plan::FilterSubquery> =
                filter_wrapper
                    .subqueries
                    .iter()
                    .map(|subquery| (subquery.id, subquery))
                    .collect();

            let mut filtered_batches = Vec::with_capacity(combined_batches.len());
            for batch in combined_batches.into_iter() {
                filter_context.reset();
                let mask = filter_context.evaluate_predicate_mask(
                    &translated_filter,
                    &batch,
                    |ctx, subquery_expr, row_idx, current_batch| {
                        let subquery = subquery_lookup.get(&subquery_expr.id).ok_or_else(|| {
                            Error::Internal("missing correlated subquery metadata".into())
                        })?;
                        let exists =
                            self.evaluate_exists_subquery(ctx, subquery, current_batch, row_idx)?;
                        let value = if subquery_expr.negated {
                            !exists
                        } else {
                            exists
                        };
                        Ok(Some(value))
                    },
                )?;
                let filtered = filter_record_batch(&batch, &mask).map_err(|err| {
                    Error::InvalidArgumentError(format!(
                        "failed to apply cross product filter: {err}"
                    ))
                })?;
                // Drop batches the filter emptied entirely.
                if filtered.num_rows() > 0 {
                    filtered_batches.push(filtered);
                }
            }
            combined_batches = filtered_batches;
        }

        // GROUP BY / aggregate plans delegate to their dedicated paths.
        if !plan.group_by.is_empty() {
            return self.execute_group_by_from_batches(
                display_name,
                plan,
                combined_schema,
                combined_batches,
                column_lookup_map,
            );
        }

        if !plan.aggregates.is_empty() {
            return self.execute_cross_product_aggregates(
                Arc::clone(&combined_schema),
                combined_batches,
                &column_lookup_map,
                &plan,
                &display_name,
            );
        }

        if self.has_computed_aggregates(&plan) {
            return self.execute_cross_product_computed_aggregates(
                Arc::clone(&combined_schema),
                combined_batches,
                &column_lookup_map,
                &plan,
                &display_name,
            );
        }

        // Collapse all surviving batches into one.
        let mut combined_batch = if combined_batches.is_empty() {
            RecordBatch::new_empty(Arc::clone(&combined_schema))
        } else if combined_batches.len() == 1 {
            combined_batches.pop().unwrap()
        } else {
            concat_batches(&combined_schema, &combined_batches).map_err(|e| {
                Error::Internal(format!(
                    "failed to concatenate cross product batches: {}",
                    e
                ))
            })?
        };

        let scalar_lookup: FxHashMap<SubqueryId, &llkv_plan::ScalarSubquery> = plan
            .scalar_subqueries
            .iter()
            .map(|subquery| (subquery.id, subquery))
            .collect();

        // Apply the SELECT list. `*` and `* EXCEPT (...)` take everything in
        // one shot (hence the `break`); named and computed projections are
        // resolved column by column.
        if !plan.projections.is_empty() {
            let mut selected_fields = Vec::new();
            let mut selected_columns = Vec::new();
            let mut expr_context: Option<CrossProductExpressionContext> = None;

            for proj in &plan.projections {
                match proj {
                    SelectProjection::AllColumns => {
                        selected_fields = combined_schema.fields().iter().cloned().collect();
                        selected_columns = combined_batch.columns().to_vec();
                        break;
                    }
                    SelectProjection::AllColumnsExcept { exclude } => {
                        let exclude_lower: Vec<String> =
                            exclude.iter().map(|e| e.to_ascii_lowercase()).collect();

                        for (idx, field) in combined_schema.fields().iter().enumerate() {
                            let field_name_lower = field.name().to_ascii_lowercase();
                            if !exclude_lower.contains(&field_name_lower) {
                                selected_fields.push(field.clone());
                                selected_columns.push(combined_batch.column(idx).clone());
                            }
                        }
                        break;
                    }
                    SelectProjection::Column { name, alias } => {
                        let col_name = name.to_ascii_lowercase();
                        if let Some(&idx) = column_lookup_map.get(&col_name) {
                            let field = combined_schema.field(idx);
                            let output_name = alias.as_ref().unwrap_or(name).clone();
                            selected_fields.push(Arc::new(arrow::datatypes::Field::new(
                                output_name,
                                field.data_type().clone(),
                                field.is_nullable(),
                            )));
                            selected_columns.push(combined_batch.column(idx).clone());
                        } else {
                            return Err(Error::InvalidArgumentError(format!(
                                "column '{}' not found in cross product result",
                                name
                            )));
                        }
                    }
                    SelectProjection::Computed { expr, alias } => {
                        // The expression context is built lazily on the first
                        // computed projection and reused afterwards.
                        if expr_context.is_none() {
                            expr_context = Some(CrossProductExpressionContext::new(
                                combined_schema.as_ref(),
                                column_lookup_map.clone(),
                            )?);
                        }
                        let context = expr_context
                            .as_mut()
                            .expect("projection context must be initialized");
                        context.reset();
                        let evaluated = self.evaluate_projection_expression(
                            context,
                            expr,
                            &combined_batch,
                            &scalar_lookup,
                        )?;
                        let field = Arc::new(arrow::datatypes::Field::new(
                            alias.clone(),
                            evaluated.data_type().clone(),
                            true,
                        ));
                        selected_fields.push(field);
                        selected_columns.push(evaluated);
                    }
                }
            }

            let projected_schema = Arc::new(Schema::new(selected_fields));
            combined_batch = RecordBatch::try_new(projected_schema, selected_columns)
                .map_err(|e| Error::Internal(format!("failed to apply projections: {}", e)))?;
        }

        // DISTINCT runs last, over the projected batch.
        if plan.distinct {
            let mut state = DistinctState::default();
            let source_schema = combined_batch.schema();
            combined_batch = match distinct_filter_batch(combined_batch, &mut state)? {
                Some(filtered) => filtered,
                None => RecordBatch::new_empty(source_schema),
            };
        }

        let schema = combined_batch.schema();

        Ok(SelectExecution::new_single_batch(
            display_name,
            schema,
            combined_batch,
        ))
    }
1235
    /// Runs plain (column-level) aggregates over the already-materialized
    /// cross-product batches of a multi-table query and returns a single-row
    /// result batch.
    ///
    /// `column_lookup_map` maps lower-cased column names to indices in
    /// `combined_schema`; `plan.aggregates` drives which accumulators are built.
    ///
    /// Errors when the plan carries scalar subqueries (this single-pass
    /// accumulator path cannot evaluate them per row), when an aggregate
    /// references an unknown column, or when no aggregate expression exists.
    fn execute_cross_product_aggregates(
        &self,
        combined_schema: Arc<Schema>,
        batches: Vec<RecordBatch>,
        column_lookup_map: &FxHashMap<String, usize>,
        plan: &SelectPlan,
        display_name: &str,
    ) -> ExecutorResult<SelectExecution<P>> {
        if !plan.scalar_subqueries.is_empty() {
            return Err(Error::InvalidArgumentError(
                "scalar subqueries not supported in aggregate joins".into(),
            ));
        }

        // One AggregateSpec per planned aggregate, plus the batch column index
        // (if any) the accumulator should read. COUNT(*) reads no column.
        let mut specs: Vec<AggregateSpec> = Vec::with_capacity(plan.aggregates.len());
        let mut spec_to_projection: Vec<Option<usize>> = Vec::with_capacity(plan.aggregates.len());

        for aggregate in &plan.aggregates {
            match aggregate {
                AggregateExpr::CountStar { alias } => {
                    specs.push(AggregateSpec {
                        alias: alias.clone(),
                        kind: AggregateKind::Count {
                            field_id: None,
                            distinct: false,
                        },
                    });
                    spec_to_projection.push(None);
                }
                AggregateExpr::Column {
                    column,
                    alias,
                    function,
                    distinct,
                } => {
                    // Lookups are case-insensitive: the map keys are lower-cased.
                    let key = column.to_ascii_lowercase();
                    let column_index = *column_lookup_map.get(&key).ok_or_else(|| {
                        Error::InvalidArgumentError(format!(
                            "unknown column '{column}' in aggregate"
                        ))
                    })?;
                    let field = combined_schema.field(column_index);
                    // Map the planner-level function onto an accumulator kind.
                    // SUM/MIN/MAX validate that the input column is a supported
                    // numeric type (Int64 or Float64) up front.
                    let kind = match function {
                        AggregateFunction::Count => AggregateKind::Count {
                            field_id: Some(column_index as u32),
                            distinct: *distinct,
                        },
                        AggregateFunction::SumInt64 => {
                            let input_type = Self::validate_aggregate_type(
                                Some(field.data_type().clone()),
                                "SUM",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Sum {
                                field_id: column_index as u32,
                                data_type: input_type,
                                distinct: *distinct,
                            }
                        }
                        AggregateFunction::MinInt64 => {
                            let input_type = Self::validate_aggregate_type(
                                Some(field.data_type().clone()),
                                "MIN",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Min {
                                field_id: column_index as u32,
                                data_type: input_type,
                            }
                        }
                        AggregateFunction::MaxInt64 => {
                            let input_type = Self::validate_aggregate_type(
                                Some(field.data_type().clone()),
                                "MAX",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Max {
                                field_id: column_index as u32,
                                data_type: input_type,
                            }
                        }
                        AggregateFunction::CountNulls => AggregateKind::CountNulls {
                            field_id: column_index as u32,
                        },
                    };

                    specs.push(AggregateSpec {
                        alias: alias.clone(),
                        kind,
                    });
                    spec_to_projection.push(Some(column_index));
                }
            }
        }

        if specs.is_empty() {
            return Err(Error::InvalidArgumentError(
                "aggregate query requires at least one aggregate expression".into(),
            ));
        }

        // Instantiate one accumulator state per spec, wired to its batch
        // column index (None for COUNT(*)).
        let mut states = Vec::with_capacity(specs.len());
        for (idx, spec) in specs.iter().enumerate() {
            states.push(AggregateState {
                alias: spec.alias.clone(),
                accumulator: AggregateAccumulator::new_with_projection_index(
                    spec,
                    spec_to_projection[idx],
                    None,
                )?,
                override_value: None,
            });
        }

        // Single pass: feed every batch to every accumulator.
        for batch in &batches {
            for state in &mut states {
                state.update(batch)?;
            }
        }

        // Finalize each accumulator into a (Field, single-value array) pair.
        let mut fields = Vec::with_capacity(states.len());
        let mut arrays: Vec<ArrayRef> = Vec::with_capacity(states.len());
        for state in states {
            let (field, array) = state.finalize()?;
            fields.push(Arc::new(field));
            arrays.push(array);
        }

        let schema = Arc::new(Schema::new(fields));
        let mut batch = RecordBatch::try_new(Arc::clone(&schema), arrays)?;

        // DISTINCT on a single aggregate row is a no-op in practice but is
        // applied anyway for plan fidelity.
        if plan.distinct {
            let mut distinct_state = DistinctState::default();
            batch = match distinct_filter_batch(batch, &mut distinct_state)? {
                Some(filtered) => filtered,
                None => RecordBatch::new_empty(Arc::clone(&schema)),
            };
        }

        if !plan.order_by.is_empty() && batch.num_rows() > 0 {
            batch = sort_record_batch_with_order(&schema, &batch, &plan.order_by)?;
        }

        Ok(SelectExecution::new_single_batch(
            display_name.to_string(),
            schema,
            batch,
        ))
    }
1385
1386 fn execute_cross_product_computed_aggregates(
1387 &self,
1388 combined_schema: Arc<Schema>,
1389 batches: Vec<RecordBatch>,
1390 column_lookup_map: &FxHashMap<String, usize>,
1391 plan: &SelectPlan,
1392 display_name: &str,
1393 ) -> ExecutorResult<SelectExecution<P>> {
1394 let mut aggregate_specs: Vec<(String, AggregateCall<String>)> = Vec::new();
1395 for projection in &plan.projections {
1396 match projection {
1397 SelectProjection::Computed { expr, .. } => {
1398 Self::collect_aggregates(expr, &mut aggregate_specs);
1399 }
1400 SelectProjection::AllColumns
1401 | SelectProjection::AllColumnsExcept { .. }
1402 | SelectProjection::Column { .. } => {
1403 return Err(Error::InvalidArgumentError(
1404 "non-computed projections not supported with aggregate expressions".into(),
1405 ));
1406 }
1407 }
1408 }
1409
1410 if aggregate_specs.is_empty() {
1411 return Err(Error::InvalidArgumentError(
1412 "computed aggregate query requires at least one aggregate expression".into(),
1413 ));
1414 }
1415
1416 let aggregate_values = self.compute_cross_product_aggregate_values(
1417 &combined_schema,
1418 &batches,
1419 column_lookup_map,
1420 &aggregate_specs,
1421 )?;
1422
1423 let mut fields = Vec::with_capacity(plan.projections.len());
1424 let mut arrays: Vec<ArrayRef> = Vec::with_capacity(plan.projections.len());
1425
1426 for projection in &plan.projections {
1427 if let SelectProjection::Computed { expr, alias } = projection {
1428 let value = Self::evaluate_expr_with_aggregates(expr, &aggregate_values)?;
1429 fields.push(Arc::new(Field::new(alias, DataType::Int64, false)));
1430 arrays.push(Arc::new(Int64Array::from(vec![value])) as ArrayRef);
1431 }
1432 }
1433
1434 let schema = Arc::new(Schema::new(fields));
1435 let mut batch = RecordBatch::try_new(Arc::clone(&schema), arrays)?;
1436
1437 if plan.distinct {
1438 let mut distinct_state = DistinctState::default();
1439 batch = match distinct_filter_batch(batch, &mut distinct_state)? {
1440 Some(filtered) => filtered,
1441 None => RecordBatch::new_empty(Arc::clone(&schema)),
1442 };
1443 }
1444
1445 if !plan.order_by.is_empty() && batch.num_rows() > 0 {
1446 batch = sort_record_batch_with_order(&schema, &batch, &plan.order_by)?;
1447 }
1448
1449 Ok(SelectExecution::new_single_batch(
1450 display_name.to_string(),
1451 schema,
1452 batch,
1453 ))
1454 }
1455
    /// Computes the value of each named aggregate call over the cross-product
    /// batches, returning a map from the aggregate's key (alias) to its scalar
    /// result.
    ///
    /// Only aggregates over a simple column reference are supported here;
    /// complex argument expressions are rejected. Results are expected to be
    /// single-element Int64 or Float64 arrays; a NULL result is coerced to
    /// 0 / 0.0 respectively.
    fn compute_cross_product_aggregate_values(
        &self,
        combined_schema: &Arc<Schema>,
        batches: &[RecordBatch],
        column_lookup_map: &FxHashMap<String, usize>,
        aggregate_specs: &[(String, AggregateCall<String>)],
    ) -> ExecutorResult<FxHashMap<String, AggregateValue>> {
        // Build one AggregateSpec per call plus the batch column index its
        // accumulator should read (None for COUNT(*)).
        let mut specs: Vec<AggregateSpec> = Vec::with_capacity(aggregate_specs.len());
        let mut spec_to_projection: Vec<Option<usize>> = Vec::with_capacity(aggregate_specs.len());

        for (key, agg) in aggregate_specs {
            match agg {
                AggregateCall::CountStar => {
                    specs.push(AggregateSpec {
                        alias: key.clone(),
                        kind: AggregateKind::Count {
                            field_id: None,
                            distinct: false,
                        },
                    });
                    spec_to_projection.push(None);
                }
                // All remaining variants carry an argument expression, which
                // must reduce to a bare column reference in this context.
                AggregateCall::Count { expr, .. }
                | AggregateCall::Sum { expr, .. }
                | AggregateCall::Avg { expr, .. }
                | AggregateCall::Min(expr)
                | AggregateCall::Max(expr)
                | AggregateCall::CountNulls(expr) => {
                    let column = try_extract_simple_column(expr).ok_or_else(|| {
                        Error::InvalidArgumentError(
                            "complex expressions in aggregates not yet supported in this context"
                                .into(),
                        )
                    })?;
                    // Column lookup is case-insensitive (map keys are lower-cased).
                    let key_lower = column.to_ascii_lowercase();
                    let column_index = *column_lookup_map.get(&key_lower).ok_or_else(|| {
                        Error::InvalidArgumentError(format!(
                            "unknown column '{column}' in aggregate"
                        ))
                    })?;
                    let field = combined_schema.field(column_index);
                    // Second match refines the merged arm back into a concrete
                    // accumulator kind. SUM/AVG/MIN/MAX validate the input
                    // column type (Int64 or Float64) up front.
                    let kind = match agg {
                        AggregateCall::Count { distinct, .. } => AggregateKind::Count {
                            field_id: Some(column_index as u32),
                            distinct: *distinct,
                        },
                        AggregateCall::Sum { distinct, .. } => {
                            let input_type = Self::validate_aggregate_type(
                                Some(field.data_type().clone()),
                                "SUM",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Sum {
                                field_id: column_index as u32,
                                data_type: input_type,
                                distinct: *distinct,
                            }
                        }
                        AggregateCall::Avg { distinct, .. } => {
                            let input_type = Self::validate_aggregate_type(
                                Some(field.data_type().clone()),
                                "AVG",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Avg {
                                field_id: column_index as u32,
                                data_type: input_type,
                                distinct: *distinct,
                            }
                        }
                        AggregateCall::Min(_) => {
                            let input_type = Self::validate_aggregate_type(
                                Some(field.data_type().clone()),
                                "MIN",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Min {
                                field_id: column_index as u32,
                                data_type: input_type,
                            }
                        }
                        AggregateCall::Max(_) => {
                            let input_type = Self::validate_aggregate_type(
                                Some(field.data_type().clone()),
                                "MAX",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Max {
                                field_id: column_index as u32,
                                data_type: input_type,
                            }
                        }
                        AggregateCall::CountNulls(_) => AggregateKind::CountNulls {
                            field_id: column_index as u32,
                        },
                        // CountStar was handled by the outer match arm above.
                        _ => unreachable!(),
                    };

                    specs.push(AggregateSpec {
                        alias: key.clone(),
                        kind,
                    });
                    spec_to_projection.push(Some(column_index));
                }
            }
        }

        // Instantiate accumulator state for each spec.
        let mut states = Vec::with_capacity(specs.len());
        for (idx, spec) in specs.iter().enumerate() {
            states.push(AggregateState {
                alias: spec.alias.clone(),
                accumulator: AggregateAccumulator::new_with_projection_index(
                    spec,
                    spec_to_projection[idx],
                    None,
                )?,
                override_value: None,
            });
        }

        // Single pass: feed every batch to every accumulator.
        for batch in batches {
            for state in &mut states {
                state.update(batch)?;
            }
        }

        // Finalize each state and extract its single scalar value, keyed by
        // the finalized field name (the aggregate's alias/key).
        let mut results = FxHashMap::default();
        for state in states {
            let (field, array) = state.finalize()?;

            if let Some(int_array) = array.as_any().downcast_ref::<Int64Array>() {
                if int_array.len() != 1 {
                    return Err(Error::Internal(format!(
                        "Expected single value from aggregate, got {}",
                        int_array.len()
                    )));
                }
                // NULL (e.g. empty input) is coerced to 0 here.
                let value = if int_array.is_null(0) {
                    AggregateValue::Int64(0)
                } else {
                    AggregateValue::Int64(int_array.value(0))
                };
                results.insert(field.name().to_string(), value);
            } else if let Some(float_array) = array.as_any().downcast_ref::<Float64Array>() {
                if float_array.len() != 1 {
                    return Err(Error::Internal(format!(
                        "Expected single value from aggregate, got {}",
                        float_array.len()
                    )));
                }
                // NULL (e.g. empty input) is coerced to 0.0 here.
                let value = if float_array.is_null(0) {
                    AggregateValue::Float64(0.0)
                } else {
                    AggregateValue::Float64(float_array.value(0))
                };
                results.insert(field.name().to_string(), value);
            } else {
                return Err(Error::Internal(format!(
                    "Unexpected array type from aggregate: {:?}",
                    array.data_type()
                )));
            }
        }

        Ok(results)
    }
1627
    /// Attempts to execute the multi-table query as a sequence of hash joins
    /// instead of a full cartesian product.
    ///
    /// Returns `Ok(None)` when the optimization does not apply (filter missing
    /// or uses subqueries, fewer than two tables, unsupported predicate shape,
    /// no equality constraints, or LEFT JOIN present). Otherwise returns the
    /// joined data plus a flag indicating whether ALL predicate conjuncts were
    /// consumed by the join (if false, the caller must re-apply the filter).
    fn try_execute_hash_join(
        &self,
        plan: &SelectPlan,
        tables_with_handles: &[(llkv_plan::TableRef, Arc<ExecutorTable<P>>)],
    ) -> ExecutorResult<Option<(TableCrossProductData, bool)>> {
        // Label used purely for tracing output.
        let query_label_opt = current_query_label();
        let query_label = query_label_opt.as_deref().unwrap_or("<unknown query>");

        // Only a plain filter (no subqueries) can be decomposed into join keys.
        let filter_wrapper = match &plan.filter {
            Some(filter) if filter.subqueries.is_empty() => filter,
            _ => {
                tracing::debug!(
                    "join_opt[{query_label}]: skipping optimization – filter missing or uses subqueries"
                );
                return Ok(None);
            }
        };

        if tables_with_handles.len() < 2 {
            tracing::debug!(
                "join_opt[{query_label}]: skipping optimization – requires at least 2 tables"
            );
            return Ok(None);
        }

        // Per-table metadata: a case-insensitive column-name -> column-index
        // map (first occurrence wins on duplicate names).
        let mut table_infos = Vec::with_capacity(tables_with_handles.len());
        for (index, (table_ref, executor_table)) in tables_with_handles.iter().enumerate() {
            let mut column_map = FxHashMap::default();
            for (column_idx, column) in executor_table.schema.columns.iter().enumerate() {
                let column_name = column.name.to_ascii_lowercase();
                column_map.entry(column_name).or_insert(column_idx);
            }
            table_infos.push(TableInfo {
                index,
                table_ref,
                column_map,
            });
        }

        // Decompose the predicate into column equalities (join keys) and
        // per-table literal constraints. Bails out on shapes it cannot handle
        // (e.g. top-level OR).
        let constraint_plan = match extract_join_constraints(
            &filter_wrapper.predicate,
            &table_infos,
        ) {
            Some(plan) => plan,
            None => {
                tracing::debug!(
                    "join_opt[{query_label}]: skipping optimization – predicate parsing failed (contains OR or other unsupported top-level structure)"
                );
                return Ok(None);
            }
        };

        tracing::debug!(
            "join_opt[{query_label}]: constraint extraction succeeded - equalities={}, literals={}, handled={}/{} predicates",
            constraint_plan.equalities.len(),
            constraint_plan.literals.len(),
            constraint_plan.handled_conjuncts,
            constraint_plan.total_conjuncts
        );
        tracing::debug!(
            "join_opt[{query_label}]: attempting hash join with tables={:?} filter={:?}",
            plan.tables
                .iter()
                .map(|t| t.qualified_name())
                .collect::<Vec<_>>(),
            filter_wrapper.predicate,
        );

        // A provably-false predicate short-circuits to an empty result with the
        // full combined schema of all tables.
        if constraint_plan.unsatisfiable {
            tracing::debug!(
                "join_opt[{query_label}]: predicate unsatisfiable – returning empty result"
            );
            let mut combined_fields = Vec::new();
            let mut column_counts = Vec::new();
            for (_table_ref, executor_table) in tables_with_handles {
                for column in &executor_table.schema.columns {
                    combined_fields.push(Field::new(
                        column.name.clone(),
                        column.data_type.clone(),
                        column.nullable,
                    ));
                }
                column_counts.push(executor_table.schema.columns.len());
            }
            let combined_schema = Arc::new(Schema::new(combined_fields));
            let empty_batch = RecordBatch::new_empty(Arc::clone(&combined_schema));
            return Ok(Some((
                TableCrossProductData {
                    schema: combined_schema,
                    batches: vec![empty_batch],
                    column_counts,
                    table_indices: (0..tables_with_handles.len()).collect(),
                },
                // `true`: the whole predicate is accounted for (nothing matches).
                true,
            )));
        }

        // Without at least one equality there is nothing to hash on.
        if constraint_plan.equalities.is_empty() {
            tracing::debug!(
                "join_opt[{query_label}]: skipping optimization – no join equalities found"
            );
            return Ok(None);
        }

        if !constraint_plan.literals.is_empty() {
            tracing::debug!(
                "join_opt[{query_label}]: found {} literal constraints - proceeding with hash join but may need fallback",
                constraint_plan.literals.len()
            );
        }

        tracing::debug!(
            "join_opt[{query_label}]: hash join optimization applicable with {} equality constraints",
            constraint_plan.equalities.len()
        );

        // Bucket literal constraints by the table they reference so each
        // table's scan can push them down.
        let mut literal_map: Vec<Vec<ColumnConstraint>> =
            vec![Vec::new(); tables_with_handles.len()];
        for constraint in &constraint_plan.literals {
            let table_idx = match constraint {
                ColumnConstraint::Equality(lit) => lit.column.table,
                ColumnConstraint::InList(in_list) => in_list.column.table,
            };
            if table_idx >= literal_map.len() {
                tracing::debug!(
                    "join_opt[{query_label}]: constraint references unknown table index {}; falling back",
                    table_idx
                );
                return Ok(None);
            }
            tracing::debug!(
                "join_opt[{query_label}]: mapping constraint to table_idx={} (table={})",
                table_idx,
                tables_with_handles[table_idx].0.qualified_name()
            );
            literal_map[table_idx].push(constraint.clone());
        }

        // Materialize each table's data once, with its literal constraints
        // applied. Wrapped in Option so each can be consumed exactly once.
        let mut per_table: Vec<Option<TableCrossProductData>> =
            Vec::with_capacity(tables_with_handles.len());
        for (idx, (table_ref, table)) in tables_with_handles.iter().enumerate() {
            let data =
                collect_table_data(idx, table_ref, table.as_ref(), literal_map[idx].as_slice())?;
            per_table.push(Some(data));
        }

        let has_left_join = plan
            .joins
            .iter()
            .any(|j| j.join_type == llkv_plan::JoinPlan::Left);

        let mut current: Option<TableCrossProductData> = None;

        if has_left_join {
            // LEFT JOIN semantics are not handled by this greedy inner-join
            // loop; defer to the llkv-join path.
            tracing::debug!(
                "join_opt[{query_label}]: delegating to llkv-join for LEFT JOIN support"
            );
            return Ok(None);
        } else {
            // Greedy join ordering: repeatedly pick a not-yet-joined table that
            // shares an equality with the tables joined so far; if none exists,
            // fall back to a cartesian expansion with the first remaining table.
            let mut remaining: Vec<usize> = (0..tables_with_handles.len()).collect();
            let mut used_tables: FxHashSet<usize> = FxHashSet::default();

            while !remaining.is_empty() {
                let next_index = if used_tables.is_empty() {
                    remaining[0]
                } else {
                    match remaining.iter().copied().find(|idx| {
                        table_has_join_with_used(*idx, &used_tables, &constraint_plan.equalities)
                    }) {
                        Some(idx) => idx,
                        None => {
                            tracing::debug!(
                                "join_opt[{query_label}]: no remaining equality links – using cartesian expansion for table index {idx}",
                                idx = remaining[0]
                            );
                            remaining[0]
                        }
                    }
                };

                let position = remaining
                    .iter()
                    .position(|&idx| idx == next_index)
                    .expect("next index present");

                // Each table's data may be consumed only once.
                let next_data = per_table[next_index]
                    .take()
                    .ok_or_else(|| Error::Internal("hash join consumed table data twice".into()))?;

                if let Some(current_data) = current.take() {
                    // Keys connecting the accumulated result to the next table.
                    let join_keys = gather_join_keys(
                        &current_data,
                        &next_data,
                        &used_tables,
                        next_index,
                        &constraint_plan.equalities,
                    )?;

                    let joined = if join_keys.is_empty() {
                        tracing::debug!(
                            "join_opt[{query_label}]: joining '{}' via cartesian expansion (no equality keys)",
                            tables_with_handles[next_index].0.qualified_name()
                        );
                        cross_join_table_batches(current_data, next_data)?
                    } else {
                        hash_join_table_batches(
                            current_data,
                            next_data,
                            &join_keys,
                            llkv_join::JoinType::Inner,
                        )?
                    };
                    current = Some(joined);
                } else {
                    // First table simply seeds the accumulated result.
                    current = Some(next_data);
                }

                used_tables.insert(next_index);
                remaining.remove(position);
            }
        }

        if let Some(result) = current {
            // handled_all == false tells the caller to re-apply the residual
            // predicate conjuncts the join did not consume.
            let handled_all = constraint_plan.handled_conjuncts == constraint_plan.total_conjuncts;
            tracing::debug!(
                "join_opt[{query_label}]: hash join succeeded across {} tables (handled {}/{} predicates)",
                tables_with_handles.len(),
                constraint_plan.handled_conjuncts,
                constraint_plan.total_conjuncts
            );
            return Ok(Some((result, handled_all)));
        }

        Ok(None)
    }
1892
1893 fn execute_projection(
1894 &self,
1895 table: Arc<ExecutorTable<P>>,
1896 display_name: String,
1897 plan: SelectPlan,
1898 row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
1899 ) -> ExecutorResult<SelectExecution<P>> {
1900 if plan.having.is_some() {
1901 return Err(Error::InvalidArgumentError(
1902 "HAVING requires GROUP BY".into(),
1903 ));
1904 }
1905 if plan
1906 .filter
1907 .as_ref()
1908 .is_some_and(|filter| !filter.subqueries.is_empty())
1909 || !plan.scalar_subqueries.is_empty()
1910 {
1911 return self.execute_projection_with_subqueries(table, display_name, plan, row_filter);
1912 }
1913
1914 let table_ref = table.as_ref();
1915 let constant_filter = plan
1916 .filter
1917 .as_ref()
1918 .and_then(|filter| evaluate_constant_predicate(&filter.predicate));
1919 let projections = if plan.projections.is_empty() {
1920 build_wildcard_projections(table_ref)
1921 } else {
1922 build_projected_columns(table_ref, &plan.projections)?
1923 };
1924 let schema = schema_for_projections(table_ref, &projections)?;
1925
1926 if let Some(result) = constant_filter {
1927 match result {
1928 Some(true) => {
1929 }
1931 Some(false) | None => {
1932 let batch = RecordBatch::new_empty(Arc::clone(&schema));
1933 return Ok(SelectExecution::new_single_batch(
1934 display_name,
1935 schema,
1936 batch,
1937 ));
1938 }
1939 }
1940 }
1941
1942 let (mut filter_expr, mut full_table_scan) = match &plan.filter {
1943 Some(filter_wrapper) => (
1944 crate::translation::expression::translate_predicate(
1945 filter_wrapper.predicate.clone(),
1946 table_ref.schema.as_ref(),
1947 |name| Error::InvalidArgumentError(format!("unknown column '{}'", name)),
1948 )?,
1949 false,
1950 ),
1951 None => {
1952 let field_id = table_ref.schema.first_field_id().ok_or_else(|| {
1953 Error::InvalidArgumentError(
1954 "table has no columns; cannot perform wildcard scan".into(),
1955 )
1956 })?;
1957 (
1958 crate::translation::expression::full_table_scan_filter(field_id),
1959 true,
1960 )
1961 }
1962 };
1963
1964 if matches!(constant_filter, Some(Some(true))) {
1965 let field_id = table_ref.schema.first_field_id().ok_or_else(|| {
1966 Error::InvalidArgumentError(
1967 "table has no columns; cannot perform wildcard scan".into(),
1968 )
1969 })?;
1970 filter_expr = crate::translation::expression::full_table_scan_filter(field_id);
1971 full_table_scan = true;
1972 }
1973
1974 let expanded_order = expand_order_targets(&plan.order_by, &projections)?;
1975
1976 let mut physical_order: Option<ScanOrderSpec> = None;
1977
1978 if let Some(first) = expanded_order.first() {
1979 match &first.target {
1980 OrderTarget::Column(name) => {
1981 if table_ref.schema.resolve(name).is_some() {
1982 physical_order = Some(resolve_scan_order(table_ref, &projections, first)?);
1983 }
1984 }
1985 OrderTarget::Index(position) => match projections.get(*position) {
1986 Some(ScanProjection::Column(_)) => {
1987 physical_order = Some(resolve_scan_order(table_ref, &projections, first)?);
1988 }
1989 Some(ScanProjection::Computed { .. }) => {}
1990 None => {
1991 return Err(Error::InvalidArgumentError(format!(
1992 "ORDER BY position {} is out of range",
1993 position + 1
1994 )));
1995 }
1996 },
1997 OrderTarget::All => {}
1998 }
1999 }
2000
2001 let options = if let Some(order_spec) = physical_order {
2002 if row_filter.is_some() {
2003 tracing::debug!("Applying MVCC row filter with ORDER BY");
2004 }
2005 ScanStreamOptions {
2006 include_nulls: true,
2007 order: Some(order_spec),
2008 row_id_filter: row_filter.clone(),
2009 }
2010 } else {
2011 if row_filter.is_some() {
2012 tracing::debug!("Applying MVCC row filter");
2013 }
2014 ScanStreamOptions {
2015 include_nulls: true,
2016 order: None,
2017 row_id_filter: row_filter.clone(),
2018 }
2019 };
2020
2021 Ok(SelectExecution::new_projection(
2022 display_name,
2023 schema,
2024 table,
2025 projections,
2026 filter_expr,
2027 options,
2028 full_table_scan,
2029 expanded_order,
2030 plan.distinct,
2031 ))
2032 }
2033
    /// Executes a single-table projection whose plan references subqueries
    /// (EXISTS-style filter subqueries and/or scalar subqueries in the
    /// projection list).
    ///
    /// Strategy: scan ALL base columns with any non-subquery predicate pushed
    /// down, then for each streamed batch (1) evaluate the EXISTS mask row by
    /// row and filter, and (2) apply the requested projections (which may embed
    /// scalar subqueries). Results are concatenated, then DISTINCT and ORDER BY
    /// are applied on the materialized batch.
    fn execute_projection_with_subqueries(
        &self,
        table: Arc<ExecutorTable<P>>,
        display_name: String,
        plan: SelectPlan,
        row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
    ) -> ExecutorResult<SelectExecution<P>> {
        if plan.having.is_some() {
            return Err(Error::InvalidArgumentError(
                "HAVING requires GROUP BY".into(),
            ));
        }
        let table_ref = table.as_ref();

        // `output_scan_projections` is used for ORDER BY resolution; the
        // `effective_projections` drive the per-batch projection step. An empty
        // projection list means SELECT *.
        let (output_scan_projections, effective_projections): (
            Vec<ScanProjection>,
            Vec<SelectProjection>,
        ) = if plan.projections.is_empty() {
            (
                build_wildcard_projections(table_ref),
                vec![SelectProjection::AllColumns],
            )
        } else {
            (
                build_projected_columns(table_ref, &plan.projections)?,
                plan.projections.clone(),
            )
        };

        // Id -> plan metadata for scalar subqueries referenced by projections.
        let scalar_lookup: FxHashMap<SubqueryId, &llkv_plan::ScalarSubquery> = plan
            .scalar_subqueries
            .iter()
            .map(|subquery| (subquery.id, subquery))
            .collect();

        // The scan always reads every base column so that correlated
        // subqueries can reference any of them.
        let base_projections = build_wildcard_projections(table_ref);

        let filter_wrapper_opt = plan.filter.as_ref();

        // `translated_filter` keeps the full predicate (with EXISTS nodes) for
        // per-row evaluation; `pushdown_filter` is the subquery-free version
        // handed to the storage scan. With no filter at all, a full-table-scan
        // filter anchored on the first column is used.
        let mut translated_filter: Option<llkv_expr::expr::Expr<'static, FieldId>> = None;
        let pushdown_filter = if let Some(filter_wrapper) = filter_wrapper_opt {
            let translated = crate::translation::expression::translate_predicate(
                filter_wrapper.predicate.clone(),
                table_ref.schema.as_ref(),
                |name| Error::InvalidArgumentError(format!("unknown column '{}'", name)),
            )?;
            if !filter_wrapper.subqueries.is_empty() {
                translated_filter = Some(translated.clone());
                // Remove EXISTS nodes so the remainder can be pushed down.
                strip_exists(&translated)
            } else {
                translated
            }
        } else {
            let field_id = table_ref.schema.first_field_id().ok_or_else(|| {
                Error::InvalidArgumentError(
                    "table has no columns; cannot perform scalar subquery projection".into(),
                )
            })?;
            crate::translation::expression::full_table_scan_filter(field_id)
        };

        // Schema of the raw scan output (all base columns), plus the
        // name -> index lookup used by expression evaluation.
        let mut base_fields: Vec<Field> = Vec::with_capacity(table_ref.schema.columns.len());
        for column in &table_ref.schema.columns {
            base_fields.push(Field::new(
                column.name.clone(),
                column.data_type.clone(),
                column.nullable,
            ));
        }
        let base_schema = Arc::new(Schema::new(base_fields));
        let base_column_counts = vec![base_schema.fields().len()];
        let base_table_indices = vec![0usize];
        let base_lookup = build_cross_product_column_lookup(
            base_schema.as_ref(),
            &plan.tables,
            &base_column_counts,
            &base_table_indices,
        );

        // Only needed when EXISTS predicates must be evaluated per row.
        let mut filter_context = if translated_filter.is_some() {
            Some(CrossProductExpressionContext::new(
                base_schema.as_ref(),
                base_lookup.clone(),
            )?)
        } else {
            None
        };

        let options = ScanStreamOptions {
            include_nulls: true,
            order: None,
            row_id_filter: row_filter.clone(),
        };

        // Id -> plan metadata for the filter's EXISTS subqueries.
        let subquery_lookup: FxHashMap<llkv_expr::SubqueryId, &llkv_plan::FilterSubquery> =
            filter_wrapper_opt
                .map(|wrapper| {
                    wrapper
                        .subqueries
                        .iter()
                        .map(|subquery| (subquery.id, subquery))
                        .collect()
                })
                .unwrap_or_default();

        let mut projected_batches: Vec<RecordBatch> = Vec::new();
        // The scan callback cannot return errors, so the first failure is
        // stashed here and re-raised after the stream completes.
        let mut scan_error: Option<Error> = None;

        table.table.scan_stream(
            base_projections.clone(),
            &pushdown_filter,
            options,
            |batch| {
                // After a failure, drain remaining batches without work.
                if scan_error.is_some() {
                    return;
                }
                let effective_batch = if let Some(context) = filter_context.as_mut() {
                    context.reset();
                    let translated = translated_filter
                        .as_ref()
                        .expect("filter context requires translated filter");
                    // Build a boolean keep-mask, evaluating each EXISTS
                    // subquery per row (negation applied here).
                    let mask = match context.evaluate_predicate_mask(
                        translated,
                        &batch,
                        |ctx, subquery_expr, row_idx, current_batch| {
                            let subquery =
                                subquery_lookup.get(&subquery_expr.id).ok_or_else(|| {
                                    Error::Internal("missing correlated subquery metadata".into())
                                })?;
                            let exists = self.evaluate_exists_subquery(
                                ctx,
                                subquery,
                                current_batch,
                                row_idx,
                            )?;
                            let value = if subquery_expr.negated {
                                !exists
                            } else {
                                exists
                            };
                            Ok(Some(value))
                        },
                    ) {
                        Ok(mask) => mask,
                        Err(err) => {
                            scan_error = Some(err);
                            return;
                        }
                    };
                    match filter_record_batch(&batch, &mask) {
                        Ok(filtered) => {
                            if filtered.num_rows() == 0 {
                                return;
                            }
                            filtered
                        }
                        Err(err) => {
                            scan_error = Some(Error::InvalidArgumentError(format!(
                                "failed to apply EXISTS filter: {err}"
                            )));
                            return;
                        }
                    }
                } else {
                    batch.clone()
                };

                if effective_batch.num_rows() == 0 {
                    return;
                }

                // Apply projections (including scalar subqueries) to the
                // surviving rows.
                let projected = match self.project_record_batch(
                    &effective_batch,
                    &effective_projections,
                    &base_lookup,
                    &scalar_lookup,
                ) {
                    Ok(batch) => batch,
                    Err(err) => {
                        scan_error = Some(Error::InvalidArgumentError(format!(
                            "failed to evaluate projections: {err}"
                        )));
                        return;
                    }
                };
                projected_batches.push(projected);
            },
        )?;

        if let Some(err) = scan_error {
            return Err(err);
        }

        // Combine batches. An empty result is still projected once so the
        // output schema matches what callers expect.
        let mut result_batch = if projected_batches.is_empty() {
            let empty_batch = RecordBatch::new_empty(Arc::clone(&base_schema));
            self.project_record_batch(
                &empty_batch,
                &effective_projections,
                &base_lookup,
                &scalar_lookup,
            )?
        } else if projected_batches.len() == 1 {
            projected_batches.pop().unwrap()
        } else {
            let schema = projected_batches[0].schema();
            concat_batches(&schema, &projected_batches).map_err(|err| {
                Error::Internal(format!("failed to combine filtered batches: {err}"))
            })?
        };

        if plan.distinct && result_batch.num_rows() > 0 {
            let mut state = DistinctState::default();
            let schema = result_batch.schema();
            result_batch = match distinct_filter_batch(result_batch, &mut state)? {
                Some(filtered) => filtered,
                None => RecordBatch::new_empty(schema),
            };
        }

        // ORDER BY is applied on the materialized result (no scan pushdown here).
        if !plan.order_by.is_empty() && result_batch.num_rows() > 0 {
            let expanded_order = expand_order_targets(&plan.order_by, &output_scan_projections)?;
            if !expanded_order.is_empty() {
                result_batch = sort_record_batch_with_order(
                    &result_batch.schema(),
                    &result_batch,
                    &expanded_order,
                )?;
            }
        }

        let schema = result_batch.schema();

        Ok(SelectExecution::new_single_batch(
            display_name,
            schema,
            result_batch,
        ))
    }
2272
2273 fn execute_group_by_single_table(
2274 &self,
2275 table: Arc<ExecutorTable<P>>,
2276 display_name: String,
2277 plan: SelectPlan,
2278 row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
2279 ) -> ExecutorResult<SelectExecution<P>> {
2280 if plan
2281 .filter
2282 .as_ref()
2283 .is_some_and(|filter| !filter.subqueries.is_empty())
2284 || !plan.scalar_subqueries.is_empty()
2285 {
2286 return Err(Error::InvalidArgumentError(
2287 "GROUP BY with subqueries is not supported yet".into(),
2288 ));
2289 }
2290
2291 tracing::debug!(
2293 "[GROUP BY] Original plan: projections={}, aggregates={}, has_filter={}, has_having={}",
2294 plan.projections.len(),
2295 plan.aggregates.len(),
2296 plan.filter.is_some(),
2297 plan.having.is_some()
2298 );
2299
2300 let mut base_plan = plan.clone();
2304 base_plan.projections.clear();
2305 base_plan.aggregates.clear();
2306 base_plan.scalar_subqueries.clear();
2307 base_plan.order_by.clear();
2308 base_plan.distinct = false;
2309 base_plan.group_by.clear();
2310 base_plan.value_table_mode = None;
2311 base_plan.having = None;
2312
2313 tracing::debug!(
2314 "[GROUP BY] Base plan: projections={}, aggregates={}, has_filter={}, has_having={}",
2315 base_plan.projections.len(),
2316 base_plan.aggregates.len(),
2317 base_plan.filter.is_some(),
2318 base_plan.having.is_some()
2319 );
2320
2321 let table_ref = table.as_ref();
2324 let projections = build_wildcard_projections(table_ref);
2325 let base_schema = schema_for_projections(table_ref, &projections)?;
2326
2327 tracing::debug!(
2329 "[GROUP BY] Building base filter: has_filter={}",
2330 base_plan.filter.is_some()
2331 );
2332 let (filter_expr, full_table_scan) = match &base_plan.filter {
2333 Some(filter_wrapper) => {
2334 tracing::debug!(
2335 "[GROUP BY] Translating filter predicate: {:?}",
2336 filter_wrapper.predicate
2337 );
2338 let expr = crate::translation::expression::translate_predicate(
2339 filter_wrapper.predicate.clone(),
2340 table_ref.schema.as_ref(),
2341 |name| {
2342 Error::InvalidArgumentError(format!(
2343 "Binder Error: does not have a column named '{}'",
2344 name
2345 ))
2346 },
2347 )?;
2348 tracing::debug!("[GROUP BY] Translated filter expr: {:?}", expr);
2349 (expr, false)
2350 }
2351 None => {
2352 let first_col =
2354 table_ref.schema.columns.first().ok_or_else(|| {
2355 Error::InvalidArgumentError("Table has no columns".into())
2356 })?;
2357 (full_table_scan_filter(first_col.field_id), true)
2358 }
2359 };
2360
2361 let options = ScanStreamOptions {
2362 include_nulls: true,
2363 order: None,
2364 row_id_filter: row_filter.clone(),
2365 };
2366
2367 let execution = SelectExecution::new_projection(
2368 display_name.clone(),
2369 Arc::clone(&base_schema),
2370 Arc::clone(&table),
2371 projections,
2372 filter_expr,
2373 options,
2374 full_table_scan,
2375 vec![],
2376 false,
2377 );
2378
2379 let batches = execution.collect()?;
2380
2381 let column_lookup_map = build_column_lookup_map(base_schema.as_ref());
2382
2383 self.execute_group_by_from_batches(
2384 display_name,
2385 plan,
2386 base_schema,
2387 batches,
2388 column_lookup_map,
2389 )
2390 }
2391
    /// Groups pre-materialized record batches by the plan's GROUP BY columns
    /// and emits one output row per distinct key.
    ///
    /// Plans involving aggregates (in the projections or in HAVING) are
    /// delegated to `execute_group_by_with_aggregates`; this path handles only
    /// the non-aggregate case, where each group is represented by the first
    /// row encountered for its key. HAVING, DISTINCT and ORDER BY are applied
    /// after grouping.
    fn execute_group_by_from_batches(
        &self,
        display_name: String,
        plan: SelectPlan,
        base_schema: Arc<Schema>,
        batches: Vec<RecordBatch>,
        column_lookup_map: FxHashMap<String, usize>,
    ) -> ExecutorResult<SelectExecution<P>> {
        // Subqueries are rejected here as well so callers that bypass
        // execute_group_by_single_table get the same guard.
        if plan
            .filter
            .as_ref()
            .is_some_and(|filter| !filter.subqueries.is_empty())
            || !plan.scalar_subqueries.is_empty()
        {
            return Err(Error::InvalidArgumentError(
                "GROUP BY with subqueries is not supported yet".into(),
            ));
        }

        let having_has_aggregates = plan
            .having
            .as_ref()
            .map(|h| Self::predicate_contains_aggregate(h))
            .unwrap_or(false);

        tracing::debug!(
            "[GROUP BY PATH] aggregates={}, has_computed={}, having_has_agg={}",
            plan.aggregates.len(),
            self.has_computed_aggregates(&plan),
            having_has_aggregates
        );

        // Any aggregate — explicit, embedded in a computed projection, or in
        // HAVING — routes to the aggregate-aware implementation.
        if !plan.aggregates.is_empty()
            || self.has_computed_aggregates(&plan)
            || having_has_aggregates
        {
            tracing::debug!("[GROUP BY PATH] Taking aggregates path");
            return self.execute_group_by_with_aggregates(
                display_name,
                plan,
                base_schema,
                batches,
                column_lookup_map,
            );
        }

        // Resolve GROUP BY column names (case-insensitive) to column indices.
        let mut key_indices = Vec::with_capacity(plan.group_by.len());
        for column in &plan.group_by {
            let key = column.to_ascii_lowercase();
            let index = column_lookup_map.get(&key).ok_or_else(|| {
                Error::InvalidArgumentError(format!(
                    "column '{}' not found in GROUP BY input",
                    column
                ))
            })?;
            key_indices.push(*index);
        }

        let sample_batch = batches
            .first()
            .cloned()
            .unwrap_or_else(|| RecordBatch::new_empty(Arc::clone(&base_schema)));

        let output_columns = self.build_group_by_output_columns(
            &plan,
            base_schema.as_ref(),
            &column_lookup_map,
            &sample_batch,
        )?;

        // A HAVING clause that folds to a constant FALSE/NULL yields an empty
        // result without inspecting any groups.
        let constant_having = plan.having.as_ref().and_then(evaluate_constant_predicate);

        if let Some(result) = constant_having
            && !result.unwrap_or(false)
        {
            let fields: Vec<Field> = output_columns
                .iter()
                .map(|output| output.field.clone())
                .collect();
            let schema = Arc::new(Schema::new(fields));
            let batch = RecordBatch::new_empty(Arc::clone(&schema));
            return Ok(SelectExecution::new_single_batch(
                display_name,
                schema,
                batch,
            ));
        }

        // Translate a non-constant HAVING into a typed predicate over the base
        // schema. The aggregate re-check is defensive: aggregate HAVING was
        // already routed to the aggregate path above.
        let translated_having = if plan.having.is_some() && constant_having.is_none() {
            let having = plan.having.clone().expect("checked above");
            if Self::predicate_contains_aggregate(&having) {
                None
            } else {
                let temp_context = CrossProductExpressionContext::new(
                    base_schema.as_ref(),
                    column_lookup_map.clone(),
                )?;
                Some(translate_predicate(
                    having,
                    temp_context.schema(),
                    |name| {
                        Error::InvalidArgumentError(format!(
                            "column '{}' not found in GROUP BY result",
                            name
                        ))
                    },
                )?)
            }
        } else {
            None
        };

        // First row seen for each key becomes the group's representative row.
        let mut group_index: FxHashMap<Vec<GroupKeyValue>, usize> = FxHashMap::default();
        let mut groups: Vec<GroupState> = Vec::new();

        for batch in &batches {
            for row_idx in 0..batch.num_rows() {
                let key = build_group_key(batch, row_idx, &key_indices)?;
                if group_index.contains_key(&key) {
                    continue;
                }
                group_index.insert(key, groups.len());
                groups.push(GroupState {
                    batch: batch.clone(),
                    row_idx,
                });
            }
        }

        let mut rows: Vec<Vec<PlanValue>> = Vec::with_capacity(groups.len());

        for group in &groups {
            if let Some(predicate) = translated_having.as_ref() {
                // NOTE(review): a fresh context evaluates the predicate over
                // the group's entire source batch for every group, i.e.
                // O(groups x rows); caching truths per batch would avoid the
                // rework — confirm before changing.
                let mut context = CrossProductExpressionContext::new(
                    group.batch.schema().as_ref(),
                    column_lookup_map.clone(),
                )?;
                context.reset();
                let mut eval = |_ctx: &mut CrossProductExpressionContext,
                                _subquery_expr: &llkv_expr::SubqueryExpr,
                                _row_idx: usize,
                                _current_batch: &RecordBatch|
                 -> ExecutorResult<Option<bool>> {
                    Err(Error::InvalidArgumentError(
                        "HAVING subqueries are not supported yet".into(),
                    ))
                };
                let truths =
                    context.evaluate_predicate_truths(predicate, &group.batch, &mut eval)?;
                // SQL three-valued logic: NULL (None) counts as not passing.
                let passes = truths
                    .get(group.row_idx)
                    .copied()
                    .flatten()
                    .unwrap_or(false);
                if !passes {
                    continue;
                }
            }

            // Materialize the output row from the representative row: table
            // columns are read directly, computed projections are evaluated
            // against the group's source batch.
            let mut row: Vec<PlanValue> = Vec::with_capacity(output_columns.len());
            for output in &output_columns {
                match output.source {
                    OutputSource::TableColumn { index } => {
                        let value = llkv_plan::plan_value_from_array(
                            group.batch.column(index),
                            group.row_idx,
                        )?;
                        row.push(value);
                    }
                    OutputSource::Computed { projection_index } => {
                        let expr = match &plan.projections[projection_index] {
                            SelectProjection::Computed { expr, .. } => expr,
                            _ => unreachable!("projection index mismatch for computed column"),
                        };
                        let mut context = CrossProductExpressionContext::new(
                            group.batch.schema().as_ref(),
                            column_lookup_map.clone(),
                        )?;
                        context.reset();
                        let evaluated = self.evaluate_projection_expression(
                            &mut context,
                            expr,
                            &group.batch,
                            &FxHashMap::default(),
                        )?;
                        let value = llkv_plan::plan_value_from_array(&evaluated, group.row_idx)?;
                        row.push(value);
                    }
                }
            }
            rows.push(row);
        }

        let fields: Vec<Field> = output_columns
            .into_iter()
            .map(|output| output.field)
            .collect();
        let schema = Arc::new(Schema::new(fields));

        let mut batch = rows_to_record_batch(Arc::clone(&schema), &rows)?;

        // DISTINCT then ORDER BY, both applied to the already-grouped output.
        if plan.distinct && batch.num_rows() > 0 {
            let mut state = DistinctState::default();
            batch = match distinct_filter_batch(batch, &mut state)? {
                Some(filtered) => filtered,
                None => RecordBatch::new_empty(Arc::clone(&schema)),
            };
        }

        if !plan.order_by.is_empty() && batch.num_rows() > 0 {
            batch = sort_record_batch_with_order(&schema, &batch, &plan.order_by)?;
        }

        Ok(SelectExecution::new_single_batch(
            display_name,
            schema,
            batch,
        ))
    }
2615
2616 fn build_group_by_output_columns(
2617 &self,
2618 plan: &SelectPlan,
2619 base_schema: &Schema,
2620 column_lookup_map: &FxHashMap<String, usize>,
2621 _sample_batch: &RecordBatch,
2622 ) -> ExecutorResult<Vec<OutputColumn>> {
2623 let projections = if plan.projections.is_empty() {
2624 vec![SelectProjection::AllColumns]
2625 } else {
2626 plan.projections.clone()
2627 };
2628
2629 let mut columns: Vec<OutputColumn> = Vec::new();
2630
2631 for (proj_idx, projection) in projections.iter().enumerate() {
2632 match projection {
2633 SelectProjection::AllColumns => {
2634 for (index, field) in base_schema.fields().iter().enumerate() {
2635 columns.push(OutputColumn {
2636 field: (**field).clone(),
2637 source: OutputSource::TableColumn { index },
2638 });
2639 }
2640 }
2641 SelectProjection::AllColumnsExcept { exclude } => {
2642 let exclude_lower: FxHashSet<String> = exclude
2643 .iter()
2644 .map(|name| name.to_ascii_lowercase())
2645 .collect();
2646 for (index, field) in base_schema.fields().iter().enumerate() {
2647 if !exclude_lower.contains(&field.name().to_ascii_lowercase()) {
2648 columns.push(OutputColumn {
2649 field: (**field).clone(),
2650 source: OutputSource::TableColumn { index },
2651 });
2652 }
2653 }
2654 }
2655 SelectProjection::Column { name, alias } => {
2656 let lookup_key = name.to_ascii_lowercase();
2657 let index = column_lookup_map.get(&lookup_key).ok_or_else(|| {
2658 Error::InvalidArgumentError(format!(
2659 "column '{}' not found in GROUP BY result",
2660 name
2661 ))
2662 })?;
2663 let field = base_schema.field(*index);
2664 let field = Field::new(
2665 alias.as_ref().unwrap_or(name).clone(),
2666 field.data_type().clone(),
2667 field.is_nullable(),
2668 );
2669 columns.push(OutputColumn {
2670 field,
2671 source: OutputSource::TableColumn { index: *index },
2672 });
2673 }
2674 SelectProjection::Computed { expr: _, alias } => {
2675 let field = Field::new(alias.clone(), DataType::Float64, true);
2679 columns.push(OutputColumn {
2680 field,
2681 source: OutputSource::Computed {
2682 projection_index: proj_idx,
2683 },
2684 });
2685 }
2686 }
2687 }
2688
2689 if columns.is_empty() {
2690 for (index, field) in base_schema.fields().iter().enumerate() {
2691 columns.push(OutputColumn {
2692 field: (**field).clone(),
2693 source: OutputSource::TableColumn { index },
2694 });
2695 }
2696 }
2697
2698 Ok(columns)
2699 }
2700
2701 fn project_record_batch(
2702 &self,
2703 batch: &RecordBatch,
2704 projections: &[SelectProjection],
2705 lookup: &FxHashMap<String, usize>,
2706 scalar_lookup: &FxHashMap<SubqueryId, &llkv_plan::ScalarSubquery>,
2707 ) -> ExecutorResult<RecordBatch> {
2708 if projections.is_empty() {
2709 return Ok(batch.clone());
2710 }
2711
2712 let schema = batch.schema();
2713 let mut selected_fields: Vec<Arc<Field>> = Vec::new();
2714 let mut selected_columns: Vec<ArrayRef> = Vec::new();
2715 let mut expr_context: Option<CrossProductExpressionContext> = None;
2716
2717 for proj in projections {
2718 match proj {
2719 SelectProjection::AllColumns => {
2720 selected_fields = schema.fields().iter().cloned().collect();
2721 selected_columns = batch.columns().to_vec();
2722 break;
2723 }
2724 SelectProjection::AllColumnsExcept { exclude } => {
2725 let exclude_lower: FxHashSet<String> = exclude
2726 .iter()
2727 .map(|name| name.to_ascii_lowercase())
2728 .collect();
2729 for (idx, field) in schema.fields().iter().enumerate() {
2730 let column_name = field.name().to_ascii_lowercase();
2731 if !exclude_lower.contains(&column_name) {
2732 selected_fields.push(Arc::clone(field));
2733 selected_columns.push(batch.column(idx).clone());
2734 }
2735 }
2736 break;
2737 }
2738 SelectProjection::Column { name, alias } => {
2739 let normalized = name.to_ascii_lowercase();
2740 let column_index = lookup.get(&normalized).ok_or_else(|| {
2741 Error::InvalidArgumentError(format!(
2742 "column '{}' not found in projection",
2743 name
2744 ))
2745 })?;
2746 let field = schema.field(*column_index);
2747 let output_field = Arc::new(Field::new(
2748 alias.as_ref().unwrap_or_else(|| field.name()),
2749 field.data_type().clone(),
2750 field.is_nullable(),
2751 ));
2752 selected_fields.push(output_field);
2753 selected_columns.push(batch.column(*column_index).clone());
2754 }
2755 SelectProjection::Computed { expr, alias } => {
2756 if expr_context.is_none() {
2757 expr_context = Some(CrossProductExpressionContext::new(
2758 schema.as_ref(),
2759 lookup.clone(),
2760 )?);
2761 }
2762 let context = expr_context
2763 .as_mut()
2764 .expect("projection context must be initialized");
2765 context.reset();
2766 let evaluated =
2767 self.evaluate_projection_expression(context, expr, batch, scalar_lookup)?;
2768 let field = Arc::new(Field::new(
2769 alias.clone(),
2770 evaluated.data_type().clone(),
2771 true,
2772 ));
2773 selected_fields.push(field);
2774 selected_columns.push(evaluated);
2775 }
2776 }
2777 }
2778
2779 let projected_schema = Arc::new(Schema::new(selected_fields));
2780 RecordBatch::try_new(projected_schema, selected_columns)
2781 .map_err(|e| Error::Internal(format!("failed to apply projections: {}", e)))
2782 }
2783
    /// GROUP BY execution path for plans whose projections or HAVING contain
    /// aggregate calls.
    ///
    /// Rows are bucketed by group key (remembering every row location, not
    /// just a representative), each group's rows are gathered into a dedicated
    /// batch via Arrow `take`/`concat`, and every collected aggregate call is
    /// evaluated against that batch. Output rows are then built per group from
    /// the aggregate results plus the group's representative row; HAVING is
    /// applied to the materialized rows, and DISTINCT/ORDER BY run last.
    fn execute_group_by_with_aggregates(
        &self,
        display_name: String,
        plan: SelectPlan,
        base_schema: Arc<Schema>,
        batches: Vec<RecordBatch>,
        column_lookup_map: FxHashMap<String, usize>,
    ) -> ExecutorResult<SelectExecution<P>> {
        use llkv_expr::expr::AggregateCall;

        // Resolve GROUP BY column names (case-insensitive) to column indices.
        let mut key_indices = Vec::with_capacity(plan.group_by.len());
        for column in &plan.group_by {
            let key = column.to_ascii_lowercase();
            let index = column_lookup_map.get(&key).ok_or_else(|| {
                Error::InvalidArgumentError(format!(
                    "column '{}' not found in GROUP BY input",
                    column
                ))
            })?;
            key_indices.push(*index);
        }

        // Collect every aggregate call referenced by computed projections and
        // by the HAVING clause, keyed by a canonical name.
        let mut aggregate_specs: Vec<(String, AggregateCall<String>)> = Vec::new();
        for proj in &plan.projections {
            if let SelectProjection::Computed { expr, .. } = proj {
                Self::collect_aggregates(expr, &mut aggregate_specs);
            }
        }

        if let Some(having_expr) = &plan.having {
            Self::collect_aggregates_from_predicate(having_expr, &mut aggregate_specs);
        }

        // Bucket all rows by key. Unlike the non-aggregate path, every row
        // location is recorded so aggregates see the full group.
        let mut group_index: FxHashMap<Vec<GroupKeyValue>, usize> = FxHashMap::default();
        let mut group_states: Vec<GroupAggregateState> = Vec::new();

        for (batch_idx, batch) in batches.iter().enumerate() {
            for row_idx in 0..batch.num_rows() {
                let key = build_group_key(batch, row_idx, &key_indices)?;

                if let Some(&group_idx) = group_index.get(&key) {
                    group_states[group_idx]
                        .row_locations
                        .push((batch_idx, row_idx));
                } else {
                    let group_idx = group_states.len();
                    group_index.insert(key, group_idx);
                    group_states.push(GroupAggregateState {
                        representative_batch_idx: batch_idx,
                        representative_row: row_idx,
                        row_locations: vec![(batch_idx, row_idx)],
                    });
                }
            }
        }

        // Per-group map of aggregate key -> finalized value, parallel to
        // group_states.
        let mut group_aggregate_values: Vec<FxHashMap<String, PlanValue>> =
            Vec::with_capacity(group_states.len());

        for group_state in &group_states {
            tracing::debug!(
                "[GROUP BY] aggregate group rows={:?}",
                group_state.row_locations
            );
            // Gather this group's rows into one contiguous batch: group row
            // locations by source batch, `take` from each, then `concat` when
            // more than one source batch is involved.
            let group_batch = {
                let representative_batch = &batches[group_state.representative_batch_idx];
                let schema = representative_batch.schema();

                let mut per_batch_indices: Vec<(usize, Vec<u64>)> = Vec::new();
                for &(batch_idx, row_idx) in &group_state.row_locations {
                    if let Some((_, indices)) = per_batch_indices
                        .iter_mut()
                        .find(|(idx, _)| *idx == batch_idx)
                    {
                        indices.push(row_idx as u64);
                    } else {
                        per_batch_indices.push((batch_idx, vec![row_idx as u64]));
                    }
                }

                let mut row_index_arrays: Vec<(usize, ArrayRef)> =
                    Vec::with_capacity(per_batch_indices.len());
                for (batch_idx, indices) in per_batch_indices {
                    let index_array: ArrayRef = Arc::new(arrow::array::UInt64Array::from(indices));
                    row_index_arrays.push((batch_idx, index_array));
                }

                let mut arrays: Vec<ArrayRef> = Vec::with_capacity(schema.fields().len());

                for col_idx in 0..schema.fields().len() {
                    // Single source batch avoids the concat round-trip.
                    let column_array = if row_index_arrays.len() == 1 {
                        let (batch_idx, indices) = &row_index_arrays[0];
                        let source_array = batches[*batch_idx].column(col_idx);
                        arrow::compute::take(source_array.as_ref(), indices.as_ref(), None)?
                    } else {
                        let mut partial_arrays: Vec<ArrayRef> =
                            Vec::with_capacity(row_index_arrays.len());
                        for (batch_idx, indices) in &row_index_arrays {
                            let source_array = batches[*batch_idx].column(col_idx);
                            let taken = arrow::compute::take(
                                source_array.as_ref(),
                                indices.as_ref(),
                                None,
                            )?;
                            partial_arrays.push(taken);
                        }
                        let slices: Vec<&dyn arrow::array::Array> =
                            partial_arrays.iter().map(|arr| arr.as_ref()).collect();
                        arrow::compute::concat(&slices)?
                    };
                    arrays.push(column_array);
                }

                let batch = RecordBatch::try_new(Arc::clone(&schema), arrays)?;
                tracing::debug!("[GROUP BY] group batch rows={}", batch.num_rows());
                batch
            };

            let mut aggregate_values: FxHashMap<String, PlanValue> = FxHashMap::default();

            // working_batch may grow temp columns for aggregates over
            // non-column expressions; next_temp_col_idx tracks where the next
            // synthesized column lands.
            let mut working_batch = group_batch.clone();
            let mut next_temp_col_idx = working_batch.num_columns();

            for (key, agg_call) in &aggregate_specs {
                let (projection_idx, value_type) = match agg_call {
                    AggregateCall::CountStar => (None, None),
                    AggregateCall::Count { expr, .. }
                    | AggregateCall::Sum { expr, .. }
                    | AggregateCall::Avg { expr, .. }
                    | AggregateCall::Min(expr)
                    | AggregateCall::Max(expr)
                    | AggregateCall::CountNulls(expr) => {
                        if let Some(col_name) = try_extract_simple_column(expr) {
                            // Fast path: the aggregate is over a plain column.
                            let idx = resolve_column_name_to_index(col_name, &column_lookup_map)
                                .ok_or_else(|| {
                                    Error::InvalidArgumentError(format!(
                                        "column '{}' not found for aggregate",
                                        col_name
                                    ))
                                })?;
                            let field_type = working_batch.schema().field(idx).data_type().clone();
                            (Some(idx), Some(field_type))
                        } else {
                            // General path: evaluate the expression row by row
                            // and append the result as a temp column the
                            // accumulator can consume.
                            let mut computed_values = Vec::with_capacity(working_batch.num_rows());
                            for row_idx in 0..working_batch.num_rows() {
                                let value = Self::evaluate_expr_with_plan_value_aggregates_and_row(
                                    expr,
                                    &FxHashMap::default(),
                                    Some(&working_batch),
                                    Some(&column_lookup_map),
                                    row_idx,
                                )?;
                                computed_values.push(value);
                            }

                            let computed_array = plan_values_to_arrow_array(&computed_values)?;
                            let computed_type = computed_array.data_type().clone();

                            let mut new_columns: Vec<ArrayRef> = working_batch.columns().to_vec();
                            new_columns.push(computed_array);

                            let temp_field = Arc::new(Field::new(
                                format!("__temp_agg_expr_{}", next_temp_col_idx),
                                computed_type.clone(),
                                true,
                            ));
                            let mut new_fields: Vec<Arc<Field>> =
                                working_batch.schema().fields().iter().cloned().collect();
                            new_fields.push(temp_field);
                            let new_schema = Arc::new(Schema::new(new_fields));

                            working_batch = RecordBatch::try_new(new_schema, new_columns)?;

                            let col_idx = next_temp_col_idx;
                            next_temp_col_idx += 1;
                            (Some(col_idx), Some(computed_type))
                        }
                    }
                };

                let spec = Self::build_aggregate_spec_for_cross_product(
                    agg_call,
                    key.clone(),
                    value_type.clone(),
                )?;

                // Feed the whole group batch through a fresh accumulator and
                // finalize to a single scalar.
                let mut state = llkv_aggregate::AggregateState {
                    alias: key.clone(),
                    accumulator: llkv_aggregate::AggregateAccumulator::new_with_projection_index(
                        &spec,
                        projection_idx,
                        None,
                    )?,
                    override_value: None,
                };

                state.update(&working_batch)?;

                let (_field, array) = state.finalize()?;
                let value = llkv_plan::plan_value_from_array(&array, 0)?;
                tracing::debug!(
                    "[GROUP BY] aggregate result key={:?} value={:?}",
                    key,
                    value
                );
                aggregate_values.insert(key.clone(), value);
            }

            group_aggregate_values.push(aggregate_values);
        }

        let output_columns = self.build_group_by_output_columns(
            &plan,
            base_schema.as_ref(),
            &column_lookup_map,
            batches
                .first()
                .unwrap_or(&RecordBatch::new_empty(Arc::clone(&base_schema))),
        )?;

        // Build one output row per group: plain columns come from the
        // representative row, computed projections mix in aggregate results.
        let mut rows: Vec<Vec<PlanValue>> = Vec::with_capacity(group_states.len());

        for (group_idx, group_state) in group_states.iter().enumerate() {
            let aggregate_values = &group_aggregate_values[group_idx];
            let representative_batch = &batches[group_state.representative_batch_idx];

            let mut row: Vec<PlanValue> = Vec::with_capacity(output_columns.len());
            for output in &output_columns {
                match output.source {
                    OutputSource::TableColumn { index } => {
                        let value = llkv_plan::plan_value_from_array(
                            representative_batch.column(index),
                            group_state.representative_row,
                        )?;
                        row.push(value);
                    }
                    OutputSource::Computed { projection_index } => {
                        let expr = match &plan.projections[projection_index] {
                            SelectProjection::Computed { expr, .. } => expr,
                            _ => unreachable!("projection index mismatch for computed column"),
                        };
                        let value = Self::evaluate_expr_with_plan_value_aggregates_and_row(
                            expr,
                            aggregate_values,
                            Some(representative_batch),
                            Some(&column_lookup_map),
                            group_state.representative_row,
                        )?;
                        row.push(value);
                    }
                }
            }
            rows.push(row);
        }

        // HAVING: keep only rows whose predicate evaluates to exactly TRUE
        // (NULL/unknown drops the row, per SQL semantics).
        let filtered_rows = if let Some(having) = &plan.having {
            let mut filtered = Vec::new();
            for (row_idx, row) in rows.iter().enumerate() {
                let aggregate_values = &group_aggregate_values[row_idx];
                let group_state = &group_states[row_idx];
                let representative_batch = &batches[group_state.representative_batch_idx];
                let passes = Self::evaluate_having_expr(
                    having,
                    aggregate_values,
                    representative_batch,
                    &column_lookup_map,
                    group_state.representative_row,
                )?;
                if matches!(passes, Some(true)) {
                    filtered.push(row.clone());
                }
            }
            filtered
        } else {
            rows
        };

        let fields: Vec<Field> = output_columns
            .into_iter()
            .map(|output| output.field)
            .collect();
        let schema = Arc::new(Schema::new(fields));

        let mut batch = rows_to_record_batch(Arc::clone(&schema), &filtered_rows)?;

        // DISTINCT then ORDER BY on the final grouped output.
        if plan.distinct && batch.num_rows() > 0 {
            let mut state = DistinctState::default();
            batch = match distinct_filter_batch(batch, &mut state)? {
                Some(filtered) => filtered,
                None => RecordBatch::new_empty(Arc::clone(&schema)),
            };
        }

        if !plan.order_by.is_empty() && batch.num_rows() > 0 {
            batch = sort_record_batch_with_order(&schema, &batch, &plan.order_by)?;
        }

        Ok(SelectExecution::new_single_batch(
            display_name,
            schema,
            batch,
        ))
    }
3111
    /// Executes a SELECT consisting solely of column-level aggregates
    /// (COUNT / SUM / MIN / MAX / COUNT_NULLS) with no GROUP BY, producing a
    /// single one-row batch.
    ///
    /// Aggregates are compiled to `AggregateSpec`s, the referenced columns are
    /// projected and streamed once through the table scan, and each
    /// accumulator is finalized into one output column. An unfiltered query
    /// short-circuits COUNT(*) using the table's cached row count.
    fn execute_aggregates(
        &self,
        table: Arc<ExecutorTable<P>>,
        display_name: String,
        plan: SelectPlan,
        row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
    ) -> ExecutorResult<SelectExecution<P>> {
        let table_ref = table.as_ref();
        let distinct = plan.distinct;
        // Translate each planned aggregate into an AggregateSpec, validating
        // column references and input types along the way.
        let mut specs: Vec<AggregateSpec> = Vec::with_capacity(plan.aggregates.len());
        for aggregate in plan.aggregates {
            match aggregate {
                AggregateExpr::CountStar { alias } => {
                    specs.push(AggregateSpec {
                        alias,
                        kind: AggregateKind::Count {
                            field_id: None,
                            distinct: false,
                        },
                    });
                }
                AggregateExpr::Column {
                    column,
                    alias,
                    function,
                    distinct,
                } => {
                    let col = table_ref.schema.resolve(&column).ok_or_else(|| {
                        Error::InvalidArgumentError(format!(
                            "unknown column '{}' in aggregate",
                            column
                        ))
                    })?;

                    let kind = match function {
                        AggregateFunction::Count => AggregateKind::Count {
                            field_id: Some(col.field_id),
                            distinct,
                        },
                        // SUM/MIN/MAX accept only Int64 or Float64 inputs.
                        AggregateFunction::SumInt64 => {
                            let input_type = Self::validate_aggregate_type(
                                Some(col.data_type.clone()),
                                "SUM",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Sum {
                                field_id: col.field_id,
                                data_type: input_type,
                                distinct,
                            }
                        }
                        AggregateFunction::MinInt64 => {
                            let input_type = Self::validate_aggregate_type(
                                Some(col.data_type.clone()),
                                "MIN",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Min {
                                field_id: col.field_id,
                                data_type: input_type,
                            }
                        }
                        AggregateFunction::MaxInt64 => {
                            let input_type = Self::validate_aggregate_type(
                                Some(col.data_type.clone()),
                                "MAX",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Max {
                                field_id: col.field_id,
                                data_type: input_type,
                            }
                        }
                        AggregateFunction::CountNulls => {
                            if distinct {
                                return Err(Error::InvalidArgumentError(
                                    "DISTINCT is not supported for COUNT_NULLS".into(),
                                ));
                            }
                            AggregateKind::CountNulls {
                                field_id: col.field_id,
                            }
                        }
                    };
                    specs.push(AggregateSpec { alias, kind });
                }
            }
        }

        if specs.is_empty() {
            return Err(Error::InvalidArgumentError(
                "aggregate query requires at least one aggregate expression".into(),
            ));
        }

        // had_filter feeds the COUNT(*) shortcut decision below.
        let had_filter = plan.filter.is_some();
        let filter_expr = match &plan.filter {
            Some(filter_wrapper) => {
                if !filter_wrapper.subqueries.is_empty() {
                    return Err(Error::InvalidArgumentError(
                        "EXISTS subqueries not yet implemented in aggregate queries".into(),
                    ));
                }
                crate::translation::expression::translate_predicate(
                    filter_wrapper.predicate.clone(),
                    table.schema.as_ref(),
                    |name| Error::InvalidArgumentError(format!("unknown column '{}'", name)),
                )?
            }
            None => {
                // No WHERE clause: drive the scan with a match-everything
                // filter on the first column.
                let field_id = table.schema.first_field_id().ok_or_else(|| {
                    Error::InvalidArgumentError(
                        "table has no columns; cannot perform aggregate scan".into(),
                    )
                })?;
                crate::translation::expression::full_table_scan_filter(field_id)
            }
        };

        // Project exactly the columns the aggregates read; spec_to_projection
        // maps each spec to its projection slot (None for COUNT(*)).
        let mut projections = Vec::new();
        let mut spec_to_projection: Vec<Option<usize>> = Vec::with_capacity(specs.len());

        for spec in &specs {
            if let Some(field_id) = spec.kind.field_id() {
                let proj_idx = projections.len();
                spec_to_projection.push(Some(proj_idx));
                projections.push(ScanProjection::from(StoreProjection::with_alias(
                    LogicalFieldId::for_user(table.table.table_id(), field_id),
                    table
                        .schema
                        .column_by_field_id(field_id)
                        .map(|c| c.name.clone())
                        .unwrap_or_else(|| format!("col{field_id}")),
                )));
            } else {
                spec_to_projection.push(None);
            }
        }

        // The scan needs at least one projection even if only COUNT(*) was
        // requested; fall back to the first column.
        if projections.is_empty() {
            let field_id = table.schema.first_field_id().ok_or_else(|| {
                Error::InvalidArgumentError(
                    "table has no columns; cannot perform aggregate scan".into(),
                )
            })?;
            projections.push(ScanProjection::from(StoreProjection::with_alias(
                LogicalFieldId::for_user(table.table.table_id(), field_id),
                table
                    .schema
                    .column_by_field_id(field_id)
                    .map(|c| c.name.clone())
                    .unwrap_or_else(|| format!("col{field_id}")),
            )));
        }

        let options = ScanStreamOptions {
            include_nulls: true,
            order: None,
            row_id_filter: row_filter.clone(),
        };

        let mut states: Vec<AggregateState> = Vec::with_capacity(specs.len());
        // COUNT(*) shortcut: with no WHERE filter and no row-id filter the
        // cached total row count answers COUNT(*) without scanning.
        let mut count_star_override: Option<i64> = None;
        if !had_filter && row_filter.is_none() {
            let total_rows = table.total_rows.load(Ordering::SeqCst);
            tracing::debug!(
                "[AGGREGATE] Using COUNT(*) shortcut: total_rows={}",
                total_rows
            );
            if total_rows > i64::MAX as u64 {
                return Err(Error::InvalidArgumentError(
                    "COUNT(*) result exceeds supported range".into(),
                ));
            }
            count_star_override = Some(total_rows as i64);
        } else {
            tracing::debug!(
                "[AGGREGATE] NOT using COUNT(*) shortcut: had_filter={}, has_row_filter={}",
                had_filter,
                row_filter.is_some()
            );
        }

        for (idx, spec) in specs.iter().enumerate() {
            states.push(AggregateState {
                alias: spec.alias.clone(),
                accumulator: AggregateAccumulator::new_with_projection_index(
                    spec,
                    spec_to_projection[idx],
                    count_star_override,
                )?,
                // Only bare COUNT(*) (field_id: None) uses the shortcut value.
                override_value: match &spec.kind {
                    AggregateKind::Count { field_id: None, .. } => {
                        tracing::debug!(
                            "[AGGREGATE] CountStar override_value={:?}",
                            count_star_override
                        );
                        count_star_override
                    }
                    _ => None,
                },
            });
        }

        // Stream the scan through every accumulator. The callback cannot
        // return an error, so the first failure is stashed in `error` and
        // later batches are ignored.
        let mut error: Option<Error> = None;
        match table.table.scan_stream(
            projections,
            &filter_expr,
            ScanStreamOptions {
                row_id_filter: row_filter.clone(),
                ..options
            },
            |batch| {
                if error.is_some() {
                    return;
                }
                for state in &mut states {
                    if let Err(err) = state.update(&batch) {
                        error = Some(err);
                        return;
                    }
                }
            },
        ) {
            Ok(()) => {}
            Err(llkv_result::Error::NotFound) => {
                // Missing storage is treated as an empty table: accumulators
                // simply see no batches and finalize to their empty results.
            }
            Err(err) => return Err(err),
        }
        if let Some(err) = error {
            return Err(err);
        }

        // Finalize each accumulator into one (field, single-value array) pair.
        let mut fields = Vec::with_capacity(states.len());
        let mut arrays: Vec<ArrayRef> = Vec::with_capacity(states.len());
        for state in states {
            let (field, array) = state.finalize()?;
            fields.push(field);
            arrays.push(array);
        }

        let schema = Arc::new(Schema::new(fields));
        let mut batch = RecordBatch::try_new(Arc::clone(&schema), arrays)?;

        if distinct {
            let mut state = DistinctState::default();
            batch = match distinct_filter_batch(batch, &mut state)? {
                Some(filtered) => filtered,
                None => RecordBatch::new_empty(Arc::clone(&schema)),
            };
        }

        let schema = batch.schema();

        Ok(SelectExecution::new_single_batch(
            display_name,
            schema,
            batch,
        ))
    }
3379
3380 fn execute_computed_aggregates(
3383 &self,
3384 table: Arc<ExecutorTable<P>>,
3385 display_name: String,
3386 plan: SelectPlan,
3387 row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
3388 ) -> ExecutorResult<SelectExecution<P>> {
3389 use arrow::array::Int64Array;
3390 use llkv_expr::expr::AggregateCall;
3391
3392 let table_ref = table.as_ref();
3393 let distinct = plan.distinct;
3394
3395 let mut aggregate_specs: Vec<(String, AggregateCall<String>)> = Vec::new();
3397 for proj in &plan.projections {
3398 if let SelectProjection::Computed { expr, .. } = proj {
3399 Self::collect_aggregates(expr, &mut aggregate_specs);
3400 }
3401 }
3402
3403 let filter_predicate = plan
3405 .filter
3406 .as_ref()
3407 .map(|wrapper| {
3408 if !wrapper.subqueries.is_empty() {
3409 return Err(Error::InvalidArgumentError(
3410 "EXISTS subqueries not yet implemented with aggregates".into(),
3411 ));
3412 }
3413 Ok(wrapper.predicate.clone())
3414 })
3415 .transpose()?;
3416
3417 let computed_aggregates = self.compute_aggregate_values(
3418 table.clone(),
3419 &filter_predicate,
3420 &aggregate_specs,
3421 row_filter.clone(),
3422 )?;
3423
3424 let mut fields = Vec::with_capacity(plan.projections.len());
3426 let mut arrays: Vec<ArrayRef> = Vec::with_capacity(plan.projections.len());
3427
3428 for proj in &plan.projections {
3429 match proj {
3430 SelectProjection::AllColumns | SelectProjection::AllColumnsExcept { .. } => {
3431 return Err(Error::InvalidArgumentError(
3432 "Wildcard projections not supported with computed aggregates".into(),
3433 ));
3434 }
3435 SelectProjection::Column { name, alias } => {
3436 let col = table_ref.schema.resolve(name).ok_or_else(|| {
3437 Error::InvalidArgumentError(format!("unknown column '{}'", name))
3438 })?;
3439 let field_name = alias.as_ref().unwrap_or(name);
3440 fields.push(arrow::datatypes::Field::new(
3441 field_name,
3442 col.data_type.clone(),
3443 col.nullable,
3444 ));
3445 return Err(Error::InvalidArgumentError(
3448 "Regular columns not supported in aggregate queries without GROUP BY"
3449 .into(),
3450 ));
3451 }
3452 SelectProjection::Computed { expr, alias } => {
3453 let value = Self::evaluate_expr_with_aggregates(expr, &computed_aggregates)?;
3455
3456 fields.push(arrow::datatypes::Field::new(alias, DataType::Int64, false));
3457
3458 let array = Arc::new(Int64Array::from(vec![value])) as ArrayRef;
3459 arrays.push(array);
3460 }
3461 }
3462 }
3463
3464 let schema = Arc::new(Schema::new(fields));
3465 let mut batch = RecordBatch::try_new(Arc::clone(&schema), arrays)?;
3466
3467 if distinct {
3468 let mut state = DistinctState::default();
3469 batch = match distinct_filter_batch(batch, &mut state)? {
3470 Some(filtered) => filtered,
3471 None => RecordBatch::new_empty(Arc::clone(&schema)),
3472 };
3473 }
3474
3475 let schema = batch.schema();
3476
3477 Ok(SelectExecution::new_single_batch(
3478 display_name,
3479 schema,
3480 batch,
3481 ))
3482 }
3483
3484 fn build_aggregate_spec_for_cross_product(
3487 agg_call: &llkv_expr::expr::AggregateCall<String>,
3488 alias: String,
3489 data_type: Option<DataType>,
3490 ) -> ExecutorResult<llkv_aggregate::AggregateSpec> {
3491 use llkv_expr::expr::AggregateCall;
3492
3493 let kind = match agg_call {
3494 AggregateCall::CountStar => llkv_aggregate::AggregateKind::Count {
3495 field_id: None,
3496 distinct: false,
3497 },
3498 AggregateCall::Count { distinct, .. } => llkv_aggregate::AggregateKind::Count {
3499 field_id: Some(0),
3500 distinct: *distinct,
3501 },
3502 AggregateCall::Sum { distinct, .. } => llkv_aggregate::AggregateKind::Sum {
3503 field_id: 0,
3504 data_type: Self::validate_aggregate_type(
3505 data_type.clone(),
3506 "SUM",
3507 &[DataType::Int64, DataType::Float64],
3508 )?,
3509 distinct: *distinct,
3510 },
3511 AggregateCall::Avg { distinct, .. } => llkv_aggregate::AggregateKind::Avg {
3512 field_id: 0,
3513 data_type: Self::validate_aggregate_type(
3514 data_type.clone(),
3515 "AVG",
3516 &[DataType::Int64, DataType::Float64],
3517 )?,
3518 distinct: *distinct,
3519 },
3520 AggregateCall::Min(_) => llkv_aggregate::AggregateKind::Min {
3521 field_id: 0,
3522 data_type: Self::validate_aggregate_type(
3523 data_type.clone(),
3524 "MIN",
3525 &[DataType::Int64, DataType::Float64],
3526 )?,
3527 },
3528 AggregateCall::Max(_) => llkv_aggregate::AggregateKind::Max {
3529 field_id: 0,
3530 data_type: Self::validate_aggregate_type(
3531 data_type.clone(),
3532 "MAX",
3533 &[DataType::Int64, DataType::Float64],
3534 )?,
3535 },
3536 AggregateCall::CountNulls(_) => {
3537 llkv_aggregate::AggregateKind::CountNulls { field_id: 0 }
3538 }
3539 };
3540
3541 Ok(llkv_aggregate::AggregateSpec { alias, kind })
3542 }
3543
3544 fn validate_aggregate_type(
3545 data_type: Option<DataType>,
3546 func_name: &str,
3547 allowed: &[DataType],
3548 ) -> ExecutorResult<DataType> {
3549 let dt = data_type.ok_or_else(|| {
3550 Error::Internal(format!(
3551 "missing input type metadata for {func_name} aggregate"
3552 ))
3553 })?;
3554 if allowed.iter().any(|candidate| candidate == &dt) {
3555 Ok(dt)
3556 } else {
3557 Err(Error::InvalidArgumentError(format!(
3558 "{func_name} aggregate not supported for column type {:?}",
3559 dt
3560 )))
3561 }
3562 }
3563
3564 fn collect_aggregates(
3566 expr: &ScalarExpr<String>,
3567 aggregates: &mut Vec<(String, llkv_expr::expr::AggregateCall<String>)>,
3568 ) {
3569 match expr {
3570 ScalarExpr::Aggregate(agg) => {
3571 let key = format!("{:?}", agg);
3573 if !aggregates.iter().any(|(k, _)| k == &key) {
3574 aggregates.push((key, agg.clone()));
3575 }
3576 }
3577 ScalarExpr::Binary { left, right, .. } => {
3578 Self::collect_aggregates(left, aggregates);
3579 Self::collect_aggregates(right, aggregates);
3580 }
3581 ScalarExpr::Compare { left, right, .. } => {
3582 Self::collect_aggregates(left, aggregates);
3583 Self::collect_aggregates(right, aggregates);
3584 }
3585 ScalarExpr::GetField { base, .. } => {
3586 Self::collect_aggregates(base, aggregates);
3587 }
3588 ScalarExpr::Cast { expr, .. } => {
3589 Self::collect_aggregates(expr, aggregates);
3590 }
3591 ScalarExpr::Not(expr) => {
3592 Self::collect_aggregates(expr, aggregates);
3593 }
3594 ScalarExpr::IsNull { expr, .. } => {
3595 Self::collect_aggregates(expr, aggregates);
3596 }
3597 ScalarExpr::Case {
3598 operand,
3599 branches,
3600 else_expr,
3601 } => {
3602 if let Some(inner) = operand.as_deref() {
3603 Self::collect_aggregates(inner, aggregates);
3604 }
3605 for (when_expr, then_expr) in branches {
3606 Self::collect_aggregates(when_expr, aggregates);
3607 Self::collect_aggregates(then_expr, aggregates);
3608 }
3609 if let Some(inner) = else_expr.as_deref() {
3610 Self::collect_aggregates(inner, aggregates);
3611 }
3612 }
3613 ScalarExpr::Coalesce(items) => {
3614 for item in items {
3615 Self::collect_aggregates(item, aggregates);
3616 }
3617 }
3618 ScalarExpr::Column(_) | ScalarExpr::Literal(_) => {}
3619 ScalarExpr::ScalarSubquery(_) => {}
3620 }
3621 }
3622
3623 fn collect_aggregates_from_predicate(
3625 expr: &llkv_expr::expr::Expr<String>,
3626 aggregates: &mut Vec<(String, llkv_expr::expr::AggregateCall<String>)>,
3627 ) {
3628 match expr {
3629 llkv_expr::expr::Expr::Compare { left, right, .. } => {
3630 Self::collect_aggregates(left, aggregates);
3631 Self::collect_aggregates(right, aggregates);
3632 }
3633 llkv_expr::expr::Expr::And(exprs) | llkv_expr::expr::Expr::Or(exprs) => {
3634 for e in exprs {
3635 Self::collect_aggregates_from_predicate(e, aggregates);
3636 }
3637 }
3638 llkv_expr::expr::Expr::Not(inner) => {
3639 Self::collect_aggregates_from_predicate(inner, aggregates);
3640 }
3641 llkv_expr::expr::Expr::InList {
3642 expr: test_expr,
3643 list,
3644 ..
3645 } => {
3646 Self::collect_aggregates(test_expr, aggregates);
3647 for item in list {
3648 Self::collect_aggregates(item, aggregates);
3649 }
3650 }
3651 llkv_expr::expr::Expr::IsNull { expr, .. } => {
3652 Self::collect_aggregates(expr, aggregates);
3653 }
3654 llkv_expr::expr::Expr::Literal(_) => {}
3655 llkv_expr::expr::Expr::Pred(_) => {}
3656 llkv_expr::expr::Expr::Exists(_) => {}
3657 }
3658 }
3659
3660 fn compute_aggregate_values(
3662 &self,
3663 table: Arc<ExecutorTable<P>>,
3664 filter: &Option<llkv_expr::expr::Expr<'static, String>>,
3665 aggregate_specs: &[(String, llkv_expr::expr::AggregateCall<String>)],
3666 row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
3667 ) -> ExecutorResult<FxHashMap<String, AggregateValue>> {
3668 use llkv_expr::expr::AggregateCall;
3669
3670 let table_ref = table.as_ref();
3671 let mut results =
3672 FxHashMap::with_capacity_and_hasher(aggregate_specs.len(), Default::default());
3673
3674 let mut specs: Vec<AggregateSpec> = Vec::new();
3676 for (key, agg) in aggregate_specs {
3677 let kind = match agg {
3678 AggregateCall::CountStar => AggregateKind::Count {
3679 field_id: None,
3680 distinct: false,
3681 },
3682 AggregateCall::Count {
3683 expr: col_expr,
3684 distinct,
3685 } => {
3686 let col_name = try_extract_simple_column(col_expr).ok_or_else(|| {
3687 Error::InvalidArgumentError(
3688 "complex expressions in COUNT not yet fully supported".into(),
3689 )
3690 })?;
3691 let col = table_ref.schema.resolve(col_name).ok_or_else(|| {
3692 Error::InvalidArgumentError(format!("unknown column '{}'", col_name))
3693 })?;
3694 AggregateKind::Count {
3695 field_id: Some(col.field_id),
3696 distinct: *distinct,
3697 }
3698 }
3699 AggregateCall::Sum {
3700 expr: col_expr,
3701 distinct,
3702 } => {
3703 let col_name = try_extract_simple_column(col_expr).ok_or_else(|| {
3704 Error::InvalidArgumentError(
3705 "complex expressions in SUM not yet fully supported".into(),
3706 )
3707 })?;
3708 let col = table_ref.schema.resolve(col_name).ok_or_else(|| {
3709 Error::InvalidArgumentError(format!("unknown column '{}'", col_name))
3710 })?;
3711 AggregateKind::Sum {
3712 field_id: col.field_id,
3713 data_type: Self::validate_aggregate_type(
3714 Some(col.data_type.clone()),
3715 "SUM",
3716 &[DataType::Int64, DataType::Float64],
3717 )?,
3718 distinct: *distinct,
3719 }
3720 }
3721 AggregateCall::Avg {
3722 expr: col_expr,
3723 distinct,
3724 } => {
3725 let col_name = try_extract_simple_column(col_expr).ok_or_else(|| {
3726 Error::InvalidArgumentError(
3727 "complex expressions in AVG not yet fully supported".into(),
3728 )
3729 })?;
3730 let col = table_ref.schema.resolve(col_name).ok_or_else(|| {
3731 Error::InvalidArgumentError(format!("unknown column '{}'", col_name))
3732 })?;
3733 AggregateKind::Avg {
3734 field_id: col.field_id,
3735 data_type: Self::validate_aggregate_type(
3736 Some(col.data_type.clone()),
3737 "AVG",
3738 &[DataType::Int64, DataType::Float64],
3739 )?,
3740 distinct: *distinct,
3741 }
3742 }
3743 AggregateCall::Min(col_expr) => {
3744 let col_name = try_extract_simple_column(col_expr).ok_or_else(|| {
3745 Error::InvalidArgumentError(
3746 "complex expressions in MIN not yet fully supported".into(),
3747 )
3748 })?;
3749 let col = table_ref.schema.resolve(col_name).ok_or_else(|| {
3750 Error::InvalidArgumentError(format!("unknown column '{}'", col_name))
3751 })?;
3752 AggregateKind::Min {
3753 field_id: col.field_id,
3754 data_type: Self::validate_aggregate_type(
3755 Some(col.data_type.clone()),
3756 "MIN",
3757 &[DataType::Int64, DataType::Float64],
3758 )?,
3759 }
3760 }
3761 AggregateCall::Max(col_expr) => {
3762 let col_name = try_extract_simple_column(col_expr).ok_or_else(|| {
3763 Error::InvalidArgumentError(
3764 "complex expressions in MAX not yet fully supported".into(),
3765 )
3766 })?;
3767 let col = table_ref.schema.resolve(col_name).ok_or_else(|| {
3768 Error::InvalidArgumentError(format!("unknown column '{}'", col_name))
3769 })?;
3770 AggregateKind::Max {
3771 field_id: col.field_id,
3772 data_type: Self::validate_aggregate_type(
3773 Some(col.data_type.clone()),
3774 "MAX",
3775 &[DataType::Int64, DataType::Float64],
3776 )?,
3777 }
3778 }
3779 AggregateCall::CountNulls(col_expr) => {
3780 let col_name = try_extract_simple_column(col_expr).ok_or_else(|| {
3781 Error::InvalidArgumentError(
3782 "complex expressions in CountNulls not yet fully supported".into(),
3783 )
3784 })?;
3785 let col = table_ref.schema.resolve(col_name).ok_or_else(|| {
3786 Error::InvalidArgumentError(format!("unknown column '{}'", col_name))
3787 })?;
3788 AggregateKind::CountNulls {
3789 field_id: col.field_id,
3790 }
3791 }
3792 };
3793 specs.push(AggregateSpec {
3794 alias: key.clone(),
3795 kind,
3796 });
3797 }
3798
3799 let filter_expr = match filter {
3801 Some(expr) => crate::translation::expression::translate_predicate(
3802 expr.clone(),
3803 table_ref.schema.as_ref(),
3804 |name| Error::InvalidArgumentError(format!("unknown column '{}'", name)),
3805 )?,
3806 None => {
3807 let field_id = table_ref.schema.first_field_id().ok_or_else(|| {
3808 Error::InvalidArgumentError(
3809 "table has no columns; cannot perform aggregate scan".into(),
3810 )
3811 })?;
3812 crate::translation::expression::full_table_scan_filter(field_id)
3813 }
3814 };
3815
3816 let mut projections: Vec<ScanProjection> = Vec::new();
3817 let mut spec_to_projection: Vec<Option<usize>> = Vec::with_capacity(specs.len());
3818 let count_star_override: Option<i64> = None;
3819
3820 for spec in &specs {
3821 if let Some(field_id) = spec.kind.field_id() {
3822 spec_to_projection.push(Some(projections.len()));
3823 projections.push(ScanProjection::from(StoreProjection::with_alias(
3824 LogicalFieldId::for_user(table.table.table_id(), field_id),
3825 table
3826 .schema
3827 .column_by_field_id(field_id)
3828 .map(|c| c.name.clone())
3829 .unwrap_or_else(|| format!("col{field_id}")),
3830 )));
3831 } else {
3832 spec_to_projection.push(None);
3833 }
3834 }
3835
3836 if projections.is_empty() {
3837 let field_id = table_ref.schema.first_field_id().ok_or_else(|| {
3838 Error::InvalidArgumentError(
3839 "table has no columns; cannot perform aggregate scan".into(),
3840 )
3841 })?;
3842 projections.push(ScanProjection::from(StoreProjection::with_alias(
3843 LogicalFieldId::for_user(table.table.table_id(), field_id),
3844 table
3845 .schema
3846 .column_by_field_id(field_id)
3847 .map(|c| c.name.clone())
3848 .unwrap_or_else(|| format!("col{field_id}")),
3849 )));
3850 }
3851
3852 let base_options = ScanStreamOptions {
3853 include_nulls: true,
3854 order: None,
3855 row_id_filter: None,
3856 };
3857
3858 let mut states: Vec<AggregateState> = Vec::with_capacity(specs.len());
3859 for (idx, spec) in specs.iter().enumerate() {
3860 states.push(AggregateState {
3861 alias: spec.alias.clone(),
3862 accumulator: AggregateAccumulator::new_with_projection_index(
3863 spec,
3864 spec_to_projection[idx],
3865 count_star_override,
3866 )?,
3867 override_value: match &spec.kind {
3868 AggregateKind::Count { field_id: None, .. } => count_star_override,
3869 _ => None,
3870 },
3871 });
3872 }
3873
3874 let mut error: Option<Error> = None;
3875 match table.table.scan_stream(
3876 projections,
3877 &filter_expr,
3878 ScanStreamOptions {
3879 row_id_filter: row_filter.clone(),
3880 ..base_options
3881 },
3882 |batch| {
3883 if error.is_some() {
3884 return;
3885 }
3886 for state in &mut states {
3887 if let Err(err) = state.update(&batch) {
3888 error = Some(err);
3889 return;
3890 }
3891 }
3892 },
3893 ) {
3894 Ok(()) => {}
3895 Err(llkv_result::Error::NotFound) => {}
3896 Err(err) => return Err(err),
3897 }
3898 if let Some(err) = error {
3899 return Err(err);
3900 }
3901
3902 for state in states {
3904 let alias = state.alias.clone();
3905 let (_field, array) = state.finalize()?;
3906
3907 if let Some(int64_array) = array.as_any().downcast_ref::<arrow::array::Int64Array>() {
3909 if int64_array.len() != 1 {
3910 return Err(Error::Internal(format!(
3911 "Expected single value from aggregate, got {}",
3912 int64_array.len()
3913 )));
3914 }
3915 let value = if int64_array.is_null(0) {
3916 AggregateValue::Int64(0)
3917 } else {
3918 AggregateValue::Int64(int64_array.value(0))
3919 };
3920 results.insert(alias, value);
3921 }
3922 else if let Some(float64_array) =
3924 array.as_any().downcast_ref::<arrow::array::Float64Array>()
3925 {
3926 if float64_array.len() != 1 {
3927 return Err(Error::Internal(format!(
3928 "Expected single value from aggregate, got {}",
3929 float64_array.len()
3930 )));
3931 }
3932 let value = if float64_array.is_null(0) {
3933 AggregateValue::Float64(0.0)
3934 } else {
3935 AggregateValue::Float64(float64_array.value(0))
3936 };
3937 results.insert(alias, value);
3938 } else {
3939 return Err(Error::Internal(format!(
3940 "Unexpected array type from aggregate: {:?}",
3941 array.data_type()
3942 )));
3943 }
3944 }
3945
3946 Ok(results)
3947 }
3948
3949 fn evaluate_having_expr(
3950 expr: &llkv_expr::expr::Expr<String>,
3951 aggregates: &FxHashMap<String, PlanValue>,
3952 row_batch: &RecordBatch,
3953 column_lookup: &FxHashMap<String, usize>,
3954 row_idx: usize,
3955 ) -> ExecutorResult<Option<bool>> {
3956 fn compare_plan_values_for_pred(
3957 left: &PlanValue,
3958 right: &PlanValue,
3959 ) -> Option<std::cmp::Ordering> {
3960 match (left, right) {
3961 (PlanValue::Integer(l), PlanValue::Integer(r)) => Some(l.cmp(r)),
3962 (PlanValue::Float(l), PlanValue::Float(r)) => l.partial_cmp(r),
3963 (PlanValue::Integer(l), PlanValue::Float(r)) => (*l as f64).partial_cmp(r),
3964 (PlanValue::Float(l), PlanValue::Integer(r)) => l.partial_cmp(&(*r as f64)),
3965 (PlanValue::String(l), PlanValue::String(r)) => Some(l.cmp(r)),
3966 _ => None,
3967 }
3968 }
3969
3970 fn evaluate_ordering_predicate<F>(
3971 value: &PlanValue,
3972 literal: &Literal,
3973 predicate: F,
3974 ) -> ExecutorResult<Option<bool>>
3975 where
3976 F: Fn(std::cmp::Ordering) -> bool,
3977 {
3978 if matches!(value, PlanValue::Null) {
3979 return Ok(None);
3980 }
3981 let expected = llkv_plan::plan_value_from_literal(literal)?;
3982 if matches!(expected, PlanValue::Null) {
3983 return Ok(None);
3984 }
3985
3986 match compare_plan_values_for_pred(value, &expected) {
3987 Some(ordering) => Ok(Some(predicate(ordering))),
3988 None => Err(Error::InvalidArgumentError(
3989 "unsupported HAVING comparison between column value and literal".into(),
3990 )),
3991 }
3992 }
3993
3994 match expr {
3995 llkv_expr::expr::Expr::Compare { left, op, right } => {
3996 let left_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
3997 left,
3998 aggregates,
3999 Some(row_batch),
4000 Some(column_lookup),
4001 row_idx,
4002 )?;
4003 let right_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4004 right,
4005 aggregates,
4006 Some(row_batch),
4007 Some(column_lookup),
4008 row_idx,
4009 )?;
4010
4011 let (left_val, right_val) = match (&left_val, &right_val) {
4013 (PlanValue::Integer(i), PlanValue::Float(_)) => {
4014 (PlanValue::Float(*i as f64), right_val)
4015 }
4016 (PlanValue::Float(_), PlanValue::Integer(i)) => {
4017 (left_val, PlanValue::Float(*i as f64))
4018 }
4019 _ => (left_val, right_val),
4020 };
4021
4022 match (left_val, right_val) {
4023 (PlanValue::Null, _) | (_, PlanValue::Null) => Ok(None),
4025 (PlanValue::Integer(l), PlanValue::Integer(r)) => {
4026 use llkv_expr::expr::CompareOp;
4027 Ok(Some(match op {
4028 CompareOp::Eq => l == r,
4029 CompareOp::NotEq => l != r,
4030 CompareOp::Lt => l < r,
4031 CompareOp::LtEq => l <= r,
4032 CompareOp::Gt => l > r,
4033 CompareOp::GtEq => l >= r,
4034 }))
4035 }
4036 (PlanValue::Float(l), PlanValue::Float(r)) => {
4037 use llkv_expr::expr::CompareOp;
4038 Ok(Some(match op {
4039 CompareOp::Eq => l == r,
4040 CompareOp::NotEq => l != r,
4041 CompareOp::Lt => l < r,
4042 CompareOp::LtEq => l <= r,
4043 CompareOp::Gt => l > r,
4044 CompareOp::GtEq => l >= r,
4045 }))
4046 }
4047 _ => Ok(Some(false)),
4048 }
4049 }
4050 llkv_expr::expr::Expr::Not(inner) => {
4051 match Self::evaluate_having_expr(
4053 inner,
4054 aggregates,
4055 row_batch,
4056 column_lookup,
4057 row_idx,
4058 )? {
4059 Some(b) => Ok(Some(!b)),
4060 None => Ok(None), }
4062 }
4063 llkv_expr::expr::Expr::InList {
4064 expr: test_expr,
4065 list,
4066 negated,
4067 } => {
4068 let test_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4069 test_expr,
4070 aggregates,
4071 Some(row_batch),
4072 Some(column_lookup),
4073 row_idx,
4074 )?;
4075
4076 if matches!(test_val, PlanValue::Null) {
4079 return Ok(None);
4080 }
4081
4082 let mut found = false;
4083 let mut has_null = false;
4084
4085 for list_item in list {
4086 let list_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4087 list_item,
4088 aggregates,
4089 Some(row_batch),
4090 Some(column_lookup),
4091 row_idx,
4092 )?;
4093
4094 if matches!(list_val, PlanValue::Null) {
4096 has_null = true;
4097 continue;
4098 }
4099
4100 let matches = match (&test_val, &list_val) {
4102 (PlanValue::Integer(a), PlanValue::Integer(b)) => a == b,
4103 (PlanValue::Float(a), PlanValue::Float(b)) => a == b,
4104 (PlanValue::Integer(a), PlanValue::Float(b)) => (*a as f64) == *b,
4105 (PlanValue::Float(a), PlanValue::Integer(b)) => *a == (*b as f64),
4106 (PlanValue::String(a), PlanValue::String(b)) => a == b,
4107 _ => false,
4108 };
4109
4110 if matches {
4111 found = true;
4112 break;
4113 }
4114 }
4115
4116 if *negated {
4120 Ok(if found {
4122 Some(false)
4123 } else if has_null {
4124 None } else {
4126 Some(true)
4127 })
4128 } else {
4129 Ok(if found {
4131 Some(true)
4132 } else if has_null {
4133 None } else {
4135 Some(false)
4136 })
4137 }
4138 }
4139 llkv_expr::expr::Expr::IsNull { expr, negated } => {
4140 let val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4142 expr,
4143 aggregates,
4144 Some(row_batch),
4145 Some(column_lookup),
4146 row_idx,
4147 )?;
4148
4149 let is_null = matches!(val, PlanValue::Null);
4153 Ok(Some(if *negated { !is_null } else { is_null }))
4154 }
4155 llkv_expr::expr::Expr::Literal(val) => Ok(Some(*val)),
4156 llkv_expr::expr::Expr::And(exprs) => {
4157 let mut has_null = false;
4159 for e in exprs {
4160 match Self::evaluate_having_expr(
4161 e,
4162 aggregates,
4163 row_batch,
4164 column_lookup,
4165 row_idx,
4166 )? {
4167 Some(false) => return Ok(Some(false)), None => has_null = true,
4169 Some(true) => {} }
4171 }
4172 Ok(if has_null { None } else { Some(true) })
4173 }
4174 llkv_expr::expr::Expr::Or(exprs) => {
4175 let mut has_null = false;
4177 for e in exprs {
4178 match Self::evaluate_having_expr(
4179 e,
4180 aggregates,
4181 row_batch,
4182 column_lookup,
4183 row_idx,
4184 )? {
4185 Some(true) => return Ok(Some(true)), None => has_null = true,
4187 Some(false) => {} }
4189 }
4190 Ok(if has_null { None } else { Some(false) })
4191 }
4192 llkv_expr::expr::Expr::Pred(filter) => {
4193 use llkv_expr::expr::Operator;
4196
4197 let col_name = &filter.field_id;
4198 let col_idx = column_lookup
4199 .get(&col_name.to_ascii_lowercase())
4200 .ok_or_else(|| {
4201 Error::InvalidArgumentError(format!(
4202 "column '{}' not found in HAVING context",
4203 col_name
4204 ))
4205 })?;
4206
4207 let value = llkv_plan::plan_value_from_array(row_batch.column(*col_idx), row_idx)?;
4208
4209 match &filter.op {
4210 Operator::IsNull => Ok(Some(matches!(value, PlanValue::Null))),
4211 Operator::IsNotNull => Ok(Some(!matches!(value, PlanValue::Null))),
4212 Operator::Equals(expected) => {
4213 if matches!(value, PlanValue::Null) {
4215 return Ok(None);
4216 }
4217 let expected_value = llkv_plan::plan_value_from_literal(expected)?;
4219 if matches!(expected_value, PlanValue::Null) {
4220 return Ok(None);
4221 }
4222 Ok(Some(value == expected_value))
4223 }
4224 Operator::GreaterThan(expected) => {
4225 evaluate_ordering_predicate(&value, expected, |ordering| {
4226 ordering == std::cmp::Ordering::Greater
4227 })
4228 }
4229 Operator::GreaterThanOrEquals(expected) => {
4230 evaluate_ordering_predicate(&value, expected, |ordering| {
4231 ordering == std::cmp::Ordering::Greater
4232 || ordering == std::cmp::Ordering::Equal
4233 })
4234 }
4235 Operator::LessThan(expected) => {
4236 evaluate_ordering_predicate(&value, expected, |ordering| {
4237 ordering == std::cmp::Ordering::Less
4238 })
4239 }
4240 Operator::LessThanOrEquals(expected) => {
4241 evaluate_ordering_predicate(&value, expected, |ordering| {
4242 ordering == std::cmp::Ordering::Less
4243 || ordering == std::cmp::Ordering::Equal
4244 })
4245 }
4246 _ => {
4247 Err(Error::InvalidArgumentError(format!(
4250 "Operator {:?} not supported for column predicates in HAVING clause",
4251 filter.op
4252 )))
4253 }
4254 }
4255 }
4256 llkv_expr::expr::Expr::Exists(_) => Err(Error::InvalidArgumentError(
4257 "EXISTS subqueries not supported in HAVING clause".into(),
4258 )),
4259 }
4260 }
4261
    /// Evaluates a scalar expression to a [`PlanValue`], resolving aggregate
    /// calls from `aggregates` (keyed by the call's `Debug` rendering) and —
    /// when `row_batch`/`column_lookup` are provided — column references from
    /// the current row at `row_idx`.
    ///
    /// NULL propagates through comparisons, arithmetic, CAST, and logical NOT.
    /// Booleans are represented as integers (1/0). Arithmetic is performed in
    /// f64 and narrowed back to integer when both operands were integers;
    /// division/modulo by zero yields NULL.
    ///
    /// # Errors
    /// Returns `InvalidArgumentError` for struct literals, column references
    /// without row context, non-numeric arithmetic operands, unsupported CAST
    /// targets, `GetField`, and scalar subqueries; `Internal` when an aggregate
    /// key is missing from `aggregates`.
    fn evaluate_expr_with_plan_value_aggregates_and_row(
        expr: &ScalarExpr<String>,
        aggregates: &FxHashMap<String, PlanValue>,
        row_batch: Option<&RecordBatch>,
        column_lookup: Option<&FxHashMap<String, usize>>,
        row_idx: usize,
    ) -> ExecutorResult<PlanValue> {
        use llkv_expr::expr::BinaryOp;
        use llkv_expr::literal::Literal;

        match expr {
            // Literals map directly onto PlanValue; booleans become 1/0.
            ScalarExpr::Literal(Literal::Integer(v)) => Ok(PlanValue::Integer(*v as i64)),
            ScalarExpr::Literal(Literal::Float(v)) => Ok(PlanValue::Float(*v)),
            ScalarExpr::Literal(Literal::Boolean(v)) => {
                Ok(PlanValue::Integer(if *v { 1 } else { 0 }))
            }
            ScalarExpr::Literal(Literal::String(s)) => Ok(PlanValue::String(s.clone())),
            ScalarExpr::Literal(Literal::Null) => Ok(PlanValue::Null),
            ScalarExpr::Literal(Literal::Struct(_)) => Err(Error::InvalidArgumentError(
                "Struct literals not supported in aggregate expressions".into(),
            )),
            ScalarExpr::Column(col_name) => {
                // Columns can only be resolved when a row context was supplied;
                // lookup keys are lowercased column names.
                if let (Some(batch), Some(lookup)) = (row_batch, column_lookup) {
                    let col_idx = lookup.get(&col_name.to_ascii_lowercase()).ok_or_else(|| {
                        Error::InvalidArgumentError(format!("column '{}' not found", col_name))
                    })?;
                    llkv_plan::plan_value_from_array(batch.column(*col_idx), row_idx)
                } else {
                    Err(Error::InvalidArgumentError(
                        "Column references not supported in aggregate-only expressions".into(),
                    ))
                }
            }
            ScalarExpr::Compare { left, op, right } => {
                let left_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
                    left,
                    aggregates,
                    row_batch,
                    column_lookup,
                    row_idx,
                )?;
                let right_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
                    right,
                    aggregates,
                    row_batch,
                    column_lookup,
                    row_idx,
                )?;

                // SQL semantics: comparing with NULL yields NULL.
                if matches!(left_val, PlanValue::Null) || matches!(right_val, PlanValue::Null) {
                    return Ok(PlanValue::Null);
                }

                // Promote mixed int/float operands to float before comparing.
                let (left_val, right_val) = match (&left_val, &right_val) {
                    (PlanValue::Integer(i), PlanValue::Float(_)) => {
                        (PlanValue::Float(*i as f64), right_val)
                    }
                    (PlanValue::Float(_), PlanValue::Integer(i)) => {
                        (left_val, PlanValue::Float(*i as f64))
                    }
                    _ => (left_val, right_val),
                };

                // Incomparable type pairs fall through to `false`.
                let result = match (&left_val, &right_val) {
                    (PlanValue::Integer(l), PlanValue::Integer(r)) => {
                        use llkv_expr::expr::CompareOp;
                        match op {
                            CompareOp::Eq => l == r,
                            CompareOp::NotEq => l != r,
                            CompareOp::Lt => l < r,
                            CompareOp::LtEq => l <= r,
                            CompareOp::Gt => l > r,
                            CompareOp::GtEq => l >= r,
                        }
                    }
                    (PlanValue::Float(l), PlanValue::Float(r)) => {
                        use llkv_expr::expr::CompareOp;
                        match op {
                            CompareOp::Eq => l == r,
                            CompareOp::NotEq => l != r,
                            CompareOp::Lt => l < r,
                            CompareOp::LtEq => l <= r,
                            CompareOp::Gt => l > r,
                            CompareOp::GtEq => l >= r,
                        }
                    }
                    (PlanValue::String(l), PlanValue::String(r)) => {
                        use llkv_expr::expr::CompareOp;
                        match op {
                            CompareOp::Eq => l == r,
                            CompareOp::NotEq => l != r,
                            CompareOp::Lt => l < r,
                            CompareOp::LtEq => l <= r,
                            CompareOp::Gt => l > r,
                            CompareOp::GtEq => l >= r,
                        }
                    }
                    _ => false,
                };

                // Comparison results are surfaced as 1/0 integers.
                Ok(PlanValue::Integer(if result { 1 } else { 0 }))
            }
            ScalarExpr::Not(inner) => {
                // Logical NOT on truthy numerics; NULL propagates.
                let value = Self::evaluate_expr_with_plan_value_aggregates_and_row(
                    inner,
                    aggregates,
                    row_batch,
                    column_lookup,
                    row_idx,
                )?;
                match value {
                    PlanValue::Integer(v) => Ok(PlanValue::Integer(if v != 0 { 0 } else { 1 })),
                    PlanValue::Float(v) => Ok(PlanValue::Integer(if v != 0.0 { 0 } else { 1 })),
                    PlanValue::Null => Ok(PlanValue::Null),
                    other => Err(Error::InvalidArgumentError(format!(
                        "logical NOT does not support value {other:?}"
                    ))),
                }
            }
            ScalarExpr::IsNull { expr, negated } => {
                // IS [NOT] NULL always produces a definite 1/0, never NULL.
                let value = Self::evaluate_expr_with_plan_value_aggregates_and_row(
                    expr,
                    aggregates,
                    row_batch,
                    column_lookup,
                    row_idx,
                )?;
                let is_null = matches!(value, PlanValue::Null);
                let condition = if is_null { !negated } else { *negated };
                Ok(PlanValue::Integer(if condition { 1 } else { 0 }))
            }
            ScalarExpr::Aggregate(agg) => {
                // Aggregates were precomputed; look them up by their Debug key.
                let key = format!("{:?}", agg);
                aggregates
                    .get(&key)
                    .cloned()
                    .ok_or_else(|| Error::Internal(format!("Aggregate value not found: {}", key)))
            }
            ScalarExpr::Binary { left, op, right } => {
                let left_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
                    left,
                    aggregates,
                    row_batch,
                    column_lookup,
                    row_idx,
                )?;
                let right_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
                    right,
                    aggregates,
                    row_batch,
                    column_lookup,
                    row_idx,
                )?;

                // Arithmetic is carried out in f64; NULL operands short-circuit.
                // NOTE(review): integers wider than 2^53 lose precision here —
                // presumably acceptable for this path; confirm if exact i64
                // arithmetic is required.
                let left_num = match left_val {
                    PlanValue::Integer(i) => i as f64,
                    PlanValue::Float(f) => f,
                    PlanValue::Null => return Ok(PlanValue::Null),
                    _ => {
                        return Err(Error::InvalidArgumentError(
                            "Non-numeric value in binary operation".into(),
                        ));
                    }
                };
                let right_num = match right_val {
                    PlanValue::Integer(i) => i as f64,
                    PlanValue::Float(f) => f,
                    PlanValue::Null => return Ok(PlanValue::Null),
                    _ => {
                        return Err(Error::InvalidArgumentError(
                            "Non-numeric value in binary operation".into(),
                        ));
                    }
                };

                let result = match op {
                    BinaryOp::Add => left_num + right_num,
                    BinaryOp::Subtract => left_num - right_num,
                    BinaryOp::Multiply => left_num * right_num,
                    BinaryOp::Divide => {
                        // Division by zero yields SQL NULL rather than an error.
                        if right_num == 0.0 {
                            return Ok(PlanValue::Null);
                        }
                        left_num / right_num
                    }
                    BinaryOp::Modulo => {
                        // Modulo by zero likewise yields NULL.
                        if right_num == 0.0 {
                            return Ok(PlanValue::Null);
                        }
                        left_num % right_num
                    }
                };

                // Preserve integer typing only when both inputs were integers.
                if matches!(left_val, PlanValue::Float(_))
                    || matches!(right_val, PlanValue::Float(_))
                {
                    Ok(PlanValue::Float(result))
                } else {
                    Ok(PlanValue::Integer(result as i64))
                }
            }
            ScalarExpr::Cast { expr, data_type } => {
                let value = Self::evaluate_expr_with_plan_value_aggregates_and_row(
                    expr,
                    aggregates,
                    row_batch,
                    column_lookup,
                    row_idx,
                )?;

                // CAST(NULL AS anything) is NULL.
                if matches!(value, PlanValue::Null) {
                    return Ok(PlanValue::Null);
                }

                match data_type {
                    // All integer widths collapse to PlanValue::Integer (i64).
                    DataType::Int64 | DataType::Int32 | DataType::Int16 | DataType::Int8 => {
                        match value {
                            PlanValue::Integer(i) => Ok(PlanValue::Integer(i)),
                            PlanValue::Float(f) => Ok(PlanValue::Integer(f as i64)),
                            PlanValue::String(s) => {
                                s.parse::<i64>().map(PlanValue::Integer).map_err(|_| {
                                    Error::InvalidArgumentError(format!(
                                        "Cannot cast '{}' to integer",
                                        s
                                    ))
                                })
                            }
                            _ => Err(Error::InvalidArgumentError(format!(
                                "Cannot cast {:?} to integer",
                                value
                            ))),
                        }
                    }
                    DataType::Float64 | DataType::Float32 => match value {
                        PlanValue::Integer(i) => Ok(PlanValue::Float(i as f64)),
                        PlanValue::Float(f) => Ok(PlanValue::Float(f)),
                        PlanValue::String(s) => {
                            s.parse::<f64>().map(PlanValue::Float).map_err(|_| {
                                Error::InvalidArgumentError(format!("Cannot cast '{}' to float", s))
                            })
                        }
                        _ => Err(Error::InvalidArgumentError(format!(
                            "Cannot cast {:?} to float",
                            value
                        ))),
                    },
                    DataType::Utf8 | DataType::LargeUtf8 => match value {
                        PlanValue::String(s) => Ok(PlanValue::String(s)),
                        PlanValue::Integer(i) => Ok(PlanValue::String(i.to_string())),
                        PlanValue::Float(f) => Ok(PlanValue::String(f.to_string())),
                        _ => Err(Error::InvalidArgumentError(format!(
                            "Cannot cast {:?} to string",
                            value
                        ))),
                    },
                    _ => Err(Error::InvalidArgumentError(format!(
                        "CAST to {:?} not supported in aggregate expressions",
                        data_type
                    ))),
                }
            }
            ScalarExpr::Case {
                operand,
                branches,
                else_expr,
            } => {
                // Simple CASE evaluates the operand once; searched CASE treats
                // each WHEN as a truthy condition.
                let operand_value = if let Some(op) = operand {
                    Some(Self::evaluate_expr_with_plan_value_aggregates_and_row(
                        op,
                        aggregates,
                        row_batch,
                        column_lookup,
                        row_idx,
                    )?)
                } else {
                    None
                };

                for (when_expr, then_expr) in branches {
                    let matches = if let Some(ref op_val) = operand_value {
                        let when_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
                            when_expr,
                            aggregates,
                            row_batch,
                            column_lookup,
                            row_idx,
                        )?;
                        Self::simple_case_branch_matches(op_val, &when_val)
                    } else {
                        let when_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
                            when_expr,
                            aggregates,
                            row_batch,
                            column_lookup,
                            row_idx,
                        )?;
                        // Searched CASE: NULL and non-numeric conditions are false.
                        match when_val {
                            PlanValue::Integer(i) => i != 0,
                            PlanValue::Float(f) => f != 0.0,
                            PlanValue::Null => false,
                            _ => false,
                        }
                    };

                    if matches {
                        return Self::evaluate_expr_with_plan_value_aggregates_and_row(
                            then_expr,
                            aggregates,
                            row_batch,
                            column_lookup,
                            row_idx,
                        );
                    }
                }

                // No branch matched: fall back to ELSE, or NULL without one.
                if let Some(else_e) = else_expr {
                    Self::evaluate_expr_with_plan_value_aggregates_and_row(
                        else_e,
                        aggregates,
                        row_batch,
                        column_lookup,
                        row_idx,
                    )
                } else {
                    Ok(PlanValue::Null)
                }
            }
            ScalarExpr::Coalesce(exprs) => {
                // First non-NULL argument wins; all-NULL yields NULL.
                for expr in exprs {
                    let value = Self::evaluate_expr_with_plan_value_aggregates_and_row(
                        expr,
                        aggregates,
                        row_batch,
                        column_lookup,
                        row_idx,
                    )?;
                    if !matches!(value, PlanValue::Null) {
                        return Ok(value);
                    }
                }
                Ok(PlanValue::Null)
            }
            ScalarExpr::GetField { .. } => Err(Error::InvalidArgumentError(
                "GetField not supported in aggregate expressions".into(),
            )),
            ScalarExpr::ScalarSubquery(_) => Err(Error::InvalidArgumentError(
                "Scalar subqueries not supported in aggregate expressions".into(),
            )),
        }
    }
4630
4631 fn simple_case_branch_matches(operand: &PlanValue, candidate: &PlanValue) -> bool {
4632 if matches!(operand, PlanValue::Null) || matches!(candidate, PlanValue::Null) {
4633 return false;
4634 }
4635
4636 match (operand, candidate) {
4637 (PlanValue::Integer(left), PlanValue::Integer(right)) => left == right,
4638 (PlanValue::Integer(left), PlanValue::Float(right)) => (*left as f64) == *right,
4639 (PlanValue::Float(left), PlanValue::Integer(right)) => *left == (*right as f64),
4640 (PlanValue::Float(left), PlanValue::Float(right)) => left == right,
4641 (PlanValue::String(left), PlanValue::String(right)) => left == right,
4642 (PlanValue::Struct(left), PlanValue::Struct(right)) => left == right,
4643 _ => operand == candidate,
4644 }
4645 }
4646
4647 fn evaluate_expr_with_aggregates(
4648 expr: &ScalarExpr<String>,
4649 aggregates: &FxHashMap<String, AggregateValue>,
4650 ) -> ExecutorResult<i64> {
4651 use llkv_expr::expr::BinaryOp;
4652 use llkv_expr::literal::Literal;
4653
4654 match expr {
4655 ScalarExpr::Literal(Literal::Integer(v)) => Ok(*v as i64),
4656 ScalarExpr::Literal(Literal::Float(v)) => Ok(*v as i64),
4657 ScalarExpr::Literal(Literal::Boolean(v)) => Ok(if *v { 1 } else { 0 }),
4658 ScalarExpr::Literal(Literal::String(_)) => Err(Error::InvalidArgumentError(
4659 "String literals not supported in aggregate expressions".into(),
4660 )),
4661 ScalarExpr::Literal(Literal::Null) => Err(Error::InvalidArgumentError(
4662 "NULL literals not supported in aggregate expressions".into(),
4663 )),
4664 ScalarExpr::Literal(Literal::Struct(_)) => Err(Error::InvalidArgumentError(
4665 "Struct literals not supported in aggregate expressions".into(),
4666 )),
4667 ScalarExpr::Column(_) => Err(Error::InvalidArgumentError(
4668 "Column references not supported in aggregate-only expressions".into(),
4669 )),
4670 ScalarExpr::Compare { .. } => Err(Error::InvalidArgumentError(
4671 "Comparisons not supported in aggregate-only expressions".into(),
4672 )),
4673 ScalarExpr::Aggregate(agg) => {
4674 let key = format!("{:?}", agg);
4675 let value = aggregates.get(&key).ok_or_else(|| {
4676 Error::Internal(format!("Aggregate value not found for key: {}", key))
4677 })?;
4678 Ok(value.to_i64())
4680 }
4681 ScalarExpr::Not(inner) => {
4682 let value = Self::evaluate_expr_with_aggregates(inner, aggregates)?;
4683 Ok(if value != 0 { 0 } else { 1 })
4684 }
4685 ScalarExpr::IsNull { expr, negated } => {
4686 let _ = Self::evaluate_expr_with_aggregates(expr, aggregates)?;
4688 Ok(if *negated { 1 } else { 0 })
4689 }
4690 ScalarExpr::Binary { left, op, right } => {
4691 let left_val = Self::evaluate_expr_with_aggregates(left, aggregates)?;
4692 let right_val = Self::evaluate_expr_with_aggregates(right, aggregates)?;
4693
4694 let result = match op {
4695 BinaryOp::Add => left_val.checked_add(right_val),
4696 BinaryOp::Subtract => left_val.checked_sub(right_val),
4697 BinaryOp::Multiply => left_val.checked_mul(right_val),
4698 BinaryOp::Divide => {
4699 if right_val == 0 {
4700 return Err(Error::InvalidArgumentError("Division by zero".into()));
4701 }
4702 left_val.checked_div(right_val)
4703 }
4704 BinaryOp::Modulo => {
4705 if right_val == 0 {
4706 return Err(Error::InvalidArgumentError("Modulo by zero".into()));
4707 }
4708 left_val.checked_rem(right_val)
4709 }
4710 };
4711
4712 result.ok_or_else(|| {
4713 Error::InvalidArgumentError("Arithmetic overflow in expression".into())
4714 })
4715 }
4716 ScalarExpr::Cast { .. } => Err(Error::InvalidArgumentError(
4717 "CAST is not supported in aggregate-only expressions".into(),
4718 )),
4719 ScalarExpr::GetField { .. } => Err(Error::InvalidArgumentError(
4720 "GetField not supported in aggregate-only expressions".into(),
4721 )),
4722 ScalarExpr::Case { .. } => Err(Error::InvalidArgumentError(
4723 "CASE not supported in aggregate-only expressions".into(),
4724 )),
4725 ScalarExpr::Coalesce(_) => Err(Error::InvalidArgumentError(
4726 "COALESCE not supported in aggregate-only expressions".into(),
4727 )),
4728 ScalarExpr::ScalarSubquery(_) => Err(Error::InvalidArgumentError(
4729 "Scalar subqueries not supported in aggregate-only expressions".into(),
4730 )),
4731 }
4732 }
4733}
4734
/// Per-query evaluation context for filter and scalar expressions over the
/// materialized output of a cross product (cartesian join).
///
/// Each output column of the joined batch is assigned a synthetic `FieldId`
/// so the generic expression machinery (which is keyed by `FieldId`) can be
/// reused. Typed column views are cached lazily per batch.
struct CrossProductExpressionContext {
    // Executor-facing schema describing the joined result columns.
    schema: Arc<ExecutorSchema>,
    // Maps each synthetic field id to its column index in the joined batch.
    field_id_to_index: FxHashMap<FieldId, usize>,
    // Lazily-built numeric views, keyed by field id; cleared via `reset`.
    numeric_cache: FxHashMap<FieldId, NumericArray>,
    // Lazily-built typed accessors, keyed by field id; cleared via `reset`.
    column_cache: FxHashMap<FieldId, ColumnAccessor>,
    // Next synthetic field id to hand out (see `allocate_synthetic_field_id`).
    next_field_id: FieldId,
}
4742
/// Typed, cheaply-cloneable view over one column of a joined batch, used for
/// row-at-a-time predicate evaluation in cross product filters.
///
/// Only the types produced by cross product projections are representable;
/// `from_array` rejects anything else.
#[derive(Clone)]
enum ColumnAccessor {
    Int64(Arc<Int64Array>),
    Float64(Arc<Float64Array>),
    Boolean(Arc<BooleanArray>),
    Utf8(Arc<StringArray>),
    // An all-null column (arrow `DataType::Null`) of the given row count.
    Null(usize),
}
4751
4752impl ColumnAccessor {
4753 fn from_array(array: &ArrayRef) -> ExecutorResult<Self> {
4754 match array.data_type() {
4755 DataType::Int64 => {
4756 let typed = array
4757 .as_any()
4758 .downcast_ref::<Int64Array>()
4759 .ok_or_else(|| Error::Internal("expected Int64 array".into()))?
4760 .clone();
4761 Ok(Self::Int64(Arc::new(typed)))
4762 }
4763 DataType::Float64 => {
4764 let typed = array
4765 .as_any()
4766 .downcast_ref::<Float64Array>()
4767 .ok_or_else(|| Error::Internal("expected Float64 array".into()))?
4768 .clone();
4769 Ok(Self::Float64(Arc::new(typed)))
4770 }
4771 DataType::Boolean => {
4772 let typed = array
4773 .as_any()
4774 .downcast_ref::<BooleanArray>()
4775 .ok_or_else(|| Error::Internal("expected Boolean array".into()))?
4776 .clone();
4777 Ok(Self::Boolean(Arc::new(typed)))
4778 }
4779 DataType::Utf8 => {
4780 let typed = array
4781 .as_any()
4782 .downcast_ref::<StringArray>()
4783 .ok_or_else(|| Error::Internal("expected Utf8 array".into()))?
4784 .clone();
4785 Ok(Self::Utf8(Arc::new(typed)))
4786 }
4787 DataType::Null => Ok(Self::Null(array.len())),
4788 other => Err(Error::InvalidArgumentError(format!(
4789 "unsupported column type {:?} in cross product filter",
4790 other
4791 ))),
4792 }
4793 }
4794
4795 fn len(&self) -> usize {
4796 match self {
4797 ColumnAccessor::Int64(array) => array.len(),
4798 ColumnAccessor::Float64(array) => array.len(),
4799 ColumnAccessor::Boolean(array) => array.len(),
4800 ColumnAccessor::Utf8(array) => array.len(),
4801 ColumnAccessor::Null(len) => *len,
4802 }
4803 }
4804
4805 fn is_null(&self, idx: usize) -> bool {
4806 match self {
4807 ColumnAccessor::Int64(array) => array.is_null(idx),
4808 ColumnAccessor::Float64(array) => array.is_null(idx),
4809 ColumnAccessor::Boolean(array) => array.is_null(idx),
4810 ColumnAccessor::Utf8(array) => array.is_null(idx),
4811 ColumnAccessor::Null(_) => true,
4812 }
4813 }
4814
4815 fn literal_at(&self, idx: usize) -> ExecutorResult<Literal> {
4816 if self.is_null(idx) {
4817 return Ok(Literal::Null);
4818 }
4819 match self {
4820 ColumnAccessor::Int64(array) => Ok(Literal::Integer(array.value(idx) as i128)),
4821 ColumnAccessor::Float64(array) => Ok(Literal::Float(array.value(idx))),
4822 ColumnAccessor::Boolean(array) => Ok(Literal::Boolean(array.value(idx))),
4823 ColumnAccessor::Utf8(array) => Ok(Literal::String(array.value(idx).to_string())),
4824 ColumnAccessor::Null(_) => Ok(Literal::Null),
4825 }
4826 }
4827
4828 fn as_array_ref(&self) -> ArrayRef {
4829 match self {
4830 ColumnAccessor::Int64(array) => Arc::clone(array) as ArrayRef,
4831 ColumnAccessor::Float64(array) => Arc::clone(array) as ArrayRef,
4832 ColumnAccessor::Boolean(array) => Arc::clone(array) as ArrayRef,
4833 ColumnAccessor::Utf8(array) => Arc::clone(array) as ArrayRef,
4834 ColumnAccessor::Null(len) => new_null_array(&DataType::Null, *len),
4835 }
4836 }
4837}
4838
/// Type-grouped view over an evaluated scalar expression's result, used when
/// comparing operands in cross product filters.
///
/// All numeric arrow types collapse into `Numeric` (via `NumericArray`) so
/// mixed-width numeric comparisons work uniformly.
#[derive(Clone)]
enum ValueArray {
    Numeric(NumericArray),
    Boolean(Arc<BooleanArray>),
    Utf8(Arc<StringArray>),
    // An all-null result (arrow `DataType::Null`) of the given row count.
    Null(usize),
}
4846
4847impl ValueArray {
4848 fn from_array(array: ArrayRef) -> ExecutorResult<Self> {
4849 match array.data_type() {
4850 DataType::Boolean => {
4851 let typed = array
4852 .as_any()
4853 .downcast_ref::<BooleanArray>()
4854 .ok_or_else(|| Error::Internal("expected Boolean array".into()))?
4855 .clone();
4856 Ok(Self::Boolean(Arc::new(typed)))
4857 }
4858 DataType::Utf8 => {
4859 let typed = array
4860 .as_any()
4861 .downcast_ref::<StringArray>()
4862 .ok_or_else(|| Error::Internal("expected Utf8 array".into()))?
4863 .clone();
4864 Ok(Self::Utf8(Arc::new(typed)))
4865 }
4866 DataType::Null => Ok(Self::Null(array.len())),
4867 DataType::Int8
4868 | DataType::Int16
4869 | DataType::Int32
4870 | DataType::Int64
4871 | DataType::UInt8
4872 | DataType::UInt16
4873 | DataType::UInt32
4874 | DataType::UInt64
4875 | DataType::Float32
4876 | DataType::Float64 => {
4877 let numeric = NumericArray::try_from_arrow(&array)?;
4878 Ok(Self::Numeric(numeric))
4879 }
4880 other => Err(Error::InvalidArgumentError(format!(
4881 "unsupported data type {:?} in cross product expression",
4882 other
4883 ))),
4884 }
4885 }
4886
4887 fn len(&self) -> usize {
4888 match self {
4889 ValueArray::Numeric(array) => array.len(),
4890 ValueArray::Boolean(array) => array.len(),
4891 ValueArray::Utf8(array) => array.len(),
4892 ValueArray::Null(len) => *len,
4893 }
4894 }
4895}
4896
/// Three-valued (SQL/Kleene) logical AND: FALSE dominates; otherwise an
/// unknown (`None`) operand makes the result unknown.
fn truth_and(lhs: Option<bool>, rhs: Option<bool>) -> Option<bool> {
    if lhs == Some(false) || rhs == Some(false) {
        Some(false)
    } else if lhs == Some(true) && rhs == Some(true) {
        Some(true)
    } else {
        None
    }
}
4904
/// Three-valued (SQL/Kleene) logical OR: TRUE dominates; otherwise an
/// unknown (`None`) operand makes the result unknown.
fn truth_or(lhs: Option<bool>, rhs: Option<bool>) -> Option<bool> {
    if lhs == Some(true) || rhs == Some(true) {
        Some(true)
    } else if lhs == Some(false) && rhs == Some(false) {
        Some(false)
    } else {
        None
    }
}
4912
/// Three-valued logical NOT: negates a known truth value, leaves unknown
/// (`None`) unchanged.
fn truth_not(value: Option<bool>) -> Option<bool> {
    value.map(|truth| !truth)
}
4920
4921fn compare_bool(op: CompareOp, lhs: bool, rhs: bool) -> bool {
4922 let l = lhs as u8;
4923 let r = rhs as u8;
4924 match op {
4925 CompareOp::Eq => lhs == rhs,
4926 CompareOp::NotEq => lhs != rhs,
4927 CompareOp::Lt => l < r,
4928 CompareOp::LtEq => l <= r,
4929 CompareOp::Gt => l > r,
4930 CompareOp::GtEq => l >= r,
4931 }
4932}
4933
4934fn compare_str(op: CompareOp, lhs: &str, rhs: &str) -> bool {
4935 match op {
4936 CompareOp::Eq => lhs == rhs,
4937 CompareOp::NotEq => lhs != rhs,
4938 CompareOp::Lt => lhs < rhs,
4939 CompareOp::LtEq => lhs <= rhs,
4940 CompareOp::Gt => lhs > rhs,
4941 CompareOp::GtEq => lhs >= rhs,
4942 }
4943}
4944
/// Collapses the scan state of an `[NOT] IN (...)` evaluation into SQL
/// three-valued logic: a match decides the result outright; otherwise a NULL
/// among the candidates makes the result unknown; otherwise the answer is the
/// definite miss (inverted for NOT IN).
fn finalize_in_list_result(has_match: bool, saw_null: bool, negated: bool) -> Option<bool> {
    match (has_match, saw_null) {
        (true, _) => Some(!negated),
        (false, true) => None,
        (false, false) => Some(negated),
    }
}
4956
4957fn literal_to_constant_array(literal: &Literal, len: usize) -> ExecutorResult<ArrayRef> {
4958 match literal {
4959 Literal::Integer(v) => {
4960 let value = i64::try_from(*v).unwrap_or(0);
4961 let values = vec![value; len];
4962 Ok(Arc::new(Int64Array::from(values)) as ArrayRef)
4963 }
4964 Literal::Float(v) => {
4965 let values = vec![*v; len];
4966 Ok(Arc::new(Float64Array::from(values)) as ArrayRef)
4967 }
4968 Literal::Boolean(v) => {
4969 let values = vec![Some(*v); len];
4970 Ok(Arc::new(BooleanArray::from(values)) as ArrayRef)
4971 }
4972 Literal::String(v) => {
4973 let values: Vec<Option<String>> = (0..len).map(|_| Some(v.clone())).collect();
4974 Ok(Arc::new(StringArray::from(values)) as ArrayRef)
4975 }
4976 Literal::Null => Ok(new_null_array(&DataType::Null, len)),
4977 Literal::Struct(_) => Err(Error::InvalidArgumentError(
4978 "struct literals are not supported in cross product filters".into(),
4979 )),
4980 }
4981}
4982
/// Lazily-caching evaluation engine for predicates and scalar expressions
/// over a cross-product result batch. Truth values use SQL three-valued
/// logic throughout (`Some(bool)` = known, `None` = NULL/unknown).
impl CrossProductExpressionContext {
    /// Builds a context from the joined batch schema, assigning a fresh
    /// synthetic `FieldId` (starting at 1) to each output column and
    /// recording its column index.
    fn new(schema: &Schema, lookup: FxHashMap<String, usize>) -> ExecutorResult<Self> {
        let mut columns = Vec::with_capacity(schema.fields().len());
        let mut field_id_to_index = FxHashMap::default();
        let mut next_field_id: FieldId = 1;

        for (idx, field) in schema.fields().iter().enumerate() {
            // NOTE(review): this guard uses u32::MAX while
            // `allocate_synthetic_field_id` uses FieldId::MAX — presumably
            // FieldId == u32 so they agree; confirm and unify.
            if next_field_id == u32::MAX {
                return Err(Error::Internal(
                    "cross product projection exhausted FieldId space".into(),
                ));
            }

            // Synthetic executor column mirroring the arrow field; never a
            // key and carries no CHECK constraint.
            let executor_column = ExecutorColumn {
                name: field.name().clone(),
                data_type: field.data_type().clone(),
                nullable: field.is_nullable(),
                primary_key: false,
                unique: false,
                field_id: next_field_id,
                check_expr: None,
            };
            let field_id = next_field_id;
            next_field_id = next_field_id.saturating_add(1);

            columns.push(executor_column);
            field_id_to_index.insert(field_id, idx);
        }

        Ok(Self {
            schema: Arc::new(ExecutorSchema { columns, lookup }),
            field_id_to_index,
            numeric_cache: FxHashMap::default(),
            column_cache: FxHashMap::default(),
            next_field_id,
        })
    }

    /// Executor-facing schema describing the joined result.
    fn schema(&self) -> &ExecutorSchema {
        self.schema.as_ref()
    }

    /// Resolves a column name to its synthetic `FieldId`, if present.
    fn field_id_for_column(&self, name: &str) -> Option<FieldId> {
        self.schema.resolve(name).map(|column| column.field_id)
    }

    /// Clears the per-batch accessor caches; must be called before reusing
    /// this context with a different `RecordBatch`.
    fn reset(&mut self) {
        self.numeric_cache.clear();
        self.column_cache.clear();
    }

    /// Hands out a fresh `FieldId` not backed by any physical column.
    fn allocate_synthetic_field_id(&mut self) -> ExecutorResult<FieldId> {
        if self.next_field_id == FieldId::MAX {
            return Err(Error::Internal(
                "cross product projection exhausted FieldId space".into(),
            ));
        }
        let field_id = self.next_field_id;
        self.next_field_id = self.next_field_id.saturating_add(1);
        Ok(field_id)
    }

    /// Test helper: translates a name-keyed scalar expression against this
    /// context's schema, then evaluates it over `batch`.
    #[cfg(test)]
    fn evaluate(
        &mut self,
        expr: &ScalarExpr<String>,
        batch: &RecordBatch,
    ) -> ExecutorResult<ArrayRef> {
        let translated = translate_scalar(expr, self.schema.as_ref(), |name| {
            Error::InvalidArgumentError(format!(
                "column '{}' not found in cross product result",
                name
            ))
        })?;

        self.evaluate_numeric(&translated, batch)
    }

    /// Evaluates `expr` to a dense boolean mask over `batch`, collapsing
    /// three-valued results: unknown (NULL) rows become `false`, matching
    /// SQL WHERE semantics. `exists_eval` resolves EXISTS subqueries per row.
    fn evaluate_predicate_mask(
        &mut self,
        expr: &LlkvExpr<'static, FieldId>,
        batch: &RecordBatch,
        mut exists_eval: impl FnMut(
            &mut Self,
            &llkv_expr::SubqueryExpr,
            usize,
            &RecordBatch,
        ) -> ExecutorResult<Option<bool>>,
    ) -> ExecutorResult<BooleanArray> {
        let truths = self.evaluate_predicate_truths(expr, batch, &mut exists_eval)?;
        let mut builder = BooleanBuilder::with_capacity(truths.len());
        for value in truths {
            // NULL (unknown) collapses to false in the final mask.
            builder.append_value(value.unwrap_or(false));
        }
        Ok(builder.finish())
    }

    /// Recursively evaluates a predicate tree to one three-valued truth per
    /// row (`None` = SQL NULL). AND/OR/NOT combine children with Kleene
    /// logic; EXISTS nodes are delegated to `exists_eval` row by row.
    fn evaluate_predicate_truths(
        &mut self,
        expr: &LlkvExpr<'static, FieldId>,
        batch: &RecordBatch,
        exists_eval: &mut impl FnMut(
            &mut Self,
            &llkv_expr::SubqueryExpr,
            usize,
            &RecordBatch,
        ) -> ExecutorResult<Option<bool>>,
    ) -> ExecutorResult<Vec<Option<bool>>> {
        match expr {
            LlkvExpr::Literal(value) => Ok(vec![Some(*value); batch.num_rows()]),
            LlkvExpr::And(children) => {
                // Empty conjunction is vacuously true.
                if children.is_empty() {
                    return Ok(vec![Some(true); batch.num_rows()]);
                }
                let mut result =
                    self.evaluate_predicate_truths(&children[0], batch, exists_eval)?;
                for child in &children[1..] {
                    let next = self.evaluate_predicate_truths(child, batch, exists_eval)?;
                    for (lhs, rhs) in result.iter_mut().zip(next.into_iter()) {
                        *lhs = truth_and(*lhs, rhs);
                    }
                }
                Ok(result)
            }
            LlkvExpr::Or(children) => {
                // Empty disjunction is vacuously false.
                if children.is_empty() {
                    return Ok(vec![Some(false); batch.num_rows()]);
                }
                let mut result =
                    self.evaluate_predicate_truths(&children[0], batch, exists_eval)?;
                for child in &children[1..] {
                    let next = self.evaluate_predicate_truths(child, batch, exists_eval)?;
                    for (lhs, rhs) in result.iter_mut().zip(next.into_iter()) {
                        *lhs = truth_or(*lhs, rhs);
                    }
                }
                Ok(result)
            }
            LlkvExpr::Not(inner) => {
                let mut values = self.evaluate_predicate_truths(inner, batch, exists_eval)?;
                for value in &mut values {
                    *value = truth_not(*value);
                }
                Ok(values)
            }
            LlkvExpr::Pred(filter) => self.evaluate_filter_truths(filter, batch),
            LlkvExpr::Compare { left, op, right } => {
                self.evaluate_compare_truths(left, *op, right, batch)
            }
            LlkvExpr::InList {
                expr: target,
                list,
                negated,
            } => self.evaluate_in_list_truths(target, list, *negated, batch),
            LlkvExpr::IsNull { expr, negated } => {
                self.evaluate_is_null_truths(expr, *negated, batch)
            }
            LlkvExpr::Exists(subquery_expr) => {
                // EXISTS cannot be vectorized here; evaluate per row via the
                // caller-supplied closure.
                let mut values = Vec::with_capacity(batch.num_rows());
                for row_idx in 0..batch.num_rows() {
                    let value = exists_eval(self, subquery_expr, row_idx, batch)?;
                    values.push(value);
                }
                Ok(values)
            }
        }
    }

    /// Evaluates a single-column `Filter` to per-row truth values using the
    /// typed predicate builders. NULL inputs yield NULL truths, except for
    /// the explicit IS NULL / IS NOT NULL operators which are total.
    fn evaluate_filter_truths(
        &mut self,
        filter: &Filter<FieldId>,
        batch: &RecordBatch,
    ) -> ExecutorResult<Vec<Option<bool>>> {
        let accessor = self.column_accessor(filter.field_id, batch)?;
        let len = accessor.len();

        match &filter.op {
            Operator::IsNull => {
                let mut out = Vec::with_capacity(len);
                for idx in 0..len {
                    out.push(Some(accessor.is_null(idx)));
                }
                Ok(out)
            }
            Operator::IsNotNull => {
                let mut out = Vec::with_capacity(len);
                for idx in 0..len {
                    out.push(Some(!accessor.is_null(idx)));
                }
                Ok(out)
            }
            // Every other operator goes through a compiled typed predicate.
            _ => match accessor {
                ColumnAccessor::Int64(array) => {
                    let predicate = build_fixed_width_predicate::<Int64Type>(&filter.op)
                        .map_err(Error::predicate_build)?;
                    let mut out = Vec::with_capacity(len);
                    for idx in 0..len {
                        if array.is_null(idx) {
                            out.push(None);
                        } else {
                            let value = array.value(idx);
                            out.push(Some(predicate.matches(&value)));
                        }
                    }
                    Ok(out)
                }
                ColumnAccessor::Float64(array) => {
                    let predicate = build_fixed_width_predicate::<Float64Type>(&filter.op)
                        .map_err(Error::predicate_build)?;
                    let mut out = Vec::with_capacity(len);
                    for idx in 0..len {
                        if array.is_null(idx) {
                            out.push(None);
                        } else {
                            let value = array.value(idx);
                            out.push(Some(predicate.matches(&value)));
                        }
                    }
                    Ok(out)
                }
                ColumnAccessor::Boolean(array) => {
                    let predicate =
                        build_bool_predicate(&filter.op).map_err(Error::predicate_build)?;
                    let mut out = Vec::with_capacity(len);
                    for idx in 0..len {
                        if array.is_null(idx) {
                            out.push(None);
                        } else {
                            let value = array.value(idx);
                            out.push(Some(predicate.matches(&value)));
                        }
                    }
                    Ok(out)
                }
                ColumnAccessor::Utf8(array) => {
                    let predicate =
                        build_var_width_predicate(&filter.op).map_err(Error::predicate_build)?;
                    let mut out = Vec::with_capacity(len);
                    for idx in 0..len {
                        if array.is_null(idx) {
                            out.push(None);
                        } else {
                            let value = array.value(idx);
                            out.push(Some(predicate.matches(value)));
                        }
                    }
                    Ok(out)
                }
                // All-null column: every truth is unknown.
                ColumnAccessor::Null(len) => Ok(vec![None; len]),
            },
        }
    }

    /// Evaluates `left <op> right` row-wise. Operands must materialize to the
    /// same type family (numeric/boolean/string); a NULL operand array or a
    /// NULL element yields an unknown truth for the affected rows.
    fn evaluate_compare_truths(
        &mut self,
        left: &ScalarExpr<FieldId>,
        op: CompareOp,
        right: &ScalarExpr<FieldId>,
        batch: &RecordBatch,
    ) -> ExecutorResult<Vec<Option<bool>>> {
        let left_values = self.materialize_value_array(left, batch)?;
        let right_values = self.materialize_value_array(right, batch)?;

        if left_values.len() != right_values.len() {
            return Err(Error::Internal(
                "mismatched compare operand lengths in cross product filter".into(),
            ));
        }

        let len = left_values.len();
        match (&left_values, &right_values) {
            (ValueArray::Null(_), _) | (_, ValueArray::Null(_)) => Ok(vec![None; len]),
            (ValueArray::Numeric(lhs), ValueArray::Numeric(rhs)) => {
                let mut out = Vec::with_capacity(len);
                for idx in 0..len {
                    match (lhs.value(idx), rhs.value(idx)) {
                        (Some(lv), Some(rv)) => out.push(Some(NumericKernels::compare(op, lv, rv))),
                        _ => out.push(None),
                    }
                }
                Ok(out)
            }
            (ValueArray::Boolean(lhs), ValueArray::Boolean(rhs)) => {
                let lhs = lhs.as_ref();
                let rhs = rhs.as_ref();
                let mut out = Vec::with_capacity(len);
                for idx in 0..len {
                    if lhs.is_null(idx) || rhs.is_null(idx) {
                        out.push(None);
                    } else {
                        out.push(Some(compare_bool(op, lhs.value(idx), rhs.value(idx))));
                    }
                }
                Ok(out)
            }
            (ValueArray::Utf8(lhs), ValueArray::Utf8(rhs)) => {
                let lhs = lhs.as_ref();
                let rhs = rhs.as_ref();
                let mut out = Vec::with_capacity(len);
                for idx in 0..len {
                    if lhs.is_null(idx) || rhs.is_null(idx) {
                        out.push(None);
                    } else {
                        out.push(Some(compare_str(op, lhs.value(idx), rhs.value(idx))));
                    }
                }
                Ok(out)
            }
            _ => Err(Error::InvalidArgumentError(
                "unsupported comparison between mismatched types in cross product filter".into(),
            )),
        }
    }

    /// Evaluates `expr IS [NOT] NULL` per row. Unlike most predicates this is
    /// total: the result is always a known boolean, never NULL.
    fn evaluate_is_null_truths(
        &mut self,
        expr: &ScalarExpr<FieldId>,
        negated: bool,
        batch: &RecordBatch,
    ) -> ExecutorResult<Vec<Option<bool>>> {
        let values = self.materialize_value_array(expr, batch)?;
        let len = values.len();

        match &values {
            ValueArray::Null(len) => {
                // Every row is NULL: IS NULL -> true, IS NOT NULL -> false.
                let result = if negated { Some(false) } else { Some(true) };
                Ok(vec![result; *len])
            }
            ValueArray::Numeric(arr) => {
                let mut out = Vec::with_capacity(len);
                for idx in 0..len {
                    let is_null = arr.value(idx).is_none();
                    let result = if negated { !is_null } else { is_null };
                    out.push(Some(result));
                }
                Ok(out)
            }
            ValueArray::Boolean(arr) => {
                let mut out = Vec::with_capacity(len);
                for idx in 0..len {
                    let is_null = arr.is_null(idx);
                    let result = if negated { !is_null } else { is_null };
                    out.push(Some(result));
                }
                Ok(out)
            }
            ValueArray::Utf8(arr) => {
                let mut out = Vec::with_capacity(len);
                for idx in 0..len {
                    let is_null = arr.is_null(idx);
                    let result = if negated { !is_null } else { is_null };
                    out.push(Some(result));
                }
                Ok(out)
            }
        }
    }

    /// Evaluates `target [NOT] IN (list...)` per row with SQL three-valued
    /// semantics (see `finalize_in_list_result`). Target and list entries
    /// must materialize to the same type family.
    fn evaluate_in_list_truths(
        &mut self,
        target: &ScalarExpr<FieldId>,
        list: &[ScalarExpr<FieldId>],
        negated: bool,
        batch: &RecordBatch,
    ) -> ExecutorResult<Vec<Option<bool>>> {
        let target_values = self.materialize_value_array(target, batch)?;
        let list_values = list
            .iter()
            .map(|expr| self.materialize_value_array(expr, batch))
            .collect::<ExecutorResult<Vec<_>>>()?;

        let len = target_values.len();
        for values in &list_values {
            if values.len() != len {
                return Err(Error::Internal(
                    "mismatched IN list operand lengths in cross product filter".into(),
                ));
            }
        }

        match &target_values {
            ValueArray::Numeric(target_numeric) => {
                let mut out = Vec::with_capacity(len);
                for idx in 0..len {
                    // NULL target makes the whole membership test unknown.
                    let target_value = match target_numeric.value(idx) {
                        Some(value) => value,
                        None => {
                            out.push(None);
                            continue;
                        }
                    };
                    let mut has_match = false;
                    let mut saw_null = false;
                    for candidate in &list_values {
                        match candidate {
                            ValueArray::Numeric(array) => match array.value(idx) {
                                Some(value) => {
                                    if NumericKernels::compare(CompareOp::Eq, target_value, value) {
                                        has_match = true;
                                        break;
                                    }
                                }
                                None => saw_null = true,
                            },
                            ValueArray::Null(_) => saw_null = true,
                            _ => {
                                return Err(Error::InvalidArgumentError(
                                    "type mismatch in IN list evaluation".into(),
                                ));
                            }
                        }
                    }
                    out.push(finalize_in_list_result(has_match, saw_null, negated));
                }
                Ok(out)
            }
            ValueArray::Boolean(target_bool) => {
                let mut out = Vec::with_capacity(len);
                for idx in 0..len {
                    if target_bool.is_null(idx) {
                        out.push(None);
                        continue;
                    }
                    let target_value = target_bool.value(idx);
                    let mut has_match = false;
                    let mut saw_null = false;
                    for candidate in &list_values {
                        match candidate {
                            ValueArray::Boolean(array) => {
                                if array.is_null(idx) {
                                    saw_null = true;
                                } else if array.value(idx) == target_value {
                                    has_match = true;
                                    break;
                                }
                            }
                            ValueArray::Null(_) => saw_null = true,
                            _ => {
                                return Err(Error::InvalidArgumentError(
                                    "type mismatch in IN list evaluation".into(),
                                ));
                            }
                        }
                    }
                    out.push(finalize_in_list_result(has_match, saw_null, negated));
                }
                Ok(out)
            }
            ValueArray::Utf8(target_utf8) => {
                let mut out = Vec::with_capacity(len);
                for idx in 0..len {
                    if target_utf8.is_null(idx) {
                        out.push(None);
                        continue;
                    }
                    let target_value = target_utf8.value(idx);
                    let mut has_match = false;
                    let mut saw_null = false;
                    for candidate in &list_values {
                        match candidate {
                            ValueArray::Utf8(array) => {
                                if array.is_null(idx) {
                                    saw_null = true;
                                } else if array.value(idx) == target_value {
                                    has_match = true;
                                    break;
                                }
                            }
                            ValueArray::Null(_) => saw_null = true,
                            _ => {
                                return Err(Error::InvalidArgumentError(
                                    "type mismatch in IN list evaluation".into(),
                                ));
                            }
                        }
                    }
                    out.push(finalize_in_list_result(has_match, saw_null, negated));
                }
                Ok(out)
            }
            ValueArray::Null(len) => Ok(vec![None; *len]),
        }
    }

    /// Evaluates a scalar expression via the numeric kernel machinery,
    /// gathering every referenced column as a `NumericArray` first.
    fn evaluate_numeric(
        &mut self,
        expr: &ScalarExpr<FieldId>,
        batch: &RecordBatch,
    ) -> ExecutorResult<ArrayRef> {
        let mut required = FxHashSet::default();
        collect_field_ids(expr, &mut required);

        let mut arrays = NumericArrayMap::default();
        for field_id in required {
            let numeric = self.numeric_array(field_id, batch)?;
            arrays.insert(field_id, numeric);
        }

        NumericKernels::evaluate_batch(expr, batch.num_rows(), &arrays)
    }

    /// Returns (and caches) the numeric view of a column by field id.
    fn numeric_array(
        &mut self,
        field_id: FieldId,
        batch: &RecordBatch,
    ) -> ExecutorResult<NumericArray> {
        if let Some(existing) = self.numeric_cache.get(&field_id) {
            return Ok(existing.clone());
        }

        let column_index = *self.field_id_to_index.get(&field_id).ok_or_else(|| {
            Error::Internal("field mapping missing during cross product evaluation".into())
        })?;

        let array_ref = batch.column(column_index).clone();
        let numeric = NumericArray::try_from_arrow(&array_ref)?;
        self.numeric_cache.insert(field_id, numeric.clone());
        Ok(numeric)
    }

    /// Returns (and caches) the typed accessor of a column by field id.
    fn column_accessor(
        &mut self,
        field_id: FieldId,
        batch: &RecordBatch,
    ) -> ExecutorResult<ColumnAccessor> {
        if let Some(existing) = self.column_cache.get(&field_id) {
            return Ok(existing.clone());
        }

        let column_index = *self.field_id_to_index.get(&field_id).ok_or_else(|| {
            Error::Internal("field mapping missing during cross product evaluation".into())
        })?;

        let accessor = ColumnAccessor::from_array(batch.column(column_index))?;
        self.column_cache.insert(field_id, accessor.clone());
        Ok(accessor)
    }

    /// Materializes a scalar expression as a full arrow array over `batch`.
    /// Columns and literals are produced directly; CAST delegates to arrow's
    /// `cast` kernel; most compound forms route through `evaluate_numeric`.
    fn materialize_scalar_array(
        &mut self,
        expr: &ScalarExpr<FieldId>,
        batch: &RecordBatch,
    ) -> ExecutorResult<ArrayRef> {
        match expr {
            ScalarExpr::Column(field_id) => {
                let accessor = self.column_accessor(*field_id, batch)?;
                Ok(accessor.as_array_ref())
            }
            ScalarExpr::Literal(literal) => literal_to_constant_array(literal, batch.num_rows()),
            ScalarExpr::Binary { .. } => self.evaluate_numeric(expr, batch),
            ScalarExpr::Compare { .. } => self.evaluate_numeric(expr, batch),
            ScalarExpr::Not(_) => self.evaluate_numeric(expr, batch),
            ScalarExpr::IsNull { .. } => self.evaluate_numeric(expr, batch),
            ScalarExpr::Aggregate(_) => Err(Error::InvalidArgumentError(
                "aggregate expressions are not supported in cross product filters".into(),
            )),
            ScalarExpr::GetField { .. } => Err(Error::InvalidArgumentError(
                "struct field access is not supported in cross product filters".into(),
            )),
            ScalarExpr::Cast { expr, data_type } => {
                let source = self.materialize_scalar_array(expr.as_ref(), batch)?;
                let casted = cast(source.as_ref(), data_type).map_err(|err| {
                    Error::InvalidArgumentError(format!("failed to cast expression: {err}"))
                })?;
                Ok(casted)
            }
            ScalarExpr::Case { .. } => self.evaluate_numeric(expr, batch),
            ScalarExpr::Coalesce(_) => self.evaluate_numeric(expr, batch),
            ScalarExpr::ScalarSubquery(_) => Err(Error::InvalidArgumentError(
                "scalar subqueries are not supported in cross product filters".into(),
            )),
        }
    }

    /// Materializes `expr` and wraps the result in a typed `ValueArray`.
    fn materialize_value_array(
        &mut self,
        expr: &ScalarExpr<FieldId>,
        batch: &RecordBatch,
    ) -> ExecutorResult<ValueArray> {
        let array = self.materialize_scalar_array(expr, batch)?;
        ValueArray::from_array(array)
    }
}
5575
5576fn collect_field_ids(expr: &ScalarExpr<FieldId>, out: &mut FxHashSet<FieldId>) {
5578 match expr {
5579 ScalarExpr::Column(fid) => {
5580 out.insert(*fid);
5581 }
5582 ScalarExpr::Binary { left, right, .. } => {
5583 collect_field_ids(left, out);
5584 collect_field_ids(right, out);
5585 }
5586 ScalarExpr::Compare { left, right, .. } => {
5587 collect_field_ids(left, out);
5588 collect_field_ids(right, out);
5589 }
5590 ScalarExpr::Aggregate(call) => match call {
5591 AggregateCall::CountStar => {}
5592 AggregateCall::Count { expr, .. }
5593 | AggregateCall::Sum { expr, .. }
5594 | AggregateCall::Avg { expr, .. }
5595 | AggregateCall::Min(expr)
5596 | AggregateCall::Max(expr)
5597 | AggregateCall::CountNulls(expr) => {
5598 collect_field_ids(expr, out);
5599 }
5600 },
5601 ScalarExpr::GetField { base, .. } => collect_field_ids(base, out),
5602 ScalarExpr::Cast { expr, .. } => collect_field_ids(expr, out),
5603 ScalarExpr::Not(expr) => collect_field_ids(expr, out),
5604 ScalarExpr::IsNull { expr, .. } => collect_field_ids(expr, out),
5605 ScalarExpr::Case {
5606 operand,
5607 branches,
5608 else_expr,
5609 } => {
5610 if let Some(inner) = operand.as_deref() {
5611 collect_field_ids(inner, out);
5612 }
5613 for (when_expr, then_expr) in branches {
5614 collect_field_ids(when_expr, out);
5615 collect_field_ids(then_expr, out);
5616 }
5617 if let Some(inner) = else_expr.as_deref() {
5618 collect_field_ids(inner, out);
5619 }
5620 }
5621 ScalarExpr::Coalesce(items) => {
5622 for item in items {
5623 collect_field_ids(item, out);
5624 }
5625 }
5626 ScalarExpr::Literal(_) => {}
5627 ScalarExpr::ScalarSubquery(_) => {}
5628 }
5629}
5630
5631fn strip_exists(expr: &LlkvExpr<'static, FieldId>) -> LlkvExpr<'static, FieldId> {
5632 match expr {
5633 LlkvExpr::And(children) => LlkvExpr::And(children.iter().map(strip_exists).collect()),
5634 LlkvExpr::Or(children) => LlkvExpr::Or(children.iter().map(strip_exists).collect()),
5635 LlkvExpr::Not(inner) => LlkvExpr::Not(Box::new(strip_exists(inner))),
5636 LlkvExpr::Pred(filter) => LlkvExpr::Pred(filter.clone()),
5637 LlkvExpr::Compare { left, op, right } => LlkvExpr::Compare {
5638 left: left.clone(),
5639 op: *op,
5640 right: right.clone(),
5641 },
5642 LlkvExpr::InList {
5643 expr,
5644 list,
5645 negated,
5646 } => LlkvExpr::InList {
5647 expr: expr.clone(),
5648 list: list.clone(),
5649 negated: *negated,
5650 },
5651 LlkvExpr::IsNull { expr, negated } => LlkvExpr::IsNull {
5652 expr: expr.clone(),
5653 negated: *negated,
5654 },
5655 LlkvExpr::Literal(value) => LlkvExpr::Literal(*value),
5656 LlkvExpr::Exists(_) => LlkvExpr::Literal(true),
5657 }
5658}
5659
5660fn bind_select_plan(
5661 plan: &SelectPlan,
5662 bindings: &FxHashMap<String, Literal>,
5663) -> ExecutorResult<SelectPlan> {
5664 if bindings.is_empty() {
5665 return Ok(plan.clone());
5666 }
5667
5668 let projections = plan
5669 .projections
5670 .iter()
5671 .map(|projection| bind_projection(projection, bindings))
5672 .collect::<ExecutorResult<Vec<_>>>()?;
5673
5674 let filter = match &plan.filter {
5675 Some(wrapper) => Some(bind_select_filter(wrapper, bindings)?),
5676 None => None,
5677 };
5678
5679 let aggregates = plan
5680 .aggregates
5681 .iter()
5682 .map(|aggregate| bind_aggregate_expr(aggregate, bindings))
5683 .collect::<ExecutorResult<Vec<_>>>()?;
5684
5685 let scalar_subqueries = plan
5686 .scalar_subqueries
5687 .iter()
5688 .map(|subquery| bind_scalar_subquery(subquery, bindings))
5689 .collect::<ExecutorResult<Vec<_>>>()?;
5690
5691 if let Some(compound) = &plan.compound {
5692 let bound_compound = bind_compound_select(compound, bindings)?;
5693 return Ok(SelectPlan {
5694 tables: Vec::new(),
5695 joins: Vec::new(),
5696 projections: Vec::new(),
5697 filter: None,
5698 having: None,
5699 aggregates: Vec::new(),
5700 order_by: plan.order_by.clone(),
5701 distinct: false,
5702 scalar_subqueries: Vec::new(),
5703 compound: Some(bound_compound),
5704 group_by: Vec::new(),
5705 value_table_mode: None,
5706 });
5707 }
5708
5709 Ok(SelectPlan {
5710 tables: plan.tables.clone(),
5711 joins: plan.joins.clone(),
5712 projections,
5713 filter,
5714 having: plan.having.clone(),
5715 aggregates,
5716 order_by: Vec::new(),
5717 distinct: plan.distinct,
5718 scalar_subqueries,
5719 compound: None,
5720 group_by: plan.group_by.clone(),
5721 value_table_mode: plan.value_table_mode.clone(),
5722 })
5723}
5724
5725fn bind_compound_select(
5726 compound: &CompoundSelectPlan,
5727 bindings: &FxHashMap<String, Literal>,
5728) -> ExecutorResult<CompoundSelectPlan> {
5729 let initial = bind_select_plan(&compound.initial, bindings)?;
5730 let mut operations = Vec::with_capacity(compound.operations.len());
5731 for component in &compound.operations {
5732 let bound_plan = bind_select_plan(&component.plan, bindings)?;
5733 operations.push(CompoundSelectComponent {
5734 operator: component.operator.clone(),
5735 quantifier: component.quantifier.clone(),
5736 plan: bound_plan,
5737 });
5738 }
5739 Ok(CompoundSelectPlan {
5740 initial: Box::new(initial),
5741 operations,
5742 })
5743}
5744
5745fn ensure_schema_compatibility(base: &Schema, other: &Schema) -> ExecutorResult<()> {
5746 if base.fields().len() != other.fields().len() {
5747 return Err(Error::InvalidArgumentError(
5748 "compound SELECT requires matching column counts".into(),
5749 ));
5750 }
5751 for (left, right) in base.fields().iter().zip(other.fields().iter()) {
5752 if left.data_type() != right.data_type() {
5753 return Err(Error::InvalidArgumentError(format!(
5754 "compound SELECT column type mismatch: {} vs {}",
5755 left.data_type(),
5756 right.data_type()
5757 )));
5758 }
5759 }
5760 Ok(())
5761}
5762
5763fn ensure_distinct_rows(rows: &mut Vec<Vec<PlanValue>>, cache: &mut Option<FxHashSet<Vec<u8>>>) {
5764 if cache.is_some() {
5765 return;
5766 }
5767 let mut set = FxHashSet::default();
5768 let mut deduped: Vec<Vec<PlanValue>> = Vec::with_capacity(rows.len());
5769 for row in rows.drain(..) {
5770 let key = encode_row(&row);
5771 if set.insert(key) {
5772 deduped.push(row);
5773 }
5774 }
5775 *rows = deduped;
5776 *cache = Some(set);
5777}
5778
5779fn encode_row(row: &[PlanValue]) -> Vec<u8> {
5780 let mut buf = Vec::new();
5781 for value in row {
5782 encode_plan_value(&mut buf, value);
5783 buf.push(0x1F);
5784 }
5785 buf
5786}
5787
5788fn encode_plan_value(buf: &mut Vec<u8>, value: &PlanValue) {
5789 match value {
5790 PlanValue::Null => buf.push(0),
5791 PlanValue::Integer(v) => {
5792 buf.push(1);
5793 buf.extend_from_slice(&v.to_be_bytes());
5794 }
5795 PlanValue::Float(v) => {
5796 buf.push(2);
5797 buf.extend_from_slice(&v.to_bits().to_be_bytes());
5798 }
5799 PlanValue::String(s) => {
5800 buf.push(3);
5801 let bytes = s.as_bytes();
5802 let len = u32::try_from(bytes.len()).unwrap_or(u32::MAX);
5803 buf.extend_from_slice(&len.to_be_bytes());
5804 buf.extend_from_slice(bytes);
5805 }
5806 PlanValue::Struct(map) => {
5807 buf.push(4);
5808 let mut entries: Vec<_> = map.iter().collect();
5809 entries.sort_by(|a, b| a.0.cmp(b.0));
5810 let len = u32::try_from(entries.len()).unwrap_or(u32::MAX);
5811 buf.extend_from_slice(&len.to_be_bytes());
5812 for (key, val) in entries {
5813 let key_bytes = key.as_bytes();
5814 let key_len = u32::try_from(key_bytes.len()).unwrap_or(u32::MAX);
5815 buf.extend_from_slice(&key_len.to_be_bytes());
5816 buf.extend_from_slice(key_bytes);
5817 encode_plan_value(buf, val);
5818 }
5819 }
5820 }
5821}
5822
5823fn rows_to_record_batch(
5824 schema: Arc<Schema>,
5825 rows: &[Vec<PlanValue>],
5826) -> ExecutorResult<RecordBatch> {
5827 let column_count = schema.fields().len();
5828 let mut columns: Vec<Vec<PlanValue>> = vec![Vec::with_capacity(rows.len()); column_count];
5829 for row in rows {
5830 if row.len() != column_count {
5831 return Err(Error::InvalidArgumentError(
5832 "compound SELECT produced mismatched column counts".into(),
5833 ));
5834 }
5835 for (idx, value) in row.iter().enumerate() {
5836 columns[idx].push(value.clone());
5837 }
5838 }
5839
5840 let mut arrays: Vec<ArrayRef> = Vec::with_capacity(column_count);
5841 for (idx, field) in schema.fields().iter().enumerate() {
5842 let array = build_array_for_column(field.data_type(), &columns[idx])?;
5843 arrays.push(array);
5844 }
5845
5846 RecordBatch::try_new(schema, arrays).map_err(|err| {
5847 Error::InvalidArgumentError(format!("failed to materialize compound SELECT: {err}"))
5848 })
5849}
5850
5851fn build_column_lookup_map(schema: &Schema) -> FxHashMap<String, usize> {
5852 let mut lookup = FxHashMap::default();
5853 for (idx, field) in schema.fields().iter().enumerate() {
5854 lookup.insert(field.name().to_ascii_lowercase(), idx);
5855 }
5856 lookup
5857}
5858
5859fn build_group_key(
5860 batch: &RecordBatch,
5861 row_idx: usize,
5862 key_indices: &[usize],
5863) -> ExecutorResult<Vec<GroupKeyValue>> {
5864 let mut values = Vec::with_capacity(key_indices.len());
5865 for &index in key_indices {
5866 values.push(group_key_value(batch.column(index), row_idx)?);
5867 }
5868 Ok(values)
5869}
5870
5871fn group_key_value(array: &ArrayRef, row_idx: usize) -> ExecutorResult<GroupKeyValue> {
5872 if !array.is_valid(row_idx) {
5873 return Ok(GroupKeyValue::Null);
5874 }
5875
5876 match array.data_type() {
5877 DataType::Int8 => {
5878 let values = array
5879 .as_any()
5880 .downcast_ref::<Int8Array>()
5881 .ok_or_else(|| Error::Internal("failed to downcast to Int8Array".into()))?;
5882 Ok(GroupKeyValue::Int(values.value(row_idx) as i64))
5883 }
5884 DataType::Int16 => {
5885 let values = array
5886 .as_any()
5887 .downcast_ref::<Int16Array>()
5888 .ok_or_else(|| Error::Internal("failed to downcast to Int16Array".into()))?;
5889 Ok(GroupKeyValue::Int(values.value(row_idx) as i64))
5890 }
5891 DataType::Int32 => {
5892 let values = array
5893 .as_any()
5894 .downcast_ref::<Int32Array>()
5895 .ok_or_else(|| Error::Internal("failed to downcast to Int32Array".into()))?;
5896 Ok(GroupKeyValue::Int(values.value(row_idx) as i64))
5897 }
5898 DataType::Int64 => {
5899 let values = array
5900 .as_any()
5901 .downcast_ref::<Int64Array>()
5902 .ok_or_else(|| Error::Internal("failed to downcast to Int64Array".into()))?;
5903 Ok(GroupKeyValue::Int(values.value(row_idx)))
5904 }
5905 DataType::UInt8 => {
5906 let values = array
5907 .as_any()
5908 .downcast_ref::<UInt8Array>()
5909 .ok_or_else(|| Error::Internal("failed to downcast to UInt8Array".into()))?;
5910 Ok(GroupKeyValue::Int(values.value(row_idx) as i64))
5911 }
5912 DataType::UInt16 => {
5913 let values = array
5914 .as_any()
5915 .downcast_ref::<UInt16Array>()
5916 .ok_or_else(|| Error::Internal("failed to downcast to UInt16Array".into()))?;
5917 Ok(GroupKeyValue::Int(values.value(row_idx) as i64))
5918 }
5919 DataType::UInt32 => {
5920 let values = array
5921 .as_any()
5922 .downcast_ref::<UInt32Array>()
5923 .ok_or_else(|| Error::Internal("failed to downcast to UInt32Array".into()))?;
5924 Ok(GroupKeyValue::Int(values.value(row_idx) as i64))
5925 }
5926 DataType::UInt64 => {
5927 let values = array
5928 .as_any()
5929 .downcast_ref::<UInt64Array>()
5930 .ok_or_else(|| Error::Internal("failed to downcast to UInt64Array".into()))?;
5931 let value = values.value(row_idx);
5932 if value > i64::MAX as u64 {
5933 return Err(Error::InvalidArgumentError(
5934 "GROUP BY value exceeds supported integer range".into(),
5935 ));
5936 }
5937 Ok(GroupKeyValue::Int(value as i64))
5938 }
5939 DataType::Boolean => {
5940 let values = array
5941 .as_any()
5942 .downcast_ref::<BooleanArray>()
5943 .ok_or_else(|| Error::Internal("failed to downcast to BooleanArray".into()))?;
5944 Ok(GroupKeyValue::Bool(values.value(row_idx)))
5945 }
5946 DataType::Utf8 => {
5947 let values = array
5948 .as_any()
5949 .downcast_ref::<StringArray>()
5950 .ok_or_else(|| Error::Internal("failed to downcast to StringArray".into()))?;
5951 Ok(GroupKeyValue::String(values.value(row_idx).to_string()))
5952 }
5953 other => Err(Error::InvalidArgumentError(format!(
5954 "GROUP BY does not support column type {:?}",
5955 other
5956 ))),
5957 }
5958}
5959
5960fn evaluate_constant_predicate(expr: &LlkvExpr<'static, String>) -> Option<Option<bool>> {
5961 match expr {
5962 LlkvExpr::Literal(value) => Some(Some(*value)),
5963 LlkvExpr::Not(inner) => {
5964 let inner_val = evaluate_constant_predicate(inner)?;
5965 Some(truth_not(inner_val))
5966 }
5967 LlkvExpr::And(children) => {
5968 let mut acc = Some(true);
5969 for child in children {
5970 let child_val = evaluate_constant_predicate(child)?;
5971 acc = truth_and(acc, child_val);
5972 }
5973 Some(acc)
5974 }
5975 LlkvExpr::Or(children) => {
5976 let mut acc = Some(false);
5977 for child in children {
5978 let child_val = evaluate_constant_predicate(child)?;
5979 acc = truth_or(acc, child_val);
5980 }
5981 Some(acc)
5982 }
5983 LlkvExpr::Compare { left, op, right } => {
5984 let left_literal = evaluate_constant_scalar(left)?;
5985 let right_literal = evaluate_constant_scalar(right)?;
5986 Some(compare_literals(*op, &left_literal, &right_literal))
5987 }
5988 _ => None,
5989 }
5990}
5991
5992fn evaluate_constant_scalar(expr: &ScalarExpr<String>) -> Option<Literal> {
5993 match expr {
5994 ScalarExpr::Literal(lit) => Some(lit.clone()),
5995 _ => None,
5996 }
5997}
5998
5999fn compare_literals(op: CompareOp, left: &Literal, right: &Literal) -> Option<bool> {
6000 use std::cmp::Ordering;
6001
6002 match (left, right) {
6003 (Literal::Null, _) | (_, Literal::Null) => None,
6004 (Literal::Integer(lhs), Literal::Integer(rhs)) => {
6005 let ord = lhs.cmp(rhs);
6006 Some(match op {
6007 CompareOp::Eq => ord == Ordering::Equal,
6008 CompareOp::NotEq => ord != Ordering::Equal,
6009 CompareOp::Lt => ord == Ordering::Less,
6010 CompareOp::LtEq => ord != Ordering::Greater,
6011 CompareOp::Gt => ord == Ordering::Greater,
6012 CompareOp::GtEq => ord != Ordering::Less,
6013 })
6014 }
6015 (Literal::Float(lhs), Literal::Float(rhs)) => Some(match op {
6016 CompareOp::Eq => lhs == rhs,
6017 CompareOp::NotEq => lhs != rhs,
6018 CompareOp::Lt => lhs < rhs,
6019 CompareOp::LtEq => lhs <= rhs,
6020 CompareOp::Gt => lhs > rhs,
6021 CompareOp::GtEq => lhs >= rhs,
6022 }),
6023 (Literal::Integer(lhs), Literal::Float(_rhs)) => {
6024 compare_literals(op, &Literal::Float(*lhs as f64), right)
6025 }
6026 (Literal::Float(_lhs), Literal::Integer(rhs)) => {
6027 compare_literals(op, left, &Literal::Float(*rhs as f64))
6028 }
6029 (Literal::Boolean(lhs), Literal::Boolean(rhs)) => Some(match op {
6030 CompareOp::Eq => lhs == rhs,
6031 CompareOp::NotEq => lhs != rhs,
6032 CompareOp::Lt => (*lhs as u8) < (*rhs as u8),
6033 CompareOp::LtEq => (*lhs as u8) <= (*rhs as u8),
6034 CompareOp::Gt => (*lhs as u8) > (*rhs as u8),
6035 CompareOp::GtEq => (*lhs as u8) >= (*rhs as u8),
6036 }),
6037 (Literal::String(lhs), Literal::String(rhs)) => {
6038 let ord = lhs.cmp(rhs);
6039 Some(match op {
6040 CompareOp::Eq => ord == Ordering::Equal,
6041 CompareOp::NotEq => ord != Ordering::Equal,
6042 CompareOp::Lt => ord == Ordering::Less,
6043 CompareOp::LtEq => ord != Ordering::Greater,
6044 CompareOp::Gt => ord == Ordering::Greater,
6045 CompareOp::GtEq => ord != Ordering::Less,
6046 })
6047 }
6048 _ => None,
6049 }
6050}
6051
6052fn bind_select_filter(
6053 filter: &llkv_plan::SelectFilter,
6054 bindings: &FxHashMap<String, Literal>,
6055) -> ExecutorResult<llkv_plan::SelectFilter> {
6056 let predicate = bind_predicate_expr(&filter.predicate, bindings)?;
6057 let subqueries = filter
6058 .subqueries
6059 .iter()
6060 .map(|subquery| bind_filter_subquery(subquery, bindings))
6061 .collect::<ExecutorResult<Vec<_>>>()?;
6062
6063 Ok(llkv_plan::SelectFilter {
6064 predicate,
6065 subqueries,
6066 })
6067}
6068
6069fn bind_filter_subquery(
6070 subquery: &llkv_plan::FilterSubquery,
6071 bindings: &FxHashMap<String, Literal>,
6072) -> ExecutorResult<llkv_plan::FilterSubquery> {
6073 let bound_plan = bind_select_plan(&subquery.plan, bindings)?;
6074 Ok(llkv_plan::FilterSubquery {
6075 id: subquery.id,
6076 plan: Box::new(bound_plan),
6077 correlated_columns: subquery.correlated_columns.clone(),
6078 })
6079}
6080
6081fn bind_scalar_subquery(
6082 subquery: &llkv_plan::ScalarSubquery,
6083 bindings: &FxHashMap<String, Literal>,
6084) -> ExecutorResult<llkv_plan::ScalarSubquery> {
6085 let bound_plan = bind_select_plan(&subquery.plan, bindings)?;
6086 Ok(llkv_plan::ScalarSubquery {
6087 id: subquery.id,
6088 plan: Box::new(bound_plan),
6089 correlated_columns: subquery.correlated_columns.clone(),
6090 })
6091}
6092
6093fn bind_projection(
6094 projection: &SelectProjection,
6095 bindings: &FxHashMap<String, Literal>,
6096) -> ExecutorResult<SelectProjection> {
6097 match projection {
6098 SelectProjection::AllColumns => Ok(projection.clone()),
6099 SelectProjection::AllColumnsExcept { exclude } => Ok(SelectProjection::AllColumnsExcept {
6100 exclude: exclude.clone(),
6101 }),
6102 SelectProjection::Column { name, alias } => {
6103 if let Some(literal) = bindings.get(name) {
6104 let expr = ScalarExpr::Literal(literal.clone());
6105 Ok(SelectProjection::Computed {
6106 expr,
6107 alias: alias.clone().unwrap_or_else(|| name.clone()),
6108 })
6109 } else {
6110 Ok(projection.clone())
6111 }
6112 }
6113 SelectProjection::Computed { expr, alias } => Ok(SelectProjection::Computed {
6114 expr: bind_scalar_expr(expr, bindings)?,
6115 alias: alias.clone(),
6116 }),
6117 }
6118}
6119
6120fn bind_aggregate_expr(
6121 aggregate: &AggregateExpr,
6122 bindings: &FxHashMap<String, Literal>,
6123) -> ExecutorResult<AggregateExpr> {
6124 match aggregate {
6125 AggregateExpr::CountStar { .. } => Ok(aggregate.clone()),
6126 AggregateExpr::Column {
6127 column,
6128 alias,
6129 function,
6130 distinct,
6131 } => {
6132 if bindings.contains_key(column) {
6133 return Err(Error::InvalidArgumentError(
6134 "correlated columns are not supported inside aggregate expressions".into(),
6135 ));
6136 }
6137 Ok(AggregateExpr::Column {
6138 column: column.clone(),
6139 alias: alias.clone(),
6140 function: function.clone(),
6141 distinct: *distinct,
6142 })
6143 }
6144 }
6145}
6146
6147fn bind_scalar_expr(
6148 expr: &ScalarExpr<String>,
6149 bindings: &FxHashMap<String, Literal>,
6150) -> ExecutorResult<ScalarExpr<String>> {
6151 match expr {
6152 ScalarExpr::Column(name) => {
6153 if let Some(literal) = bindings.get(name) {
6154 Ok(ScalarExpr::Literal(literal.clone()))
6155 } else {
6156 Ok(ScalarExpr::Column(name.clone()))
6157 }
6158 }
6159 ScalarExpr::Literal(literal) => Ok(ScalarExpr::Literal(literal.clone())),
6160 ScalarExpr::Binary { left, op, right } => Ok(ScalarExpr::Binary {
6161 left: Box::new(bind_scalar_expr(left, bindings)?),
6162 op: *op,
6163 right: Box::new(bind_scalar_expr(right, bindings)?),
6164 }),
6165 ScalarExpr::Compare { left, op, right } => Ok(ScalarExpr::Compare {
6166 left: Box::new(bind_scalar_expr(left, bindings)?),
6167 op: *op,
6168 right: Box::new(bind_scalar_expr(right, bindings)?),
6169 }),
6170 ScalarExpr::Aggregate(call) => Ok(ScalarExpr::Aggregate(call.clone())),
6171 ScalarExpr::GetField { base, field_name } => {
6172 let bound_base = bind_scalar_expr(base, bindings)?;
6173 match bound_base {
6174 ScalarExpr::Literal(literal) => {
6175 let value = extract_struct_field(&literal, field_name).unwrap_or(Literal::Null);
6176 Ok(ScalarExpr::Literal(value))
6177 }
6178 other => Ok(ScalarExpr::GetField {
6179 base: Box::new(other),
6180 field_name: field_name.clone(),
6181 }),
6182 }
6183 }
6184 ScalarExpr::Cast { expr, data_type } => Ok(ScalarExpr::Cast {
6185 expr: Box::new(bind_scalar_expr(expr, bindings)?),
6186 data_type: data_type.clone(),
6187 }),
6188 ScalarExpr::Case {
6189 operand,
6190 branches,
6191 else_expr,
6192 } => {
6193 let bound_operand = match operand {
6194 Some(inner) => Some(Box::new(bind_scalar_expr(inner, bindings)?)),
6195 None => None,
6196 };
6197 let mut bound_branches = Vec::with_capacity(branches.len());
6198 for (when_expr, then_expr) in branches {
6199 bound_branches.push((
6200 bind_scalar_expr(when_expr, bindings)?,
6201 bind_scalar_expr(then_expr, bindings)?,
6202 ));
6203 }
6204 let bound_else = match else_expr {
6205 Some(inner) => Some(Box::new(bind_scalar_expr(inner, bindings)?)),
6206 None => None,
6207 };
6208 Ok(ScalarExpr::Case {
6209 operand: bound_operand,
6210 branches: bound_branches,
6211 else_expr: bound_else,
6212 })
6213 }
6214 ScalarExpr::Coalesce(items) => {
6215 let mut bound_items = Vec::with_capacity(items.len());
6216 for item in items {
6217 bound_items.push(bind_scalar_expr(item, bindings)?);
6218 }
6219 Ok(ScalarExpr::Coalesce(bound_items))
6220 }
6221 ScalarExpr::Not(inner) => Ok(ScalarExpr::Not(Box::new(bind_scalar_expr(
6222 inner, bindings,
6223 )?))),
6224 ScalarExpr::IsNull { expr, negated } => Ok(ScalarExpr::IsNull {
6225 expr: Box::new(bind_scalar_expr(expr, bindings)?),
6226 negated: *negated,
6227 }),
6228 ScalarExpr::ScalarSubquery(sub) => Ok(ScalarExpr::ScalarSubquery(sub.clone())),
6229 }
6230}
6231
6232fn bind_predicate_expr(
6233 expr: &LlkvExpr<'static, String>,
6234 bindings: &FxHashMap<String, Literal>,
6235) -> ExecutorResult<LlkvExpr<'static, String>> {
6236 match expr {
6237 LlkvExpr::And(children) => {
6238 let mut bound = Vec::with_capacity(children.len());
6239 for child in children {
6240 bound.push(bind_predicate_expr(child, bindings)?);
6241 }
6242 Ok(LlkvExpr::And(bound))
6243 }
6244 LlkvExpr::Or(children) => {
6245 let mut bound = Vec::with_capacity(children.len());
6246 for child in children {
6247 bound.push(bind_predicate_expr(child, bindings)?);
6248 }
6249 Ok(LlkvExpr::Or(bound))
6250 }
6251 LlkvExpr::Not(inner) => Ok(LlkvExpr::Not(Box::new(bind_predicate_expr(
6252 inner, bindings,
6253 )?))),
6254 LlkvExpr::Pred(filter) => bind_filter_predicate(filter, bindings),
6255 LlkvExpr::Compare { left, op, right } => Ok(LlkvExpr::Compare {
6256 left: bind_scalar_expr(left, bindings)?,
6257 op: *op,
6258 right: bind_scalar_expr(right, bindings)?,
6259 }),
6260 LlkvExpr::InList {
6261 expr,
6262 list,
6263 negated,
6264 } => {
6265 let target = bind_scalar_expr(expr, bindings)?;
6266 let mut bound_list = Vec::with_capacity(list.len());
6267 for item in list {
6268 bound_list.push(bind_scalar_expr(item, bindings)?);
6269 }
6270 Ok(LlkvExpr::InList {
6271 expr: target,
6272 list: bound_list,
6273 negated: *negated,
6274 })
6275 }
6276 LlkvExpr::IsNull { expr, negated } => Ok(LlkvExpr::IsNull {
6277 expr: bind_scalar_expr(expr, bindings)?,
6278 negated: *negated,
6279 }),
6280 LlkvExpr::Literal(value) => Ok(LlkvExpr::Literal(*value)),
6281 LlkvExpr::Exists(subquery) => Ok(LlkvExpr::Exists(subquery.clone())),
6282 }
6283}
6284
6285fn bind_filter_predicate(
6286 filter: &Filter<'static, String>,
6287 bindings: &FxHashMap<String, Literal>,
6288) -> ExecutorResult<LlkvExpr<'static, String>> {
6289 if let Some(literal) = bindings.get(&filter.field_id) {
6290 let result = evaluate_filter_against_literal(literal, &filter.op)?;
6291 return Ok(LlkvExpr::Literal(result));
6292 }
6293 Ok(LlkvExpr::Pred(filter.clone()))
6294}
6295
6296fn evaluate_filter_against_literal(value: &Literal, op: &Operator) -> ExecutorResult<bool> {
6297 use std::ops::Bound;
6298
6299 match op {
6300 Operator::IsNull => Ok(matches!(value, Literal::Null)),
6301 Operator::IsNotNull => Ok(!matches!(value, Literal::Null)),
6302 Operator::Equals(rhs) => Ok(literal_equals(value, rhs).unwrap_or(false)),
6303 Operator::GreaterThan(rhs) => Ok(literal_compare(value, rhs)
6304 .map(|cmp| cmp == std::cmp::Ordering::Greater)
6305 .unwrap_or(false)),
6306 Operator::GreaterThanOrEquals(rhs) => Ok(literal_compare(value, rhs)
6307 .map(|cmp| matches!(cmp, std::cmp::Ordering::Greater | std::cmp::Ordering::Equal))
6308 .unwrap_or(false)),
6309 Operator::LessThan(rhs) => Ok(literal_compare(value, rhs)
6310 .map(|cmp| cmp == std::cmp::Ordering::Less)
6311 .unwrap_or(false)),
6312 Operator::LessThanOrEquals(rhs) => Ok(literal_compare(value, rhs)
6313 .map(|cmp| matches!(cmp, std::cmp::Ordering::Less | std::cmp::Ordering::Equal))
6314 .unwrap_or(false)),
6315 Operator::In(values) => Ok(values
6316 .iter()
6317 .any(|candidate| literal_equals(value, candidate).unwrap_or(false))),
6318 Operator::Range { lower, upper } => {
6319 let lower_ok = match lower {
6320 Bound::Unbounded => Some(true),
6321 Bound::Included(bound) => literal_compare(value, bound).map(|cmp| {
6322 matches!(cmp, std::cmp::Ordering::Greater | std::cmp::Ordering::Equal)
6323 }),
6324 Bound::Excluded(bound) => {
6325 literal_compare(value, bound).map(|cmp| cmp == std::cmp::Ordering::Greater)
6326 }
6327 }
6328 .unwrap_or(false);
6329
6330 let upper_ok = match upper {
6331 Bound::Unbounded => Some(true),
6332 Bound::Included(bound) => literal_compare(value, bound)
6333 .map(|cmp| matches!(cmp, std::cmp::Ordering::Less | std::cmp::Ordering::Equal)),
6334 Bound::Excluded(bound) => {
6335 literal_compare(value, bound).map(|cmp| cmp == std::cmp::Ordering::Less)
6336 }
6337 }
6338 .unwrap_or(false);
6339
6340 Ok(lower_ok && upper_ok)
6341 }
6342 Operator::StartsWith {
6343 pattern,
6344 case_sensitive,
6345 } => {
6346 let target = if *case_sensitive {
6347 pattern.to_string()
6348 } else {
6349 pattern.to_ascii_lowercase()
6350 };
6351 Ok(literal_string(value, *case_sensitive)
6352 .map(|source| source.starts_with(&target))
6353 .unwrap_or(false))
6354 }
6355 Operator::EndsWith {
6356 pattern,
6357 case_sensitive,
6358 } => {
6359 let target = if *case_sensitive {
6360 pattern.to_string()
6361 } else {
6362 pattern.to_ascii_lowercase()
6363 };
6364 Ok(literal_string(value, *case_sensitive)
6365 .map(|source| source.ends_with(&target))
6366 .unwrap_or(false))
6367 }
6368 Operator::Contains {
6369 pattern,
6370 case_sensitive,
6371 } => {
6372 let target = if *case_sensitive {
6373 pattern.to_string()
6374 } else {
6375 pattern.to_ascii_lowercase()
6376 };
6377 Ok(literal_string(value, *case_sensitive)
6378 .map(|source| source.contains(&target))
6379 .unwrap_or(false))
6380 }
6381 }
6382}
6383
6384fn literal_compare(lhs: &Literal, rhs: &Literal) -> Option<std::cmp::Ordering> {
6385 match (lhs, rhs) {
6386 (Literal::Integer(a), Literal::Integer(b)) => Some(a.cmp(b)),
6387 (Literal::Float(a), Literal::Float(b)) => a.partial_cmp(b),
6388 (Literal::Integer(a), Literal::Float(b)) => (*a as f64).partial_cmp(b),
6389 (Literal::Float(a), Literal::Integer(b)) => a.partial_cmp(&(*b as f64)),
6390 (Literal::String(a), Literal::String(b)) => Some(a.cmp(b)),
6391 _ => None,
6392 }
6393}
6394
6395fn literal_equals(lhs: &Literal, rhs: &Literal) -> Option<bool> {
6396 match (lhs, rhs) {
6397 (Literal::Boolean(a), Literal::Boolean(b)) => Some(a == b),
6398 (Literal::String(a), Literal::String(b)) => Some(a == b),
6399 (Literal::Integer(_), Literal::Integer(_))
6400 | (Literal::Integer(_), Literal::Float(_))
6401 | (Literal::Float(_), Literal::Integer(_))
6402 | (Literal::Float(_), Literal::Float(_)) => {
6403 literal_compare(lhs, rhs).map(|cmp| cmp == std::cmp::Ordering::Equal)
6404 }
6405 _ => None,
6406 }
6407}
6408
6409fn literal_string(literal: &Literal, case_sensitive: bool) -> Option<String> {
6410 match literal {
6411 Literal::String(value) => {
6412 if case_sensitive {
6413 Some(value.clone())
6414 } else {
6415 Some(value.to_ascii_lowercase())
6416 }
6417 }
6418 _ => None,
6419 }
6420}
6421
6422fn extract_struct_field(literal: &Literal, field_name: &str) -> Option<Literal> {
6423 if let Literal::Struct(fields) = literal {
6424 for (name, value) in fields {
6425 if name.eq_ignore_ascii_case(field_name) {
6426 return Some((**value).clone());
6427 }
6428 }
6429 }
6430 None
6431}
6432
6433fn array_value_to_literal(array: &ArrayRef, idx: usize) -> ExecutorResult<Literal> {
6434 if array.is_null(idx) {
6435 return Ok(Literal::Null);
6436 }
6437
6438 match array.data_type() {
6439 DataType::Boolean => {
6440 let array = array
6441 .as_any()
6442 .downcast_ref::<BooleanArray>()
6443 .ok_or_else(|| Error::Internal("failed to downcast boolean array".into()))?;
6444 Ok(Literal::Boolean(array.value(idx)))
6445 }
6446 DataType::Int8 => {
6447 let array = array
6448 .as_any()
6449 .downcast_ref::<Int8Array>()
6450 .ok_or_else(|| Error::Internal("failed to downcast int8 array".into()))?;
6451 Ok(Literal::Integer(array.value(idx) as i128))
6452 }
6453 DataType::Int16 => {
6454 let array = array
6455 .as_any()
6456 .downcast_ref::<Int16Array>()
6457 .ok_or_else(|| Error::Internal("failed to downcast int16 array".into()))?;
6458 Ok(Literal::Integer(array.value(idx) as i128))
6459 }
6460 DataType::Int32 => {
6461 let array = array
6462 .as_any()
6463 .downcast_ref::<Int32Array>()
6464 .ok_or_else(|| Error::Internal("failed to downcast int32 array".into()))?;
6465 Ok(Literal::Integer(array.value(idx) as i128))
6466 }
6467 DataType::Int64 => {
6468 let array = array
6469 .as_any()
6470 .downcast_ref::<Int64Array>()
6471 .ok_or_else(|| Error::Internal("failed to downcast int64 array".into()))?;
6472 Ok(Literal::Integer(array.value(idx) as i128))
6473 }
6474 DataType::UInt8 => {
6475 let array = array
6476 .as_any()
6477 .downcast_ref::<UInt8Array>()
6478 .ok_or_else(|| Error::Internal("failed to downcast uint8 array".into()))?;
6479 Ok(Literal::Integer(array.value(idx) as i128))
6480 }
6481 DataType::UInt16 => {
6482 let array = array
6483 .as_any()
6484 .downcast_ref::<UInt16Array>()
6485 .ok_or_else(|| Error::Internal("failed to downcast uint16 array".into()))?;
6486 Ok(Literal::Integer(array.value(idx) as i128))
6487 }
6488 DataType::UInt32 => {
6489 let array = array
6490 .as_any()
6491 .downcast_ref::<UInt32Array>()
6492 .ok_or_else(|| Error::Internal("failed to downcast uint32 array".into()))?;
6493 Ok(Literal::Integer(array.value(idx) as i128))
6494 }
6495 DataType::UInt64 => {
6496 let array = array
6497 .as_any()
6498 .downcast_ref::<UInt64Array>()
6499 .ok_or_else(|| Error::Internal("failed to downcast uint64 array".into()))?;
6500 Ok(Literal::Integer(array.value(idx) as i128))
6501 }
6502 DataType::Float32 => {
6503 let array = array
6504 .as_any()
6505 .downcast_ref::<Float32Array>()
6506 .ok_or_else(|| Error::Internal("failed to downcast float32 array".into()))?;
6507 Ok(Literal::Float(array.value(idx) as f64))
6508 }
6509 DataType::Float64 => {
6510 let array = array
6511 .as_any()
6512 .downcast_ref::<Float64Array>()
6513 .ok_or_else(|| Error::Internal("failed to downcast float64 array".into()))?;
6514 Ok(Literal::Float(array.value(idx)))
6515 }
6516 DataType::Utf8 => {
6517 let array = array
6518 .as_any()
6519 .downcast_ref::<StringArray>()
6520 .ok_or_else(|| Error::Internal("failed to downcast utf8 array".into()))?;
6521 Ok(Literal::String(array.value(idx).to_string()))
6522 }
6523 DataType::LargeUtf8 => {
6524 let array = array
6525 .as_any()
6526 .downcast_ref::<LargeStringArray>()
6527 .ok_or_else(|| Error::Internal("failed to downcast large utf8 array".into()))?;
6528 Ok(Literal::String(array.value(idx).to_string()))
6529 }
6530 DataType::Struct(fields) => {
6531 let struct_array = array
6532 .as_any()
6533 .downcast_ref::<StructArray>()
6534 .ok_or_else(|| Error::Internal("failed to downcast struct array".into()))?;
6535 let mut members = Vec::with_capacity(fields.len());
6536 for (field_idx, field) in fields.iter().enumerate() {
6537 let child = struct_array.column(field_idx);
6538 let literal = array_value_to_literal(child, idx)?;
6539 members.push((field.name().clone(), Box::new(literal)));
6540 }
6541 Ok(Literal::Struct(members))
6542 }
6543 other => Err(Error::InvalidArgumentError(format!(
6544 "unsupported scalar subquery result type: {other:?}"
6545 ))),
6546 }
6547}
6548
6549fn collect_scalar_subquery_ids(expr: &ScalarExpr<FieldId>, ids: &mut FxHashSet<SubqueryId>) {
6550 match expr {
6551 ScalarExpr::ScalarSubquery(subquery) => {
6552 ids.insert(subquery.id);
6553 }
6554 ScalarExpr::Binary { left, right, .. } => {
6555 collect_scalar_subquery_ids(left, ids);
6556 collect_scalar_subquery_ids(right, ids);
6557 }
6558 ScalarExpr::Compare { left, right, .. } => {
6559 collect_scalar_subquery_ids(left, ids);
6560 collect_scalar_subquery_ids(right, ids);
6561 }
6562 ScalarExpr::GetField { base, .. } => {
6563 collect_scalar_subquery_ids(base, ids);
6564 }
6565 ScalarExpr::Cast { expr, .. } => {
6566 collect_scalar_subquery_ids(expr, ids);
6567 }
6568 ScalarExpr::Not(expr) => {
6569 collect_scalar_subquery_ids(expr, ids);
6570 }
6571 ScalarExpr::IsNull { expr, .. } => {
6572 collect_scalar_subquery_ids(expr, ids);
6573 }
6574 ScalarExpr::Case {
6575 operand,
6576 branches,
6577 else_expr,
6578 } => {
6579 if let Some(op) = operand {
6580 collect_scalar_subquery_ids(op, ids);
6581 }
6582 for (when_expr, then_expr) in branches {
6583 collect_scalar_subquery_ids(when_expr, ids);
6584 collect_scalar_subquery_ids(then_expr, ids);
6585 }
6586 if let Some(else_expr) = else_expr {
6587 collect_scalar_subquery_ids(else_expr, ids);
6588 }
6589 }
6590 ScalarExpr::Coalesce(items) => {
6591 for item in items {
6592 collect_scalar_subquery_ids(item, ids);
6593 }
6594 }
6595 ScalarExpr::Aggregate(_) | ScalarExpr::Column(_) | ScalarExpr::Literal(_) => {}
6596 }
6597}
6598
6599fn rewrite_scalar_expr_for_subqueries(
6600 expr: &ScalarExpr<FieldId>,
6601 mapping: &FxHashMap<SubqueryId, FieldId>,
6602) -> ScalarExpr<FieldId> {
6603 match expr {
6604 ScalarExpr::ScalarSubquery(subquery) => mapping
6605 .get(&subquery.id)
6606 .map(|field_id| ScalarExpr::Column(*field_id))
6607 .unwrap_or_else(|| ScalarExpr::ScalarSubquery(subquery.clone())),
6608 ScalarExpr::Binary { left, op, right } => ScalarExpr::Binary {
6609 left: Box::new(rewrite_scalar_expr_for_subqueries(left, mapping)),
6610 op: *op,
6611 right: Box::new(rewrite_scalar_expr_for_subqueries(right, mapping)),
6612 },
6613 ScalarExpr::Compare { left, op, right } => ScalarExpr::Compare {
6614 left: Box::new(rewrite_scalar_expr_for_subqueries(left, mapping)),
6615 op: *op,
6616 right: Box::new(rewrite_scalar_expr_for_subqueries(right, mapping)),
6617 },
6618 ScalarExpr::GetField { base, field_name } => ScalarExpr::GetField {
6619 base: Box::new(rewrite_scalar_expr_for_subqueries(base, mapping)),
6620 field_name: field_name.clone(),
6621 },
6622 ScalarExpr::Cast { expr, data_type } => ScalarExpr::Cast {
6623 expr: Box::new(rewrite_scalar_expr_for_subqueries(expr, mapping)),
6624 data_type: data_type.clone(),
6625 },
6626 ScalarExpr::Not(expr) => {
6627 ScalarExpr::Not(Box::new(rewrite_scalar_expr_for_subqueries(expr, mapping)))
6628 }
6629 ScalarExpr::IsNull { expr, negated } => ScalarExpr::IsNull {
6630 expr: Box::new(rewrite_scalar_expr_for_subqueries(expr, mapping)),
6631 negated: *negated,
6632 },
6633 ScalarExpr::Case {
6634 operand,
6635 branches,
6636 else_expr,
6637 } => ScalarExpr::Case {
6638 operand: operand
6639 .as_ref()
6640 .map(|op| Box::new(rewrite_scalar_expr_for_subqueries(op, mapping))),
6641 branches: branches
6642 .iter()
6643 .map(|(when_expr, then_expr)| {
6644 (
6645 rewrite_scalar_expr_for_subqueries(when_expr, mapping),
6646 rewrite_scalar_expr_for_subqueries(then_expr, mapping),
6647 )
6648 })
6649 .collect(),
6650 else_expr: else_expr
6651 .as_ref()
6652 .map(|expr| Box::new(rewrite_scalar_expr_for_subqueries(expr, mapping))),
6653 },
6654 ScalarExpr::Coalesce(items) => ScalarExpr::Coalesce(
6655 items
6656 .iter()
6657 .map(|item| rewrite_scalar_expr_for_subqueries(item, mapping))
6658 .collect(),
6659 ),
6660 ScalarExpr::Aggregate(_) | ScalarExpr::Column(_) | ScalarExpr::Literal(_) => expr.clone(),
6661 }
6662}
6663
6664fn collect_correlated_bindings(
6665 context: &mut CrossProductExpressionContext,
6666 batch: &RecordBatch,
6667 row_idx: usize,
6668 columns: &[llkv_plan::CorrelatedColumn],
6669) -> ExecutorResult<FxHashMap<String, Literal>> {
6670 let mut out = FxHashMap::default();
6671
6672 for correlated in columns {
6673 if !correlated.field_path.is_empty() {
6674 return Err(Error::InvalidArgumentError(
6675 "correlated field path resolution is not yet supported".into(),
6676 ));
6677 }
6678
6679 let field_id = context
6680 .field_id_for_column(&correlated.column)
6681 .ok_or_else(|| {
6682 Error::InvalidArgumentError(format!(
6683 "correlated column '{}' not found in outer query output",
6684 correlated.column
6685 ))
6686 })?;
6687
6688 let accessor = context.column_accessor(field_id, batch)?;
6689 let literal = accessor.literal_at(row_idx)?;
6690 out.insert(correlated.placeholder.clone(), literal);
6691 }
6692
6693 Ok(out)
6694}
6695
/// Streamable result of a SELECT: an output schema plus either a lazy table
/// scan or an already-materialized single batch.
#[derive(Clone)]
pub struct SelectExecution<P>
where
    P: Pager<Blob = EntryHandle> + Send + Sync,
{
    // Name of the table this result was produced from (used by `table_name`).
    table_name: String,
    // Output schema shared by every batch the stream yields.
    schema: Arc<Schema>,
    // Lazy projection scan or a single pre-computed batch.
    stream: SelectStream<P>,
}
6706
/// Internal payload of a [`SelectExecution`]: either a deferred table scan
/// or an eagerly computed batch.
#[derive(Clone)]
enum SelectStream<P>
where
    P: Pager<Blob = EntryHandle> + Send + Sync,
{
    /// Lazy scan; batches are produced only when `stream` is invoked.
    Projection {
        table: Arc<ExecutorTable<P>>,
        /// Columns / computed expressions to project.
        projections: Vec<ScanProjection>,
        /// Row filter pushed down to the scan.
        filter_expr: LlkvExpr<'static, FieldId>,
        /// Scan options (ordering, null handling, row-id filter, ...).
        options: ScanStreamOptions<P>,
        /// True when the scan covers the whole table; enables NULL-row synthesis.
        full_table_scan: bool,
        /// ORDER BY items that may require a post-scan sort.
        order_by: Vec<OrderByPlan>,
        /// True for SELECT DISTINCT (duplicate rows removed while streaming).
        distinct: bool,
    },
    /// Result already materialized as a single batch (e.g. aggregations).
    Aggregation {
        batch: RecordBatch,
    },
}
6725
impl<P> SelectExecution<P>
where
    P: Pager<Blob = EntryHandle> + Send + Sync,
{
    /// Build a lazy projection-scan execution over `table`.
    #[allow(clippy::too_many_arguments)]
    fn new_projection(
        table_name: String,
        schema: Arc<Schema>,
        table: Arc<ExecutorTable<P>>,
        projections: Vec<ScanProjection>,
        filter_expr: LlkvExpr<'static, FieldId>,
        options: ScanStreamOptions<P>,
        full_table_scan: bool,
        order_by: Vec<OrderByPlan>,
        distinct: bool,
    ) -> Self {
        Self {
            table_name,
            schema,
            stream: SelectStream::Projection {
                table,
                projections,
                filter_expr,
                options,
                full_table_scan,
                order_by,
                distinct,
            },
        }
    }

    /// Wrap an already-materialized batch as an execution result.
    pub fn new_single_batch(table_name: String, schema: Arc<Schema>, batch: RecordBatch) -> Self {
        Self {
            table_name,
            schema,
            stream: SelectStream::Aggregation { batch },
        }
    }

    /// Alias for [`Self::new_single_batch`].
    pub fn from_batch(table_name: String, schema: Arc<Schema>, batch: RecordBatch) -> Self {
        Self::new_single_batch(table_name, schema, batch)
    }

    /// Name of the table this execution was built from.
    pub fn table_name(&self) -> &str {
        &self.table_name
    }

    /// Output schema shared by every batch the stream yields.
    pub fn schema(&self) -> Arc<Schema> {
        Arc::clone(&self.schema)
    }

    /// Drive the execution, invoking `on_batch` for every result batch.
    ///
    /// For projection streams this runs the underlying scan and applies, in
    /// order: DISTINCT filtering, buffering for multi-key/post-scan ORDER BY,
    /// and synthesis of all-NULL rows for full-table scans whose projected
    /// columns were entirely NULL. Returns the first error raised by the scan
    /// or by `on_batch`.
    pub fn stream(
        self,
        mut on_batch: impl FnMut(RecordBatch) -> ExecutorResult<()>,
    ) -> ExecutorResult<()> {
        let schema = Arc::clone(&self.schema);
        match self.stream {
            SelectStream::Projection {
                table,
                projections,
                filter_expr,
                options,
                full_table_scan,
                order_by,
                distinct,
            } => {
                let total_rows = table.total_rows.load(Ordering::SeqCst);
                if total_rows == 0 {
                    return Ok(());
                }

                // Errors raised inside the scan callback are stashed here and
                // re-raised after the scan returns.
                let mut error: Option<Error> = None;
                let mut produced = false;
                let mut produced_rows: u64 = 0;
                // NULLS FIRST with a storage-level order means synthesized
                // NULL rows must be emitted before the scanned batches.
                let capture_nulls_first = matches!(options.order, Some(spec) if spec.nulls_first);
                // Multi-key ORDER BY (or ORDER BY the storage layer could not
                // handle) requires buffering everything and sorting at the end.
                let needs_post_sort =
                    !order_by.is_empty() && (order_by.len() > 1 || options.order.is_none());
                let collect_batches = needs_post_sort || capture_nulls_first;
                let include_nulls = options.include_nulls;
                let has_row_id_filter = options.row_id_filter.is_some();
                let mut distinct_state = if distinct {
                    Some(DistinctState::default())
                } else {
                    None
                };
                let scan_options = options;
                let mut buffered_batches: Vec<RecordBatch> = Vec::new();
                table
                    .table
                    .scan_stream(projections, &filter_expr, scan_options, |batch| {
                        if error.is_some() {
                            return;
                        }
                        let mut batch = batch;
                        // DISTINCT: drop rows already seen in earlier batches.
                        if let Some(state) = distinct_state.as_mut() {
                            match distinct_filter_batch(batch, state) {
                                Ok(Some(filtered)) => {
                                    batch = filtered;
                                }
                                Ok(None) => {
                                    return;
                                }
                                Err(err) => {
                                    error = Some(err);
                                    return;
                                }
                            }
                        }
                        produced = true;
                        produced_rows = produced_rows.saturating_add(batch.num_rows() as u64);
                        if collect_batches {
                            buffered_batches.push(batch);
                        } else if let Err(err) = on_batch(batch) {
                            error = Some(err);
                        }
                    })?;
                if let Some(err) = error {
                    return Err(err);
                }
                if !produced {
                    // A full-table scan that yielded nothing still represents
                    // `total_rows` rows whose projected columns are all NULL.
                    if !distinct && full_table_scan && total_rows > 0 {
                        for batch in synthesize_null_scan(Arc::clone(&schema), total_rows)? {
                            on_batch(batch)?;
                        }
                    }
                    return Ok(());
                }
                // Rows the scan skipped (all-NULL projections) are synthesized
                // here so full scans report the correct row count.
                let mut null_batches: Vec<RecordBatch> = Vec::new();
                if !distinct
                    && include_nulls
                    && full_table_scan
                    && produced_rows < total_rows
                    && !has_row_id_filter
                {
                    let missing = total_rows - produced_rows;
                    if missing > 0 {
                        null_batches = synthesize_null_scan(Arc::clone(&schema), missing)?;
                    }
                }

                if collect_batches {
                    if needs_post_sort {
                        // Sort everything (including synthesized NULL rows)
                        // as one combined batch.
                        if !null_batches.is_empty() {
                            buffered_batches.extend(null_batches);
                        }
                        if !buffered_batches.is_empty() {
                            let combined =
                                concat_batches(&schema, &buffered_batches).map_err(|err| {
                                    Error::InvalidArgumentError(format!(
                                        "failed to concatenate result batches for ORDER BY: {}",
                                        err
                                    ))
                                })?;
                            let sorted_batch =
                                sort_record_batch_with_order(&schema, &combined, &order_by)?;
                            on_batch(sorted_batch)?;
                        }
                    } else if capture_nulls_first {
                        // NULLS FIRST: synthesized rows precede scanned rows.
                        for batch in null_batches {
                            on_batch(batch)?;
                        }
                        for batch in buffered_batches {
                            on_batch(batch)?;
                        }
                    }
                } else if !null_batches.is_empty() {
                    // Scanned batches were already forwarded; append NULL rows.
                    for batch in null_batches {
                        on_batch(batch)?;
                    }
                }
                Ok(())
            }
            SelectStream::Aggregation { batch } => on_batch(batch),
        }
    }

    /// Drain the stream, collecting every batch.
    pub fn collect(self) -> ExecutorResult<Vec<RecordBatch>> {
        let mut batches = Vec::new();
        self.stream(|batch| {
            batches.push(batch);
            Ok(())
        })?;
        Ok(batches)
    }

    /// Drain the stream into row-oriented `PlanValue`s plus column names.
    pub fn collect_rows(self) -> ExecutorResult<ExecutorRowBatch> {
        let schema = self.schema();
        let mut rows: Vec<Vec<PlanValue>> = Vec::new();
        self.stream(|batch| {
            for row_idx in 0..batch.num_rows() {
                let mut row: Vec<PlanValue> = Vec::with_capacity(batch.num_columns());
                for col_idx in 0..batch.num_columns() {
                    let value = llkv_plan::plan_value_from_array(batch.column(col_idx), row_idx)?;
                    row.push(value);
                }
                rows.push(row);
            }
            Ok(())
        })?;
        let columns = schema
            .fields()
            .iter()
            .map(|field| field.name().to_string())
            .collect();
        Ok(ExecutorRowBatch { columns, rows })
    }

    /// Drain the stream, returning only the row values.
    pub fn into_rows(self) -> ExecutorResult<Vec<Vec<PlanValue>>> {
        Ok(self.collect_rows()?.rows)
    }
}
6946
6947impl<P> fmt::Debug for SelectExecution<P>
6948where
6949 P: Pager<Blob = EntryHandle> + Send + Sync,
6950{
6951 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6952 f.debug_struct("SelectExecution")
6953 .field("table_name", &self.table_name)
6954 .field("schema", &self.schema)
6955 .finish()
6956 }
6957}
6958
6959fn expand_order_targets(
6964 order_items: &[OrderByPlan],
6965 projections: &[ScanProjection],
6966) -> ExecutorResult<Vec<OrderByPlan>> {
6967 let mut expanded = Vec::new();
6968
6969 for item in order_items {
6970 match &item.target {
6971 OrderTarget::All => {
6972 if projections.is_empty() {
6973 return Err(Error::InvalidArgumentError(
6974 "ORDER BY ALL requires at least one projection".into(),
6975 ));
6976 }
6977
6978 for (idx, projection) in projections.iter().enumerate() {
6979 if matches!(projection, ScanProjection::Computed { .. }) {
6980 return Err(Error::InvalidArgumentError(
6981 "ORDER BY ALL cannot reference computed projections".into(),
6982 ));
6983 }
6984
6985 let mut clone = item.clone();
6986 clone.target = OrderTarget::Index(idx);
6987 expanded.push(clone);
6988 }
6989 }
6990 _ => expanded.push(item.clone()),
6991 }
6992 }
6993
6994 Ok(expanded)
6995}
6996
/// Translate a single ORDER BY item into the storage layer's `ScanOrderSpec`.
///
/// The target must resolve to a plain table column (by name or by projection
/// index); computed projections and un-expanded `ORDER BY ALL` are rejected.
/// Native ordering is only supported for Int64 and Utf8 columns here.
fn resolve_scan_order<P>(
    table: &ExecutorTable<P>,
    projections: &[ScanProjection],
    order_plan: &OrderByPlan,
) -> ExecutorResult<ScanOrderSpec>
where
    P: Pager<Blob = EntryHandle> + Send + Sync,
{
    // Resolve the order target to a concrete schema column and field id.
    let (column, field_id) = match &order_plan.target {
        OrderTarget::Column(name) => {
            let column = table.schema.resolve(name).ok_or_else(|| {
                Error::InvalidArgumentError(format!("unknown column '{}' in ORDER BY", name))
            })?;
            (column, column.field_id)
        }
        OrderTarget::Index(position) => {
            let projection = projections.get(*position).ok_or_else(|| {
                Error::InvalidArgumentError(format!(
                    "ORDER BY position {} is out of range",
                    position + 1
                ))
            })?;
            match projection {
                ScanProjection::Column(store_projection) => {
                    let field_id = store_projection.logical_field_id.field_id();
                    let column = table.schema.column_by_field_id(field_id).ok_or_else(|| {
                        Error::InvalidArgumentError(format!(
                            "unknown column with field id {field_id} in ORDER BY"
                        ))
                    })?;
                    (column, field_id)
                }
                ScanProjection::Computed { .. } => {
                    return Err(Error::InvalidArgumentError(
                        "ORDER BY position referring to computed projection is not supported"
                            .into(),
                    ));
                }
            }
        }
        OrderTarget::All => {
            // `expand_order_targets` rewrites ALL into per-index entries first.
            return Err(Error::InvalidArgumentError(
                "ORDER BY ALL should be expanded before execution".into(),
            ));
        }
    };

    // Pick the value transform the storage layer applies before comparing.
    let transform = match order_plan.sort_type {
        OrderSortType::Native => match column.data_type {
            DataType::Int64 => ScanOrderTransform::IdentityInteger,
            DataType::Utf8 => ScanOrderTransform::IdentityUtf8,
            ref other => {
                return Err(Error::InvalidArgumentError(format!(
                    "ORDER BY on column type {:?} is not supported",
                    other
                )));
            }
        },
        OrderSortType::CastTextToInteger => {
            if column.data_type != DataType::Utf8 {
                return Err(Error::InvalidArgumentError(
                    "ORDER BY CAST expects a text column".into(),
                ));
            }
            ScanOrderTransform::CastUtf8ToInteger
        }
    };

    let direction = if order_plan.ascending {
        ScanOrderDirection::Ascending
    } else {
        ScanOrderDirection::Descending
    };

    Ok(ScanOrderSpec {
        field_id,
        direction,
        nulls_first: order_plan.nulls_first,
        transform,
    })
}
7078
7079fn synthesize_null_scan(schema: Arc<Schema>, total_rows: u64) -> ExecutorResult<Vec<RecordBatch>> {
7080 let row_count = usize::try_from(total_rows).map_err(|_| {
7081 Error::InvalidArgumentError("table row count exceeds supported in-memory batch size".into())
7082 })?;
7083
7084 let mut arrays: Vec<ArrayRef> = Vec::with_capacity(schema.fields().len());
7085 for field in schema.fields() {
7086 match field.data_type() {
7087 DataType::Int64 => {
7088 let mut builder = Int64Builder::with_capacity(row_count);
7089 for _ in 0..row_count {
7090 builder.append_null();
7091 }
7092 arrays.push(Arc::new(builder.finish()));
7093 }
7094 DataType::Float64 => {
7095 let mut builder = arrow::array::Float64Builder::with_capacity(row_count);
7096 for _ in 0..row_count {
7097 builder.append_null();
7098 }
7099 arrays.push(Arc::new(builder.finish()));
7100 }
7101 DataType::Utf8 => {
7102 let mut builder = arrow::array::StringBuilder::with_capacity(row_count, 0);
7103 for _ in 0..row_count {
7104 builder.append_null();
7105 }
7106 arrays.push(Arc::new(builder.finish()));
7107 }
7108 DataType::Date32 => {
7109 let mut builder = arrow::array::Date32Builder::with_capacity(row_count);
7110 for _ in 0..row_count {
7111 builder.append_null();
7112 }
7113 arrays.push(Arc::new(builder.finish()));
7114 }
7115 other => {
7116 return Err(Error::InvalidArgumentError(format!(
7117 "unsupported data type in null synthesis: {other:?}"
7118 )));
7119 }
7120 }
7121 }
7122
7123 let batch = RecordBatch::try_new(schema, arrays)?;
7124 Ok(vec![batch])
7125}
7126
/// Materialized per-table (or per-join-result) data used while building
/// cross products.
struct TableCrossProductData {
    /// Schema of `batches`; field names are qualified `schema.table.column`.
    schema: Arc<Schema>,
    /// Row data; may be empty when all rows were filtered out.
    batches: Vec<RecordBatch>,
    /// Column count contributed by each participating table, in order.
    column_counts: Vec<usize>,
    /// Planner index of each participating table, parallel to `column_counts`.
    table_indices: Vec<usize>,
}
7133
/// Scan an entire table into memory for cross-product / join evaluation.
///
/// Projects every column of `table` under a qualified `schema.table.column`
/// (or alias) name, runs a full-table scan with NULLs included, re-labels
/// the resulting batches with the qualified schema, and applies any
/// pushed-down single-column `constraints`.
fn collect_table_data<P>(
    table_index: usize,
    table_ref: &llkv_plan::TableRef,
    table: &ExecutorTable<P>,
    constraints: &[ColumnConstraint],
) -> ExecutorResult<TableCrossProductData>
where
    P: Pager<Blob = EntryHandle> + Send + Sync,
{
    if table.schema.columns.is_empty() {
        return Err(Error::InvalidArgumentError(format!(
            "table '{}' has no columns; cross products require at least one column",
            table_ref.qualified_name()
        )));
    }

    let mut projections = Vec::with_capacity(table.schema.columns.len());
    let mut fields = Vec::with_capacity(table.schema.columns.len());

    // Build one projection + output field per column, qualified so columns
    // from different tables in the product cannot collide.
    for column in &table.schema.columns {
        let table_component = table_ref
            .alias
            .as_deref()
            .unwrap_or(table_ref.table.as_str());
        let qualified_name = format!("{}.{}.{}", table_ref.schema, table_component, column.name);
        projections.push(ScanProjection::from(StoreProjection::with_alias(
            LogicalFieldId::for_user(table.table.table_id(), column.field_id),
            qualified_name.clone(),
        )));
        fields.push(Field::new(
            qualified_name,
            column.data_type.clone(),
            column.nullable,
        ));
    }

    let schema = Arc::new(Schema::new(fields));

    // A tautological filter over the first field drives the full scan.
    let filter_field_id = table.schema.first_field_id().unwrap_or(ROW_ID_FIELD_ID);
    let filter_expr = crate::translation::expression::full_table_scan_filter(filter_field_id);

    let mut raw_batches = Vec::new();
    table.table.scan_stream(
        projections,
        &filter_expr,
        ScanStreamOptions {
            include_nulls: true,
            ..ScanStreamOptions::default()
        },
        |batch| {
            raw_batches.push(batch);
        },
    )?;

    // Re-wrap each scanned batch under the qualified schema built above.
    let mut normalized_batches = Vec::with_capacity(raw_batches.len());
    for batch in raw_batches {
        let normalized = RecordBatch::try_new(Arc::clone(&schema), batch.columns().to_vec())
            .map_err(|err| {
                Error::Internal(format!(
                    "failed to align scan batch for table '{}': {}",
                    table_ref.qualified_name(),
                    err
                ))
            })?;
        normalized_batches.push(normalized);
    }

    if !constraints.is_empty() {
        normalized_batches = apply_column_constraints_to_batches(normalized_batches, constraints)?;
    }

    Ok(TableCrossProductData {
        schema,
        batches: normalized_batches,
        column_counts: vec![table.schema.columns.len()],
        table_indices: vec![table_index],
    })
}
7212
7213fn apply_column_constraints_to_batches(
7214 batches: Vec<RecordBatch>,
7215 constraints: &[ColumnConstraint],
7216) -> ExecutorResult<Vec<RecordBatch>> {
7217 if batches.is_empty() {
7218 return Ok(batches);
7219 }
7220
7221 let mut filtered = batches;
7222 for constraint in constraints {
7223 match constraint {
7224 ColumnConstraint::Equality(lit) => {
7225 filtered = filter_batches_by_literal(filtered, lit.column.column, &lit.value)?;
7226 }
7227 ColumnConstraint::InList(in_list) => {
7228 filtered =
7229 filter_batches_by_in_list(filtered, in_list.column.column, &in_list.values)?;
7230 }
7231 }
7232 if filtered.is_empty() {
7233 break;
7234 }
7235 }
7236
7237 Ok(filtered)
7238}
7239
7240fn filter_batches_by_literal(
7241 batches: Vec<RecordBatch>,
7242 column_idx: usize,
7243 literal: &PlanValue,
7244) -> ExecutorResult<Vec<RecordBatch>> {
7245 let mut result = Vec::with_capacity(batches.len());
7246
7247 for batch in batches {
7248 if column_idx >= batch.num_columns() {
7249 return Err(Error::Internal(
7250 "literal constraint referenced invalid column index".into(),
7251 ));
7252 }
7253
7254 if batch.num_rows() == 0 {
7255 result.push(batch);
7256 continue;
7257 }
7258
7259 let column = batch.column(column_idx);
7260 let mut keep_rows: Vec<u32> = Vec::with_capacity(batch.num_rows());
7261
7262 for row_idx in 0..batch.num_rows() {
7263 if array_value_equals_plan_value(column.as_ref(), row_idx, literal)? {
7264 keep_rows.push(row_idx as u32);
7265 }
7266 }
7267
7268 if keep_rows.len() == batch.num_rows() {
7269 result.push(batch);
7270 continue;
7271 }
7272
7273 if keep_rows.is_empty() {
7274 continue;
7276 }
7277
7278 let indices = UInt32Array::from(keep_rows);
7279 let mut filtered_columns: Vec<ArrayRef> = Vec::with_capacity(batch.num_columns());
7280 for col_idx in 0..batch.num_columns() {
7281 let filtered = take(batch.column(col_idx).as_ref(), &indices, None)
7282 .map_err(|err| Error::Internal(format!("failed to apply literal filter: {err}")))?;
7283 filtered_columns.push(filtered);
7284 }
7285
7286 let filtered_batch =
7287 RecordBatch::try_new(batch.schema(), filtered_columns).map_err(|err| {
7288 Error::Internal(format!(
7289 "failed to rebuild batch after literal filter: {err}"
7290 ))
7291 })?;
7292 result.push(filtered_batch);
7293 }
7294
7295 Ok(result)
7296}
7297
7298fn filter_batches_by_in_list(
7299 batches: Vec<RecordBatch>,
7300 column_idx: usize,
7301 values: &[PlanValue],
7302) -> ExecutorResult<Vec<RecordBatch>> {
7303 use arrow::array::*;
7304 use arrow::compute::or;
7305
7306 if values.is_empty() {
7307 return Ok(Vec::new());
7309 }
7310
7311 let mut result = Vec::with_capacity(batches.len());
7312
7313 for batch in batches {
7314 if column_idx >= batch.num_columns() {
7315 return Err(Error::Internal(
7316 "IN list constraint referenced invalid column index".into(),
7317 ));
7318 }
7319
7320 if batch.num_rows() == 0 {
7321 result.push(batch);
7322 continue;
7323 }
7324
7325 let column = batch.column(column_idx);
7326
7327 let mut mask = BooleanArray::from(vec![false; batch.num_rows()]);
7330
7331 for value in values {
7332 let comparison_mask = build_comparison_mask(column.as_ref(), value)?;
7333 mask = or(&mask, &comparison_mask)
7334 .map_err(|err| Error::Internal(format!("failed to OR comparison masks: {err}")))?;
7335 }
7336
7337 let true_count = mask.true_count();
7339 if true_count == batch.num_rows() {
7340 result.push(batch);
7341 continue;
7342 }
7343
7344 if true_count == 0 {
7345 continue;
7347 }
7348
7349 let filtered_batch = arrow::compute::filter_record_batch(&batch, &mask)
7351 .map_err(|err| Error::Internal(format!("failed to apply IN list filter: {err}")))?;
7352
7353 result.push(filtered_batch);
7354 }
7355
7356 Ok(result)
7357}
7358
7359fn build_comparison_mask(column: &dyn Array, value: &PlanValue) -> ExecutorResult<BooleanArray> {
7361 use arrow::array::*;
7362 use arrow::datatypes::DataType;
7363
7364 match value {
7365 PlanValue::Null => {
7366 let mut builder = BooleanBuilder::with_capacity(column.len());
7368 for i in 0..column.len() {
7369 builder.append_value(column.is_null(i));
7370 }
7371 Ok(builder.finish())
7372 }
7373 PlanValue::Integer(val) => {
7374 let mut builder = BooleanBuilder::with_capacity(column.len());
7375 match column.data_type() {
7376 DataType::Int8 => {
7377 let arr = column
7378 .as_any()
7379 .downcast_ref::<Int8Array>()
7380 .ok_or_else(|| Error::Internal("failed to downcast to Int8Array".into()))?;
7381 let target = *val as i8;
7382 for i in 0..arr.len() {
7383 builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7384 }
7385 }
7386 DataType::Int16 => {
7387 let arr = column
7388 .as_any()
7389 .downcast_ref::<Int16Array>()
7390 .ok_or_else(|| {
7391 Error::Internal("failed to downcast to Int16Array".into())
7392 })?;
7393 let target = *val as i16;
7394 for i in 0..arr.len() {
7395 builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7396 }
7397 }
7398 DataType::Int32 => {
7399 let arr = column
7400 .as_any()
7401 .downcast_ref::<Int32Array>()
7402 .ok_or_else(|| {
7403 Error::Internal("failed to downcast to Int32Array".into())
7404 })?;
7405 let target = *val as i32;
7406 for i in 0..arr.len() {
7407 builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7408 }
7409 }
7410 DataType::Int64 => {
7411 let arr = column
7412 .as_any()
7413 .downcast_ref::<Int64Array>()
7414 .ok_or_else(|| {
7415 Error::Internal("failed to downcast to Int64Array".into())
7416 })?;
7417 for i in 0..arr.len() {
7418 builder.append_value(!arr.is_null(i) && arr.value(i) == *val);
7419 }
7420 }
7421 DataType::UInt8 => {
7422 let arr = column
7423 .as_any()
7424 .downcast_ref::<UInt8Array>()
7425 .ok_or_else(|| {
7426 Error::Internal("failed to downcast to UInt8Array".into())
7427 })?;
7428 let target = *val as u8;
7429 for i in 0..arr.len() {
7430 builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7431 }
7432 }
7433 DataType::UInt16 => {
7434 let arr = column
7435 .as_any()
7436 .downcast_ref::<UInt16Array>()
7437 .ok_or_else(|| {
7438 Error::Internal("failed to downcast to UInt16Array".into())
7439 })?;
7440 let target = *val as u16;
7441 for i in 0..arr.len() {
7442 builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7443 }
7444 }
7445 DataType::UInt32 => {
7446 let arr = column
7447 .as_any()
7448 .downcast_ref::<UInt32Array>()
7449 .ok_or_else(|| {
7450 Error::Internal("failed to downcast to UInt32Array".into())
7451 })?;
7452 let target = *val as u32;
7453 for i in 0..arr.len() {
7454 builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7455 }
7456 }
7457 DataType::UInt64 => {
7458 let arr = column
7459 .as_any()
7460 .downcast_ref::<UInt64Array>()
7461 .ok_or_else(|| {
7462 Error::Internal("failed to downcast to UInt64Array".into())
7463 })?;
7464 let target = *val as u64;
7465 for i in 0..arr.len() {
7466 builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7467 }
7468 }
7469 _ => {
7470 return Err(Error::Internal(format!(
7471 "unsupported integer type for IN list: {:?}",
7472 column.data_type()
7473 )));
7474 }
7475 }
7476 Ok(builder.finish())
7477 }
7478 PlanValue::Float(val) => {
7479 let mut builder = BooleanBuilder::with_capacity(column.len());
7480 match column.data_type() {
7481 DataType::Float32 => {
7482 let arr = column
7483 .as_any()
7484 .downcast_ref::<Float32Array>()
7485 .ok_or_else(|| {
7486 Error::Internal("failed to downcast to Float32Array".into())
7487 })?;
7488 let target = *val as f32;
7489 for i in 0..arr.len() {
7490 builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7491 }
7492 }
7493 DataType::Float64 => {
7494 let arr = column
7495 .as_any()
7496 .downcast_ref::<Float64Array>()
7497 .ok_or_else(|| {
7498 Error::Internal("failed to downcast to Float64Array".into())
7499 })?;
7500 for i in 0..arr.len() {
7501 builder.append_value(!arr.is_null(i) && arr.value(i) == *val);
7502 }
7503 }
7504 _ => {
7505 return Err(Error::Internal(format!(
7506 "unsupported float type for IN list: {:?}",
7507 column.data_type()
7508 )));
7509 }
7510 }
7511 Ok(builder.finish())
7512 }
7513 PlanValue::String(val) => {
7514 let mut builder = BooleanBuilder::with_capacity(column.len());
7515 let arr = column
7516 .as_any()
7517 .downcast_ref::<StringArray>()
7518 .ok_or_else(|| Error::Internal("failed to downcast to StringArray".into()))?;
7519 for i in 0..arr.len() {
7520 builder.append_value(!arr.is_null(i) && arr.value(i) == val.as_str());
7521 }
7522 Ok(builder.finish())
7523 }
7524 PlanValue::Struct(_) => Err(Error::Internal(
7525 "struct comparison in IN list not supported".into(),
7526 )),
7527 }
7528}
7529
7530fn array_value_equals_plan_value(
7531 array: &dyn Array,
7532 row_idx: usize,
7533 literal: &PlanValue,
7534) -> ExecutorResult<bool> {
7535 use arrow::array::*;
7536 use arrow::datatypes::DataType;
7537
7538 match literal {
7539 PlanValue::Null => Ok(array.is_null(row_idx)),
7540 PlanValue::Integer(expected) => match array.data_type() {
7541 DataType::Int8 => Ok(!array.is_null(row_idx)
7542 && array
7543 .as_any()
7544 .downcast_ref::<Int8Array>()
7545 .expect("int8 array")
7546 .value(row_idx) as i64
7547 == *expected),
7548 DataType::Int16 => Ok(!array.is_null(row_idx)
7549 && array
7550 .as_any()
7551 .downcast_ref::<Int16Array>()
7552 .expect("int16 array")
7553 .value(row_idx) as i64
7554 == *expected),
7555 DataType::Int32 => Ok(!array.is_null(row_idx)
7556 && array
7557 .as_any()
7558 .downcast_ref::<Int32Array>()
7559 .expect("int32 array")
7560 .value(row_idx) as i64
7561 == *expected),
7562 DataType::Int64 => Ok(!array.is_null(row_idx)
7563 && array
7564 .as_any()
7565 .downcast_ref::<Int64Array>()
7566 .expect("int64 array")
7567 .value(row_idx)
7568 == *expected),
7569 DataType::UInt8 if *expected >= 0 => Ok(!array.is_null(row_idx)
7570 && array
7571 .as_any()
7572 .downcast_ref::<UInt8Array>()
7573 .expect("uint8 array")
7574 .value(row_idx) as i64
7575 == *expected),
7576 DataType::UInt16 if *expected >= 0 => Ok(!array.is_null(row_idx)
7577 && array
7578 .as_any()
7579 .downcast_ref::<UInt16Array>()
7580 .expect("uint16 array")
7581 .value(row_idx) as i64
7582 == *expected),
7583 DataType::UInt32 if *expected >= 0 => Ok(!array.is_null(row_idx)
7584 && array
7585 .as_any()
7586 .downcast_ref::<UInt32Array>()
7587 .expect("uint32 array")
7588 .value(row_idx) as i64
7589 == *expected),
7590 DataType::UInt64 if *expected >= 0 => Ok(!array.is_null(row_idx)
7591 && array
7592 .as_any()
7593 .downcast_ref::<UInt64Array>()
7594 .expect("uint64 array")
7595 .value(row_idx)
7596 == *expected as u64),
7597 DataType::Boolean => {
7598 if array.is_null(row_idx) {
7599 Ok(false)
7600 } else if *expected == 0 || *expected == 1 {
7601 let value = array
7602 .as_any()
7603 .downcast_ref::<BooleanArray>()
7604 .expect("bool array")
7605 .value(row_idx);
7606 Ok(value == (*expected == 1))
7607 } else {
7608 Ok(false)
7609 }
7610 }
7611 _ => Err(Error::InvalidArgumentError(format!(
7612 "literal integer comparison not supported for {:?}",
7613 array.data_type()
7614 ))),
7615 },
7616 PlanValue::Float(expected) => match array.data_type() {
7617 DataType::Float32 => Ok(!array.is_null(row_idx)
7618 && (array
7619 .as_any()
7620 .downcast_ref::<Float32Array>()
7621 .expect("float32 array")
7622 .value(row_idx) as f64
7623 - *expected)
7624 .abs()
7625 .eq(&0.0)),
7626 DataType::Float64 => Ok(!array.is_null(row_idx)
7627 && (array
7628 .as_any()
7629 .downcast_ref::<Float64Array>()
7630 .expect("float64 array")
7631 .value(row_idx)
7632 - *expected)
7633 .abs()
7634 .eq(&0.0)),
7635 _ => Err(Error::InvalidArgumentError(format!(
7636 "literal float comparison not supported for {:?}",
7637 array.data_type()
7638 ))),
7639 },
7640 PlanValue::String(expected) => match array.data_type() {
7641 DataType::Utf8 => Ok(!array.is_null(row_idx)
7642 && array
7643 .as_any()
7644 .downcast_ref::<StringArray>()
7645 .expect("string array")
7646 .value(row_idx)
7647 == expected),
7648 DataType::LargeUtf8 => Ok(!array.is_null(row_idx)
7649 && array
7650 .as_any()
7651 .downcast_ref::<LargeStringArray>()
7652 .expect("large string array")
7653 .value(row_idx)
7654 == expected),
7655 _ => Err(Error::InvalidArgumentError(format!(
7656 "literal string comparison not supported for {:?}",
7657 array.data_type()
7658 ))),
7659 },
7660 PlanValue::Struct(_) => Err(Error::InvalidArgumentError(
7661 "struct literals are not supported in join filters".into(),
7662 )),
7663 }
7664}
7665
/// Hash-join two cross-product operands on equality of `join_keys` (pairs of
/// (left column index, right column index)), concatenating the surviving
/// left and right columns into one combined batch.
///
/// Only INNER and LEFT joins are handled here; other join types are rejected
/// and must go through llkv-join. An empty right side yields no rows for
/// INNER, or all left rows padded with NULL right columns for LEFT.
fn hash_join_table_batches(
    left: TableCrossProductData,
    right: TableCrossProductData,
    join_keys: &[(usize, usize)],
    join_type: llkv_join::JoinType,
) -> ExecutorResult<TableCrossProductData> {
    let TableCrossProductData {
        schema: left_schema,
        batches: left_batches,
        column_counts: left_counts,
        table_indices: left_tables,
    } = left;

    let TableCrossProductData {
        schema: right_schema,
        batches: right_batches,
        column_counts: right_counts,
        table_indices: right_tables,
    } = right;

    // Output schema is simply the left fields followed by the right fields.
    let combined_fields: Vec<Field> = left_schema
        .fields()
        .iter()
        .chain(right_schema.fields().iter())
        .map(|field| field.as_ref().clone())
        .collect();

    let combined_schema = Arc::new(Schema::new(combined_fields));

    // Bookkeeping for downstream column-span resolution.
    let mut column_counts = Vec::with_capacity(left_counts.len() + right_counts.len());
    column_counts.extend(left_counts.iter());
    column_counts.extend(right_counts.iter());

    let mut table_indices = Vec::with_capacity(left_tables.len() + right_tables.len());
    table_indices.extend(left_tables.iter().copied());
    table_indices.extend(right_tables.iter().copied());

    // No left rows: result is empty regardless of join type.
    if left_batches.is_empty() {
        return Ok(TableCrossProductData {
            schema: combined_schema,
            batches: Vec::new(),
            column_counts,
            table_indices,
        });
    }

    if right_batches.is_empty() {
        if join_type == llkv_join::JoinType::Left {
            // LEFT JOIN with empty right side: keep every left row and pad
            // the right columns with NULLs.
            let total_left_rows: usize = left_batches.iter().map(|b| b.num_rows()).sum();
            let mut left_arrays = Vec::new();
            for field in left_schema.fields() {
                let column_idx = left_schema.index_of(field.name()).map_err(|e| {
                    Error::Internal(format!("failed to find field {}: {}", field.name(), e))
                })?;
                let arrays: Vec<ArrayRef> = left_batches
                    .iter()
                    .map(|batch| batch.column(column_idx).clone())
                    .collect();
                let concatenated =
                    arrow::compute::concat(&arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>())
                        .map_err(|e| {
                            Error::Internal(format!("failed to concat left arrays: {}", e))
                        })?;
                left_arrays.push(concatenated);
            }

            for field in right_schema.fields() {
                let null_array = arrow::array::new_null_array(field.data_type(), total_left_rows);
                left_arrays.push(null_array);
            }

            let joined_batch = RecordBatch::try_new(Arc::clone(&combined_schema), left_arrays)
                .map_err(|err| {
                    Error::Internal(format!(
                        "failed to create LEFT JOIN batch with NULL right: {err}"
                    ))
                })?;

            return Ok(TableCrossProductData {
                schema: combined_schema,
                batches: vec![joined_batch],
                column_counts,
                table_indices,
            });
        } else {
            // INNER JOIN against an empty right side produces no rows.
            return Ok(TableCrossProductData {
                schema: combined_schema,
                batches: Vec::new(),
                column_counts,
                table_indices,
            });
        }
    }

    match join_type {
        llkv_join::JoinType::Inner => {
            // Find matching (batch, row) pairs on both sides, then gather
            // them into contiguous output columns.
            let (left_matches, right_matches) =
                build_join_match_indices(&left_batches, &right_batches, join_keys)?;

            if left_matches.is_empty() {
                return Ok(TableCrossProductData {
                    schema: combined_schema,
                    batches: Vec::new(),
                    column_counts,
                    table_indices,
                });
            }

            let left_arrays = gather_indices_from_batches(&left_batches, &left_matches)?;
            let right_arrays = gather_indices_from_batches(&right_batches, &right_matches)?;

            let mut combined_columns = Vec::with_capacity(left_arrays.len() + right_arrays.len());
            combined_columns.extend(left_arrays);
            combined_columns.extend(right_arrays);

            let joined_batch = RecordBatch::try_new(Arc::clone(&combined_schema), combined_columns)
                .map_err(|err| {
                    Error::Internal(format!("failed to materialize INNER JOIN batch: {err}"))
                })?;

            Ok(TableCrossProductData {
                schema: combined_schema,
                batches: vec![joined_batch],
                column_counts,
                table_indices,
            })
        }
        llkv_join::JoinType::Left => {
            // Right matches are optional: `None` entries become NULL rows.
            let (left_matches, right_optional_matches) =
                build_left_join_match_indices(&left_batches, &right_batches, join_keys)?;

            if left_matches.is_empty() {
                return Ok(TableCrossProductData {
                    schema: combined_schema,
                    batches: Vec::new(),
                    column_counts,
                    table_indices,
                });
            }

            let left_arrays = gather_indices_from_batches(&left_batches, &left_matches)?;
            let right_arrays = llkv_column_map::gather::gather_optional_indices_from_batches(
                &right_batches,
                &right_optional_matches,
            )?;

            let mut combined_columns = Vec::with_capacity(left_arrays.len() + right_arrays.len());
            combined_columns.extend(left_arrays);
            combined_columns.extend(right_arrays);

            let joined_batch = RecordBatch::try_new(Arc::clone(&combined_schema), combined_columns)
                .map_err(|err| {
                    Error::Internal(format!("failed to materialize LEFT JOIN batch: {err}"))
                })?;

            Ok(TableCrossProductData {
                schema: combined_schema,
                batches: vec![joined_batch],
                column_counts,
                table_indices,
            })
        }
        _ => Err(Error::Internal(format!(
            "join type {:?} not supported in hash_join_table_batches; use llkv-join",
            join_type
        ))),
    }
}
7841
/// Positions of matched rows expressed as `(batch_index, row_index)` pairs.
type JoinMatchIndices = Vec<(usize, usize)>;
/// Hash table from encoded join-key bytes to every right-side row position
/// carrying that key (see `build_join_key` for the encoding).
type JoinHashTable = FxHashMap<Vec<u8>, Vec<(usize, usize)>>;
/// INNER-join output: parallel vectors of matched left/right row positions
/// (always the same length; entry i of each forms one joined row).
type JoinMatchPairs = (JoinMatchIndices, JoinMatchIndices);
/// Per-left-row right-side match; `None` marks an unmatched left row that a
/// LEFT JOIN must pad with NULLs.
type OptionalJoinMatches = Vec<Option<(usize, usize)>>;
/// LEFT-join output: every left row paired with an optional right match.
type LeftJoinMatchPairs = (JoinMatchIndices, OptionalJoinMatches);
7852
/// Build matched row-position pairs for an INNER hash join.
///
/// Build phase: hashes every right-side row by its encoded join key. Probe
/// phase: probes that table with each left-side row, emitting one
/// `(batch, row)` pair per match into two parallel vectors suitable for
/// `gather_indices_from_batches`.
///
/// Rows whose key contains a NULL (`build_join_key` returns `Ok(false)`)
/// never match, per SQL equi-join semantics.
/// NOTE(review): key-encoding failures (`Err(_)`) are silently skipped
/// rather than propagated, so rows with unsupported key types are dropped
/// instead of failing the query — confirm this is intentional.
fn build_join_match_indices(
    left_batches: &[RecordBatch],
    right_batches: &[RecordBatch],
    join_keys: &[(usize, usize)],
) -> ExecutorResult<JoinMatchPairs> {
    let right_key_indices: Vec<usize> = join_keys.iter().map(|(_, right)| *right).collect();

    // Build phase: hash each right batch in parallel into a local table,
    // then merge the per-batch tables. `par_iter().collect()` preserves
    // batch order, so the merged entry lists are deterministic.
    let hash_table: JoinHashTable = llkv_column_map::parallel::with_thread_pool(|| {
        let local_tables: Vec<JoinHashTable> = right_batches
            .par_iter()
            .enumerate()
            .map(|(batch_idx, batch)| {
                let mut local_table: JoinHashTable = FxHashMap::default();
                // Reused across rows to avoid a per-row allocation.
                let mut key_buffer: Vec<u8> = Vec::new();

                for row_idx in 0..batch.num_rows() {
                    key_buffer.clear();
                    match build_join_key(batch, &right_key_indices, row_idx, &mut key_buffer) {
                        Ok(true) => {
                            local_table
                                .entry(key_buffer.clone())
                                .or_default()
                                .push((batch_idx, row_idx));
                        }
                        // NULL in the key: row cannot satisfy an equi-join.
                        Ok(false) => continue,
                        // Unsupported key type: skip the row (see note above).
                        Err(_) => continue, }
                }

                local_table
            })
            .collect();

        let mut merged_table: JoinHashTable = FxHashMap::default();
        for local_table in local_tables {
            for (key, mut positions) in local_table {
                merged_table.entry(key).or_default().append(&mut positions);
            }
        }

        merged_table
    });

    // No hashable right rows means the inner join produces no output.
    if hash_table.is_empty() {
        return Ok((Vec::new(), Vec::new()));
    }

    let left_key_indices: Vec<usize> = join_keys.iter().map(|(left, _)| *left).collect();

    // Probe phase: each left batch probes the shared table in parallel,
    // emitting one pair per (left row, matching right row).
    let matches: Vec<JoinMatchPairs> = llkv_column_map::parallel::with_thread_pool(|| {
        left_batches
            .par_iter()
            .enumerate()
            .map(|(batch_idx, batch)| {
                let mut local_left_matches: JoinMatchIndices = Vec::new();
                let mut local_right_matches: JoinMatchIndices = Vec::new();
                let mut key_buffer: Vec<u8> = Vec::new();

                for row_idx in 0..batch.num_rows() {
                    key_buffer.clear();
                    match build_join_key(batch, &left_key_indices, row_idx, &mut key_buffer) {
                        Ok(true) => {
                            if let Some(entries) = hash_table.get(&key_buffer) {
                                for &(r_batch, r_row) in entries {
                                    local_left_matches.push((batch_idx, row_idx));
                                    local_right_matches.push((r_batch, r_row));
                                }
                            }
                        }
                        Ok(false) => continue,
                        Err(_) => continue, }
                }

                (local_left_matches, local_right_matches)
            })
            .collect()
    });

    // Concatenate per-batch results in batch order (deterministic output).
    let mut left_matches: JoinMatchIndices = Vec::new();
    let mut right_matches: JoinMatchIndices = Vec::new();
    for (mut left, mut right) in matches {
        left_matches.append(&mut left);
        right_matches.append(&mut right);
    }

    Ok((left_matches, right_matches))
}
7976
/// Build matched row positions for a LEFT OUTER hash join.
///
/// Like `build_join_match_indices`, but every left row appears in the output
/// at least once: left rows with no key match — including rows whose key
/// contains a NULL or whose key fails to encode — are paired with `None` so
/// the caller can pad the right side with NULLs. Consequently there is no
/// early-out on an empty hash table here, unlike the INNER variant.
fn build_left_join_match_indices(
    left_batches: &[RecordBatch],
    right_batches: &[RecordBatch],
    join_keys: &[(usize, usize)],
) -> ExecutorResult<LeftJoinMatchPairs> {
    let right_key_indices: Vec<usize> = join_keys.iter().map(|(_, right)| *right).collect();

    // Build phase: hash each right batch in parallel into a local table,
    // then merge (par_iter().collect() preserves batch order, so the merged
    // entry lists are deterministic).
    let hash_table: JoinHashTable = llkv_column_map::parallel::with_thread_pool(|| {
        let local_tables: Vec<JoinHashTable> = right_batches
            .par_iter()
            .enumerate()
            .map(|(batch_idx, batch)| {
                let mut local_table: JoinHashTable = FxHashMap::default();
                // Reused across rows to avoid a per-row allocation.
                let mut key_buffer: Vec<u8> = Vec::new();

                for row_idx in 0..batch.num_rows() {
                    key_buffer.clear();
                    match build_join_key(batch, &right_key_indices, row_idx, &mut key_buffer) {
                        Ok(true) => {
                            local_table
                                .entry(key_buffer.clone())
                                .or_default()
                                .push((batch_idx, row_idx));
                        }
                        // NULL keys on the right can never match.
                        Ok(false) => continue,
                        // Unsupported key type on the right: row is unmatchable.
                        Err(_) => continue,
                    }
                }

                local_table
            })
            .collect();

        let mut merged_table: JoinHashTable = FxHashMap::default();
        for local_table in local_tables {
            for (key, mut positions) in local_table {
                merged_table.entry(key).or_default().append(&mut positions);
            }
        }

        merged_table
    });

    let left_key_indices: Vec<usize> = join_keys.iter().map(|(left, _)| *left).collect();

    // Probe phase: every left row produces at least one output entry.
    let matches: Vec<LeftJoinMatchPairs> = llkv_column_map::parallel::with_thread_pool(|| {
        left_batches
            .par_iter()
            .enumerate()
            .map(|(batch_idx, batch)| {
                let mut local_left_matches: JoinMatchIndices = Vec::new();
                let mut local_right_optional: Vec<Option<(usize, usize)>> = Vec::new();
                let mut key_buffer: Vec<u8> = Vec::new();

                for row_idx in 0..batch.num_rows() {
                    key_buffer.clear();
                    match build_join_key(batch, &left_key_indices, row_idx, &mut key_buffer) {
                        Ok(true) => {
                            if let Some(entries) = hash_table.get(&key_buffer) {
                                // One output row per matching right row.
                                for &(r_batch, r_row) in entries {
                                    local_left_matches.push((batch_idx, row_idx));
                                    local_right_optional.push(Some((r_batch, r_row)));
                                }
                            } else {
                                // Valid key, no match: NULL-padded output row.
                                local_left_matches.push((batch_idx, row_idx));
                                local_right_optional.push(None);
                            }
                        }
                        // NULL in the key: keep the left row, pad with NULLs.
                        Ok(false) => {
                            local_left_matches.push((batch_idx, row_idx));
                            local_right_optional.push(None);
                        }
                        // Encoding failure: keep the left row, pad with NULLs.
                        Err(_) => {
                            local_left_matches.push((batch_idx, row_idx));
                            local_right_optional.push(None);
                        }
                    }
                }

                (local_left_matches, local_right_optional)
            })
            .collect()
    });

    // Concatenate per-batch results in batch order (deterministic output).
    let mut left_matches: JoinMatchIndices = Vec::new();
    let mut right_optional: Vec<Option<(usize, usize)>> = Vec::new();
    for (mut left, mut right) in matches {
        left_matches.append(&mut left);
        right_optional.append(&mut right);
    }

    Ok((left_matches, right_optional))
}
8087
8088fn build_join_key(
8089 batch: &RecordBatch,
8090 column_indices: &[usize],
8091 row_idx: usize,
8092 buffer: &mut Vec<u8>,
8093) -> ExecutorResult<bool> {
8094 buffer.clear();
8095
8096 for &col_idx in column_indices {
8097 let array = batch.column(col_idx);
8098 if array.is_null(row_idx) {
8099 return Ok(false);
8100 }
8101 append_array_value_to_key(array.as_ref(), row_idx, buffer)?;
8102 }
8103
8104 Ok(true)
8105}
8106
8107fn append_array_value_to_key(
8108 array: &dyn Array,
8109 row_idx: usize,
8110 buffer: &mut Vec<u8>,
8111) -> ExecutorResult<()> {
8112 use arrow::array::*;
8113 use arrow::datatypes::DataType;
8114
8115 match array.data_type() {
8116 DataType::Int8 => buffer.extend_from_slice(
8117 &array
8118 .as_any()
8119 .downcast_ref::<Int8Array>()
8120 .expect("int8 array")
8121 .value(row_idx)
8122 .to_le_bytes(),
8123 ),
8124 DataType::Int16 => buffer.extend_from_slice(
8125 &array
8126 .as_any()
8127 .downcast_ref::<Int16Array>()
8128 .expect("int16 array")
8129 .value(row_idx)
8130 .to_le_bytes(),
8131 ),
8132 DataType::Int32 => buffer.extend_from_slice(
8133 &array
8134 .as_any()
8135 .downcast_ref::<Int32Array>()
8136 .expect("int32 array")
8137 .value(row_idx)
8138 .to_le_bytes(),
8139 ),
8140 DataType::Int64 => buffer.extend_from_slice(
8141 &array
8142 .as_any()
8143 .downcast_ref::<Int64Array>()
8144 .expect("int64 array")
8145 .value(row_idx)
8146 .to_le_bytes(),
8147 ),
8148 DataType::UInt8 => buffer.extend_from_slice(
8149 &array
8150 .as_any()
8151 .downcast_ref::<UInt8Array>()
8152 .expect("uint8 array")
8153 .value(row_idx)
8154 .to_le_bytes(),
8155 ),
8156 DataType::UInt16 => buffer.extend_from_slice(
8157 &array
8158 .as_any()
8159 .downcast_ref::<UInt16Array>()
8160 .expect("uint16 array")
8161 .value(row_idx)
8162 .to_le_bytes(),
8163 ),
8164 DataType::UInt32 => buffer.extend_from_slice(
8165 &array
8166 .as_any()
8167 .downcast_ref::<UInt32Array>()
8168 .expect("uint32 array")
8169 .value(row_idx)
8170 .to_le_bytes(),
8171 ),
8172 DataType::UInt64 => buffer.extend_from_slice(
8173 &array
8174 .as_any()
8175 .downcast_ref::<UInt64Array>()
8176 .expect("uint64 array")
8177 .value(row_idx)
8178 .to_le_bytes(),
8179 ),
8180 DataType::Float32 => buffer.extend_from_slice(
8181 &array
8182 .as_any()
8183 .downcast_ref::<Float32Array>()
8184 .expect("float32 array")
8185 .value(row_idx)
8186 .to_le_bytes(),
8187 ),
8188 DataType::Float64 => buffer.extend_from_slice(
8189 &array
8190 .as_any()
8191 .downcast_ref::<Float64Array>()
8192 .expect("float64 array")
8193 .value(row_idx)
8194 .to_le_bytes(),
8195 ),
8196 DataType::Boolean => buffer.push(
8197 array
8198 .as_any()
8199 .downcast_ref::<BooleanArray>()
8200 .expect("bool array")
8201 .value(row_idx) as u8,
8202 ),
8203 DataType::Utf8 => {
8204 let value = array
8205 .as_any()
8206 .downcast_ref::<StringArray>()
8207 .expect("utf8 array")
8208 .value(row_idx);
8209 buffer.extend_from_slice(&(value.len() as u32).to_le_bytes());
8210 buffer.extend_from_slice(value.as_bytes());
8211 }
8212 DataType::LargeUtf8 => {
8213 let value = array
8214 .as_any()
8215 .downcast_ref::<LargeStringArray>()
8216 .expect("large utf8 array")
8217 .value(row_idx);
8218 buffer.extend_from_slice(&(value.len() as u32).to_le_bytes());
8219 buffer.extend_from_slice(value.as_bytes());
8220 }
8221 DataType::Binary => {
8222 let value = array
8223 .as_any()
8224 .downcast_ref::<BinaryArray>()
8225 .expect("binary array")
8226 .value(row_idx);
8227 buffer.extend_from_slice(&(value.len() as u32).to_le_bytes());
8228 buffer.extend_from_slice(value);
8229 }
8230 other => {
8231 return Err(Error::InvalidArgumentError(format!(
8232 "hash join does not support join key type {:?}",
8233 other
8234 )));
8235 }
8236 }
8237
8238 Ok(())
8239}
8240
8241fn table_has_join_with_used(
8242 candidate: usize,
8243 used_tables: &FxHashSet<usize>,
8244 equalities: &[ColumnEquality],
8245) -> bool {
8246 equalities.iter().any(|equality| {
8247 (equality.left.table == candidate && used_tables.contains(&equality.right.table))
8248 || (equality.right.table == candidate && used_tables.contains(&equality.left.table))
8249 })
8250}
8251
8252fn gather_join_keys(
8253 left: &TableCrossProductData,
8254 right: &TableCrossProductData,
8255 used_tables: &FxHashSet<usize>,
8256 right_table_index: usize,
8257 equalities: &[ColumnEquality],
8258) -> ExecutorResult<Vec<(usize, usize)>> {
8259 let mut keys = Vec::new();
8260
8261 for equality in equalities {
8262 if equality.left.table == right_table_index && used_tables.contains(&equality.right.table) {
8263 let left_idx = resolve_column_index(left, &equality.right).ok_or_else(|| {
8264 Error::Internal("failed to resolve column offset for hash join".into())
8265 })?;
8266 let right_idx = resolve_column_index(right, &equality.left).ok_or_else(|| {
8267 Error::Internal("failed to resolve column offset for hash join".into())
8268 })?;
8269 keys.push((left_idx, right_idx));
8270 } else if equality.right.table == right_table_index
8271 && used_tables.contains(&equality.left.table)
8272 {
8273 let left_idx = resolve_column_index(left, &equality.left).ok_or_else(|| {
8274 Error::Internal("failed to resolve column offset for hash join".into())
8275 })?;
8276 let right_idx = resolve_column_index(right, &equality.right).ok_or_else(|| {
8277 Error::Internal("failed to resolve column offset for hash join".into())
8278 })?;
8279 keys.push((left_idx, right_idx));
8280 }
8281 }
8282
8283 Ok(keys)
8284}
8285
8286fn resolve_column_index(data: &TableCrossProductData, column: &ColumnRef) -> Option<usize> {
8287 let mut offset = 0;
8288 for (table_idx, count) in data.table_indices.iter().zip(data.column_counts.iter()) {
8289 if *table_idx == column.table {
8290 if column.column < *count {
8291 return Some(offset + column.column);
8292 } else {
8293 return None;
8294 }
8295 }
8296 offset += count;
8297 }
8298 None
8299}
8300
/// Build a case-insensitive name -> flat-column-index lookup over a
/// cross-product schema, registering every unambiguous way a column can be
/// referenced (fully-qualified field name, `alias.col`, `schema.alias.col`,
/// bare table forms when the base table appears only once, `table.col`
/// suffixes when unique, and bare column names).
///
/// Insertion uses `entry(..).or_insert(..)` throughout, so the FIRST field
/// that claims a name wins and later duplicates do not overwrite it.
fn build_cross_product_column_lookup(
    schema: &Schema,
    tables: &[llkv_plan::TableRef],
    column_counts: &[usize],
    table_indices: &[usize],
) -> FxHashMap<String, usize> {
    debug_assert_eq!(tables.len(), column_counts.len());
    debug_assert_eq!(column_counts.len(), table_indices.len());

    // Count how often each bare column name and each `table.col` suffix
    // occurs across the whole schema; only globally-unique forms get
    // registered as shorthand keys.
    let mut column_occurrences: FxHashMap<String, usize> = FxHashMap::default();
    let mut table_column_counts: FxHashMap<String, usize> = FxHashMap::default();
    for field in schema.fields() {
        let column_name = extract_column_name(field.name());
        *column_occurrences.entry(column_name).or_insert(0) += 1;
        if let Some(pair) = table_column_suffix(field.name()) {
            *table_column_counts.entry(pair).or_insert(0) += 1;
        }
    }

    // Count how many times each base table is referenced, and how many of
    // those references are unaliased — bare table-name keys are only safe
    // when they cannot be ambiguous.
    let mut base_table_totals: FxHashMap<String, usize> = FxHashMap::default();
    let mut base_table_unaliased: FxHashMap<String, usize> = FxHashMap::default();
    for table_ref in tables {
        let key = base_table_key(table_ref);
        *base_table_totals.entry(key.clone()).or_insert(0) += 1;
        if table_ref.alias.is_none() {
            *base_table_unaliased.entry(key).or_insert(0) += 1;
        }
    }

    let mut lookup = FxHashMap::default();

    // Degenerate case: no per-table bookkeeping available. Register only the
    // forms derivable from the field names themselves.
    if table_indices.is_empty() || column_counts.is_empty() {
        for (idx, field) in schema.fields().iter().enumerate() {
            let field_name_lower = field.name().to_ascii_lowercase();
            lookup.entry(field_name_lower).or_insert(idx);

            let trimmed_lower = field.name().trim_start_matches('.').to_ascii_lowercase();
            lookup.entry(trimmed_lower).or_insert(idx);

            if let Some(pair) = table_column_suffix(field.name())
                && table_column_counts.get(&pair).copied().unwrap_or(0) == 1
            {
                lookup.entry(pair).or_insert(idx);
            }

            let column_name = extract_column_name(field.name());
            if column_occurrences.get(&column_name).copied().unwrap_or(0) == 1 {
                lookup.entry(column_name).or_insert(idx);
            }
        }
        return lookup;
    }

    // Walk the tables in cross-product order; `offset` tracks where each
    // table's columns start in the flattened schema.
    let mut offset = 0usize;
    for (&table_idx, &count) in table_indices.iter().zip(column_counts.iter()) {
        if table_idx >= tables.len() {
            continue;
        }
        let table_ref = &tables[table_idx];
        let alias_lower = table_ref
            .alias
            .as_ref()
            .map(|alias| alias.to_ascii_lowercase());
        let table_lower = table_ref.table.to_ascii_lowercase();
        let schema_lower = table_ref.schema.to_ascii_lowercase();
        let base_key = base_table_key(table_ref);
        let total_refs = base_table_totals.get(&base_key).copied().unwrap_or(0);
        let unaliased_refs = base_table_unaliased.get(&base_key).copied().unwrap_or(0);

        // Bare table-name keys are allowed only when they are unambiguous:
        // either this is the sole unaliased reference, or the table appears
        // exactly once (aliased) with no unaliased reference competing.
        let allow_base_mapping = if table_ref.alias.is_none() {
            unaliased_refs == 1
        } else {
            unaliased_refs == 0 && total_refs == 1
        };

        // Every qualifier prefix under which this table's columns may be
        // addressed (alias forms first, then base-table forms if allowed).
        let mut table_keys: Vec<String> = Vec::new();

        if let Some(alias) = &alias_lower {
            table_keys.push(alias.clone());
            if !schema_lower.is_empty() {
                table_keys.push(format!("{}.{}", schema_lower, alias));
            }
        }

        if allow_base_mapping {
            table_keys.push(table_lower.clone());
            if !schema_lower.is_empty() {
                table_keys.push(format!("{}.{}", schema_lower, table_lower));
            }
        }

        for local_idx in 0..count {
            let field_index = offset + local_idx;
            let field = schema.field(field_index);
            let field_name_lower = field.name().to_ascii_lowercase();
            lookup.entry(field_name_lower).or_insert(field_index);

            let trimmed_lower = field.name().trim_start_matches('.').to_ascii_lowercase();
            lookup.entry(trimmed_lower).or_insert(field_index);

            // Register `<qualifier>.<column>` for every allowed qualifier.
            let column_name = extract_column_name(field.name());
            for table_key in &table_keys {
                lookup
                    .entry(format!("{}.{}", table_key, column_name))
                    .or_insert(field_index);
            }

            // Bare column name: first claimant wins via or_insert.
            lookup.entry(column_name.clone()).or_insert(field_index);

            // Fallback: derive a `table.col` key from the field name itself
            // when no qualifier keys exist and the suffix is globally unique.
            if table_keys.is_empty()
                && let Some(pair) = table_column_suffix(field.name())
                && table_column_counts.get(&pair).copied().unwrap_or(0) == 1
            {
                lookup.entry(pair).or_insert(field_index);
            }
        }

        offset = offset.saturating_add(count);
    }

    lookup
}
8426
8427fn base_table_key(table_ref: &llkv_plan::TableRef) -> String {
8428 let schema_lower = table_ref.schema.to_ascii_lowercase();
8429 let table_lower = table_ref.table.to_ascii_lowercase();
8430 if schema_lower.is_empty() {
8431 table_lower
8432 } else {
8433 format!("{}.{}", schema_lower, table_lower)
8434 }
8435}
8436
/// Extract the bare column name (lowercased) from a possibly qualified
/// identifier such as `schema.table.column` or `.table.column`: leading dots
/// are stripped, then everything after the last remaining `.` is returned.
fn extract_column_name(name: &str) -> String {
    let trimmed = name.trim_start_matches('.');
    match trimmed.rfind('.') {
        Some(dot) => trimmed[dot + 1..].to_ascii_lowercase(),
        None => trimmed.to_ascii_lowercase(),
    }
}
8444
/// Produce the lowercase `table.column` suffix of a qualified name (leading
/// dots ignored), or `None` when fewer than two dot-separated segments
/// remain after trimming.
fn table_column_suffix(name: &str) -> Option<String> {
    let trimmed = name.trim_start_matches('.');
    // Walk the segments from the right: last is the column, next the table.
    let mut rev_segments = trimmed.rsplit('.');
    let column = rev_segments.next()?;
    let table = rev_segments.next()?;
    Some(format!(
        "{}.{}",
        table.to_ascii_lowercase(),
        column.to_ascii_lowercase()
    ))
}
8455
/// Materialize the Cartesian product of two staged table-batch sets.
///
/// The combined schema and per-table bookkeeping concatenate left first,
/// then right. One output batch is produced per non-empty
/// (left batch, right batch) pair, expanded in parallel on the shared
/// thread pool. If either side has no rows at all, the result carries the
/// combined schema with zero batches.
///
/// NOTE(review): `cross_join_pair` failures are discarded via `.ok()` in the
/// `filter_map` below rather than propagated — a failing pair silently drops
/// rows from the product; confirm that is intended.
fn cross_join_table_batches(
    left: TableCrossProductData,
    right: TableCrossProductData,
) -> ExecutorResult<TableCrossProductData> {
    // Destructure both composites so their bookkeeping can be moved, not
    // cloned, into the combined result.
    let TableCrossProductData {
        schema: left_schema,
        batches: left_batches,
        column_counts: mut left_counts,
        table_indices: mut left_tables,
    } = left;
    let TableCrossProductData {
        schema: right_schema,
        batches: right_batches,
        column_counts: right_counts,
        table_indices: right_tables,
    } = right;

    // Combined schema: left fields followed by right fields.
    let combined_fields: Vec<Field> = left_schema
        .fields()
        .iter()
        .chain(right_schema.fields().iter())
        .map(|field| field.as_ref().clone())
        .collect();

    let mut column_counts = Vec::with_capacity(left_counts.len() + right_counts.len());
    column_counts.append(&mut left_counts);
    column_counts.extend(right_counts);

    let mut table_indices = Vec::with_capacity(left_tables.len() + right_tables.len());
    table_indices.append(&mut left_tables);
    table_indices.extend(right_tables);

    let combined_schema = Arc::new(Schema::new(combined_fields));

    let left_has_rows = left_batches.iter().any(|batch| batch.num_rows() > 0);
    let right_has_rows = right_batches.iter().any(|batch| batch.num_rows() > 0);

    // Empty on either side -> empty product (schema still describes shape).
    if !left_has_rows || !right_has_rows {
        return Ok(TableCrossProductData {
            schema: combined_schema,
            batches: Vec::new(),
            column_counts,
            table_indices,
        });
    }

    // Expand every non-empty (left, right) batch pair in parallel.
    let output_batches: Vec<RecordBatch> = llkv_column_map::parallel::with_thread_pool(|| {
        left_batches
            .par_iter()
            .filter(|left_batch| left_batch.num_rows() > 0)
            .flat_map(|left_batch| {
                right_batches
                    .par_iter()
                    .filter(|right_batch| right_batch.num_rows() > 0)
                    .filter_map(|right_batch| {
                        cross_join_pair(left_batch, right_batch, &combined_schema).ok()
                    })
                    .collect::<Vec<_>>()
            })
            .collect()
    });

    Ok(TableCrossProductData {
        schema: combined_schema,
        batches: output_batches,
        column_counts,
        table_indices,
    })
}
8551
8552fn cross_join_all(staged: Vec<TableCrossProductData>) -> ExecutorResult<TableCrossProductData> {
8553 let mut iter = staged.into_iter();
8554 let mut current = iter
8555 .next()
8556 .ok_or_else(|| Error::Internal("cross product preparation yielded no tables".into()))?;
8557 for next in iter {
8558 current = cross_join_table_batches(current, next)?;
8559 }
8560 Ok(current)
8561}
8562
/// Metadata about one FROM-clause table, used to resolve column references
/// while extracting join constraints from a WHERE expression.
struct TableInfo<'a> {
    /// Position of the table within the plan's table list.
    index: usize,
    /// The plan-level table reference (schema, name, optional alias).
    table_ref: &'a llkv_plan::TableRef,
    /// Lowercased column name -> column ordinal within this table's schema.
    column_map: FxHashMap<String, usize>,
}

/// A column identified by `(table ordinal, column ordinal)`.
#[derive(Clone, Copy)]
struct ColumnRef {
    table: usize,
    column: usize,
}

/// An equi-join predicate `left = right` between columns of two tables.
#[derive(Clone, Copy)]
struct ColumnEquality {
    left: ColumnRef,
    right: ColumnRef,
}

/// A `column = literal` constraint usable for per-table pushdown filtering.
#[derive(Clone)]
struct ColumnLiteral {
    column: ColumnRef,
    value: PlanValue,
}

/// A `column IN (values…)` constraint usable for per-table pushdown filtering.
#[derive(Clone)]
struct ColumnInList {
    column: ColumnRef,
    values: Vec<PlanValue>,
}

/// A single-table constraint extracted from the WHERE clause.
#[derive(Clone)]
enum ColumnConstraint {
    Equality(ColumnLiteral),
    InList(ColumnInList),
}

/// Result of analyzing a WHERE clause's top-level conjuncts for join planning.
struct JoinConstraintPlan {
    /// Column-to-column equalities (candidate hash-join keys).
    equalities: Vec<ColumnEquality>,
    /// Single-column literal / IN-list constraints.
    literals: Vec<ColumnConstraint>,
    /// True when a constant-false conjunct makes the whole filter unsatisfiable.
    unsatisfiable: bool,
    /// Number of top-level AND conjuncts examined.
    total_conjuncts: usize,
    /// How many conjuncts were fully captured into `equalities`/`literals`.
    /// Presumably callers compare this with `total_conjuncts` to decide
    /// whether a residual filter must still run — confirm at call sites.
    handled_conjuncts: usize,
}
8609
/// Scan a WHERE expression for single-table constraints (`col = literal`,
/// `col IN (…)`, and OR-chains reducible to an IN list) that can be pushed
/// down to individual table scans before joining.
///
/// Returns one constraint list per entry in `tables_with_handles`, in the
/// same order. Conjuncts that do not match a recognized shape are simply
/// ignored — pushdown here is an optimization, not a replacement for the
/// full filter.
fn extract_literal_pushdown_filters<P>(
    expr: &LlkvExpr<'static, String>,
    tables_with_handles: &[(llkv_plan::TableRef, Arc<ExecutorTable<P>>)],
) -> Vec<Vec<ColumnConstraint>>
where
    P: Pager<Blob = EntryHandle> + Send + Sync,
{
    // Build per-table lowercase column-name lookups (first declaration of a
    // duplicated name wins via or_insert).
    let mut table_infos = Vec::with_capacity(tables_with_handles.len());
    for (index, (table_ref, executor_table)) in tables_with_handles.iter().enumerate() {
        let mut column_map = FxHashMap::default();
        for (column_idx, column) in executor_table.schema.columns.iter().enumerate() {
            let column_name = column.name.to_ascii_lowercase();
            column_map.entry(column_name).or_insert(column_idx);
        }
        table_infos.push(TableInfo {
            index,
            table_ref,
            column_map,
        });
    }

    let mut constraints: Vec<Vec<ColumnConstraint>> = vec![Vec::new(); tables_with_handles.len()];

    // Flatten top-level ANDs; each conjunct is examined independently.
    let mut conjuncts = Vec::new();
    collect_conjuncts_lenient(expr, &mut conjuncts);

    for conjunct in conjuncts {
        // Case 1: `<column> = <literal>` in either operand order.
        if let LlkvExpr::Compare {
            left,
            op: CompareOp::Eq,
            right,
        } = conjunct
        {
            match (
                resolve_column_reference(left, &table_infos),
                resolve_column_reference(right, &table_infos),
            ) {
                (Some(column), None) => {
                    if let Some(literal) = extract_literal(right)
                        && let Some(value) = literal_to_plan_value_for_join(literal)
                        && column.table < constraints.len()
                    {
                        constraints[column.table]
                            .push(ColumnConstraint::Equality(ColumnLiteral { column, value }));
                    }
                }
                (None, Some(column)) => {
                    if let Some(literal) = extract_literal(left)
                        && let Some(value) = literal_to_plan_value_for_join(literal)
                        && column.table < constraints.len()
                    {
                        constraints[column.table]
                            .push(ColumnConstraint::Equality(ColumnLiteral { column, value }));
                    }
                }
                _ => {}
            }
        }
        // Case 2: a pre-lowered predicate `field = literal`. The field name is
        // matched against each table's columns; the FIRST table that has the
        // column claims the constraint and the loop breaks.
        // NOTE(review): with the same column name in multiple tables this may
        // attribute the filter to the wrong table — confirm names arriving
        // here are qualified or unique.
        else if let LlkvExpr::Pred(filter) = conjunct {
            if let Operator::Equals(ref literal_val) = filter.op {
                let field_name = filter.field_id.trim().to_ascii_lowercase();

                for info in &table_infos {
                    if let Some(&col_idx) = info.column_map.get(&field_name) {
                        if let Some(value) = plan_value_from_operator_literal(literal_val) {
                            let column_ref = ColumnRef {
                                table: info.index,
                                column: col_idx,
                            };
                            if info.index < constraints.len() {
                                constraints[info.index].push(ColumnConstraint::Equality(
                                    ColumnLiteral {
                                        column: column_ref,
                                        value,
                                    },
                                ));
                            }
                        }
                        break; }
                }
            }
        }
        // Case 3: `<column> IN (literal, …)` (non-negated only).
        // NOTE(review): non-literal list items are silently dropped, so a
        // mixed list produces a NARROWER pushdown constraint — only sound if
        // the full WHERE clause is re-applied after the scan; confirm.
        else if let LlkvExpr::InList {
            expr: col_expr,
            list,
            negated: false,
        } = conjunct
        {
            if let Some(column) = resolve_column_reference(col_expr, &table_infos) {
                let mut values = Vec::new();
                for item in list {
                    if let Some(literal) = extract_literal(item)
                        && let Some(value) = literal_to_plan_value_for_join(literal)
                    {
                        values.push(value);
                    }
                }
                if !values.is_empty() && column.table < constraints.len() {
                    constraints[column.table]
                        .push(ColumnConstraint::InList(ColumnInList { column, values }));
                }
            }
        }
        // Case 4: an OR chain of equalities on one column, folded into IN.
        else if let LlkvExpr::Or(or_children) = conjunct
            && let Some((column, values)) = try_extract_or_as_in_list(or_children, &table_infos)
            && !values.is_empty()
            && column.table < constraints.len()
        {
            constraints[column.table]
                .push(ColumnConstraint::InList(ColumnInList { column, values }));
        }
    }

    constraints
}
8752
8753fn collect_conjuncts_lenient<'a>(
8758 expr: &'a LlkvExpr<'static, String>,
8759 out: &mut Vec<&'a LlkvExpr<'static, String>>,
8760) {
8761 match expr {
8762 LlkvExpr::And(children) => {
8763 for child in children {
8764 collect_conjuncts_lenient(child, out);
8765 }
8766 }
8767 other => {
8768 out.push(other);
8770 }
8771 }
8772}
8773
/// Try to fold an OR chain into a single `column IN (values…)` constraint.
///
/// Succeeds only when EVERY child is an equality between the SAME column and
/// some literal (in either `Compare` or pre-lowered `Pred` form). Any child
/// that does not fit, or that references a different column, aborts the
/// whole fold with `None` — a partial IN list would change semantics.
fn try_extract_or_as_in_list(
    or_children: &[LlkvExpr<'static, String>],
    table_infos: &[TableInfo<'_>],
) -> Option<(ColumnRef, Vec<PlanValue>)> {
    if or_children.is_empty() {
        return None;
    }

    // The single column all children must agree on, fixed by the first child.
    let mut common_column: Option<ColumnRef> = None;
    let mut values = Vec::new();

    for child in or_children {
        if let LlkvExpr::Compare {
            left,
            op: CompareOp::Eq,
            right,
        } = child
        {
            // `<column> = <literal>` orientation.
            if let (Some(column), None) = (
                resolve_column_reference(left, table_infos),
                resolve_column_reference(right, table_infos),
            ) && let Some(literal) = extract_literal(right)
                && let Some(value) = literal_to_plan_value_for_join(literal)
            {
                match common_column {
                    None => common_column = Some(column),
                    Some(ref prev)
                        if prev.table == column.table && prev.column == column.column =>
                    {
                    }
                    // A different column appeared: cannot fold into one IN.
                    _ => {
                        return None;
                    }
                }
                values.push(value);
                continue;
            }

            // `<literal> = <column>` orientation.
            if let (None, Some(column)) = (
                resolve_column_reference(left, table_infos),
                resolve_column_reference(right, table_infos),
            ) && let Some(literal) = extract_literal(left)
                && let Some(value) = literal_to_plan_value_for_join(literal)
            {
                match common_column {
                    None => common_column = Some(column),
                    Some(ref prev)
                        if prev.table == column.table && prev.column == column.column => {}
                    _ => return None,
                }
                values.push(value);
                continue;
            }
        }
        // Pre-lowered `field = literal` predicate form.
        else if let LlkvExpr::Pred(filter) = child
            && let Operator::Equals(ref literal) = filter.op
            && let Some(column) =
                resolve_column_reference(&ScalarExpr::Column(filter.field_id.clone()), table_infos)
            && let Some(value) = literal_to_plan_value_for_join(literal)
        {
            match common_column {
                None => common_column = Some(column),
                Some(ref prev) if prev.table == column.table && prev.column == column.column => {}
                _ => return None,
            }
            values.push(value);
            continue;
        }

        // Reached when a Compare matched but neither orientation fit, or the
        // child was some other expression shape entirely: abort the fold.
        return None;
    }

    common_column.map(|col| (col, values))
}
8859
/// Analyze a WHERE expression's top-level AND conjuncts into a
/// `JoinConstraintPlan`: column-to-column equalities (hash-join key
/// candidates), single-column literal / IN-list constraints, and an
/// unsatisfiable flag for constant-false filters.
///
/// Conjuncts that fit no recognized shape are left uncounted, so callers can
/// compare `handled_conjuncts` with `total_conjuncts` to detect a residual
/// filter. Always returns `Some` in the current implementation.
fn extract_join_constraints(
    expr: &LlkvExpr<'static, String>,
    table_infos: &[TableInfo<'_>],
) -> Option<JoinConstraintPlan> {
    // Flatten nested ANDs into independent conjuncts.
    let mut conjuncts = Vec::new();
    collect_conjuncts_lenient(expr, &mut conjuncts);

    let total_conjuncts = conjuncts.len();
    let mut equalities = Vec::new();
    let mut literals = Vec::new();
    let mut unsatisfiable = false;
    let mut handled_conjuncts = 0;

    for conjunct in conjuncts {
        match conjunct {
            // TRUE is trivially handled and contributes nothing.
            LlkvExpr::Literal(true) => {
                handled_conjuncts += 1;
            }
            // FALSE makes the whole AND unsatisfiable; stop analyzing.
            LlkvExpr::Literal(false) => {
                unsatisfiable = true;
                handled_conjuncts += 1;
                break;
            }
            LlkvExpr::Compare {
                left,
                op: CompareOp::Eq,
                right,
            } => {
                match (
                    resolve_column_reference(left, table_infos),
                    resolve_column_reference(right, table_infos),
                ) {
                    // column = column: a join-key candidate.
                    (Some(left_col), Some(right_col)) => {
                        equalities.push(ColumnEquality {
                            left: left_col,
                            right: right_col,
                        });
                        handled_conjuncts += 1;
                        continue;
                    }
                    // column = literal.
                    (Some(column), None) => {
                        if let Some(literal) = extract_literal(right)
                            && let Some(value) = literal_to_plan_value_for_join(literal)
                        {
                            literals
                                .push(ColumnConstraint::Equality(ColumnLiteral { column, value }));
                            handled_conjuncts += 1;
                            continue;
                        }
                    }
                    // literal = column.
                    (None, Some(column)) => {
                        if let Some(literal) = extract_literal(left)
                            && let Some(value) = literal_to_plan_value_for_join(literal)
                        {
                            literals
                                .push(ColumnConstraint::Equality(ColumnLiteral { column, value }));
                            handled_conjuncts += 1;
                            continue;
                        }
                    }
                    // Neither side resolves: leave the conjunct unhandled.
                    _ => {}
                }
            }
            // column IN (literal, …), non-negated only.
            // NOTE(review): non-literal list items are silently dropped while
            // the conjunct is still counted as handled — a mixed list would
            // narrow the filter; confirm list items here are always literals.
            LlkvExpr::InList {
                expr: col_expr,
                list,
                negated: false,
            } => {
                if let Some(column) = resolve_column_reference(col_expr, table_infos) {
                    let mut in_list_values = Vec::new();
                    for item in list {
                        if let Some(literal) = extract_literal(item)
                            && let Some(value) = literal_to_plan_value_for_join(literal)
                        {
                            in_list_values.push(value);
                        }
                    }
                    if !in_list_values.is_empty() {
                        literals.push(ColumnConstraint::InList(ColumnInList {
                            column,
                            values: in_list_values,
                        }));
                        handled_conjuncts += 1;
                        continue;
                    }
                }
            }
            // OR chain of equalities on one column, folded into an IN list.
            LlkvExpr::Or(or_children) => {
                if let Some((column, values)) = try_extract_or_as_in_list(or_children, table_infos)
                {
                    literals.push(ColumnConstraint::InList(ColumnInList { column, values }));
                    handled_conjuncts += 1;
                    continue;
                }
            }
            // Pre-lowered `field = literal` predicate form.
            LlkvExpr::Pred(filter) => {
                if let Operator::Equals(ref literal) = filter.op
                    && let Some(column) = resolve_column_reference(
                        &ScalarExpr::Column(filter.field_id.clone()),
                        table_infos,
                    )
                    && let Some(value) = literal_to_plan_value_for_join(literal)
                {
                    literals.push(ColumnConstraint::Equality(ColumnLiteral { column, value }));
                    handled_conjuncts += 1;
                    continue;
                }
            }
            // Anything else stays unhandled (counted in the residual).
            _ => {
            }
        }
    }

    Some(JoinConstraintPlan {
        equalities,
        literals,
        unsatisfiable,
        total_conjuncts,
        handled_conjuncts,
    })
}
9019
9020fn resolve_column_reference(
9021 expr: &ScalarExpr<String>,
9022 table_infos: &[TableInfo<'_>],
9023) -> Option<ColumnRef> {
9024 let name = match expr {
9025 ScalarExpr::Column(name) => name.trim(),
9026 _ => return None,
9027 };
9028
9029 let mut parts: Vec<&str> = name
9030 .trim_start_matches('.')
9031 .split('.')
9032 .filter(|segment| !segment.is_empty())
9033 .collect();
9034
9035 if parts.is_empty() {
9036 return None;
9037 }
9038
9039 let column_part = parts.pop()?.to_ascii_lowercase();
9040 if parts.is_empty() {
9041 for info in table_infos {
9045 if let Some(&col_idx) = info.column_map.get(&column_part) {
9046 return Some(ColumnRef {
9047 table: info.index,
9048 column: col_idx,
9049 });
9050 }
9051 }
9052 return None;
9053 }
9054
9055 let table_ident = parts.join(".").to_ascii_lowercase();
9056 for info in table_infos {
9057 if matches_table_ident(info.table_ref, &table_ident) {
9058 if let Some(&col_idx) = info.column_map.get(&column_part) {
9059 return Some(ColumnRef {
9060 table: info.index,
9061 column: col_idx,
9062 });
9063 } else {
9064 return None;
9065 }
9066 }
9067 }
9068 None
9069}
9070
9071fn matches_table_ident(table_ref: &llkv_plan::TableRef, ident: &str) -> bool {
9072 if ident.is_empty() {
9073 return false;
9074 }
9075 if let Some(alias) = &table_ref.alias
9076 && alias.to_ascii_lowercase() == ident
9077 {
9078 return true;
9079 }
9080 if table_ref.table.to_ascii_lowercase() == ident {
9081 return true;
9082 }
9083 if !table_ref.schema.is_empty() {
9084 let full = format!(
9085 "{}.{}",
9086 table_ref.schema.to_ascii_lowercase(),
9087 table_ref.table.to_ascii_lowercase()
9088 );
9089 if full == ident {
9090 return true;
9091 }
9092 }
9093 false
9094}
9095
9096fn extract_literal(expr: &ScalarExpr<String>) -> Option<&Literal> {
9097 match expr {
9098 ScalarExpr::Literal(lit) => Some(lit),
9099 _ => None,
9100 }
9101}
9102
9103fn plan_value_from_operator_literal(op_value: &llkv_expr::literal::Literal) -> Option<PlanValue> {
9104 match op_value {
9105 llkv_expr::literal::Literal::Integer(v) => i64::try_from(*v).ok().map(PlanValue::Integer),
9106 llkv_expr::literal::Literal::Float(v) => Some(PlanValue::Float(*v)),
9107 llkv_expr::literal::Literal::Boolean(v) => Some(PlanValue::Integer(if *v { 1 } else { 0 })),
9108 llkv_expr::literal::Literal::String(v) => Some(PlanValue::String(v.clone())),
9109 _ => None,
9110 }
9111}
9112
9113fn literal_to_plan_value_for_join(literal: &Literal) -> Option<PlanValue> {
9114 match literal {
9115 Literal::Integer(v) => i64::try_from(*v).ok().map(PlanValue::Integer),
9116 Literal::Float(v) => Some(PlanValue::Float(*v)),
9117 Literal::Boolean(v) => Some(PlanValue::Integer(if *v { 1 } else { 0 })),
9118 Literal::String(v) => Some(PlanValue::String(v.clone())),
9119 _ => None,
9120 }
9121}
9122
/// Tracks canonicalized rows already emitted so duplicates can be suppressed
/// while streaming batches for `SELECT DISTINCT`.
#[derive(Default)]
struct DistinctState {
    // Rows observed so far, keyed by their canonical encoding.
    seen: FxHashSet<CanonicalRow>,
}

impl DistinctState {
    /// Record `row`; returns `true` when the row was not seen before
    /// (i.e. the caller should keep it).
    fn insert(&mut self, row: CanonicalRow) -> bool {
        self.seen.insert(row)
    }
}
9133
9134fn distinct_filter_batch(
9135 batch: RecordBatch,
9136 state: &mut DistinctState,
9137) -> ExecutorResult<Option<RecordBatch>> {
9138 if batch.num_rows() == 0 {
9139 return Ok(None);
9140 }
9141
9142 let mut keep_flags = Vec::with_capacity(batch.num_rows());
9143 let mut keep_count = 0usize;
9144
9145 for row_idx in 0..batch.num_rows() {
9146 let row = CanonicalRow::from_batch(&batch, row_idx)?;
9147 if state.insert(row) {
9148 keep_flags.push(true);
9149 keep_count += 1;
9150 } else {
9151 keep_flags.push(false);
9152 }
9153 }
9154
9155 if keep_count == 0 {
9156 return Ok(None);
9157 }
9158
9159 if keep_count == batch.num_rows() {
9160 return Ok(Some(batch));
9161 }
9162
9163 let mut builder = BooleanBuilder::with_capacity(batch.num_rows());
9164 for flag in keep_flags {
9165 builder.append_value(flag);
9166 }
9167 let mask = Arc::new(builder.finish());
9168
9169 let filtered = filter_record_batch(&batch, &mask).map_err(|err| {
9170 Error::InvalidArgumentError(format!("failed to apply DISTINCT filter: {err}"))
9171 })?;
9172
9173 Ok(Some(filtered))
9174}
9175
9176fn sort_record_batch_with_order(
9177 schema: &Arc<Schema>,
9178 batch: &RecordBatch,
9179 order_by: &[OrderByPlan],
9180) -> ExecutorResult<RecordBatch> {
9181 if order_by.is_empty() {
9182 return Ok(batch.clone());
9183 }
9184
9185 let mut sort_columns: Vec<SortColumn> = Vec::with_capacity(order_by.len());
9186
9187 for order in order_by {
9188 let column_index = match &order.target {
9189 OrderTarget::Column(name) => schema.index_of(name).map_err(|_| {
9190 Error::InvalidArgumentError(format!(
9191 "ORDER BY references unknown column '{}'",
9192 name
9193 ))
9194 })?,
9195 OrderTarget::Index(idx) => {
9196 if *idx >= batch.num_columns() {
9197 return Err(Error::InvalidArgumentError(format!(
9198 "ORDER BY position {} is out of bounds for {} columns",
9199 idx + 1,
9200 batch.num_columns()
9201 )));
9202 }
9203 *idx
9204 }
9205 OrderTarget::All => {
9206 return Err(Error::InvalidArgumentError(
9207 "ORDER BY ALL should be expanded before sorting".into(),
9208 ));
9209 }
9210 };
9211
9212 let source_array = batch.column(column_index);
9213
9214 let values: ArrayRef = match order.sort_type {
9215 OrderSortType::Native => Arc::clone(source_array),
9216 OrderSortType::CastTextToInteger => {
9217 let strings = source_array
9218 .as_any()
9219 .downcast_ref::<StringArray>()
9220 .ok_or_else(|| {
9221 Error::InvalidArgumentError(
9222 "ORDER BY CAST expects the underlying column to be TEXT".into(),
9223 )
9224 })?;
9225 let mut builder = Int64Builder::with_capacity(strings.len());
9226 for i in 0..strings.len() {
9227 if strings.is_null(i) {
9228 builder.append_null();
9229 } else {
9230 match strings.value(i).parse::<i64>() {
9231 Ok(value) => builder.append_value(value),
9232 Err(_) => builder.append_null(),
9233 }
9234 }
9235 }
9236 Arc::new(builder.finish()) as ArrayRef
9237 }
9238 };
9239
9240 let sort_options = SortOptions {
9241 descending: !order.ascending,
9242 nulls_first: order.nulls_first,
9243 };
9244
9245 sort_columns.push(SortColumn {
9246 values,
9247 options: Some(sort_options),
9248 });
9249 }
9250
9251 let indices = lexsort_to_indices(&sort_columns, None).map_err(|err| {
9252 Error::InvalidArgumentError(format!("failed to compute ORDER BY indices: {err}"))
9253 })?;
9254
9255 let perm = indices
9256 .as_any()
9257 .downcast_ref::<UInt32Array>()
9258 .ok_or_else(|| Error::Internal("ORDER BY sorting produced unexpected index type".into()))?;
9259
9260 let mut reordered_columns: Vec<ArrayRef> = Vec::with_capacity(batch.num_columns());
9261 for col_idx in 0..batch.num_columns() {
9262 let reordered = take(batch.column(col_idx), perm, None).map_err(|err| {
9263 Error::InvalidArgumentError(format!(
9264 "failed to apply ORDER BY permutation to column {col_idx}: {err}"
9265 ))
9266 })?;
9267 reordered_columns.push(reordered);
9268 }
9269
9270 RecordBatch::try_new(Arc::clone(schema), reordered_columns)
9271 .map_err(|err| Error::Internal(format!("failed to build reordered ORDER BY batch: {err}")))
9272}
9273
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::array::{Array, ArrayRef, Int64Array};
    use arrow::datatypes::{DataType, Field, Schema};
    use llkv_expr::expr::BinaryOp;
    use std::sync::Arc;

    /// `CrossProductExpressionContext` should evaluate both a bare literal
    /// (broadcast to every row of the batch) and a column-plus-literal
    /// binary expression against fully-qualified column names.
    #[test]
    fn cross_product_context_evaluates_expressions() {
        // Schema mimics a cross-product output: columns carry their
        // schema.table.column qualified names.
        let schema = Arc::new(Schema::new(vec![
            Field::new("main.tab2.a", DataType::Int64, false),
            Field::new("main.tab2.b", DataType::Int64, false),
        ]));

        let batch = RecordBatch::try_new(
            Arc::clone(&schema),
            vec![
                Arc::new(Int64Array::from(vec![1, 2, 3])) as ArrayRef,
                Arc::new(Int64Array::from(vec![10, 20, 30])) as ArrayRef,
            ],
        )
        .expect("valid batch");

        let lookup = build_cross_product_column_lookup(schema.as_ref(), &[], &[], &[]);
        let mut ctx = CrossProductExpressionContext::new(schema.as_ref(), lookup)
            .expect("context builds from schema");

        // A literal must broadcast to one value per row.
        let literal_expr: ScalarExpr<String> = ScalarExpr::literal(67);
        let literal = ctx
            .evaluate(&literal_expr, &batch)
            .expect("literal evaluation succeeds");
        let literal_array = literal
            .as_any()
            .downcast_ref::<Int64Array>()
            .expect("int64 literal result");
        assert_eq!(literal_array.len(), 3);
        assert!(literal_array.iter().all(|value| value == Some(67)));

        // A partially-qualified column ("tab2.a") must resolve against the
        // fully-qualified schema name and support arithmetic.
        let add_expr = ScalarExpr::binary(
            ScalarExpr::column("tab2.a".to_string()),
            BinaryOp::Add,
            ScalarExpr::literal(5),
        );
        let added = ctx
            .evaluate(&add_expr, &batch)
            .expect("column addition succeeds");
        let added_array = added
            .as_any()
            .downcast_ref::<Int64Array>()
            .expect("int64 addition result");
        assert_eq!(added_array.values(), &[6, 7, 8]);
    }

    /// Chaining `cross_join_table_batches` should produce the full Cartesian
    /// product of three single-column tables (2 x 3 x 1 = 6 rows) with the
    /// left table varying slowest.
    #[test]
    fn cross_product_handles_more_than_two_tables() {
        let schema_a = Arc::new(Schema::new(vec![Field::new(
            "main.t1.a",
            DataType::Int64,
            false,
        )]));
        let schema_b = Arc::new(Schema::new(vec![Field::new(
            "main.t2.b",
            DataType::Int64,
            false,
        )]));
        let schema_c = Arc::new(Schema::new(vec![Field::new(
            "main.t3.c",
            DataType::Int64,
            false,
        )]));

        let batch_a = RecordBatch::try_new(
            Arc::clone(&schema_a),
            vec![Arc::new(Int64Array::from(vec![1, 2])) as ArrayRef],
        )
        .expect("valid batch");
        let batch_b = RecordBatch::try_new(
            Arc::clone(&schema_b),
            vec![Arc::new(Int64Array::from(vec![10, 20, 30])) as ArrayRef],
        )
        .expect("valid batch");
        let batch_c = RecordBatch::try_new(
            Arc::clone(&schema_c),
            vec![Arc::new(Int64Array::from(vec![100])) as ArrayRef],
        )
        .expect("valid batch");

        // Wrap each batch in the per-table container the join consumes.
        let data_a = TableCrossProductData {
            schema: schema_a,
            batches: vec![batch_a],
            column_counts: vec![1],
            table_indices: vec![0],
        };
        let data_b = TableCrossProductData {
            schema: schema_b,
            batches: vec![batch_b],
            column_counts: vec![1],
            table_indices: vec![1],
        };
        let data_c = TableCrossProductData {
            schema: schema_c,
            batches: vec![batch_c],
            column_counts: vec![1],
            table_indices: vec![2],
        };

        // First join: 2 rows x 3 rows = 6 rows, 2 columns.
        let ab = cross_join_table_batches(data_a, data_b).expect("two-table product");
        assert_eq!(ab.schema.fields().len(), 2);
        assert_eq!(ab.batches.len(), 1);
        assert_eq!(ab.batches[0].num_rows(), 6);

        // Second join against a single-row table keeps 6 rows, adds a column.
        let abc = cross_join_table_batches(ab, data_c).expect("three-table product");
        assert_eq!(abc.schema.fields().len(), 3);
        assert_eq!(abc.batches.len(), 1);

        let final_batch = &abc.batches[0];
        assert_eq!(final_batch.num_rows(), 6);

        // Left column repeats each value once per right-side row.
        let col_a = final_batch
            .column(0)
            .as_any()
            .downcast_ref::<Int64Array>()
            .expect("left column values");
        assert_eq!(col_a.values(), &[1, 1, 1, 2, 2, 2]);

        // Middle column cycles through its values for each left row.
        let col_b = final_batch
            .column(1)
            .as_any()
            .downcast_ref::<Int64Array>()
            .expect("middle column values");
        assert_eq!(col_b.values(), &[10, 20, 30, 10, 20, 30]);

        // Single-row right table broadcasts its value to every row.
        let col_c = final_batch
            .column(2)
            .as_any()
            .downcast_ref::<Int64Array>()
            .expect("right column values");
        assert_eq!(col_c.values(), &[100, 100, 100, 100, 100, 100]);
    }
}