llkv_executor/
lib.rs

1//! Query execution engine for LLKV.
2//!
3//! This crate provides the query execution layer that sits between the query planner
4//! (`llkv-plan`) and the storage layer (`llkv-table`, `llkv-column-map`).
5//!
6//! # Module Organization
7//!
8//! - [`translation`]: Expression and projection translation utilities
9//! - [`types`]: Core type definitions (tables, schemas, columns)  
10//! - [`insert`]: INSERT operation support (value coercion)
11//! - [`utils`]: Utility functions (time)
12//!
13//! The [`QueryExecutor`] and [`SelectExecution`] implementations are defined inline
14//! in this module for now, but should be extracted to a dedicated `query` module
15//! in a future refactoring.
16
17use arrow::array::{
18    Array, ArrayRef, BooleanArray, BooleanBuilder, Float32Array, Float64Array, Int8Array,
19    Int16Array, Int32Array, Int64Array, Int64Builder, LargeStringArray, RecordBatch, StringArray,
20    StructArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, new_null_array,
21};
22use arrow::compute::{
23    SortColumn, SortOptions, cast, concat_batches, filter_record_batch, lexsort_to_indices, take,
24};
25use arrow::datatypes::{DataType, Field, Float64Type, Int64Type, Schema};
26use llkv_aggregate::{AggregateAccumulator, AggregateKind, AggregateSpec, AggregateState};
27use llkv_column_map::gather::gather_indices_from_batches;
28use llkv_column_map::store::Projection as StoreProjection;
29use llkv_column_map::types::LogicalFieldId;
30use llkv_expr::SubqueryId;
31use llkv_expr::expr::{
32    AggregateCall, BinaryOp, CompareOp, Expr as LlkvExpr, Filter, Operator, ScalarExpr,
33};
34use llkv_expr::literal::Literal;
35use llkv_expr::typed_predicate::{
36    build_bool_predicate, build_fixed_width_predicate, build_var_width_predicate,
37};
38use llkv_join::cross_join_pair;
39use llkv_plan::{
40    AggregateExpr, AggregateFunction, CanonicalRow, CompoundOperator, CompoundQuantifier,
41    CompoundSelectComponent, CompoundSelectPlan, OrderByPlan, OrderSortType, OrderTarget,
42    PlanValue, SelectPlan, SelectProjection,
43};
44use llkv_result::Error;
45use llkv_storage::pager::Pager;
46use llkv_table::table::{
47    RowIdFilter, ScanOrderDirection, ScanOrderSpec, ScanOrderTransform, ScanProjection,
48    ScanStreamOptions,
49};
50use llkv_table::types::FieldId;
51use llkv_table::{NumericArray, NumericArrayMap, NumericKernels, ROW_ID_FIELD_ID};
52use rayon::prelude::*;
53use rustc_hash::{FxHashMap, FxHashSet};
54use simd_r_drive_entry_handle::EntryHandle;
55use std::convert::TryFrom;
56use std::fmt;
57use std::sync::Arc;
58use std::sync::atomic::Ordering;
59
60#[cfg(test)]
61use std::cell::RefCell;
62
63// ============================================================================
64// Module Declarations
65// ============================================================================
66
67pub mod insert;
68pub mod translation;
69pub mod types;
70pub mod utils;
71
72// ============================================================================
73// Type Aliases and Re-exports
74// ============================================================================
75
76/// Result type for executor operations.
77pub type ExecutorResult<T> = Result<T, Error>;
78
79pub use insert::{
80    build_array_for_column, normalize_insert_value_for_column, resolve_insert_columns,
81};
82pub use translation::{
83    build_projected_columns, build_wildcard_projections, full_table_scan_filter,
84    resolve_field_id_from_schema, schema_for_projections, translate_predicate,
85    translate_predicate_with, translate_scalar, translate_scalar_with,
86};
87pub use types::{
88    ExecutorColumn, ExecutorMultiColumnUnique, ExecutorRowBatch, ExecutorSchema, ExecutorTable,
89    ExecutorTableProvider,
90};
91pub use utils::current_time_micros;
92
/// A single grouping-key component in hashable, comparable form.
///
/// Derives `Eq`/`Hash` so rows can be bucketed via hash-map lookup during
/// GROUP BY evaluation; `Null` is a distinct variant so NULL keys hash and
/// compare consistently with each other.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
enum GroupKeyValue {
    Null,
    Int(i64),
    Bool(bool),
    String(String),
}
100
/// Represents the result value from an aggregate computation.
/// Different aggregates return different types (e.g., AVG returns Float64, COUNT returns Int64).
#[derive(Clone, Debug, PartialEq)]
enum AggregateValue {
    /// Integer-valued result (e.g. COUNT, integer SUM).
    Int64(i64),
    /// Floating-point result (e.g. AVG).
    Float64(f64),
}
108
109impl AggregateValue {
110    /// Convert to i64, truncating floats if necessary
111    fn to_i64(&self) -> i64 {
112        match self {
113            AggregateValue::Int64(v) => *v,
114            AggregateValue::Float64(v) => *v as i64,
115        }
116    }
117
118    /// Convert to f64, promoting integers if necessary
119    #[allow(dead_code)]
120    fn to_f64(&self) -> f64 {
121        match self {
122            AggregateValue::Int64(v) => *v as f64,
123            AggregateValue::Float64(v) => *v,
124        }
125    }
126}
127
/// Location of one materialized row: a record batch plus a row offset into it.
///
/// NOTE(review): usage is outside this chunk; appears to hold a group's
/// representative row — confirm at the use site.
struct GroupState {
    batch: RecordBatch,
    // Row offset within `batch`.
    row_idx: usize,
}
132
/// State for a group when computing aggregates
struct GroupAggregateState {
    // Index of the batch containing the row chosen to represent this group.
    representative_batch_idx: usize,
    // Row offset of the representative row within that batch.
    representative_row: usize,
    // Every (batch index, row index) pair belonging to this group.
    row_locations: Vec<(usize, usize)>,
}
139
/// One column of the final SELECT output: its Arrow field definition plus
/// where the data comes from.
struct OutputColumn {
    field: Field,
    source: OutputSource,
}
144
/// Where an output column's values originate.
enum OutputSource {
    /// Taken directly from the scanned table batch at column `index`.
    TableColumn { index: usize },
    /// Produced by evaluating the computed projection at `projection_index`.
    Computed { projection_index: usize },
}
149
150// ============================================================================
151// Query Logging Helpers
152// ============================================================================
153
// Test-only, per-thread stack of human-readable query labels; the innermost
// label lets executor diagnostics name the SQL statement being run.
#[cfg(test)]
thread_local! {
    static QUERY_LABEL_STACK: RefCell<Vec<String>> = const { RefCell::new(Vec::new()) };
}
158
/// Guard object that pops the current query label when dropped.
///
/// Obtained from [`push_query_label`]; keep it alive (RAII-style) for the
/// duration of the query whose label it installed.
pub struct QueryLogGuard {
    // Prevents construction outside this module.
    _private: (),
}
163
/// Install a query label for the current thread so that executor logs can
/// annotate diagnostics with the originating SQL statement.
///
/// Returns a guard that removes the label again when dropped, so nested
/// queries see their own innermost label.
#[cfg(test)]
pub fn push_query_label(label: impl Into<String>) -> QueryLogGuard {
    QUERY_LABEL_STACK.with(|stack| stack.borrow_mut().push(label.into()));
    QueryLogGuard { _private: () }
}
171
/// Install a query label for the current thread so that executor logs can
/// annotate diagnostics with the originating SQL statement.
///
/// No-op in non-test builds.
#[cfg(not(test))]
#[inline]
pub fn push_query_label(_label: impl Into<String>) -> QueryLogGuard {
    // Returns an inert guard so call sites compile identically in both builds.
    QueryLogGuard { _private: () }
}
181
#[cfg(test)]
impl Drop for QueryLogGuard {
    fn drop(&mut self) {
        // Pop the label this guard installed; labels nest stack-like, so the
        // most recent push is always the one removed.
        QUERY_LABEL_STACK.with(|stack| {
            let _ = stack.borrow_mut().pop();
        });
    }
}
190
#[cfg(not(test))]
impl Drop for QueryLogGuard {
    // Mirrors the test-build guard's RAII shape without any bookkeeping.
    #[inline]
    fn drop(&mut self) {
        // No-op in non-test builds
    }
}
198
/// Fetch the innermost query label associated with the current execution thread.
#[cfg(test)]
pub fn current_query_label() -> Option<String> {
    // Innermost = most recently pushed (top of the stack).
    QUERY_LABEL_STACK.with(|stack| stack.borrow().last().cloned())
}
204
/// Fetch the innermost query label associated with the current execution thread.
///
/// Always returns None in non-test builds.
#[cfg(not(test))]
#[inline]
pub fn current_query_label() -> Option<String> {
    // No label stack exists outside test builds.
    None
}
213
214// ============================================================================
215// Query Executor - Implementation
216// ============================================================================
217// TODO: Extract this implementation into a dedicated query/ module
218
219/// Extract a simple column name from a ScalarExpr when possible.
220///
221/// Returns `Some(column_name)` if the expression is a plain column reference
222/// (possibly wrapped in unary + or - operators), otherwise returns `None`
223/// (indicating a complex expression that needs full evaluation).
224///
225/// This handles common cases like `col`, `+col`, `-col`, `++col`, etc.
226fn try_extract_simple_column<F: AsRef<str>>(expr: &ScalarExpr<F>) -> Option<&str> {
227    match expr {
228        ScalarExpr::Column(name) => Some(name.as_ref()),
229        // Unwrap unary operators to check if there's a column underneath
230        ScalarExpr::Binary { left, op, right } => {
231            // Check for unary-like patterns: left or right is a literal that acts as identity
232            match op {
233                BinaryOp::Add => {
234                    // Check if one side is zero (identity for addition)
235                    if matches!(left.as_ref(), ScalarExpr::Literal(Literal::Integer(0))) {
236                        return try_extract_simple_column(right);
237                    }
238                    if matches!(right.as_ref(), ScalarExpr::Literal(Literal::Integer(0))) {
239                        return try_extract_simple_column(left);
240                    }
241                }
242                // Note: We do NOT handle Subtract here because 0 - col is NOT the same as col
243                // It needs to be evaluated as a negation
244                BinaryOp::Multiply => {
245                    // -col is represented as Multiply(-1, col)
246                    if matches!(left.as_ref(), ScalarExpr::Literal(Literal::Integer(-1))) {
247                        return try_extract_simple_column(right);
248                    }
249                    if matches!(right.as_ref(), ScalarExpr::Literal(Literal::Integer(-1))) {
250                        return try_extract_simple_column(left);
251                    }
252                    // +col might be Multiply(1, col)
253                    if matches!(left.as_ref(), ScalarExpr::Literal(Literal::Integer(1))) {
254                        return try_extract_simple_column(right);
255                    }
256                    if matches!(right.as_ref(), ScalarExpr::Literal(Literal::Integer(1))) {
257                        return try_extract_simple_column(left);
258                    }
259                }
260                _ => {}
261            }
262            None
263        }
264        _ => None,
265    }
266}
267
268/// Convert a vector of PlanValues to an Arrow array.
269///
270/// This currently supports Integer, Float, Null, and String values.
271/// The array type is inferred from the first non-null value.
272fn plan_values_to_arrow_array(values: &[PlanValue]) -> ExecutorResult<ArrayRef> {
273    use arrow::array::{Float64Array, Int64Array, StringArray};
274
275    // Infer type from first non-null value
276    let mut value_type = None;
277    for v in values {
278        if !matches!(v, PlanValue::Null) {
279            value_type = Some(v);
280            break;
281        }
282    }
283
284    match value_type {
285        Some(PlanValue::Integer(_)) => {
286            let int_values: Vec<Option<i64>> = values
287                .iter()
288                .map(|v| match v {
289                    PlanValue::Integer(i) => Some(*i),
290                    PlanValue::Null => None,
291                    _ => Some(0), // Type mismatch, use default
292                })
293                .collect();
294            Ok(Arc::new(Int64Array::from(int_values)) as ArrayRef)
295        }
296        Some(PlanValue::Float(_)) => {
297            let float_values: Vec<Option<f64>> = values
298                .iter()
299                .map(|v| match v {
300                    PlanValue::Float(f) => Some(*f),
301                    PlanValue::Integer(i) => Some(*i as f64),
302                    PlanValue::Null => None,
303                    _ => Some(0.0), // Type mismatch, use default
304                })
305                .collect();
306            Ok(Arc::new(Float64Array::from(float_values)) as ArrayRef)
307        }
308        Some(PlanValue::String(_)) => {
309            let string_values: Vec<Option<&str>> = values
310                .iter()
311                .map(|v| match v {
312                    PlanValue::String(s) => Some(s.as_str()),
313                    PlanValue::Null => None,
314                    _ => Some(""), // Type mismatch, use default
315                })
316                .collect();
317            Ok(Arc::new(StringArray::from(string_values)) as ArrayRef)
318        }
319        _ => {
320            // All nulls, create an Int64 array of nulls
321            let null_values: Vec<Option<i64>> = vec![None; values.len()];
322            Ok(Arc::new(Int64Array::from(null_values)) as ArrayRef)
323        }
324    }
325}
326
327/// Resolve a column name to its index using flexible name matching.
328///
329/// This function handles various column name formats:
330/// 1. Exact match (case-insensitive)
331/// 2. Unqualified match (e.g., "col0" matches "table.col0" or "alias.col0")
332///
333/// This is useful when aggregate expressions reference columns with table qualifiers
334/// (like "cor0.col0") but the schema has different qualification patterns.
335fn resolve_column_name_to_index(
336    col_name: &str,
337    column_lookup_map: &FxHashMap<String, usize>,
338) -> Option<usize> {
339    let col_lower = col_name.to_ascii_lowercase();
340
341    // Try exact match first
342    if let Some(&idx) = column_lookup_map.get(&col_lower) {
343        return Some(idx);
344    }
345
346    // Try matching just the column name without table qualifier
347    // e.g., "cor0.col0" should match a field ending in ".col0" or exactly "col0"
348    let unqualified = col_name
349        .rsplit('.')
350        .next()
351        .unwrap_or(col_name)
352        .to_ascii_lowercase();
353    column_lookup_map
354        .iter()
355        .find(|(k, _)| k.ends_with(&format!(".{}", unqualified)) || k == &&unqualified)
356        .map(|(_, &idx)| idx)
357}
358
/// Query executor that executes SELECT plans.
///
/// Generic over the pager `P` used by the storage layer; tables are resolved
/// on demand through the injected provider.
pub struct QueryExecutor<P>
where
    P: Pager<Blob = EntryHandle> + Send + Sync,
{
    // Source of executor table handles, looked up by qualified table name.
    provider: Arc<dyn ExecutorTableProvider<P>>,
}
366
367impl<P> QueryExecutor<P>
368where
369    P: Pager<Blob = EntryHandle> + Send + Sync + 'static,
370{
    /// Create an executor that resolves tables through `provider`.
    pub fn new(provider: Arc<dyn ExecutorTableProvider<P>>) -> Self {
        Self { provider }
    }
374
    /// Execute a SELECT plan with no additional row-id filtering.
    ///
    /// Convenience wrapper around [`Self::execute_select_with_filter`].
    pub fn execute_select(&self, plan: SelectPlan) -> ExecutorResult<SelectExecution<P>> {
        self.execute_select_with_filter(plan, None)
    }
378
379    pub fn execute_select_with_filter(
380        &self,
381        plan: SelectPlan,
382        row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
383    ) -> ExecutorResult<SelectExecution<P>> {
384        if plan.compound.is_some() {
385            return self.execute_compound_select(plan, row_filter);
386        }
387
388        // Handle SELECT without FROM clause (e.g., SELECT 42, SELECT {'a': 1})
389        if plan.tables.is_empty() {
390            return self.execute_select_without_table(plan);
391        }
392
393        if !plan.group_by.is_empty() {
394            if plan.tables.len() > 1 {
395                return self.execute_cross_product(plan);
396            }
397            let table_ref = &plan.tables[0];
398            let table = self.provider.get_table(&table_ref.qualified_name())?;
399            let display_name = table_ref.qualified_name();
400            return self.execute_group_by_single_table(table, display_name, plan, row_filter);
401        }
402
403        // Handle multi-table queries (cross products/joins)
404        if plan.tables.len() > 1 {
405            return self.execute_cross_product(plan);
406        }
407
408        // Single table query
409        let table_ref = &plan.tables[0];
410        let table = self.provider.get_table(&table_ref.qualified_name())?;
411        let display_name = table_ref.qualified_name();
412
413        if !plan.aggregates.is_empty() {
414            self.execute_aggregates(table, display_name, plan, row_filter)
415        } else if self.has_computed_aggregates(&plan) {
416            // Handle computed projections that contain embedded aggregates
417            self.execute_computed_aggregates(table, display_name, plan, row_filter)
418        } else {
419            self.execute_projection(table, display_name, plan, row_filter)
420        }
421    }
422
    /// Execute a compound SELECT query (UNION, EXCEPT, INTERSECT).
    ///
    /// Evaluates the initial SELECT and each subsequent operation, combining results
    /// according to the specified operator and quantifier. Handles deduplication for
    /// DISTINCT quantifiers using hash-based row encoding.
    ///
    /// # Arguments
    ///
    /// * `plan` - SELECT plan containing compound operations
    /// * `row_filter` - Optional row ID filter to apply to all component queries
    ///
    /// # Implementation Notes
    ///
    /// - UNION ALL: Simple concatenation with no deduplication
    /// - UNION DISTINCT: Hash-based deduplication across all rows
    /// - EXCEPT DISTINCT: Removes right-side rows from left-side results
    /// - INTERSECT DISTINCT: Keeps only rows present in both sides
    /// - EXCEPT ALL: Not yet implemented
    /// - INTERSECT ALL: Not yet implemented
    fn execute_compound_select(
        &self,
        plan: SelectPlan,
        row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
    ) -> ExecutorResult<SelectExecution<P>> {
        // ORDER BY applies to the combined result, so hold onto it until all
        // components have been merged.
        let order_by = plan.order_by.clone();
        let compound = plan.compound.expect("compound plan should be present");

        let CompoundSelectPlan {
            initial,
            operations,
        } = compound;

        let initial_exec = self.execute_select_with_filter(*initial, row_filter.clone())?;
        let schema = initial_exec.schema();
        let mut rows = initial_exec.into_rows()?;
        // Invariant: when `distinct_cache` is Some, it holds the encoded key
        // of every row currently in `rows` (i.e. `rows` is duplicate-free).
        // `None` means `rows` may contain duplicates.
        let mut distinct_cache: Option<FxHashSet<Vec<u8>>> = None;

        for component in operations {
            let exec = self.execute_select_with_filter(component.plan, row_filter.clone())?;
            let other_schema = exec.schema();
            ensure_schema_compatibility(schema.as_ref(), other_schema.as_ref())?;
            let other_rows = exec.into_rows()?;

            match (component.operator, component.quantifier) {
                (CompoundOperator::Union, CompoundQuantifier::All) => {
                    // Plain concatenation; duplicates may now exist, so the
                    // distinct invariant no longer holds.
                    rows.extend(other_rows);
                    distinct_cache = None;
                }
                (CompoundOperator::Union, CompoundQuantifier::Distinct) => {
                    // Dedupe the accumulated rows first, then admit only
                    // right-side rows whose key is not yet present.
                    ensure_distinct_rows(&mut rows, &mut distinct_cache);
                    let cache = distinct_cache
                        .as_mut()
                        .expect("distinct cache should be initialized");
                    for row in other_rows {
                        let key = encode_row(&row);
                        if cache.insert(key) {
                            rows.push(row);
                        }
                    }
                }
                (CompoundOperator::Except, CompoundQuantifier::Distinct) => {
                    ensure_distinct_rows(&mut rows, &mut distinct_cache);
                    let cache = distinct_cache
                        .as_mut()
                        .expect("distinct cache should be initialized");
                    if rows.is_empty() {
                        continue;
                    }
                    // Drop every left row whose key appears on the right,
                    // keeping the cache in sync with `rows` as we go.
                    let mut remove_keys = FxHashSet::default();
                    for row in other_rows {
                        remove_keys.insert(encode_row(&row));
                    }
                    if remove_keys.is_empty() {
                        continue;
                    }
                    rows.retain(|row| {
                        let key = encode_row(row);
                        if remove_keys.contains(&key) {
                            cache.remove(&key);
                            false
                        } else {
                            true
                        }
                    });
                }
                (CompoundOperator::Except, CompoundQuantifier::All) => {
                    return Err(Error::InvalidArgumentError(
                        "EXCEPT ALL is not supported yet".into(),
                    ));
                }
                (CompoundOperator::Intersect, CompoundQuantifier::Distinct) => {
                    ensure_distinct_rows(&mut rows, &mut distinct_cache);
                    // Keep only left rows whose key also appears on the
                    // right, rebuilding rows and cache together.
                    let mut right_keys = FxHashSet::default();
                    for row in other_rows {
                        right_keys.insert(encode_row(&row));
                    }
                    if right_keys.is_empty() {
                        rows.clear();
                        distinct_cache = Some(FxHashSet::default());
                        continue;
                    }
                    let mut new_rows = Vec::new();
                    let mut new_cache = FxHashSet::default();
                    for row in rows.drain(..) {
                        let key = encode_row(&row);
                        if right_keys.contains(&key) && new_cache.insert(key) {
                            new_rows.push(row);
                        }
                    }
                    rows = new_rows;
                    distinct_cache = Some(new_cache);
                }
                (CompoundOperator::Intersect, CompoundQuantifier::All) => {
                    return Err(Error::InvalidArgumentError(
                        "INTERSECT ALL is not supported yet".into(),
                    ));
                }
            }
        }

        // Materialize the merged rows and apply any trailing ORDER BY.
        let mut batch = rows_to_record_batch(schema.clone(), &rows)?;
        if !order_by.is_empty() && batch.num_rows() > 0 {
            batch = sort_record_batch_with_order(&schema, &batch, &order_by)?;
        }

        Ok(SelectExecution::new_single_batch(
            String::new(),
            schema,
            batch,
        ))
    }
554
555    /// Check if any computed projections contain aggregate functions
556    fn has_computed_aggregates(&self, plan: &SelectPlan) -> bool {
557        plan.projections.iter().any(|proj| {
558            if let SelectProjection::Computed { expr, .. } = proj {
559                Self::expr_contains_aggregate(expr)
560            } else {
561                false
562            }
563        })
564    }
565
566    /// Recursively check if a predicate expression contains aggregates
567    fn predicate_contains_aggregate(expr: &llkv_expr::expr::Expr<String>) -> bool {
568        match expr {
569            llkv_expr::expr::Expr::And(exprs) | llkv_expr::expr::Expr::Or(exprs) => {
570                exprs.iter().any(Self::predicate_contains_aggregate)
571            }
572            llkv_expr::expr::Expr::Not(inner) => Self::predicate_contains_aggregate(inner),
573            llkv_expr::expr::Expr::Compare { left, right, .. } => {
574                Self::expr_contains_aggregate(left) || Self::expr_contains_aggregate(right)
575            }
576            llkv_expr::expr::Expr::InList { expr, list, .. } => {
577                Self::expr_contains_aggregate(expr)
578                    || list.iter().any(|e| Self::expr_contains_aggregate(e))
579            }
580            llkv_expr::expr::Expr::IsNull { expr, .. } => Self::expr_contains_aggregate(expr),
581            llkv_expr::expr::Expr::Literal(_) => false,
582            llkv_expr::expr::Expr::Pred(_) => false,
583            llkv_expr::expr::Expr::Exists(_) => false,
584        }
585    }
586
587    /// Recursively check if a scalar expression contains aggregates
588    fn expr_contains_aggregate(expr: &ScalarExpr<String>) -> bool {
589        match expr {
590            ScalarExpr::Aggregate(_) => true,
591            ScalarExpr::Binary { left, right, .. } => {
592                Self::expr_contains_aggregate(left) || Self::expr_contains_aggregate(right)
593            }
594            ScalarExpr::Compare { left, right, .. } => {
595                Self::expr_contains_aggregate(left) || Self::expr_contains_aggregate(right)
596            }
597            ScalarExpr::GetField { base, .. } => Self::expr_contains_aggregate(base),
598            ScalarExpr::Cast { expr, .. } => Self::expr_contains_aggregate(expr),
599            ScalarExpr::Not(expr) => Self::expr_contains_aggregate(expr),
600            ScalarExpr::IsNull { expr, .. } => Self::expr_contains_aggregate(expr),
601            ScalarExpr::Case {
602                operand,
603                branches,
604                else_expr,
605            } => {
606                operand
607                    .as_deref()
608                    .map(Self::expr_contains_aggregate)
609                    .unwrap_or(false)
610                    || branches.iter().any(|(when_expr, then_expr)| {
611                        Self::expr_contains_aggregate(when_expr)
612                            || Self::expr_contains_aggregate(then_expr)
613                    })
614                    || else_expr
615                        .as_deref()
616                        .map(Self::expr_contains_aggregate)
617                        .unwrap_or(false)
618            }
619            ScalarExpr::Coalesce(items) => items.iter().any(Self::expr_contains_aggregate),
620            ScalarExpr::Column(_) | ScalarExpr::Literal(_) => false,
621            ScalarExpr::ScalarSubquery(_) => false,
622        }
623    }
624
625    fn evaluate_exists_subquery(
626        &self,
627        context: &mut CrossProductExpressionContext,
628        subquery: &llkv_plan::FilterSubquery,
629        batch: &RecordBatch,
630        row_idx: usize,
631    ) -> ExecutorResult<bool> {
632        let bindings =
633            collect_correlated_bindings(context, batch, row_idx, &subquery.correlated_columns)?;
634        let bound_plan = bind_select_plan(&subquery.plan, &bindings)?;
635        let execution = self.execute_select(bound_plan)?;
636        let mut found = false;
637        execution.stream(|inner_batch| {
638            if inner_batch.num_rows() > 0 {
639                found = true;
640            }
641            Ok(())
642        })?;
643        Ok(found)
644    }
645
646    fn evaluate_scalar_subquery_literal(
647        &self,
648        context: &mut CrossProductExpressionContext,
649        subquery: &llkv_plan::ScalarSubquery,
650        batch: &RecordBatch,
651        row_idx: usize,
652    ) -> ExecutorResult<Literal> {
653        let bindings =
654            collect_correlated_bindings(context, batch, row_idx, &subquery.correlated_columns)?;
655        let bound_plan = bind_select_plan(&subquery.plan, &bindings)?;
656        let execution = self.execute_select(bound_plan)?;
657        let mut rows_seen: usize = 0;
658        let mut result: Option<Literal> = None;
659        execution.stream(|inner_batch| {
660            if inner_batch.num_columns() != 1 {
661                return Err(Error::InvalidArgumentError(
662                    "scalar subquery must return exactly one column".into(),
663                ));
664            }
665            let column = inner_batch.column(0).clone();
666            for idx in 0..inner_batch.num_rows() {
667                if rows_seen >= 1 {
668                    return Err(Error::InvalidArgumentError(
669                        "scalar subquery produced more than one row".into(),
670                    ));
671                }
672                rows_seen = rows_seen.saturating_add(1);
673                result = Some(array_value_to_literal(&column, idx)?);
674            }
675            Ok(())
676        })?;
677
678        if rows_seen == 0 {
679            Ok(Literal::Null)
680        } else {
681            result
682                .ok_or_else(|| Error::Internal("scalar subquery evaluation missing result".into()))
683        }
684    }
685
686    fn evaluate_scalar_subquery_numeric(
687        &self,
688        context: &mut CrossProductExpressionContext,
689        subquery: &llkv_plan::ScalarSubquery,
690        batch: &RecordBatch,
691    ) -> ExecutorResult<NumericArray> {
692        let mut values: Vec<Option<f64>> = Vec::with_capacity(batch.num_rows());
693        let mut all_integer = true;
694
695        for row_idx in 0..batch.num_rows() {
696            let literal =
697                self.evaluate_scalar_subquery_literal(context, subquery, batch, row_idx)?;
698            match literal {
699                Literal::Null => values.push(None),
700                Literal::Integer(value) => {
701                    let cast = i64::try_from(value).map_err(|_| {
702                        Error::InvalidArgumentError(
703                            "scalar subquery integer result exceeds supported range".into(),
704                        )
705                    })?;
706                    values.push(Some(cast as f64));
707                }
708                Literal::Float(value) => {
709                    all_integer = false;
710                    values.push(Some(value));
711                }
712                Literal::Boolean(flag) => {
713                    let numeric = if flag { 1.0 } else { 0.0 };
714                    values.push(Some(numeric));
715                }
716                Literal::String(_) | Literal::Struct(_) => {
717                    return Err(Error::InvalidArgumentError(
718                        "scalar subquery produced non-numeric result in numeric context".into(),
719                    ));
720                }
721            }
722        }
723
724        if all_integer {
725            let iter = values.into_iter().map(|opt| opt.map(|v| v as i64));
726            let array = Int64Array::from_iter(iter);
727            NumericArray::try_from_arrow(&(Arc::new(array) as ArrayRef))
728        } else {
729            let array = Float64Array::from_iter(values);
730            NumericArray::try_from_arrow(&(Arc::new(array) as ArrayRef))
731        }
732    }
733
734    fn evaluate_projection_expression(
735        &self,
736        context: &mut CrossProductExpressionContext,
737        expr: &ScalarExpr<String>,
738        batch: &RecordBatch,
739        scalar_lookup: &FxHashMap<SubqueryId, &llkv_plan::ScalarSubquery>,
740    ) -> ExecutorResult<ArrayRef> {
741        let translated = translate_scalar(expr, context.schema(), |name| {
742            Error::InvalidArgumentError(format!(
743                "column '{}' not found in cross product result",
744                name
745            ))
746        })?;
747
748        let mut subquery_ids: FxHashSet<SubqueryId> = FxHashSet::default();
749        collect_scalar_subquery_ids(&translated, &mut subquery_ids);
750
751        let mut mapping: FxHashMap<SubqueryId, FieldId> = FxHashMap::default();
752        for subquery_id in subquery_ids {
753            let info = scalar_lookup
754                .get(&subquery_id)
755                .ok_or_else(|| Error::Internal("missing scalar subquery metadata".into()))?;
756            let field_id = context.allocate_synthetic_field_id()?;
757            let numeric = self.evaluate_scalar_subquery_numeric(context, info, batch)?;
758            context.numeric_cache.insert(field_id, numeric);
759            mapping.insert(subquery_id, field_id);
760        }
761
762        let rewritten = rewrite_scalar_expr_for_subqueries(&translated, &mapping);
763        context.evaluate_numeric(&rewritten, batch)
764    }
765
766    /// Execute a SELECT without a FROM clause (e.g., SELECT 42, SELECT {'a': 1})
767    fn execute_select_without_table(&self, plan: SelectPlan) -> ExecutorResult<SelectExecution<P>> {
768        use arrow::array::ArrayRef;
769        use arrow::datatypes::Field;
770
771        // Build schema from computed projections
772        let mut fields = Vec::new();
773        let mut arrays: Vec<ArrayRef> = Vec::new();
774
775        for proj in &plan.projections {
776            match proj {
777                SelectProjection::Computed { expr, alias } => {
778                    // Infer the data type from the expression
779                    let (field_name, dtype, array) = match expr {
780                        ScalarExpr::Literal(lit) => {
781                            let (dtype, array) = Self::literal_to_array(lit)?;
782                            (alias.clone(), dtype, array)
783                        }
784                        _ => {
785                            return Err(Error::InvalidArgumentError(
786                                "SELECT without FROM only supports literal expressions".into(),
787                            ));
788                        }
789                    };
790
791                    fields.push(Field::new(field_name, dtype, true));
792                    arrays.push(array);
793                }
794                _ => {
795                    return Err(Error::InvalidArgumentError(
796                        "SELECT without FROM only supports computed projections".into(),
797                    ));
798                }
799            }
800        }
801
802        let schema = Arc::new(Schema::new(fields));
803        let mut batch = RecordBatch::try_new(Arc::clone(&schema), arrays)
804            .map_err(|e| Error::Internal(format!("failed to create record batch: {}", e)))?;
805
806        if plan.distinct {
807            let mut state = DistinctState::default();
808            batch = match distinct_filter_batch(batch, &mut state)? {
809                Some(filtered) => filtered,
810                None => RecordBatch::new_empty(Arc::clone(&schema)),
811            };
812        }
813
814        let schema = batch.schema();
815
816        Ok(SelectExecution::new_single_batch(
817            String::new(), // No table name
818            schema,
819            batch,
820        ))
821    }
822
823    /// Convert a Literal to an Arrow array (recursive for nested structs)
824    fn literal_to_array(lit: &llkv_expr::literal::Literal) -> ExecutorResult<(DataType, ArrayRef)> {
825        use arrow::array::{
826            ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray, StructArray,
827            new_null_array,
828        };
829        use arrow::datatypes::{DataType, Field};
830        use llkv_expr::literal::Literal;
831
832        match lit {
833            Literal::Integer(v) => {
834                let val = i64::try_from(*v).unwrap_or(0);
835                Ok((
836                    DataType::Int64,
837                    Arc::new(Int64Array::from(vec![val])) as ArrayRef,
838                ))
839            }
840            Literal::Float(v) => Ok((
841                DataType::Float64,
842                Arc::new(Float64Array::from(vec![*v])) as ArrayRef,
843            )),
844            Literal::Boolean(v) => Ok((
845                DataType::Boolean,
846                Arc::new(BooleanArray::from(vec![*v])) as ArrayRef,
847            )),
848            Literal::String(v) => Ok((
849                DataType::Utf8,
850                Arc::new(StringArray::from(vec![v.clone()])) as ArrayRef,
851            )),
852            Literal::Null => Ok((DataType::Null, new_null_array(&DataType::Null, 1))),
853            Literal::Struct(struct_fields) => {
854                // Build a struct array recursively
855                let mut inner_fields = Vec::new();
856                let mut inner_arrays = Vec::new();
857
858                for (field_name, field_lit) in struct_fields {
859                    let (field_dtype, field_array) = Self::literal_to_array(field_lit)?;
860                    inner_fields.push(Field::new(field_name.clone(), field_dtype, true));
861                    inner_arrays.push(field_array);
862                }
863
864                let struct_array =
865                    StructArray::try_new(inner_fields.clone().into(), inner_arrays, None).map_err(
866                        |e| Error::Internal(format!("failed to create struct array: {}", e)),
867                    )?;
868
869                Ok((
870                    DataType::Struct(inner_fields.into()),
871                    Arc::new(struct_array) as ArrayRef,
872                ))
873            }
874        }
875    }
876
    /// Execute a cross product query (FROM table1, table2, ...).
    ///
    /// Pipeline:
    /// 1. Resolve every table reference through the provider.
    /// 2. Materialize the joined row set via one of three strategies:
    ///    a hash-join fast path (only when there are no scalar subqueries and
    ///    a filter exists), llkv-join streaming for exactly two tables with an
    ///    explicit JOIN clause, or a staged cartesian product with literal
    ///    predicate pushdown.
    /// 3. Apply any WHERE predicate that the join did not already satisfy
    ///    (including correlated EXISTS subqueries, evaluated per row).
    /// 4. Dispatch to GROUP BY / aggregate handlers, or apply SELECT
    ///    projections and DISTINCT, returning a single-batch execution.
    fn execute_cross_product(&self, plan: SelectPlan) -> ExecutorResult<SelectExecution<P>> {
        use arrow::compute::concat_batches;

        if plan.tables.len() < 2 {
            return Err(Error::InvalidArgumentError(
                "cross product requires at least 2 tables".into(),
            ));
        }

        // Resolve each table reference to a handle; failure here aborts the query.
        let mut tables_with_handles = Vec::with_capacity(plan.tables.len());
        for table_ref in &plan.tables {
            let qualified_name = table_ref.qualified_name();
            let table = self.provider.get_table(&qualified_name)?;
            tables_with_handles.push((table_ref.clone(), table));
        }

        // Human-readable name for the execution, e.g. "db.a,db.b".
        let display_name = tables_with_handles
            .iter()
            .map(|(table_ref, _)| table_ref.qualified_name())
            .collect::<Vec<_>>()
            .join(",");

        let mut remaining_filter = plan.filter.clone();

        // Try hash join optimization first - this avoids materializing all tables
        let join_data = if plan.scalar_subqueries.is_empty() && remaining_filter.as_ref().is_some()
        {
            self.try_execute_hash_join(&plan, &tables_with_handles)?
        } else {
            None
        };

        let current = if let Some((joined, handled_all_predicates)) = join_data {
            // Clear filter only if hash join handled all predicates
            if handled_all_predicates {
                remaining_filter = None;
            }
            joined
        } else {
            // Hash join not applicable - use llkv-join for proper join support or fall back to cartesian product
            let has_joins = !plan.joins.is_empty();

            if has_joins && tables_with_handles.len() == 2 {
                // Use llkv-join for 2-table joins (including LEFT JOIN)
                use llkv_join::{JoinKey, JoinOptions, TableJoinExt};

                let (left_ref, left_table) = &tables_with_handles[0];
                let (right_ref, right_table) = &tables_with_handles[1];

                // Determine join type from plan and convert to llkv_join::JoinType
                let join_type = plan
                    .joins
                    .first()
                    .map(|j| match j.join_type {
                        llkv_plan::JoinPlan::Inner => llkv_join::JoinType::Inner,
                        llkv_plan::JoinPlan::Left => llkv_join::JoinType::Left,
                        llkv_plan::JoinPlan::Right => llkv_join::JoinType::Right,
                        llkv_plan::JoinPlan::Full => llkv_join::JoinType::Full,
                    })
                    .unwrap_or(llkv_join::JoinType::Inner);

                tracing::debug!(
                    "Using llkv-join for {join_type:?} join between {} and {}",
                    left_ref.qualified_name(),
                    right_ref.qualified_name()
                );

                // Extract join keys from constraints if available
                // For now, use empty keys (cross product) and rely on filter
                // TODO: Parse ON conditions to extract proper join keys
                let join_keys: Vec<JoinKey> = Vec::new();

                // Stream join output batches into memory.
                let mut result_batches = Vec::new();
                left_table.table.join_stream(
                    &right_table.table,
                    &join_keys,
                    &JoinOptions {
                        join_type,
                        ..Default::default()
                    },
                    |batch| {
                        result_batches.push(batch);
                    },
                )?;

                // Build combined schema and convert to TableCrossProductData
                // (left-table columns first, then right-table columns).
                let mut combined_fields = Vec::new();
                for col in &left_table.schema.columns {
                    combined_fields.push(Field::new(
                        col.name.clone(),
                        col.data_type.clone(),
                        col.nullable,
                    ));
                }
                for col in &right_table.schema.columns {
                    combined_fields.push(Field::new(
                        col.name.clone(),
                        col.data_type.clone(),
                        col.nullable,
                    ));
                }
                let combined_schema = Arc::new(Schema::new(combined_fields));

                let column_counts = vec![
                    left_table.schema.columns.len(),
                    right_table.schema.columns.len(),
                ];
                let table_indices = vec![0, 1];

                TableCrossProductData {
                    schema: combined_schema,
                    batches: result_batches,
                    column_counts,
                    table_indices,
                }
            } else {
                // Fall back to cartesian product for other cases
                // Push literal predicates down to each table scan when possible
                // so fewer rows enter the cartesian product.
                let constraint_map = if let Some(filter_wrapper) = remaining_filter.as_ref() {
                    extract_literal_pushdown_filters(
                        &filter_wrapper.predicate,
                        &tables_with_handles,
                    )
                } else {
                    vec![Vec::new(); tables_with_handles.len()]
                };

                let mut staged: Vec<TableCrossProductData> =
                    Vec::with_capacity(tables_with_handles.len());
                for (idx, (table_ref, table)) in tables_with_handles.iter().enumerate() {
                    let constraints = constraint_map.get(idx).map(|v| v.as_slice()).unwrap_or(&[]);
                    staged.push(collect_table_data(
                        idx,
                        table_ref,
                        table.as_ref(),
                        constraints,
                    )?);
                }
                cross_join_all(staged)?
            }
        };

        let TableCrossProductData {
            schema: combined_schema,
            batches: mut combined_batches,
            column_counts,
            table_indices,
        } = current;

        // Maps lower-cased (qualified) column names to indices in the combined schema.
        let column_lookup_map = build_cross_product_column_lookup(
            combined_schema.as_ref(),
            &plan.tables,
            &column_counts,
            &table_indices,
        );

        // Apply any WHERE predicate the join strategy did not already consume.
        if let Some(filter_wrapper) = remaining_filter.as_ref() {
            let mut filter_context = CrossProductExpressionContext::new(
                combined_schema.as_ref(),
                column_lookup_map.clone(),
            )?;
            let translated_filter = translate_predicate(
                filter_wrapper.predicate.clone(),
                filter_context.schema(),
                |name| {
                    Error::InvalidArgumentError(format!(
                        "column '{}' not found in cross product result",
                        name
                    ))
                },
            )?;

            let subquery_lookup: FxHashMap<llkv_expr::SubqueryId, &llkv_plan::FilterSubquery> =
                filter_wrapper
                    .subqueries
                    .iter()
                    .map(|subquery| (subquery.id, subquery))
                    .collect();

            let mut filtered_batches = Vec::with_capacity(combined_batches.len());
            for batch in combined_batches.into_iter() {
                filter_context.reset();
                // The closure resolves correlated EXISTS subqueries row-by-row.
                let mask = filter_context.evaluate_predicate_mask(
                    &translated_filter,
                    &batch,
                    |ctx, subquery_expr, row_idx, current_batch| {
                        let subquery = subquery_lookup.get(&subquery_expr.id).ok_or_else(|| {
                            Error::Internal("missing correlated subquery metadata".into())
                        })?;
                        let exists =
                            self.evaluate_exists_subquery(ctx, subquery, current_batch, row_idx)?;
                        let value = if subquery_expr.negated {
                            !exists
                        } else {
                            exists
                        };
                        Ok(Some(value))
                    },
                )?;
                let filtered = filter_record_batch(&batch, &mask).map_err(|err| {
                    Error::InvalidArgumentError(format!(
                        "failed to apply cross product filter: {err}"
                    ))
                })?;
                // Drop fully-filtered batches so later stages see only live rows.
                if filtered.num_rows() > 0 {
                    filtered_batches.push(filtered);
                }
            }
            combined_batches = filtered_batches;
        }

        // GROUP BY takes precedence - it can also have aggregates in projections
        if !plan.group_by.is_empty() {
            return self.execute_group_by_from_batches(
                display_name,
                plan,
                combined_schema,
                combined_batches,
                column_lookup_map,
            );
        }

        if !plan.aggregates.is_empty() {
            return self.execute_cross_product_aggregates(
                Arc::clone(&combined_schema),
                combined_batches,
                &column_lookup_map,
                &plan,
                &display_name,
            );
        }

        if self.has_computed_aggregates(&plan) {
            return self.execute_cross_product_computed_aggregates(
                Arc::clone(&combined_schema),
                combined_batches,
                &column_lookup_map,
                &plan,
                &display_name,
            );
        }

        // Collapse the surviving batches into a single batch for projection.
        let mut combined_batch = if combined_batches.is_empty() {
            RecordBatch::new_empty(Arc::clone(&combined_schema))
        } else if combined_batches.len() == 1 {
            combined_batches.pop().unwrap()
        } else {
            concat_batches(&combined_schema, &combined_batches).map_err(|e| {
                Error::Internal(format!(
                    "failed to concatenate cross product batches: {}",
                    e
                ))
            })?
        };

        let scalar_lookup: FxHashMap<SubqueryId, &llkv_plan::ScalarSubquery> = plan
            .scalar_subqueries
            .iter()
            .map(|subquery| (subquery.id, subquery))
            .collect();

        // Apply SELECT projections if specified
        if !plan.projections.is_empty() {
            let mut selected_fields = Vec::new();
            let mut selected_columns = Vec::new();
            // Created lazily — only computed projections need an expression context.
            let mut expr_context: Option<CrossProductExpressionContext> = None;

            for proj in &plan.projections {
                match proj {
                    SelectProjection::AllColumns => {
                        // Keep all columns
                        selected_fields = combined_schema.fields().iter().cloned().collect();
                        selected_columns = combined_batch.columns().to_vec();
                        break;
                    }
                    SelectProjection::AllColumnsExcept { exclude } => {
                        // Keep all columns except the excluded ones
                        let exclude_lower: Vec<String> =
                            exclude.iter().map(|e| e.to_ascii_lowercase()).collect();

                        for (idx, field) in combined_schema.fields().iter().enumerate() {
                            let field_name_lower = field.name().to_ascii_lowercase();
                            if !exclude_lower.contains(&field_name_lower) {
                                selected_fields.push(field.clone());
                                selected_columns.push(combined_batch.column(idx).clone());
                            }
                        }
                        break;
                    }
                    SelectProjection::Column { name, alias } => {
                        // Find the column by qualified name
                        let col_name = name.to_ascii_lowercase();
                        if let Some(&idx) = column_lookup_map.get(&col_name) {
                            let field = combined_schema.field(idx);
                            let output_name = alias.as_ref().unwrap_or(name).clone();
                            selected_fields.push(Arc::new(arrow::datatypes::Field::new(
                                output_name,
                                field.data_type().clone(),
                                field.is_nullable(),
                            )));
                            selected_columns.push(combined_batch.column(idx).clone());
                        } else {
                            return Err(Error::InvalidArgumentError(format!(
                                "column '{}' not found in cross product result",
                                name
                            )));
                        }
                    }
                    SelectProjection::Computed { expr, alias } => {
                        if expr_context.is_none() {
                            expr_context = Some(CrossProductExpressionContext::new(
                                combined_schema.as_ref(),
                                column_lookup_map.clone(),
                            )?);
                        }
                        let context = expr_context
                            .as_mut()
                            .expect("projection context must be initialized");
                        context.reset();
                        let evaluated = self.evaluate_projection_expression(
                            context,
                            expr,
                            &combined_batch,
                            &scalar_lookup,
                        )?;
                        let field = Arc::new(arrow::datatypes::Field::new(
                            alias.clone(),
                            evaluated.data_type().clone(),
                            true,
                        ));
                        selected_fields.push(field);
                        selected_columns.push(evaluated);
                    }
                }
            }

            let projected_schema = Arc::new(Schema::new(selected_fields));
            combined_batch = RecordBatch::try_new(projected_schema, selected_columns)
                .map_err(|e| Error::Internal(format!("failed to apply projections: {}", e)))?;
        }

        // DISTINCT is applied after projection so duplicates are judged on the
        // projected columns only.
        if plan.distinct {
            let mut state = DistinctState::default();
            let source_schema = combined_batch.schema();
            combined_batch = match distinct_filter_batch(combined_batch, &mut state)? {
                Some(filtered) => filtered,
                None => RecordBatch::new_empty(source_schema),
            };
        }

        let schema = combined_batch.schema();

        Ok(SelectExecution::new_single_batch(
            display_name,
            schema,
            combined_batch,
        ))
    }
1235
    /// Execute top-level aggregates (no GROUP BY) over a cross-product result.
    ///
    /// Builds one `AggregateSpec` per requested aggregate (resolving column
    /// names against `column_lookup_map`), feeds every batch through the
    /// matching accumulators, then finalizes the states into a single-row
    /// batch, honoring DISTINCT and ORDER BY from the plan.
    ///
    /// # Errors
    /// Fails when the plan carries scalar subqueries, references an unknown
    /// column, uses an aggregate on an unsupported input type, or requests no
    /// aggregates at all.
    fn execute_cross_product_aggregates(
        &self,
        combined_schema: Arc<Schema>,
        batches: Vec<RecordBatch>,
        column_lookup_map: &FxHashMap<String, usize>,
        plan: &SelectPlan,
        display_name: &str,
    ) -> ExecutorResult<SelectExecution<P>> {
        // Scalar subqueries would need per-row evaluation, which this
        // single-pass accumulator pipeline cannot do.
        if !plan.scalar_subqueries.is_empty() {
            return Err(Error::InvalidArgumentError(
                "scalar subqueries not supported in aggregate joins".into(),
            ));
        }

        // specs[i] describes aggregate i; spec_to_projection[i] is the
        // combined-schema column index it reads (None for COUNT(*)).
        let mut specs: Vec<AggregateSpec> = Vec::with_capacity(plan.aggregates.len());
        let mut spec_to_projection: Vec<Option<usize>> = Vec::with_capacity(plan.aggregates.len());

        for aggregate in &plan.aggregates {
            match aggregate {
                AggregateExpr::CountStar { alias } => {
                    // COUNT(*) reads no column; field_id stays None.
                    specs.push(AggregateSpec {
                        alias: alias.clone(),
                        kind: AggregateKind::Count {
                            field_id: None,
                            distinct: false,
                        },
                    });
                    spec_to_projection.push(None);
                }
                AggregateExpr::Column {
                    column,
                    alias,
                    function,
                    distinct,
                } => {
                    // Column lookup is case-insensitive (keys are lower-cased).
                    let key = column.to_ascii_lowercase();
                    let column_index = *column_lookup_map.get(&key).ok_or_else(|| {
                        Error::InvalidArgumentError(format!(
                            "unknown column '{column}' in aggregate"
                        ))
                    })?;
                    let field = combined_schema.field(column_index);
                    // Map the planner's aggregate function onto an
                    // AggregateKind, validating the input type where needed
                    // (SUM/MIN/MAX accept Int64 or Float64 only).
                    let kind = match function {
                        AggregateFunction::Count => AggregateKind::Count {
                            field_id: Some(column_index as u32),
                            distinct: *distinct,
                        },
                        AggregateFunction::SumInt64 => {
                            let input_type = Self::validate_aggregate_type(
                                Some(field.data_type().clone()),
                                "SUM",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Sum {
                                field_id: column_index as u32,
                                data_type: input_type,
                                distinct: *distinct,
                            }
                        }
                        AggregateFunction::MinInt64 => {
                            let input_type = Self::validate_aggregate_type(
                                Some(field.data_type().clone()),
                                "MIN",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Min {
                                field_id: column_index as u32,
                                data_type: input_type,
                            }
                        }
                        AggregateFunction::MaxInt64 => {
                            let input_type = Self::validate_aggregate_type(
                                Some(field.data_type().clone()),
                                "MAX",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Max {
                                field_id: column_index as u32,
                                data_type: input_type,
                            }
                        }
                        AggregateFunction::CountNulls => AggregateKind::CountNulls {
                            field_id: column_index as u32,
                        },
                    };

                    specs.push(AggregateSpec {
                        alias: alias.clone(),
                        kind,
                    });
                    spec_to_projection.push(Some(column_index));
                }
            }
        }

        if specs.is_empty() {
            return Err(Error::InvalidArgumentError(
                "aggregate query requires at least one aggregate expression".into(),
            ));
        }

        // One accumulator state per spec, bound to its projection index.
        let mut states = Vec::with_capacity(specs.len());
        for (idx, spec) in specs.iter().enumerate() {
            states.push(AggregateState {
                alias: spec.alias.clone(),
                accumulator: AggregateAccumulator::new_with_projection_index(
                    spec,
                    spec_to_projection[idx],
                    None,
                )?,
                override_value: None,
            });
        }

        // Single pass: every batch updates every accumulator.
        for batch in &batches {
            for state in &mut states {
                state.update(batch)?;
            }
        }

        // Finalize each accumulator into one output field + one-element array.
        let mut fields = Vec::with_capacity(states.len());
        let mut arrays: Vec<ArrayRef> = Vec::with_capacity(states.len());
        for state in states {
            let (field, array) = state.finalize()?;
            fields.push(Arc::new(field));
            arrays.push(array);
        }

        let schema = Arc::new(Schema::new(fields));
        let mut batch = RecordBatch::try_new(Arc::clone(&schema), arrays)?;

        // DISTINCT on a single aggregate row either keeps it or empties the batch.
        if plan.distinct {
            let mut distinct_state = DistinctState::default();
            batch = match distinct_filter_batch(batch, &mut distinct_state)? {
                Some(filtered) => filtered,
                None => RecordBatch::new_empty(Arc::clone(&schema)),
            };
        }

        if !plan.order_by.is_empty() && batch.num_rows() > 0 {
            batch = sort_record_batch_with_order(&schema, &batch, &plan.order_by)?;
        }

        Ok(SelectExecution::new_single_batch(
            display_name.to_string(),
            schema,
            batch,
        ))
    }
1385
1386    fn execute_cross_product_computed_aggregates(
1387        &self,
1388        combined_schema: Arc<Schema>,
1389        batches: Vec<RecordBatch>,
1390        column_lookup_map: &FxHashMap<String, usize>,
1391        plan: &SelectPlan,
1392        display_name: &str,
1393    ) -> ExecutorResult<SelectExecution<P>> {
1394        let mut aggregate_specs: Vec<(String, AggregateCall<String>)> = Vec::new();
1395        for projection in &plan.projections {
1396            match projection {
1397                SelectProjection::Computed { expr, .. } => {
1398                    Self::collect_aggregates(expr, &mut aggregate_specs);
1399                }
1400                SelectProjection::AllColumns
1401                | SelectProjection::AllColumnsExcept { .. }
1402                | SelectProjection::Column { .. } => {
1403                    return Err(Error::InvalidArgumentError(
1404                        "non-computed projections not supported with aggregate expressions".into(),
1405                    ));
1406                }
1407            }
1408        }
1409
1410        if aggregate_specs.is_empty() {
1411            return Err(Error::InvalidArgumentError(
1412                "computed aggregate query requires at least one aggregate expression".into(),
1413            ));
1414        }
1415
1416        let aggregate_values = self.compute_cross_product_aggregate_values(
1417            &combined_schema,
1418            &batches,
1419            column_lookup_map,
1420            &aggregate_specs,
1421        )?;
1422
1423        let mut fields = Vec::with_capacity(plan.projections.len());
1424        let mut arrays: Vec<ArrayRef> = Vec::with_capacity(plan.projections.len());
1425
1426        for projection in &plan.projections {
1427            if let SelectProjection::Computed { expr, alias } = projection {
1428                let value = Self::evaluate_expr_with_aggregates(expr, &aggregate_values)?;
1429                fields.push(Arc::new(Field::new(alias, DataType::Int64, false)));
1430                arrays.push(Arc::new(Int64Array::from(vec![value])) as ArrayRef);
1431            }
1432        }
1433
1434        let schema = Arc::new(Schema::new(fields));
1435        let mut batch = RecordBatch::try_new(Arc::clone(&schema), arrays)?;
1436
1437        if plan.distinct {
1438            let mut distinct_state = DistinctState::default();
1439            batch = match distinct_filter_batch(batch, &mut distinct_state)? {
1440                Some(filtered) => filtered,
1441                None => RecordBatch::new_empty(Arc::clone(&schema)),
1442            };
1443        }
1444
1445        if !plan.order_by.is_empty() && batch.num_rows() > 0 {
1446            batch = sort_record_batch_with_order(&schema, &batch, &plan.order_by)?;
1447        }
1448
1449        Ok(SelectExecution::new_single_batch(
1450            display_name.to_string(),
1451            schema,
1452            batch,
1453        ))
1454    }
1455
1456    fn compute_cross_product_aggregate_values(
1457        &self,
1458        combined_schema: &Arc<Schema>,
1459        batches: &[RecordBatch],
1460        column_lookup_map: &FxHashMap<String, usize>,
1461        aggregate_specs: &[(String, AggregateCall<String>)],
1462    ) -> ExecutorResult<FxHashMap<String, AggregateValue>> {
1463        let mut specs: Vec<AggregateSpec> = Vec::with_capacity(aggregate_specs.len());
1464        let mut spec_to_projection: Vec<Option<usize>> = Vec::with_capacity(aggregate_specs.len());
1465
1466        for (key, agg) in aggregate_specs {
1467            match agg {
1468                AggregateCall::CountStar => {
1469                    specs.push(AggregateSpec {
1470                        alias: key.clone(),
1471                        kind: AggregateKind::Count {
1472                            field_id: None,
1473                            distinct: false,
1474                        },
1475                    });
1476                    spec_to_projection.push(None);
1477                }
1478                AggregateCall::Count { expr, .. }
1479                | AggregateCall::Sum { expr, .. }
1480                | AggregateCall::Avg { expr, .. }
1481                | AggregateCall::Min(expr)
1482                | AggregateCall::Max(expr)
1483                | AggregateCall::CountNulls(expr) => {
1484                    // For now, we only support simple column references in aggregates at this level
1485                    // Complex expressions in aggregates need expression evaluation support
1486                    let column = try_extract_simple_column(expr).ok_or_else(|| {
1487                        Error::InvalidArgumentError(
1488                            "complex expressions in aggregates not yet supported in this context"
1489                                .into(),
1490                        )
1491                    })?;
1492                    let key_lower = column.to_ascii_lowercase();
1493                    let column_index = *column_lookup_map.get(&key_lower).ok_or_else(|| {
1494                        Error::InvalidArgumentError(format!(
1495                            "unknown column '{column}' in aggregate"
1496                        ))
1497                    })?;
1498                    let field = combined_schema.field(column_index);
1499                    let kind = match agg {
1500                        AggregateCall::Count { distinct, .. } => AggregateKind::Count {
1501                            field_id: Some(column_index as u32),
1502                            distinct: *distinct,
1503                        },
1504                        AggregateCall::Sum { distinct, .. } => {
1505                            let input_type = Self::validate_aggregate_type(
1506                                Some(field.data_type().clone()),
1507                                "SUM",
1508                                &[DataType::Int64, DataType::Float64],
1509                            )?;
1510                            AggregateKind::Sum {
1511                                field_id: column_index as u32,
1512                                data_type: input_type,
1513                                distinct: *distinct,
1514                            }
1515                        }
1516                        AggregateCall::Avg { distinct, .. } => {
1517                            let input_type = Self::validate_aggregate_type(
1518                                Some(field.data_type().clone()),
1519                                "AVG",
1520                                &[DataType::Int64, DataType::Float64],
1521                            )?;
1522                            AggregateKind::Avg {
1523                                field_id: column_index as u32,
1524                                data_type: input_type,
1525                                distinct: *distinct,
1526                            }
1527                        }
1528                        AggregateCall::Min(_) => {
1529                            let input_type = Self::validate_aggregate_type(
1530                                Some(field.data_type().clone()),
1531                                "MIN",
1532                                &[DataType::Int64, DataType::Float64],
1533                            )?;
1534                            AggregateKind::Min {
1535                                field_id: column_index as u32,
1536                                data_type: input_type,
1537                            }
1538                        }
1539                        AggregateCall::Max(_) => {
1540                            let input_type = Self::validate_aggregate_type(
1541                                Some(field.data_type().clone()),
1542                                "MAX",
1543                                &[DataType::Int64, DataType::Float64],
1544                            )?;
1545                            AggregateKind::Max {
1546                                field_id: column_index as u32,
1547                                data_type: input_type,
1548                            }
1549                        }
1550                        AggregateCall::CountNulls(_) => AggregateKind::CountNulls {
1551                            field_id: column_index as u32,
1552                        },
1553                        _ => unreachable!(),
1554                    };
1555
1556                    specs.push(AggregateSpec {
1557                        alias: key.clone(),
1558                        kind,
1559                    });
1560                    spec_to_projection.push(Some(column_index));
1561                }
1562            }
1563        }
1564
1565        let mut states = Vec::with_capacity(specs.len());
1566        for (idx, spec) in specs.iter().enumerate() {
1567            states.push(AggregateState {
1568                alias: spec.alias.clone(),
1569                accumulator: AggregateAccumulator::new_with_projection_index(
1570                    spec,
1571                    spec_to_projection[idx],
1572                    None,
1573                )?,
1574                override_value: None,
1575            });
1576        }
1577
1578        for batch in batches {
1579            for state in &mut states {
1580                state.update(batch)?;
1581            }
1582        }
1583
1584        let mut results = FxHashMap::default();
1585        for state in states {
1586            let (field, array) = state.finalize()?;
1587
1588            // Try Int64Array first
1589            if let Some(int_array) = array.as_any().downcast_ref::<Int64Array>() {
1590                if int_array.len() != 1 {
1591                    return Err(Error::Internal(format!(
1592                        "Expected single value from aggregate, got {}",
1593                        int_array.len()
1594                    )));
1595                }
1596                let value = if int_array.is_null(0) {
1597                    AggregateValue::Int64(0)
1598                } else {
1599                    AggregateValue::Int64(int_array.value(0))
1600                };
1601                results.insert(field.name().to_string(), value);
1602            }
1603            // Try Float64Array for AVG
1604            else if let Some(float_array) = array.as_any().downcast_ref::<Float64Array>() {
1605                if float_array.len() != 1 {
1606                    return Err(Error::Internal(format!(
1607                        "Expected single value from aggregate, got {}",
1608                        float_array.len()
1609                    )));
1610                }
1611                let value = if float_array.is_null(0) {
1612                    AggregateValue::Float64(0.0)
1613                } else {
1614                    AggregateValue::Float64(float_array.value(0))
1615                };
1616                results.insert(field.name().to_string(), value);
1617            } else {
1618                return Err(Error::Internal(format!(
1619                    "Unexpected array type from aggregate: {:?}",
1620                    array.data_type()
1621                )));
1622            }
1623        }
1624
1625        Ok(results)
1626    }
1627
1628    /// Attempt to optimize a multi-table query using hash joins instead of cartesian product.
1629    ///
1630    /// This replaces the O(n₁×n₂×...×nₖ) backtracking algorithm with O(n₁+n₂+...+nₖ)
1631    /// hash join execution. For two-table joins, applies a single hash join. For N-way joins,
1632    /// performs left-associative pairwise joins: ((T₁ ⋈ T₂) ⋈ T₃) ⋈ ... ⋈ Tₙ.
1633    ///
1634    /// # Arguments
1635    ///
1636    /// * `plan` - The SELECT plan containing table references and filter predicates
1637    /// * `tables_with_handles` - Vector of (TableRef, ExecutorTable) pairs for all tables in the query
1638    ///
1639    /// # Returns
1640    ///
1641    /// * `Ok(Some((data, handled_all)))` - Join optimization succeeded, returning joined batches and whether all predicates were handled
1642    /// * `Ok(None)` - Optimization cannot be applied (falls back to cartesian product)
1643    /// * `Err(...)` - Join execution failed
1644    fn try_execute_hash_join(
1645        &self,
1646        plan: &SelectPlan,
1647        tables_with_handles: &[(llkv_plan::TableRef, Arc<ExecutorTable<P>>)],
1648    ) -> ExecutorResult<Option<(TableCrossProductData, bool)>> {
1649        let query_label_opt = current_query_label();
1650        let query_label = query_label_opt.as_deref().unwrap_or("<unknown query>");
1651
1652        // Validate preconditions for hash join optimization
1653        let filter_wrapper = match &plan.filter {
1654            Some(filter) if filter.subqueries.is_empty() => filter,
1655            _ => {
1656                tracing::debug!(
1657                    "join_opt[{query_label}]: skipping optimization – filter missing or uses subqueries"
1658                );
1659                return Ok(None);
1660            }
1661        };
1662
1663        if tables_with_handles.len() < 2 {
1664            tracing::debug!(
1665                "join_opt[{query_label}]: skipping optimization – requires at least 2 tables"
1666            );
1667            return Ok(None);
1668        }
1669
1670        // Build table metadata for join constraint extraction
1671        let mut table_infos = Vec::with_capacity(tables_with_handles.len());
1672        for (index, (table_ref, executor_table)) in tables_with_handles.iter().enumerate() {
1673            let mut column_map = FxHashMap::default();
1674            for (column_idx, column) in executor_table.schema.columns.iter().enumerate() {
1675                let column_name = column.name.to_ascii_lowercase();
1676                column_map.entry(column_name).or_insert(column_idx);
1677            }
1678            table_infos.push(TableInfo {
1679                index,
1680                table_ref,
1681                column_map,
1682            });
1683        }
1684
1685        // Extract join constraints from WHERE clause
1686        let constraint_plan = match extract_join_constraints(
1687            &filter_wrapper.predicate,
1688            &table_infos,
1689        ) {
1690            Some(plan) => plan,
1691            None => {
1692                tracing::debug!(
1693                    "join_opt[{query_label}]: skipping optimization – predicate parsing failed (contains OR or other unsupported top-level structure)"
1694                );
1695                return Ok(None);
1696            }
1697        };
1698
1699        tracing::debug!(
1700            "join_opt[{query_label}]: constraint extraction succeeded - equalities={}, literals={}, handled={}/{} predicates",
1701            constraint_plan.equalities.len(),
1702            constraint_plan.literals.len(),
1703            constraint_plan.handled_conjuncts,
1704            constraint_plan.total_conjuncts
1705        );
1706        tracing::debug!(
1707            "join_opt[{query_label}]: attempting hash join with tables={:?} filter={:?}",
1708            plan.tables
1709                .iter()
1710                .map(|t| t.qualified_name())
1711                .collect::<Vec<_>>(),
1712            filter_wrapper.predicate,
1713        );
1714
1715        // Handle unsatisfiable predicates (e.g., WHERE FALSE)
1716        if constraint_plan.unsatisfiable {
1717            tracing::debug!(
1718                "join_opt[{query_label}]: predicate unsatisfiable – returning empty result"
1719            );
1720            let mut combined_fields = Vec::new();
1721            let mut column_counts = Vec::new();
1722            for (_table_ref, executor_table) in tables_with_handles {
1723                for column in &executor_table.schema.columns {
1724                    combined_fields.push(Field::new(
1725                        column.name.clone(),
1726                        column.data_type.clone(),
1727                        column.nullable,
1728                    ));
1729                }
1730                column_counts.push(executor_table.schema.columns.len());
1731            }
1732            let combined_schema = Arc::new(Schema::new(combined_fields));
1733            let empty_batch = RecordBatch::new_empty(Arc::clone(&combined_schema));
1734            return Ok(Some((
1735                TableCrossProductData {
1736                    schema: combined_schema,
1737                    batches: vec![empty_batch],
1738                    column_counts,
1739                    table_indices: (0..tables_with_handles.len()).collect(),
1740                },
1741                true, // Handled all predicates (unsatisfiable predicate consumes everything)
1742            )));
1743        }
1744
1745        // Hash join requires equality predicates
1746        if constraint_plan.equalities.is_empty() {
1747            tracing::debug!(
1748                "join_opt[{query_label}]: skipping optimization – no join equalities found"
1749            );
1750            return Ok(None);
1751        }
1752
1753        // Note: Literal constraints (e.g., t1.x = 5) are currently ignored in the hash join path.
1754        // They should ideally be pushed down as pre-filters on individual tables before joining.
1755        // For now, we'll let the hash join proceed and any literal constraints will be handled
1756        // by the fallback cartesian product path if needed.
1757        if !constraint_plan.literals.is_empty() {
1758            tracing::debug!(
1759                "join_opt[{query_label}]: found {} literal constraints - proceeding with hash join but may need fallback",
1760                constraint_plan.literals.len()
1761            );
1762        }
1763
1764        tracing::debug!(
1765            "join_opt[{query_label}]: hash join optimization applicable with {} equality constraints",
1766            constraint_plan.equalities.len()
1767        );
1768
1769        let mut literal_map: Vec<Vec<ColumnConstraint>> =
1770            vec![Vec::new(); tables_with_handles.len()];
1771        for constraint in &constraint_plan.literals {
1772            let table_idx = match constraint {
1773                ColumnConstraint::Equality(lit) => lit.column.table,
1774                ColumnConstraint::InList(in_list) => in_list.column.table,
1775            };
1776            if table_idx >= literal_map.len() {
1777                tracing::debug!(
1778                    "join_opt[{query_label}]: constraint references unknown table index {}; falling back",
1779                    table_idx
1780                );
1781                return Ok(None);
1782            }
1783            tracing::debug!(
1784                "join_opt[{query_label}]: mapping constraint to table_idx={} (table={})",
1785                table_idx,
1786                tables_with_handles[table_idx].0.qualified_name()
1787            );
1788            literal_map[table_idx].push(constraint.clone());
1789        }
1790
1791        let mut per_table: Vec<Option<TableCrossProductData>> =
1792            Vec::with_capacity(tables_with_handles.len());
1793        for (idx, (table_ref, table)) in tables_with_handles.iter().enumerate() {
1794            let data =
1795                collect_table_data(idx, table_ref, table.as_ref(), literal_map[idx].as_slice())?;
1796            per_table.push(Some(data));
1797        }
1798
1799        // Determine if we should use llkv-join (when LEFT JOINs are present or for better architecture)
1800        let has_left_join = plan
1801            .joins
1802            .iter()
1803            .any(|j| j.join_type == llkv_plan::JoinPlan::Left);
1804
1805        let mut current: Option<TableCrossProductData> = None;
1806
1807        if has_left_join {
1808            // LEFT JOIN path: delegate to llkv-join crate which has proper implementation
1809            tracing::debug!(
1810                "join_opt[{query_label}]: delegating to llkv-join for LEFT JOIN support"
1811            );
1812            // Bail out of hash join optimization - let the fallback path use llkv-join properly
1813            return Ok(None);
1814        } else {
1815            // INNER JOIN path: use existing optimization that can reorder joins
1816            let mut remaining: Vec<usize> = (0..tables_with_handles.len()).collect();
1817            let mut used_tables: FxHashSet<usize> = FxHashSet::default();
1818
1819            while !remaining.is_empty() {
1820                let next_index = if used_tables.is_empty() {
1821                    remaining[0]
1822                } else {
1823                    match remaining.iter().copied().find(|idx| {
1824                        table_has_join_with_used(*idx, &used_tables, &constraint_plan.equalities)
1825                    }) {
1826                        Some(idx) => idx,
1827                        None => {
1828                            tracing::debug!(
1829                                "join_opt[{query_label}]: no remaining equality links – using cartesian expansion for table index {idx}",
1830                                idx = remaining[0]
1831                            );
1832                            remaining[0]
1833                        }
1834                    }
1835                };
1836
1837                let position = remaining
1838                    .iter()
1839                    .position(|&idx| idx == next_index)
1840                    .expect("next index present");
1841
1842                let next_data = per_table[next_index]
1843                    .take()
1844                    .ok_or_else(|| Error::Internal("hash join consumed table data twice".into()))?;
1845
1846                if let Some(current_data) = current.take() {
1847                    let join_keys = gather_join_keys(
1848                        &current_data,
1849                        &next_data,
1850                        &used_tables,
1851                        next_index,
1852                        &constraint_plan.equalities,
1853                    )?;
1854
1855                    let joined = if join_keys.is_empty() {
1856                        tracing::debug!(
1857                            "join_opt[{query_label}]: joining '{}' via cartesian expansion (no equality keys)",
1858                            tables_with_handles[next_index].0.qualified_name()
1859                        );
1860                        cross_join_table_batches(current_data, next_data)?
1861                    } else {
1862                        hash_join_table_batches(
1863                            current_data,
1864                            next_data,
1865                            &join_keys,
1866                            llkv_join::JoinType::Inner,
1867                        )?
1868                    };
1869                    current = Some(joined);
1870                } else {
1871                    current = Some(next_data);
1872                }
1873
1874                used_tables.insert(next_index);
1875                remaining.remove(position);
1876            }
1877        }
1878
1879        if let Some(result) = current {
1880            let handled_all = constraint_plan.handled_conjuncts == constraint_plan.total_conjuncts;
1881            tracing::debug!(
1882                "join_opt[{query_label}]: hash join succeeded across {} tables (handled {}/{} predicates)",
1883                tables_with_handles.len(),
1884                constraint_plan.handled_conjuncts,
1885                constraint_plan.total_conjuncts
1886            );
1887            return Ok(Some((result, handled_all)));
1888        }
1889
1890        Ok(None)
1891    }
1892
1893    fn execute_projection(
1894        &self,
1895        table: Arc<ExecutorTable<P>>,
1896        display_name: String,
1897        plan: SelectPlan,
1898        row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
1899    ) -> ExecutorResult<SelectExecution<P>> {
1900        if plan.having.is_some() {
1901            return Err(Error::InvalidArgumentError(
1902                "HAVING requires GROUP BY".into(),
1903            ));
1904        }
1905        if plan
1906            .filter
1907            .as_ref()
1908            .is_some_and(|filter| !filter.subqueries.is_empty())
1909            || !plan.scalar_subqueries.is_empty()
1910        {
1911            return self.execute_projection_with_subqueries(table, display_name, plan, row_filter);
1912        }
1913
1914        let table_ref = table.as_ref();
1915        let constant_filter = plan
1916            .filter
1917            .as_ref()
1918            .and_then(|filter| evaluate_constant_predicate(&filter.predicate));
1919        let projections = if plan.projections.is_empty() {
1920            build_wildcard_projections(table_ref)
1921        } else {
1922            build_projected_columns(table_ref, &plan.projections)?
1923        };
1924        let schema = schema_for_projections(table_ref, &projections)?;
1925
1926        if let Some(result) = constant_filter {
1927            match result {
1928                Some(true) => {
1929                    // Treat as full table scan by clearing the filter below.
1930                }
1931                Some(false) | None => {
1932                    let batch = RecordBatch::new_empty(Arc::clone(&schema));
1933                    return Ok(SelectExecution::new_single_batch(
1934                        display_name,
1935                        schema,
1936                        batch,
1937                    ));
1938                }
1939            }
1940        }
1941
1942        let (mut filter_expr, mut full_table_scan) = match &plan.filter {
1943            Some(filter_wrapper) => (
1944                crate::translation::expression::translate_predicate(
1945                    filter_wrapper.predicate.clone(),
1946                    table_ref.schema.as_ref(),
1947                    |name| Error::InvalidArgumentError(format!("unknown column '{}'", name)),
1948                )?,
1949                false,
1950            ),
1951            None => {
1952                let field_id = table_ref.schema.first_field_id().ok_or_else(|| {
1953                    Error::InvalidArgumentError(
1954                        "table has no columns; cannot perform wildcard scan".into(),
1955                    )
1956                })?;
1957                (
1958                    crate::translation::expression::full_table_scan_filter(field_id),
1959                    true,
1960                )
1961            }
1962        };
1963
1964        if matches!(constant_filter, Some(Some(true))) {
1965            let field_id = table_ref.schema.first_field_id().ok_or_else(|| {
1966                Error::InvalidArgumentError(
1967                    "table has no columns; cannot perform wildcard scan".into(),
1968                )
1969            })?;
1970            filter_expr = crate::translation::expression::full_table_scan_filter(field_id);
1971            full_table_scan = true;
1972        }
1973
1974        let expanded_order = expand_order_targets(&plan.order_by, &projections)?;
1975
1976        let mut physical_order: Option<ScanOrderSpec> = None;
1977
1978        if let Some(first) = expanded_order.first() {
1979            match &first.target {
1980                OrderTarget::Column(name) => {
1981                    if table_ref.schema.resolve(name).is_some() {
1982                        physical_order = Some(resolve_scan_order(table_ref, &projections, first)?);
1983                    }
1984                }
1985                OrderTarget::Index(position) => match projections.get(*position) {
1986                    Some(ScanProjection::Column(_)) => {
1987                        physical_order = Some(resolve_scan_order(table_ref, &projections, first)?);
1988                    }
1989                    Some(ScanProjection::Computed { .. }) => {}
1990                    None => {
1991                        return Err(Error::InvalidArgumentError(format!(
1992                            "ORDER BY position {} is out of range",
1993                            position + 1
1994                        )));
1995                    }
1996                },
1997                OrderTarget::All => {}
1998            }
1999        }
2000
2001        let options = if let Some(order_spec) = physical_order {
2002            if row_filter.is_some() {
2003                tracing::debug!("Applying MVCC row filter with ORDER BY");
2004            }
2005            ScanStreamOptions {
2006                include_nulls: true,
2007                order: Some(order_spec),
2008                row_id_filter: row_filter.clone(),
2009            }
2010        } else {
2011            if row_filter.is_some() {
2012                tracing::debug!("Applying MVCC row filter");
2013            }
2014            ScanStreamOptions {
2015                include_nulls: true,
2016                order: None,
2017                row_id_filter: row_filter.clone(),
2018            }
2019        };
2020
2021        Ok(SelectExecution::new_projection(
2022            display_name,
2023            schema,
2024            table,
2025            projections,
2026            filter_expr,
2027            options,
2028            full_table_scan,
2029            expanded_order,
2030            plan.distinct,
2031        ))
2032    }
2033
2034    fn execute_projection_with_subqueries(
2035        &self,
2036        table: Arc<ExecutorTable<P>>,
2037        display_name: String,
2038        plan: SelectPlan,
2039        row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
2040    ) -> ExecutorResult<SelectExecution<P>> {
2041        if plan.having.is_some() {
2042            return Err(Error::InvalidArgumentError(
2043                "HAVING requires GROUP BY".into(),
2044            ));
2045        }
2046        let table_ref = table.as_ref();
2047
2048        let (output_scan_projections, effective_projections): (
2049            Vec<ScanProjection>,
2050            Vec<SelectProjection>,
2051        ) = if plan.projections.is_empty() {
2052            (
2053                build_wildcard_projections(table_ref),
2054                vec![SelectProjection::AllColumns],
2055            )
2056        } else {
2057            (
2058                build_projected_columns(table_ref, &plan.projections)?,
2059                plan.projections.clone(),
2060            )
2061        };
2062
2063        let scalar_lookup: FxHashMap<SubqueryId, &llkv_plan::ScalarSubquery> = plan
2064            .scalar_subqueries
2065            .iter()
2066            .map(|subquery| (subquery.id, subquery))
2067            .collect();
2068
2069        let base_projections = build_wildcard_projections(table_ref);
2070
2071        let filter_wrapper_opt = plan.filter.as_ref();
2072
2073        let mut translated_filter: Option<llkv_expr::expr::Expr<'static, FieldId>> = None;
2074        let pushdown_filter = if let Some(filter_wrapper) = filter_wrapper_opt {
2075            let translated = crate::translation::expression::translate_predicate(
2076                filter_wrapper.predicate.clone(),
2077                table_ref.schema.as_ref(),
2078                |name| Error::InvalidArgumentError(format!("unknown column '{}'", name)),
2079            )?;
2080            if !filter_wrapper.subqueries.is_empty() {
2081                translated_filter = Some(translated.clone());
2082                strip_exists(&translated)
2083            } else {
2084                translated
2085            }
2086        } else {
2087            let field_id = table_ref.schema.first_field_id().ok_or_else(|| {
2088                Error::InvalidArgumentError(
2089                    "table has no columns; cannot perform scalar subquery projection".into(),
2090                )
2091            })?;
2092            crate::translation::expression::full_table_scan_filter(field_id)
2093        };
2094
2095        let mut base_fields: Vec<Field> = Vec::with_capacity(table_ref.schema.columns.len());
2096        for column in &table_ref.schema.columns {
2097            base_fields.push(Field::new(
2098                column.name.clone(),
2099                column.data_type.clone(),
2100                column.nullable,
2101            ));
2102        }
2103        let base_schema = Arc::new(Schema::new(base_fields));
2104        let base_column_counts = vec![base_schema.fields().len()];
2105        let base_table_indices = vec![0usize];
2106        let base_lookup = build_cross_product_column_lookup(
2107            base_schema.as_ref(),
2108            &plan.tables,
2109            &base_column_counts,
2110            &base_table_indices,
2111        );
2112
2113        let mut filter_context = if translated_filter.is_some() {
2114            Some(CrossProductExpressionContext::new(
2115                base_schema.as_ref(),
2116                base_lookup.clone(),
2117            )?)
2118        } else {
2119            None
2120        };
2121
2122        let options = ScanStreamOptions {
2123            include_nulls: true,
2124            order: None,
2125            row_id_filter: row_filter.clone(),
2126        };
2127
2128        let subquery_lookup: FxHashMap<llkv_expr::SubqueryId, &llkv_plan::FilterSubquery> =
2129            filter_wrapper_opt
2130                .map(|wrapper| {
2131                    wrapper
2132                        .subqueries
2133                        .iter()
2134                        .map(|subquery| (subquery.id, subquery))
2135                        .collect()
2136                })
2137                .unwrap_or_default();
2138
2139        let mut projected_batches: Vec<RecordBatch> = Vec::new();
2140        let mut scan_error: Option<Error> = None;
2141
2142        table.table.scan_stream(
2143            base_projections.clone(),
2144            &pushdown_filter,
2145            options,
2146            |batch| {
2147                if scan_error.is_some() {
2148                    return;
2149                }
2150                let effective_batch = if let Some(context) = filter_context.as_mut() {
2151                    context.reset();
2152                    let translated = translated_filter
2153                        .as_ref()
2154                        .expect("filter context requires translated filter");
2155                    let mask = match context.evaluate_predicate_mask(
2156                        translated,
2157                        &batch,
2158                        |ctx, subquery_expr, row_idx, current_batch| {
2159                            let subquery =
2160                                subquery_lookup.get(&subquery_expr.id).ok_or_else(|| {
2161                                    Error::Internal("missing correlated subquery metadata".into())
2162                                })?;
2163                            let exists = self.evaluate_exists_subquery(
2164                                ctx,
2165                                subquery,
2166                                current_batch,
2167                                row_idx,
2168                            )?;
2169                            let value = if subquery_expr.negated {
2170                                !exists
2171                            } else {
2172                                exists
2173                            };
2174                            Ok(Some(value))
2175                        },
2176                    ) {
2177                        Ok(mask) => mask,
2178                        Err(err) => {
2179                            scan_error = Some(err);
2180                            return;
2181                        }
2182                    };
2183                    match filter_record_batch(&batch, &mask) {
2184                        Ok(filtered) => {
2185                            if filtered.num_rows() == 0 {
2186                                return;
2187                            }
2188                            filtered
2189                        }
2190                        Err(err) => {
2191                            scan_error = Some(Error::InvalidArgumentError(format!(
2192                                "failed to apply EXISTS filter: {err}"
2193                            )));
2194                            return;
2195                        }
2196                    }
2197                } else {
2198                    batch.clone()
2199                };
2200
2201                if effective_batch.num_rows() == 0 {
2202                    return;
2203                }
2204
2205                let projected = match self.project_record_batch(
2206                    &effective_batch,
2207                    &effective_projections,
2208                    &base_lookup,
2209                    &scalar_lookup,
2210                ) {
2211                    Ok(batch) => batch,
2212                    Err(err) => {
2213                        scan_error = Some(Error::InvalidArgumentError(format!(
2214                            "failed to evaluate projections: {err}"
2215                        )));
2216                        return;
2217                    }
2218                };
2219                projected_batches.push(projected);
2220            },
2221        )?;
2222
2223        if let Some(err) = scan_error {
2224            return Err(err);
2225        }
2226
2227        let mut result_batch = if projected_batches.is_empty() {
2228            let empty_batch = RecordBatch::new_empty(Arc::clone(&base_schema));
2229            self.project_record_batch(
2230                &empty_batch,
2231                &effective_projections,
2232                &base_lookup,
2233                &scalar_lookup,
2234            )?
2235        } else if projected_batches.len() == 1 {
2236            projected_batches.pop().unwrap()
2237        } else {
2238            let schema = projected_batches[0].schema();
2239            concat_batches(&schema, &projected_batches).map_err(|err| {
2240                Error::Internal(format!("failed to combine filtered batches: {err}"))
2241            })?
2242        };
2243
2244        if plan.distinct && result_batch.num_rows() > 0 {
2245            let mut state = DistinctState::default();
2246            let schema = result_batch.schema();
2247            result_batch = match distinct_filter_batch(result_batch, &mut state)? {
2248                Some(filtered) => filtered,
2249                None => RecordBatch::new_empty(schema),
2250            };
2251        }
2252
2253        if !plan.order_by.is_empty() && result_batch.num_rows() > 0 {
2254            let expanded_order = expand_order_targets(&plan.order_by, &output_scan_projections)?;
2255            if !expanded_order.is_empty() {
2256                result_batch = sort_record_batch_with_order(
2257                    &result_batch.schema(),
2258                    &result_batch,
2259                    &expanded_order,
2260                )?;
2261            }
2262        }
2263
2264        let schema = result_batch.schema();
2265
2266        Ok(SelectExecution::new_single_batch(
2267            display_name,
2268            schema,
2269            result_batch,
2270        ))
2271    }
2272
    /// Execute a single-table SELECT containing a GROUP BY clause.
    ///
    /// Runs a two-phase plan:
    /// 1. a full base scan of the table (WHERE filter applied, but no
    ///    projections, aggregates, ordering, DISTINCT, or HAVING), producing
    ///    all table columns as raw batches;
    /// 2. delegation to [`Self::execute_group_by_from_batches`], which does
    ///    the actual grouping/aggregation over those batches.
    ///
    /// # Errors
    /// Returns `InvalidArgumentError` when the plan contains subqueries
    /// (unsupported on this path), when the filter references an unknown
    /// column, or when the table has no columns at all.
    fn execute_group_by_single_table(
        &self,
        table: Arc<ExecutorTable<P>>,
        display_name: String,
        plan: SelectPlan,
        row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
    ) -> ExecutorResult<SelectExecution<P>> {
        // Subqueries — correlated filter subqueries or scalar subqueries in
        // the projection list — are rejected up front on the GROUP BY path.
        if plan
            .filter
            .as_ref()
            .is_some_and(|filter| !filter.subqueries.is_empty())
            || !plan.scalar_subqueries.is_empty()
        {
            return Err(Error::InvalidArgumentError(
                "GROUP BY with subqueries is not supported yet".into(),
            ));
        }

        // Debug: check if we have aggregates in unexpected places
        tracing::debug!(
            "[GROUP BY] Original plan: projections={}, aggregates={}, has_filter={}, has_having={}",
            plan.projections.len(),
            plan.aggregates.len(),
            plan.filter.is_some(),
            plan.having.is_some()
        );

        // For GROUP BY with aggregates, we need a two-phase execution:
        // 1. First phase: fetch all base table data (no projections, no aggregates)
        // 2. Second phase: group rows and compute aggregates
        //
        // Strip everything except the WHERE filter from the clone so the base
        // scan is a plain row fetch. The original `plan` is kept intact and
        // handed to phase two below.
        let mut base_plan = plan.clone();
        base_plan.projections.clear();
        base_plan.aggregates.clear();
        base_plan.scalar_subqueries.clear();
        base_plan.order_by.clear();
        base_plan.distinct = false;
        base_plan.group_by.clear();
        base_plan.value_table_mode = None;
        base_plan.having = None;

        tracing::debug!(
            "[GROUP BY] Base plan: projections={}, aggregates={}, has_filter={}, has_having={}",
            base_plan.projections.len(),
            base_plan.aggregates.len(),
            base_plan.filter.is_some(),
            base_plan.having.is_some()
        );

        // For base scan, we want all columns from the table
        // We build wildcard projections directly to avoid any expression evaluation
        let table_ref = table.as_ref();
        let projections = build_wildcard_projections(table_ref);
        let base_schema = schema_for_projections(table_ref, &projections)?;

        // Build filter if present (should NOT contain aggregates)
        tracing::debug!(
            "[GROUP BY] Building base filter: has_filter={}",
            base_plan.filter.is_some()
        );
        // Second tuple element tracks whether this is a full-table scan (no
        // user predicate), which the scan layer can use to skip filtering.
        let (filter_expr, full_table_scan) = match &base_plan.filter {
            Some(filter_wrapper) => {
                tracing::debug!(
                    "[GROUP BY] Translating filter predicate: {:?}",
                    filter_wrapper.predicate
                );
                let expr = crate::translation::expression::translate_predicate(
                    filter_wrapper.predicate.clone(),
                    table_ref.schema.as_ref(),
                    |name| {
                        Error::InvalidArgumentError(format!(
                            "Binder Error: does not have a column named '{}'",
                            name
                        ))
                    },
                )?;
                tracing::debug!("[GROUP BY] Translated filter expr: {:?}", expr);
                (expr, false)
            }
            None => {
                // Use first column as dummy for full table scan
                let first_col =
                    table_ref.schema.columns.first().ok_or_else(|| {
                        Error::InvalidArgumentError("Table has no columns".into())
                    })?;
                (full_table_scan_filter(first_col.field_id), true)
            }
        };

        // Nulls must be included: grouping semantics treat NULL as a distinct
        // group key, so the base scan cannot drop them.
        let options = ScanStreamOptions {
            include_nulls: true,
            order: None,
            row_id_filter: row_filter.clone(),
        };

        let execution = SelectExecution::new_projection(
            display_name.clone(),
            Arc::clone(&base_schema),
            Arc::clone(&table),
            projections,
            filter_expr,
            options,
            full_table_scan,
            vec![],
            false,
        );

        // Materialize the full base scan; grouping below needs random access
        // across all rows, so streaming is not an option here.
        let batches = execution.collect()?;

        let column_lookup_map = build_column_lookup_map(base_schema.as_ref());

        // Phase two: group the collected batches using the ORIGINAL plan
        // (which still carries projections, aggregates, HAVING, ORDER BY).
        self.execute_group_by_from_batches(
            display_name,
            plan,
            base_schema,
            batches,
            column_lookup_map,
        )
    }
2391
    /// Group pre-collected batches by the plan's GROUP BY keys and build the
    /// final result batch.
    ///
    /// This is the non-aggregate fast path: each group is represented by the
    /// FIRST row encountered with that key (later duplicate-key rows are
    /// skipped), and output values are read from that representative row.
    /// When the plan contains aggregates — in projections or in HAVING — the
    /// work is delegated to [`Self::execute_group_by_with_aggregates`].
    ///
    /// # Errors
    /// Returns `InvalidArgumentError` for subqueries (unsupported here), for
    /// GROUP BY / projection columns missing from the input schema, or when
    /// HAVING contains a subquery.
    fn execute_group_by_from_batches(
        &self,
        display_name: String,
        plan: SelectPlan,
        base_schema: Arc<Schema>,
        batches: Vec<RecordBatch>,
        column_lookup_map: FxHashMap<String, usize>,
    ) -> ExecutorResult<SelectExecution<P>> {
        // Same subquery guard as execute_group_by_single_table; this method
        // is also reachable from other call sites, so the check is repeated.
        if plan
            .filter
            .as_ref()
            .is_some_and(|filter| !filter.subqueries.is_empty())
            || !plan.scalar_subqueries.is_empty()
        {
            return Err(Error::InvalidArgumentError(
                "GROUP BY with subqueries is not supported yet".into(),
            ));
        }

        // If there are aggregates with GROUP BY, OR if HAVING contains aggregates, use aggregates path
        // Must check HAVING because aggregates can appear in HAVING even if not in SELECT projections
        let having_has_aggregates = plan
            .having
            .as_ref()
            .map(|h| Self::predicate_contains_aggregate(h))
            .unwrap_or(false);

        tracing::debug!(
            "[GROUP BY PATH] aggregates={}, has_computed={}, having_has_agg={}",
            plan.aggregates.len(),
            self.has_computed_aggregates(&plan),
            having_has_aggregates
        );

        if !plan.aggregates.is_empty()
            || self.has_computed_aggregates(&plan)
            || having_has_aggregates
        {
            tracing::debug!("[GROUP BY PATH] Taking aggregates path");
            return self.execute_group_by_with_aggregates(
                display_name,
                plan,
                base_schema,
                batches,
                column_lookup_map,
            );
        }

        // Resolve each GROUP BY column name (case-insensitive) to its index
        // in the base schema.
        let mut key_indices = Vec::with_capacity(plan.group_by.len());
        for column in &plan.group_by {
            let key = column.to_ascii_lowercase();
            let index = column_lookup_map.get(&key).ok_or_else(|| {
                Error::InvalidArgumentError(format!(
                    "column '{}' not found in GROUP BY input",
                    column
                ))
            })?;
            key_indices.push(*index);
        }

        // A sample batch (possibly empty) is used by output-column planning
        // to inspect concrete Arrow types.
        let sample_batch = batches
            .first()
            .cloned()
            .unwrap_or_else(|| RecordBatch::new_empty(Arc::clone(&base_schema)));

        let output_columns = self.build_group_by_output_columns(
            &plan,
            base_schema.as_ref(),
            &column_lookup_map,
            &sample_batch,
        )?;

        // Constant-fold HAVING when it references no columns/aggregates:
        // Some(Some(bool)) / Some(None) means it folded; None means it must
        // be evaluated per group.
        let constant_having = plan.having.as_ref().and_then(evaluate_constant_predicate);

        // A HAVING that folds to false/NULL filters out every group — return
        // an empty result with the projected schema.
        if let Some(result) = constant_having
            && !result.unwrap_or(false)
        {
            let fields: Vec<Field> = output_columns
                .iter()
                .map(|output| output.field.clone())
                .collect();
            let schema = Arc::new(Schema::new(fields));
            let batch = RecordBatch::new_empty(Arc::clone(&schema));
            return Ok(SelectExecution::new_single_batch(
                display_name,
                schema,
                batch,
            ));
        }

        let translated_having = if plan.having.is_some() && constant_having.is_none() {
            let having = plan.having.clone().expect("checked above");
            // Only translate HAVING if it doesn't contain aggregates
            // Aggregates must be evaluated after GROUP BY aggregation
            if Self::predicate_contains_aggregate(&having) {
                None
            } else {
                let temp_context = CrossProductExpressionContext::new(
                    base_schema.as_ref(),
                    column_lookup_map.clone(),
                )?;
                Some(translate_predicate(
                    having,
                    temp_context.schema(),
                    |name| {
                        Error::InvalidArgumentError(format!(
                            "column '{}' not found in GROUP BY result",
                            name
                        ))
                    },
                )?)
            }
        } else {
            None
        };

        // First pass: dedupe by group key. Each distinct key keeps a handle
        // (batch + row index) to its first-seen representative row.
        let mut group_index: FxHashMap<Vec<GroupKeyValue>, usize> = FxHashMap::default();
        let mut groups: Vec<GroupState> = Vec::new();

        for batch in &batches {
            for row_idx in 0..batch.num_rows() {
                let key = build_group_key(batch, row_idx, &key_indices)?;
                if group_index.contains_key(&key) {
                    continue;
                }
                group_index.insert(key, groups.len());
                groups.push(GroupState {
                    batch: batch.clone(),
                    row_idx,
                });
            }
        }

        // Second pass: apply HAVING per group and materialize output rows
        // from each group's representative row.
        let mut rows: Vec<Vec<PlanValue>> = Vec::with_capacity(groups.len());

        for group in &groups {
            if let Some(predicate) = translated_having.as_ref() {
                // NOTE(review): a fresh context is built and the predicate is
                // evaluated over the ENTIRE batch for every group, then only
                // one row of the result is read — O(groups × rows) overall.
                // Looks hoistable/cacheable per batch; confirm before changing.
                let mut context = CrossProductExpressionContext::new(
                    group.batch.schema().as_ref(),
                    column_lookup_map.clone(),
                )?;
                context.reset();
                // HAVING subqueries are unsupported: the evaluator callback
                // unconditionally errors if one is encountered.
                let mut eval = |_ctx: &mut CrossProductExpressionContext,
                                _subquery_expr: &llkv_expr::SubqueryExpr,
                                _row_idx: usize,
                                _current_batch: &RecordBatch|
                 -> ExecutorResult<Option<bool>> {
                    Err(Error::InvalidArgumentError(
                        "HAVING subqueries are not supported yet".into(),
                    ))
                };
                let truths =
                    context.evaluate_predicate_truths(predicate, &group.batch, &mut eval)?;
                // SQL three-valued logic: a NULL (None) truth value drops the
                // group, same as false.
                let passes = truths
                    .get(group.row_idx)
                    .copied()
                    .flatten()
                    .unwrap_or(false);
                if !passes {
                    continue;
                }
            }

            let mut row: Vec<PlanValue> = Vec::with_capacity(output_columns.len());
            for output in &output_columns {
                match output.source {
                    OutputSource::TableColumn { index } => {
                        let value = llkv_plan::plan_value_from_array(
                            group.batch.column(index),
                            group.row_idx,
                        )?;
                        row.push(value);
                    }
                    OutputSource::Computed { projection_index } => {
                        // Computed projections on this path contain no
                        // aggregates (checked above), so they can be evaluated
                        // against the representative row directly.
                        let expr = match &plan.projections[projection_index] {
                            SelectProjection::Computed { expr, .. } => expr,
                            _ => unreachable!("projection index mismatch for computed column"),
                        };
                        let mut context = CrossProductExpressionContext::new(
                            group.batch.schema().as_ref(),
                            column_lookup_map.clone(),
                        )?;
                        context.reset();
                        let evaluated = self.evaluate_projection_expression(
                            &mut context,
                            expr,
                            &group.batch,
                            &FxHashMap::default(),
                        )?;
                        let value = llkv_plan::plan_value_from_array(&evaluated, group.row_idx)?;
                        row.push(value);
                    }
                }
            }
            rows.push(row);
        }

        let fields: Vec<Field> = output_columns
            .into_iter()
            .map(|output| output.field)
            .collect();
        let schema = Arc::new(Schema::new(fields));

        let mut batch = rows_to_record_batch(Arc::clone(&schema), &rows)?;

        // Post-passes in SQL order: DISTINCT first, then ORDER BY.
        if plan.distinct && batch.num_rows() > 0 {
            let mut state = DistinctState::default();
            batch = match distinct_filter_batch(batch, &mut state)? {
                Some(filtered) => filtered,
                None => RecordBatch::new_empty(Arc::clone(&schema)),
            };
        }

        if !plan.order_by.is_empty() && batch.num_rows() > 0 {
            batch = sort_record_batch_with_order(&schema, &batch, &plan.order_by)?;
        }

        Ok(SelectExecution::new_single_batch(
            display_name,
            schema,
            batch,
        ))
    }
2615
2616    fn build_group_by_output_columns(
2617        &self,
2618        plan: &SelectPlan,
2619        base_schema: &Schema,
2620        column_lookup_map: &FxHashMap<String, usize>,
2621        _sample_batch: &RecordBatch,
2622    ) -> ExecutorResult<Vec<OutputColumn>> {
2623        let projections = if plan.projections.is_empty() {
2624            vec![SelectProjection::AllColumns]
2625        } else {
2626            plan.projections.clone()
2627        };
2628
2629        let mut columns: Vec<OutputColumn> = Vec::new();
2630
2631        for (proj_idx, projection) in projections.iter().enumerate() {
2632            match projection {
2633                SelectProjection::AllColumns => {
2634                    for (index, field) in base_schema.fields().iter().enumerate() {
2635                        columns.push(OutputColumn {
2636                            field: (**field).clone(),
2637                            source: OutputSource::TableColumn { index },
2638                        });
2639                    }
2640                }
2641                SelectProjection::AllColumnsExcept { exclude } => {
2642                    let exclude_lower: FxHashSet<String> = exclude
2643                        .iter()
2644                        .map(|name| name.to_ascii_lowercase())
2645                        .collect();
2646                    for (index, field) in base_schema.fields().iter().enumerate() {
2647                        if !exclude_lower.contains(&field.name().to_ascii_lowercase()) {
2648                            columns.push(OutputColumn {
2649                                field: (**field).clone(),
2650                                source: OutputSource::TableColumn { index },
2651                            });
2652                        }
2653                    }
2654                }
2655                SelectProjection::Column { name, alias } => {
2656                    let lookup_key = name.to_ascii_lowercase();
2657                    let index = column_lookup_map.get(&lookup_key).ok_or_else(|| {
2658                        Error::InvalidArgumentError(format!(
2659                            "column '{}' not found in GROUP BY result",
2660                            name
2661                        ))
2662                    })?;
2663                    let field = base_schema.field(*index);
2664                    let field = Field::new(
2665                        alias.as_ref().unwrap_or(name).clone(),
2666                        field.data_type().clone(),
2667                        field.is_nullable(),
2668                    );
2669                    columns.push(OutputColumn {
2670                        field,
2671                        source: OutputSource::TableColumn { index: *index },
2672                    });
2673                }
2674                SelectProjection::Computed { expr: _, alias } => {
2675                    // For GROUP BY with aggregates, we don't evaluate the expression here
2676                    // because it may contain aggregate functions. We'll evaluate it later
2677                    // per group. For now, assume Float64 as a conservative type.
2678                    let field = Field::new(alias.clone(), DataType::Float64, true);
2679                    columns.push(OutputColumn {
2680                        field,
2681                        source: OutputSource::Computed {
2682                            projection_index: proj_idx,
2683                        },
2684                    });
2685                }
2686            }
2687        }
2688
2689        if columns.is_empty() {
2690            for (index, field) in base_schema.fields().iter().enumerate() {
2691                columns.push(OutputColumn {
2692                    field: (**field).clone(),
2693                    source: OutputSource::TableColumn { index },
2694                });
2695            }
2696        }
2697
2698        Ok(columns)
2699    }
2700
    /// Apply SELECT projections to an already-materialized record batch.
    ///
    /// An empty projection list returns the batch unchanged. Named columns
    /// resolve case-insensitively through `lookup` (honoring aliases);
    /// computed projections are evaluated with a lazily-created
    /// [`CrossProductExpressionContext`], using `scalar_lookup` to resolve
    /// scalar subqueries referenced by the expressions.
    ///
    /// NOTE(review): the two wildcard arms behave asymmetrically —
    /// `AllColumns` REPLACES anything selected so far (assignment), while
    /// `AllColumnsExcept` APPENDS to it, and both `break`, discarding any
    /// later projections. Presumably the planner never emits wildcards mixed
    /// with other projections, so this is unobservable — confirm before
    /// relying on mixed projection lists here.
    ///
    /// # Errors
    /// Returns `InvalidArgumentError` for unknown column names, or an
    /// internal error if the projected columns cannot form a valid batch.
    fn project_record_batch(
        &self,
        batch: &RecordBatch,
        projections: &[SelectProjection],
        lookup: &FxHashMap<String, usize>,
        scalar_lookup: &FxHashMap<SubqueryId, &llkv_plan::ScalarSubquery>,
    ) -> ExecutorResult<RecordBatch> {
        if projections.is_empty() {
            return Ok(batch.clone());
        }

        let schema = batch.schema();
        let mut selected_fields: Vec<Arc<Field>> = Vec::new();
        let mut selected_columns: Vec<ArrayRef> = Vec::new();
        // Created on first Computed projection and reused for the rest.
        let mut expr_context: Option<CrossProductExpressionContext> = None;

        for proj in projections {
            match proj {
                SelectProjection::AllColumns => {
                    // Wildcard: take every column as-is and stop processing.
                    selected_fields = schema.fields().iter().cloned().collect();
                    selected_columns = batch.columns().to_vec();
                    break;
                }
                SelectProjection::AllColumnsExcept { exclude } => {
                    // Wildcard minus an exclusion set (compared
                    // case-insensitively); also stops processing.
                    let exclude_lower: FxHashSet<String> = exclude
                        .iter()
                        .map(|name| name.to_ascii_lowercase())
                        .collect();
                    for (idx, field) in schema.fields().iter().enumerate() {
                        let column_name = field.name().to_ascii_lowercase();
                        if !exclude_lower.contains(&column_name) {
                            selected_fields.push(Arc::clone(field));
                            selected_columns.push(batch.column(idx).clone());
                        }
                    }
                    break;
                }
                SelectProjection::Column { name, alias } => {
                    let normalized = name.to_ascii_lowercase();
                    let column_index = lookup.get(&normalized).ok_or_else(|| {
                        Error::InvalidArgumentError(format!(
                            "column '{}' not found in projection",
                            name
                        ))
                    })?;
                    let field = schema.field(*column_index);
                    // Alias (when present) renames the output; type and
                    // nullability are carried over from the source column.
                    let output_field = Arc::new(Field::new(
                        alias.as_ref().unwrap_or_else(|| field.name()),
                        field.data_type().clone(),
                        field.is_nullable(),
                    ));
                    selected_fields.push(output_field);
                    selected_columns.push(batch.column(*column_index).clone());
                }
                SelectProjection::Computed { expr, alias } => {
                    if expr_context.is_none() {
                        expr_context = Some(CrossProductExpressionContext::new(
                            schema.as_ref(),
                            lookup.clone(),
                        )?);
                    }
                    let context = expr_context
                        .as_mut()
                        .expect("projection context must be initialized");
                    // Reset clears per-expression state left by the previous
                    // computed projection.
                    context.reset();
                    let evaluated =
                        self.evaluate_projection_expression(context, expr, batch, scalar_lookup)?;
                    // Computed outputs are always declared nullable.
                    let field = Arc::new(Field::new(
                        alias.clone(),
                        evaluated.data_type().clone(),
                        true,
                    ));
                    selected_fields.push(field);
                    selected_columns.push(evaluated);
                }
            }
        }

        let projected_schema = Arc::new(Schema::new(selected_fields));
        RecordBatch::try_new(projected_schema, selected_columns)
            .map_err(|e| Error::Internal(format!("failed to apply projections: {}", e)))
    }
2783
2784    /// Execute GROUP BY with aggregates - computes aggregates per group
2785    fn execute_group_by_with_aggregates(
2786        &self,
2787        display_name: String,
2788        plan: SelectPlan,
2789        base_schema: Arc<Schema>,
2790        batches: Vec<RecordBatch>,
2791        column_lookup_map: FxHashMap<String, usize>,
2792    ) -> ExecutorResult<SelectExecution<P>> {
2793        use llkv_expr::expr::AggregateCall;
2794
2795        // Extract GROUP BY key indices
2796        let mut key_indices = Vec::with_capacity(plan.group_by.len());
2797        for column in &plan.group_by {
2798            let key = column.to_ascii_lowercase();
2799            let index = column_lookup_map.get(&key).ok_or_else(|| {
2800                Error::InvalidArgumentError(format!(
2801                    "column '{}' not found in GROUP BY input",
2802                    column
2803                ))
2804            })?;
2805            key_indices.push(*index);
2806        }
2807
2808        // Extract all aggregates from computed projections
2809        let mut aggregate_specs: Vec<(String, AggregateCall<String>)> = Vec::new();
2810        for proj in &plan.projections {
2811            if let SelectProjection::Computed { expr, .. } = proj {
2812                Self::collect_aggregates(expr, &mut aggregate_specs);
2813            }
2814        }
2815
2816        // Also extract aggregates from HAVING clause
2817        if let Some(having_expr) = &plan.having {
2818            Self::collect_aggregates_from_predicate(having_expr, &mut aggregate_specs);
2819        }
2820
2821        // Build a hash map for groups - collect row indices per group
2822        let mut group_index: FxHashMap<Vec<GroupKeyValue>, usize> = FxHashMap::default();
2823        let mut group_states: Vec<GroupAggregateState> = Vec::new();
2824
2825        // First pass: collect all rows for each group
2826        for (batch_idx, batch) in batches.iter().enumerate() {
2827            for row_idx in 0..batch.num_rows() {
2828                let key = build_group_key(batch, row_idx, &key_indices)?;
2829
2830                if let Some(&group_idx) = group_index.get(&key) {
2831                    // Add row to existing group
2832                    group_states[group_idx]
2833                        .row_locations
2834                        .push((batch_idx, row_idx));
2835                } else {
2836                    // New group
2837                    let group_idx = group_states.len();
2838                    group_index.insert(key, group_idx);
2839                    group_states.push(GroupAggregateState {
2840                        representative_batch_idx: batch_idx,
2841                        representative_row: row_idx,
2842                        row_locations: vec![(batch_idx, row_idx)],
2843                    });
2844                }
2845            }
2846        }
2847
2848        // Second pass: compute aggregates for each group using proper AggregateState
2849        let mut group_aggregate_values: Vec<FxHashMap<String, PlanValue>> =
2850            Vec::with_capacity(group_states.len());
2851
2852        for group_state in &group_states {
2853            tracing::debug!(
2854                "[GROUP BY] aggregate group rows={:?}",
2855                group_state.row_locations
2856            );
2857            // Create a mini-batch containing only rows from this group
2858            let group_batch = {
2859                let representative_batch = &batches[group_state.representative_batch_idx];
2860                let schema = representative_batch.schema();
2861
2862                // Collect row indices per batch while preserving scan order
2863                let mut per_batch_indices: Vec<(usize, Vec<u64>)> = Vec::new();
2864                for &(batch_idx, row_idx) in &group_state.row_locations {
2865                    if let Some((_, indices)) = per_batch_indices
2866                        .iter_mut()
2867                        .find(|(idx, _)| *idx == batch_idx)
2868                    {
2869                        indices.push(row_idx as u64);
2870                    } else {
2871                        per_batch_indices.push((batch_idx, vec![row_idx as u64]));
2872                    }
2873                }
2874
2875                let mut row_index_arrays: Vec<(usize, ArrayRef)> =
2876                    Vec::with_capacity(per_batch_indices.len());
2877                for (batch_idx, indices) in per_batch_indices {
2878                    let index_array: ArrayRef = Arc::new(arrow::array::UInt64Array::from(indices));
2879                    row_index_arrays.push((batch_idx, index_array));
2880                }
2881
2882                let mut arrays: Vec<ArrayRef> = Vec::with_capacity(schema.fields().len());
2883
2884                for col_idx in 0..schema.fields().len() {
2885                    let column_array = if row_index_arrays.len() == 1 {
2886                        let (batch_idx, indices) = &row_index_arrays[0];
2887                        let source_array = batches[*batch_idx].column(col_idx);
2888                        arrow::compute::take(source_array.as_ref(), indices.as_ref(), None)?
2889                    } else {
2890                        let mut partial_arrays: Vec<ArrayRef> =
2891                            Vec::with_capacity(row_index_arrays.len());
2892                        for (batch_idx, indices) in &row_index_arrays {
2893                            let source_array = batches[*batch_idx].column(col_idx);
2894                            let taken = arrow::compute::take(
2895                                source_array.as_ref(),
2896                                indices.as_ref(),
2897                                None,
2898                            )?;
2899                            partial_arrays.push(taken);
2900                        }
2901                        let slices: Vec<&dyn arrow::array::Array> =
2902                            partial_arrays.iter().map(|arr| arr.as_ref()).collect();
2903                        arrow::compute::concat(&slices)?
2904                    };
2905                    arrays.push(column_array);
2906                }
2907
2908                let batch = RecordBatch::try_new(Arc::clone(&schema), arrays)?;
2909                tracing::debug!("[GROUP BY] group batch rows={}", batch.num_rows());
2910                batch
2911            };
2912
2913            // Create AggregateState for each aggregate and compute
2914            let mut aggregate_values: FxHashMap<String, PlanValue> = FxHashMap::default();
2915
2916            // We might need to add computed columns to the batch for complex aggregate expressions
2917            let mut working_batch = group_batch.clone();
2918            let mut next_temp_col_idx = working_batch.num_columns();
2919
2920            for (key, agg_call) in &aggregate_specs {
2921                // Determine the column index and Arrow type that backs this aggregate input
2922                let (projection_idx, value_type) = match agg_call {
2923                    AggregateCall::CountStar => (None, None),
2924                    AggregateCall::Count { expr, .. }
2925                    | AggregateCall::Sum { expr, .. }
2926                    | AggregateCall::Avg { expr, .. }
2927                    | AggregateCall::Min(expr)
2928                    | AggregateCall::Max(expr)
2929                    | AggregateCall::CountNulls(expr) => {
2930                        if let Some(col_name) = try_extract_simple_column(expr) {
2931                            let idx = resolve_column_name_to_index(col_name, &column_lookup_map)
2932                                .ok_or_else(|| {
2933                                    Error::InvalidArgumentError(format!(
2934                                        "column '{}' not found for aggregate",
2935                                        col_name
2936                                    ))
2937                                })?;
2938                            let field_type = working_batch.schema().field(idx).data_type().clone();
2939                            (Some(idx), Some(field_type))
2940                        } else {
2941                            // Complex expression - evaluate it and add as temporary column
2942                            let mut computed_values = Vec::with_capacity(working_batch.num_rows());
2943                            for row_idx in 0..working_batch.num_rows() {
2944                                let value = Self::evaluate_expr_with_plan_value_aggregates_and_row(
2945                                    expr,
2946                                    &FxHashMap::default(),
2947                                    Some(&working_batch),
2948                                    Some(&column_lookup_map),
2949                                    row_idx,
2950                                )?;
2951                                computed_values.push(value);
2952                            }
2953
2954                            let computed_array = plan_values_to_arrow_array(&computed_values)?;
2955                            let computed_type = computed_array.data_type().clone();
2956
2957                            let mut new_columns: Vec<ArrayRef> = working_batch.columns().to_vec();
2958                            new_columns.push(computed_array);
2959
2960                            let temp_field = Arc::new(Field::new(
2961                                format!("__temp_agg_expr_{}", next_temp_col_idx),
2962                                computed_type.clone(),
2963                                true,
2964                            ));
2965                            let mut new_fields: Vec<Arc<Field>> =
2966                                working_batch.schema().fields().iter().cloned().collect();
2967                            new_fields.push(temp_field);
2968                            let new_schema = Arc::new(Schema::new(new_fields));
2969
2970                            working_batch = RecordBatch::try_new(new_schema, new_columns)?;
2971
2972                            let col_idx = next_temp_col_idx;
2973                            next_temp_col_idx += 1;
2974                            (Some(col_idx), Some(computed_type))
2975                        }
2976                    }
2977                };
2978
2979                // Build the AggregateSpec - use dummy field_id since projection_idx will override it
2980                let spec = Self::build_aggregate_spec_for_cross_product(
2981                    agg_call,
2982                    key.clone(),
2983                    value_type.clone(),
2984                )?;
2985
2986                let mut state = llkv_aggregate::AggregateState {
2987                    alias: key.clone(),
2988                    accumulator: llkv_aggregate::AggregateAccumulator::new_with_projection_index(
2989                        &spec,
2990                        projection_idx,
2991                        None,
2992                    )?,
2993                    override_value: None,
2994                };
2995
2996                // Update with the working batch (which may have temporary columns)
2997                state.update(&working_batch)?;
2998
2999                // Finalize and extract value
3000                let (_field, array) = state.finalize()?;
3001                let value = llkv_plan::plan_value_from_array(&array, 0)?;
3002                tracing::debug!(
3003                    "[GROUP BY] aggregate result key={:?} value={:?}",
3004                    key,
3005                    value
3006                );
3007                aggregate_values.insert(key.clone(), value);
3008            }
3009
3010            group_aggregate_values.push(aggregate_values);
3011        }
3012
3013        // Build result rows
3014        let output_columns = self.build_group_by_output_columns(
3015            &plan,
3016            base_schema.as_ref(),
3017            &column_lookup_map,
3018            batches
3019                .first()
3020                .unwrap_or(&RecordBatch::new_empty(Arc::clone(&base_schema))),
3021        )?;
3022
3023        let mut rows: Vec<Vec<PlanValue>> = Vec::with_capacity(group_states.len());
3024
3025        for (group_idx, group_state) in group_states.iter().enumerate() {
3026            let aggregate_values = &group_aggregate_values[group_idx];
3027            let representative_batch = &batches[group_state.representative_batch_idx];
3028
3029            let mut row: Vec<PlanValue> = Vec::with_capacity(output_columns.len());
3030            for output in &output_columns {
3031                match output.source {
3032                    OutputSource::TableColumn { index } => {
3033                        // Use the representative row from this group
3034                        let value = llkv_plan::plan_value_from_array(
3035                            representative_batch.column(index),
3036                            group_state.representative_row,
3037                        )?;
3038                        row.push(value);
3039                    }
3040                    OutputSource::Computed { projection_index } => {
3041                        let expr = match &plan.projections[projection_index] {
3042                            SelectProjection::Computed { expr, .. } => expr,
3043                            _ => unreachable!("projection index mismatch for computed column"),
3044                        };
3045                        // Evaluate expression with aggregates and row context
3046                        let value = Self::evaluate_expr_with_plan_value_aggregates_and_row(
3047                            expr,
3048                            aggregate_values,
3049                            Some(representative_batch),
3050                            Some(&column_lookup_map),
3051                            group_state.representative_row,
3052                        )?;
3053                        row.push(value);
3054                    }
3055                }
3056            }
3057            rows.push(row);
3058        }
3059
3060        // Apply HAVING clause if present
3061        let filtered_rows = if let Some(having) = &plan.having {
3062            let mut filtered = Vec::new();
3063            for (row_idx, row) in rows.iter().enumerate() {
3064                let aggregate_values = &group_aggregate_values[row_idx];
3065                let group_state = &group_states[row_idx];
3066                let representative_batch = &batches[group_state.representative_batch_idx];
3067                // Evaluate HAVING expression recursively
3068                let passes = Self::evaluate_having_expr(
3069                    having,
3070                    aggregate_values,
3071                    representative_batch,
3072                    &column_lookup_map,
3073                    group_state.representative_row,
3074                )?;
3075                // Only include row if HAVING evaluates to TRUE (not FALSE or NULL)
3076                if matches!(passes, Some(true)) {
3077                    filtered.push(row.clone());
3078                }
3079            }
3080            filtered
3081        } else {
3082            rows
3083        };
3084
3085        let fields: Vec<Field> = output_columns
3086            .into_iter()
3087            .map(|output| output.field)
3088            .collect();
3089        let schema = Arc::new(Schema::new(fields));
3090
3091        let mut batch = rows_to_record_batch(Arc::clone(&schema), &filtered_rows)?;
3092
3093        if plan.distinct && batch.num_rows() > 0 {
3094            let mut state = DistinctState::default();
3095            batch = match distinct_filter_batch(batch, &mut state)? {
3096                Some(filtered) => filtered,
3097                None => RecordBatch::new_empty(Arc::clone(&schema)),
3098            };
3099        }
3100
3101        if !plan.order_by.is_empty() && batch.num_rows() > 0 {
3102            batch = sort_record_batch_with_order(&schema, &batch, &plan.order_by)?;
3103        }
3104
3105        Ok(SelectExecution::new_single_batch(
3106            display_name,
3107            schema,
3108            batch,
3109        ))
3110    }
3111
    /// Execute a SELECT whose projections are all top-level aggregates
    /// (COUNT(*), COUNT, SUM, MIN, MAX, COUNT_NULLS) over a single table.
    ///
    /// Builds an [`AggregateSpec`] per planned aggregate (validating input
    /// types for SUM/MIN/MAX), streams the table once while updating every
    /// accumulator, then finalizes the states into a single-row batch.
    ///
    /// * `table` - executor-side table handle providing schema and scan access.
    /// * `display_name` - name reported on the resulting [`SelectExecution`].
    /// * `plan` - the SELECT plan; only its aggregates/filter/distinct are used here.
    /// * `row_filter` - optional MVCC row-visibility filter; when present the
    ///   COUNT(*) fast path below is disabled.
    ///
    /// Errors on unknown columns, unsupported aggregate input types,
    /// DISTINCT COUNT_NULLS, an empty aggregate list, or scan failures.
    fn execute_aggregates(
        &self,
        table: Arc<ExecutorTable<P>>,
        display_name: String,
        plan: SelectPlan,
        row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
    ) -> ExecutorResult<SelectExecution<P>> {
        let table_ref = table.as_ref();
        let distinct = plan.distinct;
        // Translate each planned aggregate into an AggregateSpec, resolving
        // column names against the table schema as we go.
        let mut specs: Vec<AggregateSpec> = Vec::with_capacity(plan.aggregates.len());
        for aggregate in plan.aggregates {
            match aggregate {
                AggregateExpr::CountStar { alias } => {
                    // COUNT(*): no backing column (field_id: None).
                    specs.push(AggregateSpec {
                        alias,
                        kind: AggregateKind::Count {
                            field_id: None,
                            distinct: false,
                        },
                    });
                }
                AggregateExpr::Column {
                    column,
                    alias,
                    function,
                    distinct,
                } => {
                    let col = table_ref.schema.resolve(&column).ok_or_else(|| {
                        Error::InvalidArgumentError(format!(
                            "unknown column '{}' in aggregate",
                            column
                        ))
                    })?;

                    let kind = match function {
                        AggregateFunction::Count => AggregateKind::Count {
                            field_id: Some(col.field_id),
                            distinct,
                        },
                        // SUM/MIN/MAX only accept Int64/Float64 inputs on this path.
                        AggregateFunction::SumInt64 => {
                            let input_type = Self::validate_aggregate_type(
                                Some(col.data_type.clone()),
                                "SUM",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Sum {
                                field_id: col.field_id,
                                data_type: input_type,
                                distinct,
                            }
                        }
                        AggregateFunction::MinInt64 => {
                            let input_type = Self::validate_aggregate_type(
                                Some(col.data_type.clone()),
                                "MIN",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Min {
                                field_id: col.field_id,
                                data_type: input_type,
                            }
                        }
                        AggregateFunction::MaxInt64 => {
                            let input_type = Self::validate_aggregate_type(
                                Some(col.data_type.clone()),
                                "MAX",
                                &[DataType::Int64, DataType::Float64],
                            )?;
                            AggregateKind::Max {
                                field_id: col.field_id,
                                data_type: input_type,
                            }
                        }
                        AggregateFunction::CountNulls => {
                            if distinct {
                                return Err(Error::InvalidArgumentError(
                                    "DISTINCT is not supported for COUNT_NULLS".into(),
                                ));
                            }
                            AggregateKind::CountNulls {
                                field_id: col.field_id,
                            }
                        }
                    };
                    specs.push(AggregateSpec { alias, kind });
                }
            }
        }

        if specs.is_empty() {
            return Err(Error::InvalidArgumentError(
                "aggregate query requires at least one aggregate expression".into(),
            ));
        }

        // With no WHERE clause we still need a predicate for scan_stream, so
        // fall back to a full-table-scan filter over the first column.
        let had_filter = plan.filter.is_some();
        let filter_expr = match &plan.filter {
            Some(filter_wrapper) => {
                if !filter_wrapper.subqueries.is_empty() {
                    return Err(Error::InvalidArgumentError(
                        "EXISTS subqueries not yet implemented in aggregate queries".into(),
                    ));
                }
                crate::translation::expression::translate_predicate(
                    filter_wrapper.predicate.clone(),
                    table.schema.as_ref(),
                    |name| Error::InvalidArgumentError(format!("unknown column '{}'", name)),
                )?
            }
            None => {
                let field_id = table.schema.first_field_id().ok_or_else(|| {
                    Error::InvalidArgumentError(
                        "table has no columns; cannot perform aggregate scan".into(),
                    )
                })?;
                crate::translation::expression::full_table_scan_filter(field_id)
            }
        };

        // Build projections and track which projection index each spec uses
        // (specs without a backing column, e.g. COUNT(*), map to None).
        let mut projections = Vec::new();
        let mut spec_to_projection: Vec<Option<usize>> = Vec::with_capacity(specs.len());

        for spec in &specs {
            if let Some(field_id) = spec.kind.field_id() {
                let proj_idx = projections.len();
                spec_to_projection.push(Some(proj_idx));
                projections.push(ScanProjection::from(StoreProjection::with_alias(
                    LogicalFieldId::for_user(table.table.table_id(), field_id),
                    table
                        .schema
                        .column_by_field_id(field_id)
                        .map(|c| c.name.clone())
                        .unwrap_or_else(|| format!("col{field_id}")),
                )));
            } else {
                spec_to_projection.push(None);
            }
        }

        // scan_stream needs at least one projection even if every aggregate is
        // COUNT(*); project the first column as a placeholder.
        if projections.is_empty() {
            let field_id = table.schema.first_field_id().ok_or_else(|| {
                Error::InvalidArgumentError(
                    "table has no columns; cannot perform aggregate scan".into(),
                )
            })?;
            projections.push(ScanProjection::from(StoreProjection::with_alias(
                LogicalFieldId::for_user(table.table.table_id(), field_id),
                table
                    .schema
                    .column_by_field_id(field_id)
                    .map(|c| c.name.clone())
                    .unwrap_or_else(|| format!("col{field_id}")),
            )));
        }

        // include_nulls so COUNT_NULLS and null-aware accumulators see every row.
        let options = ScanStreamOptions {
            include_nulls: true,
            order: None,
            row_id_filter: row_filter.clone(),
        };

        let mut states: Vec<AggregateState> = Vec::with_capacity(specs.len());
        // MVCC Note: We cannot use the total_rows shortcut when MVCC visibility filtering
        // is enabled, because some rows may be invisible due to uncommitted or aborted transactions.
        // Always scan to apply proper visibility rules.
        let mut count_star_override: Option<i64> = None;
        if !had_filter && row_filter.is_none() {
            // Only use shortcut if no filter AND no MVCC row filtering
            let total_rows = table.total_rows.load(Ordering::SeqCst);
            tracing::debug!(
                "[AGGREGATE] Using COUNT(*) shortcut: total_rows={}",
                total_rows
            );
            if total_rows > i64::MAX as u64 {
                return Err(Error::InvalidArgumentError(
                    "COUNT(*) result exceeds supported range".into(),
                ));
            }
            count_star_override = Some(total_rows as i64);
        } else {
            tracing::debug!(
                "[AGGREGATE] NOT using COUNT(*) shortcut: had_filter={}, has_row_filter={}",
                had_filter,
                row_filter.is_some()
            );
        }

        // One AggregateState per spec; COUNT(*) states carry the precomputed
        // override value (if any) so finalize can skip the scanned count.
        for (idx, spec) in specs.iter().enumerate() {
            states.push(AggregateState {
                alias: spec.alias.clone(),
                accumulator: AggregateAccumulator::new_with_projection_index(
                    spec,
                    spec_to_projection[idx],
                    count_star_override,
                )?,
                override_value: match &spec.kind {
                    AggregateKind::Count { field_id: None, .. } => {
                        tracing::debug!(
                            "[AGGREGATE] CountStar override_value={:?}",
                            count_star_override
                        );
                        count_star_override
                    }
                    _ => None,
                },
            });
        }

        // Stream batches through every accumulator. The callback cannot return
        // a Result, so the first error is captured and re-raised afterwards.
        // NOTE(review): the struct-update below re-sets row_id_filter to the
        // same value `options` already holds — the override looks redundant.
        let mut error: Option<Error> = None;
        match table.table.scan_stream(
            projections,
            &filter_expr,
            ScanStreamOptions {
                row_id_filter: row_filter.clone(),
                ..options
            },
            |batch| {
                if error.is_some() {
                    return;
                }
                for state in &mut states {
                    if let Err(err) = state.update(&batch) {
                        error = Some(err);
                        return;
                    }
                }
            },
        ) {
            Ok(()) => {}
            Err(llkv_result::Error::NotFound) => {
                // Treat missing storage keys as an empty result set. This occurs
                // for freshly created tables that have no persisted chunks yet.
            }
            Err(err) => return Err(err),
        }
        if let Some(err) = error {
            return Err(err);
        }

        // Finalize each accumulator into a (Field, single-value array) pair.
        let mut fields = Vec::with_capacity(states.len());
        let mut arrays: Vec<ArrayRef> = Vec::with_capacity(states.len());
        for state in states {
            let (field, array) = state.finalize()?;
            fields.push(field);
            arrays.push(array);
        }

        let schema = Arc::new(Schema::new(fields));
        let mut batch = RecordBatch::try_new(Arc::clone(&schema), arrays)?;

        // SELECT DISTINCT on a one-row aggregate result is a no-op in practice,
        // but apply it for uniformity with the other execution paths.
        if distinct {
            let mut state = DistinctState::default();
            batch = match distinct_filter_batch(batch, &mut state)? {
                Some(filtered) => filtered,
                None => RecordBatch::new_empty(Arc::clone(&schema)),
            };
        }

        let schema = batch.schema();

        Ok(SelectExecution::new_single_batch(
            display_name,
            schema,
            batch,
        ))
    }
3379
3380    /// Execute a query where computed projections contain embedded aggregates
3381    /// This extracts aggregates, computes them, then evaluates the scalar expressions
3382    fn execute_computed_aggregates(
3383        &self,
3384        table: Arc<ExecutorTable<P>>,
3385        display_name: String,
3386        plan: SelectPlan,
3387        row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
3388    ) -> ExecutorResult<SelectExecution<P>> {
3389        use arrow::array::Int64Array;
3390        use llkv_expr::expr::AggregateCall;
3391
3392        let table_ref = table.as_ref();
3393        let distinct = plan.distinct;
3394
3395        // First, extract all unique aggregates from the projections
3396        let mut aggregate_specs: Vec<(String, AggregateCall<String>)> = Vec::new();
3397        for proj in &plan.projections {
3398            if let SelectProjection::Computed { expr, .. } = proj {
3399                Self::collect_aggregates(expr, &mut aggregate_specs);
3400            }
3401        }
3402
3403        // Compute the aggregates using the existing aggregate execution infrastructure
3404        let filter_predicate = plan
3405            .filter
3406            .as_ref()
3407            .map(|wrapper| {
3408                if !wrapper.subqueries.is_empty() {
3409                    return Err(Error::InvalidArgumentError(
3410                        "EXISTS subqueries not yet implemented with aggregates".into(),
3411                    ));
3412                }
3413                Ok(wrapper.predicate.clone())
3414            })
3415            .transpose()?;
3416
3417        let computed_aggregates = self.compute_aggregate_values(
3418            table.clone(),
3419            &filter_predicate,
3420            &aggregate_specs,
3421            row_filter.clone(),
3422        )?;
3423
3424        // Now build the final projections by evaluating expressions with aggregates substituted
3425        let mut fields = Vec::with_capacity(plan.projections.len());
3426        let mut arrays: Vec<ArrayRef> = Vec::with_capacity(plan.projections.len());
3427
3428        for proj in &plan.projections {
3429            match proj {
3430                SelectProjection::AllColumns | SelectProjection::AllColumnsExcept { .. } => {
3431                    return Err(Error::InvalidArgumentError(
3432                        "Wildcard projections not supported with computed aggregates".into(),
3433                    ));
3434                }
3435                SelectProjection::Column { name, alias } => {
3436                    let col = table_ref.schema.resolve(name).ok_or_else(|| {
3437                        Error::InvalidArgumentError(format!("unknown column '{}'", name))
3438                    })?;
3439                    let field_name = alias.as_ref().unwrap_or(name);
3440                    fields.push(arrow::datatypes::Field::new(
3441                        field_name,
3442                        col.data_type.clone(),
3443                        col.nullable,
3444                    ));
3445                    // For regular columns in an aggregate query, we'd need to handle GROUP BY
3446                    // For now, return an error as this is not supported
3447                    return Err(Error::InvalidArgumentError(
3448                        "Regular columns not supported in aggregate queries without GROUP BY"
3449                            .into(),
3450                    ));
3451                }
3452                SelectProjection::Computed { expr, alias } => {
3453                    // Evaluate the expression with aggregates substituted
3454                    let value = Self::evaluate_expr_with_aggregates(expr, &computed_aggregates)?;
3455
3456                    fields.push(arrow::datatypes::Field::new(alias, DataType::Int64, false));
3457
3458                    let array = Arc::new(Int64Array::from(vec![value])) as ArrayRef;
3459                    arrays.push(array);
3460                }
3461            }
3462        }
3463
3464        let schema = Arc::new(Schema::new(fields));
3465        let mut batch = RecordBatch::try_new(Arc::clone(&schema), arrays)?;
3466
3467        if distinct {
3468            let mut state = DistinctState::default();
3469            batch = match distinct_filter_batch(batch, &mut state)? {
3470                Some(filtered) => filtered,
3471                None => RecordBatch::new_empty(Arc::clone(&schema)),
3472            };
3473        }
3474
3475        let schema = batch.schema();
3476
3477        Ok(SelectExecution::new_single_batch(
3478            display_name,
3479            schema,
3480            batch,
3481        ))
3482    }
3483
3484    /// Build an AggregateSpec for cross product GROUP BY (no field_id metadata required).
3485    /// Uses dummy field_id=0 since projection_index will override it in new_with_projection_index.
3486    fn build_aggregate_spec_for_cross_product(
3487        agg_call: &llkv_expr::expr::AggregateCall<String>,
3488        alias: String,
3489        data_type: Option<DataType>,
3490    ) -> ExecutorResult<llkv_aggregate::AggregateSpec> {
3491        use llkv_expr::expr::AggregateCall;
3492
3493        let kind = match agg_call {
3494            AggregateCall::CountStar => llkv_aggregate::AggregateKind::Count {
3495                field_id: None,
3496                distinct: false,
3497            },
3498            AggregateCall::Count { distinct, .. } => llkv_aggregate::AggregateKind::Count {
3499                field_id: Some(0),
3500                distinct: *distinct,
3501            },
3502            AggregateCall::Sum { distinct, .. } => llkv_aggregate::AggregateKind::Sum {
3503                field_id: 0,
3504                data_type: Self::validate_aggregate_type(
3505                    data_type.clone(),
3506                    "SUM",
3507                    &[DataType::Int64, DataType::Float64],
3508                )?,
3509                distinct: *distinct,
3510            },
3511            AggregateCall::Avg { distinct, .. } => llkv_aggregate::AggregateKind::Avg {
3512                field_id: 0,
3513                data_type: Self::validate_aggregate_type(
3514                    data_type.clone(),
3515                    "AVG",
3516                    &[DataType::Int64, DataType::Float64],
3517                )?,
3518                distinct: *distinct,
3519            },
3520            AggregateCall::Min(_) => llkv_aggregate::AggregateKind::Min {
3521                field_id: 0,
3522                data_type: Self::validate_aggregate_type(
3523                    data_type.clone(),
3524                    "MIN",
3525                    &[DataType::Int64, DataType::Float64],
3526                )?,
3527            },
3528            AggregateCall::Max(_) => llkv_aggregate::AggregateKind::Max {
3529                field_id: 0,
3530                data_type: Self::validate_aggregate_type(
3531                    data_type.clone(),
3532                    "MAX",
3533                    &[DataType::Int64, DataType::Float64],
3534                )?,
3535            },
3536            AggregateCall::CountNulls(_) => {
3537                llkv_aggregate::AggregateKind::CountNulls { field_id: 0 }
3538            }
3539        };
3540
3541        Ok(llkv_aggregate::AggregateSpec { alias, kind })
3542    }
3543
3544    fn validate_aggregate_type(
3545        data_type: Option<DataType>,
3546        func_name: &str,
3547        allowed: &[DataType],
3548    ) -> ExecutorResult<DataType> {
3549        let dt = data_type.ok_or_else(|| {
3550            Error::Internal(format!(
3551                "missing input type metadata for {func_name} aggregate"
3552            ))
3553        })?;
3554        if allowed.iter().any(|candidate| candidate == &dt) {
3555            Ok(dt)
3556        } else {
3557            Err(Error::InvalidArgumentError(format!(
3558                "{func_name} aggregate not supported for column type {:?}",
3559                dt
3560            )))
3561        }
3562    }
3563
3564    /// Collect all aggregate calls from an expression
3565    fn collect_aggregates(
3566        expr: &ScalarExpr<String>,
3567        aggregates: &mut Vec<(String, llkv_expr::expr::AggregateCall<String>)>,
3568    ) {
3569        match expr {
3570            ScalarExpr::Aggregate(agg) => {
3571                // Create a unique key for this aggregate
3572                let key = format!("{:?}", agg);
3573                if !aggregates.iter().any(|(k, _)| k == &key) {
3574                    aggregates.push((key, agg.clone()));
3575                }
3576            }
3577            ScalarExpr::Binary { left, right, .. } => {
3578                Self::collect_aggregates(left, aggregates);
3579                Self::collect_aggregates(right, aggregates);
3580            }
3581            ScalarExpr::Compare { left, right, .. } => {
3582                Self::collect_aggregates(left, aggregates);
3583                Self::collect_aggregates(right, aggregates);
3584            }
3585            ScalarExpr::GetField { base, .. } => {
3586                Self::collect_aggregates(base, aggregates);
3587            }
3588            ScalarExpr::Cast { expr, .. } => {
3589                Self::collect_aggregates(expr, aggregates);
3590            }
3591            ScalarExpr::Not(expr) => {
3592                Self::collect_aggregates(expr, aggregates);
3593            }
3594            ScalarExpr::IsNull { expr, .. } => {
3595                Self::collect_aggregates(expr, aggregates);
3596            }
3597            ScalarExpr::Case {
3598                operand,
3599                branches,
3600                else_expr,
3601            } => {
3602                if let Some(inner) = operand.as_deref() {
3603                    Self::collect_aggregates(inner, aggregates);
3604                }
3605                for (when_expr, then_expr) in branches {
3606                    Self::collect_aggregates(when_expr, aggregates);
3607                    Self::collect_aggregates(then_expr, aggregates);
3608                }
3609                if let Some(inner) = else_expr.as_deref() {
3610                    Self::collect_aggregates(inner, aggregates);
3611                }
3612            }
3613            ScalarExpr::Coalesce(items) => {
3614                for item in items {
3615                    Self::collect_aggregates(item, aggregates);
3616                }
3617            }
3618            ScalarExpr::Column(_) | ScalarExpr::Literal(_) => {}
3619            ScalarExpr::ScalarSubquery(_) => {}
3620        }
3621    }
3622
3623    /// Collect aggregates from predicate expressions (Expr, not ScalarExpr)
3624    fn collect_aggregates_from_predicate(
3625        expr: &llkv_expr::expr::Expr<String>,
3626        aggregates: &mut Vec<(String, llkv_expr::expr::AggregateCall<String>)>,
3627    ) {
3628        match expr {
3629            llkv_expr::expr::Expr::Compare { left, right, .. } => {
3630                Self::collect_aggregates(left, aggregates);
3631                Self::collect_aggregates(right, aggregates);
3632            }
3633            llkv_expr::expr::Expr::And(exprs) | llkv_expr::expr::Expr::Or(exprs) => {
3634                for e in exprs {
3635                    Self::collect_aggregates_from_predicate(e, aggregates);
3636                }
3637            }
3638            llkv_expr::expr::Expr::Not(inner) => {
3639                Self::collect_aggregates_from_predicate(inner, aggregates);
3640            }
3641            llkv_expr::expr::Expr::InList {
3642                expr: test_expr,
3643                list,
3644                ..
3645            } => {
3646                Self::collect_aggregates(test_expr, aggregates);
3647                for item in list {
3648                    Self::collect_aggregates(item, aggregates);
3649                }
3650            }
3651            llkv_expr::expr::Expr::IsNull { expr, .. } => {
3652                Self::collect_aggregates(expr, aggregates);
3653            }
3654            llkv_expr::expr::Expr::Literal(_) => {}
3655            llkv_expr::expr::Expr::Pred(_) => {}
3656            llkv_expr::expr::Expr::Exists(_) => {}
3657        }
3658    }
3659
3660    /// Compute the actual values for the aggregates
3661    fn compute_aggregate_values(
3662        &self,
3663        table: Arc<ExecutorTable<P>>,
3664        filter: &Option<llkv_expr::expr::Expr<'static, String>>,
3665        aggregate_specs: &[(String, llkv_expr::expr::AggregateCall<String>)],
3666        row_filter: Option<std::sync::Arc<dyn RowIdFilter<P>>>,
3667    ) -> ExecutorResult<FxHashMap<String, AggregateValue>> {
3668        use llkv_expr::expr::AggregateCall;
3669
3670        let table_ref = table.as_ref();
3671        let mut results =
3672            FxHashMap::with_capacity_and_hasher(aggregate_specs.len(), Default::default());
3673
3674        // Build aggregate specs for the aggregator
3675        let mut specs: Vec<AggregateSpec> = Vec::new();
3676        for (key, agg) in aggregate_specs {
3677            let kind = match agg {
3678                AggregateCall::CountStar => AggregateKind::Count {
3679                    field_id: None,
3680                    distinct: false,
3681                },
3682                AggregateCall::Count {
3683                    expr: col_expr,
3684                    distinct,
3685                } => {
3686                    let col_name = try_extract_simple_column(col_expr).ok_or_else(|| {
3687                        Error::InvalidArgumentError(
3688                            "complex expressions in COUNT not yet fully supported".into(),
3689                        )
3690                    })?;
3691                    let col = table_ref.schema.resolve(col_name).ok_or_else(|| {
3692                        Error::InvalidArgumentError(format!("unknown column '{}'", col_name))
3693                    })?;
3694                    AggregateKind::Count {
3695                        field_id: Some(col.field_id),
3696                        distinct: *distinct,
3697                    }
3698                }
3699                AggregateCall::Sum {
3700                    expr: col_expr,
3701                    distinct,
3702                } => {
3703                    let col_name = try_extract_simple_column(col_expr).ok_or_else(|| {
3704                        Error::InvalidArgumentError(
3705                            "complex expressions in SUM not yet fully supported".into(),
3706                        )
3707                    })?;
3708                    let col = table_ref.schema.resolve(col_name).ok_or_else(|| {
3709                        Error::InvalidArgumentError(format!("unknown column '{}'", col_name))
3710                    })?;
3711                    AggregateKind::Sum {
3712                        field_id: col.field_id,
3713                        data_type: Self::validate_aggregate_type(
3714                            Some(col.data_type.clone()),
3715                            "SUM",
3716                            &[DataType::Int64, DataType::Float64],
3717                        )?,
3718                        distinct: *distinct,
3719                    }
3720                }
3721                AggregateCall::Avg {
3722                    expr: col_expr,
3723                    distinct,
3724                } => {
3725                    let col_name = try_extract_simple_column(col_expr).ok_or_else(|| {
3726                        Error::InvalidArgumentError(
3727                            "complex expressions in AVG not yet fully supported".into(),
3728                        )
3729                    })?;
3730                    let col = table_ref.schema.resolve(col_name).ok_or_else(|| {
3731                        Error::InvalidArgumentError(format!("unknown column '{}'", col_name))
3732                    })?;
3733                    AggregateKind::Avg {
3734                        field_id: col.field_id,
3735                        data_type: Self::validate_aggregate_type(
3736                            Some(col.data_type.clone()),
3737                            "AVG",
3738                            &[DataType::Int64, DataType::Float64],
3739                        )?,
3740                        distinct: *distinct,
3741                    }
3742                }
3743                AggregateCall::Min(col_expr) => {
3744                    let col_name = try_extract_simple_column(col_expr).ok_or_else(|| {
3745                        Error::InvalidArgumentError(
3746                            "complex expressions in MIN not yet fully supported".into(),
3747                        )
3748                    })?;
3749                    let col = table_ref.schema.resolve(col_name).ok_or_else(|| {
3750                        Error::InvalidArgumentError(format!("unknown column '{}'", col_name))
3751                    })?;
3752                    AggregateKind::Min {
3753                        field_id: col.field_id,
3754                        data_type: Self::validate_aggregate_type(
3755                            Some(col.data_type.clone()),
3756                            "MIN",
3757                            &[DataType::Int64, DataType::Float64],
3758                        )?,
3759                    }
3760                }
3761                AggregateCall::Max(col_expr) => {
3762                    let col_name = try_extract_simple_column(col_expr).ok_or_else(|| {
3763                        Error::InvalidArgumentError(
3764                            "complex expressions in MAX not yet fully supported".into(),
3765                        )
3766                    })?;
3767                    let col = table_ref.schema.resolve(col_name).ok_or_else(|| {
3768                        Error::InvalidArgumentError(format!("unknown column '{}'", col_name))
3769                    })?;
3770                    AggregateKind::Max {
3771                        field_id: col.field_id,
3772                        data_type: Self::validate_aggregate_type(
3773                            Some(col.data_type.clone()),
3774                            "MAX",
3775                            &[DataType::Int64, DataType::Float64],
3776                        )?,
3777                    }
3778                }
3779                AggregateCall::CountNulls(col_expr) => {
3780                    let col_name = try_extract_simple_column(col_expr).ok_or_else(|| {
3781                        Error::InvalidArgumentError(
3782                            "complex expressions in CountNulls not yet fully supported".into(),
3783                        )
3784                    })?;
3785                    let col = table_ref.schema.resolve(col_name).ok_or_else(|| {
3786                        Error::InvalidArgumentError(format!("unknown column '{}'", col_name))
3787                    })?;
3788                    AggregateKind::CountNulls {
3789                        field_id: col.field_id,
3790                    }
3791                }
3792            };
3793            specs.push(AggregateSpec {
3794                alias: key.clone(),
3795                kind,
3796            });
3797        }
3798
3799        // Prepare filter and projections
3800        let filter_expr = match filter {
3801            Some(expr) => crate::translation::expression::translate_predicate(
3802                expr.clone(),
3803                table_ref.schema.as_ref(),
3804                |name| Error::InvalidArgumentError(format!("unknown column '{}'", name)),
3805            )?,
3806            None => {
3807                let field_id = table_ref.schema.first_field_id().ok_or_else(|| {
3808                    Error::InvalidArgumentError(
3809                        "table has no columns; cannot perform aggregate scan".into(),
3810                    )
3811                })?;
3812                crate::translation::expression::full_table_scan_filter(field_id)
3813            }
3814        };
3815
3816        let mut projections: Vec<ScanProjection> = Vec::new();
3817        let mut spec_to_projection: Vec<Option<usize>> = Vec::with_capacity(specs.len());
3818        let count_star_override: Option<i64> = None;
3819
3820        for spec in &specs {
3821            if let Some(field_id) = spec.kind.field_id() {
3822                spec_to_projection.push(Some(projections.len()));
3823                projections.push(ScanProjection::from(StoreProjection::with_alias(
3824                    LogicalFieldId::for_user(table.table.table_id(), field_id),
3825                    table
3826                        .schema
3827                        .column_by_field_id(field_id)
3828                        .map(|c| c.name.clone())
3829                        .unwrap_or_else(|| format!("col{field_id}")),
3830                )));
3831            } else {
3832                spec_to_projection.push(None);
3833            }
3834        }
3835
3836        if projections.is_empty() {
3837            let field_id = table_ref.schema.first_field_id().ok_or_else(|| {
3838                Error::InvalidArgumentError(
3839                    "table has no columns; cannot perform aggregate scan".into(),
3840                )
3841            })?;
3842            projections.push(ScanProjection::from(StoreProjection::with_alias(
3843                LogicalFieldId::for_user(table.table.table_id(), field_id),
3844                table
3845                    .schema
3846                    .column_by_field_id(field_id)
3847                    .map(|c| c.name.clone())
3848                    .unwrap_or_else(|| format!("col{field_id}")),
3849            )));
3850        }
3851
3852        let base_options = ScanStreamOptions {
3853            include_nulls: true,
3854            order: None,
3855            row_id_filter: None,
3856        };
3857
3858        let mut states: Vec<AggregateState> = Vec::with_capacity(specs.len());
3859        for (idx, spec) in specs.iter().enumerate() {
3860            states.push(AggregateState {
3861                alias: spec.alias.clone(),
3862                accumulator: AggregateAccumulator::new_with_projection_index(
3863                    spec,
3864                    spec_to_projection[idx],
3865                    count_star_override,
3866                )?,
3867                override_value: match &spec.kind {
3868                    AggregateKind::Count { field_id: None, .. } => count_star_override,
3869                    _ => None,
3870                },
3871            });
3872        }
3873
3874        let mut error: Option<Error> = None;
3875        match table.table.scan_stream(
3876            projections,
3877            &filter_expr,
3878            ScanStreamOptions {
3879                row_id_filter: row_filter.clone(),
3880                ..base_options
3881            },
3882            |batch| {
3883                if error.is_some() {
3884                    return;
3885                }
3886                for state in &mut states {
3887                    if let Err(err) = state.update(&batch) {
3888                        error = Some(err);
3889                        return;
3890                    }
3891                }
3892            },
3893        ) {
3894            Ok(()) => {}
3895            Err(llkv_result::Error::NotFound) => {}
3896            Err(err) => return Err(err),
3897        }
3898        if let Some(err) = error {
3899            return Err(err);
3900        }
3901
3902        // Extract the computed values
3903        for state in states {
3904            let alias = state.alias.clone();
3905            let (_field, array) = state.finalize()?;
3906
3907            // Try Int64Array first
3908            if let Some(int64_array) = array.as_any().downcast_ref::<arrow::array::Int64Array>() {
3909                if int64_array.len() != 1 {
3910                    return Err(Error::Internal(format!(
3911                        "Expected single value from aggregate, got {}",
3912                        int64_array.len()
3913                    )));
3914                }
3915                let value = if int64_array.is_null(0) {
3916                    AggregateValue::Int64(0)
3917                } else {
3918                    AggregateValue::Int64(int64_array.value(0))
3919                };
3920                results.insert(alias, value);
3921            }
3922            // Try Float64Array for AVG
3923            else if let Some(float64_array) =
3924                array.as_any().downcast_ref::<arrow::array::Float64Array>()
3925            {
3926                if float64_array.len() != 1 {
3927                    return Err(Error::Internal(format!(
3928                        "Expected single value from aggregate, got {}",
3929                        float64_array.len()
3930                    )));
3931                }
3932                let value = if float64_array.is_null(0) {
3933                    AggregateValue::Float64(0.0)
3934                } else {
3935                    AggregateValue::Float64(float64_array.value(0))
3936                };
3937                results.insert(alias, value);
3938            } else {
3939                return Err(Error::Internal(format!(
3940                    "Unexpected array type from aggregate: {:?}",
3941                    array.data_type()
3942                )));
3943            }
3944        }
3945
3946        Ok(results)
3947    }
3948
3949    fn evaluate_having_expr(
3950        expr: &llkv_expr::expr::Expr<String>,
3951        aggregates: &FxHashMap<String, PlanValue>,
3952        row_batch: &RecordBatch,
3953        column_lookup: &FxHashMap<String, usize>,
3954        row_idx: usize,
3955    ) -> ExecutorResult<Option<bool>> {
3956        fn compare_plan_values_for_pred(
3957            left: &PlanValue,
3958            right: &PlanValue,
3959        ) -> Option<std::cmp::Ordering> {
3960            match (left, right) {
3961                (PlanValue::Integer(l), PlanValue::Integer(r)) => Some(l.cmp(r)),
3962                (PlanValue::Float(l), PlanValue::Float(r)) => l.partial_cmp(r),
3963                (PlanValue::Integer(l), PlanValue::Float(r)) => (*l as f64).partial_cmp(r),
3964                (PlanValue::Float(l), PlanValue::Integer(r)) => l.partial_cmp(&(*r as f64)),
3965                (PlanValue::String(l), PlanValue::String(r)) => Some(l.cmp(r)),
3966                _ => None,
3967            }
3968        }
3969
3970        fn evaluate_ordering_predicate<F>(
3971            value: &PlanValue,
3972            literal: &Literal,
3973            predicate: F,
3974        ) -> ExecutorResult<Option<bool>>
3975        where
3976            F: Fn(std::cmp::Ordering) -> bool,
3977        {
3978            if matches!(value, PlanValue::Null) {
3979                return Ok(None);
3980            }
3981            let expected = llkv_plan::plan_value_from_literal(literal)?;
3982            if matches!(expected, PlanValue::Null) {
3983                return Ok(None);
3984            }
3985
3986            match compare_plan_values_for_pred(value, &expected) {
3987                Some(ordering) => Ok(Some(predicate(ordering))),
3988                None => Err(Error::InvalidArgumentError(
3989                    "unsupported HAVING comparison between column value and literal".into(),
3990                )),
3991            }
3992        }
3993
3994        match expr {
3995            llkv_expr::expr::Expr::Compare { left, op, right } => {
3996                let left_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
3997                    left,
3998                    aggregates,
3999                    Some(row_batch),
4000                    Some(column_lookup),
4001                    row_idx,
4002                )?;
4003                let right_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4004                    right,
4005                    aggregates,
4006                    Some(row_batch),
4007                    Some(column_lookup),
4008                    row_idx,
4009                )?;
4010
4011                // Coerce numeric types for comparison
4012                let (left_val, right_val) = match (&left_val, &right_val) {
4013                    (PlanValue::Integer(i), PlanValue::Float(_)) => {
4014                        (PlanValue::Float(*i as f64), right_val)
4015                    }
4016                    (PlanValue::Float(_), PlanValue::Integer(i)) => {
4017                        (left_val, PlanValue::Float(*i as f64))
4018                    }
4019                    _ => (left_val, right_val),
4020                };
4021
4022                match (left_val, right_val) {
4023                    // NULL comparisons return NULL (represented as None)
4024                    (PlanValue::Null, _) | (_, PlanValue::Null) => Ok(None),
4025                    (PlanValue::Integer(l), PlanValue::Integer(r)) => {
4026                        use llkv_expr::expr::CompareOp;
4027                        Ok(Some(match op {
4028                            CompareOp::Eq => l == r,
4029                            CompareOp::NotEq => l != r,
4030                            CompareOp::Lt => l < r,
4031                            CompareOp::LtEq => l <= r,
4032                            CompareOp::Gt => l > r,
4033                            CompareOp::GtEq => l >= r,
4034                        }))
4035                    }
4036                    (PlanValue::Float(l), PlanValue::Float(r)) => {
4037                        use llkv_expr::expr::CompareOp;
4038                        Ok(Some(match op {
4039                            CompareOp::Eq => l == r,
4040                            CompareOp::NotEq => l != r,
4041                            CompareOp::Lt => l < r,
4042                            CompareOp::LtEq => l <= r,
4043                            CompareOp::Gt => l > r,
4044                            CompareOp::GtEq => l >= r,
4045                        }))
4046                    }
4047                    _ => Ok(Some(false)),
4048                }
4049            }
4050            llkv_expr::expr::Expr::Not(inner) => {
4051                // NOT NULL = NULL, NOT TRUE = FALSE, NOT FALSE = TRUE
4052                match Self::evaluate_having_expr(
4053                    inner,
4054                    aggregates,
4055                    row_batch,
4056                    column_lookup,
4057                    row_idx,
4058                )? {
4059                    Some(b) => Ok(Some(!b)),
4060                    None => Ok(None), // NOT NULL = NULL
4061                }
4062            }
4063            llkv_expr::expr::Expr::InList {
4064                expr: test_expr,
4065                list,
4066                negated,
4067            } => {
4068                let test_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4069                    test_expr,
4070                    aggregates,
4071                    Some(row_batch),
4072                    Some(column_lookup),
4073                    row_idx,
4074                )?;
4075
4076                // SQL semantics: test_value IN (NULL, ...) handling
4077                // - If test_val is NULL, result is always NULL
4078                if matches!(test_val, PlanValue::Null) {
4079                    return Ok(None);
4080                }
4081
4082                let mut found = false;
4083                let mut has_null = false;
4084
4085                for list_item in list {
4086                    let list_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4087                        list_item,
4088                        aggregates,
4089                        Some(row_batch),
4090                        Some(column_lookup),
4091                        row_idx,
4092                    )?;
4093
4094                    // Track if list contains NULL
4095                    if matches!(list_val, PlanValue::Null) {
4096                        has_null = true;
4097                        continue;
4098                    }
4099
4100                    // Coerce for comparison
4101                    let matches = match (&test_val, &list_val) {
4102                        (PlanValue::Integer(a), PlanValue::Integer(b)) => a == b,
4103                        (PlanValue::Float(a), PlanValue::Float(b)) => a == b,
4104                        (PlanValue::Integer(a), PlanValue::Float(b)) => (*a as f64) == *b,
4105                        (PlanValue::Float(a), PlanValue::Integer(b)) => *a == (*b as f64),
4106                        (PlanValue::String(a), PlanValue::String(b)) => a == b,
4107                        _ => false,
4108                    };
4109
4110                    if matches {
4111                        found = true;
4112                        break;
4113                    }
4114                }
4115
4116                // SQL semantics for IN/NOT IN with NULL:
4117                // - value IN (...): TRUE if match found, FALSE if no match and no NULLs, NULL if no match but has NULLs
4118                // - value NOT IN (...): FALSE if match found, TRUE if no match and no NULLs, NULL if no match but has NULLs
4119                if *negated {
4120                    // NOT IN
4121                    Ok(if found {
4122                        Some(false)
4123                    } else if has_null {
4124                        None // NULL in list makes NOT IN return NULL
4125                    } else {
4126                        Some(true)
4127                    })
4128                } else {
4129                    // IN
4130                    Ok(if found {
4131                        Some(true)
4132                    } else if has_null {
4133                        None // No match but NULL in list returns NULL
4134                    } else {
4135                        Some(false)
4136                    })
4137                }
4138            }
4139            llkv_expr::expr::Expr::IsNull { expr, negated } => {
4140                // Evaluate the expression to get its value
4141                let val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4142                    expr,
4143                    aggregates,
4144                    Some(row_batch),
4145                    Some(column_lookup),
4146                    row_idx,
4147                )?;
4148
4149                // IS NULL / IS NOT NULL returns a boolean (not NULL) even when testing NULL
4150                // NULL IS NULL = TRUE
4151                // NULL IS NOT NULL = FALSE
4152                let is_null = matches!(val, PlanValue::Null);
4153                Ok(Some(if *negated { !is_null } else { is_null }))
4154            }
4155            llkv_expr::expr::Expr::Literal(val) => Ok(Some(*val)),
4156            llkv_expr::expr::Expr::And(exprs) => {
4157                // AND with NULL: FALSE AND anything = FALSE, TRUE AND NULL = NULL, NULL AND TRUE = NULL
4158                let mut has_null = false;
4159                for e in exprs {
4160                    match Self::evaluate_having_expr(
4161                        e,
4162                        aggregates,
4163                        row_batch,
4164                        column_lookup,
4165                        row_idx,
4166                    )? {
4167                        Some(false) => return Ok(Some(false)), // Short-circuit on FALSE
4168                        None => has_null = true,
4169                        Some(true) => {} // Continue
4170                    }
4171                }
4172                Ok(if has_null { None } else { Some(true) })
4173            }
4174            llkv_expr::expr::Expr::Or(exprs) => {
4175                // OR with NULL: TRUE OR anything = TRUE, FALSE OR NULL = NULL, NULL OR FALSE = NULL
4176                let mut has_null = false;
4177                for e in exprs {
4178                    match Self::evaluate_having_expr(
4179                        e,
4180                        aggregates,
4181                        row_batch,
4182                        column_lookup,
4183                        row_idx,
4184                    )? {
4185                        Some(true) => return Ok(Some(true)), // Short-circuit on TRUE
4186                        None => has_null = true,
4187                        Some(false) => {} // Continue
4188                    }
4189                }
4190                Ok(if has_null { None } else { Some(false) })
4191            }
4192            llkv_expr::expr::Expr::Pred(filter) => {
4193                // Handle Filter predicates (e.g., column IS NULL, column IS NOT NULL)
4194                // In HAVING context, filters reference columns in the grouped result
4195                use llkv_expr::expr::Operator;
4196
4197                let col_name = &filter.field_id;
4198                let col_idx = column_lookup
4199                    .get(&col_name.to_ascii_lowercase())
4200                    .ok_or_else(|| {
4201                        Error::InvalidArgumentError(format!(
4202                            "column '{}' not found in HAVING context",
4203                            col_name
4204                        ))
4205                    })?;
4206
4207                let value = llkv_plan::plan_value_from_array(row_batch.column(*col_idx), row_idx)?;
4208
4209                match &filter.op {
4210                    Operator::IsNull => Ok(Some(matches!(value, PlanValue::Null))),
4211                    Operator::IsNotNull => Ok(Some(!matches!(value, PlanValue::Null))),
4212                    Operator::Equals(expected) => {
4213                        // NULL comparisons return NULL
4214                        if matches!(value, PlanValue::Null) {
4215                            return Ok(None);
4216                        }
4217                        // Compare the value with the expected literal
4218                        let expected_value = llkv_plan::plan_value_from_literal(expected)?;
4219                        if matches!(expected_value, PlanValue::Null) {
4220                            return Ok(None);
4221                        }
4222                        Ok(Some(value == expected_value))
4223                    }
4224                    Operator::GreaterThan(expected) => {
4225                        evaluate_ordering_predicate(&value, expected, |ordering| {
4226                            ordering == std::cmp::Ordering::Greater
4227                        })
4228                    }
4229                    Operator::GreaterThanOrEquals(expected) => {
4230                        evaluate_ordering_predicate(&value, expected, |ordering| {
4231                            ordering == std::cmp::Ordering::Greater
4232                                || ordering == std::cmp::Ordering::Equal
4233                        })
4234                    }
4235                    Operator::LessThan(expected) => {
4236                        evaluate_ordering_predicate(&value, expected, |ordering| {
4237                            ordering == std::cmp::Ordering::Less
4238                        })
4239                    }
4240                    Operator::LessThanOrEquals(expected) => {
4241                        evaluate_ordering_predicate(&value, expected, |ordering| {
4242                            ordering == std::cmp::Ordering::Less
4243                                || ordering == std::cmp::Ordering::Equal
4244                        })
4245                    }
4246                    _ => {
4247                        // For other operators, fall back to a general error
4248                        // These should ideally be translated to Compare expressions instead of Pred
4249                        Err(Error::InvalidArgumentError(format!(
4250                            "Operator {:?} not supported for column predicates in HAVING clause",
4251                            filter.op
4252                        )))
4253                    }
4254                }
4255            }
4256            llkv_expr::expr::Expr::Exists(_) => Err(Error::InvalidArgumentError(
4257                "EXISTS subqueries not supported in HAVING clause".into(),
4258            )),
4259        }
4260    }
4261
4262    fn evaluate_expr_with_plan_value_aggregates_and_row(
4263        expr: &ScalarExpr<String>,
4264        aggregates: &FxHashMap<String, PlanValue>,
4265        row_batch: Option<&RecordBatch>,
4266        column_lookup: Option<&FxHashMap<String, usize>>,
4267        row_idx: usize,
4268    ) -> ExecutorResult<PlanValue> {
4269        use llkv_expr::expr::BinaryOp;
4270        use llkv_expr::literal::Literal;
4271
4272        match expr {
4273            ScalarExpr::Literal(Literal::Integer(v)) => Ok(PlanValue::Integer(*v as i64)),
4274            ScalarExpr::Literal(Literal::Float(v)) => Ok(PlanValue::Float(*v)),
4275            ScalarExpr::Literal(Literal::Boolean(v)) => {
4276                Ok(PlanValue::Integer(if *v { 1 } else { 0 }))
4277            }
4278            ScalarExpr::Literal(Literal::String(s)) => Ok(PlanValue::String(s.clone())),
4279            ScalarExpr::Literal(Literal::Null) => Ok(PlanValue::Null),
4280            ScalarExpr::Literal(Literal::Struct(_)) => Err(Error::InvalidArgumentError(
4281                "Struct literals not supported in aggregate expressions".into(),
4282            )),
4283            ScalarExpr::Column(col_name) => {
4284                // If row context is provided, look up the column value
4285                if let (Some(batch), Some(lookup)) = (row_batch, column_lookup) {
4286                    let col_idx = lookup.get(&col_name.to_ascii_lowercase()).ok_or_else(|| {
4287                        Error::InvalidArgumentError(format!("column '{}' not found", col_name))
4288                    })?;
4289                    llkv_plan::plan_value_from_array(batch.column(*col_idx), row_idx)
4290                } else {
4291                    Err(Error::InvalidArgumentError(
4292                        "Column references not supported in aggregate-only expressions".into(),
4293                    ))
4294                }
4295            }
4296            ScalarExpr::Compare { left, op, right } => {
4297                // Evaluate both sides of the comparison
4298                let left_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4299                    left,
4300                    aggregates,
4301                    row_batch,
4302                    column_lookup,
4303                    row_idx,
4304                )?;
4305                let right_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4306                    right,
4307                    aggregates,
4308                    row_batch,
4309                    column_lookup,
4310                    row_idx,
4311                )?;
4312
4313                // SQL three-valued logic: comparisons involving NULL yield NULL.
4314                if matches!(left_val, PlanValue::Null) || matches!(right_val, PlanValue::Null) {
4315                    return Ok(PlanValue::Null);
4316                }
4317
4318                // Coerce types for comparison
4319                let (left_val, right_val) = match (&left_val, &right_val) {
4320                    (PlanValue::Integer(i), PlanValue::Float(_)) => {
4321                        (PlanValue::Float(*i as f64), right_val)
4322                    }
4323                    (PlanValue::Float(_), PlanValue::Integer(i)) => {
4324                        (left_val, PlanValue::Float(*i as f64))
4325                    }
4326                    _ => (left_val, right_val),
4327                };
4328
4329                // Perform the comparison
4330                let result = match (&left_val, &right_val) {
4331                    (PlanValue::Integer(l), PlanValue::Integer(r)) => {
4332                        use llkv_expr::expr::CompareOp;
4333                        match op {
4334                            CompareOp::Eq => l == r,
4335                            CompareOp::NotEq => l != r,
4336                            CompareOp::Lt => l < r,
4337                            CompareOp::LtEq => l <= r,
4338                            CompareOp::Gt => l > r,
4339                            CompareOp::GtEq => l >= r,
4340                        }
4341                    }
4342                    (PlanValue::Float(l), PlanValue::Float(r)) => {
4343                        use llkv_expr::expr::CompareOp;
4344                        match op {
4345                            CompareOp::Eq => l == r,
4346                            CompareOp::NotEq => l != r,
4347                            CompareOp::Lt => l < r,
4348                            CompareOp::LtEq => l <= r,
4349                            CompareOp::Gt => l > r,
4350                            CompareOp::GtEq => l >= r,
4351                        }
4352                    }
4353                    (PlanValue::String(l), PlanValue::String(r)) => {
4354                        use llkv_expr::expr::CompareOp;
4355                        match op {
4356                            CompareOp::Eq => l == r,
4357                            CompareOp::NotEq => l != r,
4358                            CompareOp::Lt => l < r,
4359                            CompareOp::LtEq => l <= r,
4360                            CompareOp::Gt => l > r,
4361                            CompareOp::GtEq => l >= r,
4362                        }
4363                    }
4364                    _ => false,
4365                };
4366
4367                // Return 1 for true, 0 for false (integer representation of boolean)
4368                Ok(PlanValue::Integer(if result { 1 } else { 0 }))
4369            }
4370            ScalarExpr::Not(inner) => {
4371                let value = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4372                    inner,
4373                    aggregates,
4374                    row_batch,
4375                    column_lookup,
4376                    row_idx,
4377                )?;
4378                match value {
4379                    PlanValue::Integer(v) => Ok(PlanValue::Integer(if v != 0 { 0 } else { 1 })),
4380                    PlanValue::Float(v) => Ok(PlanValue::Integer(if v != 0.0 { 0 } else { 1 })),
4381                    PlanValue::Null => Ok(PlanValue::Null),
4382                    other => Err(Error::InvalidArgumentError(format!(
4383                        "logical NOT does not support value {other:?}"
4384                    ))),
4385                }
4386            }
4387            ScalarExpr::IsNull { expr, negated } => {
4388                let value = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4389                    expr,
4390                    aggregates,
4391                    row_batch,
4392                    column_lookup,
4393                    row_idx,
4394                )?;
4395                let is_null = matches!(value, PlanValue::Null);
4396                let condition = if is_null { !negated } else { *negated };
4397                Ok(PlanValue::Integer(if condition { 1 } else { 0 }))
4398            }
4399            ScalarExpr::Aggregate(agg) => {
4400                let key = format!("{:?}", agg);
4401                aggregates
4402                    .get(&key)
4403                    .cloned()
4404                    .ok_or_else(|| Error::Internal(format!("Aggregate value not found: {}", key)))
4405            }
4406            ScalarExpr::Binary { left, op, right } => {
4407                let left_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4408                    left,
4409                    aggregates,
4410                    row_batch,
4411                    column_lookup,
4412                    row_idx,
4413                )?;
4414                let right_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4415                    right,
4416                    aggregates,
4417                    row_batch,
4418                    column_lookup,
4419                    row_idx,
4420                )?;
4421
4422                // Convert to numeric values for binary operations
4423                let left_num = match left_val {
4424                    PlanValue::Integer(i) => i as f64,
4425                    PlanValue::Float(f) => f,
4426                    PlanValue::Null => return Ok(PlanValue::Null),
4427                    _ => {
4428                        return Err(Error::InvalidArgumentError(
4429                            "Non-numeric value in binary operation".into(),
4430                        ));
4431                    }
4432                };
4433                let right_num = match right_val {
4434                    PlanValue::Integer(i) => i as f64,
4435                    PlanValue::Float(f) => f,
4436                    PlanValue::Null => return Ok(PlanValue::Null),
4437                    _ => {
4438                        return Err(Error::InvalidArgumentError(
4439                            "Non-numeric value in binary operation".into(),
4440                        ));
4441                    }
4442                };
4443
4444                let result = match op {
4445                    BinaryOp::Add => left_num + right_num,
4446                    BinaryOp::Subtract => left_num - right_num,
4447                    BinaryOp::Multiply => left_num * right_num,
4448                    BinaryOp::Divide => {
4449                        if right_num == 0.0 {
4450                            return Ok(PlanValue::Null);
4451                        }
4452                        left_num / right_num
4453                    }
4454                    BinaryOp::Modulo => {
4455                        if right_num == 0.0 {
4456                            return Ok(PlanValue::Null);
4457                        }
4458                        left_num % right_num
4459                    }
4460                };
4461
4462                // Return as float if either operand was float, otherwise as integer
4463                if matches!(left_val, PlanValue::Float(_))
4464                    || matches!(right_val, PlanValue::Float(_))
4465                {
4466                    Ok(PlanValue::Float(result))
4467                } else {
4468                    Ok(PlanValue::Integer(result as i64))
4469                }
4470            }
4471            ScalarExpr::Cast { expr, data_type } => {
4472                // Evaluate the inner expression and cast it to the target type
4473                let value = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4474                    expr,
4475                    aggregates,
4476                    row_batch,
4477                    column_lookup,
4478                    row_idx,
4479                )?;
4480
4481                // Handle NULL values
4482                if matches!(value, PlanValue::Null) {
4483                    return Ok(PlanValue::Null);
4484                }
4485
4486                // Cast to the target type
4487                match data_type {
4488                    DataType::Int64 | DataType::Int32 | DataType::Int16 | DataType::Int8 => {
4489                        match value {
4490                            PlanValue::Integer(i) => Ok(PlanValue::Integer(i)),
4491                            PlanValue::Float(f) => Ok(PlanValue::Integer(f as i64)),
4492                            PlanValue::String(s) => {
4493                                s.parse::<i64>().map(PlanValue::Integer).map_err(|_| {
4494                                    Error::InvalidArgumentError(format!(
4495                                        "Cannot cast '{}' to integer",
4496                                        s
4497                                    ))
4498                                })
4499                            }
4500                            _ => Err(Error::InvalidArgumentError(format!(
4501                                "Cannot cast {:?} to integer",
4502                                value
4503                            ))),
4504                        }
4505                    }
4506                    DataType::Float64 | DataType::Float32 => match value {
4507                        PlanValue::Integer(i) => Ok(PlanValue::Float(i as f64)),
4508                        PlanValue::Float(f) => Ok(PlanValue::Float(f)),
4509                        PlanValue::String(s) => {
4510                            s.parse::<f64>().map(PlanValue::Float).map_err(|_| {
4511                                Error::InvalidArgumentError(format!("Cannot cast '{}' to float", s))
4512                            })
4513                        }
4514                        _ => Err(Error::InvalidArgumentError(format!(
4515                            "Cannot cast {:?} to float",
4516                            value
4517                        ))),
4518                    },
4519                    DataType::Utf8 | DataType::LargeUtf8 => match value {
4520                        PlanValue::String(s) => Ok(PlanValue::String(s)),
4521                        PlanValue::Integer(i) => Ok(PlanValue::String(i.to_string())),
4522                        PlanValue::Float(f) => Ok(PlanValue::String(f.to_string())),
4523                        _ => Err(Error::InvalidArgumentError(format!(
4524                            "Cannot cast {:?} to string",
4525                            value
4526                        ))),
4527                    },
4528                    _ => Err(Error::InvalidArgumentError(format!(
4529                        "CAST to {:?} not supported in aggregate expressions",
4530                        data_type
4531                    ))),
4532                }
4533            }
4534            ScalarExpr::Case {
4535                operand,
4536                branches,
4537                else_expr,
4538            } => {
4539                // Evaluate the operand if present (for simple CASE)
4540                let operand_value = if let Some(op) = operand {
4541                    Some(Self::evaluate_expr_with_plan_value_aggregates_and_row(
4542                        op,
4543                        aggregates,
4544                        row_batch,
4545                        column_lookup,
4546                        row_idx,
4547                    )?)
4548                } else {
4549                    None
4550                };
4551
4552                // Evaluate each WHEN/THEN branch
4553                for (when_expr, then_expr) in branches {
4554                    let matches = if let Some(ref op_val) = operand_value {
4555                        // Simple CASE compares using SQL equality semantics: NULL never matches.
4556                        let when_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4557                            when_expr,
4558                            aggregates,
4559                            row_batch,
4560                            column_lookup,
4561                            row_idx,
4562                        )?;
4563                        Self::simple_case_branch_matches(op_val, &when_val)
4564                    } else {
4565                        // Searched CASE: evaluate WHEN as boolean condition
4566                        let when_val = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4567                            when_expr,
4568                            aggregates,
4569                            row_batch,
4570                            column_lookup,
4571                            row_idx,
4572                        )?;
4573                        // Treat non-zero as true
4574                        match when_val {
4575                            PlanValue::Integer(i) => i != 0,
4576                            PlanValue::Float(f) => f != 0.0,
4577                            PlanValue::Null => false,
4578                            _ => false,
4579                        }
4580                    };
4581
4582                    if matches {
4583                        return Self::evaluate_expr_with_plan_value_aggregates_and_row(
4584                            then_expr,
4585                            aggregates,
4586                            row_batch,
4587                            column_lookup,
4588                            row_idx,
4589                        );
4590                    }
4591                }
4592
4593                // No branch matched, evaluate ELSE or return NULL
4594                if let Some(else_e) = else_expr {
4595                    Self::evaluate_expr_with_plan_value_aggregates_and_row(
4596                        else_e,
4597                        aggregates,
4598                        row_batch,
4599                        column_lookup,
4600                        row_idx,
4601                    )
4602                } else {
4603                    Ok(PlanValue::Null)
4604                }
4605            }
4606            ScalarExpr::Coalesce(exprs) => {
4607                // Return the first non-NULL value
4608                for expr in exprs {
4609                    let value = Self::evaluate_expr_with_plan_value_aggregates_and_row(
4610                        expr,
4611                        aggregates,
4612                        row_batch,
4613                        column_lookup,
4614                        row_idx,
4615                    )?;
4616                    if !matches!(value, PlanValue::Null) {
4617                        return Ok(value);
4618                    }
4619                }
4620                Ok(PlanValue::Null)
4621            }
4622            ScalarExpr::GetField { .. } => Err(Error::InvalidArgumentError(
4623                "GetField not supported in aggregate expressions".into(),
4624            )),
4625            ScalarExpr::ScalarSubquery(_) => Err(Error::InvalidArgumentError(
4626                "Scalar subqueries not supported in aggregate expressions".into(),
4627            )),
4628        }
4629    }
4630
4631    fn simple_case_branch_matches(operand: &PlanValue, candidate: &PlanValue) -> bool {
4632        if matches!(operand, PlanValue::Null) || matches!(candidate, PlanValue::Null) {
4633            return false;
4634        }
4635
4636        match (operand, candidate) {
4637            (PlanValue::Integer(left), PlanValue::Integer(right)) => left == right,
4638            (PlanValue::Integer(left), PlanValue::Float(right)) => (*left as f64) == *right,
4639            (PlanValue::Float(left), PlanValue::Integer(right)) => *left == (*right as f64),
4640            (PlanValue::Float(left), PlanValue::Float(right)) => left == right,
4641            (PlanValue::String(left), PlanValue::String(right)) => left == right,
4642            (PlanValue::Struct(left), PlanValue::Struct(right)) => left == right,
4643            _ => operand == candidate,
4644        }
4645    }
4646
4647    fn evaluate_expr_with_aggregates(
4648        expr: &ScalarExpr<String>,
4649        aggregates: &FxHashMap<String, AggregateValue>,
4650    ) -> ExecutorResult<i64> {
4651        use llkv_expr::expr::BinaryOp;
4652        use llkv_expr::literal::Literal;
4653
4654        match expr {
4655            ScalarExpr::Literal(Literal::Integer(v)) => Ok(*v as i64),
4656            ScalarExpr::Literal(Literal::Float(v)) => Ok(*v as i64),
4657            ScalarExpr::Literal(Literal::Boolean(v)) => Ok(if *v { 1 } else { 0 }),
4658            ScalarExpr::Literal(Literal::String(_)) => Err(Error::InvalidArgumentError(
4659                "String literals not supported in aggregate expressions".into(),
4660            )),
4661            ScalarExpr::Literal(Literal::Null) => Err(Error::InvalidArgumentError(
4662                "NULL literals not supported in aggregate expressions".into(),
4663            )),
4664            ScalarExpr::Literal(Literal::Struct(_)) => Err(Error::InvalidArgumentError(
4665                "Struct literals not supported in aggregate expressions".into(),
4666            )),
4667            ScalarExpr::Column(_) => Err(Error::InvalidArgumentError(
4668                "Column references not supported in aggregate-only expressions".into(),
4669            )),
4670            ScalarExpr::Compare { .. } => Err(Error::InvalidArgumentError(
4671                "Comparisons not supported in aggregate-only expressions".into(),
4672            )),
4673            ScalarExpr::Aggregate(agg) => {
4674                let key = format!("{:?}", agg);
4675                let value = aggregates.get(&key).ok_or_else(|| {
4676                    Error::Internal(format!("Aggregate value not found for key: {}", key))
4677                })?;
4678                // Convert to i64 for arithmetic (truncates floats)
4679                Ok(value.to_i64())
4680            }
4681            ScalarExpr::Not(inner) => {
4682                let value = Self::evaluate_expr_with_aggregates(inner, aggregates)?;
4683                Ok(if value != 0 { 0 } else { 1 })
4684            }
4685            ScalarExpr::IsNull { expr, negated } => {
4686                // Aggregates normalize NULL results to zero-length arrays which we treat as not null.
4687                let _ = Self::evaluate_expr_with_aggregates(expr, aggregates)?;
4688                Ok(if *negated { 1 } else { 0 })
4689            }
4690            ScalarExpr::Binary { left, op, right } => {
4691                let left_val = Self::evaluate_expr_with_aggregates(left, aggregates)?;
4692                let right_val = Self::evaluate_expr_with_aggregates(right, aggregates)?;
4693
4694                let result = match op {
4695                    BinaryOp::Add => left_val.checked_add(right_val),
4696                    BinaryOp::Subtract => left_val.checked_sub(right_val),
4697                    BinaryOp::Multiply => left_val.checked_mul(right_val),
4698                    BinaryOp::Divide => {
4699                        if right_val == 0 {
4700                            return Err(Error::InvalidArgumentError("Division by zero".into()));
4701                        }
4702                        left_val.checked_div(right_val)
4703                    }
4704                    BinaryOp::Modulo => {
4705                        if right_val == 0 {
4706                            return Err(Error::InvalidArgumentError("Modulo by zero".into()));
4707                        }
4708                        left_val.checked_rem(right_val)
4709                    }
4710                };
4711
4712                result.ok_or_else(|| {
4713                    Error::InvalidArgumentError("Arithmetic overflow in expression".into())
4714                })
4715            }
4716            ScalarExpr::Cast { .. } => Err(Error::InvalidArgumentError(
4717                "CAST is not supported in aggregate-only expressions".into(),
4718            )),
4719            ScalarExpr::GetField { .. } => Err(Error::InvalidArgumentError(
4720                "GetField not supported in aggregate-only expressions".into(),
4721            )),
4722            ScalarExpr::Case { .. } => Err(Error::InvalidArgumentError(
4723                "CASE not supported in aggregate-only expressions".into(),
4724            )),
4725            ScalarExpr::Coalesce(_) => Err(Error::InvalidArgumentError(
4726                "COALESCE not supported in aggregate-only expressions".into(),
4727            )),
4728            ScalarExpr::ScalarSubquery(_) => Err(Error::InvalidArgumentError(
4729                "Scalar subqueries not supported in aggregate-only expressions".into(),
4730            )),
4731        }
4732    }
4733}
4734
/// Per-query state for evaluating scalar expressions and predicates over a
/// cross-product (joined) record batch.
///
/// Output column names are mapped onto synthetic `FieldId`s so the shared
/// expression-translation machinery can be reused; decoded arrays are cached
/// per batch and dropped via `reset`.
struct CrossProductExpressionContext {
    /// Synthetic executor schema describing the cross-product output columns.
    schema: Arc<ExecutorSchema>,
    /// Maps each synthetic `FieldId` back to its column index in the batch.
    field_id_to_index: FxHashMap<FieldId, usize>,
    /// Cache of numeric decodings, keyed by `FieldId` (cleared by `reset`).
    numeric_cache: FxHashMap<FieldId, NumericArray>,
    /// Cache of typed column accessors, keyed by `FieldId` (cleared by `reset`).
    column_cache: FxHashMap<FieldId, ColumnAccessor>,
    /// Next unused `FieldId` for synthetic columns.
    next_field_id: FieldId,
}
4742
/// Typed, cheaply-cloneable view over a single column used by cross-product
/// filters.
///
/// Only the column types this code path supports are represented; `Null`
/// stands in for an all-null column of the given length.
#[derive(Clone)]
enum ColumnAccessor {
    Int64(Arc<Int64Array>),
    Float64(Arc<Float64Array>),
    Boolean(Arc<BooleanArray>),
    Utf8(Arc<StringArray>),
    /// All-null column; the payload is the row count.
    Null(usize),
}
4751
4752impl ColumnAccessor {
4753    fn from_array(array: &ArrayRef) -> ExecutorResult<Self> {
4754        match array.data_type() {
4755            DataType::Int64 => {
4756                let typed = array
4757                    .as_any()
4758                    .downcast_ref::<Int64Array>()
4759                    .ok_or_else(|| Error::Internal("expected Int64 array".into()))?
4760                    .clone();
4761                Ok(Self::Int64(Arc::new(typed)))
4762            }
4763            DataType::Float64 => {
4764                let typed = array
4765                    .as_any()
4766                    .downcast_ref::<Float64Array>()
4767                    .ok_or_else(|| Error::Internal("expected Float64 array".into()))?
4768                    .clone();
4769                Ok(Self::Float64(Arc::new(typed)))
4770            }
4771            DataType::Boolean => {
4772                let typed = array
4773                    .as_any()
4774                    .downcast_ref::<BooleanArray>()
4775                    .ok_or_else(|| Error::Internal("expected Boolean array".into()))?
4776                    .clone();
4777                Ok(Self::Boolean(Arc::new(typed)))
4778            }
4779            DataType::Utf8 => {
4780                let typed = array
4781                    .as_any()
4782                    .downcast_ref::<StringArray>()
4783                    .ok_or_else(|| Error::Internal("expected Utf8 array".into()))?
4784                    .clone();
4785                Ok(Self::Utf8(Arc::new(typed)))
4786            }
4787            DataType::Null => Ok(Self::Null(array.len())),
4788            other => Err(Error::InvalidArgumentError(format!(
4789                "unsupported column type {:?} in cross product filter",
4790                other
4791            ))),
4792        }
4793    }
4794
4795    fn len(&self) -> usize {
4796        match self {
4797            ColumnAccessor::Int64(array) => array.len(),
4798            ColumnAccessor::Float64(array) => array.len(),
4799            ColumnAccessor::Boolean(array) => array.len(),
4800            ColumnAccessor::Utf8(array) => array.len(),
4801            ColumnAccessor::Null(len) => *len,
4802        }
4803    }
4804
4805    fn is_null(&self, idx: usize) -> bool {
4806        match self {
4807            ColumnAccessor::Int64(array) => array.is_null(idx),
4808            ColumnAccessor::Float64(array) => array.is_null(idx),
4809            ColumnAccessor::Boolean(array) => array.is_null(idx),
4810            ColumnAccessor::Utf8(array) => array.is_null(idx),
4811            ColumnAccessor::Null(_) => true,
4812        }
4813    }
4814
4815    fn literal_at(&self, idx: usize) -> ExecutorResult<Literal> {
4816        if self.is_null(idx) {
4817            return Ok(Literal::Null);
4818        }
4819        match self {
4820            ColumnAccessor::Int64(array) => Ok(Literal::Integer(array.value(idx) as i128)),
4821            ColumnAccessor::Float64(array) => Ok(Literal::Float(array.value(idx))),
4822            ColumnAccessor::Boolean(array) => Ok(Literal::Boolean(array.value(idx))),
4823            ColumnAccessor::Utf8(array) => Ok(Literal::String(array.value(idx).to_string())),
4824            ColumnAccessor::Null(_) => Ok(Literal::Null),
4825        }
4826    }
4827
4828    fn as_array_ref(&self) -> ArrayRef {
4829        match self {
4830            ColumnAccessor::Int64(array) => Arc::clone(array) as ArrayRef,
4831            ColumnAccessor::Float64(array) => Arc::clone(array) as ArrayRef,
4832            ColumnAccessor::Boolean(array) => Arc::clone(array) as ArrayRef,
4833            ColumnAccessor::Utf8(array) => Arc::clone(array) as ArrayRef,
4834            ColumnAccessor::Null(len) => new_null_array(&DataType::Null, *len),
4835        }
4836    }
4837}
4838
/// Evaluated value column for cross-product expressions, grouped into the
/// categories the evaluator distinguishes.
#[derive(Clone)]
enum ValueArray {
    /// Any integer or float column, normalized through `NumericArray`.
    Numeric(NumericArray),
    Boolean(Arc<BooleanArray>),
    Utf8(Arc<StringArray>),
    /// All-null result; the payload is the row count.
    Null(usize),
}
4846
4847impl ValueArray {
4848    fn from_array(array: ArrayRef) -> ExecutorResult<Self> {
4849        match array.data_type() {
4850            DataType::Boolean => {
4851                let typed = array
4852                    .as_any()
4853                    .downcast_ref::<BooleanArray>()
4854                    .ok_or_else(|| Error::Internal("expected Boolean array".into()))?
4855                    .clone();
4856                Ok(Self::Boolean(Arc::new(typed)))
4857            }
4858            DataType::Utf8 => {
4859                let typed = array
4860                    .as_any()
4861                    .downcast_ref::<StringArray>()
4862                    .ok_or_else(|| Error::Internal("expected Utf8 array".into()))?
4863                    .clone();
4864                Ok(Self::Utf8(Arc::new(typed)))
4865            }
4866            DataType::Null => Ok(Self::Null(array.len())),
4867            DataType::Int8
4868            | DataType::Int16
4869            | DataType::Int32
4870            | DataType::Int64
4871            | DataType::UInt8
4872            | DataType::UInt16
4873            | DataType::UInt32
4874            | DataType::UInt64
4875            | DataType::Float32
4876            | DataType::Float64 => {
4877                let numeric = NumericArray::try_from_arrow(&array)?;
4878                Ok(Self::Numeric(numeric))
4879            }
4880            other => Err(Error::InvalidArgumentError(format!(
4881                "unsupported data type {:?} in cross product expression",
4882                other
4883            ))),
4884        }
4885    }
4886
4887    fn len(&self) -> usize {
4888        match self {
4889            ValueArray::Numeric(array) => array.len(),
4890            ValueArray::Boolean(array) => array.len(),
4891            ValueArray::Utf8(array) => array.len(),
4892            ValueArray::Null(len) => *len,
4893        }
4894    }
4895}
4896
/// Kleene (SQL three-valued) logical AND over `Option<bool>`, where `None`
/// represents UNKNOWN: FALSE dominates, TRUE AND TRUE is TRUE, anything else
/// is UNKNOWN.
fn truth_and(lhs: Option<bool>, rhs: Option<bool>) -> Option<bool> {
    if lhs == Some(false) || rhs == Some(false) {
        Some(false)
    } else if lhs == Some(true) && rhs == Some(true) {
        Some(true)
    } else {
        None
    }
}
4904
/// Kleene (SQL three-valued) logical OR over `Option<bool>`, where `None`
/// represents UNKNOWN: TRUE dominates, FALSE OR FALSE is FALSE, anything else
/// is UNKNOWN.
fn truth_or(lhs: Option<bool>, rhs: Option<bool>) -> Option<bool> {
    if lhs == Some(true) || rhs == Some(true) {
        Some(true)
    } else if lhs == Some(false) && rhs == Some(false) {
        Some(false)
    } else {
        None
    }
}
4912
/// Kleene (SQL three-valued) logical NOT: UNKNOWN (`None`) stays UNKNOWN.
fn truth_not(value: Option<bool>) -> Option<bool> {
    // `Option::map` preserves None, which is exactly NOT UNKNOWN = UNKNOWN;
    // the explicit three-arm match it replaces was the clippy `manual_map` idiom.
    value.map(|known| !known)
}
4920
4921fn compare_bool(op: CompareOp, lhs: bool, rhs: bool) -> bool {
4922    let l = lhs as u8;
4923    let r = rhs as u8;
4924    match op {
4925        CompareOp::Eq => lhs == rhs,
4926        CompareOp::NotEq => lhs != rhs,
4927        CompareOp::Lt => l < r,
4928        CompareOp::LtEq => l <= r,
4929        CompareOp::Gt => l > r,
4930        CompareOp::GtEq => l >= r,
4931    }
4932}
4933
4934fn compare_str(op: CompareOp, lhs: &str, rhs: &str) -> bool {
4935    match op {
4936        CompareOp::Eq => lhs == rhs,
4937        CompareOp::NotEq => lhs != rhs,
4938        CompareOp::Lt => lhs < rhs,
4939        CompareOp::LtEq => lhs <= rhs,
4940        CompareOp::Gt => lhs > rhs,
4941        CompareOp::GtEq => lhs >= rhs,
4942    }
4943}
4944
/// Resolves an IN / NOT IN list probe using SQL three-valued logic.
///
/// A hit decides the result immediately (inverted for NOT IN); with no hit,
/// a NULL element makes the outcome UNKNOWN (`None`); otherwise the list
/// definitively misses.
fn finalize_in_list_result(has_match: bool, saw_null: bool, negated: bool) -> Option<bool> {
    match (has_match, saw_null) {
        (true, _) => Some(!negated),
        (false, true) => None,
        (false, false) => Some(negated),
    }
}
4956
4957fn literal_to_constant_array(literal: &Literal, len: usize) -> ExecutorResult<ArrayRef> {
4958    match literal {
4959        Literal::Integer(v) => {
4960            let value = i64::try_from(*v).unwrap_or(0);
4961            let values = vec![value; len];
4962            Ok(Arc::new(Int64Array::from(values)) as ArrayRef)
4963        }
4964        Literal::Float(v) => {
4965            let values = vec![*v; len];
4966            Ok(Arc::new(Float64Array::from(values)) as ArrayRef)
4967        }
4968        Literal::Boolean(v) => {
4969            let values = vec![Some(*v); len];
4970            Ok(Arc::new(BooleanArray::from(values)) as ArrayRef)
4971        }
4972        Literal::String(v) => {
4973            let values: Vec<Option<String>> = (0..len).map(|_| Some(v.clone())).collect();
4974            Ok(Arc::new(StringArray::from(values)) as ArrayRef)
4975        }
4976        Literal::Null => Ok(new_null_array(&DataType::Null, len)),
4977        Literal::Struct(_) => Err(Error::InvalidArgumentError(
4978            "struct literals are not supported in cross product filters".into(),
4979        )),
4980    }
4981}
4982
4983impl CrossProductExpressionContext {
4984    fn new(schema: &Schema, lookup: FxHashMap<String, usize>) -> ExecutorResult<Self> {
4985        let mut columns = Vec::with_capacity(schema.fields().len());
4986        let mut field_id_to_index = FxHashMap::default();
4987        let mut next_field_id: FieldId = 1;
4988
4989        for (idx, field) in schema.fields().iter().enumerate() {
4990            if next_field_id == u32::MAX {
4991                return Err(Error::Internal(
4992                    "cross product projection exhausted FieldId space".into(),
4993                ));
4994            }
4995
4996            let executor_column = ExecutorColumn {
4997                name: field.name().clone(),
4998                data_type: field.data_type().clone(),
4999                nullable: field.is_nullable(),
5000                primary_key: false,
5001                unique: false,
5002                field_id: next_field_id,
5003                check_expr: None,
5004            };
5005            let field_id = next_field_id;
5006            next_field_id = next_field_id.saturating_add(1);
5007
5008            columns.push(executor_column);
5009            field_id_to_index.insert(field_id, idx);
5010        }
5011
5012        Ok(Self {
5013            schema: Arc::new(ExecutorSchema { columns, lookup }),
5014            field_id_to_index,
5015            numeric_cache: FxHashMap::default(),
5016            column_cache: FxHashMap::default(),
5017            next_field_id,
5018        })
5019    }
5020
5021    fn schema(&self) -> &ExecutorSchema {
5022        self.schema.as_ref()
5023    }
5024
5025    fn field_id_for_column(&self, name: &str) -> Option<FieldId> {
5026        self.schema.resolve(name).map(|column| column.field_id)
5027    }
5028
    /// Clears the cached numeric decodings and column accessors.
    // NOTE(review): caches are keyed by FieldId and hold batch-specific
    // arrays — presumably this is invoked between batches; confirm callers.
    fn reset(&mut self) {
        self.numeric_cache.clear();
        self.column_cache.clear();
    }
5033
5034    fn allocate_synthetic_field_id(&mut self) -> ExecutorResult<FieldId> {
5035        if self.next_field_id == FieldId::MAX {
5036            return Err(Error::Internal(
5037                "cross product projection exhausted FieldId space".into(),
5038            ));
5039        }
5040        let field_id = self.next_field_id;
5041        self.next_field_id = self.next_field_id.saturating_add(1);
5042        Ok(field_id)
5043    }
5044
5045    #[cfg(test)]
5046    fn evaluate(
5047        &mut self,
5048        expr: &ScalarExpr<String>,
5049        batch: &RecordBatch,
5050    ) -> ExecutorResult<ArrayRef> {
5051        let translated = translate_scalar(expr, self.schema.as_ref(), |name| {
5052            Error::InvalidArgumentError(format!(
5053                "column '{}' not found in cross product result",
5054                name
5055            ))
5056        })?;
5057
5058        self.evaluate_numeric(&translated, batch)
5059    }
5060
5061    fn evaluate_predicate_mask(
5062        &mut self,
5063        expr: &LlkvExpr<'static, FieldId>,
5064        batch: &RecordBatch,
5065        mut exists_eval: impl FnMut(
5066            &mut Self,
5067            &llkv_expr::SubqueryExpr,
5068            usize,
5069            &RecordBatch,
5070        ) -> ExecutorResult<Option<bool>>,
5071    ) -> ExecutorResult<BooleanArray> {
5072        let truths = self.evaluate_predicate_truths(expr, batch, &mut exists_eval)?;
5073        let mut builder = BooleanBuilder::with_capacity(truths.len());
5074        for value in truths {
5075            builder.append_value(value.unwrap_or(false));
5076        }
5077        Ok(builder.finish())
5078    }
5079
5080    fn evaluate_predicate_truths(
5081        &mut self,
5082        expr: &LlkvExpr<'static, FieldId>,
5083        batch: &RecordBatch,
5084        exists_eval: &mut impl FnMut(
5085            &mut Self,
5086            &llkv_expr::SubqueryExpr,
5087            usize,
5088            &RecordBatch,
5089        ) -> ExecutorResult<Option<bool>>,
5090    ) -> ExecutorResult<Vec<Option<bool>>> {
5091        match expr {
5092            LlkvExpr::Literal(value) => Ok(vec![Some(*value); batch.num_rows()]),
5093            LlkvExpr::And(children) => {
5094                if children.is_empty() {
5095                    return Ok(vec![Some(true); batch.num_rows()]);
5096                }
5097                let mut result =
5098                    self.evaluate_predicate_truths(&children[0], batch, exists_eval)?;
5099                for child in &children[1..] {
5100                    let next = self.evaluate_predicate_truths(child, batch, exists_eval)?;
5101                    for (lhs, rhs) in result.iter_mut().zip(next.into_iter()) {
5102                        *lhs = truth_and(*lhs, rhs);
5103                    }
5104                }
5105                Ok(result)
5106            }
5107            LlkvExpr::Or(children) => {
5108                if children.is_empty() {
5109                    return Ok(vec![Some(false); batch.num_rows()]);
5110                }
5111                let mut result =
5112                    self.evaluate_predicate_truths(&children[0], batch, exists_eval)?;
5113                for child in &children[1..] {
5114                    let next = self.evaluate_predicate_truths(child, batch, exists_eval)?;
5115                    for (lhs, rhs) in result.iter_mut().zip(next.into_iter()) {
5116                        *lhs = truth_or(*lhs, rhs);
5117                    }
5118                }
5119                Ok(result)
5120            }
5121            LlkvExpr::Not(inner) => {
5122                let mut values = self.evaluate_predicate_truths(inner, batch, exists_eval)?;
5123                for value in &mut values {
5124                    *value = truth_not(*value);
5125                }
5126                Ok(values)
5127            }
5128            LlkvExpr::Pred(filter) => self.evaluate_filter_truths(filter, batch),
5129            LlkvExpr::Compare { left, op, right } => {
5130                self.evaluate_compare_truths(left, *op, right, batch)
5131            }
5132            LlkvExpr::InList {
5133                expr: target,
5134                list,
5135                negated,
5136            } => self.evaluate_in_list_truths(target, list, *negated, batch),
5137            LlkvExpr::IsNull { expr, negated } => {
5138                self.evaluate_is_null_truths(expr, *negated, batch)
5139            }
5140            LlkvExpr::Exists(subquery_expr) => {
5141                let mut values = Vec::with_capacity(batch.num_rows());
5142                for row_idx in 0..batch.num_rows() {
5143                    let value = exists_eval(self, subquery_expr, row_idx, batch)?;
5144                    values.push(value);
5145                }
5146                Ok(values)
5147            }
5148        }
5149    }
5150
5151    fn evaluate_filter_truths(
5152        &mut self,
5153        filter: &Filter<FieldId>,
5154        batch: &RecordBatch,
5155    ) -> ExecutorResult<Vec<Option<bool>>> {
5156        let accessor = self.column_accessor(filter.field_id, batch)?;
5157        let len = accessor.len();
5158
5159        match &filter.op {
5160            Operator::IsNull => {
5161                let mut out = Vec::with_capacity(len);
5162                for idx in 0..len {
5163                    out.push(Some(accessor.is_null(idx)));
5164                }
5165                Ok(out)
5166            }
5167            Operator::IsNotNull => {
5168                let mut out = Vec::with_capacity(len);
5169                for idx in 0..len {
5170                    out.push(Some(!accessor.is_null(idx)));
5171                }
5172                Ok(out)
5173            }
5174            _ => match accessor {
5175                ColumnAccessor::Int64(array) => {
5176                    let predicate = build_fixed_width_predicate::<Int64Type>(&filter.op)
5177                        .map_err(Error::predicate_build)?;
5178                    let mut out = Vec::with_capacity(len);
5179                    for idx in 0..len {
5180                        if array.is_null(idx) {
5181                            out.push(None);
5182                        } else {
5183                            let value = array.value(idx);
5184                            out.push(Some(predicate.matches(&value)));
5185                        }
5186                    }
5187                    Ok(out)
5188                }
5189                ColumnAccessor::Float64(array) => {
5190                    let predicate = build_fixed_width_predicate::<Float64Type>(&filter.op)
5191                        .map_err(Error::predicate_build)?;
5192                    let mut out = Vec::with_capacity(len);
5193                    for idx in 0..len {
5194                        if array.is_null(idx) {
5195                            out.push(None);
5196                        } else {
5197                            let value = array.value(idx);
5198                            out.push(Some(predicate.matches(&value)));
5199                        }
5200                    }
5201                    Ok(out)
5202                }
5203                ColumnAccessor::Boolean(array) => {
5204                    let predicate =
5205                        build_bool_predicate(&filter.op).map_err(Error::predicate_build)?;
5206                    let mut out = Vec::with_capacity(len);
5207                    for idx in 0..len {
5208                        if array.is_null(idx) {
5209                            out.push(None);
5210                        } else {
5211                            let value = array.value(idx);
5212                            out.push(Some(predicate.matches(&value)));
5213                        }
5214                    }
5215                    Ok(out)
5216                }
5217                ColumnAccessor::Utf8(array) => {
5218                    let predicate =
5219                        build_var_width_predicate(&filter.op).map_err(Error::predicate_build)?;
5220                    let mut out = Vec::with_capacity(len);
5221                    for idx in 0..len {
5222                        if array.is_null(idx) {
5223                            out.push(None);
5224                        } else {
5225                            let value = array.value(idx);
5226                            out.push(Some(predicate.matches(value)));
5227                        }
5228                    }
5229                    Ok(out)
5230                }
5231                ColumnAccessor::Null(len) => Ok(vec![None; len]),
5232            },
5233        }
5234    }
5235
5236    fn evaluate_compare_truths(
5237        &mut self,
5238        left: &ScalarExpr<FieldId>,
5239        op: CompareOp,
5240        right: &ScalarExpr<FieldId>,
5241        batch: &RecordBatch,
5242    ) -> ExecutorResult<Vec<Option<bool>>> {
5243        let left_values = self.materialize_value_array(left, batch)?;
5244        let right_values = self.materialize_value_array(right, batch)?;
5245
5246        if left_values.len() != right_values.len() {
5247            return Err(Error::Internal(
5248                "mismatched compare operand lengths in cross product filter".into(),
5249            ));
5250        }
5251
5252        let len = left_values.len();
5253        match (&left_values, &right_values) {
5254            (ValueArray::Null(_), _) | (_, ValueArray::Null(_)) => Ok(vec![None; len]),
5255            (ValueArray::Numeric(lhs), ValueArray::Numeric(rhs)) => {
5256                let mut out = Vec::with_capacity(len);
5257                for idx in 0..len {
5258                    match (lhs.value(idx), rhs.value(idx)) {
5259                        (Some(lv), Some(rv)) => out.push(Some(NumericKernels::compare(op, lv, rv))),
5260                        _ => out.push(None),
5261                    }
5262                }
5263                Ok(out)
5264            }
5265            (ValueArray::Boolean(lhs), ValueArray::Boolean(rhs)) => {
5266                let lhs = lhs.as_ref();
5267                let rhs = rhs.as_ref();
5268                let mut out = Vec::with_capacity(len);
5269                for idx in 0..len {
5270                    if lhs.is_null(idx) || rhs.is_null(idx) {
5271                        out.push(None);
5272                    } else {
5273                        out.push(Some(compare_bool(op, lhs.value(idx), rhs.value(idx))));
5274                    }
5275                }
5276                Ok(out)
5277            }
5278            (ValueArray::Utf8(lhs), ValueArray::Utf8(rhs)) => {
5279                let lhs = lhs.as_ref();
5280                let rhs = rhs.as_ref();
5281                let mut out = Vec::with_capacity(len);
5282                for idx in 0..len {
5283                    if lhs.is_null(idx) || rhs.is_null(idx) {
5284                        out.push(None);
5285                    } else {
5286                        out.push(Some(compare_str(op, lhs.value(idx), rhs.value(idx))));
5287                    }
5288                }
5289                Ok(out)
5290            }
5291            _ => Err(Error::InvalidArgumentError(
5292                "unsupported comparison between mismatched types in cross product filter".into(),
5293            )),
5294        }
5295    }
5296
5297    fn evaluate_is_null_truths(
5298        &mut self,
5299        expr: &ScalarExpr<FieldId>,
5300        negated: bool,
5301        batch: &RecordBatch,
5302    ) -> ExecutorResult<Vec<Option<bool>>> {
5303        let values = self.materialize_value_array(expr, batch)?;
5304        let len = values.len();
5305
5306        match &values {
5307            ValueArray::Null(len) => {
5308                // All values are NULL
5309                let result = if negated {
5310                    Some(false) // IS NOT NULL on NULL column
5311                } else {
5312                    Some(true) // IS NULL on NULL column
5313                };
5314                Ok(vec![result; *len])
5315            }
5316            ValueArray::Numeric(arr) => {
5317                let mut out = Vec::with_capacity(len);
5318                for idx in 0..len {
5319                    let is_null = arr.value(idx).is_none();
5320                    let result = if negated {
5321                        !is_null // IS NOT NULL
5322                    } else {
5323                        is_null // IS NULL
5324                    };
5325                    out.push(Some(result));
5326                }
5327                Ok(out)
5328            }
5329            ValueArray::Boolean(arr) => {
5330                let mut out = Vec::with_capacity(len);
5331                for idx in 0..len {
5332                    let is_null = arr.is_null(idx);
5333                    let result = if negated { !is_null } else { is_null };
5334                    out.push(Some(result));
5335                }
5336                Ok(out)
5337            }
5338            ValueArray::Utf8(arr) => {
5339                let mut out = Vec::with_capacity(len);
5340                for idx in 0..len {
5341                    let is_null = arr.is_null(idx);
5342                    let result = if negated { !is_null } else { is_null };
5343                    out.push(Some(result));
5344                }
5345                Ok(out)
5346            }
5347        }
5348    }
5349
5350    fn evaluate_in_list_truths(
5351        &mut self,
5352        target: &ScalarExpr<FieldId>,
5353        list: &[ScalarExpr<FieldId>],
5354        negated: bool,
5355        batch: &RecordBatch,
5356    ) -> ExecutorResult<Vec<Option<bool>>> {
5357        let target_values = self.materialize_value_array(target, batch)?;
5358        let list_values = list
5359            .iter()
5360            .map(|expr| self.materialize_value_array(expr, batch))
5361            .collect::<ExecutorResult<Vec<_>>>()?;
5362
5363        let len = target_values.len();
5364        for values in &list_values {
5365            if values.len() != len {
5366                return Err(Error::Internal(
5367                    "mismatched IN list operand lengths in cross product filter".into(),
5368                ));
5369            }
5370        }
5371
5372        match &target_values {
5373            ValueArray::Numeric(target_numeric) => {
5374                let mut out = Vec::with_capacity(len);
5375                for idx in 0..len {
5376                    let target_value = match target_numeric.value(idx) {
5377                        Some(value) => value,
5378                        None => {
5379                            out.push(None);
5380                            continue;
5381                        }
5382                    };
5383                    let mut has_match = false;
5384                    let mut saw_null = false;
5385                    for candidate in &list_values {
5386                        match candidate {
5387                            ValueArray::Numeric(array) => match array.value(idx) {
5388                                Some(value) => {
5389                                    if NumericKernels::compare(CompareOp::Eq, target_value, value) {
5390                                        has_match = true;
5391                                        break;
5392                                    }
5393                                }
5394                                None => saw_null = true,
5395                            },
5396                            ValueArray::Null(_) => saw_null = true,
5397                            _ => {
5398                                return Err(Error::InvalidArgumentError(
5399                                    "type mismatch in IN list evaluation".into(),
5400                                ));
5401                            }
5402                        }
5403                    }
5404                    out.push(finalize_in_list_result(has_match, saw_null, negated));
5405                }
5406                Ok(out)
5407            }
5408            ValueArray::Boolean(target_bool) => {
5409                let mut out = Vec::with_capacity(len);
5410                for idx in 0..len {
5411                    if target_bool.is_null(idx) {
5412                        out.push(None);
5413                        continue;
5414                    }
5415                    let target_value = target_bool.value(idx);
5416                    let mut has_match = false;
5417                    let mut saw_null = false;
5418                    for candidate in &list_values {
5419                        match candidate {
5420                            ValueArray::Boolean(array) => {
5421                                if array.is_null(idx) {
5422                                    saw_null = true;
5423                                } else if array.value(idx) == target_value {
5424                                    has_match = true;
5425                                    break;
5426                                }
5427                            }
5428                            ValueArray::Null(_) => saw_null = true,
5429                            _ => {
5430                                return Err(Error::InvalidArgumentError(
5431                                    "type mismatch in IN list evaluation".into(),
5432                                ));
5433                            }
5434                        }
5435                    }
5436                    out.push(finalize_in_list_result(has_match, saw_null, negated));
5437                }
5438                Ok(out)
5439            }
5440            ValueArray::Utf8(target_utf8) => {
5441                let mut out = Vec::with_capacity(len);
5442                for idx in 0..len {
5443                    if target_utf8.is_null(idx) {
5444                        out.push(None);
5445                        continue;
5446                    }
5447                    let target_value = target_utf8.value(idx);
5448                    let mut has_match = false;
5449                    let mut saw_null = false;
5450                    for candidate in &list_values {
5451                        match candidate {
5452                            ValueArray::Utf8(array) => {
5453                                if array.is_null(idx) {
5454                                    saw_null = true;
5455                                } else if array.value(idx) == target_value {
5456                                    has_match = true;
5457                                    break;
5458                                }
5459                            }
5460                            ValueArray::Null(_) => saw_null = true,
5461                            _ => {
5462                                return Err(Error::InvalidArgumentError(
5463                                    "type mismatch in IN list evaluation".into(),
5464                                ));
5465                            }
5466                        }
5467                    }
5468                    out.push(finalize_in_list_result(has_match, saw_null, negated));
5469                }
5470                Ok(out)
5471            }
5472            ValueArray::Null(len) => Ok(vec![None; *len]),
5473        }
5474    }
5475
5476    fn evaluate_numeric(
5477        &mut self,
5478        expr: &ScalarExpr<FieldId>,
5479        batch: &RecordBatch,
5480    ) -> ExecutorResult<ArrayRef> {
5481        let mut required = FxHashSet::default();
5482        collect_field_ids(expr, &mut required);
5483
5484        let mut arrays = NumericArrayMap::default();
5485        for field_id in required {
5486            let numeric = self.numeric_array(field_id, batch)?;
5487            arrays.insert(field_id, numeric);
5488        }
5489
5490        NumericKernels::evaluate_batch(expr, batch.num_rows(), &arrays)
5491    }
5492
5493    fn numeric_array(
5494        &mut self,
5495        field_id: FieldId,
5496        batch: &RecordBatch,
5497    ) -> ExecutorResult<NumericArray> {
5498        if let Some(existing) = self.numeric_cache.get(&field_id) {
5499            return Ok(existing.clone());
5500        }
5501
5502        let column_index = *self.field_id_to_index.get(&field_id).ok_or_else(|| {
5503            Error::Internal("field mapping missing during cross product evaluation".into())
5504        })?;
5505
5506        let array_ref = batch.column(column_index).clone();
5507        let numeric = NumericArray::try_from_arrow(&array_ref)?;
5508        self.numeric_cache.insert(field_id, numeric.clone());
5509        Ok(numeric)
5510    }
5511
5512    fn column_accessor(
5513        &mut self,
5514        field_id: FieldId,
5515        batch: &RecordBatch,
5516    ) -> ExecutorResult<ColumnAccessor> {
5517        if let Some(existing) = self.column_cache.get(&field_id) {
5518            return Ok(existing.clone());
5519        }
5520
5521        let column_index = *self.field_id_to_index.get(&field_id).ok_or_else(|| {
5522            Error::Internal("field mapping missing during cross product evaluation".into())
5523        })?;
5524
5525        let accessor = ColumnAccessor::from_array(batch.column(column_index))?;
5526        self.column_cache.insert(field_id, accessor.clone());
5527        Ok(accessor)
5528    }
5529
5530    fn materialize_scalar_array(
5531        &mut self,
5532        expr: &ScalarExpr<FieldId>,
5533        batch: &RecordBatch,
5534    ) -> ExecutorResult<ArrayRef> {
5535        match expr {
5536            ScalarExpr::Column(field_id) => {
5537                let accessor = self.column_accessor(*field_id, batch)?;
5538                Ok(accessor.as_array_ref())
5539            }
5540            ScalarExpr::Literal(literal) => literal_to_constant_array(literal, batch.num_rows()),
5541            ScalarExpr::Binary { .. } => self.evaluate_numeric(expr, batch),
5542            ScalarExpr::Compare { .. } => self.evaluate_numeric(expr, batch),
5543            ScalarExpr::Not(_) => self.evaluate_numeric(expr, batch),
5544            ScalarExpr::IsNull { .. } => self.evaluate_numeric(expr, batch),
5545            ScalarExpr::Aggregate(_) => Err(Error::InvalidArgumentError(
5546                "aggregate expressions are not supported in cross product filters".into(),
5547            )),
5548            ScalarExpr::GetField { .. } => Err(Error::InvalidArgumentError(
5549                "struct field access is not supported in cross product filters".into(),
5550            )),
5551            ScalarExpr::Cast { expr, data_type } => {
5552                let source = self.materialize_scalar_array(expr.as_ref(), batch)?;
5553                let casted = cast(source.as_ref(), data_type).map_err(|err| {
5554                    Error::InvalidArgumentError(format!("failed to cast expression: {err}"))
5555                })?;
5556                Ok(casted)
5557            }
5558            ScalarExpr::Case { .. } => self.evaluate_numeric(expr, batch),
5559            ScalarExpr::Coalesce(_) => self.evaluate_numeric(expr, batch),
5560            ScalarExpr::ScalarSubquery(_) => Err(Error::InvalidArgumentError(
5561                "scalar subqueries are not supported in cross product filters".into(),
5562            )),
5563        }
5564    }
5565
5566    fn materialize_value_array(
5567        &mut self,
5568        expr: &ScalarExpr<FieldId>,
5569        batch: &RecordBatch,
5570    ) -> ExecutorResult<ValueArray> {
5571        let array = self.materialize_scalar_array(expr, batch)?;
5572        ValueArray::from_array(array)
5573    }
5574}
5575
5576// TODO: Move to llkv-aggregate?
5577fn collect_field_ids(expr: &ScalarExpr<FieldId>, out: &mut FxHashSet<FieldId>) {
5578    match expr {
5579        ScalarExpr::Column(fid) => {
5580            out.insert(*fid);
5581        }
5582        ScalarExpr::Binary { left, right, .. } => {
5583            collect_field_ids(left, out);
5584            collect_field_ids(right, out);
5585        }
5586        ScalarExpr::Compare { left, right, .. } => {
5587            collect_field_ids(left, out);
5588            collect_field_ids(right, out);
5589        }
5590        ScalarExpr::Aggregate(call) => match call {
5591            AggregateCall::CountStar => {}
5592            AggregateCall::Count { expr, .. }
5593            | AggregateCall::Sum { expr, .. }
5594            | AggregateCall::Avg { expr, .. }
5595            | AggregateCall::Min(expr)
5596            | AggregateCall::Max(expr)
5597            | AggregateCall::CountNulls(expr) => {
5598                collect_field_ids(expr, out);
5599            }
5600        },
5601        ScalarExpr::GetField { base, .. } => collect_field_ids(base, out),
5602        ScalarExpr::Cast { expr, .. } => collect_field_ids(expr, out),
5603        ScalarExpr::Not(expr) => collect_field_ids(expr, out),
5604        ScalarExpr::IsNull { expr, .. } => collect_field_ids(expr, out),
5605        ScalarExpr::Case {
5606            operand,
5607            branches,
5608            else_expr,
5609        } => {
5610            if let Some(inner) = operand.as_deref() {
5611                collect_field_ids(inner, out);
5612            }
5613            for (when_expr, then_expr) in branches {
5614                collect_field_ids(when_expr, out);
5615                collect_field_ids(then_expr, out);
5616            }
5617            if let Some(inner) = else_expr.as_deref() {
5618                collect_field_ids(inner, out);
5619            }
5620        }
5621        ScalarExpr::Coalesce(items) => {
5622            for item in items {
5623                collect_field_ids(item, out);
5624            }
5625        }
5626        ScalarExpr::Literal(_) => {}
5627        ScalarExpr::ScalarSubquery(_) => {}
5628    }
5629}
5630
5631fn strip_exists(expr: &LlkvExpr<'static, FieldId>) -> LlkvExpr<'static, FieldId> {
5632    match expr {
5633        LlkvExpr::And(children) => LlkvExpr::And(children.iter().map(strip_exists).collect()),
5634        LlkvExpr::Or(children) => LlkvExpr::Or(children.iter().map(strip_exists).collect()),
5635        LlkvExpr::Not(inner) => LlkvExpr::Not(Box::new(strip_exists(inner))),
5636        LlkvExpr::Pred(filter) => LlkvExpr::Pred(filter.clone()),
5637        LlkvExpr::Compare { left, op, right } => LlkvExpr::Compare {
5638            left: left.clone(),
5639            op: *op,
5640            right: right.clone(),
5641        },
5642        LlkvExpr::InList {
5643            expr,
5644            list,
5645            negated,
5646        } => LlkvExpr::InList {
5647            expr: expr.clone(),
5648            list: list.clone(),
5649            negated: *negated,
5650        },
5651        LlkvExpr::IsNull { expr, negated } => LlkvExpr::IsNull {
5652            expr: expr.clone(),
5653            negated: *negated,
5654        },
5655        LlkvExpr::Literal(value) => LlkvExpr::Literal(*value),
5656        LlkvExpr::Exists(_) => LlkvExpr::Literal(true),
5657    }
5658}
5659
5660fn bind_select_plan(
5661    plan: &SelectPlan,
5662    bindings: &FxHashMap<String, Literal>,
5663) -> ExecutorResult<SelectPlan> {
5664    if bindings.is_empty() {
5665        return Ok(plan.clone());
5666    }
5667
5668    let projections = plan
5669        .projections
5670        .iter()
5671        .map(|projection| bind_projection(projection, bindings))
5672        .collect::<ExecutorResult<Vec<_>>>()?;
5673
5674    let filter = match &plan.filter {
5675        Some(wrapper) => Some(bind_select_filter(wrapper, bindings)?),
5676        None => None,
5677    };
5678
5679    let aggregates = plan
5680        .aggregates
5681        .iter()
5682        .map(|aggregate| bind_aggregate_expr(aggregate, bindings))
5683        .collect::<ExecutorResult<Vec<_>>>()?;
5684
5685    let scalar_subqueries = plan
5686        .scalar_subqueries
5687        .iter()
5688        .map(|subquery| bind_scalar_subquery(subquery, bindings))
5689        .collect::<ExecutorResult<Vec<_>>>()?;
5690
5691    if let Some(compound) = &plan.compound {
5692        let bound_compound = bind_compound_select(compound, bindings)?;
5693        return Ok(SelectPlan {
5694            tables: Vec::new(),
5695            joins: Vec::new(),
5696            projections: Vec::new(),
5697            filter: None,
5698            having: None,
5699            aggregates: Vec::new(),
5700            order_by: plan.order_by.clone(),
5701            distinct: false,
5702            scalar_subqueries: Vec::new(),
5703            compound: Some(bound_compound),
5704            group_by: Vec::new(),
5705            value_table_mode: None,
5706        });
5707    }
5708
5709    Ok(SelectPlan {
5710        tables: plan.tables.clone(),
5711        joins: plan.joins.clone(),
5712        projections,
5713        filter,
5714        having: plan.having.clone(),
5715        aggregates,
5716        order_by: Vec::new(),
5717        distinct: plan.distinct,
5718        scalar_subqueries,
5719        compound: None,
5720        group_by: plan.group_by.clone(),
5721        value_table_mode: plan.value_table_mode.clone(),
5722    })
5723}
5724
5725fn bind_compound_select(
5726    compound: &CompoundSelectPlan,
5727    bindings: &FxHashMap<String, Literal>,
5728) -> ExecutorResult<CompoundSelectPlan> {
5729    let initial = bind_select_plan(&compound.initial, bindings)?;
5730    let mut operations = Vec::with_capacity(compound.operations.len());
5731    for component in &compound.operations {
5732        let bound_plan = bind_select_plan(&component.plan, bindings)?;
5733        operations.push(CompoundSelectComponent {
5734            operator: component.operator.clone(),
5735            quantifier: component.quantifier.clone(),
5736            plan: bound_plan,
5737        });
5738    }
5739    Ok(CompoundSelectPlan {
5740        initial: Box::new(initial),
5741        operations,
5742    })
5743}
5744
5745fn ensure_schema_compatibility(base: &Schema, other: &Schema) -> ExecutorResult<()> {
5746    if base.fields().len() != other.fields().len() {
5747        return Err(Error::InvalidArgumentError(
5748            "compound SELECT requires matching column counts".into(),
5749        ));
5750    }
5751    for (left, right) in base.fields().iter().zip(other.fields().iter()) {
5752        if left.data_type() != right.data_type() {
5753            return Err(Error::InvalidArgumentError(format!(
5754                "compound SELECT column type mismatch: {} vs {}",
5755                left.data_type(),
5756                right.data_type()
5757            )));
5758        }
5759    }
5760    Ok(())
5761}
5762
5763fn ensure_distinct_rows(rows: &mut Vec<Vec<PlanValue>>, cache: &mut Option<FxHashSet<Vec<u8>>>) {
5764    if cache.is_some() {
5765        return;
5766    }
5767    let mut set = FxHashSet::default();
5768    let mut deduped: Vec<Vec<PlanValue>> = Vec::with_capacity(rows.len());
5769    for row in rows.drain(..) {
5770        let key = encode_row(&row);
5771        if set.insert(key) {
5772            deduped.push(row);
5773        }
5774    }
5775    *rows = deduped;
5776    *cache = Some(set);
5777}
5778
5779fn encode_row(row: &[PlanValue]) -> Vec<u8> {
5780    let mut buf = Vec::new();
5781    for value in row {
5782        encode_plan_value(&mut buf, value);
5783        buf.push(0x1F);
5784    }
5785    buf
5786}
5787
/// Serializes one value into `buf` using a type-tagged, deterministic byte
/// encoding. This is only used to build hash keys for row deduplication, so
/// the format must be injective for equal/unequal values, not portable.
///
/// Layout:
///   tag 0: NULL (no payload)
///   tag 1: integer, big-endian two's-complement bytes
///   tag 2: float, IEEE-754 bit pattern, big-endian
///   tag 3: string, u32 big-endian length prefix + raw UTF-8 bytes
///   tag 4: struct, u32 entry count, then (length-prefixed key, encoded value)
///          pairs sorted by key
fn encode_plan_value(buf: &mut Vec<u8>, value: &PlanValue) {
    match value {
        PlanValue::Null => buf.push(0),
        PlanValue::Integer(v) => {
            buf.push(1);
            buf.extend_from_slice(&v.to_be_bytes());
        }
        PlanValue::Float(v) => {
            // Encode the raw bit pattern so NaN and -0.0 hash deterministically.
            buf.push(2);
            buf.extend_from_slice(&v.to_bits().to_be_bytes());
        }
        PlanValue::String(s) => {
            buf.push(3);
            let bytes = s.as_bytes();
            // NOTE(review): lengths above u32::MAX are clamped, which would make
            // the prefix disagree with the payload — confirm strings cannot
            // reach 4 GiB in practice.
            let len = u32::try_from(bytes.len()).unwrap_or(u32::MAX);
            buf.extend_from_slice(&len.to_be_bytes());
            buf.extend_from_slice(bytes);
        }
        PlanValue::Struct(map) => {
            buf.push(4);
            // Sort entries by key so logically equal structs encode identically
            // regardless of map iteration order.
            let mut entries: Vec<_> = map.iter().collect();
            entries.sort_by(|a, b| a.0.cmp(b.0));
            let len = u32::try_from(entries.len()).unwrap_or(u32::MAX);
            buf.extend_from_slice(&len.to_be_bytes());
            for (key, val) in entries {
                let key_bytes = key.as_bytes();
                let key_len = u32::try_from(key_bytes.len()).unwrap_or(u32::MAX);
                buf.extend_from_slice(&key_len.to_be_bytes());
                buf.extend_from_slice(key_bytes);
                encode_plan_value(buf, val);
            }
        }
    }
}
5822
5823fn rows_to_record_batch(
5824    schema: Arc<Schema>,
5825    rows: &[Vec<PlanValue>],
5826) -> ExecutorResult<RecordBatch> {
5827    let column_count = schema.fields().len();
5828    let mut columns: Vec<Vec<PlanValue>> = vec![Vec::with_capacity(rows.len()); column_count];
5829    for row in rows {
5830        if row.len() != column_count {
5831            return Err(Error::InvalidArgumentError(
5832                "compound SELECT produced mismatched column counts".into(),
5833            ));
5834        }
5835        for (idx, value) in row.iter().enumerate() {
5836            columns[idx].push(value.clone());
5837        }
5838    }
5839
5840    let mut arrays: Vec<ArrayRef> = Vec::with_capacity(column_count);
5841    for (idx, field) in schema.fields().iter().enumerate() {
5842        let array = build_array_for_column(field.data_type(), &columns[idx])?;
5843        arrays.push(array);
5844    }
5845
5846    RecordBatch::try_new(schema, arrays).map_err(|err| {
5847        Error::InvalidArgumentError(format!("failed to materialize compound SELECT: {err}"))
5848    })
5849}
5850
5851fn build_column_lookup_map(schema: &Schema) -> FxHashMap<String, usize> {
5852    let mut lookup = FxHashMap::default();
5853    for (idx, field) in schema.fields().iter().enumerate() {
5854        lookup.insert(field.name().to_ascii_lowercase(), idx);
5855    }
5856    lookup
5857}
5858
5859fn build_group_key(
5860    batch: &RecordBatch,
5861    row_idx: usize,
5862    key_indices: &[usize],
5863) -> ExecutorResult<Vec<GroupKeyValue>> {
5864    let mut values = Vec::with_capacity(key_indices.len());
5865    for &index in key_indices {
5866        values.push(group_key_value(batch.column(index), row_idx)?);
5867    }
5868    Ok(values)
5869}
5870
5871fn group_key_value(array: &ArrayRef, row_idx: usize) -> ExecutorResult<GroupKeyValue> {
5872    if !array.is_valid(row_idx) {
5873        return Ok(GroupKeyValue::Null);
5874    }
5875
5876    match array.data_type() {
5877        DataType::Int8 => {
5878            let values = array
5879                .as_any()
5880                .downcast_ref::<Int8Array>()
5881                .ok_or_else(|| Error::Internal("failed to downcast to Int8Array".into()))?;
5882            Ok(GroupKeyValue::Int(values.value(row_idx) as i64))
5883        }
5884        DataType::Int16 => {
5885            let values = array
5886                .as_any()
5887                .downcast_ref::<Int16Array>()
5888                .ok_or_else(|| Error::Internal("failed to downcast to Int16Array".into()))?;
5889            Ok(GroupKeyValue::Int(values.value(row_idx) as i64))
5890        }
5891        DataType::Int32 => {
5892            let values = array
5893                .as_any()
5894                .downcast_ref::<Int32Array>()
5895                .ok_or_else(|| Error::Internal("failed to downcast to Int32Array".into()))?;
5896            Ok(GroupKeyValue::Int(values.value(row_idx) as i64))
5897        }
5898        DataType::Int64 => {
5899            let values = array
5900                .as_any()
5901                .downcast_ref::<Int64Array>()
5902                .ok_or_else(|| Error::Internal("failed to downcast to Int64Array".into()))?;
5903            Ok(GroupKeyValue::Int(values.value(row_idx)))
5904        }
5905        DataType::UInt8 => {
5906            let values = array
5907                .as_any()
5908                .downcast_ref::<UInt8Array>()
5909                .ok_or_else(|| Error::Internal("failed to downcast to UInt8Array".into()))?;
5910            Ok(GroupKeyValue::Int(values.value(row_idx) as i64))
5911        }
5912        DataType::UInt16 => {
5913            let values = array
5914                .as_any()
5915                .downcast_ref::<UInt16Array>()
5916                .ok_or_else(|| Error::Internal("failed to downcast to UInt16Array".into()))?;
5917            Ok(GroupKeyValue::Int(values.value(row_idx) as i64))
5918        }
5919        DataType::UInt32 => {
5920            let values = array
5921                .as_any()
5922                .downcast_ref::<UInt32Array>()
5923                .ok_or_else(|| Error::Internal("failed to downcast to UInt32Array".into()))?;
5924            Ok(GroupKeyValue::Int(values.value(row_idx) as i64))
5925        }
5926        DataType::UInt64 => {
5927            let values = array
5928                .as_any()
5929                .downcast_ref::<UInt64Array>()
5930                .ok_or_else(|| Error::Internal("failed to downcast to UInt64Array".into()))?;
5931            let value = values.value(row_idx);
5932            if value > i64::MAX as u64 {
5933                return Err(Error::InvalidArgumentError(
5934                    "GROUP BY value exceeds supported integer range".into(),
5935                ));
5936            }
5937            Ok(GroupKeyValue::Int(value as i64))
5938        }
5939        DataType::Boolean => {
5940            let values = array
5941                .as_any()
5942                .downcast_ref::<BooleanArray>()
5943                .ok_or_else(|| Error::Internal("failed to downcast to BooleanArray".into()))?;
5944            Ok(GroupKeyValue::Bool(values.value(row_idx)))
5945        }
5946        DataType::Utf8 => {
5947            let values = array
5948                .as_any()
5949                .downcast_ref::<StringArray>()
5950                .ok_or_else(|| Error::Internal("failed to downcast to StringArray".into()))?;
5951            Ok(GroupKeyValue::String(values.value(row_idx).to_string()))
5952        }
5953        other => Err(Error::InvalidArgumentError(format!(
5954            "GROUP BY does not support column type {:?}",
5955            other
5956        ))),
5957    }
5958}
5959
5960fn evaluate_constant_predicate(expr: &LlkvExpr<'static, String>) -> Option<Option<bool>> {
5961    match expr {
5962        LlkvExpr::Literal(value) => Some(Some(*value)),
5963        LlkvExpr::Not(inner) => {
5964            let inner_val = evaluate_constant_predicate(inner)?;
5965            Some(truth_not(inner_val))
5966        }
5967        LlkvExpr::And(children) => {
5968            let mut acc = Some(true);
5969            for child in children {
5970                let child_val = evaluate_constant_predicate(child)?;
5971                acc = truth_and(acc, child_val);
5972            }
5973            Some(acc)
5974        }
5975        LlkvExpr::Or(children) => {
5976            let mut acc = Some(false);
5977            for child in children {
5978                let child_val = evaluate_constant_predicate(child)?;
5979                acc = truth_or(acc, child_val);
5980            }
5981            Some(acc)
5982        }
5983        LlkvExpr::Compare { left, op, right } => {
5984            let left_literal = evaluate_constant_scalar(left)?;
5985            let right_literal = evaluate_constant_scalar(right)?;
5986            Some(compare_literals(*op, &left_literal, &right_literal))
5987        }
5988        _ => None,
5989    }
5990}
5991
5992fn evaluate_constant_scalar(expr: &ScalarExpr<String>) -> Option<Literal> {
5993    match expr {
5994        ScalarExpr::Literal(lit) => Some(lit.clone()),
5995        _ => None,
5996    }
5997}
5998
5999fn compare_literals(op: CompareOp, left: &Literal, right: &Literal) -> Option<bool> {
6000    use std::cmp::Ordering;
6001
6002    match (left, right) {
6003        (Literal::Null, _) | (_, Literal::Null) => None,
6004        (Literal::Integer(lhs), Literal::Integer(rhs)) => {
6005            let ord = lhs.cmp(rhs);
6006            Some(match op {
6007                CompareOp::Eq => ord == Ordering::Equal,
6008                CompareOp::NotEq => ord != Ordering::Equal,
6009                CompareOp::Lt => ord == Ordering::Less,
6010                CompareOp::LtEq => ord != Ordering::Greater,
6011                CompareOp::Gt => ord == Ordering::Greater,
6012                CompareOp::GtEq => ord != Ordering::Less,
6013            })
6014        }
6015        (Literal::Float(lhs), Literal::Float(rhs)) => Some(match op {
6016            CompareOp::Eq => lhs == rhs,
6017            CompareOp::NotEq => lhs != rhs,
6018            CompareOp::Lt => lhs < rhs,
6019            CompareOp::LtEq => lhs <= rhs,
6020            CompareOp::Gt => lhs > rhs,
6021            CompareOp::GtEq => lhs >= rhs,
6022        }),
6023        (Literal::Integer(lhs), Literal::Float(_rhs)) => {
6024            compare_literals(op, &Literal::Float(*lhs as f64), right)
6025        }
6026        (Literal::Float(_lhs), Literal::Integer(rhs)) => {
6027            compare_literals(op, left, &Literal::Float(*rhs as f64))
6028        }
6029        (Literal::Boolean(lhs), Literal::Boolean(rhs)) => Some(match op {
6030            CompareOp::Eq => lhs == rhs,
6031            CompareOp::NotEq => lhs != rhs,
6032            CompareOp::Lt => (*lhs as u8) < (*rhs as u8),
6033            CompareOp::LtEq => (*lhs as u8) <= (*rhs as u8),
6034            CompareOp::Gt => (*lhs as u8) > (*rhs as u8),
6035            CompareOp::GtEq => (*lhs as u8) >= (*rhs as u8),
6036        }),
6037        (Literal::String(lhs), Literal::String(rhs)) => {
6038            let ord = lhs.cmp(rhs);
6039            Some(match op {
6040                CompareOp::Eq => ord == Ordering::Equal,
6041                CompareOp::NotEq => ord != Ordering::Equal,
6042                CompareOp::Lt => ord == Ordering::Less,
6043                CompareOp::LtEq => ord != Ordering::Greater,
6044                CompareOp::Gt => ord == Ordering::Greater,
6045                CompareOp::GtEq => ord != Ordering::Less,
6046            })
6047        }
6048        _ => None,
6049    }
6050}
6051
6052fn bind_select_filter(
6053    filter: &llkv_plan::SelectFilter,
6054    bindings: &FxHashMap<String, Literal>,
6055) -> ExecutorResult<llkv_plan::SelectFilter> {
6056    let predicate = bind_predicate_expr(&filter.predicate, bindings)?;
6057    let subqueries = filter
6058        .subqueries
6059        .iter()
6060        .map(|subquery| bind_filter_subquery(subquery, bindings))
6061        .collect::<ExecutorResult<Vec<_>>>()?;
6062
6063    Ok(llkv_plan::SelectFilter {
6064        predicate,
6065        subqueries,
6066    })
6067}
6068
6069fn bind_filter_subquery(
6070    subquery: &llkv_plan::FilterSubquery,
6071    bindings: &FxHashMap<String, Literal>,
6072) -> ExecutorResult<llkv_plan::FilterSubquery> {
6073    let bound_plan = bind_select_plan(&subquery.plan, bindings)?;
6074    Ok(llkv_plan::FilterSubquery {
6075        id: subquery.id,
6076        plan: Box::new(bound_plan),
6077        correlated_columns: subquery.correlated_columns.clone(),
6078    })
6079}
6080
6081fn bind_scalar_subquery(
6082    subquery: &llkv_plan::ScalarSubquery,
6083    bindings: &FxHashMap<String, Literal>,
6084) -> ExecutorResult<llkv_plan::ScalarSubquery> {
6085    let bound_plan = bind_select_plan(&subquery.plan, bindings)?;
6086    Ok(llkv_plan::ScalarSubquery {
6087        id: subquery.id,
6088        plan: Box::new(bound_plan),
6089        correlated_columns: subquery.correlated_columns.clone(),
6090    })
6091}
6092
6093fn bind_projection(
6094    projection: &SelectProjection,
6095    bindings: &FxHashMap<String, Literal>,
6096) -> ExecutorResult<SelectProjection> {
6097    match projection {
6098        SelectProjection::AllColumns => Ok(projection.clone()),
6099        SelectProjection::AllColumnsExcept { exclude } => Ok(SelectProjection::AllColumnsExcept {
6100            exclude: exclude.clone(),
6101        }),
6102        SelectProjection::Column { name, alias } => {
6103            if let Some(literal) = bindings.get(name) {
6104                let expr = ScalarExpr::Literal(literal.clone());
6105                Ok(SelectProjection::Computed {
6106                    expr,
6107                    alias: alias.clone().unwrap_or_else(|| name.clone()),
6108                })
6109            } else {
6110                Ok(projection.clone())
6111            }
6112        }
6113        SelectProjection::Computed { expr, alias } => Ok(SelectProjection::Computed {
6114            expr: bind_scalar_expr(expr, bindings)?,
6115            alias: alias.clone(),
6116        }),
6117    }
6118}
6119
6120fn bind_aggregate_expr(
6121    aggregate: &AggregateExpr,
6122    bindings: &FxHashMap<String, Literal>,
6123) -> ExecutorResult<AggregateExpr> {
6124    match aggregate {
6125        AggregateExpr::CountStar { .. } => Ok(aggregate.clone()),
6126        AggregateExpr::Column {
6127            column,
6128            alias,
6129            function,
6130            distinct,
6131        } => {
6132            if bindings.contains_key(column) {
6133                return Err(Error::InvalidArgumentError(
6134                    "correlated columns are not supported inside aggregate expressions".into(),
6135                ));
6136            }
6137            Ok(AggregateExpr::Column {
6138                column: column.clone(),
6139                alias: alias.clone(),
6140                function: function.clone(),
6141                distinct: *distinct,
6142            })
6143        }
6144    }
6145}
6146
6147fn bind_scalar_expr(
6148    expr: &ScalarExpr<String>,
6149    bindings: &FxHashMap<String, Literal>,
6150) -> ExecutorResult<ScalarExpr<String>> {
6151    match expr {
6152        ScalarExpr::Column(name) => {
6153            if let Some(literal) = bindings.get(name) {
6154                Ok(ScalarExpr::Literal(literal.clone()))
6155            } else {
6156                Ok(ScalarExpr::Column(name.clone()))
6157            }
6158        }
6159        ScalarExpr::Literal(literal) => Ok(ScalarExpr::Literal(literal.clone())),
6160        ScalarExpr::Binary { left, op, right } => Ok(ScalarExpr::Binary {
6161            left: Box::new(bind_scalar_expr(left, bindings)?),
6162            op: *op,
6163            right: Box::new(bind_scalar_expr(right, bindings)?),
6164        }),
6165        ScalarExpr::Compare { left, op, right } => Ok(ScalarExpr::Compare {
6166            left: Box::new(bind_scalar_expr(left, bindings)?),
6167            op: *op,
6168            right: Box::new(bind_scalar_expr(right, bindings)?),
6169        }),
6170        ScalarExpr::Aggregate(call) => Ok(ScalarExpr::Aggregate(call.clone())),
6171        ScalarExpr::GetField { base, field_name } => {
6172            let bound_base = bind_scalar_expr(base, bindings)?;
6173            match bound_base {
6174                ScalarExpr::Literal(literal) => {
6175                    let value = extract_struct_field(&literal, field_name).unwrap_or(Literal::Null);
6176                    Ok(ScalarExpr::Literal(value))
6177                }
6178                other => Ok(ScalarExpr::GetField {
6179                    base: Box::new(other),
6180                    field_name: field_name.clone(),
6181                }),
6182            }
6183        }
6184        ScalarExpr::Cast { expr, data_type } => Ok(ScalarExpr::Cast {
6185            expr: Box::new(bind_scalar_expr(expr, bindings)?),
6186            data_type: data_type.clone(),
6187        }),
6188        ScalarExpr::Case {
6189            operand,
6190            branches,
6191            else_expr,
6192        } => {
6193            let bound_operand = match operand {
6194                Some(inner) => Some(Box::new(bind_scalar_expr(inner, bindings)?)),
6195                None => None,
6196            };
6197            let mut bound_branches = Vec::with_capacity(branches.len());
6198            for (when_expr, then_expr) in branches {
6199                bound_branches.push((
6200                    bind_scalar_expr(when_expr, bindings)?,
6201                    bind_scalar_expr(then_expr, bindings)?,
6202                ));
6203            }
6204            let bound_else = match else_expr {
6205                Some(inner) => Some(Box::new(bind_scalar_expr(inner, bindings)?)),
6206                None => None,
6207            };
6208            Ok(ScalarExpr::Case {
6209                operand: bound_operand,
6210                branches: bound_branches,
6211                else_expr: bound_else,
6212            })
6213        }
6214        ScalarExpr::Coalesce(items) => {
6215            let mut bound_items = Vec::with_capacity(items.len());
6216            for item in items {
6217                bound_items.push(bind_scalar_expr(item, bindings)?);
6218            }
6219            Ok(ScalarExpr::Coalesce(bound_items))
6220        }
6221        ScalarExpr::Not(inner) => Ok(ScalarExpr::Not(Box::new(bind_scalar_expr(
6222            inner, bindings,
6223        )?))),
6224        ScalarExpr::IsNull { expr, negated } => Ok(ScalarExpr::IsNull {
6225            expr: Box::new(bind_scalar_expr(expr, bindings)?),
6226            negated: *negated,
6227        }),
6228        ScalarExpr::ScalarSubquery(sub) => Ok(ScalarExpr::ScalarSubquery(sub.clone())),
6229    }
6230}
6231
6232fn bind_predicate_expr(
6233    expr: &LlkvExpr<'static, String>,
6234    bindings: &FxHashMap<String, Literal>,
6235) -> ExecutorResult<LlkvExpr<'static, String>> {
6236    match expr {
6237        LlkvExpr::And(children) => {
6238            let mut bound = Vec::with_capacity(children.len());
6239            for child in children {
6240                bound.push(bind_predicate_expr(child, bindings)?);
6241            }
6242            Ok(LlkvExpr::And(bound))
6243        }
6244        LlkvExpr::Or(children) => {
6245            let mut bound = Vec::with_capacity(children.len());
6246            for child in children {
6247                bound.push(bind_predicate_expr(child, bindings)?);
6248            }
6249            Ok(LlkvExpr::Or(bound))
6250        }
6251        LlkvExpr::Not(inner) => Ok(LlkvExpr::Not(Box::new(bind_predicate_expr(
6252            inner, bindings,
6253        )?))),
6254        LlkvExpr::Pred(filter) => bind_filter_predicate(filter, bindings),
6255        LlkvExpr::Compare { left, op, right } => Ok(LlkvExpr::Compare {
6256            left: bind_scalar_expr(left, bindings)?,
6257            op: *op,
6258            right: bind_scalar_expr(right, bindings)?,
6259        }),
6260        LlkvExpr::InList {
6261            expr,
6262            list,
6263            negated,
6264        } => {
6265            let target = bind_scalar_expr(expr, bindings)?;
6266            let mut bound_list = Vec::with_capacity(list.len());
6267            for item in list {
6268                bound_list.push(bind_scalar_expr(item, bindings)?);
6269            }
6270            Ok(LlkvExpr::InList {
6271                expr: target,
6272                list: bound_list,
6273                negated: *negated,
6274            })
6275        }
6276        LlkvExpr::IsNull { expr, negated } => Ok(LlkvExpr::IsNull {
6277            expr: bind_scalar_expr(expr, bindings)?,
6278            negated: *negated,
6279        }),
6280        LlkvExpr::Literal(value) => Ok(LlkvExpr::Literal(*value)),
6281        LlkvExpr::Exists(subquery) => Ok(LlkvExpr::Exists(subquery.clone())),
6282    }
6283}
6284
6285fn bind_filter_predicate(
6286    filter: &Filter<'static, String>,
6287    bindings: &FxHashMap<String, Literal>,
6288) -> ExecutorResult<LlkvExpr<'static, String>> {
6289    if let Some(literal) = bindings.get(&filter.field_id) {
6290        let result = evaluate_filter_against_literal(literal, &filter.op)?;
6291        return Ok(LlkvExpr::Literal(result));
6292    }
6293    Ok(LlkvExpr::Pred(filter.clone()))
6294}
6295
6296fn evaluate_filter_against_literal(value: &Literal, op: &Operator) -> ExecutorResult<bool> {
6297    use std::ops::Bound;
6298
6299    match op {
6300        Operator::IsNull => Ok(matches!(value, Literal::Null)),
6301        Operator::IsNotNull => Ok(!matches!(value, Literal::Null)),
6302        Operator::Equals(rhs) => Ok(literal_equals(value, rhs).unwrap_or(false)),
6303        Operator::GreaterThan(rhs) => Ok(literal_compare(value, rhs)
6304            .map(|cmp| cmp == std::cmp::Ordering::Greater)
6305            .unwrap_or(false)),
6306        Operator::GreaterThanOrEquals(rhs) => Ok(literal_compare(value, rhs)
6307            .map(|cmp| matches!(cmp, std::cmp::Ordering::Greater | std::cmp::Ordering::Equal))
6308            .unwrap_or(false)),
6309        Operator::LessThan(rhs) => Ok(literal_compare(value, rhs)
6310            .map(|cmp| cmp == std::cmp::Ordering::Less)
6311            .unwrap_or(false)),
6312        Operator::LessThanOrEquals(rhs) => Ok(literal_compare(value, rhs)
6313            .map(|cmp| matches!(cmp, std::cmp::Ordering::Less | std::cmp::Ordering::Equal))
6314            .unwrap_or(false)),
6315        Operator::In(values) => Ok(values
6316            .iter()
6317            .any(|candidate| literal_equals(value, candidate).unwrap_or(false))),
6318        Operator::Range { lower, upper } => {
6319            let lower_ok = match lower {
6320                Bound::Unbounded => Some(true),
6321                Bound::Included(bound) => literal_compare(value, bound).map(|cmp| {
6322                    matches!(cmp, std::cmp::Ordering::Greater | std::cmp::Ordering::Equal)
6323                }),
6324                Bound::Excluded(bound) => {
6325                    literal_compare(value, bound).map(|cmp| cmp == std::cmp::Ordering::Greater)
6326                }
6327            }
6328            .unwrap_or(false);
6329
6330            let upper_ok = match upper {
6331                Bound::Unbounded => Some(true),
6332                Bound::Included(bound) => literal_compare(value, bound)
6333                    .map(|cmp| matches!(cmp, std::cmp::Ordering::Less | std::cmp::Ordering::Equal)),
6334                Bound::Excluded(bound) => {
6335                    literal_compare(value, bound).map(|cmp| cmp == std::cmp::Ordering::Less)
6336                }
6337            }
6338            .unwrap_or(false);
6339
6340            Ok(lower_ok && upper_ok)
6341        }
6342        Operator::StartsWith {
6343            pattern,
6344            case_sensitive,
6345        } => {
6346            let target = if *case_sensitive {
6347                pattern.to_string()
6348            } else {
6349                pattern.to_ascii_lowercase()
6350            };
6351            Ok(literal_string(value, *case_sensitive)
6352                .map(|source| source.starts_with(&target))
6353                .unwrap_or(false))
6354        }
6355        Operator::EndsWith {
6356            pattern,
6357            case_sensitive,
6358        } => {
6359            let target = if *case_sensitive {
6360                pattern.to_string()
6361            } else {
6362                pattern.to_ascii_lowercase()
6363            };
6364            Ok(literal_string(value, *case_sensitive)
6365                .map(|source| source.ends_with(&target))
6366                .unwrap_or(false))
6367        }
6368        Operator::Contains {
6369            pattern,
6370            case_sensitive,
6371        } => {
6372            let target = if *case_sensitive {
6373                pattern.to_string()
6374            } else {
6375                pattern.to_ascii_lowercase()
6376            };
6377            Ok(literal_string(value, *case_sensitive)
6378                .map(|source| source.contains(&target))
6379                .unwrap_or(false))
6380        }
6381    }
6382}
6383
6384fn literal_compare(lhs: &Literal, rhs: &Literal) -> Option<std::cmp::Ordering> {
6385    match (lhs, rhs) {
6386        (Literal::Integer(a), Literal::Integer(b)) => Some(a.cmp(b)),
6387        (Literal::Float(a), Literal::Float(b)) => a.partial_cmp(b),
6388        (Literal::Integer(a), Literal::Float(b)) => (*a as f64).partial_cmp(b),
6389        (Literal::Float(a), Literal::Integer(b)) => a.partial_cmp(&(*b as f64)),
6390        (Literal::String(a), Literal::String(b)) => Some(a.cmp(b)),
6391        _ => None,
6392    }
6393}
6394
6395fn literal_equals(lhs: &Literal, rhs: &Literal) -> Option<bool> {
6396    match (lhs, rhs) {
6397        (Literal::Boolean(a), Literal::Boolean(b)) => Some(a == b),
6398        (Literal::String(a), Literal::String(b)) => Some(a == b),
6399        (Literal::Integer(_), Literal::Integer(_))
6400        | (Literal::Integer(_), Literal::Float(_))
6401        | (Literal::Float(_), Literal::Integer(_))
6402        | (Literal::Float(_), Literal::Float(_)) => {
6403            literal_compare(lhs, rhs).map(|cmp| cmp == std::cmp::Ordering::Equal)
6404        }
6405        _ => None,
6406    }
6407}
6408
6409fn literal_string(literal: &Literal, case_sensitive: bool) -> Option<String> {
6410    match literal {
6411        Literal::String(value) => {
6412            if case_sensitive {
6413                Some(value.clone())
6414            } else {
6415                Some(value.to_ascii_lowercase())
6416            }
6417        }
6418        _ => None,
6419    }
6420}
6421
6422fn extract_struct_field(literal: &Literal, field_name: &str) -> Option<Literal> {
6423    if let Literal::Struct(fields) = literal {
6424        for (name, value) in fields {
6425            if name.eq_ignore_ascii_case(field_name) {
6426                return Some((**value).clone());
6427            }
6428        }
6429    }
6430    None
6431}
6432
6433fn array_value_to_literal(array: &ArrayRef, idx: usize) -> ExecutorResult<Literal> {
6434    if array.is_null(idx) {
6435        return Ok(Literal::Null);
6436    }
6437
6438    match array.data_type() {
6439        DataType::Boolean => {
6440            let array = array
6441                .as_any()
6442                .downcast_ref::<BooleanArray>()
6443                .ok_or_else(|| Error::Internal("failed to downcast boolean array".into()))?;
6444            Ok(Literal::Boolean(array.value(idx)))
6445        }
6446        DataType::Int8 => {
6447            let array = array
6448                .as_any()
6449                .downcast_ref::<Int8Array>()
6450                .ok_or_else(|| Error::Internal("failed to downcast int8 array".into()))?;
6451            Ok(Literal::Integer(array.value(idx) as i128))
6452        }
6453        DataType::Int16 => {
6454            let array = array
6455                .as_any()
6456                .downcast_ref::<Int16Array>()
6457                .ok_or_else(|| Error::Internal("failed to downcast int16 array".into()))?;
6458            Ok(Literal::Integer(array.value(idx) as i128))
6459        }
6460        DataType::Int32 => {
6461            let array = array
6462                .as_any()
6463                .downcast_ref::<Int32Array>()
6464                .ok_or_else(|| Error::Internal("failed to downcast int32 array".into()))?;
6465            Ok(Literal::Integer(array.value(idx) as i128))
6466        }
6467        DataType::Int64 => {
6468            let array = array
6469                .as_any()
6470                .downcast_ref::<Int64Array>()
6471                .ok_or_else(|| Error::Internal("failed to downcast int64 array".into()))?;
6472            Ok(Literal::Integer(array.value(idx) as i128))
6473        }
6474        DataType::UInt8 => {
6475            let array = array
6476                .as_any()
6477                .downcast_ref::<UInt8Array>()
6478                .ok_or_else(|| Error::Internal("failed to downcast uint8 array".into()))?;
6479            Ok(Literal::Integer(array.value(idx) as i128))
6480        }
6481        DataType::UInt16 => {
6482            let array = array
6483                .as_any()
6484                .downcast_ref::<UInt16Array>()
6485                .ok_or_else(|| Error::Internal("failed to downcast uint16 array".into()))?;
6486            Ok(Literal::Integer(array.value(idx) as i128))
6487        }
6488        DataType::UInt32 => {
6489            let array = array
6490                .as_any()
6491                .downcast_ref::<UInt32Array>()
6492                .ok_or_else(|| Error::Internal("failed to downcast uint32 array".into()))?;
6493            Ok(Literal::Integer(array.value(idx) as i128))
6494        }
6495        DataType::UInt64 => {
6496            let array = array
6497                .as_any()
6498                .downcast_ref::<UInt64Array>()
6499                .ok_or_else(|| Error::Internal("failed to downcast uint64 array".into()))?;
6500            Ok(Literal::Integer(array.value(idx) as i128))
6501        }
6502        DataType::Float32 => {
6503            let array = array
6504                .as_any()
6505                .downcast_ref::<Float32Array>()
6506                .ok_or_else(|| Error::Internal("failed to downcast float32 array".into()))?;
6507            Ok(Literal::Float(array.value(idx) as f64))
6508        }
6509        DataType::Float64 => {
6510            let array = array
6511                .as_any()
6512                .downcast_ref::<Float64Array>()
6513                .ok_or_else(|| Error::Internal("failed to downcast float64 array".into()))?;
6514            Ok(Literal::Float(array.value(idx)))
6515        }
6516        DataType::Utf8 => {
6517            let array = array
6518                .as_any()
6519                .downcast_ref::<StringArray>()
6520                .ok_or_else(|| Error::Internal("failed to downcast utf8 array".into()))?;
6521            Ok(Literal::String(array.value(idx).to_string()))
6522        }
6523        DataType::LargeUtf8 => {
6524            let array = array
6525                .as_any()
6526                .downcast_ref::<LargeStringArray>()
6527                .ok_or_else(|| Error::Internal("failed to downcast large utf8 array".into()))?;
6528            Ok(Literal::String(array.value(idx).to_string()))
6529        }
6530        DataType::Struct(fields) => {
6531            let struct_array = array
6532                .as_any()
6533                .downcast_ref::<StructArray>()
6534                .ok_or_else(|| Error::Internal("failed to downcast struct array".into()))?;
6535            let mut members = Vec::with_capacity(fields.len());
6536            for (field_idx, field) in fields.iter().enumerate() {
6537                let child = struct_array.column(field_idx);
6538                let literal = array_value_to_literal(child, idx)?;
6539                members.push((field.name().clone(), Box::new(literal)));
6540            }
6541            Ok(Literal::Struct(members))
6542        }
6543        other => Err(Error::InvalidArgumentError(format!(
6544            "unsupported scalar subquery result type: {other:?}"
6545        ))),
6546    }
6547}
6548
6549fn collect_scalar_subquery_ids(expr: &ScalarExpr<FieldId>, ids: &mut FxHashSet<SubqueryId>) {
6550    match expr {
6551        ScalarExpr::ScalarSubquery(subquery) => {
6552            ids.insert(subquery.id);
6553        }
6554        ScalarExpr::Binary { left, right, .. } => {
6555            collect_scalar_subquery_ids(left, ids);
6556            collect_scalar_subquery_ids(right, ids);
6557        }
6558        ScalarExpr::Compare { left, right, .. } => {
6559            collect_scalar_subquery_ids(left, ids);
6560            collect_scalar_subquery_ids(right, ids);
6561        }
6562        ScalarExpr::GetField { base, .. } => {
6563            collect_scalar_subquery_ids(base, ids);
6564        }
6565        ScalarExpr::Cast { expr, .. } => {
6566            collect_scalar_subquery_ids(expr, ids);
6567        }
6568        ScalarExpr::Not(expr) => {
6569            collect_scalar_subquery_ids(expr, ids);
6570        }
6571        ScalarExpr::IsNull { expr, .. } => {
6572            collect_scalar_subquery_ids(expr, ids);
6573        }
6574        ScalarExpr::Case {
6575            operand,
6576            branches,
6577            else_expr,
6578        } => {
6579            if let Some(op) = operand {
6580                collect_scalar_subquery_ids(op, ids);
6581            }
6582            for (when_expr, then_expr) in branches {
6583                collect_scalar_subquery_ids(when_expr, ids);
6584                collect_scalar_subquery_ids(then_expr, ids);
6585            }
6586            if let Some(else_expr) = else_expr {
6587                collect_scalar_subquery_ids(else_expr, ids);
6588            }
6589        }
6590        ScalarExpr::Coalesce(items) => {
6591            for item in items {
6592                collect_scalar_subquery_ids(item, ids);
6593            }
6594        }
6595        ScalarExpr::Aggregate(_) | ScalarExpr::Column(_) | ScalarExpr::Literal(_) => {}
6596    }
6597}
6598
6599fn rewrite_scalar_expr_for_subqueries(
6600    expr: &ScalarExpr<FieldId>,
6601    mapping: &FxHashMap<SubqueryId, FieldId>,
6602) -> ScalarExpr<FieldId> {
6603    match expr {
6604        ScalarExpr::ScalarSubquery(subquery) => mapping
6605            .get(&subquery.id)
6606            .map(|field_id| ScalarExpr::Column(*field_id))
6607            .unwrap_or_else(|| ScalarExpr::ScalarSubquery(subquery.clone())),
6608        ScalarExpr::Binary { left, op, right } => ScalarExpr::Binary {
6609            left: Box::new(rewrite_scalar_expr_for_subqueries(left, mapping)),
6610            op: *op,
6611            right: Box::new(rewrite_scalar_expr_for_subqueries(right, mapping)),
6612        },
6613        ScalarExpr::Compare { left, op, right } => ScalarExpr::Compare {
6614            left: Box::new(rewrite_scalar_expr_for_subqueries(left, mapping)),
6615            op: *op,
6616            right: Box::new(rewrite_scalar_expr_for_subqueries(right, mapping)),
6617        },
6618        ScalarExpr::GetField { base, field_name } => ScalarExpr::GetField {
6619            base: Box::new(rewrite_scalar_expr_for_subqueries(base, mapping)),
6620            field_name: field_name.clone(),
6621        },
6622        ScalarExpr::Cast { expr, data_type } => ScalarExpr::Cast {
6623            expr: Box::new(rewrite_scalar_expr_for_subqueries(expr, mapping)),
6624            data_type: data_type.clone(),
6625        },
6626        ScalarExpr::Not(expr) => {
6627            ScalarExpr::Not(Box::new(rewrite_scalar_expr_for_subqueries(expr, mapping)))
6628        }
6629        ScalarExpr::IsNull { expr, negated } => ScalarExpr::IsNull {
6630            expr: Box::new(rewrite_scalar_expr_for_subqueries(expr, mapping)),
6631            negated: *negated,
6632        },
6633        ScalarExpr::Case {
6634            operand,
6635            branches,
6636            else_expr,
6637        } => ScalarExpr::Case {
6638            operand: operand
6639                .as_ref()
6640                .map(|op| Box::new(rewrite_scalar_expr_for_subqueries(op, mapping))),
6641            branches: branches
6642                .iter()
6643                .map(|(when_expr, then_expr)| {
6644                    (
6645                        rewrite_scalar_expr_for_subqueries(when_expr, mapping),
6646                        rewrite_scalar_expr_for_subqueries(then_expr, mapping),
6647                    )
6648                })
6649                .collect(),
6650            else_expr: else_expr
6651                .as_ref()
6652                .map(|expr| Box::new(rewrite_scalar_expr_for_subqueries(expr, mapping))),
6653        },
6654        ScalarExpr::Coalesce(items) => ScalarExpr::Coalesce(
6655            items
6656                .iter()
6657                .map(|item| rewrite_scalar_expr_for_subqueries(item, mapping))
6658                .collect(),
6659        ),
6660        ScalarExpr::Aggregate(_) | ScalarExpr::Column(_) | ScalarExpr::Literal(_) => expr.clone(),
6661    }
6662}
6663
6664fn collect_correlated_bindings(
6665    context: &mut CrossProductExpressionContext,
6666    batch: &RecordBatch,
6667    row_idx: usize,
6668    columns: &[llkv_plan::CorrelatedColumn],
6669) -> ExecutorResult<FxHashMap<String, Literal>> {
6670    let mut out = FxHashMap::default();
6671
6672    for correlated in columns {
6673        if !correlated.field_path.is_empty() {
6674            return Err(Error::InvalidArgumentError(
6675                "correlated field path resolution is not yet supported".into(),
6676            ));
6677        }
6678
6679        let field_id = context
6680            .field_id_for_column(&correlated.column)
6681            .ok_or_else(|| {
6682                Error::InvalidArgumentError(format!(
6683                    "correlated column '{}' not found in outer query output",
6684                    correlated.column
6685                ))
6686            })?;
6687
6688        let accessor = context.column_accessor(field_id, batch)?;
6689        let literal = accessor.literal_at(row_idx)?;
6690        out.insert(correlated.placeholder.clone(), literal);
6691    }
6692
6693    Ok(out)
6694}
6695
/// Streaming execution handle for SELECT queries.
///
/// The output schema is available up front via [`SelectExecution::schema`];
/// the underlying work (table scan or precomputed batch emission) is deferred
/// until `stream`/`collect` is called.
#[derive(Clone)]
pub struct SelectExecution<P>
where
    P: Pager<Blob = EntryHandle> + Send + Sync,
{
    // Name of the table this execution reads from (for display/diagnostics).
    table_name: String,
    // Arrow schema describing every batch this execution emits.
    schema: Arc<Schema>,
    // Deferred batch source: lazy projection scan or a single prebuilt batch.
    stream: SelectStream<P>,
}
6706
/// Internal batch source backing a `SelectExecution`.
#[derive(Clone)]
enum SelectStream<P>
where
    P: Pager<Blob = EntryHandle> + Send + Sync,
{
    /// Lazily scan a table, applying DISTINCT, ordering, and NULL-row
    /// synthesis while streaming (see `SelectExecution::stream`).
    Projection {
        table: Arc<ExecutorTable<P>>,
        projections: Vec<ScanProjection>,
        filter_expr: LlkvExpr<'static, FieldId>,
        options: ScanStreamOptions<P>,
        // When true, missing rows may be synthesized as all-NULL output
        // during streaming; filtered scans never synthesize rows.
        full_table_scan: bool,
        order_by: Vec<OrderByPlan>,
        distinct: bool,
    },
    /// Emit a single precomputed batch (e.g. aggregation results).
    Aggregation {
        batch: RecordBatch,
    },
}
6725
impl<P> SelectExecution<P>
where
    P: Pager<Blob = EntryHandle> + Send + Sync,
{
    /// Build an execution that lazily scans `table`, producing `projections`
    /// for rows matching `filter_expr`. `order_by`/`distinct` are applied
    /// during streaming; `full_table_scan` enables NULL-row synthesis for
    /// unfiltered scans.
    #[allow(clippy::too_many_arguments)]
    fn new_projection(
        table_name: String,
        schema: Arc<Schema>,
        table: Arc<ExecutorTable<P>>,
        projections: Vec<ScanProjection>,
        filter_expr: LlkvExpr<'static, FieldId>,
        options: ScanStreamOptions<P>,
        full_table_scan: bool,
        order_by: Vec<OrderByPlan>,
        distinct: bool,
    ) -> Self {
        Self {
            table_name,
            schema,
            stream: SelectStream::Projection {
                table,
                projections,
                filter_expr,
                options,
                full_table_scan,
                order_by,
                distinct,
            },
        }
    }

    /// Wrap an already-materialized batch (e.g. aggregation output) in an
    /// execution handle that emits it once.
    pub fn new_single_batch(table_name: String, schema: Arc<Schema>, batch: RecordBatch) -> Self {
        Self {
            table_name,
            schema,
            stream: SelectStream::Aggregation { batch },
        }
    }

    /// Alias for [`Self::new_single_batch`].
    pub fn from_batch(table_name: String, schema: Arc<Schema>, batch: RecordBatch) -> Self {
        Self::new_single_batch(table_name, schema, batch)
    }

    /// Name of the table this execution reads from.
    pub fn table_name(&self) -> &str {
        &self.table_name
    }

    /// Schema of every batch this execution emits.
    pub fn schema(&self) -> Arc<Schema> {
        Arc::clone(&self.schema)
    }

    /// Drive the execution, invoking `on_batch` for each produced batch.
    ///
    /// For projection streams this runs the table scan and, depending on the
    /// plan: de-duplicates rows (DISTINCT), buffers batches for a post-scan
    /// sort when the scan itself cannot satisfy ORDER BY, and synthesizes
    /// all-NULL rows for full table scans that produced fewer rows than the
    /// table holds. Errors from `on_batch` abort the stream and propagate.
    pub fn stream(
        self,
        mut on_batch: impl FnMut(RecordBatch) -> ExecutorResult<()>,
    ) -> ExecutorResult<()> {
        let schema = Arc::clone(&self.schema);
        match self.stream {
            SelectStream::Projection {
                table,
                projections,
                filter_expr,
                options,
                full_table_scan,
                order_by,
                distinct,
            } => {
                // Early return for empty tables to avoid ColumnStore data_type() errors
                let total_rows = table.total_rows.load(Ordering::SeqCst);
                if total_rows == 0 {
                    // Empty table - return empty result with correct schema
                    return Ok(());
                }

                // `error` carries the first failure out of the scan callback,
                // which itself cannot return a Result.
                let mut error: Option<Error> = None;
                let mut produced = false;
                let mut produced_rows: u64 = 0;
                // Scan-level ordering with NULLs first requires buffering so
                // synthesized NULL batches can be emitted ahead of scan output.
                let capture_nulls_first = matches!(options.order, Some(spec) if spec.nulls_first);
                // Multi-key ORDER BY, or ORDER BY without a scan-level order,
                // must be sorted after all batches are collected.
                let needs_post_sort =
                    !order_by.is_empty() && (order_by.len() > 1 || options.order.is_none());
                let collect_batches = needs_post_sort || capture_nulls_first;
                let include_nulls = options.include_nulls;
                let has_row_id_filter = options.row_id_filter.is_some();
                let mut distinct_state = if distinct {
                    Some(DistinctState::default())
                } else {
                    None
                };
                let scan_options = options;
                let mut buffered_batches: Vec<RecordBatch> = Vec::new();
                table
                    .table
                    .scan_stream(projections, &filter_expr, scan_options, |batch| {
                        // After a failure, ignore the remainder of the scan.
                        if error.is_some() {
                            return;
                        }
                        let mut batch = batch;
                        if let Some(state) = distinct_state.as_mut() {
                            match distinct_filter_batch(batch, state) {
                                Ok(Some(filtered)) => {
                                    batch = filtered;
                                }
                                Ok(None) => {
                                    // Entire batch was duplicates; drop it.
                                    return;
                                }
                                Err(err) => {
                                    error = Some(err);
                                    return;
                                }
                            }
                        }
                        produced = true;
                        produced_rows = produced_rows.saturating_add(batch.num_rows() as u64);
                        if collect_batches {
                            buffered_batches.push(batch);
                        } else if let Err(err) = on_batch(batch) {
                            error = Some(err);
                        }
                    })?;
                if let Some(err) = error {
                    return Err(err);
                }
                if !produced {
                    // Only synthesize null rows if this was a full table scan
                    // If there was a filter and it matched no rows, we should return empty results
                    if !distinct && full_table_scan && total_rows > 0 {
                        for batch in synthesize_null_scan(Arc::clone(&schema), total_rows)? {
                            on_batch(batch)?;
                        }
                    }
                    return Ok(());
                }
                let mut null_batches: Vec<RecordBatch> = Vec::new();
                // Only synthesize null rows if:
                // 1. include_nulls is true
                // 2. This is a full table scan
                // 3. We produced fewer rows than the total
                // 4. We DON'T have a row_id_filter (e.g., MVCC filter) that intentionally filtered rows
                if !distinct
                    && include_nulls
                    && full_table_scan
                    && produced_rows < total_rows
                    && !has_row_id_filter
                {
                    let missing = total_rows - produced_rows;
                    if missing > 0 {
                        null_batches = synthesize_null_scan(Arc::clone(&schema), missing)?;
                    }
                }

                if collect_batches {
                    if needs_post_sort {
                        // Concatenate everything (including synthesized NULL
                        // rows) and sort once, then emit a single batch.
                        if !null_batches.is_empty() {
                            buffered_batches.extend(null_batches);
                        }
                        if !buffered_batches.is_empty() {
                            let combined =
                                concat_batches(&schema, &buffered_batches).map_err(|err| {
                                    Error::InvalidArgumentError(format!(
                                        "failed to concatenate result batches for ORDER BY: {}",
                                        err
                                    ))
                                })?;
                            let sorted_batch =
                                sort_record_batch_with_order(&schema, &combined, &order_by)?;
                            on_batch(sorted_batch)?;
                        }
                    } else if capture_nulls_first {
                        // NULLS FIRST: emit synthesized NULL rows before the
                        // (already scan-ordered) buffered output.
                        for batch in null_batches {
                            on_batch(batch)?;
                        }
                        for batch in buffered_batches {
                            on_batch(batch)?;
                        }
                    }
                } else if !null_batches.is_empty() {
                    // Unbuffered path: scan output was already forwarded, so
                    // only the trailing NULL rows remain to be emitted.
                    for batch in null_batches {
                        on_batch(batch)?;
                    }
                }
                Ok(())
            }
            SelectStream::Aggregation { batch } => on_batch(batch),
        }
    }

    /// Run the execution to completion, collecting every batch into a Vec.
    pub fn collect(self) -> ExecutorResult<Vec<RecordBatch>> {
        let mut batches = Vec::new();
        self.stream(|batch| {
            batches.push(batch);
            Ok(())
        })?;
        Ok(batches)
    }

    /// Run the execution to completion and convert the output into row-major
    /// `PlanValue` rows alongside the output column names.
    pub fn collect_rows(self) -> ExecutorResult<ExecutorRowBatch> {
        let schema = self.schema();
        let mut rows: Vec<Vec<PlanValue>> = Vec::new();
        self.stream(|batch| {
            for row_idx in 0..batch.num_rows() {
                let mut row: Vec<PlanValue> = Vec::with_capacity(batch.num_columns());
                for col_idx in 0..batch.num_columns() {
                    let value = llkv_plan::plan_value_from_array(batch.column(col_idx), row_idx)?;
                    row.push(value);
                }
                rows.push(row);
            }
            Ok(())
        })?;
        let columns = schema
            .fields()
            .iter()
            .map(|field| field.name().to_string())
            .collect();
        Ok(ExecutorRowBatch { columns, rows })
    }

    /// Like [`Self::collect_rows`] but discards the column names.
    pub fn into_rows(self) -> ExecutorResult<Vec<Vec<PlanValue>>> {
        Ok(self.collect_rows()?.rows)
    }
}
6946
6947impl<P> fmt::Debug for SelectExecution<P>
6948where
6949    P: Pager<Blob = EntryHandle> + Send + Sync,
6950{
6951    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6952        f.debug_struct("SelectExecution")
6953            .field("table_name", &self.table_name)
6954            .field("schema", &self.schema)
6955            .finish()
6956    }
6957}
6958
6959// ============================================================================
6960// Helper Functions
6961// ============================================================================
6962
6963fn expand_order_targets(
6964    order_items: &[OrderByPlan],
6965    projections: &[ScanProjection],
6966) -> ExecutorResult<Vec<OrderByPlan>> {
6967    let mut expanded = Vec::new();
6968
6969    for item in order_items {
6970        match &item.target {
6971            OrderTarget::All => {
6972                if projections.is_empty() {
6973                    return Err(Error::InvalidArgumentError(
6974                        "ORDER BY ALL requires at least one projection".into(),
6975                    ));
6976                }
6977
6978                for (idx, projection) in projections.iter().enumerate() {
6979                    if matches!(projection, ScanProjection::Computed { .. }) {
6980                        return Err(Error::InvalidArgumentError(
6981                            "ORDER BY ALL cannot reference computed projections".into(),
6982                        ));
6983                    }
6984
6985                    let mut clone = item.clone();
6986                    clone.target = OrderTarget::Index(idx);
6987                    expanded.push(clone);
6988                }
6989            }
6990            _ => expanded.push(item.clone()),
6991        }
6992    }
6993
6994    Ok(expanded)
6995}
6996
/// Translate an [`OrderByPlan`] into the storage-level [`ScanOrderSpec`]
/// used to request an ordered scan.
///
/// The target may be a column name or a 0-based projection index (reported
/// 1-based in error messages); `OrderTarget::All` must already have been
/// expanded by `expand_order_targets`. Native ordering supports only Int64
/// and Utf8 columns; `CastTextToInteger` additionally requires a Utf8 column.
fn resolve_scan_order<P>(
    table: &ExecutorTable<P>,
    projections: &[ScanProjection],
    order_plan: &OrderByPlan,
) -> ExecutorResult<ScanOrderSpec>
where
    P: Pager<Blob = EntryHandle> + Send + Sync,
{
    // Resolve the order target to a concrete column and its field id.
    let (column, field_id) = match &order_plan.target {
        OrderTarget::Column(name) => {
            let column = table.schema.resolve(name).ok_or_else(|| {
                Error::InvalidArgumentError(format!("unknown column '{}' in ORDER BY", name))
            })?;
            (column, column.field_id)
        }
        OrderTarget::Index(position) => {
            let projection = projections.get(*position).ok_or_else(|| {
                Error::InvalidArgumentError(format!(
                    "ORDER BY position {} is out of range",
                    position + 1
                ))
            })?;
            match projection {
                ScanProjection::Column(store_projection) => {
                    let field_id = store_projection.logical_field_id.field_id();
                    let column = table.schema.column_by_field_id(field_id).ok_or_else(|| {
                        Error::InvalidArgumentError(format!(
                            "unknown column with field id {field_id} in ORDER BY"
                        ))
                    })?;
                    (column, field_id)
                }
                ScanProjection::Computed { .. } => {
                    // Computed projections have no storage column to order by.
                    return Err(Error::InvalidArgumentError(
                        "ORDER BY position referring to computed projection is not supported"
                            .into(),
                    ));
                }
            }
        }
        OrderTarget::All => {
            return Err(Error::InvalidArgumentError(
                "ORDER BY ALL should be expanded before execution".into(),
            ));
        }
    };

    // Pick the value transform the scan applies before comparing keys.
    let transform = match order_plan.sort_type {
        OrderSortType::Native => match column.data_type {
            DataType::Int64 => ScanOrderTransform::IdentityInteger,
            DataType::Utf8 => ScanOrderTransform::IdentityUtf8,
            ref other => {
                return Err(Error::InvalidArgumentError(format!(
                    "ORDER BY on column type {:?} is not supported",
                    other
                )));
            }
        },
        OrderSortType::CastTextToInteger => {
            if column.data_type != DataType::Utf8 {
                return Err(Error::InvalidArgumentError(
                    "ORDER BY CAST expects a text column".into(),
                ));
            }
            ScanOrderTransform::CastUtf8ToInteger
        }
    };

    let direction = if order_plan.ascending {
        ScanOrderDirection::Ascending
    } else {
        ScanOrderDirection::Descending
    };

    Ok(ScanOrderSpec {
        field_id,
        direction,
        nulls_first: order_plan.nulls_first,
        transform,
    })
}
7078
7079fn synthesize_null_scan(schema: Arc<Schema>, total_rows: u64) -> ExecutorResult<Vec<RecordBatch>> {
7080    let row_count = usize::try_from(total_rows).map_err(|_| {
7081        Error::InvalidArgumentError("table row count exceeds supported in-memory batch size".into())
7082    })?;
7083
7084    let mut arrays: Vec<ArrayRef> = Vec::with_capacity(schema.fields().len());
7085    for field in schema.fields() {
7086        match field.data_type() {
7087            DataType::Int64 => {
7088                let mut builder = Int64Builder::with_capacity(row_count);
7089                for _ in 0..row_count {
7090                    builder.append_null();
7091                }
7092                arrays.push(Arc::new(builder.finish()));
7093            }
7094            DataType::Float64 => {
7095                let mut builder = arrow::array::Float64Builder::with_capacity(row_count);
7096                for _ in 0..row_count {
7097                    builder.append_null();
7098                }
7099                arrays.push(Arc::new(builder.finish()));
7100            }
7101            DataType::Utf8 => {
7102                let mut builder = arrow::array::StringBuilder::with_capacity(row_count, 0);
7103                for _ in 0..row_count {
7104                    builder.append_null();
7105                }
7106                arrays.push(Arc::new(builder.finish()));
7107            }
7108            DataType::Date32 => {
7109                let mut builder = arrow::array::Date32Builder::with_capacity(row_count);
7110                for _ in 0..row_count {
7111                    builder.append_null();
7112                }
7113                arrays.push(Arc::new(builder.finish()));
7114            }
7115            other => {
7116                return Err(Error::InvalidArgumentError(format!(
7117                    "unsupported data type in null synthesis: {other:?}"
7118                )));
7119            }
7120        }
7121    }
7122
7123    let batch = RecordBatch::try_new(schema, arrays)?;
7124    Ok(vec![batch])
7125}
7126
/// Materialized scan output for one side of a cross product.
struct TableCrossProductData {
    // Schema whose fields use fully qualified `schema.table.column` names.
    schema: Arc<Schema>,
    // Scanned row batches, re-wrapped to match `schema`.
    batches: Vec<RecordBatch>,
    // Column count per constituent table; a freshly scanned table has one
    // entry (its own column count). Presumably merged as products are built —
    // the combining code is outside this chunk.
    column_counts: Vec<usize>,
    // Planner index per constituent table, parallel to `column_counts`.
    table_indices: Vec<usize>,
}
7133
/// Scan every column of `table` and package the rows for cross-product
/// construction: fields are renamed to fully qualified
/// `schema.table.column` names (using the alias when present), and any
/// single-column `constraints` are applied to prune rows early.
///
/// # Errors
/// Fails for tables with no columns, on scan errors, or if a scanned batch
/// cannot be re-wrapped with the qualified schema.
fn collect_table_data<P>(
    table_index: usize,
    table_ref: &llkv_plan::TableRef,
    table: &ExecutorTable<P>,
    constraints: &[ColumnConstraint],
) -> ExecutorResult<TableCrossProductData>
where
    P: Pager<Blob = EntryHandle> + Send + Sync,
{
    if table.schema.columns.is_empty() {
        return Err(Error::InvalidArgumentError(format!(
            "table '{}' has no columns; cross products require at least one column",
            table_ref.qualified_name()
        )));
    }

    // Build one projection and one output field per column, both keyed by the
    // qualified name so downstream consumers can disambiguate columns.
    let mut projections = Vec::with_capacity(table.schema.columns.len());
    let mut fields = Vec::with_capacity(table.schema.columns.len());

    for column in &table.schema.columns {
        // Use the query alias, when present, as the table name component.
        let table_component = table_ref
            .alias
            .as_deref()
            .unwrap_or(table_ref.table.as_str());
        let qualified_name = format!("{}.{}.{}", table_ref.schema, table_component, column.name);
        projections.push(ScanProjection::from(StoreProjection::with_alias(
            LogicalFieldId::for_user(table.table.table_id(), column.field_id),
            qualified_name.clone(),
        )));
        fields.push(Field::new(
            qualified_name,
            column.data_type.clone(),
            column.nullable,
        ));
    }

    let schema = Arc::new(Schema::new(fields));

    // Match-all filter anchored on the first user column (falls back to the
    // row-id column for schemas without one).
    let filter_field_id = table.schema.first_field_id().unwrap_or(ROW_ID_FIELD_ID);
    let filter_expr = crate::translation::expression::full_table_scan_filter(filter_field_id);

    let mut raw_batches = Vec::new();
    table.table.scan_stream(
        projections,
        &filter_expr,
        ScanStreamOptions {
            include_nulls: true,
            ..ScanStreamOptions::default()
        },
        |batch| {
            raw_batches.push(batch);
        },
    )?;

    // Re-wrap each scanned batch under the qualified-name schema built above.
    let mut normalized_batches = Vec::with_capacity(raw_batches.len());
    for batch in raw_batches {
        let normalized = RecordBatch::try_new(Arc::clone(&schema), batch.columns().to_vec())
            .map_err(|err| {
                Error::Internal(format!(
                    "failed to align scan batch for table '{}': {}",
                    table_ref.qualified_name(),
                    err
                ))
            })?;
        normalized_batches.push(normalized);
    }

    if !constraints.is_empty() {
        normalized_batches = apply_column_constraints_to_batches(normalized_batches, constraints)?;
    }

    Ok(TableCrossProductData {
        schema,
        batches: normalized_batches,
        column_counts: vec![table.schema.columns.len()],
        table_indices: vec![table_index],
    })
}
7212
7213fn apply_column_constraints_to_batches(
7214    batches: Vec<RecordBatch>,
7215    constraints: &[ColumnConstraint],
7216) -> ExecutorResult<Vec<RecordBatch>> {
7217    if batches.is_empty() {
7218        return Ok(batches);
7219    }
7220
7221    let mut filtered = batches;
7222    for constraint in constraints {
7223        match constraint {
7224            ColumnConstraint::Equality(lit) => {
7225                filtered = filter_batches_by_literal(filtered, lit.column.column, &lit.value)?;
7226            }
7227            ColumnConstraint::InList(in_list) => {
7228                filtered =
7229                    filter_batches_by_in_list(filtered, in_list.column.column, &in_list.values)?;
7230            }
7231        }
7232        if filtered.is_empty() {
7233            break;
7234        }
7235    }
7236
7237    Ok(filtered)
7238}
7239
7240fn filter_batches_by_literal(
7241    batches: Vec<RecordBatch>,
7242    column_idx: usize,
7243    literal: &PlanValue,
7244) -> ExecutorResult<Vec<RecordBatch>> {
7245    let mut result = Vec::with_capacity(batches.len());
7246
7247    for batch in batches {
7248        if column_idx >= batch.num_columns() {
7249            return Err(Error::Internal(
7250                "literal constraint referenced invalid column index".into(),
7251            ));
7252        }
7253
7254        if batch.num_rows() == 0 {
7255            result.push(batch);
7256            continue;
7257        }
7258
7259        let column = batch.column(column_idx);
7260        let mut keep_rows: Vec<u32> = Vec::with_capacity(batch.num_rows());
7261
7262        for row_idx in 0..batch.num_rows() {
7263            if array_value_equals_plan_value(column.as_ref(), row_idx, literal)? {
7264                keep_rows.push(row_idx as u32);
7265            }
7266        }
7267
7268        if keep_rows.len() == batch.num_rows() {
7269            result.push(batch);
7270            continue;
7271        }
7272
7273        if keep_rows.is_empty() {
7274            // Constraint filtered out entire batch; skip it.
7275            continue;
7276        }
7277
7278        let indices = UInt32Array::from(keep_rows);
7279        let mut filtered_columns: Vec<ArrayRef> = Vec::with_capacity(batch.num_columns());
7280        for col_idx in 0..batch.num_columns() {
7281            let filtered = take(batch.column(col_idx).as_ref(), &indices, None)
7282                .map_err(|err| Error::Internal(format!("failed to apply literal filter: {err}")))?;
7283            filtered_columns.push(filtered);
7284        }
7285
7286        let filtered_batch =
7287            RecordBatch::try_new(batch.schema(), filtered_columns).map_err(|err| {
7288                Error::Internal(format!(
7289                    "failed to rebuild batch after literal filter: {err}"
7290                ))
7291            })?;
7292        result.push(filtered_batch);
7293    }
7294
7295    Ok(result)
7296}
7297
7298fn filter_batches_by_in_list(
7299    batches: Vec<RecordBatch>,
7300    column_idx: usize,
7301    values: &[PlanValue],
7302) -> ExecutorResult<Vec<RecordBatch>> {
7303    use arrow::array::*;
7304    use arrow::compute::or;
7305
7306    if values.is_empty() {
7307        // Empty IN list matches nothing
7308        return Ok(Vec::new());
7309    }
7310
7311    let mut result = Vec::with_capacity(batches.len());
7312
7313    for batch in batches {
7314        if column_idx >= batch.num_columns() {
7315            return Err(Error::Internal(
7316                "IN list constraint referenced invalid column index".into(),
7317            ));
7318        }
7319
7320        if batch.num_rows() == 0 {
7321            result.push(batch);
7322            continue;
7323        }
7324
7325        let column = batch.column(column_idx);
7326
7327        // Build a boolean mask: true if row matches ANY value in the IN list
7328        // Start with all false, then OR together comparisons for each value
7329        let mut mask = BooleanArray::from(vec![false; batch.num_rows()]);
7330
7331        for value in values {
7332            let comparison_mask = build_comparison_mask(column.as_ref(), value)?;
7333            mask = or(&mask, &comparison_mask)
7334                .map_err(|err| Error::Internal(format!("failed to OR comparison masks: {err}")))?;
7335        }
7336
7337        // Check if all rows match or no rows match for optimization
7338        let true_count = mask.true_count();
7339        if true_count == batch.num_rows() {
7340            result.push(batch);
7341            continue;
7342        }
7343
7344        if true_count == 0 {
7345            // IN list filtered out entire batch; skip it.
7346            continue;
7347        }
7348
7349        // Use Arrow's filter kernel for vectorized filtering
7350        let filtered_batch = arrow::compute::filter_record_batch(&batch, &mask)
7351            .map_err(|err| Error::Internal(format!("failed to apply IN list filter: {err}")))?;
7352
7353        result.push(filtered_batch);
7354    }
7355
7356    Ok(result)
7357}
7358
7359/// Build a boolean mask for column == value comparison using vectorized operations.
7360fn build_comparison_mask(column: &dyn Array, value: &PlanValue) -> ExecutorResult<BooleanArray> {
7361    use arrow::array::*;
7362    use arrow::datatypes::DataType;
7363
7364    match value {
7365        PlanValue::Null => {
7366            // For NULL, check if each element is null
7367            let mut builder = BooleanBuilder::with_capacity(column.len());
7368            for i in 0..column.len() {
7369                builder.append_value(column.is_null(i));
7370            }
7371            Ok(builder.finish())
7372        }
7373        PlanValue::Integer(val) => {
7374            let mut builder = BooleanBuilder::with_capacity(column.len());
7375            match column.data_type() {
7376                DataType::Int8 => {
7377                    let arr = column
7378                        .as_any()
7379                        .downcast_ref::<Int8Array>()
7380                        .ok_or_else(|| Error::Internal("failed to downcast to Int8Array".into()))?;
7381                    let target = *val as i8;
7382                    for i in 0..arr.len() {
7383                        builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7384                    }
7385                }
7386                DataType::Int16 => {
7387                    let arr = column
7388                        .as_any()
7389                        .downcast_ref::<Int16Array>()
7390                        .ok_or_else(|| {
7391                            Error::Internal("failed to downcast to Int16Array".into())
7392                        })?;
7393                    let target = *val as i16;
7394                    for i in 0..arr.len() {
7395                        builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7396                    }
7397                }
7398                DataType::Int32 => {
7399                    let arr = column
7400                        .as_any()
7401                        .downcast_ref::<Int32Array>()
7402                        .ok_or_else(|| {
7403                            Error::Internal("failed to downcast to Int32Array".into())
7404                        })?;
7405                    let target = *val as i32;
7406                    for i in 0..arr.len() {
7407                        builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7408                    }
7409                }
7410                DataType::Int64 => {
7411                    let arr = column
7412                        .as_any()
7413                        .downcast_ref::<Int64Array>()
7414                        .ok_or_else(|| {
7415                            Error::Internal("failed to downcast to Int64Array".into())
7416                        })?;
7417                    for i in 0..arr.len() {
7418                        builder.append_value(!arr.is_null(i) && arr.value(i) == *val);
7419                    }
7420                }
7421                DataType::UInt8 => {
7422                    let arr = column
7423                        .as_any()
7424                        .downcast_ref::<UInt8Array>()
7425                        .ok_or_else(|| {
7426                            Error::Internal("failed to downcast to UInt8Array".into())
7427                        })?;
7428                    let target = *val as u8;
7429                    for i in 0..arr.len() {
7430                        builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7431                    }
7432                }
7433                DataType::UInt16 => {
7434                    let arr = column
7435                        .as_any()
7436                        .downcast_ref::<UInt16Array>()
7437                        .ok_or_else(|| {
7438                            Error::Internal("failed to downcast to UInt16Array".into())
7439                        })?;
7440                    let target = *val as u16;
7441                    for i in 0..arr.len() {
7442                        builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7443                    }
7444                }
7445                DataType::UInt32 => {
7446                    let arr = column
7447                        .as_any()
7448                        .downcast_ref::<UInt32Array>()
7449                        .ok_or_else(|| {
7450                            Error::Internal("failed to downcast to UInt32Array".into())
7451                        })?;
7452                    let target = *val as u32;
7453                    for i in 0..arr.len() {
7454                        builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7455                    }
7456                }
7457                DataType::UInt64 => {
7458                    let arr = column
7459                        .as_any()
7460                        .downcast_ref::<UInt64Array>()
7461                        .ok_or_else(|| {
7462                            Error::Internal("failed to downcast to UInt64Array".into())
7463                        })?;
7464                    let target = *val as u64;
7465                    for i in 0..arr.len() {
7466                        builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7467                    }
7468                }
7469                _ => {
7470                    return Err(Error::Internal(format!(
7471                        "unsupported integer type for IN list: {:?}",
7472                        column.data_type()
7473                    )));
7474                }
7475            }
7476            Ok(builder.finish())
7477        }
7478        PlanValue::Float(val) => {
7479            let mut builder = BooleanBuilder::with_capacity(column.len());
7480            match column.data_type() {
7481                DataType::Float32 => {
7482                    let arr = column
7483                        .as_any()
7484                        .downcast_ref::<Float32Array>()
7485                        .ok_or_else(|| {
7486                            Error::Internal("failed to downcast to Float32Array".into())
7487                        })?;
7488                    let target = *val as f32;
7489                    for i in 0..arr.len() {
7490                        builder.append_value(!arr.is_null(i) && arr.value(i) == target);
7491                    }
7492                }
7493                DataType::Float64 => {
7494                    let arr = column
7495                        .as_any()
7496                        .downcast_ref::<Float64Array>()
7497                        .ok_or_else(|| {
7498                            Error::Internal("failed to downcast to Float64Array".into())
7499                        })?;
7500                    for i in 0..arr.len() {
7501                        builder.append_value(!arr.is_null(i) && arr.value(i) == *val);
7502                    }
7503                }
7504                _ => {
7505                    return Err(Error::Internal(format!(
7506                        "unsupported float type for IN list: {:?}",
7507                        column.data_type()
7508                    )));
7509                }
7510            }
7511            Ok(builder.finish())
7512        }
7513        PlanValue::String(val) => {
7514            let mut builder = BooleanBuilder::with_capacity(column.len());
7515            let arr = column
7516                .as_any()
7517                .downcast_ref::<StringArray>()
7518                .ok_or_else(|| Error::Internal("failed to downcast to StringArray".into()))?;
7519            for i in 0..arr.len() {
7520                builder.append_value(!arr.is_null(i) && arr.value(i) == val.as_str());
7521            }
7522            Ok(builder.finish())
7523        }
7524        PlanValue::Struct(_) => Err(Error::Internal(
7525            "struct comparison in IN list not supported".into(),
7526        )),
7527    }
7528}
7529
7530fn array_value_equals_plan_value(
7531    array: &dyn Array,
7532    row_idx: usize,
7533    literal: &PlanValue,
7534) -> ExecutorResult<bool> {
7535    use arrow::array::*;
7536    use arrow::datatypes::DataType;
7537
7538    match literal {
7539        PlanValue::Null => Ok(array.is_null(row_idx)),
7540        PlanValue::Integer(expected) => match array.data_type() {
7541            DataType::Int8 => Ok(!array.is_null(row_idx)
7542                && array
7543                    .as_any()
7544                    .downcast_ref::<Int8Array>()
7545                    .expect("int8 array")
7546                    .value(row_idx) as i64
7547                    == *expected),
7548            DataType::Int16 => Ok(!array.is_null(row_idx)
7549                && array
7550                    .as_any()
7551                    .downcast_ref::<Int16Array>()
7552                    .expect("int16 array")
7553                    .value(row_idx) as i64
7554                    == *expected),
7555            DataType::Int32 => Ok(!array.is_null(row_idx)
7556                && array
7557                    .as_any()
7558                    .downcast_ref::<Int32Array>()
7559                    .expect("int32 array")
7560                    .value(row_idx) as i64
7561                    == *expected),
7562            DataType::Int64 => Ok(!array.is_null(row_idx)
7563                && array
7564                    .as_any()
7565                    .downcast_ref::<Int64Array>()
7566                    .expect("int64 array")
7567                    .value(row_idx)
7568                    == *expected),
7569            DataType::UInt8 if *expected >= 0 => Ok(!array.is_null(row_idx)
7570                && array
7571                    .as_any()
7572                    .downcast_ref::<UInt8Array>()
7573                    .expect("uint8 array")
7574                    .value(row_idx) as i64
7575                    == *expected),
7576            DataType::UInt16 if *expected >= 0 => Ok(!array.is_null(row_idx)
7577                && array
7578                    .as_any()
7579                    .downcast_ref::<UInt16Array>()
7580                    .expect("uint16 array")
7581                    .value(row_idx) as i64
7582                    == *expected),
7583            DataType::UInt32 if *expected >= 0 => Ok(!array.is_null(row_idx)
7584                && array
7585                    .as_any()
7586                    .downcast_ref::<UInt32Array>()
7587                    .expect("uint32 array")
7588                    .value(row_idx) as i64
7589                    == *expected),
7590            DataType::UInt64 if *expected >= 0 => Ok(!array.is_null(row_idx)
7591                && array
7592                    .as_any()
7593                    .downcast_ref::<UInt64Array>()
7594                    .expect("uint64 array")
7595                    .value(row_idx)
7596                    == *expected as u64),
7597            DataType::Boolean => {
7598                if array.is_null(row_idx) {
7599                    Ok(false)
7600                } else if *expected == 0 || *expected == 1 {
7601                    let value = array
7602                        .as_any()
7603                        .downcast_ref::<BooleanArray>()
7604                        .expect("bool array")
7605                        .value(row_idx);
7606                    Ok(value == (*expected == 1))
7607                } else {
7608                    Ok(false)
7609                }
7610            }
7611            _ => Err(Error::InvalidArgumentError(format!(
7612                "literal integer comparison not supported for {:?}",
7613                array.data_type()
7614            ))),
7615        },
7616        PlanValue::Float(expected) => match array.data_type() {
7617            DataType::Float32 => Ok(!array.is_null(row_idx)
7618                && (array
7619                    .as_any()
7620                    .downcast_ref::<Float32Array>()
7621                    .expect("float32 array")
7622                    .value(row_idx) as f64
7623                    - *expected)
7624                    .abs()
7625                    .eq(&0.0)),
7626            DataType::Float64 => Ok(!array.is_null(row_idx)
7627                && (array
7628                    .as_any()
7629                    .downcast_ref::<Float64Array>()
7630                    .expect("float64 array")
7631                    .value(row_idx)
7632                    - *expected)
7633                    .abs()
7634                    .eq(&0.0)),
7635            _ => Err(Error::InvalidArgumentError(format!(
7636                "literal float comparison not supported for {:?}",
7637                array.data_type()
7638            ))),
7639        },
7640        PlanValue::String(expected) => match array.data_type() {
7641            DataType::Utf8 => Ok(!array.is_null(row_idx)
7642                && array
7643                    .as_any()
7644                    .downcast_ref::<StringArray>()
7645                    .expect("string array")
7646                    .value(row_idx)
7647                    == expected),
7648            DataType::LargeUtf8 => Ok(!array.is_null(row_idx)
7649                && array
7650                    .as_any()
7651                    .downcast_ref::<LargeStringArray>()
7652                    .expect("large string array")
7653                    .value(row_idx)
7654                    == expected),
7655            _ => Err(Error::InvalidArgumentError(format!(
7656                "literal string comparison not supported for {:?}",
7657                array.data_type()
7658            ))),
7659        },
7660        PlanValue::Struct(_) => Err(Error::InvalidArgumentError(
7661            "struct literals are not supported in join filters".into(),
7662        )),
7663    }
7664}
7665
/// Hash-join two collections of table batches on the given equi-join keys.
///
/// `join_keys` pairs column indices as `(left_column_idx, right_column_idx)`
/// into the respective schemas. Only `Inner` and `Left` joins are handled
/// here; any other join type returns an internal error directing callers to
/// `llkv-join`. The output schema is the left schema's fields followed by the
/// right schema's fields, and the per-table bookkeeping (`column_counts`,
/// `table_indices`) is concatenated in the same left-then-right order.
fn hash_join_table_batches(
    left: TableCrossProductData,
    right: TableCrossProductData,
    join_keys: &[(usize, usize)],
    join_type: llkv_join::JoinType,
) -> ExecutorResult<TableCrossProductData> {
    let TableCrossProductData {
        schema: left_schema,
        batches: left_batches,
        column_counts: left_counts,
        table_indices: left_tables,
    } = left;

    let TableCrossProductData {
        schema: right_schema,
        batches: right_batches,
        column_counts: right_counts,
        table_indices: right_tables,
    } = right;

    // Combined schema: all left fields first, then all right fields.
    let combined_fields: Vec<Field> = left_schema
        .fields()
        .iter()
        .chain(right_schema.fields().iter())
        .map(|field| field.as_ref().clone())
        .collect();

    let combined_schema = Arc::new(Schema::new(combined_fields));

    // Concatenate per-table bookkeeping in the same left-then-right order.
    let mut column_counts = Vec::with_capacity(left_counts.len() + right_counts.len());
    column_counts.extend(left_counts.iter());
    column_counts.extend(right_counts.iter());

    let mut table_indices = Vec::with_capacity(left_tables.len() + right_tables.len());
    table_indices.extend(left_tables.iter().copied());
    table_indices.extend(right_tables.iter().copied());

    // Handle empty inputs
    // No left rows: both INNER and LEFT joins produce no output rows.
    if left_batches.is_empty() {
        return Ok(TableCrossProductData {
            schema: combined_schema,
            batches: Vec::new(),
            column_counts,
            table_indices,
        });
    }

    if right_batches.is_empty() {
        // For LEFT JOIN with no right rows, return all left rows with NULL right columns
        if join_type == llkv_join::JoinType::Left {
            let total_left_rows: usize = left_batches.iter().map(|b| b.num_rows()).sum();
            let mut left_arrays = Vec::new();
            // Concatenate each left column across all left batches so the
            // output is a single batch.
            for field in left_schema.fields() {
                let column_idx = left_schema.index_of(field.name()).map_err(|e| {
                    Error::Internal(format!("failed to find field {}: {}", field.name(), e))
                })?;
                let arrays: Vec<ArrayRef> = left_batches
                    .iter()
                    .map(|batch| batch.column(column_idx).clone())
                    .collect();
                let concatenated =
                    arrow::compute::concat(&arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>())
                        .map_err(|e| {
                            Error::Internal(format!("failed to concat left arrays: {}", e))
                        })?;
                left_arrays.push(concatenated);
            }

            // Add NULL arrays for right side
            for field in right_schema.fields() {
                let null_array = arrow::array::new_null_array(field.data_type(), total_left_rows);
                left_arrays.push(null_array);
            }

            let joined_batch = RecordBatch::try_new(Arc::clone(&combined_schema), left_arrays)
                .map_err(|err| {
                    Error::Internal(format!(
                        "failed to create LEFT JOIN batch with NULL right: {err}"
                    ))
                })?;

            return Ok(TableCrossProductData {
                schema: combined_schema,
                batches: vec![joined_batch],
                column_counts,
                table_indices,
            });
        } else {
            // For INNER JOIN, no right rows means no results
            return Ok(TableCrossProductData {
                schema: combined_schema,
                batches: Vec::new(),
                column_counts,
                table_indices,
            });
        }
    }

    match join_type {
        llkv_join::JoinType::Inner => {
            // Only matching (left, right) row pairs are materialized.
            let (left_matches, right_matches) =
                build_join_match_indices(&left_batches, &right_batches, join_keys)?;

            if left_matches.is_empty() {
                return Ok(TableCrossProductData {
                    schema: combined_schema,
                    batches: Vec::new(),
                    column_counts,
                    table_indices,
                });
            }

            let left_arrays = gather_indices_from_batches(&left_batches, &left_matches)?;
            let right_arrays = gather_indices_from_batches(&right_batches, &right_matches)?;

            let mut combined_columns = Vec::with_capacity(left_arrays.len() + right_arrays.len());
            combined_columns.extend(left_arrays);
            combined_columns.extend(right_arrays);

            let joined_batch = RecordBatch::try_new(Arc::clone(&combined_schema), combined_columns)
                .map_err(|err| {
                    Error::Internal(format!("failed to materialize INNER JOIN batch: {err}"))
                })?;

            Ok(TableCrossProductData {
                schema: combined_schema,
                batches: vec![joined_batch],
                column_counts,
                table_indices,
            })
        }
        llkv_join::JoinType::Left => {
            // Every left row appears at least once; unmatched rows carry a
            // `None` right index which becomes NULLs when gathered.
            let (left_matches, right_optional_matches) =
                build_left_join_match_indices(&left_batches, &right_batches, join_keys)?;

            if left_matches.is_empty() {
                // This shouldn't happen for LEFT JOIN since all left rows should be included
                return Ok(TableCrossProductData {
                    schema: combined_schema,
                    batches: Vec::new(),
                    column_counts,
                    table_indices,
                });
            }

            let left_arrays = gather_indices_from_batches(&left_batches, &left_matches)?;
            // Use gather_optional_indices to handle None values (unmatched rows)
            let right_arrays = llkv_column_map::gather::gather_optional_indices_from_batches(
                &right_batches,
                &right_optional_matches,
            )?;

            let mut combined_columns = Vec::with_capacity(left_arrays.len() + right_arrays.len());
            combined_columns.extend(left_arrays);
            combined_columns.extend(right_arrays);

            let joined_batch = RecordBatch::try_new(Arc::clone(&combined_schema), combined_columns)
                .map_err(|err| {
                    Error::Internal(format!("failed to materialize LEFT JOIN batch: {err}"))
                })?;

            Ok(TableCrossProductData {
                schema: combined_schema,
                batches: vec![joined_batch],
                column_counts,
                table_indices,
            })
        }
        // Other join types not yet supported in this helper (delegate to llkv-join)
        _ => Err(Error::Internal(format!(
            "join type {:?} not supported in hash_join_table_batches; use llkv-join",
            join_type
        ))),
    }
}
7841
/// Type alias for join match index pairs (batch_idx, row_idx)
type JoinMatchIndices = Vec<(usize, usize)>;
/// Type alias for hash table mapping serialized join-key bytes (produced by
/// `build_join_key`) to the (batch_idx, row_idx) positions sharing that key
type JoinHashTable = FxHashMap<Vec<u8>, Vec<(usize, usize)>>;
/// Type alias for complete match pairs for inner-style joins; the two
/// vectors are parallel (entry i of each describes one output row)
type JoinMatchPairs = (JoinMatchIndices, JoinMatchIndices);
/// Type alias for optional matches produced by LEFT joins;
/// `None` marks a left row with no right-side match (NULL-padded output)
type OptionalJoinMatches = Vec<Option<(usize, usize)>>;
/// Type alias for LEFT join match outputs
type LeftJoinMatchPairs = (JoinMatchIndices, OptionalJoinMatches);
7852
7853/// Build hash join match indices using parallel hash table construction and probing.
7854///
7855/// Constructs a hash table from the right batches (build phase), then probes it with
7856/// rows from the left batches to find matches. Both phases are parallelized using Rayon.
7857///
7858/// # Parallelization Strategy
7859///
7860/// **Build Phase**: Each right batch is processed in parallel. Each thread builds a local
7861/// hash table for its batch(es), then all local tables are merged into a single shared
7862/// hash table. This eliminates lock contention during the build phase.
7863///
7864/// **Probe Phase**: Each left batch is probed against the shared hash table in parallel.
7865/// Each thread generates local match lists which are concatenated at the end.
7866///
7867/// # Arguments
7868///
7869/// * `left_batches` - Batches to probe against the hash table
7870/// * `right_batches` - Batches used to build the hash table
7871/// * `join_keys` - Column indices for join keys: (left_column_idx, right_column_idx)
7872///
7873/// # Returns
7874///
7875/// Tuple of `(left_matches, right_matches)` where each vector contains (batch_idx, row_idx)
7876/// pairs indicating which rows from left and right should be joined together.
7877///
7878/// # Performance
7879///
7880/// Scales with available CPU cores via `llkv_column_map::parallel::with_thread_pool()`.
7881/// Respects `LLKV_MAX_THREADS` environment variable for thread pool sizing.
7882fn build_join_match_indices(
7883    left_batches: &[RecordBatch],
7884    right_batches: &[RecordBatch],
7885    join_keys: &[(usize, usize)],
7886) -> ExecutorResult<JoinMatchPairs> {
7887    let right_key_indices: Vec<usize> = join_keys.iter().map(|(_, right)| *right).collect();
7888
7889    // Parallelize hash table build phase across batches
7890    // Each thread builds a local hash table for its batch(es), then we merge them
7891    let hash_table: JoinHashTable = llkv_column_map::parallel::with_thread_pool(|| {
7892        let local_tables: Vec<JoinHashTable> = right_batches
7893            .par_iter()
7894            .enumerate()
7895            .map(|(batch_idx, batch)| {
7896                let mut local_table: JoinHashTable = FxHashMap::default();
7897                let mut key_buffer: Vec<u8> = Vec::new();
7898
7899                for row_idx in 0..batch.num_rows() {
7900                    key_buffer.clear();
7901                    match build_join_key(batch, &right_key_indices, row_idx, &mut key_buffer) {
7902                        Ok(true) => {
7903                            local_table
7904                                .entry(key_buffer.clone())
7905                                .or_default()
7906                                .push((batch_idx, row_idx));
7907                        }
7908                        Ok(false) => continue,
7909                        Err(_) => continue, // Skip rows with errors during parallel build
7910                    }
7911                }
7912
7913                local_table
7914            })
7915            .collect();
7916
7917        // Merge all local hash tables into one
7918        let mut merged_table: JoinHashTable = FxHashMap::default();
7919        for local_table in local_tables {
7920            for (key, mut positions) in local_table {
7921                merged_table.entry(key).or_default().append(&mut positions);
7922            }
7923        }
7924
7925        merged_table
7926    });
7927
7928    if hash_table.is_empty() {
7929        return Ok((Vec::new(), Vec::new()));
7930    }
7931
7932    let left_key_indices: Vec<usize> = join_keys.iter().map(|(left, _)| *left).collect();
7933
7934    // Parallelize probe phase across left batches
7935    // Each thread probes its batch(es) against the shared hash table
7936    let matches: Vec<JoinMatchPairs> = llkv_column_map::parallel::with_thread_pool(|| {
7937        left_batches
7938            .par_iter()
7939            .enumerate()
7940            .map(|(batch_idx, batch)| {
7941                let mut local_left_matches: JoinMatchIndices = Vec::new();
7942                let mut local_right_matches: JoinMatchIndices = Vec::new();
7943                let mut key_buffer: Vec<u8> = Vec::new();
7944
7945                for row_idx in 0..batch.num_rows() {
7946                    key_buffer.clear();
7947                    match build_join_key(batch, &left_key_indices, row_idx, &mut key_buffer) {
7948                        Ok(true) => {
7949                            if let Some(entries) = hash_table.get(&key_buffer) {
7950                                for &(r_batch, r_row) in entries {
7951                                    local_left_matches.push((batch_idx, row_idx));
7952                                    local_right_matches.push((r_batch, r_row));
7953                                }
7954                            }
7955                        }
7956                        Ok(false) => continue,
7957                        Err(_) => continue, // Skip rows with errors during parallel probe
7958                    }
7959                }
7960
7961                (local_left_matches, local_right_matches)
7962            })
7963            .collect()
7964    });
7965
7966    // Merge all match results
7967    let mut left_matches: JoinMatchIndices = Vec::new();
7968    let mut right_matches: JoinMatchIndices = Vec::new();
7969    for (mut left, mut right) in matches {
7970        left_matches.append(&mut left);
7971        right_matches.append(&mut right);
7972    }
7973
7974    Ok((left_matches, right_matches))
7975}
7976
/// Build match indices for LEFT JOIN, returning all left rows with optional right matches.
///
/// Unlike `build_join_match_indices` which only returns matching pairs, this function
/// returns every left row. For rows with no match, the right match is `None`.
///
/// Both the build phase (hash table over `right_batches`) and the probe phase run in
/// parallel via Rayon inside `llkv_column_map::parallel::with_thread_pool()`; per-batch
/// results are concatenated in batch order, so the output order is deterministic.
///
/// # Returns
///
/// Tuple of `(left_matches, right_optional_matches)` where:
/// - `left_matches`: (batch_idx, row_idx) for every left row
/// - `right_optional_matches`: `Some((batch_idx, row_idx))` for matched rows, `None` for unmatched
fn build_left_join_match_indices(
    left_batches: &[RecordBatch],
    right_batches: &[RecordBatch],
    join_keys: &[(usize, usize)],
) -> ExecutorResult<LeftJoinMatchPairs> {
    let right_key_indices: Vec<usize> = join_keys.iter().map(|(_, right)| *right).collect();

    // Build hash table from right batches
    // (one local table per batch, merged below, so no lock contention).
    let hash_table: JoinHashTable = llkv_column_map::parallel::with_thread_pool(|| {
        let local_tables: Vec<JoinHashTable> = right_batches
            .par_iter()
            .enumerate()
            .map(|(batch_idx, batch)| {
                let mut local_table: JoinHashTable = FxHashMap::default();
                let mut key_buffer: Vec<u8> = Vec::new();

                for row_idx in 0..batch.num_rows() {
                    key_buffer.clear();
                    match build_join_key(batch, &right_key_indices, row_idx, &mut key_buffer) {
                        Ok(true) => {
                            local_table
                                .entry(key_buffer.clone())
                                .or_default()
                                .push((batch_idx, row_idx));
                        }
                        // NULL keys on the right can never match; skip them.
                        Ok(false) => continue,
                        // Unreadable keys are dropped from the build side.
                        Err(_) => continue,
                    }
                }

                local_table
            })
            .collect();

        let mut merged_table: JoinHashTable = FxHashMap::default();
        for local_table in local_tables {
            for (key, mut positions) in local_table {
                merged_table.entry(key).or_default().append(&mut positions);
            }
        }

        merged_table
    });

    let left_key_indices: Vec<usize> = join_keys.iter().map(|(left, _)| *left).collect();

    // Probe phase: process ALL left rows, recording matches or None
    let matches: Vec<LeftJoinMatchPairs> = llkv_column_map::parallel::with_thread_pool(|| {
        left_batches
            .par_iter()
            .enumerate()
            .map(|(batch_idx, batch)| {
                let mut local_left_matches: JoinMatchIndices = Vec::new();
                let mut local_right_optional: Vec<Option<(usize, usize)>> = Vec::new();
                let mut key_buffer: Vec<u8> = Vec::new();

                for row_idx in 0..batch.num_rows() {
                    key_buffer.clear();
                    match build_join_key(batch, &left_key_indices, row_idx, &mut key_buffer) {
                        Ok(true) => {
                            if let Some(entries) = hash_table.get(&key_buffer) {
                                // Has matches - emit one output row per match
                                for &(r_batch, r_row) in entries {
                                    local_left_matches.push((batch_idx, row_idx));
                                    local_right_optional.push(Some((r_batch, r_row)));
                                }
                            } else {
                                // No match - emit left row with NULL right
                                local_left_matches.push((batch_idx, row_idx));
                                local_right_optional.push(None);
                            }
                        }
                        Ok(false) => {
                            // NULL key on left side - no match, emit with NULL right
                            local_left_matches.push((batch_idx, row_idx));
                            local_right_optional.push(None);
                        }
                        Err(_) => {
                            // Error reading key - treat as no match
                            local_left_matches.push((batch_idx, row_idx));
                            local_right_optional.push(None);
                        }
                    }
                }

                (local_left_matches, local_right_optional)
            })
            .collect()
    });

    // Merge all match results
    // (appending in batch order preserves every left row exactly once).
    let mut left_matches: JoinMatchIndices = Vec::new();
    let mut right_optional: Vec<Option<(usize, usize)>> = Vec::new();
    for (mut left, mut right) in matches {
        left_matches.append(&mut left);
        right_optional.append(&mut right);
    }

    Ok((left_matches, right_optional))
}
8087
8088fn build_join_key(
8089    batch: &RecordBatch,
8090    column_indices: &[usize],
8091    row_idx: usize,
8092    buffer: &mut Vec<u8>,
8093) -> ExecutorResult<bool> {
8094    buffer.clear();
8095
8096    for &col_idx in column_indices {
8097        let array = batch.column(col_idx);
8098        if array.is_null(row_idx) {
8099            return Ok(false);
8100        }
8101        append_array_value_to_key(array.as_ref(), row_idx, buffer)?;
8102    }
8103
8104    Ok(true)
8105}
8106
8107fn append_array_value_to_key(
8108    array: &dyn Array,
8109    row_idx: usize,
8110    buffer: &mut Vec<u8>,
8111) -> ExecutorResult<()> {
8112    use arrow::array::*;
8113    use arrow::datatypes::DataType;
8114
8115    match array.data_type() {
8116        DataType::Int8 => buffer.extend_from_slice(
8117            &array
8118                .as_any()
8119                .downcast_ref::<Int8Array>()
8120                .expect("int8 array")
8121                .value(row_idx)
8122                .to_le_bytes(),
8123        ),
8124        DataType::Int16 => buffer.extend_from_slice(
8125            &array
8126                .as_any()
8127                .downcast_ref::<Int16Array>()
8128                .expect("int16 array")
8129                .value(row_idx)
8130                .to_le_bytes(),
8131        ),
8132        DataType::Int32 => buffer.extend_from_slice(
8133            &array
8134                .as_any()
8135                .downcast_ref::<Int32Array>()
8136                .expect("int32 array")
8137                .value(row_idx)
8138                .to_le_bytes(),
8139        ),
8140        DataType::Int64 => buffer.extend_from_slice(
8141            &array
8142                .as_any()
8143                .downcast_ref::<Int64Array>()
8144                .expect("int64 array")
8145                .value(row_idx)
8146                .to_le_bytes(),
8147        ),
8148        DataType::UInt8 => buffer.extend_from_slice(
8149            &array
8150                .as_any()
8151                .downcast_ref::<UInt8Array>()
8152                .expect("uint8 array")
8153                .value(row_idx)
8154                .to_le_bytes(),
8155        ),
8156        DataType::UInt16 => buffer.extend_from_slice(
8157            &array
8158                .as_any()
8159                .downcast_ref::<UInt16Array>()
8160                .expect("uint16 array")
8161                .value(row_idx)
8162                .to_le_bytes(),
8163        ),
8164        DataType::UInt32 => buffer.extend_from_slice(
8165            &array
8166                .as_any()
8167                .downcast_ref::<UInt32Array>()
8168                .expect("uint32 array")
8169                .value(row_idx)
8170                .to_le_bytes(),
8171        ),
8172        DataType::UInt64 => buffer.extend_from_slice(
8173            &array
8174                .as_any()
8175                .downcast_ref::<UInt64Array>()
8176                .expect("uint64 array")
8177                .value(row_idx)
8178                .to_le_bytes(),
8179        ),
8180        DataType::Float32 => buffer.extend_from_slice(
8181            &array
8182                .as_any()
8183                .downcast_ref::<Float32Array>()
8184                .expect("float32 array")
8185                .value(row_idx)
8186                .to_le_bytes(),
8187        ),
8188        DataType::Float64 => buffer.extend_from_slice(
8189            &array
8190                .as_any()
8191                .downcast_ref::<Float64Array>()
8192                .expect("float64 array")
8193                .value(row_idx)
8194                .to_le_bytes(),
8195        ),
8196        DataType::Boolean => buffer.push(
8197            array
8198                .as_any()
8199                .downcast_ref::<BooleanArray>()
8200                .expect("bool array")
8201                .value(row_idx) as u8,
8202        ),
8203        DataType::Utf8 => {
8204            let value = array
8205                .as_any()
8206                .downcast_ref::<StringArray>()
8207                .expect("utf8 array")
8208                .value(row_idx);
8209            buffer.extend_from_slice(&(value.len() as u32).to_le_bytes());
8210            buffer.extend_from_slice(value.as_bytes());
8211        }
8212        DataType::LargeUtf8 => {
8213            let value = array
8214                .as_any()
8215                .downcast_ref::<LargeStringArray>()
8216                .expect("large utf8 array")
8217                .value(row_idx);
8218            buffer.extend_from_slice(&(value.len() as u32).to_le_bytes());
8219            buffer.extend_from_slice(value.as_bytes());
8220        }
8221        DataType::Binary => {
8222            let value = array
8223                .as_any()
8224                .downcast_ref::<BinaryArray>()
8225                .expect("binary array")
8226                .value(row_idx);
8227            buffer.extend_from_slice(&(value.len() as u32).to_le_bytes());
8228            buffer.extend_from_slice(value);
8229        }
8230        other => {
8231            return Err(Error::InvalidArgumentError(format!(
8232                "hash join does not support join key type {:?}",
8233                other
8234            )));
8235        }
8236    }
8237
8238    Ok(())
8239}
8240
8241fn table_has_join_with_used(
8242    candidate: usize,
8243    used_tables: &FxHashSet<usize>,
8244    equalities: &[ColumnEquality],
8245) -> bool {
8246    equalities.iter().any(|equality| {
8247        (equality.left.table == candidate && used_tables.contains(&equality.right.table))
8248            || (equality.right.table == candidate && used_tables.contains(&equality.left.table))
8249    })
8250}
8251
8252fn gather_join_keys(
8253    left: &TableCrossProductData,
8254    right: &TableCrossProductData,
8255    used_tables: &FxHashSet<usize>,
8256    right_table_index: usize,
8257    equalities: &[ColumnEquality],
8258) -> ExecutorResult<Vec<(usize, usize)>> {
8259    let mut keys = Vec::new();
8260
8261    for equality in equalities {
8262        if equality.left.table == right_table_index && used_tables.contains(&equality.right.table) {
8263            let left_idx = resolve_column_index(left, &equality.right).ok_or_else(|| {
8264                Error::Internal("failed to resolve column offset for hash join".into())
8265            })?;
8266            let right_idx = resolve_column_index(right, &equality.left).ok_or_else(|| {
8267                Error::Internal("failed to resolve column offset for hash join".into())
8268            })?;
8269            keys.push((left_idx, right_idx));
8270        } else if equality.right.table == right_table_index
8271            && used_tables.contains(&equality.left.table)
8272        {
8273            let left_idx = resolve_column_index(left, &equality.left).ok_or_else(|| {
8274                Error::Internal("failed to resolve column offset for hash join".into())
8275            })?;
8276            let right_idx = resolve_column_index(right, &equality.right).ok_or_else(|| {
8277                Error::Internal("failed to resolve column offset for hash join".into())
8278            })?;
8279            keys.push((left_idx, right_idx));
8280        }
8281    }
8282
8283    Ok(keys)
8284}
8285
8286fn resolve_column_index(data: &TableCrossProductData, column: &ColumnRef) -> Option<usize> {
8287    let mut offset = 0;
8288    for (table_idx, count) in data.table_indices.iter().zip(data.column_counts.iter()) {
8289        if *table_idx == column.table {
8290            if column.column < *count {
8291                return Some(offset + column.column);
8292            } else {
8293                return None;
8294            }
8295        }
8296        offset += count;
8297    }
8298    None
8299}
8300
/// Build a case-insensitive map from column-name spellings (fully qualified,
/// `table.column`, and bare) to flat column indices in the combined
/// cross-product schema.
///
/// Bare column names use first-match semantics (FROM-clause order), matching
/// SQLite's behavior for ambiguous names. A table's unaliased base name is
/// only mapped when that base table is referenced unambiguously, so self-joins
/// cannot silently resolve to the wrong occurrence.
fn build_cross_product_column_lookup(
    schema: &Schema,
    tables: &[llkv_plan::TableRef],
    column_counts: &[usize],
    table_indices: &[usize],
) -> FxHashMap<String, usize> {
    debug_assert_eq!(tables.len(), column_counts.len());
    debug_assert_eq!(column_counts.len(), table_indices.len());

    // Count occurrences of bare column names and `table.column` suffixes so
    // only unambiguous spellings receive shortcut entries below.
    let mut column_occurrences: FxHashMap<String, usize> = FxHashMap::default();
    let mut table_column_counts: FxHashMap<String, usize> = FxHashMap::default();
    for field in schema.fields() {
        let column_name = extract_column_name(field.name());
        *column_occurrences.entry(column_name).or_insert(0) += 1;
        if let Some(pair) = table_column_suffix(field.name()) {
            *table_column_counts.entry(pair).or_insert(0) += 1;
        }
    }

    // Count how many times each base table is referenced in total, and how
    // many of those references are unaliased, to decide whether base-name
    // keys are safe to add.
    let mut base_table_totals: FxHashMap<String, usize> = FxHashMap::default();
    let mut base_table_unaliased: FxHashMap<String, usize> = FxHashMap::default();
    for table_ref in tables {
        let key = base_table_key(table_ref);
        *base_table_totals.entry(key.clone()).or_insert(0) += 1;
        if table_ref.alias.is_none() {
            *base_table_unaliased.entry(key).or_insert(0) += 1;
        }
    }

    let mut lookup = FxHashMap::default();

    // Degenerate layout metadata: fall back to schema-derived keys only.
    if table_indices.is_empty() || column_counts.is_empty() {
        for (idx, field) in schema.fields().iter().enumerate() {
            let field_name_lower = field.name().to_ascii_lowercase();
            lookup.entry(field_name_lower).or_insert(idx);

            let trimmed_lower = field.name().trim_start_matches('.').to_ascii_lowercase();
            lookup.entry(trimmed_lower).or_insert(idx);

            // `table.column` shortcut only when that suffix is unique.
            if let Some(pair) = table_column_suffix(field.name())
                && table_column_counts.get(&pair).copied().unwrap_or(0) == 1
            {
                lookup.entry(pair).or_insert(idx);
            }

            // Bare column shortcut only when the name is globally unique.
            let column_name = extract_column_name(field.name());
            if column_occurrences.get(&column_name).copied().unwrap_or(0) == 1 {
                lookup.entry(column_name).or_insert(idx);
            }
        }
        return lookup;
    }

    let mut offset = 0usize;
    for (&table_idx, &count) in table_indices.iter().zip(column_counts.iter()) {
        if table_idx >= tables.len() {
            continue;
        }
        let table_ref = &tables[table_idx];
        let alias_lower = table_ref
            .alias
            .as_ref()
            .map(|alias| alias.to_ascii_lowercase());
        let table_lower = table_ref.table.to_ascii_lowercase();
        let schema_lower = table_ref.schema.to_ascii_lowercase();
        let base_key = base_table_key(table_ref);
        let total_refs = base_table_totals.get(&base_key).copied().unwrap_or(0);
        let unaliased_refs = base_table_unaliased.get(&base_key).copied().unwrap_or(0);

        // Base-name keys are only safe when they cannot be ambiguous: either
        // this is the single unaliased reference, or the single reference
        // overall (even though aliased).
        let allow_base_mapping = if table_ref.alias.is_none() {
            unaliased_refs == 1
        } else {
            unaliased_refs == 0 && total_refs == 1
        };

        // Every qualifier prefix this table's columns may be addressed with.
        let mut table_keys: Vec<String> = Vec::new();

        if let Some(alias) = &alias_lower {
            table_keys.push(alias.clone());
            if !schema_lower.is_empty() {
                table_keys.push(format!("{}.{}", schema_lower, alias));
            }
        }

        if allow_base_mapping {
            table_keys.push(table_lower.clone());
            if !schema_lower.is_empty() {
                table_keys.push(format!("{}.{}", schema_lower, table_lower));
            }
        }

        for local_idx in 0..count {
            let field_index = offset + local_idx;
            let field = schema.field(field_index);
            let field_name_lower = field.name().to_ascii_lowercase();
            lookup.entry(field_name_lower).or_insert(field_index);

            let trimmed_lower = field.name().trim_start_matches('.').to_ascii_lowercase();
            lookup.entry(trimmed_lower).or_insert(field_index);

            // Qualified spellings: `alias.column`, `schema.alias.column`, and
            // (when unambiguous) `table.column` / `schema.table.column`.
            let column_name = extract_column_name(field.name());
            for table_key in &table_keys {
                lookup
                    .entry(format!("{}.{}", table_key, column_name))
                    .or_insert(field_index);
            }

            // Use first-match semantics for bare column names (matches SQLite behavior)
            // This allows ambiguous column names to resolve to the first occurrence
            // in FROM clause order
            lookup.entry(column_name.clone()).or_insert(field_index);

            if table_keys.is_empty()
                && let Some(pair) = table_column_suffix(field.name())
                && table_column_counts.get(&pair).copied().unwrap_or(0) == 1
            {
                lookup.entry(pair).or_insert(field_index);
            }
        }

        offset = offset.saturating_add(count);
    }

    lookup
}
8426
8427fn base_table_key(table_ref: &llkv_plan::TableRef) -> String {
8428    let schema_lower = table_ref.schema.to_ascii_lowercase();
8429    let table_lower = table_ref.table.to_ascii_lowercase();
8430    if schema_lower.is_empty() {
8431        table_lower
8432    } else {
8433        format!("{}.{}", schema_lower, table_lower)
8434    }
8435}
8436
/// Return the bare column name (the last dot-separated segment, lowercased)
/// from a possibly schema/table-qualified field name.
fn extract_column_name(name: &str) -> String {
    // Drop any leading dots, then keep only the portion after the last dot.
    let trimmed = name.trim_start_matches('.');
    let column = match trimmed.rfind('.') {
        Some(pos) => &trimmed[pos + 1..],
        None => trimmed,
    };
    column.to_ascii_lowercase()
}
8444
/// Return the trailing `table.column` pair (lowercased) from a possibly
/// schema-qualified field name, or `None` when the name has fewer than two
/// dot-separated segments.
///
/// Iterates from the right instead of collecting all segments into a `Vec`,
/// avoiding an allocation per call (this runs once per schema field).
fn table_column_suffix(name: &str) -> Option<String> {
    let trimmed = name.trim_start_matches('.');
    let mut segments = trimmed.rsplit('.');
    // Last segment is the column; the one before it is the table. Fewer than
    // two segments means no `table.column` suffix exists.
    let column = segments.next()?.to_ascii_lowercase();
    let table = segments.next()?.to_ascii_lowercase();
    Some(format!("{}.{}", table, column))
}
8455
8456/// Combine two table batch sets into a cartesian product using parallel processing.
8457///
8458/// For each pair of (left_batch, right_batch), generates the cross product using
8459/// [`llkv_join::cross_join_pair`]. The computation is parallelized across all batch
8460/// pairs since they are independent.
8461///
8462/// # Parallelization
8463///
8464/// Uses nested parallel iteration via Rayon:
8465/// - Outer loop: parallel iteration over left batches
8466/// - Inner loop: parallel iteration over right batches
8467/// - Each (left, right) pair is processed independently
8468///
8469/// This effectively distributes N×M batch pairs across available CPU cores, providing
8470/// significant speedup for multi-batch joins.
8471///
8472/// # Arguments
8473///
8474/// * `left` - Left side table data with batches
8475/// * `right` - Right side table data with batches
8476///
8477/// # Returns
8478///
8479/// Combined table data containing the cartesian product of all left and right rows.
8480fn cross_join_table_batches(
8481    left: TableCrossProductData,
8482    right: TableCrossProductData,
8483) -> ExecutorResult<TableCrossProductData> {
8484    let TableCrossProductData {
8485        schema: left_schema,
8486        batches: left_batches,
8487        column_counts: mut left_counts,
8488        table_indices: mut left_tables,
8489    } = left;
8490    let TableCrossProductData {
8491        schema: right_schema,
8492        batches: right_batches,
8493        column_counts: right_counts,
8494        table_indices: right_tables,
8495    } = right;
8496
8497    let combined_fields: Vec<Field> = left_schema
8498        .fields()
8499        .iter()
8500        .chain(right_schema.fields().iter())
8501        .map(|field| field.as_ref().clone())
8502        .collect();
8503
8504    let mut column_counts = Vec::with_capacity(left_counts.len() + right_counts.len());
8505    column_counts.append(&mut left_counts);
8506    column_counts.extend(right_counts);
8507
8508    let mut table_indices = Vec::with_capacity(left_tables.len() + right_tables.len());
8509    table_indices.append(&mut left_tables);
8510    table_indices.extend(right_tables);
8511
8512    let combined_schema = Arc::new(Schema::new(combined_fields));
8513
8514    let left_has_rows = left_batches.iter().any(|batch| batch.num_rows() > 0);
8515    let right_has_rows = right_batches.iter().any(|batch| batch.num_rows() > 0);
8516
8517    if !left_has_rows || !right_has_rows {
8518        return Ok(TableCrossProductData {
8519            schema: combined_schema,
8520            batches: Vec::new(),
8521            column_counts,
8522            table_indices,
8523        });
8524    }
8525
8526    // Parallelize cross join batch generation using nested parallel iteration
8527    // This is safe because cross_join_pair is pure and each batch pair is independent
8528    let output_batches: Vec<RecordBatch> = llkv_column_map::parallel::with_thread_pool(|| {
8529        left_batches
8530            .par_iter()
8531            .filter(|left_batch| left_batch.num_rows() > 0)
8532            .flat_map(|left_batch| {
8533                right_batches
8534                    .par_iter()
8535                    .filter(|right_batch| right_batch.num_rows() > 0)
8536                    .filter_map(|right_batch| {
8537                        cross_join_pair(left_batch, right_batch, &combined_schema).ok()
8538                    })
8539                    .collect::<Vec<_>>()
8540            })
8541            .collect()
8542    });
8543
8544    Ok(TableCrossProductData {
8545        schema: combined_schema,
8546        batches: output_batches,
8547        column_counts,
8548        table_indices,
8549    })
8550}
8551
8552fn cross_join_all(staged: Vec<TableCrossProductData>) -> ExecutorResult<TableCrossProductData> {
8553    let mut iter = staged.into_iter();
8554    let mut current = iter
8555        .next()
8556        .ok_or_else(|| Error::Internal("cross product preparation yielded no tables".into()))?;
8557    for next in iter {
8558        current = cross_join_table_batches(current, next)?;
8559    }
8560    Ok(current)
8561}
8562
/// Per-table metadata used while resolving column references during join
/// constraint analysis.
struct TableInfo<'a> {
    /// Position of the table in FROM-clause order.
    index: usize,
    /// The plan-level table reference (schema, name, optional alias).
    table_ref: &'a llkv_plan::TableRef,
    /// Lowercased column name -> column offset within this table.
    column_map: FxHashMap<String, usize>,
}

/// Identifies a column by FROM-clause table position and column offset
/// within that table.
#[derive(Clone, Copy)]
struct ColumnRef {
    table: usize,
    column: usize,
}

/// A column-to-column equality predicate — a hash join key candidate.
#[derive(Clone, Copy)]
struct ColumnEquality {
    left: ColumnRef,
    right: ColumnRef,
}

/// A column-to-literal equality that can be pushed down to one table's scan.
#[derive(Clone)]
struct ColumnLiteral {
    column: ColumnRef,
    value: PlanValue,
}

/// A `column IN (v1, v2, ...)` predicate that can be pushed down to one
/// table's scan.
#[derive(Clone)]
struct ColumnInList {
    column: ColumnRef,
    values: Vec<PlanValue>,
}

/// A single-table constraint extracted from the WHERE clause for pushdown.
#[derive(Clone)]
enum ColumnConstraint {
    Equality(ColumnLiteral),
    InList(ColumnInList),
}

// TODO: Move `llkv-plan`?
/// Constraints extracted from a WHERE clause for hash join planning.
struct JoinConstraintPlan {
    /// Column-to-column equalities usable as hash join keys.
    equalities: Vec<ColumnEquality>,
    /// Single-table literal / IN-list constraints for scan pushdown.
    literals: Vec<ColumnConstraint>,
    /// True when the WHERE clause contains a constant `false` conjunct.
    unsatisfiable: bool,
    /// Total number of conjuncts in the original WHERE clause
    total_conjuncts: usize,
    /// Number of conjuncts successfully handled (as equalities or literals)
    handled_conjuncts: usize,
}
8609
/// Extract literal pushdown filters from a WHERE clause, even in the presence of OR clauses.
///
/// Unlike `extract_join_constraints`, this function is more lenient and extracts column-to-literal
/// comparisons and IN-list predicates regardless of OR clauses. This allows selective table scans
/// even when hash join optimization cannot be applied.
///
/// # Strategy
///
/// Extracts top-level AND-connected predicates:
/// - Column-to-literal equalities (e.g., `c2 = 374`)
/// - IN-list predicates (e.g., `b4 IN (408, 261, 877, 33)`)
/// - OR chains over a single column, converted to IN lists (e.g., `c = 1 OR c = 2`)
///
/// OR clauses and other complex predicates are left for post-join filtering.
///
/// # Returns
///
/// A vector indexed by table position, where each element contains the constraints
/// that can be pushed down to that table.
fn extract_literal_pushdown_filters<P>(
    expr: &LlkvExpr<'static, String>,
    tables_with_handles: &[(llkv_plan::TableRef, Arc<ExecutorTable<P>>)],
) -> Vec<Vec<ColumnConstraint>>
where
    P: Pager<Blob = EntryHandle> + Send + Sync,
{
    // Build a lowercase column-name -> offset map for each table so string
    // column references can be resolved to (table, column) pairs.
    let mut table_infos = Vec::with_capacity(tables_with_handles.len());
    for (index, (table_ref, executor_table)) in tables_with_handles.iter().enumerate() {
        let mut column_map = FxHashMap::default();
        for (column_idx, column) in executor_table.schema.columns.iter().enumerate() {
            let column_name = column.name.to_ascii_lowercase();
            column_map.entry(column_name).or_insert(column_idx);
        }
        table_infos.push(TableInfo {
            index,
            table_ref,
            column_map,
        });
    }

    // One constraint bucket per table, in FROM-clause order.
    let mut constraints: Vec<Vec<ColumnConstraint>> = vec![Vec::new(); tables_with_handles.len()];

    // Collect all conjuncts, but be lenient about OR clauses - we'll skip them
    let mut conjuncts = Vec::new();
    collect_conjuncts_lenient(expr, &mut conjuncts);

    for conjunct in conjuncts {
        // Handle Compare expressions: column = literal
        if let LlkvExpr::Compare {
            left,
            op: CompareOp::Eq,
            right,
        } = conjunct
        {
            // Exactly one side must resolve to a column; the other side must
            // then reduce to a plan-value literal.
            match (
                resolve_column_reference(left, &table_infos),
                resolve_column_reference(right, &table_infos),
            ) {
                (Some(column), None) => {
                    if let Some(literal) = extract_literal(right)
                        && let Some(value) = literal_to_plan_value_for_join(literal)
                        && column.table < constraints.len()
                    {
                        constraints[column.table]
                            .push(ColumnConstraint::Equality(ColumnLiteral { column, value }));
                    }
                }
                (None, Some(column)) => {
                    if let Some(literal) = extract_literal(left)
                        && let Some(value) = literal_to_plan_value_for_join(literal)
                        && column.table < constraints.len()
                    {
                        constraints[column.table]
                            .push(ColumnConstraint::Equality(ColumnLiteral { column, value }));
                    }
                }
                _ => {}
            }
        }
        // Handle Pred(Filter) expressions: these are already in filter form
        // We extract simple equality predicates only
        else if let LlkvExpr::Pred(filter) = conjunct {
            if let Operator::Equals(ref literal_val) = filter.op {
                // field_id is the column name in string form
                let field_name = filter.field_id.trim().to_ascii_lowercase();

                // Try to find which table this column belongs to
                for info in &table_infos {
                    if let Some(&col_idx) = info.column_map.get(&field_name) {
                        if let Some(value) = plan_value_from_operator_literal(literal_val) {
                            let column_ref = ColumnRef {
                                table: info.index,
                                column: col_idx,
                            };
                            if info.index < constraints.len() {
                                constraints[info.index].push(ColumnConstraint::Equality(
                                    ColumnLiteral {
                                        column: column_ref,
                                        value,
                                    },
                                ));
                            }
                        }
                        break; // Found the column, no need to check other tables
                    }
                }
            }
        }
        // Handle InList expressions: column IN (val1, val2, ...)
        else if let LlkvExpr::InList {
            expr: col_expr,
            list,
            negated: false,
        } = conjunct
        {
            if let Some(column) = resolve_column_reference(col_expr, &table_infos) {
                // Keep only list items that reduce to plan-value literals;
                // non-literal items are simply not pushed down.
                let mut values = Vec::new();
                for item in list {
                    if let Some(literal) = extract_literal(item)
                        && let Some(value) = literal_to_plan_value_for_join(literal)
                    {
                        values.push(value);
                    }
                }
                if !values.is_empty() && column.table < constraints.len() {
                    constraints[column.table]
                        .push(ColumnConstraint::InList(ColumnInList { column, values }));
                }
            }
        }
        // Handle OR expressions: try to convert (col=v1 OR col=v2) into col IN (v1, v2)
        else if let LlkvExpr::Or(or_children) = conjunct
            && let Some((column, values)) = try_extract_or_as_in_list(or_children, &table_infos)
            && !values.is_empty()
            && column.table < constraints.len()
        {
            constraints[column.table]
                .push(ColumnConstraint::InList(ColumnInList { column, values }));
        }
    }

    constraints
}
8752
8753/// Collect conjuncts from an expression, including OR clauses for potential conversion.
8754///
8755/// Unlike `collect_conjuncts`, this function doesn't bail out on OR - it includes OR clauses
8756/// in the output so they can be analyzed for conversion to IN lists: `(col=v1 OR col=v2)` → `col IN (v1, v2)`.
8757fn collect_conjuncts_lenient<'a>(
8758    expr: &'a LlkvExpr<'static, String>,
8759    out: &mut Vec<&'a LlkvExpr<'static, String>>,
8760) {
8761    match expr {
8762        LlkvExpr::And(children) => {
8763            for child in children {
8764                collect_conjuncts_lenient(child, out);
8765            }
8766        }
8767        other => {
8768            // Include all non-AND expressions (including OR) for analysis
8769            out.push(other);
8770        }
8771    }
8772}
8773
/// Attempt to extract an OR clause as an IN list if it matches the pattern (col=v1 OR col=v2 OR ...).
///
/// Every OR branch must be an equality between the *same* column and some
/// literal; the first branch fixes the column and any branch referencing a
/// different column (or a non-equality) aborts the conversion.
///
/// Returns Some((column_ref, values)) if successful, None otherwise.
fn try_extract_or_as_in_list(
    or_children: &[LlkvExpr<'static, String>],
    table_infos: &[TableInfo<'_>],
) -> Option<(ColumnRef, Vec<PlanValue>)> {
    if or_children.is_empty() {
        return None;
    }

    // Column shared by all branches (set by the first matching branch) and
    // the literal values accumulated so far.
    let mut common_column: Option<ColumnRef> = None;
    let mut values = Vec::new();

    for child in or_children {
        // Try to extract column = literal pattern from Compare expressions
        if let LlkvExpr::Compare {
            left,
            op: CompareOp::Eq,
            right,
        } = child
        {
            // Try col = literal
            if let (Some(column), None) = (
                resolve_column_reference(left, table_infos),
                resolve_column_reference(right, table_infos),
            ) && let Some(literal) = extract_literal(right)
                && let Some(value) = literal_to_plan_value_for_join(literal)
            {
                // Check if this is the same column as previous OR branches
                match common_column {
                    None => common_column = Some(column),
                    Some(ref prev)
                        if prev.table == column.table && prev.column == column.column =>
                    {
                        // Same column, continue
                    }
                    _ => {
                        // Different column - OR cannot be converted to IN list
                        return None;
                    }
                }
                values.push(value);
                continue;
            }

            // Try literal = col
            if let (None, Some(column)) = (
                resolve_column_reference(left, table_infos),
                resolve_column_reference(right, table_infos),
            ) && let Some(literal) = extract_literal(left)
                && let Some(value) = literal_to_plan_value_for_join(literal)
            {
                match common_column {
                    None => common_column = Some(column),
                    Some(ref prev)
                        if prev.table == column.table && prev.column == column.column => {}
                    _ => return None,
                }
                values.push(value);
                continue;
            }
        }
        // Also handle Pred(Filter{...}) expressions with Equals operator
        else if let LlkvExpr::Pred(filter) = child
            && let Operator::Equals(ref literal) = filter.op
            && let Some(column) =
                resolve_column_reference(&ScalarExpr::Column(filter.field_id.clone()), table_infos)
            && let Some(value) = literal_to_plan_value_for_join(literal)
        {
            match common_column {
                None => common_column = Some(column),
                Some(ref prev) if prev.table == column.table && prev.column == column.column => {}
                _ => return None,
            }
            values.push(value);
            continue;
        }

        // If any branch doesn't match the pattern, OR cannot be converted
        return None;
    }

    common_column.map(|col| (col, values))
}
8859
/// Extract join constraints from a WHERE clause predicate for hash join optimization.
///
/// Analyzes the predicate to identify:
/// - **Equality constraints**: column-to-column equalities for hash join keys
/// - **Literal constraints**: column-to-literal comparisons that can be pushed down
/// - **Unsatisfiable conditions**: `WHERE false` that makes result set empty
///
/// Returns `None` if the predicate structure is too complex for optimization (e.g.,
/// contains OR, NOT, or other non-conjunctive patterns).
///
/// # Partial Handling
///
/// The optimizer tracks `handled_conjuncts` vs `total_conjuncts`. If some predicates
/// cannot be optimized (e.g., complex expressions, unsupported operators), they are
/// left for post-join filtering. This allows partial optimization rather than falling
/// back to full cartesian product.
///
/// # Arguments
///
/// * `expr` - WHERE clause expression to analyze
/// * `table_infos` - Metadata about tables in the query (for column resolution)
///
/// # Returns
///
/// * `Some(JoinConstraintPlan)` - Successfully extracted constraints
/// * `None` - Predicate cannot be optimized (fall back to cartesian product)
fn extract_join_constraints(
    expr: &LlkvExpr<'static, String>,
    table_infos: &[TableInfo<'_>],
) -> Option<JoinConstraintPlan> {
    let mut conjuncts = Vec::new();
    // Use lenient collection to include OR clauses for potential conversion to IN lists
    collect_conjuncts_lenient(expr, &mut conjuncts);

    let total_conjuncts = conjuncts.len();
    let mut equalities = Vec::new();
    let mut literals = Vec::new();
    let mut unsatisfiable = false;
    // Counts conjuncts fully absorbed into the plan; the caller compares this
    // against `total_conjuncts` to decide whether a post-join filter is needed.
    let mut handled_conjuncts = 0;

    for conjunct in conjuncts {
        match conjunct {
            // `TRUE` is a no-op conjunct: absorbed with no constraint.
            LlkvExpr::Literal(true) => {
                handled_conjuncts += 1;
            }
            // `FALSE` makes the whole conjunction unsatisfiable; no need to
            // inspect the remaining conjuncts.
            LlkvExpr::Literal(false) => {
                unsatisfiable = true;
                handled_conjuncts += 1;
                break;
            }
            LlkvExpr::Compare {
                left,
                op: CompareOp::Eq,
                right,
            } => {
                // Classify the equality by which sides resolve to columns:
                // col = col  -> join key; col = lit / lit = col -> pushdown.
                match (
                    resolve_column_reference(left, table_infos),
                    resolve_column_reference(right, table_infos),
                ) {
                    (Some(left_col), Some(right_col)) => {
                        equalities.push(ColumnEquality {
                            left: left_col,
                            right: right_col,
                        });
                        handled_conjuncts += 1;
                        continue;
                    }
                    (Some(column), None) => {
                        if let Some(literal) = extract_literal(right)
                            && let Some(value) = literal_to_plan_value_for_join(literal)
                        {
                            literals
                                .push(ColumnConstraint::Equality(ColumnLiteral { column, value }));
                            handled_conjuncts += 1;
                            continue;
                        }
                    }
                    (None, Some(column)) => {
                        if let Some(literal) = extract_literal(left)
                            && let Some(value) = literal_to_plan_value_for_join(literal)
                        {
                            literals
                                .push(ColumnConstraint::Equality(ColumnLiteral { column, value }));
                            handled_conjuncts += 1;
                            continue;
                        }
                    }
                    _ => {}
                }
                // Ignore this predicate - it will be handled by post-join filter
            }
            // Handle InList - these can be used for hash join build side filtering
            LlkvExpr::InList {
                expr: col_expr,
                list,
                negated: false,
            } => {
                if let Some(column) = resolve_column_reference(col_expr, table_infos) {
                    // Extract all values from IN list. Non-literal items are
                    // silently skipped; the constraint is only recorded when at
                    // least one value converted.
                    let mut in_list_values = Vec::new();
                    for item in list {
                        if let Some(literal) = extract_literal(item)
                            && let Some(value) = literal_to_plan_value_for_join(literal)
                        {
                            in_list_values.push(value);
                        }
                    }
                    if !in_list_values.is_empty() {
                        literals.push(ColumnConstraint::InList(ColumnInList {
                            column,
                            values: in_list_values,
                        }));
                        handled_conjuncts += 1;
                        continue;
                    }
                }
                // Ignore - will be handled by post-join filter
            }
            // Handle OR clauses that can be converted to IN lists
            // (e.g. `a = 1 OR a = 2` becomes `a IN (1, 2)`).
            LlkvExpr::Or(or_children) => {
                if let Some((column, values)) = try_extract_or_as_in_list(or_children, table_infos)
                {
                    // Treat as IN list
                    literals.push(ColumnConstraint::InList(ColumnInList { column, values }));
                    handled_conjuncts += 1;
                    continue;
                }
                // OR clause couldn't be converted - ignore, will be handled by post-join filter
            }
            // Handle Pred(Filter{...}) expressions - these are field-based predicates
            LlkvExpr::Pred(filter) => {
                // Try to extract equality constraints
                if let Operator::Equals(ref literal) = filter.op
                    && let Some(column) = resolve_column_reference(
                        &ScalarExpr::Column(filter.field_id.clone()),
                        table_infos,
                    )
                    && let Some(value) = literal_to_plan_value_for_join(literal)
                {
                    literals.push(ColumnConstraint::Equality(ColumnLiteral { column, value }));
                    handled_conjuncts += 1;
                    continue;
                }
                // Ignore other Pred expressions - will be handled by post-join filter
            }
            _ => {
                // Ignore unsupported predicates - they will be handled by post-join filter
            }
        }
    }

    // NOTE(review): despite the doc comment, this implementation never returns
    // `None` — unhandled conjuncts are simply left for post-join filtering.
    Some(JoinConstraintPlan {
        equalities,
        literals,
        unsatisfiable,
        total_conjuncts,
        handled_conjuncts,
    })
}
9019
9020fn resolve_column_reference(
9021    expr: &ScalarExpr<String>,
9022    table_infos: &[TableInfo<'_>],
9023) -> Option<ColumnRef> {
9024    let name = match expr {
9025        ScalarExpr::Column(name) => name.trim(),
9026        _ => return None,
9027    };
9028
9029    let mut parts: Vec<&str> = name
9030        .trim_start_matches('.')
9031        .split('.')
9032        .filter(|segment| !segment.is_empty())
9033        .collect();
9034
9035    if parts.is_empty() {
9036        return None;
9037    }
9038
9039    let column_part = parts.pop()?.to_ascii_lowercase();
9040    if parts.is_empty() {
9041        // Use first-match semantics for bare column names (matches SQLite behavior)
9042        // This allows ambiguous column names in WHERE clauses to resolve to the
9043        // first occurrence in FROM clause order
9044        for info in table_infos {
9045            if let Some(&col_idx) = info.column_map.get(&column_part) {
9046                return Some(ColumnRef {
9047                    table: info.index,
9048                    column: col_idx,
9049                });
9050            }
9051        }
9052        return None;
9053    }
9054
9055    let table_ident = parts.join(".").to_ascii_lowercase();
9056    for info in table_infos {
9057        if matches_table_ident(info.table_ref, &table_ident) {
9058            if let Some(&col_idx) = info.column_map.get(&column_part) {
9059                return Some(ColumnRef {
9060                    table: info.index,
9061                    column: col_idx,
9062                });
9063            } else {
9064                return None;
9065            }
9066        }
9067    }
9068    None
9069}
9070
9071fn matches_table_ident(table_ref: &llkv_plan::TableRef, ident: &str) -> bool {
9072    if ident.is_empty() {
9073        return false;
9074    }
9075    if let Some(alias) = &table_ref.alias
9076        && alias.to_ascii_lowercase() == ident
9077    {
9078        return true;
9079    }
9080    if table_ref.table.to_ascii_lowercase() == ident {
9081        return true;
9082    }
9083    if !table_ref.schema.is_empty() {
9084        let full = format!(
9085            "{}.{}",
9086            table_ref.schema.to_ascii_lowercase(),
9087            table_ref.table.to_ascii_lowercase()
9088        );
9089        if full == ident {
9090            return true;
9091        }
9092    }
9093    false
9094}
9095
9096fn extract_literal(expr: &ScalarExpr<String>) -> Option<&Literal> {
9097    match expr {
9098        ScalarExpr::Literal(lit) => Some(lit),
9099        _ => None,
9100    }
9101}
9102
9103fn plan_value_from_operator_literal(op_value: &llkv_expr::literal::Literal) -> Option<PlanValue> {
9104    match op_value {
9105        llkv_expr::literal::Literal::Integer(v) => i64::try_from(*v).ok().map(PlanValue::Integer),
9106        llkv_expr::literal::Literal::Float(v) => Some(PlanValue::Float(*v)),
9107        llkv_expr::literal::Literal::Boolean(v) => Some(PlanValue::Integer(if *v { 1 } else { 0 })),
9108        llkv_expr::literal::Literal::String(v) => Some(PlanValue::String(v.clone())),
9109        _ => None,
9110    }
9111}
9112
9113fn literal_to_plan_value_for_join(literal: &Literal) -> Option<PlanValue> {
9114    match literal {
9115        Literal::Integer(v) => i64::try_from(*v).ok().map(PlanValue::Integer),
9116        Literal::Float(v) => Some(PlanValue::Float(*v)),
9117        Literal::Boolean(v) => Some(PlanValue::Integer(if *v { 1 } else { 0 })),
9118        Literal::String(v) => Some(PlanValue::String(v.clone())),
9119        _ => None,
9120    }
9121}
9122
/// Tracks rows already emitted so DISTINCT processing can drop duplicates
/// across multiple batches.
#[derive(Default)]
struct DistinctState {
    // Canonicalized rows observed so far.
    seen: FxHashSet<CanonicalRow>,
}

impl DistinctState {
    /// Record `row`; returns `true` if it was not seen before (keep the row).
    fn insert(&mut self, row: CanonicalRow) -> bool {
        self.seen.insert(row)
    }
}
9133
9134fn distinct_filter_batch(
9135    batch: RecordBatch,
9136    state: &mut DistinctState,
9137) -> ExecutorResult<Option<RecordBatch>> {
9138    if batch.num_rows() == 0 {
9139        return Ok(None);
9140    }
9141
9142    let mut keep_flags = Vec::with_capacity(batch.num_rows());
9143    let mut keep_count = 0usize;
9144
9145    for row_idx in 0..batch.num_rows() {
9146        let row = CanonicalRow::from_batch(&batch, row_idx)?;
9147        if state.insert(row) {
9148            keep_flags.push(true);
9149            keep_count += 1;
9150        } else {
9151            keep_flags.push(false);
9152        }
9153    }
9154
9155    if keep_count == 0 {
9156        return Ok(None);
9157    }
9158
9159    if keep_count == batch.num_rows() {
9160        return Ok(Some(batch));
9161    }
9162
9163    let mut builder = BooleanBuilder::with_capacity(batch.num_rows());
9164    for flag in keep_flags {
9165        builder.append_value(flag);
9166    }
9167    let mask = Arc::new(builder.finish());
9168
9169    let filtered = filter_record_batch(&batch, &mask).map_err(|err| {
9170        Error::InvalidArgumentError(format!("failed to apply DISTINCT filter: {err}"))
9171    })?;
9172
9173    Ok(Some(filtered))
9174}
9175
9176fn sort_record_batch_with_order(
9177    schema: &Arc<Schema>,
9178    batch: &RecordBatch,
9179    order_by: &[OrderByPlan],
9180) -> ExecutorResult<RecordBatch> {
9181    if order_by.is_empty() {
9182        return Ok(batch.clone());
9183    }
9184
9185    let mut sort_columns: Vec<SortColumn> = Vec::with_capacity(order_by.len());
9186
9187    for order in order_by {
9188        let column_index = match &order.target {
9189            OrderTarget::Column(name) => schema.index_of(name).map_err(|_| {
9190                Error::InvalidArgumentError(format!(
9191                    "ORDER BY references unknown column '{}'",
9192                    name
9193                ))
9194            })?,
9195            OrderTarget::Index(idx) => {
9196                if *idx >= batch.num_columns() {
9197                    return Err(Error::InvalidArgumentError(format!(
9198                        "ORDER BY position {} is out of bounds for {} columns",
9199                        idx + 1,
9200                        batch.num_columns()
9201                    )));
9202                }
9203                *idx
9204            }
9205            OrderTarget::All => {
9206                return Err(Error::InvalidArgumentError(
9207                    "ORDER BY ALL should be expanded before sorting".into(),
9208                ));
9209            }
9210        };
9211
9212        let source_array = batch.column(column_index);
9213
9214        let values: ArrayRef = match order.sort_type {
9215            OrderSortType::Native => Arc::clone(source_array),
9216            OrderSortType::CastTextToInteger => {
9217                let strings = source_array
9218                    .as_any()
9219                    .downcast_ref::<StringArray>()
9220                    .ok_or_else(|| {
9221                        Error::InvalidArgumentError(
9222                            "ORDER BY CAST expects the underlying column to be TEXT".into(),
9223                        )
9224                    })?;
9225                let mut builder = Int64Builder::with_capacity(strings.len());
9226                for i in 0..strings.len() {
9227                    if strings.is_null(i) {
9228                        builder.append_null();
9229                    } else {
9230                        match strings.value(i).parse::<i64>() {
9231                            Ok(value) => builder.append_value(value),
9232                            Err(_) => builder.append_null(),
9233                        }
9234                    }
9235                }
9236                Arc::new(builder.finish()) as ArrayRef
9237            }
9238        };
9239
9240        let sort_options = SortOptions {
9241            descending: !order.ascending,
9242            nulls_first: order.nulls_first,
9243        };
9244
9245        sort_columns.push(SortColumn {
9246            values,
9247            options: Some(sort_options),
9248        });
9249    }
9250
9251    let indices = lexsort_to_indices(&sort_columns, None).map_err(|err| {
9252        Error::InvalidArgumentError(format!("failed to compute ORDER BY indices: {err}"))
9253    })?;
9254
9255    let perm = indices
9256        .as_any()
9257        .downcast_ref::<UInt32Array>()
9258        .ok_or_else(|| Error::Internal("ORDER BY sorting produced unexpected index type".into()))?;
9259
9260    let mut reordered_columns: Vec<ArrayRef> = Vec::with_capacity(batch.num_columns());
9261    for col_idx in 0..batch.num_columns() {
9262        let reordered = take(batch.column(col_idx), perm, None).map_err(|err| {
9263            Error::InvalidArgumentError(format!(
9264                "failed to apply ORDER BY permutation to column {col_idx}: {err}"
9265            ))
9266        })?;
9267        reordered_columns.push(reordered);
9268    }
9269
9270    RecordBatch::try_new(Arc::clone(schema), reordered_columns)
9271        .map_err(|err| Error::Internal(format!("failed to build reordered ORDER BY batch: {err}")))
9272}
9273
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::array::{Array, ArrayRef, Int64Array};
    use arrow::datatypes::{DataType, Field, Schema};
    use llkv_expr::expr::BinaryOp;
    use std::sync::Arc;

    /// Verifies that `CrossProductExpressionContext` evaluates both literal
    /// and column-arithmetic scalar expressions over a joined-schema batch,
    /// including resolving the short form "tab2.a" against the fully
    /// qualified field name "main.tab2.a".
    #[test]
    fn cross_product_context_evaluates_expressions() {
        let schema = Arc::new(Schema::new(vec![
            Field::new("main.tab2.a", DataType::Int64, false),
            Field::new("main.tab2.b", DataType::Int64, false),
        ]));

        let batch = RecordBatch::try_new(
            Arc::clone(&schema),
            vec![
                Arc::new(Int64Array::from(vec![1, 2, 3])) as ArrayRef,
                Arc::new(Int64Array::from(vec![10, 20, 30])) as ArrayRef,
            ],
        )
        .expect("valid batch");

        let lookup = build_cross_product_column_lookup(schema.as_ref(), &[], &[], &[]);
        let mut ctx = CrossProductExpressionContext::new(schema.as_ref(), lookup)
            .expect("context builds from schema");

        // A bare literal must broadcast to the batch's row count.
        let literal_expr: ScalarExpr<String> = ScalarExpr::literal(67);
        let literal = ctx
            .evaluate(&literal_expr, &batch)
            .expect("literal evaluation succeeds");
        let literal_array = literal
            .as_any()
            .downcast_ref::<Int64Array>()
            .expect("int64 literal result");
        assert_eq!(literal_array.len(), 3);
        assert!(literal_array.iter().all(|value| value == Some(67)));

        // Column + literal arithmetic is applied element-wise.
        let add_expr = ScalarExpr::binary(
            ScalarExpr::column("tab2.a".to_string()),
            BinaryOp::Add,
            ScalarExpr::literal(5),
        );
        let added = ctx
            .evaluate(&add_expr, &batch)
            .expect("column addition succeeds");
        let added_array = added
            .as_any()
            .downcast_ref::<Int64Array>()
            .expect("int64 addition result");
        assert_eq!(added_array.values(), &[6, 7, 8]);
    }

    /// Chains `cross_join_table_batches` across three single-column tables
    /// (2 x 3 x 1 rows) and checks both the combined schema width and the
    /// row-major repetition pattern of each column in the 6-row product.
    #[test]
    fn cross_product_handles_more_than_two_tables() {
        let schema_a = Arc::new(Schema::new(vec![Field::new(
            "main.t1.a",
            DataType::Int64,
            false,
        )]));
        let schema_b = Arc::new(Schema::new(vec![Field::new(
            "main.t2.b",
            DataType::Int64,
            false,
        )]));
        let schema_c = Arc::new(Schema::new(vec![Field::new(
            "main.t3.c",
            DataType::Int64,
            false,
        )]));

        let batch_a = RecordBatch::try_new(
            Arc::clone(&schema_a),
            vec![Arc::new(Int64Array::from(vec![1, 2])) as ArrayRef],
        )
        .expect("valid batch");
        let batch_b = RecordBatch::try_new(
            Arc::clone(&schema_b),
            vec![Arc::new(Int64Array::from(vec![10, 20, 30])) as ArrayRef],
        )
        .expect("valid batch");
        let batch_c = RecordBatch::try_new(
            Arc::clone(&schema_c),
            vec![Arc::new(Int64Array::from(vec![100])) as ArrayRef],
        )
        .expect("valid batch");

        let data_a = TableCrossProductData {
            schema: schema_a,
            batches: vec![batch_a],
            column_counts: vec![1],
            table_indices: vec![0],
        };
        let data_b = TableCrossProductData {
            schema: schema_b,
            batches: vec![batch_b],
            column_counts: vec![1],
            table_indices: vec![1],
        };
        let data_c = TableCrossProductData {
            schema: schema_c,
            batches: vec![batch_c],
            column_counts: vec![1],
            table_indices: vec![2],
        };

        // First join: 2 rows x 3 rows = 6 rows across 2 columns.
        let ab = cross_join_table_batches(data_a, data_b).expect("two-table product");
        assert_eq!(ab.schema.fields().len(), 2);
        assert_eq!(ab.batches.len(), 1);
        assert_eq!(ab.batches[0].num_rows(), 6);

        // Second join against a single-row table keeps 6 rows, adds a column.
        let abc = cross_join_table_batches(ab, data_c).expect("three-table product");
        assert_eq!(abc.schema.fields().len(), 3);
        assert_eq!(abc.batches.len(), 1);

        let final_batch = &abc.batches[0];
        assert_eq!(final_batch.num_rows(), 6);

        // Left table values repeat in blocks (outer loop of the product).
        let col_a = final_batch
            .column(0)
            .as_any()
            .downcast_ref::<Int64Array>()
            .expect("left column values");
        assert_eq!(col_a.values(), &[1, 1, 1, 2, 2, 2]);

        // Middle table values cycle within each block (inner loop).
        let col_b = final_batch
            .column(1)
            .as_any()
            .downcast_ref::<Int64Array>()
            .expect("middle column values");
        assert_eq!(col_b.values(), &[10, 20, 30, 10, 20, 30]);

        // Single-row right table broadcasts its value to every output row.
        let col_c = final_batch
            .column(2)
            .as_any()
            .downcast_ref::<Int64Array>()
            .expect("right column values");
        assert_eq!(col_c.values(), &[100, 100, 100, 100, 100, 100]);
    }
}