datafusion_index_provider/types.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Core type definitions for representing index-based filter operations.
19//!
20//! This module defines the [`IndexFilter`] enum which represents the structure of
21//! filter operations that can be pushed down to indexes. These types form the
22//! foundation for the query optimization and execution plan generation process.
23
24use std::sync::Arc;
25
26use datafusion::prelude::Expr;
27
28use crate::physical_plan::Index;
29use std::fmt;
30
31/// Represents the structure of filter expressions that can be executed using indexes.
32///
33/// This enum captures the hierarchical structure of filter conditions and their
34/// mapping to physical indexes. It serves as an intermediate representation between
35/// SQL filter expressions and physical execution plans.
36///
37/// ## Execution Plan Mapping
38/// Each variant maps to a specific execution strategy:
39/// - `Single`: Direct index scan via `IndexScanExec`
40/// - `And`: Index intersection via cascaded joins (Hash or SortMerge joins)
41/// - `Or`: Index union via `UnionExec` + `AggregateExec` for deduplication
42#[derive(Debug, Clone)]
43pub enum IndexFilter {
44 /// A filter condition that can be handled by a single index.
45 ///
46 /// This represents the base case where one index can directly answer a filter predicate.
47 /// The filter expression should be compatible with the index's `supports_predicate()` method.
48 Single {
49 /// The index that will handle this filter
50 index: Arc<dyn Index>,
51 /// The filter expression to be evaluated by the index
52 filter: Expr,
53 },
54
55 /// A conjunction (AND) of multiple filter conditions across different indexes.
56 ///
57 /// This represents scenarios where multiple indexes must be consulted and their
58 /// results intersected to find rows that satisfy ALL conditions. The execution
59 /// strategy builds a left-deep tree of joins to progressively narrow the result set.
60 And(Vec<IndexFilter>),
61
62 /// A disjunction (OR) of multiple filter conditions across different indexes.
63 ///
64 /// This represents scenarios where any of several conditions can be satisfied.
65 /// The execution strategy unions results from all indexes and deduplicates on
66 /// all primary key columns to ensure each row appears only once in the final result.
67 Or(Vec<IndexFilter>),
68}
69
70/// A collection of [`IndexFilter`]s representing a complete index-based query plan.
71///
72/// This type represents the complete set of index operations needed to satisfy a query's
73/// filter conditions. It is typically the output of `analyze_and_optimize_filters()` and
74/// serves as input to `create_execution_plan_with_indexes()` for physical plan generation.
75pub type IndexFilters = Vec<IndexFilter>;
76
77/// Controls how union operations combine multiple index scans for OR conditions.
78///
79/// When executing disjunctive (OR) queries across multiple indexes, results must be
80/// combined. This enum allows choosing between parallel and sequential execution
81/// strategies based on runtime requirements.
82#[derive(Debug, Clone, Copy, Default)]
83pub enum UnionMode {
84 /// Use standard `UnionExec` with parallel execution.
85 ///
86 /// This mode spawns Tokio tasks via `JoinSet::spawn()` to process partitions
87 /// concurrently. Best for Tokio-based runtimes with good parallelism support.
88 ///
89 /// **Warning**: This mode will panic in non-Tokio async runtimes.
90 #[default]
91 Parallel,
92
93 /// Use `SequentialUnionExec` with single-threaded sequential execution.
94 ///
95 /// This mode processes all input partitions sequentially in a single stream
96 /// without spawning any tasks. Required for non-Tokio runtimes such as
97 /// custom async executors that don't support Tokio task spawning.
98 Sequential,
99}
100
101impl fmt::Display for IndexFilter {
102 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
103 match self {
104 IndexFilter::Single { index, .. } => write!(f, "{}", index.name()),
105 IndexFilter::And(filters) => {
106 write!(f, "(")?;
107 for (i, filter) in filters.iter().enumerate() {
108 if i > 0 {
109 write!(f, " AND ")?;
110 }
111 write!(f, "{filter}")?;
112 }
113 write!(f, ")")
114 }
115 IndexFilter::Or(filters) => {
116 write!(f, "(")?;
117 for (i, filter) in filters.iter().enumerate() {
118 if i > 0 {
119 write!(f, " OR ")?;
120 }
121 write!(f, "{filter}")?;
122 }
123 write!(f, ")")
124 }
125 }
126 }
127}