Skip to main content

datafusion_index_provider/
types.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Core type definitions for representing index-based filter operations.
19//!
20//! This module defines the [`IndexFilter`] enum which represents the structure of
21//! filter operations that can be pushed down to indexes. These types form the
22//! foundation for the query optimization and execution plan generation process.
23
24use std::sync::Arc;
25
26use datafusion::prelude::Expr;
27
28use crate::physical_plan::Index;
29use std::fmt;
30
31/// Represents the structure of filter expressions that can be executed using indexes.
32///
33/// This enum captures the hierarchical structure of filter conditions and their
34/// mapping to physical indexes. It serves as an intermediate representation between
35/// SQL filter expressions and physical execution plans.
36///
37/// ## Execution Plan Mapping
38/// Each variant maps to a specific execution strategy:
39/// - `Single`: Direct index scan via `IndexScanExec`
40/// - `And`: Index intersection via cascaded joins (Hash or SortMerge joins)
41/// - `Or`: Index union via `UnionExec` + `AggregateExec` for deduplication
42#[derive(Debug, Clone)]
43pub enum IndexFilter {
44    /// A filter condition that can be handled by a single index.
45    ///
46    /// This represents the base case where one index can directly answer a filter predicate.
47    /// The filter expression should be compatible with the index's `supports_predicate()` method.
48    Single {
49        /// The index that will handle this filter
50        index: Arc<dyn Index>,
51        /// The filter expression to be evaluated by the index
52        filter: Expr,
53    },
54
55    /// A conjunction (AND) of multiple filter conditions across different indexes.
56    ///
57    /// This represents scenarios where multiple indexes must be consulted and their
58    /// results intersected to find rows that satisfy ALL conditions. The execution
59    /// strategy builds a left-deep tree of joins to progressively narrow the result set.
60    And(Vec<IndexFilter>),
61
62    /// A disjunction (OR) of multiple filter conditions across different indexes.
63    ///
64    /// This represents scenarios where any of several conditions can be satisfied.
65    /// The execution strategy unions results from all indexes and deduplicates on
66    /// all primary key columns to ensure each row appears only once in the final result.
67    Or(Vec<IndexFilter>),
68}
69
70/// A collection of [`IndexFilter`]s representing a complete index-based query plan.
71///
72/// This type represents the complete set of index operations needed to satisfy a query's
73/// filter conditions. It is typically the output of `analyze_and_optimize_filters()` and
74/// serves as input to `create_execution_plan_with_indexes()` for physical plan generation.
75pub type IndexFilters = Vec<IndexFilter>;
76
77/// Controls how union operations combine multiple index scans for OR conditions.
78///
79/// When executing disjunctive (OR) queries across multiple indexes, results must be
80/// combined. This enum allows choosing between parallel and sequential execution
81/// strategies based on runtime requirements.
82#[derive(Debug, Clone, Copy, Default)]
83pub enum UnionMode {
84    /// Use standard `UnionExec` with parallel execution.
85    ///
86    /// This mode spawns Tokio tasks via `JoinSet::spawn()` to process partitions
87    /// concurrently. Best for Tokio-based runtimes with good parallelism support.
88    ///
89    /// **Warning**: This mode will panic in non-Tokio async runtimes.
90    #[default]
91    Parallel,
92
93    /// Use `SequentialUnionExec` with single-threaded sequential execution.
94    ///
95    /// This mode processes all input partitions sequentially in a single stream
96    /// without spawning any tasks. Required for non-Tokio runtimes such as
97    /// custom async executors that don't support Tokio task spawning.
98    Sequential,
99}
100
101impl fmt::Display for IndexFilter {
102    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
103        match self {
104            IndexFilter::Single { index, .. } => write!(f, "{}", index.name()),
105            IndexFilter::And(filters) => {
106                write!(f, "(")?;
107                for (i, filter) in filters.iter().enumerate() {
108                    if i > 0 {
109                        write!(f, " AND ")?;
110                    }
111                    write!(f, "{filter}")?;
112                }
113                write!(f, ")")
114            }
115            IndexFilter::Or(filters) => {
116                write!(f, "(")?;
117                for (i, filter) in filters.iter().enumerate() {
118                    if i > 0 {
119                        write!(f, " OR ")?;
120                    }
121                    write!(f, "{filter}")?;
122                }
123                write!(f, ")")
124            }
125        }
126    }
127}