vibesql_executor/select/join/search/
optimizer.rs

1//! Public API for join order optimization
2
3use super::{config::ParallelSearchConfig, context::JoinOrderContext, reorder::JoinOrderAnalyzer};
4use crate::optimizer::aggregate_analysis::AggregateAnalysis;
5
6/// Performs join order optimization via exhaustive search
7pub struct JoinOrderSearch {
8    pub(super) context: JoinOrderContext,
9}
10
11impl JoinOrderSearch {
12    /// Create a new join order search from an analyzer with real table statistics
13    pub fn from_analyzer(
14        analyzer: &JoinOrderAnalyzer,
15        database: &vibesql_storage::Database,
16    ) -> Self {
17        Self::from_analyzer_with_predicates(
18            analyzer,
19            database,
20            &std::collections::HashMap::new(),
21            &std::collections::HashMap::new(),
22        )
23    }
24
25    /// Create a new join order search with WHERE clause selectivity applied
26    ///
27    /// This version accounts for table-local predicates when estimating cardinalities,
28    /// which helps choose better join orders for queries like TPC-H Q3 where filter
29    /// predicates significantly reduce table sizes before joining.
30    ///
31    /// # Parameters
32    /// - `alias_to_table`: Maps table aliases (e.g., "n1", "n2") to actual table names (e.g.,
33    ///   "nation"). This is critical for queries with self-joins (like TPC-H Q7 with two nation
34    ///   aliases) to correctly look up table cardinalities and statistics.
35    pub fn from_analyzer_with_predicates(
36        analyzer: &JoinOrderAnalyzer,
37        database: &vibesql_storage::Database,
38        table_local_predicates: &std::collections::HashMap<String, Vec<vibesql_ast::Expression>>,
39        alias_to_table: &std::collections::HashMap<String, String>,
40    ) -> Self {
41        let edges = analyzer.edges().to_vec();
42        let edge_selectivities =
43            JoinOrderContext::compute_edge_selectivities(&edges, database, alias_to_table);
44
45        let num_tables = analyzer.tables().len();
46
47        // Extract base cardinalities (before filters) for cascading filter tracking
48        let table_base_cardinalities =
49            JoinOrderContext::extract_base_cardinalities(analyzer, database, alias_to_table);
50
51        let context = JoinOrderContext {
52            all_tables: analyzer.tables().clone(),
53            edges,
54            table_cardinalities: JoinOrderContext::extract_cardinalities_with_selectivity(
55                analyzer,
56                database,
57                table_local_predicates,
58                alias_to_table,
59            ),
60            table_base_cardinalities,
61            edge_selectivities,
62            config: ParallelSearchConfig::with_table_count(num_tables),
63            aggregate_analysis: None,
64        };
65
66        Self { context }
67    }
68
69    /// Create a new join order search with aggregate-aware optimization
70    ///
71    /// This version accepts aggregate analysis results and can adjust join order
72    /// decisions based on GROUP BY/HAVING clauses. When HAVING filters are selective,
73    /// the optimizer may prefer different join orders that enable early aggregation.
74    ///
75    /// # Parameters
76    /// - `alias_to_table`: Maps table aliases to actual table names for database lookups.
77    pub fn from_analyzer_with_aggregates(
78        analyzer: &JoinOrderAnalyzer,
79        database: &vibesql_storage::Database,
80        table_local_predicates: &std::collections::HashMap<String, Vec<vibesql_ast::Expression>>,
81        alias_to_table: &std::collections::HashMap<String, String>,
82        aggregate_analysis: AggregateAnalysis,
83    ) -> Self {
84        let edges = analyzer.edges().to_vec();
85        let edge_selectivities =
86            JoinOrderContext::compute_edge_selectivities(&edges, database, alias_to_table);
87
88        let num_tables = analyzer.tables().len();
89
90        // Extract base cardinalities (before filters) for cascading filter tracking
91        let table_base_cardinalities =
92            JoinOrderContext::extract_base_cardinalities(analyzer, database, alias_to_table);
93
94        let context = JoinOrderContext {
95            all_tables: analyzer.tables().clone(),
96            edges,
97            table_cardinalities: JoinOrderContext::extract_cardinalities_with_selectivity(
98                analyzer,
99                database,
100                table_local_predicates,
101                alias_to_table,
102            ),
103            table_base_cardinalities,
104            edge_selectivities,
105            config: ParallelSearchConfig::with_table_count(num_tables),
106            aggregate_analysis: Some(aggregate_analysis),
107        };
108
109        Self { context }
110    }
111
112    /// Find optimal join order by exploring search space
113    ///
114    /// Returns list of table names in the order they should be joined.
115    ///
116    /// For small queries (≤3 tables), uses sequential DFS since the search space
117    /// is trivial (at most 6 orderings) and parallel overhead isn't worth it.
118    ///
119    /// When time-bounded search is enabled (default), uses parallel BFS for
120    /// multi-table queries (4+) with a configurable time budget. This allows
121    /// optimization of large queries (9+ tables) while preventing excessive search time.
122    ///
123    /// When time-bounded search is disabled, uses legacy behavior: parallel BFS for
124    /// 3-6 table queries with highly connected join graphs, DFS for others.
125    pub fn find_optimal_order(&self) -> Vec<String> {
126        if self.context.all_tables.is_empty() {
127            return Vec::new();
128        }
129
130        // For small queries (≤3 tables), use sequential DFS
131        // The search space is trivial (at most 3! = 6 orderings) and
132        // rayon thread pool overhead can cause delays under contention
133        if self.context.all_tables.len() <= 3 {
134            return self.context.find_optimal_order_dfs();
135        }
136
137        // Use time-bounded BFS for larger multi-table queries when enabled
138        if self.context.config.use_time_budget {
139            // Time-bounded BFS handles all query sizes with time budget protection
140            self.context.find_optimal_order_parallel()
141        } else {
142            // Legacy behavior: table-count based decision
143            if self.should_use_parallel_search() {
144                self.context.find_optimal_order_parallel()
145            } else {
146                self.context.find_optimal_order_dfs()
147            }
148        }
149    }
150
151    /// Determine whether to use parallel BFS or sequential DFS
152    pub(super) fn should_use_parallel_search(&self) -> bool {
153        // Don't parallelize if disabled
154        if !self.context.config.enabled {
155            return false;
156        }
157
158        let num_tables = self.context.all_tables.len();
159
160        // Don't parallelize small queries (< 3 tables)
161        if num_tables < 3 {
162            return false;
163        }
164
165        // Don't parallelize beyond depth limit (memory constraints)
166        if num_tables > self.context.config.max_depth {
167            return false;
168        }
169
170        // Parallel BFS beneficial for highly connected graphs
171        // Calculate edge density: edges per table
172        let edge_density = self.context.edges.len() as f64 / num_tables as f64;
173
174        // High edge density suggests complex join graph → parallel beneficial
175        // Threshold of 1.5 means we need at least 1-2 edges per table
176        edge_density >= 1.5
177    }
178}