vibesql_executor/select/join/search/optimizer.rs
1//! Public API for join order optimization
2
3use super::{config::ParallelSearchConfig, context::JoinOrderContext, reorder::JoinOrderAnalyzer};
4use crate::optimizer::aggregate_analysis::AggregateAnalysis;
5
6/// Performs join order optimization via exhaustive search
7pub struct JoinOrderSearch {
8 pub(super) context: JoinOrderContext,
9}
10
11impl JoinOrderSearch {
12 /// Create a new join order search from an analyzer with real table statistics
13 pub fn from_analyzer(
14 analyzer: &JoinOrderAnalyzer,
15 database: &vibesql_storage::Database,
16 ) -> Self {
17 Self::from_analyzer_with_predicates(
18 analyzer,
19 database,
20 &std::collections::HashMap::new(),
21 &std::collections::HashMap::new(),
22 )
23 }
24
25 /// Create a new join order search with WHERE clause selectivity applied
26 ///
27 /// This version accounts for table-local predicates when estimating cardinalities,
28 /// which helps choose better join orders for queries like TPC-H Q3 where filter
29 /// predicates significantly reduce table sizes before joining.
30 ///
31 /// # Parameters
32 /// - `alias_to_table`: Maps table aliases (e.g., "n1", "n2") to actual table names (e.g.,
33 /// "nation"). This is critical for queries with self-joins (like TPC-H Q7 with two nation
34 /// aliases) to correctly look up table cardinalities and statistics.
35 pub fn from_analyzer_with_predicates(
36 analyzer: &JoinOrderAnalyzer,
37 database: &vibesql_storage::Database,
38 table_local_predicates: &std::collections::HashMap<String, Vec<vibesql_ast::Expression>>,
39 alias_to_table: &std::collections::HashMap<String, String>,
40 ) -> Self {
41 let edges = analyzer.edges().to_vec();
42 let edge_selectivities =
43 JoinOrderContext::compute_edge_selectivities(&edges, database, alias_to_table);
44
45 let num_tables = analyzer.tables().len();
46
47 // Extract base cardinalities (before filters) for cascading filter tracking
48 let table_base_cardinalities =
49 JoinOrderContext::extract_base_cardinalities(analyzer, database, alias_to_table);
50
51 let context = JoinOrderContext {
52 all_tables: analyzer.tables().clone(),
53 edges,
54 table_cardinalities: JoinOrderContext::extract_cardinalities_with_selectivity(
55 analyzer,
56 database,
57 table_local_predicates,
58 alias_to_table,
59 ),
60 table_base_cardinalities,
61 edge_selectivities,
62 config: ParallelSearchConfig::with_table_count(num_tables),
63 aggregate_analysis: None,
64 };
65
66 Self { context }
67 }
68
69 /// Create a new join order search with aggregate-aware optimization
70 ///
71 /// This version accepts aggregate analysis results and can adjust join order
72 /// decisions based on GROUP BY/HAVING clauses. When HAVING filters are selective,
73 /// the optimizer may prefer different join orders that enable early aggregation.
74 ///
75 /// # Parameters
76 /// - `alias_to_table`: Maps table aliases to actual table names for database lookups.
77 pub fn from_analyzer_with_aggregates(
78 analyzer: &JoinOrderAnalyzer,
79 database: &vibesql_storage::Database,
80 table_local_predicates: &std::collections::HashMap<String, Vec<vibesql_ast::Expression>>,
81 alias_to_table: &std::collections::HashMap<String, String>,
82 aggregate_analysis: AggregateAnalysis,
83 ) -> Self {
84 let edges = analyzer.edges().to_vec();
85 let edge_selectivities =
86 JoinOrderContext::compute_edge_selectivities(&edges, database, alias_to_table);
87
88 let num_tables = analyzer.tables().len();
89
90 // Extract base cardinalities (before filters) for cascading filter tracking
91 let table_base_cardinalities =
92 JoinOrderContext::extract_base_cardinalities(analyzer, database, alias_to_table);
93
94 let context = JoinOrderContext {
95 all_tables: analyzer.tables().clone(),
96 edges,
97 table_cardinalities: JoinOrderContext::extract_cardinalities_with_selectivity(
98 analyzer,
99 database,
100 table_local_predicates,
101 alias_to_table,
102 ),
103 table_base_cardinalities,
104 edge_selectivities,
105 config: ParallelSearchConfig::with_table_count(num_tables),
106 aggregate_analysis: Some(aggregate_analysis),
107 };
108
109 Self { context }
110 }
111
112 /// Find optimal join order by exploring search space
113 ///
114 /// Returns list of table names in the order they should be joined.
115 ///
116 /// For small queries (≤3 tables), uses sequential DFS since the search space
117 /// is trivial (at most 6 orderings) and parallel overhead isn't worth it.
118 ///
119 /// When time-bounded search is enabled (default), uses parallel BFS for
120 /// multi-table queries (4+) with a configurable time budget. This allows
121 /// optimization of large queries (9+ tables) while preventing excessive search time.
122 ///
123 /// When time-bounded search is disabled, uses legacy behavior: parallel BFS for
124 /// 3-6 table queries with highly connected join graphs, DFS for others.
125 pub fn find_optimal_order(&self) -> Vec<String> {
126 if self.context.all_tables.is_empty() {
127 return Vec::new();
128 }
129
130 // For small queries (≤3 tables), use sequential DFS
131 // The search space is trivial (at most 3! = 6 orderings) and
132 // rayon thread pool overhead can cause delays under contention
133 if self.context.all_tables.len() <= 3 {
134 return self.context.find_optimal_order_dfs();
135 }
136
137 // Use time-bounded BFS for larger multi-table queries when enabled
138 if self.context.config.use_time_budget {
139 // Time-bounded BFS handles all query sizes with time budget protection
140 self.context.find_optimal_order_parallel()
141 } else {
142 // Legacy behavior: table-count based decision
143 if self.should_use_parallel_search() {
144 self.context.find_optimal_order_parallel()
145 } else {
146 self.context.find_optimal_order_dfs()
147 }
148 }
149 }
150
151 /// Determine whether to use parallel BFS or sequential DFS
152 pub(super) fn should_use_parallel_search(&self) -> bool {
153 // Don't parallelize if disabled
154 if !self.context.config.enabled {
155 return false;
156 }
157
158 let num_tables = self.context.all_tables.len();
159
160 // Don't parallelize small queries (< 3 tables)
161 if num_tables < 3 {
162 return false;
163 }
164
165 // Don't parallelize beyond depth limit (memory constraints)
166 if num_tables > self.context.config.max_depth {
167 return false;
168 }
169
170 // Parallel BFS beneficial for highly connected graphs
171 // Calculate edge density: edges per table
172 let edge_density = self.context.edges.len() as f64 / num_tables as f64;
173
174 // High edge density suggests complex join graph → parallel beneficial
175 // Threshold of 1.5 means we need at least 1-2 edges per table
176 edge_density >= 1.5
177 }
178}