vibesql_executor/select/join/search/optimizer.rs
1//! Public API for join order optimization
2
3use super::config::ParallelSearchConfig;
4use super::context::JoinOrderContext;
5use super::reorder::JoinOrderAnalyzer;
6use crate::optimizer::aggregate_analysis::AggregateAnalysis;
7
8/// Performs join order optimization via exhaustive search
9pub struct JoinOrderSearch {
10 pub(super) context: JoinOrderContext,
11}
12
13impl JoinOrderSearch {
14 /// Create a new join order search from an analyzer with real table statistics
15 pub fn from_analyzer(
16 analyzer: &JoinOrderAnalyzer,
17 database: &vibesql_storage::Database,
18 ) -> Self {
19 Self::from_analyzer_with_predicates(
20 analyzer,
21 database,
22 &std::collections::HashMap::new(),
23 &std::collections::HashMap::new(),
24 )
25 }
26
27 /// Create a new join order search with WHERE clause selectivity applied
28 ///
29 /// This version accounts for table-local predicates when estimating cardinalities,
30 /// which helps choose better join orders for queries like TPC-H Q3 where filter
31 /// predicates significantly reduce table sizes before joining.
32 ///
33 /// # Parameters
34 /// - `alias_to_table`: Maps table aliases (e.g., "n1", "n2") to actual table names (e.g., "nation").
35 /// This is critical for queries with self-joins (like TPC-H Q7 with two nation aliases)
36 /// to correctly look up table cardinalities and statistics.
37 pub fn from_analyzer_with_predicates(
38 analyzer: &JoinOrderAnalyzer,
39 database: &vibesql_storage::Database,
40 table_local_predicates: &std::collections::HashMap<String, Vec<vibesql_ast::Expression>>,
41 alias_to_table: &std::collections::HashMap<String, String>,
42 ) -> Self {
43 let edges = analyzer.edges().to_vec();
44 let edge_selectivities = JoinOrderContext::compute_edge_selectivities(&edges, database, alias_to_table);
45
46 let num_tables = analyzer.tables().len();
47
48 // Extract base cardinalities (before filters) for cascading filter tracking
49 let table_base_cardinalities = JoinOrderContext::extract_base_cardinalities(
50 analyzer,
51 database,
52 alias_to_table,
53 );
54
55 let context = JoinOrderContext {
56 all_tables: analyzer.tables().clone(),
57 edges,
58 table_cardinalities: JoinOrderContext::extract_cardinalities_with_selectivity(
59 analyzer,
60 database,
61 table_local_predicates,
62 alias_to_table,
63 ),
64 table_base_cardinalities,
65 edge_selectivities,
66 config: ParallelSearchConfig::with_table_count(num_tables),
67 aggregate_analysis: None,
68 };
69
70 Self { context }
71 }
72
73 /// Create a new join order search with aggregate-aware optimization
74 ///
75 /// This version accepts aggregate analysis results and can adjust join order
76 /// decisions based on GROUP BY/HAVING clauses. When HAVING filters are selective,
77 /// the optimizer may prefer different join orders that enable early aggregation.
78 ///
79 /// # Parameters
80 /// - `alias_to_table`: Maps table aliases to actual table names for database lookups.
81 pub fn from_analyzer_with_aggregates(
82 analyzer: &JoinOrderAnalyzer,
83 database: &vibesql_storage::Database,
84 table_local_predicates: &std::collections::HashMap<String, Vec<vibesql_ast::Expression>>,
85 alias_to_table: &std::collections::HashMap<String, String>,
86 aggregate_analysis: AggregateAnalysis,
87 ) -> Self {
88 let edges = analyzer.edges().to_vec();
89 let edge_selectivities = JoinOrderContext::compute_edge_selectivities(&edges, database, alias_to_table);
90
91 let num_tables = analyzer.tables().len();
92
93 // Extract base cardinalities (before filters) for cascading filter tracking
94 let table_base_cardinalities = JoinOrderContext::extract_base_cardinalities(
95 analyzer,
96 database,
97 alias_to_table,
98 );
99
100 let context = JoinOrderContext {
101 all_tables: analyzer.tables().clone(),
102 edges,
103 table_cardinalities: JoinOrderContext::extract_cardinalities_with_selectivity(
104 analyzer,
105 database,
106 table_local_predicates,
107 alias_to_table,
108 ),
109 table_base_cardinalities,
110 edge_selectivities,
111 config: ParallelSearchConfig::with_table_count(num_tables),
112 aggregate_analysis: Some(aggregate_analysis),
113 };
114
115 Self { context }
116 }
117
118 /// Find optimal join order by exploring search space
119 ///
120 /// Returns list of table names in the order they should be joined.
121 ///
122 /// When time-bounded search is enabled (default), uses parallel BFS for all
123 /// multi-table queries with a configurable time budget. This allows optimization
124 /// of large queries (9+ tables) while preventing excessive search time.
125 ///
126 /// When time-bounded search is disabled, uses legacy behavior: parallel BFS for
127 /// 3-6 table queries with highly connected join graphs, DFS for others.
128 pub fn find_optimal_order(&self) -> Vec<String> {
129 if self.context.all_tables.is_empty() {
130 return Vec::new();
131 }
132
133 // Use time-bounded BFS for all multi-table queries when enabled
134 if self.context.config.use_time_budget {
135 // Time-bounded BFS handles all query sizes with time budget protection
136 self.context.find_optimal_order_parallel()
137 } else {
138 // Legacy behavior: table-count based decision
139 if self.should_use_parallel_search() {
140 self.context.find_optimal_order_parallel()
141 } else {
142 self.context.find_optimal_order_dfs()
143 }
144 }
145 }
146
147 /// Determine whether to use parallel BFS or sequential DFS
148 pub(super) fn should_use_parallel_search(&self) -> bool {
149 // Don't parallelize if disabled
150 if !self.context.config.enabled {
151 return false;
152 }
153
154 let num_tables = self.context.all_tables.len();
155
156 // Don't parallelize small queries (< 3 tables)
157 if num_tables < 3 {
158 return false;
159 }
160
161 // Don't parallelize beyond depth limit (memory constraints)
162 if num_tables > self.context.config.max_depth {
163 return false;
164 }
165
166 // Parallel BFS beneficial for highly connected graphs
167 // Calculate edge density: edges per table
168 let edge_density = self.context.edges.len() as f64 / num_tables as f64;
169
170 // High edge density suggests complex join graph → parallel beneficial
171 // Threshold of 1.5 means we need at least 1-2 edges per table
172 edge_density >= 1.5
173 }
174}