vibesql_executor/select/join/search/optimizer.rs
1//! Public API for join order optimization
2
3use super::config::ParallelSearchConfig;
4use super::context::JoinOrderContext;
5use super::reorder::JoinOrderAnalyzer;
6use crate::optimizer::aggregate_analysis::AggregateAnalysis;
7
8/// Performs join order optimization via exhaustive search
9pub struct JoinOrderSearch {
10 pub(super) context: JoinOrderContext,
11}
12
13impl JoinOrderSearch {
14 /// Create a new join order search from an analyzer with real table statistics
15 pub fn from_analyzer(
16 analyzer: &JoinOrderAnalyzer,
17 database: &vibesql_storage::Database,
18 ) -> Self {
19 Self::from_analyzer_with_predicates(
20 analyzer,
21 database,
22 &std::collections::HashMap::new(),
23 &std::collections::HashMap::new(),
24 )
25 }
26
27 /// Create a new join order search with WHERE clause selectivity applied
28 ///
29 /// This version accounts for table-local predicates when estimating cardinalities,
30 /// which helps choose better join orders for queries like TPC-H Q3 where filter
31 /// predicates significantly reduce table sizes before joining.
32 ///
33 /// # Parameters
34 /// - `alias_to_table`: Maps table aliases (e.g., "n1", "n2") to actual table names (e.g., "nation").
35 /// This is critical for queries with self-joins (like TPC-H Q7 with two nation aliases)
36 /// to correctly look up table cardinalities and statistics.
37 pub fn from_analyzer_with_predicates(
38 analyzer: &JoinOrderAnalyzer,
39 database: &vibesql_storage::Database,
40 table_local_predicates: &std::collections::HashMap<String, Vec<vibesql_ast::Expression>>,
41 alias_to_table: &std::collections::HashMap<String, String>,
42 ) -> Self {
43 let edges = analyzer.edges().to_vec();
44 let edge_selectivities =
45 JoinOrderContext::compute_edge_selectivities(&edges, database, alias_to_table);
46
47 let num_tables = analyzer.tables().len();
48
49 // Extract base cardinalities (before filters) for cascading filter tracking
50 let table_base_cardinalities =
51 JoinOrderContext::extract_base_cardinalities(analyzer, database, alias_to_table);
52
53 let context = JoinOrderContext {
54 all_tables: analyzer.tables().clone(),
55 edges,
56 table_cardinalities: JoinOrderContext::extract_cardinalities_with_selectivity(
57 analyzer,
58 database,
59 table_local_predicates,
60 alias_to_table,
61 ),
62 table_base_cardinalities,
63 edge_selectivities,
64 config: ParallelSearchConfig::with_table_count(num_tables),
65 aggregate_analysis: None,
66 };
67
68 Self { context }
69 }
70
71 /// Create a new join order search with aggregate-aware optimization
72 ///
73 /// This version accepts aggregate analysis results and can adjust join order
74 /// decisions based on GROUP BY/HAVING clauses. When HAVING filters are selective,
75 /// the optimizer may prefer different join orders that enable early aggregation.
76 ///
77 /// # Parameters
78 /// - `alias_to_table`: Maps table aliases to actual table names for database lookups.
79 pub fn from_analyzer_with_aggregates(
80 analyzer: &JoinOrderAnalyzer,
81 database: &vibesql_storage::Database,
82 table_local_predicates: &std::collections::HashMap<String, Vec<vibesql_ast::Expression>>,
83 alias_to_table: &std::collections::HashMap<String, String>,
84 aggregate_analysis: AggregateAnalysis,
85 ) -> Self {
86 let edges = analyzer.edges().to_vec();
87 let edge_selectivities =
88 JoinOrderContext::compute_edge_selectivities(&edges, database, alias_to_table);
89
90 let num_tables = analyzer.tables().len();
91
92 // Extract base cardinalities (before filters) for cascading filter tracking
93 let table_base_cardinalities =
94 JoinOrderContext::extract_base_cardinalities(analyzer, database, alias_to_table);
95
96 let context = JoinOrderContext {
97 all_tables: analyzer.tables().clone(),
98 edges,
99 table_cardinalities: JoinOrderContext::extract_cardinalities_with_selectivity(
100 analyzer,
101 database,
102 table_local_predicates,
103 alias_to_table,
104 ),
105 table_base_cardinalities,
106 edge_selectivities,
107 config: ParallelSearchConfig::with_table_count(num_tables),
108 aggregate_analysis: Some(aggregate_analysis),
109 };
110
111 Self { context }
112 }
113
114 /// Find optimal join order by exploring search space
115 ///
116 /// Returns list of table names in the order they should be joined.
117 ///
118 /// When time-bounded search is enabled (default), uses parallel BFS for all
119 /// multi-table queries with a configurable time budget. This allows optimization
120 /// of large queries (9+ tables) while preventing excessive search time.
121 ///
122 /// When time-bounded search is disabled, uses legacy behavior: parallel BFS for
123 /// 3-6 table queries with highly connected join graphs, DFS for others.
124 pub fn find_optimal_order(&self) -> Vec<String> {
125 if self.context.all_tables.is_empty() {
126 return Vec::new();
127 }
128
129 // Use time-bounded BFS for all multi-table queries when enabled
130 if self.context.config.use_time_budget {
131 // Time-bounded BFS handles all query sizes with time budget protection
132 self.context.find_optimal_order_parallel()
133 } else {
134 // Legacy behavior: table-count based decision
135 if self.should_use_parallel_search() {
136 self.context.find_optimal_order_parallel()
137 } else {
138 self.context.find_optimal_order_dfs()
139 }
140 }
141 }
142
143 /// Determine whether to use parallel BFS or sequential DFS
144 pub(super) fn should_use_parallel_search(&self) -> bool {
145 // Don't parallelize if disabled
146 if !self.context.config.enabled {
147 return false;
148 }
149
150 let num_tables = self.context.all_tables.len();
151
152 // Don't parallelize small queries (< 3 tables)
153 if num_tables < 3 {
154 return false;
155 }
156
157 // Don't parallelize beyond depth limit (memory constraints)
158 if num_tables > self.context.config.max_depth {
159 return false;
160 }
161
162 // Parallel BFS beneficial for highly connected graphs
163 // Calculate edge density: edges per table
164 let edge_density = self.context.edges.len() as f64 / num_tables as f64;
165
166 // High edge density suggests complex join graph → parallel beneficial
167 // Threshold of 1.5 means we need at least 1-2 edges per table
168 edge_density >= 1.5
169 }
170}