vibesql_executor/select/join/search/
optimizer.rs

1//! Public API for join order optimization
2
3use super::config::ParallelSearchConfig;
4use super::context::JoinOrderContext;
5use super::reorder::JoinOrderAnalyzer;
6use crate::optimizer::aggregate_analysis::AggregateAnalysis;
7
8/// Performs join order optimization via exhaustive search
9pub struct JoinOrderSearch {
10    pub(super) context: JoinOrderContext,
11}
12
13impl JoinOrderSearch {
14    /// Create a new join order search from an analyzer with real table statistics
15    pub fn from_analyzer(
16        analyzer: &JoinOrderAnalyzer,
17        database: &vibesql_storage::Database,
18    ) -> Self {
19        Self::from_analyzer_with_predicates(
20            analyzer,
21            database,
22            &std::collections::HashMap::new(),
23            &std::collections::HashMap::new(),
24        )
25    }
26
27    /// Create a new join order search with WHERE clause selectivity applied
28    ///
29    /// This version accounts for table-local predicates when estimating cardinalities,
30    /// which helps choose better join orders for queries like TPC-H Q3 where filter
31    /// predicates significantly reduce table sizes before joining.
32    ///
33    /// # Parameters
34    /// - `alias_to_table`: Maps table aliases (e.g., "n1", "n2") to actual table names (e.g., "nation").
35    ///   This is critical for queries with self-joins (like TPC-H Q7 with two nation aliases)
36    ///   to correctly look up table cardinalities and statistics.
37    pub fn from_analyzer_with_predicates(
38        analyzer: &JoinOrderAnalyzer,
39        database: &vibesql_storage::Database,
40        table_local_predicates: &std::collections::HashMap<String, Vec<vibesql_ast::Expression>>,
41        alias_to_table: &std::collections::HashMap<String, String>,
42    ) -> Self {
43        let edges = analyzer.edges().to_vec();
44        let edge_selectivities =
45            JoinOrderContext::compute_edge_selectivities(&edges, database, alias_to_table);
46
47        let num_tables = analyzer.tables().len();
48
49        // Extract base cardinalities (before filters) for cascading filter tracking
50        let table_base_cardinalities =
51            JoinOrderContext::extract_base_cardinalities(analyzer, database, alias_to_table);
52
53        let context = JoinOrderContext {
54            all_tables: analyzer.tables().clone(),
55            edges,
56            table_cardinalities: JoinOrderContext::extract_cardinalities_with_selectivity(
57                analyzer,
58                database,
59                table_local_predicates,
60                alias_to_table,
61            ),
62            table_base_cardinalities,
63            edge_selectivities,
64            config: ParallelSearchConfig::with_table_count(num_tables),
65            aggregate_analysis: None,
66        };
67
68        Self { context }
69    }
70
71    /// Create a new join order search with aggregate-aware optimization
72    ///
73    /// This version accepts aggregate analysis results and can adjust join order
74    /// decisions based on GROUP BY/HAVING clauses. When HAVING filters are selective,
75    /// the optimizer may prefer different join orders that enable early aggregation.
76    ///
77    /// # Parameters
78    /// - `alias_to_table`: Maps table aliases to actual table names for database lookups.
79    pub fn from_analyzer_with_aggregates(
80        analyzer: &JoinOrderAnalyzer,
81        database: &vibesql_storage::Database,
82        table_local_predicates: &std::collections::HashMap<String, Vec<vibesql_ast::Expression>>,
83        alias_to_table: &std::collections::HashMap<String, String>,
84        aggregate_analysis: AggregateAnalysis,
85    ) -> Self {
86        let edges = analyzer.edges().to_vec();
87        let edge_selectivities =
88            JoinOrderContext::compute_edge_selectivities(&edges, database, alias_to_table);
89
90        let num_tables = analyzer.tables().len();
91
92        // Extract base cardinalities (before filters) for cascading filter tracking
93        let table_base_cardinalities =
94            JoinOrderContext::extract_base_cardinalities(analyzer, database, alias_to_table);
95
96        let context = JoinOrderContext {
97            all_tables: analyzer.tables().clone(),
98            edges,
99            table_cardinalities: JoinOrderContext::extract_cardinalities_with_selectivity(
100                analyzer,
101                database,
102                table_local_predicates,
103                alias_to_table,
104            ),
105            table_base_cardinalities,
106            edge_selectivities,
107            config: ParallelSearchConfig::with_table_count(num_tables),
108            aggregate_analysis: Some(aggregate_analysis),
109        };
110
111        Self { context }
112    }
113
114    /// Find optimal join order by exploring search space
115    ///
116    /// Returns list of table names in the order they should be joined.
117    ///
118    /// When time-bounded search is enabled (default), uses parallel BFS for all
119    /// multi-table queries with a configurable time budget. This allows optimization
120    /// of large queries (9+ tables) while preventing excessive search time.
121    ///
122    /// When time-bounded search is disabled, uses legacy behavior: parallel BFS for
123    /// 3-6 table queries with highly connected join graphs, DFS for others.
124    pub fn find_optimal_order(&self) -> Vec<String> {
125        if self.context.all_tables.is_empty() {
126            return Vec::new();
127        }
128
129        // Use time-bounded BFS for all multi-table queries when enabled
130        if self.context.config.use_time_budget {
131            // Time-bounded BFS handles all query sizes with time budget protection
132            self.context.find_optimal_order_parallel()
133        } else {
134            // Legacy behavior: table-count based decision
135            if self.should_use_parallel_search() {
136                self.context.find_optimal_order_parallel()
137            } else {
138                self.context.find_optimal_order_dfs()
139            }
140        }
141    }
142
143    /// Determine whether to use parallel BFS or sequential DFS
144    pub(super) fn should_use_parallel_search(&self) -> bool {
145        // Don't parallelize if disabled
146        if !self.context.config.enabled {
147            return false;
148        }
149
150        let num_tables = self.context.all_tables.len();
151
152        // Don't parallelize small queries (< 3 tables)
153        if num_tables < 3 {
154            return false;
155        }
156
157        // Don't parallelize beyond depth limit (memory constraints)
158        if num_tables > self.context.config.max_depth {
159            return false;
160        }
161
162        // Parallel BFS beneficial for highly connected graphs
163        // Calculate edge density: edges per table
164        let edge_density = self.context.edges.len() as f64 / num_tables as f64;
165
166        // High edge density suggests complex join graph → parallel beneficial
167        // Threshold of 1.5 means we need at least 1-2 edges per table
168        edge_density >= 1.5
169    }
170}