vibesql_executor/select/join/search/
optimizer.rs

1//! Public API for join order optimization
2
3use super::config::ParallelSearchConfig;
4use super::context::JoinOrderContext;
5use super::reorder::JoinOrderAnalyzer;
6use crate::optimizer::aggregate_analysis::AggregateAnalysis;
7
8/// Performs join order optimization via exhaustive search
9pub struct JoinOrderSearch {
10    pub(super) context: JoinOrderContext,
11}
12
13impl JoinOrderSearch {
14    /// Create a new join order search from an analyzer with real table statistics
15    pub fn from_analyzer(
16        analyzer: &JoinOrderAnalyzer,
17        database: &vibesql_storage::Database,
18    ) -> Self {
19        Self::from_analyzer_with_predicates(
20            analyzer,
21            database,
22            &std::collections::HashMap::new(),
23            &std::collections::HashMap::new(),
24        )
25    }
26
27    /// Create a new join order search with WHERE clause selectivity applied
28    ///
29    /// This version accounts for table-local predicates when estimating cardinalities,
30    /// which helps choose better join orders for queries like TPC-H Q3 where filter
31    /// predicates significantly reduce table sizes before joining.
32    ///
33    /// # Parameters
34    /// - `alias_to_table`: Maps table aliases (e.g., "n1", "n2") to actual table names (e.g., "nation").
35    ///   This is critical for queries with self-joins (like TPC-H Q7 with two nation aliases)
36    ///   to correctly look up table cardinalities and statistics.
37    pub fn from_analyzer_with_predicates(
38        analyzer: &JoinOrderAnalyzer,
39        database: &vibesql_storage::Database,
40        table_local_predicates: &std::collections::HashMap<String, Vec<vibesql_ast::Expression>>,
41        alias_to_table: &std::collections::HashMap<String, String>,
42    ) -> Self {
43        let edges = analyzer.edges().to_vec();
44        let edge_selectivities = JoinOrderContext::compute_edge_selectivities(&edges, database, alias_to_table);
45
46        let num_tables = analyzer.tables().len();
47
48        // Extract base cardinalities (before filters) for cascading filter tracking
49        let table_base_cardinalities = JoinOrderContext::extract_base_cardinalities(
50            analyzer,
51            database,
52            alias_to_table,
53        );
54
55        let context = JoinOrderContext {
56            all_tables: analyzer.tables().clone(),
57            edges,
58            table_cardinalities: JoinOrderContext::extract_cardinalities_with_selectivity(
59                analyzer,
60                database,
61                table_local_predicates,
62                alias_to_table,
63            ),
64            table_base_cardinalities,
65            edge_selectivities,
66            config: ParallelSearchConfig::with_table_count(num_tables),
67            aggregate_analysis: None,
68        };
69
70        Self { context }
71    }
72
73    /// Create a new join order search with aggregate-aware optimization
74    ///
75    /// This version accepts aggregate analysis results and can adjust join order
76    /// decisions based on GROUP BY/HAVING clauses. When HAVING filters are selective,
77    /// the optimizer may prefer different join orders that enable early aggregation.
78    ///
79    /// # Parameters
80    /// - `alias_to_table`: Maps table aliases to actual table names for database lookups.
81    pub fn from_analyzer_with_aggregates(
82        analyzer: &JoinOrderAnalyzer,
83        database: &vibesql_storage::Database,
84        table_local_predicates: &std::collections::HashMap<String, Vec<vibesql_ast::Expression>>,
85        alias_to_table: &std::collections::HashMap<String, String>,
86        aggregate_analysis: AggregateAnalysis,
87    ) -> Self {
88        let edges = analyzer.edges().to_vec();
89        let edge_selectivities = JoinOrderContext::compute_edge_selectivities(&edges, database, alias_to_table);
90
91        let num_tables = analyzer.tables().len();
92
93        // Extract base cardinalities (before filters) for cascading filter tracking
94        let table_base_cardinalities = JoinOrderContext::extract_base_cardinalities(
95            analyzer,
96            database,
97            alias_to_table,
98        );
99
100        let context = JoinOrderContext {
101            all_tables: analyzer.tables().clone(),
102            edges,
103            table_cardinalities: JoinOrderContext::extract_cardinalities_with_selectivity(
104                analyzer,
105                database,
106                table_local_predicates,
107                alias_to_table,
108            ),
109            table_base_cardinalities,
110            edge_selectivities,
111            config: ParallelSearchConfig::with_table_count(num_tables),
112            aggregate_analysis: Some(aggregate_analysis),
113        };
114
115        Self { context }
116    }
117
118    /// Find optimal join order by exploring search space
119    ///
120    /// Returns list of table names in the order they should be joined.
121    ///
122    /// When time-bounded search is enabled (default), uses parallel BFS for all
123    /// multi-table queries with a configurable time budget. This allows optimization
124    /// of large queries (9+ tables) while preventing excessive search time.
125    ///
126    /// When time-bounded search is disabled, uses legacy behavior: parallel BFS for
127    /// 3-6 table queries with highly connected join graphs, DFS for others.
128    pub fn find_optimal_order(&self) -> Vec<String> {
129        if self.context.all_tables.is_empty() {
130            return Vec::new();
131        }
132
133        // Use time-bounded BFS for all multi-table queries when enabled
134        if self.context.config.use_time_budget {
135            // Time-bounded BFS handles all query sizes with time budget protection
136            self.context.find_optimal_order_parallel()
137        } else {
138            // Legacy behavior: table-count based decision
139            if self.should_use_parallel_search() {
140                self.context.find_optimal_order_parallel()
141            } else {
142                self.context.find_optimal_order_dfs()
143            }
144        }
145    }
146
147    /// Determine whether to use parallel BFS or sequential DFS
148    pub(super) fn should_use_parallel_search(&self) -> bool {
149        // Don't parallelize if disabled
150        if !self.context.config.enabled {
151            return false;
152        }
153
154        let num_tables = self.context.all_tables.len();
155
156        // Don't parallelize small queries (< 3 tables)
157        if num_tables < 3 {
158            return false;
159        }
160
161        // Don't parallelize beyond depth limit (memory constraints)
162        if num_tables > self.context.config.max_depth {
163            return false;
164        }
165
166        // Parallel BFS beneficial for highly connected graphs
167        // Calculate edge density: edges per table
168        let edge_density = self.context.edges.len() as f64 / num_tables as f64;
169
170        // High edge density suggests complex join graph → parallel beneficial
171        // Threshold of 1.5 means we need at least 1-2 edges per table
172        edge_density >= 1.5
173    }
174}