vibesql_executor/select/join/search/state.rs
1//! State types for join order search
2
3use std::collections::{BTreeMap, BTreeSet};
4
5/// Represents the cost of joining a set of tables
6#[derive(Debug, Clone, Copy, PartialEq)]
7pub struct JoinCost {
8 /// Estimated number of intermediate rows
9 pub cardinality: usize,
10 /// Estimated comparison operations
11 pub operations: u64,
12}
13
14impl JoinCost {
15 pub fn new(cardinality: usize, operations: u64) -> Self {
16 Self { cardinality, operations }
17 }
18
19 /// Estimate total cost as a comparable value
20 /// Prioritizes reducing intermediate row count (cardinality)
21 /// then comparison operations
22 pub fn total(&self) -> u64 {
23 // Weight cardinality heavily since it affects downstream joins
24 // 1 additional row impacts all future joins
25 // Use saturating arithmetic to prevent overflow with large intermediate results
26 (self.cardinality as u64).saturating_mul(1000).saturating_add(self.operations)
27 }
28}
29
30/// Tracks cascading filter information for accurate cardinality estimation
31///
32/// When a table has local predicates applied (e.g., `o_orderdate < '1995-03-15'`),
33/// this affects downstream joins. Rows that survive the filter have correlated
34/// characteristics, so joins through filtered tables should not assume independent
35/// selectivity.
36#[derive(Debug, Clone, Default)]
37pub struct CascadingFilterState {
38 /// Tables that have had local predicates applied
39 /// Maps table name -> cumulative selectivity factor applied to that table
40 pub filtered_tables: BTreeMap<String, f64>,
41
42 /// Cumulative correlation factor for the current join chain
43 /// Starts at 1.0, decreases as we join through more filtered tables
44 /// This represents how much the intermediate result is "tighter" than
45 /// independent selectivity would suggest
46 pub correlation_factor: f64,
47}
48
49impl CascadingFilterState {
50 pub fn new() -> Self {
51 Self { filtered_tables: BTreeMap::new(), correlation_factor: 1.0 }
52 }
53
54 /// Record that a table has had filters applied with given selectivity
55 pub fn add_filtered_table(&mut self, table: String, selectivity: f64) {
56 self.filtered_tables.insert(table, selectivity);
57 }
58
59 /// Check if a table has been filtered
60 pub fn is_filtered(&self, table: &str) -> bool {
61 self.filtered_tables.contains_key(table)
62 }
63
64 /// Get the selectivity applied to a table (1.0 if not filtered)
65 pub fn get_table_selectivity(&self, table: &str) -> f64 {
66 self.filtered_tables.get(table).copied().unwrap_or(1.0)
67 }
68
69 /// Apply correlation adjustment when joining through filtered tables
70 ///
71 /// When joining table B to a set containing filtered table A, the output
72 /// cardinality is reduced because:
73 /// 1. The rows in A that survived the filter are not randomly distributed
74 /// 2. The join key values in the filtered A may correlate with the filter predicate
75 ///
76 /// We use a conservative factor: if a table was filtered, joins to it
77 /// produce fewer rows than independent selectivity would suggest.
78 pub fn apply_correlation_for_join(&mut self, joined_tables: &BTreeSet<String>) {
79 // Count how many filtered tables we're joining through
80 let filtered_count = joined_tables.iter().filter(|t| self.is_filtered(t)).count();
81
82 if filtered_count > 0 {
83 // Each filtered table in the join chain reduces the correlation factor
84 // Use a conservative reduction: 0.85 per filtered table
85 // This means joins through filtered tables produce ~15% fewer rows
86 // than independent selectivity would predict
87 self.correlation_factor *= 0.85_f64.powi(filtered_count as i32);
88 }
89 }
90}
91
92/// State during join order search
93#[derive(Debug, Clone)]
94pub(super) struct SearchState {
95 /// Tables already joined
96 pub joined_tables: BTreeSet<String>,
97 /// Cumulative cost so far
98 pub cost_so_far: JoinCost,
99 /// Ordering of tables
100 pub order: Vec<String>,
101 /// Current intermediate result size (rows after all joins so far)
102 pub current_cardinality: usize,
103 /// Cascading filter state for accurate cardinality estimation
104 pub filter_state: CascadingFilterState,
105}