vibesql_executor/select/join/search/state.rs
1//! State types for join order search
2
3use std::collections::{BTreeMap, BTreeSet};
4
5/// Represents the cost of joining a set of tables
6#[derive(Debug, Clone, Copy, PartialEq)]
7pub struct JoinCost {
8 /// Estimated number of intermediate rows
9 pub cardinality: usize,
10 /// Estimated comparison operations
11 pub operations: u64,
12}
13
14impl JoinCost {
15 pub fn new(cardinality: usize, operations: u64) -> Self {
16 Self { cardinality, operations }
17 }
18
19 /// Estimate total cost as a comparable value
20 /// Prioritizes reducing intermediate row count (cardinality)
21 /// then comparison operations
22 pub fn total(&self) -> u64 {
23 // Weight cardinality heavily since it affects downstream joins
24 // 1 additional row impacts all future joins
25 // Use saturating arithmetic to prevent overflow with large intermediate results
26 (self.cardinality as u64).saturating_mul(1000).saturating_add(self.operations)
27 }
28}
29
30/// Tracks cascading filter information for accurate cardinality estimation
31///
32/// When a table has local predicates applied (e.g., `o_orderdate < '1995-03-15'`),
33/// this affects downstream joins. Rows that survive the filter have correlated
34/// characteristics, so joins through filtered tables should not assume independent
35/// selectivity.
36#[derive(Debug, Clone, Default)]
37pub struct CascadingFilterState {
38 /// Tables that have had local predicates applied
39 /// Maps table name -> cumulative selectivity factor applied to that table
40 pub filtered_tables: BTreeMap<String, f64>,
41
42 /// Cumulative correlation factor for the current join chain
43 /// Starts at 1.0, decreases as we join through more filtered tables
44 /// This represents how much the intermediate result is "tighter" than
45 /// independent selectivity would suggest
46 pub correlation_factor: f64,
47}
48
49impl CascadingFilterState {
50 pub fn new() -> Self {
51 Self {
52 filtered_tables: BTreeMap::new(),
53 correlation_factor: 1.0,
54 }
55 }
56
57 /// Record that a table has had filters applied with given selectivity
58 pub fn add_filtered_table(&mut self, table: String, selectivity: f64) {
59 self.filtered_tables.insert(table, selectivity);
60 }
61
62 /// Check if a table has been filtered
63 pub fn is_filtered(&self, table: &str) -> bool {
64 self.filtered_tables.contains_key(table)
65 }
66
67 /// Get the selectivity applied to a table (1.0 if not filtered)
68 pub fn get_table_selectivity(&self, table: &str) -> f64 {
69 self.filtered_tables.get(table).copied().unwrap_or(1.0)
70 }
71
72 /// Apply correlation adjustment when joining through filtered tables
73 ///
74 /// When joining table B to a set containing filtered table A, the output
75 /// cardinality is reduced because:
76 /// 1. The rows in A that survived the filter are not randomly distributed
77 /// 2. The join key values in the filtered A may correlate with the filter predicate
78 ///
79 /// We use a conservative factor: if a table was filtered, joins to it
80 /// produce fewer rows than independent selectivity would suggest.
81 pub fn apply_correlation_for_join(&mut self, joined_tables: &BTreeSet<String>) {
82 // Count how many filtered tables we're joining through
83 let filtered_count = joined_tables
84 .iter()
85 .filter(|t| self.is_filtered(t))
86 .count();
87
88 if filtered_count > 0 {
89 // Each filtered table in the join chain reduces the correlation factor
90 // Use a conservative reduction: 0.85 per filtered table
91 // This means joins through filtered tables produce ~15% fewer rows
92 // than independent selectivity would predict
93 self.correlation_factor *= 0.85_f64.powi(filtered_count as i32);
94 }
95 }
96}
97
98/// State during join order search
99#[derive(Debug, Clone)]
100pub(super) struct SearchState {
101 /// Tables already joined
102 pub joined_tables: BTreeSet<String>,
103 /// Cumulative cost so far
104 pub cost_so_far: JoinCost,
105 /// Ordering of tables
106 pub order: Vec<String>,
107 /// Current intermediate result size (rows after all joins so far)
108 pub current_cardinality: usize,
109 /// Cascading filter state for accurate cardinality estimation
110 pub filter_state: CascadingFilterState,
111}