vibesql_executor/select/join/search/
state.rs

1//! State types for join order search
2
3use std::collections::{BTreeMap, BTreeSet};
4
5/// Represents the cost of joining a set of tables
6#[derive(Debug, Clone, Copy, PartialEq)]
7pub struct JoinCost {
8    /// Estimated number of intermediate rows
9    pub cardinality: usize,
10    /// Estimated comparison operations
11    pub operations: u64,
12}
13
14impl JoinCost {
15    pub fn new(cardinality: usize, operations: u64) -> Self {
16        Self { cardinality, operations }
17    }
18
19    /// Estimate total cost as a comparable value
20    /// Prioritizes reducing intermediate row count (cardinality)
21    /// then comparison operations
22    pub fn total(&self) -> u64 {
23        // Weight cardinality heavily since it affects downstream joins
24        // 1 additional row impacts all future joins
25        // Use saturating arithmetic to prevent overflow with large intermediate results
26        (self.cardinality as u64).saturating_mul(1000).saturating_add(self.operations)
27    }
28}
29
30/// Tracks cascading filter information for accurate cardinality estimation
31///
32/// When a table has local predicates applied (e.g., `o_orderdate < '1995-03-15'`),
33/// this affects downstream joins. Rows that survive the filter have correlated
34/// characteristics, so joins through filtered tables should not assume independent
35/// selectivity.
36#[derive(Debug, Clone, Default)]
37pub struct CascadingFilterState {
38    /// Tables that have had local predicates applied
39    /// Maps table name -> cumulative selectivity factor applied to that table
40    pub filtered_tables: BTreeMap<String, f64>,
41
42    /// Cumulative correlation factor for the current join chain
43    /// Starts at 1.0, decreases as we join through more filtered tables
44    /// This represents how much the intermediate result is "tighter" than
45    /// independent selectivity would suggest
46    pub correlation_factor: f64,
47}
48
49impl CascadingFilterState {
50    pub fn new() -> Self {
51        Self {
52            filtered_tables: BTreeMap::new(),
53            correlation_factor: 1.0,
54        }
55    }
56
57    /// Record that a table has had filters applied with given selectivity
58    pub fn add_filtered_table(&mut self, table: String, selectivity: f64) {
59        self.filtered_tables.insert(table, selectivity);
60    }
61
62    /// Check if a table has been filtered
63    pub fn is_filtered(&self, table: &str) -> bool {
64        self.filtered_tables.contains_key(table)
65    }
66
67    /// Get the selectivity applied to a table (1.0 if not filtered)
68    pub fn get_table_selectivity(&self, table: &str) -> f64 {
69        self.filtered_tables.get(table).copied().unwrap_or(1.0)
70    }
71
72    /// Apply correlation adjustment when joining through filtered tables
73    ///
74    /// When joining table B to a set containing filtered table A, the output
75    /// cardinality is reduced because:
76    /// 1. The rows in A that survived the filter are not randomly distributed
77    /// 2. The join key values in the filtered A may correlate with the filter predicate
78    ///
79    /// We use a conservative factor: if a table was filtered, joins to it
80    /// produce fewer rows than independent selectivity would suggest.
81    pub fn apply_correlation_for_join(&mut self, joined_tables: &BTreeSet<String>) {
82        // Count how many filtered tables we're joining through
83        let filtered_count = joined_tables
84            .iter()
85            .filter(|t| self.is_filtered(t))
86            .count();
87
88        if filtered_count > 0 {
89            // Each filtered table in the join chain reduces the correlation factor
90            // Use a conservative reduction: 0.85 per filtered table
91            // This means joins through filtered tables produce ~15% fewer rows
92            // than independent selectivity would predict
93            self.correlation_factor *= 0.85_f64.powi(filtered_count as i32);
94        }
95    }
96}
97
98/// State during join order search
99#[derive(Debug, Clone)]
100pub(super) struct SearchState {
101    /// Tables already joined
102    pub joined_tables: BTreeSet<String>,
103    /// Cumulative cost so far
104    pub cost_so_far: JoinCost,
105    /// Ordering of tables
106    pub order: Vec<String>,
107    /// Current intermediate result size (rows after all joins so far)
108    pub current_cardinality: usize,
109    /// Cascading filter state for accurate cardinality estimation
110    pub filter_state: CascadingFilterState,
111}