vibesql_executor/select/join/search/
state.rs

1//! State types for join order search
2
3use std::collections::{BTreeMap, BTreeSet};
4
5/// Represents the cost of joining a set of tables
6#[derive(Debug, Clone, Copy, PartialEq)]
7pub struct JoinCost {
8    /// Estimated number of intermediate rows
9    pub cardinality: usize,
10    /// Estimated comparison operations
11    pub operations: u64,
12}
13
14impl JoinCost {
15    pub fn new(cardinality: usize, operations: u64) -> Self {
16        Self { cardinality, operations }
17    }
18
19    /// Estimate total cost as a comparable value
20    /// Prioritizes reducing intermediate row count (cardinality)
21    /// then comparison operations
22    pub fn total(&self) -> u64 {
23        // Weight cardinality heavily since it affects downstream joins
24        // 1 additional row impacts all future joins
25        // Use saturating arithmetic to prevent overflow with large intermediate results
26        (self.cardinality as u64).saturating_mul(1000).saturating_add(self.operations)
27    }
28}
29
30/// Tracks cascading filter information for accurate cardinality estimation
31///
32/// When a table has local predicates applied (e.g., `o_orderdate < '1995-03-15'`),
33/// this affects downstream joins. Rows that survive the filter have correlated
34/// characteristics, so joins through filtered tables should not assume independent
35/// selectivity.
36#[derive(Debug, Clone, Default)]
37pub struct CascadingFilterState {
38    /// Tables that have had local predicates applied
39    /// Maps table name -> cumulative selectivity factor applied to that table
40    pub filtered_tables: BTreeMap<String, f64>,
41
42    /// Cumulative correlation factor for the current join chain
43    /// Starts at 1.0, decreases as we join through more filtered tables
44    /// This represents how much the intermediate result is "tighter" than
45    /// independent selectivity would suggest
46    pub correlation_factor: f64,
47}
48
49impl CascadingFilterState {
50    pub fn new() -> Self {
51        Self { filtered_tables: BTreeMap::new(), correlation_factor: 1.0 }
52    }
53
54    /// Record that a table has had filters applied with given selectivity
55    pub fn add_filtered_table(&mut self, table: String, selectivity: f64) {
56        self.filtered_tables.insert(table, selectivity);
57    }
58
59    /// Check if a table has been filtered
60    pub fn is_filtered(&self, table: &str) -> bool {
61        self.filtered_tables.contains_key(table)
62    }
63
64    /// Get the selectivity applied to a table (1.0 if not filtered)
65    pub fn get_table_selectivity(&self, table: &str) -> f64 {
66        self.filtered_tables.get(table).copied().unwrap_or(1.0)
67    }
68
69    /// Apply correlation adjustment when joining through filtered tables
70    ///
71    /// When joining table B to a set containing filtered table A, the output
72    /// cardinality is reduced because:
73    /// 1. The rows in A that survived the filter are not randomly distributed
74    /// 2. The join key values in the filtered A may correlate with the filter predicate
75    ///
76    /// We use a conservative factor: if a table was filtered, joins to it
77    /// produce fewer rows than independent selectivity would suggest.
78    pub fn apply_correlation_for_join(&mut self, joined_tables: &BTreeSet<String>) {
79        // Count how many filtered tables we're joining through
80        let filtered_count = joined_tables.iter().filter(|t| self.is_filtered(t)).count();
81
82        if filtered_count > 0 {
83            // Each filtered table in the join chain reduces the correlation factor
84            // Use a conservative reduction: 0.85 per filtered table
85            // This means joins through filtered tables produce ~15% fewer rows
86            // than independent selectivity would predict
87            self.correlation_factor *= 0.85_f64.powi(filtered_count as i32);
88        }
89    }
90}
91
92/// State during join order search
93#[derive(Debug, Clone)]
94pub(super) struct SearchState {
95    /// Tables already joined
96    pub joined_tables: BTreeSet<String>,
97    /// Cumulative cost so far
98    pub cost_so_far: JoinCost,
99    /// Ordering of tables
100    pub order: Vec<String>,
101    /// Current intermediate result size (rows after all joins so far)
102    pub current_cardinality: usize,
103    /// Cascading filter state for accurate cardinality estimation
104    pub filter_state: CascadingFilterState,
105}