use xlog_core::{RelId, ScalarType};
/// Bookkeeping for a single relation: size counters, per-column statistics
/// and an access-heat score.
#[derive(Debug, Clone)]
pub struct RelationStats {
    /// Identifier of the relation these statistics belong to.
    pub rel_id: RelId,
    /// Row count as last supplied to [`RelationStats::update_cardinality`].
    pub cardinality: u64,
    /// Byte size as last supplied to [`RelationStats::update_byte_size`].
    pub byte_size: u64,
    /// Per-column statistics, in the order they were added.
    pub column_stats: Vec<ColumnStats>,
    /// Access score: raised by [`RelationStats::record_access`] and scaled by
    /// [`RelationStats::decay_heat`].
    pub heat: f32,
    /// Whole seconds since the Unix epoch at the latest recorded access;
    /// `0` until the first access is recorded.
    pub last_access: u64,
    /// Index flag; starts out `false` and no method of this type modifies it.
    pub has_index: bool,
}

impl RelationStats {
    /// Builds statistics for `rel_id` with every counter zeroed, no column
    /// entries, zero heat and `has_index` cleared.
    pub fn new(rel_id: RelId) -> Self {
        RelationStats {
            rel_id,
            cardinality: 0,
            byte_size: 0,
            column_stats: Vec::new(),
            heat: 0.0,
            last_access: 0,
            has_index: false,
        }
    }

    /// Overwrites the stored row count with `rows`.
    pub fn update_cardinality(&mut self, rows: u64) {
        self.cardinality = rows;
    }

    /// Overwrites the stored byte size with `bytes`.
    pub fn update_byte_size(&mut self, bytes: u64) {
        self.byte_size = bytes;
    }

    /// Registers one access: blends the heat towards 1.0 and stamps
    /// `last_access` with the current wall-clock time in seconds.
    ///
    /// If the system clock reports a time before the Unix epoch the stamp
    /// falls back to `0`.
    pub fn record_access(&mut self) {
        // Share of the previous heat that survives each access.
        const RETAINED: f32 = 0.9;
        // Fixed contribution of the access being recorded.
        const BUMP: f32 = 0.1;

        let now_secs = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|elapsed| elapsed.as_secs())
            .unwrap_or(0);

        self.heat = self.heat * RETAINED + BUMP;
        self.last_access = now_secs;
    }

    /// Multiplies the current heat by `factor`; the factor is applied as
    /// given, without validation.
    pub fn decay_heat(&mut self, factor: f32) {
        self.heat = self.heat * factor;
    }

    /// Appends `col_stats` to the per-column list. No de-duplication by
    /// column index is performed.
    pub fn add_column(&mut self, col_stats: ColumnStats) {
        self.column_stats.push(col_stats);
    }

    /// Returns the first column entry whose `col_idx` equals `col_idx`, or
    /// `None` when there is no such entry.
    pub fn get_column(&self, col_idx: usize) -> Option<&ColumnStats> {
        for entry in &self.column_stats {
            if entry.col_idx == col_idx {
                return Some(entry);
            }
        }
        None
    }

    /// Mutable counterpart of [`RelationStats::get_column`].
    pub fn get_column_mut(&mut self, col_idx: usize) -> Option<&mut ColumnStats> {
        for entry in &mut self.column_stats {
            if entry.col_idx == col_idx {
                return Some(entry);
            }
        }
        None
    }

    /// Fraction of the relation represented by `estimated_matches` rows,
    /// clamped to `[0.0, 1.0]`.
    ///
    /// Returns `1.0` when the stored cardinality is zero.
    pub fn estimate_selectivity(&self, estimated_matches: u64) -> f64 {
        match self.cardinality {
            0 => 1.0,
            total => (estimated_matches as f64 / total as f64).clamp(0.0, 1.0),
        }
    }
}
/// Statistics describing one column of a relation.
#[derive(Debug, Clone)]
pub struct ColumnStats {
    /// Index identifying the column these statistics describe.
    pub col_idx: usize,
    /// Scalar type of the column; consulted by [`ColumnStats::value_size_bytes`].
    pub dtype: ScalarType,
    /// Null count as last supplied to [`ColumnStats::update_null_count`].
    pub null_count: u64,
    /// Distinct-value estimate; `0` means "not known yet".
    pub distinct_estimate: u64,
    /// Lower bound recorded by [`ColumnStats::update_range`], if any.
    pub min_value: Option<i64>,
    /// Upper bound recorded by [`ColumnStats::update_range`], if any.
    pub max_value: Option<i64>,
    /// Average width recorded by [`ColumnStats::update_avg_width`], if any.
    pub avg_width: Option<f32>,
}

impl ColumnStats {
    /// Equality selectivity assumed when no distinct estimate or row count
    /// is available.
    const DEFAULT_EQUALITY_SELECTIVITY: f64 = 0.1;

    /// Range selectivity assumed when the column bounds are missing or
    /// inverted.
    const DEFAULT_RANGE_SELECTIVITY: f64 = 0.25;

    /// Creates statistics for column `col_idx` of type `dtype` with zero
    /// counters and no recorded range or width.
    pub fn new(col_idx: usize, dtype: ScalarType) -> Self {
        Self {
            col_idx,
            dtype,
            null_count: 0,
            distinct_estimate: 0,
            min_value: None,
            max_value: None,
            avg_width: None,
        }
    }

    /// Overwrites the distinct-value estimate.
    pub fn update_distinct(&mut self, estimate: u64) {
        self.distinct_estimate = estimate;
    }

    /// Records `min` and `max` as the column bounds. The pair is stored as
    /// given; `min <= max` is not checked here.
    pub fn update_range(&mut self, min: i64, max: i64) {
        self.min_value = Some(min);
        self.max_value = Some(max);
    }

    /// Overwrites the null count.
    pub fn update_null_count(&mut self, count: u64) {
        self.null_count = count;
    }

    /// Records the average value width.
    pub fn update_avg_width(&mut self, width: f32) {
        self.avg_width = Some(width);
    }

    /// Estimated selectivity of an equality predicate on this column:
    /// `1 / distinct_estimate`.
    ///
    /// Falls back to `0.1` when either the distinct estimate or
    /// `total_rows` is zero. `total_rows` only gates that fallback.
    pub fn equality_selectivity(&self, total_rows: u64) -> f64 {
        if self.distinct_estimate == 0 || total_rows == 0 {
            return Self::DEFAULT_EQUALITY_SELECTIVITY;
        }
        1.0 / self.distinct_estimate as f64
    }

    /// Estimated selectivity of the predicate `low <= value <= high`.
    ///
    /// The query interval is intersected with the recorded `[min, max]`
    /// bounds and the overlap is returned as a fraction of the bound width,
    /// in `[0.0, 1.0]`:
    ///
    /// * missing bounds, or `max < min`: the default `0.25`;
    /// * no overlap (including `low > high`): `0.0`;
    /// * `min == max` (single-valued column): `1.0` when the interval
    ///   contains that value, `0.0` otherwise.
    pub fn range_selectivity(&self, low: i64, high: i64) -> f64 {
        let (col_min, col_max) = match (self.min_value, self.max_value) {
            (Some(lo), Some(hi)) if hi >= lo => (lo, hi),
            _ => return Self::DEFAULT_RANGE_SELECTIVITY,
        };

        let effective_low = low.max(col_min);
        let effective_high = high.min(col_max);
        if effective_high < effective_low {
            return 0.0;
        }

        if col_max == col_min {
            // Zero-width bounds: the overlap check above has already shown
            // that the single value lies inside the query interval.
            return 1.0;
        }

        // Widen to i128 before subtracting: bounds such as
        // i64::MIN..=i64::MAX do not fit in an i64 difference.
        let col_range = (i128::from(col_max) - i128::from(col_min)) as f64;
        let query_range = (i128::from(effective_high) - i128::from(effective_low)) as f64;
        (query_range / col_range).clamp(0.0, 1.0)
    }

    /// Size in bytes of one value of this column's type, as reported by the
    /// type's `size_bytes()`.
    pub fn value_size_bytes(&self) -> usize {
        self.dtype.size_bytes()
    }
}
/// Selectivity information for a join between two relations.
#[derive(Debug, Clone)]
pub struct JoinSelectivity {
    /// Relation on the left side of the join.
    pub left_rel: RelId,
    /// Relation on the right side of the join.
    pub right_rel: RelId,
    /// Join key column indices on the left side.
    pub left_keys: Vec<usize>,
    /// Join key column indices on the right side.
    pub right_keys: Vec<usize>,
    /// Fraction of the cross product expected to survive the join.
    pub selectivity: f64,
    /// Set by [`JoinSelectivity::mark_pk_fk`]; switches
    /// [`JoinSelectivity::estimate_output_rows`] to return `right_rows`.
    pub is_pk_fk: bool,
    /// Output row count from the latest observation; cleared whenever a
    /// setter changes an input of the estimate.
    cached_output_estimate: Option<u64>,
}

impl JoinSelectivity {
    /// Creates an entry for `left_rel` ⋈ `right_rel` with no keys,
    /// selectivity `1.0`, the PK-FK flag cleared and nothing cached.
    pub fn new(left_rel: RelId, right_rel: RelId) -> Self {
        Self {
            left_rel,
            right_rel,
            left_keys: Vec::new(),
            right_keys: Vec::new(),
            selectivity: 1.0,
            is_pk_fk: false,
            cached_output_estimate: None,
        }
    }

    /// Replaces the join key columns and drops the cached observation.
    ///
    /// Debug builds assert that both key lists have the same length;
    /// release builds store them as given.
    pub fn set_keys(&mut self, left_keys: Vec<usize>, right_keys: Vec<usize>) {
        debug_assert_eq!(
            left_keys.len(),
            right_keys.len(),
            "Join key counts must match"
        );
        self.left_keys = left_keys;
        self.right_keys = right_keys;
        self.cached_output_estimate = None;
    }

    /// Stores `selectivity` clamped to `[0.0, 1.0]` and drops the cached
    /// observation.
    ///
    /// A NaN argument is ignored and the previous selectivity (and cache)
    /// is kept.
    pub fn set_selectivity(&mut self, selectivity: f64) {
        // `f64::clamp` returns NaN for a NaN receiver, so without this guard
        // a NaN would be stored and break the `[0.0, 1.0]` range.
        if selectivity.is_nan() {
            return;
        }
        self.selectivity = selectivity.clamp(0.0, 1.0);
        self.cached_output_estimate = None;
    }

    /// Flags the join as a primary-key/foreign-key join and drops the cached
    /// observation, as the other setters do, because the flag changes what
    /// [`JoinSelectivity::estimate_output_rows`] returns.
    pub fn mark_pk_fk(&mut self) {
        self.is_pk_fk = true;
        self.cached_output_estimate = None;
    }

    /// Estimates the number of rows the join produces.
    ///
    /// For a PK-FK join this is `right_rows`. Otherwise it is
    /// `left_rows * right_rows * selectivity`, truncated to an integer and
    /// never less than `1`.
    pub fn estimate_output_rows(&self, left_rows: u64, right_rows: u64) -> u64 {
        if self.is_pk_fk {
            return right_rows;
        }
        ((left_rows as f64 * right_rows as f64 * self.selectivity) as u64).max(1)
    }

    /// Derives a join selectivity from the distinct counts of the two key
    /// columns: `1 / max(left_distinct, right_distinct)`.
    ///
    /// Returns `1.0` when either count is zero.
    pub fn estimate_selectivity_from_stats(left_distinct: u64, right_distinct: u64) -> f64 {
        if left_distinct == 0 || right_distinct == 0 {
            return 1.0;
        }
        1.0 / left_distinct.max(right_distinct) as f64
    }

    /// Recomputes the selectivity from an observed execution as
    /// `output_rows / (left_rows * right_rows)`, clamped to `[0.0, 1.0]`,
    /// and caches `output_rows`.
    ///
    /// Does nothing when either input is empty.
    pub fn update_from_observation(&mut self, left_rows: u64, right_rows: u64, output_rows: u64) {
        let product = left_rows as f64 * right_rows as f64;
        if product > 0.0 {
            self.selectivity = (output_rows as f64 / product).clamp(0.0, 1.0);
            self.cached_output_estimate = Some(output_rows);
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `true` when two `f64` values differ by less than `tolerance`.
    fn close(actual: f64, expected: f64, tolerance: f64) -> bool {
        (actual - expected).abs() < tolerance
    }

    /// `f32` counterpart of `close`, used for heat values.
    fn close_f32(actual: f32, expected: f32, tolerance: f32) -> bool {
        (actual - expected).abs() < tolerance
    }

    /// Shared fixture: a join between relations 1 and 2 with default settings.
    fn sample_join() -> JoinSelectivity {
        JoinSelectivity::new(RelId(1), RelId(2))
    }

    // ----- RelationStats -----

    #[test]
    fn test_relation_stats_new() {
        let fresh = RelationStats::new(RelId(1));
        assert_eq!(fresh.rel_id, RelId(1));
        assert_eq!(fresh.cardinality, 0);
        assert_eq!(fresh.heat, 0.0);
        assert_eq!(fresh.byte_size, 0);
        assert!(fresh.column_stats.is_empty());
        assert!(!fresh.has_index);
    }

    #[test]
    fn test_relation_stats_update_cardinality() {
        let mut rel = RelationStats::new(RelId(1));
        rel.update_cardinality(1000);
        assert_eq!(rel.cardinality, 1000);
    }

    #[test]
    fn test_relation_stats_update_byte_size() {
        let mut rel = RelationStats::new(RelId(1));
        rel.update_byte_size(4096);
        assert_eq!(rel.byte_size, 4096);
    }

    #[test]
    fn test_relation_stats_update_heat() {
        let mut rel = RelationStats::new(RelId(1));
        assert_eq!(rel.heat, 0.0);

        // First access: 0.0 * 0.9 + 0.1.
        rel.record_access();
        let first = rel.heat;
        assert!(first > 0.0);
        assert!(close_f32(first, 0.1, 0.001));

        // Second access: 0.1 * 0.9 + 0.1.
        rel.record_access();
        let second = rel.heat;
        assert!(second > first);
        assert!(close_f32(second, 0.19, 0.001));

        assert!(rel.last_access > 0);
    }

    #[test]
    fn test_relation_stats_decay_heat() {
        let mut rel = RelationStats::new(RelId(1));
        for _ in 0..2 {
            rel.record_access();
        }
        let before = rel.heat;
        rel.decay_heat(0.5);
        assert!(close_f32(rel.heat, before * 0.5, 0.001));
    }

    #[test]
    fn test_relation_stats_column_management() {
        let mut rel = RelationStats::new(RelId(1));
        rel.add_column(ColumnStats::new(0, ScalarType::U32));
        rel.add_column(ColumnStats::new(1, ScalarType::I64));
        assert_eq!(rel.column_stats.len(), 2);

        for present in 0..2 {
            assert!(rel.get_column(present).is_some());
        }
        assert!(rel.get_column(2).is_none());

        // Mutate through the mutable accessor, then read back immutably.
        if let Some(first_col) = rel.get_column_mut(0) {
            first_col.update_distinct(100);
        }
        let first_col = rel.get_column(0).unwrap();
        assert_eq!(first_col.distinct_estimate, 100);
    }

    #[test]
    fn test_relation_stats_estimate_selectivity() {
        let mut populated = RelationStats::new(RelId(1));
        populated.update_cardinality(1000);
        assert!(close(populated.estimate_selectivity(100), 0.1, 0.001));

        // Zero cardinality falls back to full selectivity.
        let unpopulated = RelationStats::new(RelId(2));
        assert_eq!(unpopulated.estimate_selectivity(50), 1.0);
    }

    // ----- ColumnStats -----

    #[test]
    fn test_column_stats_new() {
        let fresh = ColumnStats::new(0, ScalarType::U32);
        assert_eq!(fresh.col_idx, 0);
        assert_eq!(fresh.dtype, ScalarType::U32);
        assert_eq!(fresh.distinct_estimate, 0);
        assert_eq!(fresh.null_count, 0);
        assert!(fresh.min_value.is_none());
        assert!(fresh.max_value.is_none());
        assert!(fresh.avg_width.is_none());
    }

    #[test]
    fn test_column_stats_update_distinct() {
        let mut column = ColumnStats::new(0, ScalarType::U32);
        column.update_distinct(500);
        assert_eq!(column.distinct_estimate, 500);
    }

    #[test]
    fn test_column_stats_update_range() {
        let mut column = ColumnStats::new(0, ScalarType::I32);
        column.update_range(-100, 100);
        assert_eq!(column.min_value, Some(-100));
        assert_eq!(column.max_value, Some(100));
    }

    #[test]
    fn test_column_stats_update_null_count() {
        let mut column = ColumnStats::new(0, ScalarType::U32);
        column.update_null_count(42);
        assert_eq!(column.null_count, 42);
    }

    #[test]
    fn test_column_stats_update_avg_width() {
        let mut column = ColumnStats::new(0, ScalarType::Symbol);
        column.update_avg_width(12.5);
        assert_eq!(column.avg_width, Some(12.5));
    }

    #[test]
    fn test_column_stats_equality_selectivity() {
        let mut with_distinct = ColumnStats::new(0, ScalarType::U32);
        with_distinct.update_distinct(100);
        assert!(close(with_distinct.equality_selectivity(1000), 0.01, 0.0001));

        // No distinct estimate: the default applies.
        let without_distinct = ColumnStats::new(1, ScalarType::U32);
        assert_eq!(without_distinct.equality_selectivity(1000), 0.1);
    }

    #[test]
    fn test_column_stats_range_selectivity() {
        let mut bounded = ColumnStats::new(0, ScalarType::I64);
        bounded.update_range(0, 100);

        // Interval fully inside the bounds.
        assert!(close(bounded.range_selectivity(25, 75), 0.5, 0.001));
        // Interval entirely outside the bounds.
        assert_eq!(bounded.range_selectivity(200, 300), 0.0);
        // Interval overlapping the upper bound.
        assert!(close(bounded.range_selectivity(50, 150), 0.5, 0.001));

        // No recorded bounds: the default applies.
        let unbounded = ColumnStats::new(1, ScalarType::I64);
        assert_eq!(unbounded.range_selectivity(0, 100), 0.25);
    }

    #[test]
    fn test_column_stats_value_size() {
        let expectations = [
            (ScalarType::U32, 4),
            (ScalarType::U64, 8),
            (ScalarType::Bool, 1),
        ];
        for &(dtype, bytes) in expectations.iter() {
            assert_eq!(ColumnStats::new(0, dtype).value_size_bytes(), bytes);
        }
    }

    // ----- JoinSelectivity -----

    #[test]
    fn test_join_selectivity_new() {
        let join = sample_join();
        assert_eq!(join.left_rel, RelId(1));
        assert_eq!(join.right_rel, RelId(2));
        assert!(join.left_keys.is_empty());
        assert!(join.right_keys.is_empty());
        assert_eq!(join.selectivity, 1.0);
        assert!(!join.is_pk_fk);
    }

    #[test]
    fn test_join_selectivity_set_keys() {
        let mut join = sample_join();
        join.set_keys(vec![0, 1], vec![0, 1]);
        assert_eq!(join.left_keys, vec![0, 1]);
        assert_eq!(join.right_keys, vec![0, 1]);
    }

    #[test]
    fn test_join_selectivity_set_selectivity() {
        let mut join = sample_join();
        join.set_selectivity(0.01);
        assert!(close(join.selectivity, 0.01, 0.0001));

        // Out-of-range requests are clamped to the nearest bound.
        let clamped = [(2.0, 1.0), (-1.0, 0.0)];
        for &(requested, stored) in clamped.iter() {
            join.set_selectivity(requested);
            assert_eq!(join.selectivity, stored);
        }
    }

    #[test]
    fn test_join_selectivity_estimate_output_rows() {
        let mut join = sample_join();

        join.set_selectivity(0.01);
        assert_eq!(join.estimate_output_rows(1000, 500), 5000);

        // The estimate never drops below one row.
        join.set_selectivity(0.0);
        assert_eq!(join.estimate_output_rows(10, 10), 1);
    }

    #[test]
    fn test_join_selectivity_pk_fk() {
        let mut join = sample_join();
        join.mark_pk_fk();
        assert!(join.is_pk_fk);

        // A PK-FK join reports the right-hand row count.
        assert_eq!(join.estimate_output_rows(100, 500), 500);
    }

    #[test]
    fn test_join_selectivity_estimate_from_stats() {
        let from_counts = JoinSelectivity::estimate_selectivity_from_stats(100, 200);
        assert!(close(from_counts, 0.005, 0.0001));

        let from_zero = JoinSelectivity::estimate_selectivity_from_stats(0, 100);
        assert_eq!(from_zero, 1.0);
    }

    #[test]
    fn test_join_selectivity_update_from_observation() {
        let mut join = sample_join();
        join.update_from_observation(1000, 500, 2500);
        assert!(close(join.selectivity, 0.005, 0.0001));
    }

    #[test]
    fn test_all_scalar_types_column_stats() {
        let every_type = [
            ScalarType::U32,
            ScalarType::U64,
            ScalarType::I32,
            ScalarType::I64,
            ScalarType::F32,
            ScalarType::F64,
            ScalarType::Bool,
            ScalarType::Symbol,
        ];
        every_type.iter().enumerate().for_each(|(position, &kind)| {
            let column = ColumnStats::new(position, kind);
            assert_eq!(column.col_idx, position);
            assert_eq!(column.dtype, kind);
            assert!(column.value_size_bytes() > 0);
        });
    }
}