#![allow(clippy::cast_precision_loss)]
#![allow(clippy::cast_possible_truncation)]
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
#[cfg(test)]
mod tests;
/// Aggregate statistics describing one collection.
///
/// Several fields come in pairs that the helpers in this file always write
/// together: `total_points`/`row_count`, `payload_size_bytes`/`total_size_bytes`
/// and `field_stats`/`column_stats`.
/// NOTE(review): the reason for the duplication is not visible here —
/// presumably two naming schemes are kept for compatibility; confirm.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CollectionStats {
/// Point count; `with_counts` and `StatsCollector::set_row_count` set it to
/// the same value as `row_count`.
pub total_points: u64,
/// Payload size in bytes; `StatsCollector::set_total_size` sets it to the
/// same value as `total_size_bytes`.
pub payload_size_bytes: u64,
/// Per-field statistics keyed by name; `estimate_selectivity` consults this
/// map first (reading `distinct_values`).
pub field_stats: HashMap<String, ColumnStats>,
/// Number of rows, including those counted in `deleted_count`
/// (`live_row_count` subtracts the latter from the former).
pub row_count: u64,
/// Number of rows counted as deleted.
pub deleted_count: u64,
/// Average row size in bytes. `StatsCollector::build` fills it with the
/// integer quotient `total_size_bytes / row_count` when `row_count > 0`.
pub avg_row_size_bytes: u64,
/// Total size in bytes.
pub total_size_bytes: u64,
/// Per-column statistics keyed by name; `estimate_selectivity` consults this
/// map second (reading `distinct_count`).
pub column_stats: HashMap<String, ColumnStats>,
/// Per-index statistics keyed by index name.
pub index_stats: HashMap<String, IndexStats>,
/// Milliseconds since the Unix epoch recorded by the last `mark_analyzed`
/// call, or `None` if it was never called.
pub last_analyzed_epoch_ms: Option<u64>,
}
impl CollectionStats {
    /// Creates a statistics record with every field at its default value.
    #[must_use]
    pub fn new() -> Self {
        Default::default()
    }

    /// Creates a record with the given row and deleted-row counts.
    ///
    /// `total_points` is set to `row_count` as well; all remaining fields keep
    /// their defaults.
    #[must_use]
    pub fn with_counts(row_count: u64, deleted_count: u64) -> Self {
        Self {
            deleted_count,
            row_count,
            total_points: row_count,
            ..Self::default()
        }
    }

    /// Returns `row_count - deleted_count`, saturating at zero when more rows
    /// are recorded as deleted than exist.
    #[must_use]
    pub fn live_row_count(&self) -> u64 {
        let Self {
            row_count,
            deleted_count,
            ..
        } = self;
        row_count.saturating_sub(*deleted_count)
    }

    /// Returns `deleted_count / row_count` as a float, or `0.0` when the
    /// collection has no rows.
    #[must_use]
    pub fn deletion_ratio(&self) -> f64 {
        match self.row_count {
            0 => 0.0,
            total => self.deleted_count as f64 / total as f64,
        }
    }

    /// Estimates the selectivity of an equality predicate on `column`.
    ///
    /// Lookup order:
    /// 1. `field_stats[column].distinct_values`, used when it is non-zero and
    ///    `total_points` is non-zero;
    /// 2. `column_stats[column].distinct_count`, used when it is non-zero and
    ///    `row_count` is non-zero.
    ///
    /// The result is `1 / distinct`. When neither source qualifies the
    /// fallback `0.1` is returned.
    #[must_use]
    pub fn estimate_selectivity(&self, column: &str) -> f64 {
        // Returned when no usable distinct-value information exists.
        const FALLBACK_SELECTIVITY: f64 = 0.1;

        let from_fields = self
            .field_stats
            .get(column)
            .map(|entry| entry.distinct_values)
            .filter(|&distinct| distinct > 0 && self.total_points > 0);

        // Evaluated lazily: only consulted when the field map gave nothing.
        let from_columns = || {
            self.column_stats
                .get(column)
                .map(|entry| entry.distinct_count)
                .filter(|&distinct| distinct > 0 && self.row_count > 0)
        };

        from_fields
            .or_else(from_columns)
            .map_or(FALLBACK_SELECTIVITY, |distinct| 1.0 / distinct as f64)
    }

    /// Stamps `last_analyzed_epoch_ms` with the current wall-clock time in
    /// milliseconds since the Unix epoch.
    ///
    /// If the system clock reports a time before the epoch, `0` is stored.
    pub fn mark_analyzed(&mut self) {
        let since_epoch =
            std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
        let now_ms = match since_epoch {
            Ok(elapsed) => elapsed.as_millis() as u64,
            Err(_) => 0,
        };
        self.last_analyzed_epoch_ms = Some(now_ms);
    }
}
/// Statistics for a single column (field) of a collection.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ColumnStats {
/// Column name; `StatsCollector::add_column_stats` uses it as the map key.
pub name: String,
/// Count of null values (set via `with_null_count`).
pub null_count: u64,
/// Number of distinct values; read by `CollectionStats::estimate_selectivity`
/// for entries found in `column_stats`.
pub distinct_count: u64,
/// Second distinct-value counter, set to the same number as `distinct_count`
/// by `with_distinct_count`; read by `estimate_selectivity` for entries found
/// in `field_stats`.
pub distinct_values: u64,
/// Smallest observed value as a string, if recorded.
/// NOTE(review): encoding/ordering of the string is not visible in this file.
pub min_value: Option<String>,
/// Largest observed value as a string, if recorded.
pub max_value: Option<String>,
/// Average value size in bytes.
pub avg_size_bytes: u64,
/// Optional value-distribution histogram.
pub histogram: Option<Histogram>,
}
/// One bucket of a [`Histogram`]: a numeric range and the number of values
/// counted in it.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct HistogramBucket {
/// Lower edge of the bucket.
/// NOTE(review): whether the bounds are inclusive or exclusive is not
/// established anywhere in this file — confirm with the producer.
pub lower_bound: f64,
/// Upper edge of the bucket.
pub upper_bound: f64,
/// Number of values that fell into this bucket.
pub count: u64,
}
/// Value-distribution histogram attached to a column's statistics.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Histogram {
/// The buckets making up the histogram.
/// NOTE(review): ordering and non-overlap are not enforced here.
pub buckets: Vec<HistogramBucket>,
}
impl ColumnStats {
    /// Creates statistics for the column called `name`; every other field
    /// starts at its default value.
    #[must_use]
    pub fn new(name: impl Into<String>) -> Self {
        let name = name.into();
        Self {
            name,
            ..Self::default()
        }
    }

    /// Builder step: records the number of distinct values.
    ///
    /// Both `distinct_count` and `distinct_values` receive `count`.
    #[must_use]
    pub fn with_distinct_count(self, count: u64) -> Self {
        Self {
            distinct_count: count,
            distinct_values: count,
            ..self
        }
    }

    /// Builder step: records the number of null values.
    #[must_use]
    pub fn with_null_count(self, count: u64) -> Self {
        Self {
            null_count: count,
            ..self
        }
    }
}
/// Statistics for a single index of a collection.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IndexStats {
/// Index name; `StatsCollector::add_index_stats` uses it as the map key.
pub name: String,
/// Kind of index, stored as a free-form string.
/// NOTE(review): the set of accepted values is not defined in this file.
pub index_type: String,
/// Number of entries in the index (set via `with_entry_count`).
pub entry_count: u64,
/// Depth of the index structure (set via `with_depth`).
pub depth: u32,
/// Size of the index in bytes.
pub size_bytes: u64,
}
impl IndexStats {
    /// Creates statistics for the index `name` of kind `index_type`; the
    /// numeric fields start at zero.
    #[must_use]
    pub fn new(name: impl Into<String>, index_type: impl Into<String>) -> Self {
        let (name, index_type) = (name.into(), index_type.into());
        Self {
            name,
            index_type,
            ..Self::default()
        }
    }

    /// Builder step: records the number of entries in the index.
    #[must_use]
    pub fn with_entry_count(self, count: u64) -> Self {
        Self {
            entry_count: count,
            ..self
        }
    }

    /// Builder step: records the depth of the index structure.
    #[must_use]
    pub fn with_depth(self, depth: u32) -> Self {
        Self { depth, ..self }
    }
}
/// Incrementally assembles a [`CollectionStats`] value.
///
/// Feed it counts, sizes and per-column / per-index statistics through the
/// setters, then call [`StatsCollector::build`] to obtain the finished record.
#[derive(Debug, Default)]
pub struct StatsCollector {
    /// The record being assembled.
    stats: CollectionStats,
}

impl StatsCollector {
    /// Creates a collector holding an all-default statistics record.
    #[must_use]
    pub fn new() -> Self {
        Default::default()
    }

    /// Sets the row count; `total_points` is updated to the same value.
    pub fn set_row_count(&mut self, count: u64) {
        let target = &mut self.stats;
        target.total_points = count;
        target.row_count = count;
    }

    /// Sets the number of deleted rows.
    pub fn set_deleted_count(&mut self, count: u64) {
        let target = &mut self.stats;
        target.deleted_count = count;
    }

    /// Sets the total size in bytes; `payload_size_bytes` is updated to the
    /// same value.
    pub fn set_total_size(&mut self, size: u64) {
        let target = &mut self.stats;
        target.payload_size_bytes = size;
        target.total_size_bytes = size;
    }

    /// Registers statistics for one column under `stats.name`.
    ///
    /// The entry is stored in both `column_stats` and `field_stats`, replacing
    /// any previous entry with the same name.
    pub fn add_column_stats(&mut self, stats: ColumnStats) {
        let key = stats.name.clone();
        let target = &mut self.stats;
        // Two maps need an owned copy each, hence one clone of key and value.
        target.column_stats.insert(key.clone(), stats.clone());
        target.field_stats.insert(key, stats);
    }

    /// Registers statistics for one index under `stats.name`, replacing any
    /// previous entry with the same name.
    pub fn add_index_stats(&mut self, stats: IndexStats) {
        let key = stats.name.clone();
        self.stats.index_stats.insert(key, stats);
    }

    /// Finishes collection and returns the assembled record.
    ///
    /// When `row_count` is non-zero, `avg_row_size_bytes` is set to the integer
    /// quotient `total_size_bytes / row_count`; otherwise it is left untouched.
    /// The record is stamped via `mark_analyzed` before being returned.
    #[must_use]
    pub fn build(self) -> CollectionStats {
        let mut finished = self.stats;
        // `checked_div` yields `None` only for a zero divisor, i.e. no rows.
        if let Some(average) = finished.total_size_bytes.checked_div(finished.row_count) {
            finished.avg_row_size_bytes = average;
        }
        finished.mark_analyzed();
        finished
    }
}