velesdb_core/collection/stats/
mod.rs1#![allow(clippy::cast_precision_loss)]
20#![allow(clippy::cast_possible_truncation)]
21
22use serde::{Deserialize, Serialize};
23use std::collections::HashMap;
24
25#[cfg(test)]
26mod tests;
27
28#[derive(Debug, Clone, Default, Serialize, Deserialize)]
30pub struct CollectionStats {
31 pub total_points: u64,
33 pub payload_size_bytes: u64,
35 pub field_stats: HashMap<String, ColumnStats>,
37 pub row_count: u64,
39 pub deleted_count: u64,
41 pub avg_row_size_bytes: u64,
43 pub total_size_bytes: u64,
45 pub column_stats: HashMap<String, ColumnStats>,
47 pub index_stats: HashMap<String, IndexStats>,
49 pub last_analyzed_epoch_ms: Option<u64>,
51}
52
53impl CollectionStats {
54 #[must_use]
56 pub fn new() -> Self {
57 Self::default()
58 }
59
60 #[must_use]
62 pub fn with_counts(row_count: u64, deleted_count: u64) -> Self {
63 Self {
64 total_points: row_count,
65 row_count,
66 deleted_count,
67 ..Default::default()
68 }
69 }
70
71 #[must_use]
73 pub fn live_row_count(&self) -> u64 {
74 self.row_count.saturating_sub(self.deleted_count)
75 }
76
77 #[must_use]
79 pub fn deletion_ratio(&self) -> f64 {
80 if self.row_count == 0 {
81 0.0
82 } else {
83 self.deleted_count as f64 / self.row_count as f64
84 }
85 }
86
87 #[must_use]
89 pub fn estimate_selectivity(&self, column: &str) -> f64 {
90 if let Some(col_stats) = self.field_stats.get(column) {
91 if col_stats.distinct_values > 0 && self.total_points > 0 {
92 return 1.0 / col_stats.distinct_values as f64;
93 }
94 }
95 if let Some(col_stats) = self.column_stats.get(column) {
96 if col_stats.distinct_count > 0 && self.row_count > 0 {
97 return 1.0 / col_stats.distinct_count as f64;
98 }
99 }
100 0.1
102 }
103
104 pub fn mark_analyzed(&mut self) {
106 self.last_analyzed_epoch_ms = Some(
107 std::time::SystemTime::now()
108 .duration_since(std::time::UNIX_EPOCH)
109 .map(|d| d.as_millis() as u64)
110 .unwrap_or(0),
111 );
112 }
113}
114
115#[derive(Debug, Clone, Default, Serialize, Deserialize)]
117pub struct ColumnStats {
118 pub name: String,
120 pub null_count: u64,
122 pub distinct_count: u64,
124 pub distinct_values: u64,
126 pub min_value: Option<String>,
128 pub max_value: Option<String>,
130 pub avg_size_bytes: u64,
132 pub histogram: Option<Histogram>,
134}
135
136#[derive(Debug, Clone, Default, Serialize, Deserialize)]
138pub struct HistogramBucket {
139 pub lower_bound: f64,
141 pub upper_bound: f64,
143 pub count: u64,
145}
146
147#[derive(Debug, Clone, Default, Serialize, Deserialize)]
149pub struct Histogram {
150 pub buckets: Vec<HistogramBucket>,
152}
153
154impl ColumnStats {
155 #[must_use]
157 pub fn new(name: impl Into<String>) -> Self {
158 Self {
159 name: name.into(),
160 ..Default::default()
161 }
162 }
163
164 #[must_use]
166 pub fn with_distinct_count(mut self, count: u64) -> Self {
167 self.distinct_count = count;
168 self.distinct_values = count;
169 self
170 }
171
172 #[must_use]
174 pub fn with_null_count(mut self, count: u64) -> Self {
175 self.null_count = count;
176 self
177 }
178}
179
180#[derive(Debug, Clone, Default, Serialize, Deserialize)]
182pub struct IndexStats {
183 pub name: String,
185 pub index_type: String,
187 pub entry_count: u64,
189 pub depth: u32,
191 pub size_bytes: u64,
193}
194
195impl IndexStats {
196 #[must_use]
198 pub fn new(name: impl Into<String>, index_type: impl Into<String>) -> Self {
199 Self {
200 name: name.into(),
201 index_type: index_type.into(),
202 ..Default::default()
203 }
204 }
205
206 #[must_use]
208 pub fn with_entry_count(mut self, count: u64) -> Self {
209 self.entry_count = count;
210 self
211 }
212
213 #[must_use]
215 pub fn with_depth(mut self, depth: u32) -> Self {
216 self.depth = depth;
217 self
218 }
219}
220
221#[derive(Debug, Default)]
223pub struct StatsCollector {
224 stats: CollectionStats,
225}
226
227impl StatsCollector {
228 #[must_use]
230 pub fn new() -> Self {
231 Self::default()
232 }
233
234 pub fn set_row_count(&mut self, count: u64) {
236 self.stats.row_count = count;
237 self.stats.total_points = count;
238 }
239
240 pub fn set_deleted_count(&mut self, count: u64) {
242 self.stats.deleted_count = count;
243 }
244
245 pub fn set_total_size(&mut self, size: u64) {
247 self.stats.total_size_bytes = size;
248 self.stats.payload_size_bytes = size;
249 }
250
251 pub fn add_column_stats(&mut self, stats: ColumnStats) {
253 self.stats
254 .column_stats
255 .insert(stats.name.clone(), stats.clone());
256 self.stats.field_stats.insert(stats.name.clone(), stats);
257 }
258
259 pub fn add_index_stats(&mut self, stats: IndexStats) {
261 self.stats.index_stats.insert(stats.name.clone(), stats);
262 }
263
264 #[must_use]
266 pub fn build(mut self) -> CollectionStats {
267 if self.stats.row_count > 0 {
269 self.stats.avg_row_size_bytes = self.stats.total_size_bytes / self.stats.row_count;
270 }
271
272 self.stats.mark_analyzed();
273 self.stats
274 }
275}