1use std::collections::{HashMap, HashSet};
29use std::sync::Arc;
30
31use super::cost::{ColumnStats, TableStats};
32use super::histogram::{Histogram, MostCommonValues};
33use super::stats_catalog::load_persisted_stats;
34use crate::storage::index::{IndexRegistry, IndexScope, IndexStats};
35
36pub trait StatsProvider: Send + Sync {
42 fn table_stats(&self, table: &str) -> Option<TableStats>;
45
46 fn column_stats(&self, table: &str, column: &str) -> Option<ColumnStats> {
50 self.table_stats(table)?
51 .columns
52 .into_iter()
53 .find(|c| c.name == column)
54 }
55
56 fn index_stats(&self, table: &str, column: &str) -> Option<IndexStats>;
61
62 fn has_index(&self, table: &str, column: &str) -> bool {
64 self.index_stats(table, column).is_some()
65 }
66
67 fn distinct_values(&self, table: &str, column: &str) -> Option<u64> {
70 if let Some(cs) = self.column_stats(table, column) {
71 if cs.distinct_count > 0 {
72 return Some(cs.distinct_count);
73 }
74 }
75 self.index_stats(table, column)
76 .map(|s| s.distinct_keys as u64)
77 }
78
79 fn column_histogram(&self, _table: &str, _column: &str) -> Option<Histogram> {
86 None
87 }
88
89 fn column_mcv(&self, _table: &str, _column: &str) -> Option<MostCommonValues> {
94 None
95 }
96}
97
98#[derive(Debug, Clone, Default)]
101pub struct NullProvider;
102
103impl StatsProvider for NullProvider {
104 fn table_stats(&self, _table: &str) -> Option<TableStats> {
105 None
106 }
107
108 fn index_stats(&self, _table: &str, _column: &str) -> Option<IndexStats> {
109 None
110 }
111}
112
113#[derive(Debug, Clone, Default)]
116pub struct StaticProvider {
117 tables: HashMap<String, TableStats>,
118 indexes: HashMap<(String, String), IndexStats>,
120 histograms: HashMap<(String, String), Histogram>,
122 mcvs: HashMap<(String, String), MostCommonValues>,
124}
125
126impl StaticProvider {
127 pub fn new() -> Self {
129 Self::default()
130 }
131
132 pub fn with_table(mut self, table: impl Into<String>, stats: TableStats) -> Self {
134 self.tables.insert(table.into(), stats);
135 self
136 }
137
138 pub fn with_index(
140 mut self,
141 table: impl Into<String>,
142 column: impl Into<String>,
143 stats: IndexStats,
144 ) -> Self {
145 self.indexes.insert((table.into(), column.into()), stats);
146 self
147 }
148
149 pub fn insert_table(&mut self, table: impl Into<String>, stats: TableStats) {
151 self.tables.insert(table.into(), stats);
152 }
153
154 pub fn insert_index(
156 &mut self,
157 table: impl Into<String>,
158 column: impl Into<String>,
159 stats: IndexStats,
160 ) {
161 self.indexes.insert((table.into(), column.into()), stats);
162 }
163
164 pub fn with_histogram(
166 mut self,
167 table: impl Into<String>,
168 column: impl Into<String>,
169 histogram: Histogram,
170 ) -> Self {
171 self.histograms
172 .insert((table.into(), column.into()), histogram);
173 self
174 }
175
176 pub fn with_mcv(
178 mut self,
179 table: impl Into<String>,
180 column: impl Into<String>,
181 mcv: MostCommonValues,
182 ) -> Self {
183 self.mcvs.insert((table.into(), column.into()), mcv);
184 self
185 }
186}
187
188impl StatsProvider for StaticProvider {
189 fn table_stats(&self, table: &str) -> Option<TableStats> {
190 self.tables.get(table).cloned()
191 }
192
193 fn index_stats(&self, table: &str, column: &str) -> Option<IndexStats> {
194 self.indexes
195 .get(&(table.to_string(), column.to_string()))
196 .cloned()
197 }
198
199 fn column_histogram(&self, table: &str, column: &str) -> Option<Histogram> {
200 self.histograms
201 .get(&(table.to_string(), column.to_string()))
202 .cloned()
203 }
204
205 fn column_mcv(&self, table: &str, column: &str) -> Option<MostCommonValues> {
206 self.mcvs
207 .get(&(table.to_string(), column.to_string()))
208 .cloned()
209 }
210}
211
212pub struct CatalogStatsProvider {
218 tables: HashMap<String, TableStats>,
219 histograms: HashMap<(String, String), Histogram>,
220 mcvs: HashMap<(String, String), MostCommonValues>,
221}
222
223impl CatalogStatsProvider {
224 pub fn from_catalog(snapshot: &crate::api::CatalogSnapshot) -> Self {
228 const AVG_ROW_SIZE: u32 = 128;
229 const ROWS_PER_PAGE: u64 = 100;
230 let tables = snapshot
231 .stats_by_collection
232 .iter()
233 .map(|(name, cstats)| {
234 let row_count = cstats.entities as u64;
235 let stats = TableStats {
236 row_count,
237 avg_row_size: AVG_ROW_SIZE,
238 page_count: (row_count / ROWS_PER_PAGE).max(1),
239 columns: vec![],
240 };
241 (name.clone(), stats)
242 })
243 .collect();
244 Self {
245 tables,
246 histograms: HashMap::new(),
247 mcvs: HashMap::new(),
248 }
249 }
250
251 pub fn from_db(db: &crate::storage::RedDB) -> Self {
254 let mut provider = Self::from_catalog(&db.catalog_snapshot());
255 let persisted = load_persisted_stats(&db.store(), &db.catalog_snapshot());
256 for (table, stats) in persisted.tables {
257 provider
258 .tables
259 .entry(table)
260 .and_modify(|current| {
261 current.avg_row_size = stats.avg_row_size;
262 current.page_count = stats.page_count.max(current.page_count);
263 current.columns = merge_persisted_columns(¤t.columns, &stats.columns);
264 })
265 .or_insert(stats);
266 }
267 provider.histograms = persisted.histograms;
268 provider.mcvs = persisted.mcvs;
269 provider
270 }
271}
272
273fn merge_persisted_columns(current: &[ColumnStats], persisted: &[ColumnStats]) -> Vec<ColumnStats> {
274 let current_by_name: HashMap<&str, &ColumnStats> = current
275 .iter()
276 .map(|column| (column.name.as_str(), column))
277 .collect();
278 let mut merged = Vec::with_capacity(current.len().max(persisted.len()));
279 let mut seen = HashSet::new();
280
281 for column in persisted {
282 let mut merged_column = column.clone();
283 if let Some(current_column) = current_by_name.get(merged_column.name.as_str()) {
284 merged_column.has_index = current_column.has_index;
285 }
286 seen.insert(merged_column.name.clone());
287 merged.push(merged_column);
288 }
289
290 for column in current {
291 if seen.insert(column.name.clone()) {
292 merged.push(column.clone());
293 }
294 }
295
296 merged
297}
298
299impl StatsProvider for CatalogStatsProvider {
300 fn table_stats(&self, table: &str) -> Option<TableStats> {
301 self.tables.get(table).cloned()
302 }
303
304 fn index_stats(&self, _table: &str, _column: &str) -> Option<IndexStats> {
305 None
306 }
307
308 fn column_histogram(&self, table: &str, column: &str) -> Option<Histogram> {
309 self.histograms
310 .get(&(table.to_string(), column.to_string()))
311 .cloned()
312 }
313
314 fn column_mcv(&self, table: &str, column: &str) -> Option<MostCommonValues> {
315 self.mcvs
316 .get(&(table.to_string(), column.to_string()))
317 .cloned()
318 }
319}
320
321pub struct RegistryProvider {
333 registry: Arc<IndexRegistry>,
334 table_fallback: Option<Arc<dyn StatsProvider>>,
335}
336
337impl RegistryProvider {
338 pub fn new(registry: Arc<IndexRegistry>) -> Self {
341 Self {
342 registry,
343 table_fallback: None,
344 }
345 }
346
347 pub fn with_table_fallback(mut self, fallback: Arc<dyn StatsProvider>) -> Self {
350 self.table_fallback = Some(fallback);
351 self
352 }
353}
354
355impl StatsProvider for RegistryProvider {
356 fn table_stats(&self, table: &str) -> Option<TableStats> {
357 self.table_fallback
358 .as_ref()
359 .and_then(|f| f.table_stats(table))
360 }
361
362 fn index_stats(&self, table: &str, column: &str) -> Option<IndexStats> {
363 self.registry
364 .get(&IndexScope::table(table, column))
365 .map(|idx| idx.stats())
366 }
367}
368
369#[cfg(test)]
370mod tests {
371 use super::*;
372 use crate::storage::index::IndexKind;
373
374 fn sample_stats(rows: u64) -> TableStats {
375 TableStats {
376 row_count: rows,
377 avg_row_size: 128,
378 page_count: rows / 32,
379 columns: vec![ColumnStats {
380 name: "id".to_string(),
381 distinct_count: rows,
382 null_count: 0,
383 min_value: None,
384 max_value: None,
385 has_index: true,
386 }],
387 }
388 }
389
390 #[test]
391 fn null_provider_returns_none() {
392 let p = NullProvider;
393 assert!(p.table_stats("anything").is_none());
394 assert!(p.index_stats("t", "c").is_none());
395 assert!(!p.has_index("t", "c"));
396 assert!(p.distinct_values("t", "c").is_none());
397 }
398
399 #[test]
400 fn static_provider_roundtrip() {
401 let p = StaticProvider::new()
402 .with_table("users", sample_stats(1_000_000))
403 .with_index(
404 "users",
405 "email",
406 IndexStats {
407 entries: 1_000_000,
408 distinct_keys: 1_000_000,
409 approx_bytes: 32_000_000,
410 kind: IndexKind::Hash,
411 has_bloom: true,
412 index_correlation: 0.0,
413 },
414 );
415
416 let t = p.table_stats("users").unwrap();
417 assert_eq!(t.row_count, 1_000_000);
418 assert_eq!(t.columns.len(), 1);
419
420 assert!(p.has_index("users", "email"));
421 assert!(!p.has_index("users", "display_name"));
422
423 let idx = p.index_stats("users", "email").unwrap();
424 assert_eq!(idx.distinct_keys, 1_000_000);
425 assert!(idx.point_selectivity() < 1e-5);
427 }
428
429 #[test]
430 fn column_stats_default_derives_from_table() {
431 let p = StaticProvider::new().with_table("users", sample_stats(100));
432 let cs = p.column_stats("users", "id").unwrap();
433 assert_eq!(cs.distinct_count, 100);
434 assert!(cs.has_index);
435 }
436
437 #[test]
438 fn null_provider_returns_no_histogram_or_mcv() {
439 let p = NullProvider;
440 assert!(p.column_histogram("users", "email").is_none());
441 assert!(p.column_mcv("users", "email").is_none());
442 }
443
444 #[test]
445 fn static_provider_serves_histograms() {
446 use super::super::histogram::{ColumnValue, Histogram};
447 let h = Histogram::equi_depth_from_sample((0..100i64).map(ColumnValue::Int).collect(), 10);
448 let p = StaticProvider::new().with_histogram("orders", "amount", h);
449 let got = p.column_histogram("orders", "amount").unwrap();
450 assert_eq!(got.bucket_count(), 10);
451 assert_eq!(got.total_count, 100);
452 assert!(p.column_histogram("orders", "missing").is_none());
454 }
455
456 #[test]
457 fn static_provider_serves_mcv_lists() {
458 use super::super::histogram::{ColumnValue, MostCommonValues};
459 let mcv = MostCommonValues::new(vec![
460 (ColumnValue::Text("admin".to_string()), 0.4),
461 (ColumnValue::Text("user".to_string()), 0.5),
462 ]);
463 let p = StaticProvider::new().with_mcv("users", "role", mcv);
464 let got = p.column_mcv("users", "role").unwrap();
465 assert_eq!(got.len(), 2);
466 assert_eq!(got.values[0].1, 0.5);
468 assert!(p.column_mcv("users", "missing").is_none());
469 }
470
471 #[test]
472 fn registry_provider_default_no_histogram() {
473 use crate::storage::index::IndexRegistry;
476 use std::sync::Arc;
477 let p = RegistryProvider::new(Arc::new(IndexRegistry::new()));
478 assert!(p.column_histogram("any", "any").is_none());
479 assert!(p.column_mcv("any", "any").is_none());
480 }
481
482 #[test]
483 fn registry_provider_serves_index_stats() {
484 use crate::storage::index::{IndexBase, IndexKind, IndexRegistry, IndexScope};
485 use std::sync::Arc;
486
487 struct StubIndex(IndexStats);
488 impl IndexBase for StubIndex {
489 fn name(&self) -> &str {
490 "stub"
491 }
492 fn kind(&self) -> IndexKind {
493 self.0.kind
494 }
495 fn stats(&self) -> IndexStats {
496 self.0.clone()
497 }
498 }
499
500 let registry = Arc::new(IndexRegistry::new());
501 registry.register(
502 IndexScope::table("users", "email"),
503 Arc::new(StubIndex(IndexStats {
504 entries: 500_000,
505 distinct_keys: 500_000,
506 approx_bytes: 0,
507 kind: IndexKind::Hash,
508 has_bloom: true,
509 index_correlation: 0.0,
510 })),
511 );
512
513 let provider = RegistryProvider::new(Arc::clone(®istry));
514 let stats = provider.index_stats("users", "email").unwrap();
515 assert_eq!(stats.distinct_keys, 500_000);
516 assert_eq!(stats.kind, IndexKind::Hash);
517 assert!(provider.table_stats("users").is_none());
519 }
520
521 #[test]
522 fn registry_provider_chains_fallback_for_table_stats() {
523 use crate::storage::index::IndexRegistry;
524 use std::sync::Arc;
525
526 let fallback: Arc<dyn StatsProvider> = Arc::new(StaticProvider::new().with_table(
527 "orders",
528 TableStats {
529 row_count: 25_000,
530 avg_row_size: 512,
531 page_count: 50,
532 columns: vec![],
533 },
534 ));
535
536 let registry = Arc::new(IndexRegistry::new());
537 let provider = RegistryProvider::new(registry).with_table_fallback(fallback);
538 let t = provider.table_stats("orders").unwrap();
539 assert_eq!(t.row_count, 25_000);
540 assert!(provider.index_stats("orders", "id").is_none());
542 }
543
544 #[test]
545 fn distinct_values_prefers_column_then_index() {
546 let p = StaticProvider::new().with_table("t", sample_stats(500));
548 assert_eq!(p.distinct_values("t", "id"), Some(500));
549
550 let p = StaticProvider::new().with_index(
552 "t",
553 "name",
554 IndexStats {
555 entries: 10,
556 distinct_keys: 7,
557 approx_bytes: 0,
558 kind: IndexKind::BTree,
559 has_bloom: false,
560 index_correlation: 0.0,
561 },
562 );
563 assert_eq!(p.distinct_values("t", "name"), Some(7));
564
565 assert_eq!(NullProvider.distinct_values("t", "name"), None);
567 }
568}