samkhya_core/stats.rs
1//! Column-level statistics that samkhya-* adapters inject into native optimizers.
2
3use serde::{Deserialize, Serialize};
4
5/// Canonical column statistics surface.
6///
7/// Intentionally a superset of DataFusion's `ColumnStatistics` and DuckDB's
8/// internal `BaseStatistics` so a single instance can satisfy either engine.
9///
10/// # Examples
11///
12/// ```
13/// use samkhya_core::ColumnStats;
14///
15/// let stats = ColumnStats::new()
16/// .with_row_count(10_000)
17/// .with_distinct_count(8_421)
18/// .with_null_count(7)
19/// .with_upper_bound(10_000);
20/// assert_eq!(stats.row_count, Some(10_000));
21/// assert_eq!(stats.distinct_count, Some(8_421));
22/// assert_eq!(stats.null_count, Some(7));
23/// assert_eq!(stats.upper_bound_rows, Some(10_000));
24/// ```
25#[derive(Debug, Clone, Default, Serialize, Deserialize)]
26pub struct ColumnStats {
27 pub row_count: Option<u64>,
28 pub null_count: Option<u64>,
29 pub distinct_count: Option<u64>,
30 pub min: Option<Bound>,
31 pub max: Option<Bound>,
32 /// Inclusive ceiling derived from LpBound; correctors must not exceed this.
33 pub upper_bound_rows: Option<u64>,
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
37pub enum Bound {
38 Int(i64),
39 Float(f64),
40 Str(String),
41 Bytes(Vec<u8>),
42}
43
44impl ColumnStats {
45 /// Construct an empty stats record (all fields `None`).
46 ///
47 /// # Examples
48 ///
49 /// ```
50 /// use samkhya_core::ColumnStats;
51 ///
52 /// let s = ColumnStats::new();
53 /// assert!(s.row_count.is_none());
54 /// ```
55 pub fn new() -> Self {
56 Self::default()
57 }
58
59 /// Builder-style setter for `row_count`.
60 ///
61 /// # Examples
62 ///
63 /// ```
64 /// use samkhya_core::ColumnStats;
65 ///
66 /// let s = ColumnStats::new().with_row_count(100);
67 /// assert_eq!(s.row_count, Some(100));
68 /// ```
69 pub fn with_row_count(mut self, n: u64) -> Self {
70 self.row_count = Some(n);
71 self
72 }
73
74 /// Builder-style setter for `distinct_count`.
75 ///
76 /// # Examples
77 ///
78 /// ```
79 /// use samkhya_core::ColumnStats;
80 ///
81 /// let s = ColumnStats::new().with_distinct_count(42);
82 /// assert_eq!(s.distinct_count, Some(42));
83 /// ```
84 pub fn with_distinct_count(mut self, n: u64) -> Self {
85 self.distinct_count = Some(n);
86 self
87 }
88
89 /// Builder-style setter for `null_count`.
90 ///
91 /// # Examples
92 ///
93 /// ```
94 /// use samkhya_core::ColumnStats;
95 ///
96 /// let s = ColumnStats::new().with_null_count(3);
97 /// assert_eq!(s.null_count, Some(3));
98 /// ```
99 pub fn with_null_count(mut self, n: u64) -> Self {
100 self.null_count = Some(n);
101 self
102 }
103
104 /// Builder-style setter for `upper_bound_rows`. The LpBound ceiling
105 /// that downstream correctors must respect.
106 ///
107 /// # Examples
108 ///
109 /// ```
110 /// use samkhya_core::ColumnStats;
111 ///
112 /// let s = ColumnStats::new().with_upper_bound(10_000);
113 /// assert_eq!(s.upper_bound_rows, Some(10_000));
114 /// ```
115 pub fn with_upper_bound(mut self, n: u64) -> Self {
116 self.upper_bound_rows = Some(n);
117 self
118 }
119}