// samkhya_core/stats.rs

1//! Column-level statistics that samkhya-* adapters inject into native optimizers.
2
3use serde::{Deserialize, Serialize};
4
5/// Canonical column statistics surface.
6///
7/// Intentionally a superset of DataFusion's `ColumnStatistics` and DuckDB's
8/// internal `BaseStatistics` so a single instance can satisfy either engine.
9///
10/// # Examples
11///
12/// ```
13/// use samkhya_core::ColumnStats;
14///
15/// let stats = ColumnStats::new()
16///     .with_row_count(10_000)
17///     .with_distinct_count(8_421)
18///     .with_null_count(7)
19///     .with_upper_bound(10_000);
20/// assert_eq!(stats.row_count, Some(10_000));
21/// assert_eq!(stats.distinct_count, Some(8_421));
22/// assert_eq!(stats.null_count, Some(7));
23/// assert_eq!(stats.upper_bound_rows, Some(10_000));
24/// ```
25#[derive(Debug, Clone, Default, Serialize, Deserialize)]
26pub struct ColumnStats {
27    pub row_count: Option<u64>,
28    pub null_count: Option<u64>,
29    pub distinct_count: Option<u64>,
30    pub min: Option<Bound>,
31    pub max: Option<Bound>,
32    /// Inclusive ceiling derived from LpBound; correctors must not exceed this.
33    pub upper_bound_rows: Option<u64>,
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
37pub enum Bound {
38    Int(i64),
39    Float(f64),
40    Str(String),
41    Bytes(Vec<u8>),
42}
43
44impl ColumnStats {
45    /// Construct an empty stats record (all fields `None`).
46    ///
47    /// # Examples
48    ///
49    /// ```
50    /// use samkhya_core::ColumnStats;
51    ///
52    /// let s = ColumnStats::new();
53    /// assert!(s.row_count.is_none());
54    /// ```
55    pub fn new() -> Self {
56        Self::default()
57    }
58
59    /// Builder-style setter for `row_count`.
60    ///
61    /// # Examples
62    ///
63    /// ```
64    /// use samkhya_core::ColumnStats;
65    ///
66    /// let s = ColumnStats::new().with_row_count(100);
67    /// assert_eq!(s.row_count, Some(100));
68    /// ```
69    pub fn with_row_count(mut self, n: u64) -> Self {
70        self.row_count = Some(n);
71        self
72    }
73
74    /// Builder-style setter for `distinct_count`.
75    ///
76    /// # Examples
77    ///
78    /// ```
79    /// use samkhya_core::ColumnStats;
80    ///
81    /// let s = ColumnStats::new().with_distinct_count(42);
82    /// assert_eq!(s.distinct_count, Some(42));
83    /// ```
84    pub fn with_distinct_count(mut self, n: u64) -> Self {
85        self.distinct_count = Some(n);
86        self
87    }
88
89    /// Builder-style setter for `null_count`.
90    ///
91    /// # Examples
92    ///
93    /// ```
94    /// use samkhya_core::ColumnStats;
95    ///
96    /// let s = ColumnStats::new().with_null_count(3);
97    /// assert_eq!(s.null_count, Some(3));
98    /// ```
99    pub fn with_null_count(mut self, n: u64) -> Self {
100        self.null_count = Some(n);
101        self
102    }
103
104    /// Builder-style setter for `upper_bound_rows`. The LpBound ceiling
105    /// that downstream correctors must respect.
106    ///
107    /// # Examples
108    ///
109    /// ```
110    /// use samkhya_core::ColumnStats;
111    ///
112    /// let s = ColumnStats::new().with_upper_bound(10_000);
113    /// assert_eq!(s.upper_bound_rows, Some(10_000));
114    /// ```
115    pub fn with_upper_bound(mut self, n: u64) -> Self {
116        self.upper_bound_rows = Some(n);
117        self
118    }
119}