alopex_sql/columnar/
statistics.rs1use std::cmp::Ordering;
2
3use crate::storage::SqlValue;
4use serde::{Deserialize, Serialize};
5
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub struct RowGroupStatistics {
9 pub row_count: u64,
10 pub columns: Vec<ColumnStatistics>,
11 #[serde(default)]
12 pub row_id_min: Option<u64>,
13 #[serde(default)]
14 pub row_id_max: Option<u64>,
15}
16
17#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
19pub struct ColumnStatistics {
20 pub min: SqlValue,
21 pub max: SqlValue,
22 pub null_count: u64,
23 pub total_count: u64,
24 pub distinct_count: Option<u64>,
25}
26
27impl Default for ColumnStatistics {
28 fn default() -> Self {
29 Self {
30 min: SqlValue::Null,
31 max: SqlValue::Null,
32 null_count: 0,
33 total_count: 0,
34 distinct_count: None,
35 }
36 }
37}
38
39impl ColumnStatistics {
40 pub fn compute(values: &[SqlValue]) -> Self {
42 let total_count = values.len() as u64;
43 let null_count = values.iter().filter(|v| v.is_null()).count() as u64;
44
45 let mut non_nulls = values.iter().filter(|v| !v.is_null());
46 let (min, max) = if let Some(first) = non_nulls.next() {
47 let mut min = first.clone();
48 let mut max = first.clone();
49 for v in non_nulls {
50 if let Some(Ordering::Less) = v.partial_cmp(&min) {
51 min = v.clone();
52 }
53 if let Some(Ordering::Greater) = v.partial_cmp(&max) {
54 max = v.clone();
55 }
56 }
57 (min, max)
58 } else {
59 (SqlValue::Null, SqlValue::Null)
60 };
61
62 Self {
63 min,
64 max,
65 null_count,
66 total_count,
67 distinct_count: None,
68 }
69 }
70}
71
72pub fn compute_row_group_statistics(rows: &[Vec<SqlValue>]) -> RowGroupStatistics {
74 let row_count = rows.len() as u64;
75 let column_count = rows.first().map(|r| r.len()).unwrap_or(0);
76 let mut columns = Vec::with_capacity(column_count);
77
78 for idx in 0..column_count {
79 let mut col_values = Vec::with_capacity(rows.len());
80 for row in rows {
81 col_values.push(row.get(idx).cloned().unwrap_or(SqlValue::Null));
82 }
83 columns.push(ColumnStatistics::compute(&col_values));
84 }
85
86 RowGroupStatistics {
87 row_count,
88 columns,
89 row_id_min: None,
90 row_id_max: None,
91 }
92}
93
#[cfg(test)]
mod tests {
    use super::*;

    /// Min/max skip the null entry while both counters account for it.
    #[test]
    fn compute_statistics_basic() {
        let values = [
            SqlValue::Integer(3),
            SqlValue::Integer(1),
            SqlValue::Null,
            SqlValue::Integer(2),
        ];

        let expected = ColumnStatistics {
            min: SqlValue::Integer(1),
            max: SqlValue::Integer(3),
            null_count: 1,
            total_count: 4,
            distinct_count: None,
        };

        assert_eq!(ColumnStatistics::compute(&values), expected);
    }

    /// An empty row group has zero rows and no column statistics.
    #[test]
    fn compute_row_group_statistics_handles_empty() {
        let RowGroupStatistics {
            row_count, columns, ..
        } = compute_row_group_statistics(&[]);

        assert_eq!(row_count, 0);
        assert!(columns.is_empty());
    }
}