alopex_sql/columnar/
statistics.rs

1use std::cmp::Ordering;
2
3use crate::storage::SqlValue;
4use serde::{Deserialize, Serialize};
5
6/// RowGroup 統計情報。
7#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub struct RowGroupStatistics {
9    pub row_count: u64,
10    pub columns: Vec<ColumnStatistics>,
11    #[serde(default)]
12    pub row_id_min: Option<u64>,
13    #[serde(default)]
14    pub row_id_max: Option<u64>,
15}
16
17/// カラム統計情報。
18#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
19pub struct ColumnStatistics {
20    pub min: SqlValue,
21    pub max: SqlValue,
22    pub null_count: u64,
23    pub total_count: u64,
24    pub distinct_count: Option<u64>,
25}
26
27impl Default for ColumnStatistics {
28    fn default() -> Self {
29        Self {
30            min: SqlValue::Null,
31            max: SqlValue::Null,
32            null_count: 0,
33            total_count: 0,
34            distinct_count: None,
35        }
36    }
37}
38
39impl ColumnStatistics {
40    /// `SqlValue` スライスからカラム統計を計算する。
41    pub fn compute(values: &[SqlValue]) -> Self {
42        let total_count = values.len() as u64;
43        let null_count = values.iter().filter(|v| v.is_null()).count() as u64;
44
45        let mut non_nulls = values.iter().filter(|v| !v.is_null());
46        let (min, max) = if let Some(first) = non_nulls.next() {
47            let mut min = first.clone();
48            let mut max = first.clone();
49            for v in non_nulls {
50                if let Some(Ordering::Less) = v.partial_cmp(&min) {
51                    min = v.clone();
52                }
53                if let Some(Ordering::Greater) = v.partial_cmp(&max) {
54                    max = v.clone();
55                }
56            }
57            (min, max)
58        } else {
59            (SqlValue::Null, SqlValue::Null)
60        };
61
62        Self {
63            min,
64            max,
65            null_count,
66            total_count,
67            distinct_count: None,
68        }
69    }
70}
71
72/// 複数行から RowGroup 統計を計算する。
73pub fn compute_row_group_statistics(rows: &[Vec<SqlValue>]) -> RowGroupStatistics {
74    let row_count = rows.len() as u64;
75    let column_count = rows.first().map(|r| r.len()).unwrap_or(0);
76    let mut columns = Vec::with_capacity(column_count);
77
78    for idx in 0..column_count {
79        let mut col_values = Vec::with_capacity(rows.len());
80        for row in rows {
81            col_values.push(row.get(idx).cloned().unwrap_or(SqlValue::Null));
82        }
83        columns.push(ColumnStatistics::compute(&col_values));
84    }
85
86    RowGroupStatistics {
87        row_count,
88        columns,
89        row_id_min: None,
90        row_id_max: None,
91    }
92}
93
#[cfg(test)]
mod tests {
    use super::*;

    /// A column with three integers and one NULL reports the integer range,
    /// one NULL, four values in total and no distinct count.
    #[test]
    fn compute_statistics_basic() {
        let column = [
            SqlValue::Integer(3),
            SqlValue::Integer(1),
            SqlValue::Null,
            SqlValue::Integer(2),
        ];

        let expected = ColumnStatistics {
            min: SqlValue::Integer(1),
            max: SqlValue::Integer(3),
            null_count: 1,
            total_count: 4,
            distinct_count: None,
        };

        assert_eq!(ColumnStatistics::compute(&column), expected);
    }

    /// With no rows there are no columns to describe and the row count is zero.
    #[test]
    fn compute_row_group_statistics_handles_empty() {
        let RowGroupStatistics {
            row_count, columns, ..
        } = compute_row_group_statistics(&[]);

        assert_eq!(row_count, 0);
        assert!(columns.is_empty());
    }
}
120}