datafusion_ffi/
statistics.rs1use datafusion_common::{DataFusionError, Result, Statistics};
30use prost::Message;
31
32pub(crate) fn serialize_statistics(stats: &Statistics) -> Vec<u8> {
35 datafusion_proto_common::Statistics::from(stats).encode_to_vec()
36}
37
38pub(crate) fn deserialize_statistics(bytes: &[u8]) -> Result<Statistics> {
41 let proto = datafusion_proto_common::Statistics::decode(bytes).map_err(|e| {
42 DataFusionError::Plan(format!("failed to decode Statistics: {e}"))
43 })?;
44 Statistics::try_from(&proto)
45}
46
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::datatypes::{DataType, Field, Schema};
    use datafusion_common::ScalarValue;
    use datafusion_common::stats::Precision;
    use datafusion_common::{ColumnStatistics, Statistics};

    use super::*;

    /// Serializes `original`, deserializes the resulting bytes, and asserts
    /// that the decoded value equals the input.
    fn assert_round_trips(original: Statistics) {
        let encoded = serialize_statistics(&original);
        let observed = deserialize_statistics(&encoded).expect("decode");

        assert_eq!(observed, original);
    }

    /// Statistics built with `Statistics::new_unknown` survive a round trip.
    #[test]
    fn round_trip_unknown_statistics() {
        let fields = vec![Field::new("a", DataType::Int32, true)];
        let schema = Arc::new(Schema::new(fields));

        assert_round_trips(Statistics::new_unknown(&schema));
    }

    /// Exact (and a few inexact/absent) values with integer and string
    /// scalars survive a round trip.
    #[test]
    fn round_trip_exact_statistics_with_scalar_values() {
        let int_column = ColumnStatistics {
            null_count: Precision::Exact(2),
            max_value: Precision::Exact(ScalarValue::Int32(Some(50))),
            min_value: Precision::Exact(ScalarValue::Int32(Some(-10))),
            sum_value: Precision::Exact(ScalarValue::Int64(Some(1234))),
            distinct_count: Precision::Exact(40),
            byte_size: Precision::Exact(800),
        };

        let utf8_column = ColumnStatistics {
            null_count: Precision::Exact(0),
            max_value: Precision::Exact(ScalarValue::Utf8(Some("zebra".to_string()))),
            min_value: Precision::Exact(ScalarValue::Utf8(Some("ant".to_string()))),
            sum_value: Precision::Absent,
            distinct_count: Precision::Inexact(95),
            byte_size: Precision::Inexact(2048),
        };

        assert_round_trips(Statistics {
            num_rows: Precision::Exact(100),
            total_byte_size: Precision::Exact(4096),
            column_statistics: vec![int_column, utf8_column],
        });
    }

    /// A mix of inexact and absent precisions with floating-point scalars
    /// survives a round trip.
    #[test]
    fn round_trip_mixed_precision() {
        let float_column = ColumnStatistics {
            null_count: Precision::Absent,
            max_value: Precision::Inexact(ScalarValue::Float64(Some(1.5))),
            min_value: Precision::Absent,
            sum_value: Precision::Inexact(ScalarValue::Float64(Some(63.0))),
            distinct_count: Precision::Absent,
            byte_size: Precision::Absent,
        };

        assert_round_trips(Statistics {
            num_rows: Precision::Inexact(42),
            total_byte_size: Precision::Absent,
            column_statistics: vec![float_column],
        });
    }
}