vortex_file/footer/
file_statistics.rs1use std::sync::Arc;
10
11use flatbuffers::FlatBufferBuilder;
12use flatbuffers::WIPOffset;
13use itertools::Itertools;
14use vortex_array::stats::StatsSet;
15use vortex_dtype::DType;
16use vortex_error::VortexExpect;
17use vortex_error::VortexResult;
18use vortex_error::vortex_ensure_eq;
19use vortex_flatbuffers::FlatBufferRoot;
20use vortex_flatbuffers::WriteFlatBuffer;
21use vortex_flatbuffers::array::ArrayStats;
22use vortex_flatbuffers::footer as fb;
23
24#[derive(Clone, Debug)]
30pub struct FileStatistics {
31 stats: Arc<[StatsSet]>,
33 dtypes: Arc<[DType]>,
35}
36
37impl FileStatistics {
38 pub fn new(stats: Arc<[StatsSet]>, dtypes: Arc<[DType]>) -> Self {
44 assert_eq!(
45 stats.len(),
46 dtypes.len(),
47 "stats and dtypes must have the same length"
48 );
49
50 Self { stats, dtypes }
51 }
52
53 pub fn new_with_dtype(stats: Arc<[StatsSet]>, file_dtype: &DType) -> Self {
62 if let DType::Struct(struct_fields, _) = file_dtype {
63 assert_eq!(
64 stats.len(),
65 struct_fields.nfields(),
66 "stats length must match number of struct fields"
67 );
68
69 let dtypes = struct_fields.fields().collect();
70
71 Self { stats, dtypes }
72 } else {
73 assert_eq!(
74 stats.len(),
75 1,
76 "non-struct dtype must have exactly 1 statistic"
77 );
78
79 Self {
80 stats,
81 dtypes: Arc::new([file_dtype.clone()]),
82 }
83 }
84 }
85
86 pub fn from_flatbuffer<'a>(
91 fb: &fb::FileStatistics<'a>,
92 file_dtype: &DType,
93 ) -> VortexResult<Self> {
94 let field_stats = fb.field_stats().unwrap_or_default();
95 let mut array_stats: Vec<ArrayStats> = field_stats.iter().collect();
96
97 if let DType::Struct(struct_fields, _) = file_dtype {
98 vortex_ensure_eq!(array_stats.len(), struct_fields.nfields());
99
100 let stats_sets: Arc<[StatsSet]> = array_stats
101 .into_iter()
102 .zip(struct_fields.fields())
103 .map(|(array_stat, field_dtype)| {
104 StatsSet::from_flatbuffer(&array_stat, &field_dtype)
105 })
106 .try_collect()?;
107
108 let dtypes = struct_fields.fields().collect();
109
110 Ok(Self {
111 stats: stats_sets,
112 dtypes,
113 })
114 } else {
115 vortex_ensure_eq!(array_stats.len(), 1);
116
117 let array_stat = array_stats
118 .pop()
119 .vortex_expect("we just checked that there was 1 field");
120 let stats_set = StatsSet::from_flatbuffer(&array_stat, file_dtype)?;
121
122 Ok(Self {
123 stats: Arc::new([stats_set]),
124 dtypes: Arc::new([file_dtype.clone()]),
125 })
126 }
127 }
128
129 pub fn stats_sets(&self) -> &Arc<[StatsSet]> {
131 &self.stats
132 }
133
134 pub fn dtypes(&self) -> &Arc<[DType]> {
136 &self.dtypes
137 }
138
139 pub fn get(&self, field_idx: usize) -> (&StatsSet, &DType) {
145 (&self.stats[field_idx], &self.dtypes[field_idx])
146 }
147}
148
149impl FlatBufferRoot for FileStatistics {}
150
151impl WriteFlatBuffer for FileStatistics {
152 type Target<'a> = fb::FileStatistics<'a>;
153
154 fn write_flatbuffer<'fb>(
155 &self,
156 fbb: &mut FlatBufferBuilder<'fb>,
157 ) -> VortexResult<WIPOffset<Self::Target<'fb>>> {
158 let field_stats = self
159 .stats_sets()
160 .iter()
161 .map(|s| s.write_flatbuffer(fbb))
162 .collect::<VortexResult<Vec<_>>>()?;
163 let field_stats = fbb.create_vector(field_stats.as_slice());
164
165 Ok(fb::FileStatistics::create(
166 fbb,
167 &fb::FileStatisticsArgs {
168 field_stats: Some(field_stats),
169 },
170 ))
171 }
172}