vortex_file/footer/
file_statistics.rs1use std::sync::Arc;
10
11use flatbuffers::FlatBufferBuilder;
12use flatbuffers::WIPOffset;
13use itertools::Itertools;
14use vortex_array::dtype::DType;
15use vortex_array::stats::StatsSet;
16use vortex_error::VortexExpect;
17use vortex_error::VortexResult;
18use vortex_error::vortex_ensure_eq;
19use vortex_flatbuffers::FlatBufferRoot;
20use vortex_flatbuffers::WriteFlatBuffer;
21use vortex_flatbuffers::array::ArrayStats;
22use vortex_flatbuffers::footer as fb;
23use vortex_session::VortexSession;
24
25#[derive(Clone, Debug)]
31pub struct FileStatistics {
32 stats: Arc<[StatsSet]>,
34 dtypes: Arc<[DType]>,
36}
37
38impl FileStatistics {
39 pub fn new(stats: Arc<[StatsSet]>, dtypes: Arc<[DType]>) -> Self {
45 assert_eq!(
46 stats.len(),
47 dtypes.len(),
48 "stats and dtypes must have the same length"
49 );
50
51 Self { stats, dtypes }
52 }
53
54 pub fn new_with_dtype(stats: Arc<[StatsSet]>, file_dtype: &DType) -> Self {
63 if let DType::Struct(struct_fields, _) = file_dtype {
64 assert_eq!(
65 stats.len(),
66 struct_fields.nfields(),
67 "stats length must match number of struct fields"
68 );
69
70 let dtypes = struct_fields.fields().collect();
71
72 Self { stats, dtypes }
73 } else {
74 assert_eq!(
75 stats.len(),
76 1,
77 "non-struct dtype must have exactly 1 statistic"
78 );
79
80 Self {
81 stats,
82 dtypes: Arc::new([file_dtype.clone()]),
83 }
84 }
85 }
86
87 pub fn from_flatbuffer<'a>(
92 fb: &fb::FileStatistics<'a>,
93 file_dtype: &DType,
94 session: &VortexSession,
95 ) -> VortexResult<Self> {
96 let field_stats = fb.field_stats().unwrap_or_default();
97 let mut array_stats: Vec<ArrayStats> = field_stats.iter().collect();
98
99 if let DType::Struct(struct_fields, _) = file_dtype {
100 vortex_ensure_eq!(array_stats.len(), struct_fields.nfields());
101
102 let stats_sets: Arc<[StatsSet]> = array_stats
103 .into_iter()
104 .zip(struct_fields.fields())
105 .map(|(array_stat, field_dtype)| {
106 StatsSet::from_flatbuffer(&array_stat, &field_dtype, session)
107 })
108 .try_collect()?;
109
110 let dtypes = struct_fields.fields().collect();
111
112 Ok(Self {
113 stats: stats_sets,
114 dtypes,
115 })
116 } else {
117 vortex_ensure_eq!(array_stats.len(), 1);
118
119 let array_stat = array_stats
120 .pop()
121 .vortex_expect("we just checked that there was 1 field");
122 let stats_set = StatsSet::from_flatbuffer(&array_stat, file_dtype, session)?;
123
124 Ok(Self {
125 stats: Arc::new([stats_set]),
126 dtypes: Arc::new([file_dtype.clone()]),
127 })
128 }
129 }
130
131 pub fn stats_sets(&self) -> &Arc<[StatsSet]> {
133 &self.stats
134 }
135
136 pub fn dtypes(&self) -> &Arc<[DType]> {
138 &self.dtypes
139 }
140
141 pub fn get(&self, field_idx: usize) -> (&StatsSet, &DType) {
147 (&self.stats[field_idx], &self.dtypes[field_idx])
148 }
149}
150
151impl<'a> IntoIterator for &'a FileStatistics {
152 type Item = (&'a StatsSet, &'a DType);
153 type IntoIter = std::iter::Zip<std::slice::Iter<'a, StatsSet>, std::slice::Iter<'a, DType>>;
154
155 fn into_iter(self) -> Self::IntoIter {
156 self.stats.iter().zip(self.dtypes.iter())
157 }
158}
159
160impl FlatBufferRoot for FileStatistics {}
161
162impl WriteFlatBuffer for FileStatistics {
163 type Target<'a> = fb::FileStatistics<'a>;
164
165 fn write_flatbuffer<'fb>(
166 &self,
167 fbb: &mut FlatBufferBuilder<'fb>,
168 ) -> VortexResult<WIPOffset<Self::Target<'fb>>> {
169 let field_stats = self
170 .stats_sets()
171 .iter()
172 .map(|s| s.write_flatbuffer(fbb))
173 .collect::<VortexResult<Vec<_>>>()?;
174 let field_stats = fbb.create_vector(field_stats.as_slice());
175
176 Ok(fb::FileStatistics::create(
177 fbb,
178 &fb::FileStatisticsArgs {
179 field_stats: Some(field_stats),
180 },
181 ))
182 }
183}