Skip to main content

vortex_array/stats/
flatbuffers.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use flatbuffers::FlatBufferBuilder;
5use flatbuffers::WIPOffset;
6use vortex_error::VortexResult;
7use vortex_error::vortex_bail;
8use vortex_flatbuffers::WriteFlatBuffer;
9use vortex_flatbuffers::array as fba;
10use vortex_session::VortexSession;
11
12use crate::dtype::DType;
13use crate::dtype::Nullability;
14use crate::dtype::PType;
15use crate::expr::stats::Precision;
16use crate::expr::stats::Stat;
17use crate::scalar::ScalarValue;
18use crate::stats::StatsSet;
19use crate::stats::StatsSetRef;
20
21impl WriteFlatBuffer for StatsSetRef<'_> {
22    type Target<'t> = fba::ArrayStats<'t>;
23
24    /// All statistics written must be exact
25    fn write_flatbuffer<'fb>(
26        &self,
27        fbb: &mut FlatBufferBuilder<'fb>,
28    ) -> VortexResult<WIPOffset<Self::Target<'fb>>> {
29        self.with_typed_stats_set(|stats_set| stats_set.values.write_flatbuffer(fbb))
30    }
31}
32
33impl WriteFlatBuffer for StatsSet {
34    type Target<'t> = fba::ArrayStats<'t>;
35
36    /// All statistics written must be exact
37    fn write_flatbuffer<'fb>(
38        &self,
39        fbb: &mut FlatBufferBuilder<'fb>,
40    ) -> VortexResult<WIPOffset<Self::Target<'fb>>> {
41        let (min_precision, min) = self
42            .get(Stat::Min)
43            .map(|min| {
44                (
45                    if min.is_exact() {
46                        fba::Precision::Exact
47                    } else {
48                        fba::Precision::Inexact
49                    },
50                    Some(
51                        fbb.create_vector(&ScalarValue::to_proto_bytes::<Vec<u8>>(Some(
52                            &min.into_inner(),
53                        ))),
54                    ),
55                )
56            })
57            .unwrap_or_else(|| (fba::Precision::Inexact, None));
58
59        let (max_precision, max) = self
60            .get(Stat::Max)
61            .map(|max| {
62                (
63                    if max.is_exact() {
64                        fba::Precision::Exact
65                    } else {
66                        fba::Precision::Inexact
67                    },
68                    Some(
69                        fbb.create_vector(&ScalarValue::to_proto_bytes::<Vec<u8>>(Some(
70                            &max.into_inner(),
71                        ))),
72                    ),
73                )
74            })
75            .unwrap_or_else(|| (fba::Precision::Inexact, None));
76
77        let sum = self
78            .get(Stat::Sum)
79            .and_then(Precision::as_exact)
80            .map(|sum| fbb.create_vector(&ScalarValue::to_proto_bytes::<Vec<u8>>(Some(&sum))));
81
82        let stat_args = &fba::ArrayStatsArgs {
83            min,
84            min_precision,
85            max,
86            max_precision,
87            sum,
88            is_sorted: self
89                .get_as::<bool>(Stat::IsSorted, &DType::Bool(Nullability::NonNullable))
90                .and_then(Precision::as_exact),
91            is_strict_sorted: self
92                .get_as::<bool>(Stat::IsStrictSorted, &DType::Bool(Nullability::NonNullable))
93                .and_then(Precision::as_exact),
94            is_constant: self
95                .get_as::<bool>(Stat::IsConstant, &DType::Bool(Nullability::NonNullable))
96                .and_then(Precision::as_exact),
97            null_count: self
98                .get_as::<u64>(Stat::NullCount, &PType::U64.into())
99                .and_then(Precision::as_exact),
100            uncompressed_size_in_bytes: self
101                .get_as::<u64>(Stat::UncompressedSizeInBytes, &PType::U64.into())
102                .and_then(Precision::as_exact),
103            nan_count: self
104                .get_as::<u64>(Stat::NaNCount, &PType::U64.into())
105                .and_then(Precision::as_exact),
106        };
107
108        Ok(fba::ArrayStats::create(fbb, stat_args))
109    }
110}
111
112impl StatsSet {
113    /// Creates a [`StatsSet`] from a flatbuffers array [`fba::ArrayStats<'a>`].
114    pub fn from_flatbuffer<'a>(
115        fb: &fba::ArrayStats<'a>,
116        array_dtype: &DType,
117        session: &VortexSession,
118    ) -> VortexResult<Self> {
119        let mut stats_set = StatsSet::default();
120
121        for stat in Stat::all() {
122            let stat_dtype = stat.dtype(array_dtype);
123
124            match stat {
125                Stat::IsConstant => {
126                    if let Some(is_constant) = fb.is_constant() {
127                        stats_set.set(Stat::IsConstant, Precision::Exact(is_constant.into()));
128                    }
129                }
130                Stat::IsSorted => {
131                    if let Some(is_sorted) = fb.is_sorted() {
132                        stats_set.set(Stat::IsSorted, Precision::Exact(is_sorted.into()));
133                    }
134                }
135                Stat::IsStrictSorted => {
136                    if let Some(is_strict_sorted) = fb.is_strict_sorted() {
137                        stats_set.set(
138                            Stat::IsStrictSorted,
139                            Precision::Exact(is_strict_sorted.into()),
140                        );
141                    }
142                }
143                Stat::Max => {
144                    if let Some(max) = fb.max()
145                        && let Some(stat_dtype) = stat_dtype
146                    {
147                        let value =
148                            ScalarValue::from_proto_bytes(max.bytes(), &stat_dtype, session)?;
149                        let Some(value) = value else {
150                            continue;
151                        };
152
153                        stats_set.set(
154                            Stat::Max,
155                            match fb.max_precision() {
156                                fba::Precision::Exact => Precision::Exact(value),
157                                fba::Precision::Inexact => Precision::Inexact(value),
158                                other => vortex_bail!("Corrupted max_precision field: {other:?}"),
159                            },
160                        );
161                    }
162                }
163                Stat::Min => {
164                    if let Some(min) = fb.min()
165                        && let Some(stat_dtype) = stat_dtype
166                    {
167                        let value =
168                            ScalarValue::from_proto_bytes(min.bytes(), &stat_dtype, session)?;
169                        let Some(value) = value else {
170                            continue;
171                        };
172
173                        stats_set.set(
174                            Stat::Min,
175                            match fb.min_precision() {
176                                fba::Precision::Exact => Precision::Exact(value),
177                                fba::Precision::Inexact => Precision::Inexact(value),
178                                other => vortex_bail!("Corrupted min_precision field: {other:?}"),
179                            },
180                        );
181                    }
182                }
183                Stat::NullCount => {
184                    if let Some(null_count) = fb.null_count() {
185                        stats_set.set(Stat::NullCount, Precision::Exact(null_count.into()));
186                    }
187                }
188                Stat::UncompressedSizeInBytes => {
189                    if let Some(uncompressed_size_in_bytes) = fb.uncompressed_size_in_bytes() {
190                        stats_set.set(
191                            Stat::UncompressedSizeInBytes,
192                            Precision::Exact(uncompressed_size_in_bytes.into()),
193                        );
194                    }
195                }
196                Stat::Sum => {
197                    if let Some(sum) = fb.sum()
198                        && let Some(stat_dtype) = stat_dtype
199                    {
200                        let value =
201                            ScalarValue::from_proto_bytes(sum.bytes(), &stat_dtype, session)?;
202                        let Some(value) = value else {
203                            continue;
204                        };
205
206                        stats_set.set(Stat::Sum, Precision::Exact(value));
207                    }
208                }
209                Stat::NaNCount => {
210                    if let Some(nan_count) = fb.nan_count() {
211                        stats_set.set(
212                            Stat::NaNCount,
213                            Precision::Exact(ScalarValue::from(nan_count)),
214                        );
215                    }
216                }
217            }
218        }
219
220        Ok(stats_set)
221    }
222}