Skip to main content

vortex_compressor/builtins/constant/
float.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Constant encoding for float arrays.
5
6use vortex_array::ArrayRef;
7use vortex_array::Canonical;
8use vortex_array::aggregate_fn::fns::is_constant::is_constant;
9use vortex_error::VortexResult;
10
11use super::is_float_primitive;
12use crate::CascadingCompressor;
13use crate::builtins::FloatConstantScheme;
14use crate::builtins::constant::compress_constant_array_with_validity;
15use crate::ctx::CompressorContext;
16use crate::estimate::CompressionEstimate;
17use crate::scheme::Scheme;
18use crate::stats::ArrayAndStats;
19
20impl Scheme for FloatConstantScheme {
21    fn scheme_name(&self) -> &'static str {
22        "vortex.float.constant"
23    }
24
25    fn matches(&self, canonical: &Canonical) -> bool {
26        is_float_primitive(canonical)
27    }
28
29    fn expected_compression_ratio(
30        &self,
31        data: &mut ArrayAndStats,
32        ctx: CompressorContext,
33    ) -> CompressionEstimate {
34        // Constant detection on a sample is a false positive, since the sample being constant does
35        // not mean the full array is constant.
36        if ctx.is_sample() {
37            return CompressionEstimate::Skip;
38        }
39
40        let array_len = data.array().len();
41        let stats = data.float_stats();
42
43        // Note that we only compute distinct counts if other schemes have requested it.
44        if let Some(distinct_count) = stats.distinct_count() {
45            if distinct_count > 1 {
46                return CompressionEstimate::Skip;
47            } else {
48                debug_assert_eq!(distinct_count, 1);
49                return CompressionEstimate::AlwaysUse;
50            }
51        }
52
53        // We want to use `Constant` if there are only nulls in the array.
54        if stats.value_count() == 0 {
55            debug_assert_eq!(stats.null_count() as usize, array_len);
56            return CompressionEstimate::AlwaysUse;
57        }
58
59        // TODO(connor): Can we be smart here with the max and min like with integers?
60
61        // Otherwise our best bet is to actually check if the array is constant.
62        // This is an expensive check, but in practice the distinct count is known because we often
63        // include dictionary encoding in our set of schemes, so we rarely call this.
64        CompressionEstimate::Estimate(Box::new(|compressor, data, _ctx| {
65            if is_constant(data.array(), &mut compressor.execution_ctx())? {
66                Ok(CompressionEstimate::AlwaysUse)
67            } else {
68                Ok(CompressionEstimate::Skip)
69            }
70        }))
71    }
72
73    fn compress(
74        &self,
75        _compressor: &CascadingCompressor,
76        data: &mut ArrayAndStats,
77        _ctx: CompressorContext,
78    ) -> VortexResult<ArrayRef> {
79        compress_constant_array_with_validity(data.array())
80    }
81}