Skip to main content

vortex_compressor/builtins/constant/
integer.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Constant encoding for integer arrays.
5
6use vortex_array::ArrayRef;
7use vortex_array::Canonical;
8use vortex_array::ExecutionCtx;
9use vortex_error::VortexResult;
10
11use super::is_integer_primitive;
12use crate::CascadingCompressor;
13use crate::builtins::IntConstantScheme;
14use crate::builtins::constant::compress_constant_array_with_validity;
15use crate::ctx::CompressorContext;
16use crate::estimate::CompressionEstimate;
17use crate::estimate::EstimateVerdict;
18use crate::scheme::Scheme;
19use crate::stats::ArrayAndStats;
20
21impl Scheme for IntConstantScheme {
22    fn scheme_name(&self) -> &'static str {
23        "vortex.int.constant"
24    }
25
26    fn matches(&self, canonical: &Canonical) -> bool {
27        is_integer_primitive(canonical)
28    }
29
30    fn expected_compression_ratio(
31        &self,
32        data: &ArrayAndStats,
33        compress_ctx: CompressorContext,
34        exec_ctx: &mut ExecutionCtx,
35    ) -> CompressionEstimate {
36        // Constant detection on a sample is a false positive, since the sample being constant does
37        // not mean the full array is constant.
38        if compress_ctx.is_sample() {
39            return CompressionEstimate::Verdict(EstimateVerdict::Skip);
40        }
41
42        let array_len = data.array().len();
43        let stats = data.integer_stats(exec_ctx);
44
45        // Note that we only compute distinct counts if other schemes have requested it.
46        if let Some(distinct_count) = stats.distinct_count() {
47            if distinct_count > 1 {
48                return CompressionEstimate::Verdict(EstimateVerdict::Skip);
49            } else {
50                debug_assert_eq!(distinct_count, 1);
51                return CompressionEstimate::Verdict(EstimateVerdict::AlwaysUse);
52            }
53        }
54
55        // We want to use `Constant` if there are only nulls in the array.
56        if stats.value_count() == 0 {
57            debug_assert_eq!(stats.null_count() as usize, array_len);
58            return CompressionEstimate::Verdict(EstimateVerdict::AlwaysUse);
59        }
60
61        // Otherwise, use the max and min to determine if there is a single value.
62        match stats.erased().max_minus_min().checked_ilog2() {
63            Some(_) => CompressionEstimate::Verdict(EstimateVerdict::Skip),
64            // If max-min == 0, then we know that there is only 1 value.
65            None => CompressionEstimate::Verdict(EstimateVerdict::AlwaysUse),
66        }
67    }
68
69    fn compress(
70        &self,
71        _compressor: &CascadingCompressor,
72        data: &ArrayAndStats,
73        _compress_ctx: CompressorContext,
74        exec_ctx: &mut ExecutionCtx,
75    ) -> VortexResult<ArrayRef> {
76        compress_constant_array_with_validity(data.array(), exec_ctx)
77    }
78}