Skip to main content

vortex_compressor/builtins/constant/
integer.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Constant encoding for integer arrays.
5
6use vortex_array::ArrayRef;
7use vortex_array::Canonical;
8use vortex_error::VortexResult;
9
10use super::is_integer_primitive;
11use crate::CascadingCompressor;
12use crate::builtins::IntConstantScheme;
13use crate::builtins::constant::compress_constant_array_with_validity;
14use crate::ctx::CompressorContext;
15use crate::estimate::CompressionEstimate;
16use crate::scheme::Scheme;
17use crate::stats::ArrayAndStats;
18
19impl Scheme for IntConstantScheme {
20    fn scheme_name(&self) -> &'static str {
21        "vortex.int.constant"
22    }
23
24    fn matches(&self, canonical: &Canonical) -> bool {
25        is_integer_primitive(canonical)
26    }
27
28    fn expected_compression_ratio(
29        &self,
30        data: &mut ArrayAndStats,
31        ctx: CompressorContext,
32    ) -> CompressionEstimate {
33        // Constant detection on a sample is a false positive, since the sample being constant does
34        // not mean the full array is constant.
35        if ctx.is_sample() {
36            return CompressionEstimate::Skip;
37        }
38
39        let array_len = data.array().len();
40        let stats = data.integer_stats();
41
42        // Note that we only compute distinct counts if other schemes have requested it.
43        if let Some(distinct_count) = stats.distinct_count() {
44            if distinct_count > 1 {
45                return CompressionEstimate::Skip;
46            } else {
47                debug_assert_eq!(distinct_count, 1);
48                return CompressionEstimate::AlwaysUse;
49            }
50        }
51
52        // We want to use `Constant` if there are only nulls in the array.
53        if stats.value_count() == 0 {
54            debug_assert_eq!(stats.null_count() as usize, array_len);
55            return CompressionEstimate::AlwaysUse;
56        }
57
58        // Otherwise, use the max and min to determine if there is a single value.
59        match stats.erased().max_minus_min().checked_ilog2() {
60            Some(_) => CompressionEstimate::Skip,
61            // If max-min == 0, then we know that there is only 1 value.
62            None => CompressionEstimate::AlwaysUse,
63        }
64    }
65
66    fn compress(
67        &self,
68        _compressor: &CascadingCompressor,
69        data: &mut ArrayAndStats,
70        _ctx: CompressorContext,
71    ) -> VortexResult<ArrayRef> {
72        compress_constant_array_with_validity(data.array())
73    }
74}