Skip to main content

vortex_btrblocks/schemes/
decimal.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Decimal compression scheme using byte-part decomposition.
5
6use vortex_array::ArrayRef;
7use vortex_array::Canonical;
8use vortex_array::ExecutionCtx;
9use vortex_array::IntoArray;
10use vortex_array::arrays::DecimalArray;
11use vortex_array::arrays::PrimitiveArray;
12use vortex_array::arrays::decimal::narrowed_decimal;
13use vortex_array::dtype::DecimalType;
14use vortex_compressor::estimate::CompressionEstimate;
15use vortex_compressor::estimate::EstimateVerdict;
16use vortex_decimal_byte_parts::DecimalByteParts;
17use vortex_error::VortexResult;
18
19use crate::ArrayAndStats;
20use crate::CascadingCompressor;
21use crate::CompressorContext;
22use crate::Scheme;
23use crate::SchemeExt;
24
/// Byte-part decomposition scheme for compressing decimal arrays.
///
/// The decimal is narrowed to the smallest integer type that holds its values, the resulting
/// primitive array is compressed, and the compressed child is wrapped in a
/// `DecimalBytePartsArray`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct DecimalScheme;
31
32impl Scheme for DecimalScheme {
33    fn scheme_name(&self) -> &'static str {
34        "vortex.decimal.byte_parts"
35    }
36
37    fn matches(&self, canonical: &Canonical) -> bool {
38        matches!(canonical, Canonical::Decimal(_))
39    }
40
41    /// Children: primitive=0.
42    fn num_children(&self) -> usize {
43        1
44    }
45
46    fn expected_compression_ratio(
47        &self,
48        _data: &ArrayAndStats,
49        _compress_ctx: CompressorContext,
50        _exec_ctx: &mut ExecutionCtx,
51    ) -> CompressionEstimate {
52        // Decimal compression is almost always beneficial (narrowing + primitive compression).
53        CompressionEstimate::Verdict(EstimateVerdict::AlwaysUse)
54    }
55
56    fn compress(
57        &self,
58        compressor: &CascadingCompressor,
59        data: &ArrayAndStats,
60        compress_ctx: CompressorContext,
61        exec_ctx: &mut ExecutionCtx,
62    ) -> VortexResult<ArrayRef> {
63        // TODO(joe): add support splitting i128/256 buffers into chunks of primitive values
64        // for compression. 2 for i128 and 4 for i256.
65        let decimal = data.array().clone().execute::<DecimalArray>(exec_ctx)?;
66        let decimal = narrowed_decimal(decimal);
67        let validity = decimal.validity()?;
68        let prim = match decimal.values_type() {
69            DecimalType::I8 => PrimitiveArray::new(decimal.buffer::<i8>(), validity),
70            DecimalType::I16 => PrimitiveArray::new(decimal.buffer::<i16>(), validity),
71            DecimalType::I32 => PrimitiveArray::new(decimal.buffer::<i32>(), validity),
72            DecimalType::I64 => PrimitiveArray::new(decimal.buffer::<i64>(), validity),
73            _ => return Ok(decimal.into_array()),
74        };
75
76        let compressed =
77            compressor.compress_child(&prim.into_array(), &compress_ctx, self.id(), 0, exec_ctx)?;
78
79        DecimalByteParts::try_new(compressed, decimal.decimal_dtype()).map(|d| d.into_array())
80    }
81}