Skip to main content

vortex_btrblocks/schemes/
decimal.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Decimal compression scheme using byte-part decomposition.
5
6use vortex_array::ArrayRef;
7use vortex_array::Canonical;
8use vortex_array::IntoArray;
9use vortex_array::ToCanonical;
10use vortex_array::arrays::PrimitiveArray;
11use vortex_array::arrays::decimal::narrowed_decimal;
12use vortex_array::dtype::DecimalType;
13use vortex_compressor::estimate::CompressionEstimate;
14use vortex_compressor::estimate::EstimateVerdict;
15use vortex_decimal_byte_parts::DecimalByteParts;
16use vortex_error::VortexResult;
17
18use crate::ArrayAndStats;
19use crate::CascadingCompressor;
20use crate::CompressorContext;
21use crate::Scheme;
22use crate::SchemeExt;
23
/// Byte-parts compression scheme for decimal arrays.
///
/// The decimal is first narrowed to the smallest integer values type. When the narrowed values
/// fit in `i8`, `i16`, `i32` or `i64`, they are handed to the cascading compressor as a primitive
/// child and the compressed child is wrapped in a `DecimalByteParts` array. For any other values
/// type the narrowed decimal is returned without further encoding.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DecimalScheme;
30
31impl Scheme for DecimalScheme {
32    fn scheme_name(&self) -> &'static str {
33        "vortex.decimal.byte_parts"
34    }
35
36    fn matches(&self, canonical: &Canonical) -> bool {
37        matches!(canonical, Canonical::Decimal(_))
38    }
39
40    /// Children: primitive=0.
41    fn num_children(&self) -> usize {
42        1
43    }
44
45    fn expected_compression_ratio(
46        &self,
47        _data: &mut ArrayAndStats,
48        _ctx: CompressorContext,
49    ) -> CompressionEstimate {
50        // Decimal compression is almost always beneficial (narrowing + primitive compression).
51        CompressionEstimate::Verdict(EstimateVerdict::AlwaysUse)
52    }
53
54    fn compress(
55        &self,
56        compressor: &CascadingCompressor,
57        data: &mut ArrayAndStats,
58        ctx: CompressorContext,
59    ) -> VortexResult<ArrayRef> {
60        // TODO(joe): add support splitting i128/256 buffers into chunks of primitive values
61        // for compression. 2 for i128 and 4 for i256.
62        let decimal = data.array().clone().to_decimal();
63        let decimal = narrowed_decimal(decimal);
64        let validity = decimal.validity()?;
65        let prim = match decimal.values_type() {
66            DecimalType::I8 => PrimitiveArray::new(decimal.buffer::<i8>(), validity),
67            DecimalType::I16 => PrimitiveArray::new(decimal.buffer::<i16>(), validity),
68            DecimalType::I32 => PrimitiveArray::new(decimal.buffer::<i32>(), validity),
69            DecimalType::I64 => PrimitiveArray::new(decimal.buffer::<i64>(), validity),
70            _ => return Ok(decimal.into_array()),
71        };
72
73        let compressed = compressor.compress_child(&prim.into_array(), &ctx, self.id(), 0)?;
74
75        DecimalByteParts::try_new(compressed, decimal.decimal_dtype()).map(|d| d.into_array())
76    }
77}