vortex_sampling_compressor/compressors/
delta.rs

1use vortex_array::aliases::hash_set::HashSet;
2use vortex_array::array::PrimitiveArray;
3use vortex_array::variants::PrimitiveArrayTrait;
4use vortex_array::{Array, Encoding, EncodingId, IntoArray};
5use vortex_error::VortexResult;
6use vortex_fastlanes::{delta_compress, DeltaArray, DeltaEncoding};
7
8use crate::compressors::{CompressedArray, CompressionTree, EncodingCompressor};
9use crate::{constants, SamplingCompressor};
10
11#[derive(Debug)]
12pub struct DeltaCompressor;
13
14impl EncodingCompressor for DeltaCompressor {
15    fn id(&self) -> &str {
16        DeltaEncoding::ID.as_ref()
17    }
18
19    fn cost(&self) -> u8 {
20        constants::DELTA_COST
21    }
22
23    fn can_compress(&self, array: &Array) -> Option<&dyn EncodingCompressor> {
24        // Only support primitive arrays
25        let parray = PrimitiveArray::maybe_from(array)?;
26
27        // Only supports ints
28        if !parray.ptype().is_unsigned_int() {
29            return None;
30        }
31
32        Some(self)
33    }
34
35    fn compress<'a>(
36        &'a self,
37        array: &Array,
38        like: Option<CompressionTree<'a>>,
39        ctx: SamplingCompressor<'a>,
40    ) -> VortexResult<CompressedArray<'a>> {
41        let parray = PrimitiveArray::try_from(array.clone())?;
42        let validity = ctx.compress_validity(parray.validity())?;
43
44        // Compress the filled array
45        let (bases, deltas) = delta_compress(&parray)?;
46
47        // Recursively compress the bases and deltas
48        let bases = ctx
49            .named("bases")
50            .compress(bases.as_ref(), like.as_ref().and_then(|l| l.child(0)))?;
51        let deltas = ctx
52            .named("deltas")
53            .compress(deltas.as_ref(), like.as_ref().and_then(|l| l.child(1)))?;
54
55        Ok(CompressedArray::compressed(
56            DeltaArray::try_from_delta_compress_parts(bases.array, deltas.array, validity)?
57                .into_array(),
58            Some(CompressionTree::new(self, vec![bases.path, deltas.path])),
59            array,
60        ))
61    }
62
63    fn used_encodings(&self) -> HashSet<EncodingId> {
64        HashSet::from([DeltaEncoding::ID])
65    }
66}