vortex_sampling_compressor/compressors/
alp_rd.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
use std::any::Any;
use std::sync::Arc;

use vortex_alp::{match_each_alp_float_ptype, ALPRDEncoding, RDEncoder as ALPRDEncoder};
use vortex_array::aliases::hash_set::HashSet;
use vortex_array::array::PrimitiveArray;
use vortex_array::variants::PrimitiveArrayTrait;
use vortex_array::{Array, Encoding, EncodingId, IntoArray, IntoArrayVariant};
use vortex_dtype::PType;
use vortex_error::{vortex_bail, VortexResult};
use vortex_fastlanes::BitPackedEncoding;

use crate::compressors::{CompressedArray, CompressionTree, EncoderMetadata, EncodingCompressor};
use crate::{constants, SamplingCompressor};

/// Sampling compressor that produces ALP-RD encoded arrays.
///
/// This is a stateless unit type. Its `EncodingCompressor` implementation accepts
/// primitive arrays of `f32` or `f64` values and encodes them with an
/// `ALPRDEncoder`, which is either trained on the input or reused from a previous
/// compression tree.
#[derive(Debug)]
pub struct ALPRDCompressor;

/// Lets an `ALPRDEncoder` be carried as type-erased encoder metadata.
impl EncoderMetadata for ALPRDEncoder {
    /// Exposes this encoder as `&dyn Any` so that it can later be recovered with
    /// `downcast_ref::<ALPRDEncoder>()`.
    fn as_any(&self) -> &dyn Any {
        self as &dyn Any
    }
}

impl EncodingCompressor for ALPRDCompressor {
    /// Identifier of this compressor: the string form of `ALPRDEncoding::ID`.
    fn id(&self) -> &str {
        ALPRDEncoding::ID.as_ref()
    }

    /// Cost assigned to this compressor, read from `constants::ALP_RD_COST`.
    fn cost(&self) -> u8 {
        constants::ALP_RD_COST
    }

    /// Returns `Some(self)` when `array` is a primitive array whose ptype is `f32`
    /// or `f64`, and `None` for everything else.
    fn can_compress(&self, array: &Array) -> Option<&dyn EncodingCompressor> {
        // Only primitive arrays are supported.
        let parray = PrimitiveArray::maybe_from(array)?;

        // Only f32 and f64 are supported.
        if !matches!(parray.ptype(), PType::F32 | PType::F64) {
            return None;
        }

        Some(self)
    }

    /// Encodes `array` with ALP-RD.
    ///
    /// When `like` carries encoder metadata, that encoder is reused; otherwise a new
    /// encoder is trained on `array` itself. The encoder that was used is stored as
    /// metadata in the returned compression tree.
    ///
    /// # Errors
    ///
    /// Returns an error if `array` cannot be converted into a primitive array, or if
    /// the metadata taken from `like` is not an `ALPRDEncoder`.
    fn compress<'a>(
        &'a self,
        array: &Array,
        like: Option<CompressionTree<'a>>,
        _ctx: SamplingCompressor<'a>,
    ) -> VortexResult<CompressedArray<'a>> {
        let primitive = array.clone().into_primitive()?;

        // Reuse the encoder carried by `like` when there is one; otherwise train a new
        // one on this array. `like` is owned and never used again, so it is consumed
        // directly instead of being cloned, and neither branch can fail, so no
        // intermediate `Result` is needed.
        let encoder = like
            .and_then(|mut tree| tree.metadata())
            .unwrap_or_else(|| Arc::new(alp_rd_new_encoder(&primitive)));

        // The metadata is type-erased, so recover the concrete encoder before using it.
        let Some(alp_rd_encoder) = encoder.as_any().downcast_ref::<ALPRDEncoder>() else {
            vortex_bail!("Could not downcast metadata as ALPRDEncoder");
        };

        let encoded = alp_rd_encoder.encode(&primitive).into_array();
        Ok(CompressedArray::compressed(
            encoded,
            Some(CompressionTree::new_with_metadata(self, vec![], encoder)),
            array,
        ))
    }

    /// Returns the encoding IDs reported for this compressor: ALP-RD and BitPacked.
    fn used_encodings(&self) -> HashSet<EncodingId> {
        HashSet::from([ALPRDEncoding::ID, BitPackedEncoding::ID])
    }
}

/// Trains a fresh `ALPRDEncoder` on the values held in `array`.
///
/// The element type is selected from the array's ptype by
/// `match_each_alp_float_ptype!`, and the array's values are handed to
/// `ALPRDEncoder::new` as a slice of that type.
///
/// NOTE(review): the handling of non-float ptypes is decided inside the macro, which
/// is not visible here; callers are presumably expected to pass only `f32`/`f64`
/// arrays — confirm against the macro definition.
fn alp_rd_new_encoder(array: &PrimitiveArray) -> ALPRDEncoder {
    let float_ptype = array.ptype();
    match_each_alp_float_ptype!(float_ptype, |$P| {
        let samples = array.as_slice::<$P>();
        ALPRDEncoder::new(samples)
    })
}