vortex_sampling_compressor/compressors/
dict.rs1use vortex_array::aliases::hash_set::HashSet;
2use vortex_array::array::{PrimitiveEncoding, VarBinEncoding, VarBinViewEncoding};
3use vortex_array::{Array, Encoding, EncodingId, IntoArray};
4use vortex_dict::{dict_encode, DictArray, DictEncoding};
5use vortex_error::VortexResult;
6
7use crate::compressors::{CompressedArray, CompressionTree, EncodingCompressor};
8use crate::downscale::downscale_integer_array;
9use crate::{constants, SamplingCompressor};
10
/// Stateless compressor that produces dictionary-encoded arrays.
///
/// All behavior lives in its `EncodingCompressor` implementation.
#[derive(Debug)]
pub struct DictCompressor;
13
14impl EncodingCompressor for DictCompressor {
15 fn id(&self) -> &str {
16 DictEncoding::ID.as_ref()
17 }
18
19 fn cost(&self) -> u8 {
20 constants::DICT_COST
21 }
22
23 fn can_compress(&self, array: &Array) -> Option<&dyn EncodingCompressor> {
24 if !array.is_encoding(PrimitiveEncoding::ID)
25 && !array.is_encoding(VarBinEncoding::ID)
26 && !array.is_encoding(VarBinViewEncoding::ID)
27 {
28 return None;
29 };
30
31 if array
34 .statistics()
35 .compute_is_strict_sorted()
36 .unwrap_or(false)
37 {
38 return None;
39 }
40
41 Some(self)
42 }
43
44 fn compress<'a>(
45 &'a self,
46 array: &Array,
47 like: Option<CompressionTree<'a>>,
48 ctx: SamplingCompressor<'a>,
49 ) -> VortexResult<CompressedArray<'a>> {
50 let dict = dict_encode(array)?;
51 let codes = dict.codes();
52 let values = dict.values();
53
54 let (codes, values) = (
55 ctx.auxiliary("codes").excluding(self).compress(
56 &downscale_integer_array(codes)?,
57 like.as_ref().and_then(|l| l.child(0)),
58 )?,
59 ctx.named("values")
60 .excluding(self)
61 .compress(&values, like.as_ref().and_then(|l| l.child(1)))?,
62 );
63
64 Ok(CompressedArray::compressed(
65 DictArray::try_new(codes.array, values.array)?.into_array(),
66 Some(CompressionTree::new(self, vec![codes.path, values.path])),
67 array,
68 ))
69 }
70
71 fn used_encodings(&self) -> HashSet<EncodingId> {
72 HashSet::from([DictEncoding::ID])
73 }
74}