vortex_dict/builders/
mod.rs

1use bytes::BytesDictBuilder;
2use primitive::PrimitiveDictBuilder;
3use vortex_array::arrays::{PrimitiveArray, VarBinArray, VarBinViewArray};
4use vortex_array::compress::downscale_integer_array;
5use vortex_array::variants::PrimitiveArrayTrait;
6use vortex_array::{Array, ArrayExt, ArrayRef};
7use vortex_dtype::match_each_native_ptype;
8use vortex_error::{VortexResult, vortex_bail};
9
10use crate::DictArray;
11
12mod bytes;
13mod primitive;
14
15pub trait DictEncoder {
16    fn encode(&mut self, array: &dyn Array) -> VortexResult<ArrayRef>;
17
18    fn values(&mut self) -> VortexResult<ArrayRef>;
19}
20
21pub fn dict_encode_max_sized(array: &dyn Array, max_dict_bytes: usize) -> VortexResult<DictArray> {
22    let dict_builder: &mut dyn DictEncoder = if let Some(pa) = array.as_opt::<PrimitiveArray>() {
23        match_each_native_ptype!(pa.ptype(), |$P| {
24            &mut PrimitiveDictBuilder::<$P>::new(pa.dtype().nullability(), max_dict_bytes)
25        })
26    } else if let Some(vbv) = array.as_opt::<VarBinViewArray>() {
27        &mut BytesDictBuilder::new(vbv.dtype().clone(), max_dict_bytes)
28    } else if let Some(vb) = array.as_opt::<VarBinArray>() {
29        &mut BytesDictBuilder::new(vb.dtype().clone(), max_dict_bytes)
30    } else {
31        vortex_bail!("Can only encode primitive or varbin/view arrays")
32    };
33    let codes = downscale_integer_array(dict_builder.encode(array)?)?;
34
35    DictArray::try_new(codes, dict_builder.values()?)
36}
37
38pub fn dict_encode(array: &dyn Array) -> VortexResult<DictArray> {
39    let dict_array = dict_encode_max_sized(array, usize::MAX)?;
40    if dict_array.len() != array.len() {
41        vortex_bail!(
42            "must have encoded all {} elements, but only encoded {}",
43            array.len(),
44            dict_array.len(),
45        );
46    }
47    Ok(dict_array)
48}