vortex_dict/builders/
mod.rs

1use bytes::bytes_dict_builder;
2use primitive::primitive_dict_builder;
3use vortex_array::arrays::{PrimitiveVTable, VarBinVTable, VarBinViewVTable};
4use vortex_array::compress::downscale_integer_array;
5use vortex_array::{Array, ArrayRef};
6use vortex_dtype::match_each_native_ptype;
7use vortex_error::{VortexResult, vortex_bail};
8
9use crate::DictArray;
10
11mod bytes;
12mod primitive;
13
/// Limits handed to the dictionary builders created by [`dict_encoder`].
///
/// The limits themselves are enforced by the concrete builders (the `bytes`
/// and `primitive` submodules), not by this module.
#[derive(Clone, Debug)]
pub struct DictConstraints {
    /// Byte budget for the dictionary.
    // NOTE(review): presumably bounds the size of the dictionary values — confirm in the builders.
    pub max_bytes: usize,
    /// Length budget for the dictionary.
    // NOTE(review): presumably bounds the number of distinct dictionary entries — confirm in the builders.
    pub max_len: usize,
}
19
20pub const UNCONSTRAINED: DictConstraints = DictConstraints {
21    max_bytes: usize::MAX,
22    max_len: usize::MAX,
23};
24
25pub trait DictEncoder: Send {
26    fn encode(&mut self, array: &dyn Array) -> VortexResult<ArrayRef>;
27
28    fn values(&mut self) -> VortexResult<ArrayRef>;
29}
30
31pub fn dict_encoder(
32    array: &dyn Array,
33    constraints: &DictConstraints,
34) -> VortexResult<Box<dyn DictEncoder>> {
35    let dict_builder: Box<dyn DictEncoder> = if let Some(pa) = array.as_opt::<PrimitiveVTable>() {
36        match_each_native_ptype!(pa.ptype(), |$P| {
37            primitive_dict_builder::<$P>(pa.dtype().nullability(), &constraints)
38        })
39    } else if let Some(vbv) = array.as_opt::<VarBinViewVTable>() {
40        bytes_dict_builder(vbv.dtype().clone(), constraints)
41    } else if let Some(vb) = array.as_opt::<VarBinVTable>() {
42        bytes_dict_builder(vb.dtype().clone(), constraints)
43    } else {
44        vortex_bail!("Can only encode primitive or varbin/view arrays")
45    };
46    Ok(dict_builder)
47}
48
49pub fn dict_encode_with_constraints(
50    array: &dyn Array,
51    constraints: &DictConstraints,
52) -> VortexResult<DictArray> {
53    let mut encoder = dict_encoder(array, constraints)?;
54    let codes = downscale_integer_array(encoder.encode(array)?)?;
55    DictArray::try_new(codes, encoder.values()?)
56}
57
58pub fn dict_encode(array: &dyn Array) -> VortexResult<DictArray> {
59    let dict_array = dict_encode_with_constraints(array, &UNCONSTRAINED)?;
60    if dict_array.len() != array.len() {
61        vortex_bail!(
62            "must have encoded all {} elements, but only encoded {}",
63            array.len(),
64            dict_array.len(),
65        );
66    }
67    Ok(dict_array)
68}