vortex_array/builders/dict/
mod.rs1use bytes::bytes_dict_builder;
5use primitive::primitive_dict_builder;
6use vortex_error::VortexResult;
7use vortex_error::vortex_bail;
8use vortex_error::vortex_panic;
9
10use crate::ArrayRef;
11use crate::IntoArray;
12#[expect(deprecated)]
13use crate::ToCanonical as _;
14use crate::arrays::DictArray;
15use crate::arrays::Primitive;
16use crate::arrays::VarBin;
17use crate::arrays::VarBinView;
18use crate::arrays::primitive::PrimitiveArrayExt;
19use crate::dtype::PType;
20use crate::match_each_native_ptype;
21
22mod bytes;
23mod primitive;
24
/// Limits handed to a dictionary encoder when it is created.
///
/// A value of this type is passed to [`dict_encoder`], which forwards it to the
/// primitive or bytes dictionary builder it selects. How each limit is enforced
/// is decided by those builders and is not visible from this module.
#[derive(Clone, Debug)]
pub struct DictConstraints {
    /// Size limit in bytes.
    ///
    /// NOTE(review): presumably the maximum size of the dictionary values in
    /// bytes; confirm against the builders that consume it.
    pub max_bytes: usize,
    /// Length limit.
    ///
    /// NOTE(review): presumably the maximum number of distinct dictionary
    /// values; confirm against the builders that consume it.
    pub max_len: usize,
}
30
31pub const UNCONSTRAINED: DictConstraints = DictConstraints {
32 max_bytes: usize::MAX,
33 max_len: usize::MAX,
34};
35
/// Stateful dictionary encoder, handed out as a boxed trait object by [`dict_encoder`].
///
/// Implementors must be [`Send`].
pub trait DictEncoder: Send {
    /// Encodes `array` and returns an array of dictionary codes.
    ///
    /// [`dict_encode_with_constraints`] converts the returned array to a primitive array
    /// and uses it as the codes of the resulting `DictArray`. [`dict_encode`] checks that
    /// the resulting dictionary array is as long as the input and errors otherwise, so
    /// callers should not assume that every element of `array` was encoded.
    fn encode(&mut self, array: &ArrayRef) -> ArrayRef;

    /// Returns the array that [`dict_encode_with_constraints`] uses as the dictionary
    /// values.
    ///
    /// NOTE(review): the name suggests this also clears the encoder's state so it can be
    /// reused; confirm against the implementations.
    fn reset(&mut self) -> ArrayRef;

    /// Returns a [`PType`].
    ///
    /// NOTE(review): presumably the primitive type of the codes produced by `encode`;
    /// confirm against the implementations.
    fn codes_ptype(&self) -> PType;
}
46
47pub fn dict_encoder(array: &ArrayRef, constraints: &DictConstraints) -> Box<dyn DictEncoder> {
48 let dict_builder: Box<dyn DictEncoder> = if let Some(pa) = array.as_opt::<Primitive>() {
49 match_each_native_ptype!(pa.ptype(), |P| {
50 primitive_dict_builder::<P>(pa.dtype().nullability(), constraints)
51 })
52 } else if let Some(vbv) = array.as_opt::<VarBinView>() {
53 bytes_dict_builder(vbv.dtype().clone(), constraints)
54 } else if let Some(vb) = array.as_opt::<VarBin>() {
55 bytes_dict_builder(vb.dtype().clone(), constraints)
56 } else {
57 vortex_panic!("Can only encode primitive or varbin/view arrays")
58 };
59 dict_builder
60}
61
62pub fn dict_encode_with_constraints(
66 array: &ArrayRef,
67 constraints: &DictConstraints,
68) -> VortexResult<DictArray> {
69 let mut encoder = dict_encoder(array, constraints);
70 let encoded = encoder.encode(array);
71 #[expect(deprecated)]
72 let codes = encoded.to_primitive().narrow()?;
73 unsafe {
77 Ok(
78 DictArray::new_unchecked(codes.into_array(), encoder.reset())
79 .set_all_values_referenced(true),
80 )
81 }
82}
83
84pub fn dict_encode(array: &ArrayRef) -> VortexResult<DictArray> {
85 let dict_array = dict_encode_with_constraints(array, &UNCONSTRAINED)?;
86 if dict_array.len() != array.len() {
87 vortex_bail!(
88 "must have encoded all {} elements, but only encoded {}",
89 array.len(),
90 dict_array.len(),
91 );
92 }
93 Ok(dict_array)
94}