vortex_array/arrays/dict/vtable/
mod.rs1use kernel::PARENT_KERNELS;
5use vortex_dtype::DType;
6use vortex_dtype::Nullability;
7use vortex_dtype::PType;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_ensure;
11use vortex_error::vortex_err;
12use vortex_session::VortexSession;
13
14use super::DictArray;
15use super::DictMetadata;
16use super::take_canonical;
17use crate::Array;
18use crate::ArrayRef;
19use crate::Canonical;
20use crate::DeserializeMetadata;
21use crate::IntoArray;
22use crate::ProstMetadata;
23use crate::SerializeMetadata;
24use crate::arrays::ConstantArray;
25use crate::arrays::dict::compute::rules::PARENT_RULES;
26use crate::buffer::BufferHandle;
27use crate::executor::ExecutionCtx;
28use crate::scalar::Scalar;
29use crate::serde::ArrayChildren;
30use crate::vtable;
31use crate::vtable::ArrayId;
32use crate::vtable::VTable;
33
34mod array;
35mod kernel;
36mod operations;
37mod validity;
38mod visitor;
39
40vtable!(Dict);
41
/// Zero-sized marker type that carries the vtable implementations for [`DictArray`].
#[derive(Debug)]
pub struct DictVTable;
44
45impl DictVTable {
46 pub const ID: ArrayId = ArrayId::new_ref("vortex.dict");
47}
48
49impl VTable for DictVTable {
50 type Array = DictArray;
51
52 type Metadata = ProstMetadata<DictMetadata>;
53
54 type ArrayVTable = Self;
55 type OperationsVTable = Self;
56 type ValidityVTable = Self;
57 type VisitorVTable = Self;
58
59 fn id(_array: &Self::Array) -> ArrayId {
60 Self::ID
61 }
62
63 fn metadata(array: &DictArray) -> VortexResult<Self::Metadata> {
64 Ok(ProstMetadata(DictMetadata {
65 codes_ptype: PType::try_from(array.codes().dtype())? as i32,
66 values_len: u32::try_from(array.values().len()).map_err(|_| {
67 vortex_err!(
68 "Dictionary values size {} overflowed u32",
69 array.values().len()
70 )
71 })?,
72 is_nullable_codes: Some(array.codes().dtype().is_nullable()),
73 all_values_referenced: Some(array.all_values_referenced),
74 }))
75 }
76
77 fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
78 Ok(Some(metadata.serialize()))
79 }
80
81 fn deserialize(
82 bytes: &[u8],
83 _dtype: &DType,
84 _len: usize,
85 _buffers: &[BufferHandle],
86 _session: &VortexSession,
87 ) -> VortexResult<Self::Metadata> {
88 let metadata = <Self::Metadata as DeserializeMetadata>::deserialize(bytes)?;
89 Ok(ProstMetadata(metadata))
90 }
91
92 fn build(
93 dtype: &DType,
94 len: usize,
95 metadata: &Self::Metadata,
96 _buffers: &[BufferHandle],
97 children: &dyn ArrayChildren,
98 ) -> VortexResult<DictArray> {
99 if children.len() != 2 {
100 vortex_bail!(
101 "Expected 2 children for dict encoding, found {}",
102 children.len()
103 )
104 }
105 let codes_nullable = metadata
106 .is_nullable_codes
107 .map(Nullability::from)
108 .unwrap_or_else(|| dtype.nullability());
111 let codes_dtype = DType::Primitive(metadata.codes_ptype(), codes_nullable);
112 let codes = children.get(0, &codes_dtype, len)?;
113 let values = children.get(1, dtype, metadata.values_len as usize)?;
114 let all_values_referenced = metadata.all_values_referenced.unwrap_or(false);
115
116 Ok(unsafe {
118 DictArray::new_unchecked(codes, values).set_all_values_referenced(all_values_referenced)
119 })
120 }
121
122 fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
123 vortex_ensure!(
124 children.len() == 2,
125 "DictArray expects exactly 2 children (codes, values), got {}",
126 children.len()
127 );
128 let [codes, values]: [ArrayRef; 2] = children
129 .try_into()
130 .map_err(|_| vortex_err!("Failed to convert children to array"))?;
131 array.codes = codes;
132 array.values = values;
133 Ok(())
134 }
135
136 fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<ArrayRef> {
137 if let Some(canonical) = execute_fast_path(array, ctx)? {
138 return Ok(canonical);
139 }
140
141 let values = array.values().clone().execute::<Canonical>(ctx)?;
143 let codes = array
144 .codes()
145 .clone()
146 .execute::<Canonical>(ctx)?
147 .into_primitive();
148
149 Ok(take_canonical(values, &codes, ctx)?.into_array())
155 }
156
157 fn reduce_parent(
158 array: &Self::Array,
159 parent: &ArrayRef,
160 child_idx: usize,
161 ) -> VortexResult<Option<ArrayRef>> {
162 PARENT_RULES.evaluate(array, parent, child_idx)
163 }
164
165 fn execute_parent(
166 array: &Self::Array,
167 parent: &ArrayRef,
168 child_idx: usize,
169 ctx: &mut ExecutionCtx,
170 ) -> VortexResult<Option<ArrayRef>> {
171 PARENT_KERNELS.execute(array, parent, child_idx, ctx)
172 }
173}
174
175pub(super) fn execute_fast_path(
177 array: &DictArray,
178 _ctx: &mut ExecutionCtx,
179) -> VortexResult<Option<ArrayRef>> {
180 if array.is_empty() {
182 let result_dtype = array
183 .dtype()
184 .union_nullability(array.codes().dtype().nullability());
185 return Ok(Some(Canonical::empty(&result_dtype).into_array()));
186 }
187
188 if array.codes.all_invalid()? {
190 return Ok(Some(
191 ConstantArray::new(Scalar::null(array.dtype().as_nullable()), array.codes.len())
192 .into_array(),
193 ));
194 }
195
196 Ok(None)
197}