// vortex_array/arrays/dict/vtable/mod.rs
use vortex_compute::take::Take;
5use vortex_dtype::DType;
6use vortex_dtype::Nullability;
7use vortex_dtype::PType;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_ensure;
11use vortex_error::vortex_err;
12use vortex_vector::Vector;
13
14use super::DictArray;
15use super::DictMetadata;
16use crate::ArrayRef;
17use crate::DeserializeMetadata;
18use crate::ProstMetadata;
19use crate::SerializeMetadata;
20use crate::VectorExecutor;
21use crate::arrays::vtable::rules::PARENT_RULES;
22use crate::buffer::BufferHandle;
23use crate::executor::ExecutionCtx;
24use crate::serde::ArrayChildren;
25use crate::vtable;
26use crate::vtable::ArrayId;
27use crate::vtable::ArrayVTable;
28use crate::vtable::ArrayVTableExt;
29use crate::vtable::NotSupported;
30use crate::vtable::VTable;
31
32mod array;
33mod canonical;
34mod encode;
35mod operations;
36mod rules;
37mod validity;
38mod visitor;
39
// Invokes the crate-level `vtable!` macro for the `Dict` encoding.
// NOTE(review): the macro body is not visible here; presumably it generates the
// glue between `DictArray` and `DictVTable` — confirm against the macro definition.
vtable!(Dict);
41
/// Unit marker type whose [`VTable`] implementation describes the
/// `"vortex.dict"` encoding backed by [`DictArray`].
#[derive(Debug)]
pub struct DictVTable;
44
45impl VTable for DictVTable {
46 type Array = DictArray;
47
48 type Metadata = ProstMetadata<DictMetadata>;
49
50 type ArrayVTable = Self;
51 type CanonicalVTable = Self;
52 type OperationsVTable = Self;
53 type ValidityVTable = Self;
54 type VisitorVTable = Self;
55 type ComputeVTable = NotSupported;
56 type EncodeVTable = Self;
57
58 fn id(&self) -> ArrayId {
59 ArrayId::new_ref("vortex.dict")
60 }
61
62 fn encoding(_array: &Self::Array) -> ArrayVTable {
63 DictVTable.as_vtable()
64 }
65
66 fn metadata(array: &DictArray) -> VortexResult<Self::Metadata> {
67 Ok(ProstMetadata(DictMetadata {
68 codes_ptype: PType::try_from(array.codes().dtype())? as i32,
69 values_len: u32::try_from(array.values().len()).map_err(|_| {
70 vortex_err!(
71 "Dictionary values size {} overflowed u32",
72 array.values().len()
73 )
74 })?,
75 is_nullable_codes: Some(array.codes().dtype().is_nullable()),
76 all_values_referenced: Some(array.all_values_referenced),
77 }))
78 }
79
80 fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
81 Ok(Some(metadata.serialize()))
82 }
83
84 fn deserialize(buffer: &[u8]) -> VortexResult<Self::Metadata> {
85 let metadata = <Self::Metadata as DeserializeMetadata>::deserialize(buffer)?;
86 Ok(ProstMetadata(metadata))
87 }
88
89 fn build(
90 &self,
91 dtype: &DType,
92 len: usize,
93 metadata: &Self::Metadata,
94 _buffers: &[BufferHandle],
95 children: &dyn ArrayChildren,
96 ) -> VortexResult<DictArray> {
97 if children.len() != 2 {
98 vortex_bail!(
99 "Expected 2 children for dict encoding, found {}",
100 children.len()
101 )
102 }
103 let codes_nullable = metadata
104 .is_nullable_codes
105 .map(Nullability::from)
106 .unwrap_or_else(|| dtype.nullability());
109 let codes_dtype = DType::Primitive(metadata.codes_ptype(), codes_nullable);
110 let codes = children.get(0, &codes_dtype, len)?;
111 let values = children.get(1, dtype, metadata.values_len as usize)?;
112 let all_values_referenced = metadata.all_values_referenced.unwrap_or(false);
113
114 Ok(unsafe {
116 DictArray::new_unchecked(codes, values).set_all_values_referenced(all_values_referenced)
117 })
118 }
119
120 fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
121 vortex_ensure!(
122 children.len() == 2,
123 "DictArray expects exactly 2 children (codes, values), got {}",
124 children.len()
125 );
126 let [codes, values]: [ArrayRef; 2] = children
127 .try_into()
128 .map_err(|_| vortex_err!("Failed to convert children to array"))?;
129 array.codes = codes;
130 array.values = values;
131 Ok(())
132 }
133
134 fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<Vector> {
135 let values = array.values().execute(ctx)?;
136 let codes = array.codes().execute(ctx)?.into_primitive();
137 Ok(values.take(&codes))
138 }
139
140 fn reduce_parent(
141 array: &Self::Array,
142 parent: &ArrayRef,
143 child_idx: usize,
144 ) -> VortexResult<Option<ArrayRef>> {
145 PARENT_RULES.evaluate(array, parent, child_idx)
146 }
147}