vortex_array/arrays/chunked/vtable/
canonical.rs1use vortex_buffer::BufferMut;
5use vortex_dtype::{DType, Nullability, PType, StructFields};
6use vortex_error::VortexExpect;
7
8use crate::arrays::{
9 ChunkedArray, ChunkedVTable, ListViewArray, ListViewRebuildMode, PrimitiveArray, StructArray,
10};
11use crate::builders::{ArrayBuilder, builder_with_capacity};
12use crate::compute::cast;
13use crate::validity::Validity;
14use crate::vtable::CanonicalVTable;
15use crate::{Array, ArrayRef, Canonical, IntoArray, ToCanonical};
16
17impl CanonicalVTable<ChunkedVTable> for ChunkedVTable {
18 fn canonicalize(array: &ChunkedArray) -> Canonical {
19 if array.nchunks() == 0 {
20 return Canonical::empty(array.dtype());
21 }
22 if array.nchunks() == 1 {
23 return array.chunks()[0].to_canonical();
24 }
25
26 match array.dtype() {
27 DType::Struct(struct_dtype, _) => {
28 let struct_array = pack_struct_chunks(
29 array.chunks(),
30 Validity::copy_from_array(array.as_ref()),
31 struct_dtype,
32 );
33 Canonical::Struct(struct_array)
34 }
35 DType::List(elem_dtype, _) => Canonical::List(swizzle_list_chunks(
36 array.chunks(),
37 Validity::copy_from_array(array.as_ref()),
38 elem_dtype,
39 )),
40 _ => {
41 let mut builder = builder_with_capacity(array.dtype(), array.len());
42 array.append_to_builder(builder.as_mut());
43 builder.finish_into_canonical()
44 }
45 }
46 }
47
48 fn append_to_builder(array: &ChunkedArray, builder: &mut dyn ArrayBuilder) {
49 for chunk in array.chunks() {
50 chunk.append_to_builder(builder);
51 }
52 }
53}
54
55fn pack_struct_chunks(
60 chunks: &[ArrayRef],
61 validity: Validity,
62 struct_dtype: &StructFields,
63) -> StructArray {
64 let len = chunks.iter().map(|chunk| chunk.len()).sum();
65 let mut field_arrays = Vec::new();
66
67 for (field_idx, field_dtype) in struct_dtype.fields().enumerate() {
68 let field_chunks = chunks
69 .iter()
70 .map(|c| {
71 c.to_struct()
72 .fields()
73 .get(field_idx)
74 .vortex_expect("Invalid field index")
75 .to_array()
76 })
77 .collect::<Vec<_>>();
78
79 let field_array = unsafe { ChunkedArray::new_unchecked(field_chunks, field_dtype.clone()) };
82 field_arrays.push(field_array.into_array());
83 }
84
85 unsafe { StructArray::new_unchecked(field_arrays, struct_dtype.clone(), len, validity) }
88}
89
90fn swizzle_list_chunks(
96 chunks: &[ArrayRef],
97 validity: Validity,
98 elem_dtype: &DType,
99) -> ListViewArray {
100 let len: usize = chunks.iter().map(|c| c.len()).sum();
101
102 assert_eq!(
103 chunks[0]
104 .dtype()
105 .as_list_element_opt()
106 .vortex_expect("DType was somehow not a list")
107 .as_ref(),
108 elem_dtype
109 );
110
111 let mut list_elements_chunks = Vec::with_capacity(chunks.len());
115 let mut num_elements = 0;
116
117 let mut offsets = BufferMut::<u64>::with_capacity(len);
122 let mut sizes = BufferMut::<u64>::with_capacity(len);
123
124 for chunk in chunks {
125 let chunk_array = chunk.to_listview();
126 let chunk_array = chunk_array.rebuild(ListViewRebuildMode::MakeExact);
129
130 list_elements_chunks.push(chunk_array.elements().clone());
132
133 let offsets_arr = cast(
135 chunk_array.offsets(),
136 &DType::Primitive(PType::U64, Nullability::NonNullable),
137 )
138 .vortex_expect("Must be able to fit array offsets in u64")
139 .to_primitive();
140
141 let sizes_arr = cast(
142 chunk_array.sizes(),
143 &DType::Primitive(PType::U64, Nullability::NonNullable),
144 )
145 .vortex_expect("Must be able to fit array offsets in u64")
146 .to_primitive();
147
148 let offsets_slice = offsets_arr.as_slice::<u64>();
149 let sizes_slice = sizes_arr.as_slice::<u64>();
150
151 offsets.extend(offsets_slice.iter().map(|o| o + num_elements));
153 sizes.extend(sizes_slice);
154
155 num_elements += chunk_array.elements().len() as u64;
156 }
157
158 let chunked_elements =
160 unsafe { ChunkedArray::new_unchecked(list_elements_chunks, elem_dtype.clone()) }
161 .into_array();
162
163 let offsets = PrimitiveArray::new(offsets.freeze(), Validity::NonNullable).into_array();
164 let sizes = PrimitiveArray::new(sizes.freeze(), Validity::NonNullable).into_array();
165
166 unsafe {
174 ListViewArray::new_unchecked(chunked_elements, offsets, sizes, validity)
175 .with_zero_copy_to_list(true)
176 }
177}
178
179#[cfg(test)]
180mod tests {
181 use std::sync::Arc;
182
183 use vortex_buffer::buffer;
184 use vortex_dtype::DType::{List, Primitive};
185 use vortex_dtype::Nullability::NonNullable;
186 use vortex_dtype::PType::I32;
187
188 use crate::accessor::ArrayAccessor;
189 use crate::arrays::{ChunkedArray, ListArray, StructArray, VarBinViewArray};
190 use crate::validity::Validity;
191 use crate::{IntoArray, ToCanonical};
192
193 #[test]
194 pub fn pack_nested_structs() {
195 let struct_array = StructArray::try_new(
196 ["a"].into(),
197 vec![VarBinViewArray::from_iter_str(["foo", "bar", "baz", "quak"]).into_array()],
198 4,
199 Validity::NonNullable,
200 )
201 .unwrap();
202 let dtype = struct_array.dtype().clone();
203 let chunked = ChunkedArray::try_new(
204 vec![
205 ChunkedArray::try_new(vec![struct_array.to_array()], dtype.clone())
206 .unwrap()
207 .into_array(),
208 ],
209 dtype,
210 )
211 .unwrap()
212 .into_array();
213 let canonical_struct = chunked.to_struct();
214 let canonical_varbin = canonical_struct.fields()[0].to_varbinview();
215 let original_varbin = struct_array.fields()[0].to_varbinview();
216 let orig_values = original_varbin
217 .with_iterator(|it| it.map(|a| a.map(|v| v.to_vec())).collect::<Vec<_>>());
218 let canon_values = canonical_varbin
219 .with_iterator(|it| it.map(|a| a.map(|v| v.to_vec())).collect::<Vec<_>>());
220 assert_eq!(orig_values, canon_values);
221 }
222
223 #[test]
224 pub fn pack_nested_lists() {
225 let l1 = ListArray::try_new(
226 buffer![1, 2, 3, 4].into_array(),
227 buffer![0, 3].into_array(),
228 Validity::NonNullable,
229 )
230 .unwrap();
231
232 let l2 = ListArray::try_new(
233 buffer![5, 6].into_array(),
234 buffer![0, 2].into_array(),
235 Validity::NonNullable,
236 )
237 .unwrap();
238
239 let chunked_list = ChunkedArray::try_new(
240 vec![l1.clone().into_array(), l2.clone().into_array()],
241 List(Arc::new(Primitive(I32, NonNullable)), NonNullable),
242 );
243
244 let canon_values = chunked_list.unwrap().to_listview();
245
246 assert_eq!(l1.scalar_at(0), canon_values.scalar_at(0));
247 assert_eq!(l2.scalar_at(0), canon_values.scalar_at(1));
248 }
249}