vortex_array/arrays/listview/
rebuild.rs1use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_dtype::IntegerPType;
7use vortex_dtype::Nullability;
8use vortex_dtype::match_each_integer_ptype;
9use vortex_error::VortexExpect;
10use vortex_scalar::Scalar;
11
12use crate::Array;
13use crate::IntoArray;
14use crate::ToCanonical;
15use crate::arrays::ListViewArray;
16use crate::builders::builder_with_capacity;
17use crate::compute;
18use crate::vtable::ValidityHelper;
19
20pub enum ListViewRebuildMode {
22 MakeZeroCopyToList,
30
31 TrimElements,
34
35 MakeExact,
42
43 OverlapCompression,
50}
51
52impl ListViewArray {
53 pub fn rebuild(&self, mode: ListViewRebuildMode) -> ListViewArray {
55 if self.is_empty() {
56 return self.clone();
57 }
58
59 match mode {
60 ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
61 ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
62 ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
63 ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
64 }
65 }
66
67 fn rebuild_zero_copy_to_list(&self) -> ListViewArray {
75 if self.is_zero_copy_to_list() {
76 return self.clone();
78 }
79
80 let offsets_ptype = self.offsets().dtype().as_ptype();
81 let sizes_ptype = self.sizes().dtype().as_ptype();
82
83 match_each_integer_ptype!(sizes_ptype, |S| {
91 match offsets_ptype {
92 PType::U8 => self.naive_rebuild::<u8, u32, S>(),
93 PType::U16 => self.naive_rebuild::<u16, u32, S>(),
94 PType::U32 => self.naive_rebuild::<u32, u32, S>(),
95 PType::U64 => self.naive_rebuild::<u64, u64, S>(),
96 PType::I8 => self.naive_rebuild::<i8, i32, S>(),
97 PType::I16 => self.naive_rebuild::<i16, i32, S>(),
98 PType::I32 => self.naive_rebuild::<i32, i32, S>(),
99 PType::I64 => self.naive_rebuild::<i64, i64, S>(),
100 _ => unreachable!("invalid offsets PType"),
101 }
102 })
103 }
104
105 fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
110 &self,
111 ) -> ListViewArray {
112 let element_dtype = self
113 .dtype()
114 .as_list_element_opt()
115 .vortex_expect("somehow had a canonical list that was not a list");
116
117 let offsets_canonical = self.offsets().to_primitive();
118 let offsets_slice = offsets_canonical.as_slice::<O>();
119 let sizes_canonical = self.sizes().to_primitive();
120 let sizes_slice = sizes_canonical.as_slice::<S>();
121
122 let len = offsets_slice.len();
123
124 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
125 let mut new_sizes = BufferMut::<S>::with_capacity(len);
130
131 let elements_canonical = self.elements().to_canonical().into_array();
133
134 let mut new_elements_builder =
137 builder_with_capacity(element_dtype.as_ref(), self.elements().len());
138
139 let mut n_elements = NewOffset::zero();
140 for index in 0..len {
141 if !self.is_valid(index) {
142 new_offsets.push(n_elements);
145 new_sizes.push(S::zero());
146 continue;
147 }
148
149 let offset = offsets_slice[index];
150 let size = sizes_slice[index];
151
152 let start = offset.as_();
153 let stop = start + size.as_();
154
155 new_offsets.push(n_elements);
156 new_sizes.push(size);
157 new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop));
158
159 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
160 }
161
162 let offsets = new_offsets.into_array();
163 let sizes = new_sizes.into_array();
164 let elements = new_elements_builder.finish();
165
166 debug_assert_eq!(
167 n_elements.as_(),
168 elements.len(),
169 "The accumulated elements somehow had the wrong length"
170 );
171
172 unsafe {
181 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity.clone())
182 .with_zero_copy_to_list(true)
183 }
184 }
185
186 fn rebuild_trim_elements(&self) -> ListViewArray {
190 let start = if self.is_zero_copy_to_list() {
191 self.offset_at(0)
195 } else {
196 self.offsets().statistics().compute_min().vortex_expect(
197 "[ListViewArray::rebuild]: `offsets` must report min statistic that is a `usize`",
198 )
199 };
200
201 let end = if self.is_zero_copy_to_list() {
202 let last_offset = self.offset_at(self.len() - 1);
205 let last_size = self.size_at(self.len() - 1);
206 last_offset + last_size
207 } else {
208 let min_max = compute::min_max(
209 &compute::add(self.offsets(), self.sizes())
210 .vortex_expect("`offsets + sizes` somehow overflowed"),
211 )
212 .vortex_expect("Something went wrong while computing min and max")
213 .vortex_expect("We checked that the array was not empty in the top-level `rebuild`");
214
215 min_max
216 .max
217 .as_primitive()
218 .as_::<usize>()
219 .vortex_expect("unable to interpret the max `offset + size` as a `usize`")
220 };
221
222 let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
223 let offset = <O as FromPrimitive>::from_usize(start)
224 .vortex_expect("unable to convert the min offset `start` into a `usize`");
225 let scalar = Scalar::primitive(offset, Nullability::NonNullable);
226
227 compute::sub_scalar(self.offsets(), scalar)
228 .vortex_expect("was somehow unable to adjust offsets down by their minimum")
229 });
230
231 let sliced_elements = self.elements().slice(start..end);
232
233 unsafe {
238 ListViewArray::new_unchecked(
239 sliced_elements,
240 adjusted_offsets,
241 self.sizes().clone(),
242 self.validity().clone(),
243 )
244 .with_zero_copy_to_list(self.is_zero_copy_to_list())
245 }
246 }
247
248 fn rebuild_make_exact(&self) -> ListViewArray {
249 if self.is_zero_copy_to_list() {
250 self.rebuild_trim_elements()
251 } else {
252 self.rebuild_zero_copy_to_list()
255 }
256 }
257}
258
259#[cfg(test)]
260mod tests {
261 use vortex_buffer::BitBuffer;
262 use vortex_dtype::Nullability;
263
264 use super::ListViewRebuildMode;
265 use crate::IntoArray;
266 use crate::ToCanonical;
267 use crate::arrays::ListViewArray;
268 use crate::arrays::PrimitiveArray;
269 use crate::validity::Validity;
270 use crate::vtable::ValidityHelper;
271
272 #[test]
273 fn test_rebuild_flatten_removes_overlaps() {
274 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
278 let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
279 let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
280
281 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
282
283 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList);
284
285 assert_eq!(flattened.elements().len(), 5);
288
289 assert_eq!(flattened.offset_at(0), 0);
291 assert_eq!(flattened.size_at(0), 3);
292 assert_eq!(flattened.offset_at(1), 3);
293 assert_eq!(flattened.size_at(1), 2);
294
295 let list0 = flattened.list_elements_at(0).to_primitive();
297 assert_eq!(list0.as_slice::<i32>(), &[1, 2, 3]);
298
299 let list1 = flattened.list_elements_at(1).to_primitive();
300 assert_eq!(list1.as_slice::<i32>(), &[2, 3]);
301 }
302
303 #[test]
304 fn test_rebuild_flatten_with_nullable() {
305 use crate::arrays::BoolArray;
306
307 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
309 let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
310 let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
311 let validity = Validity::Array(
312 BoolArray::from_bit_buffer(
313 BitBuffer::from(vec![true, false, true]),
314 Validity::NonNullable,
315 )
316 .into_array(),
317 );
318
319 let listview = ListViewArray::new(elements, offsets, sizes, validity);
320
321 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList);
322
323 assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
325 assert!(flattened.validity().is_valid(0));
326 assert!(!flattened.validity().is_valid(1));
327 assert!(flattened.validity().is_valid(2));
328
329 let list0 = flattened.list_elements_at(0).to_primitive();
331 assert_eq!(list0.as_slice::<i32>(), &[1, 2]);
332
333 let list2 = flattened.list_elements_at(2).to_primitive();
334 assert_eq!(list2.as_slice::<i32>(), &[3]);
335 }
336
337 #[test]
338 fn test_rebuild_trim_elements_basic() {
339 let elements =
347 PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
348 let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
349 let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
350
351 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
352
353 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements);
354
355 assert_eq!(trimmed.elements().len(), 5);
357
358 assert_eq!(trimmed.offset_at(0), 0);
360 assert_eq!(trimmed.size_at(0), 2);
361 assert_eq!(trimmed.offset_at(1), 3);
362 assert_eq!(trimmed.size_at(1), 2);
363
364 let list0 = trimmed.list_elements_at(0).to_primitive();
366 assert_eq!(list0.as_slice::<i32>(), &[1, 2]);
367
368 let list1 = trimmed.list_elements_at(1).to_primitive();
369 assert_eq!(list1.as_slice::<i32>(), &[3, 4]);
370
371 let all_elements = trimmed.elements().to_primitive();
373 assert_eq!(all_elements.scalar_at(2), 97i32.into());
374 }
375
376 #[test]
377 fn test_rebuild_with_trailing_nulls_regression() {
378 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
384 let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
385 let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
386 let validity = Validity::from_iter(vec![true, true, false, false]);
387
388 let listview = ListViewArray::new(elements, offsets, sizes, validity);
389
390 let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList);
392 assert!(rebuilt.is_zero_copy_to_list());
393
394 assert_eq!(rebuilt.offset_at(0), 0);
397 assert_eq!(rebuilt.offset_at(1), 2);
398 assert_eq!(rebuilt.offset_at(2), 4); assert_eq!(rebuilt.offset_at(3), 4); assert_eq!(rebuilt.size_at(0), 2);
403 assert_eq!(rebuilt.size_at(1), 2);
404 assert_eq!(rebuilt.size_at(2), 0); assert_eq!(rebuilt.size_at(3), 0); let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact);
410
411 assert!(exact.is_valid(0));
413 assert!(exact.is_valid(1));
414 assert!(!exact.is_valid(2));
415 assert!(!exact.is_valid(3));
416
417 let list0 = exact.list_elements_at(0).to_primitive();
419 assert_eq!(list0.as_slice::<i32>(), &[1, 2]);
420
421 let list1 = exact.list_elements_at(1).to_primitive();
422 assert_eq!(list1.as_slice::<i32>(), &[3, 4]);
423 }
424}