vortex_array/arrays/listview/
rebuild.rs1use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_dtype::IntegerPType;
7use vortex_dtype::Nullability;
8use vortex_dtype::match_each_integer_ptype;
9use vortex_error::VortexExpect;
10use vortex_error::VortexResult;
11
12use crate::Array;
13use crate::IntoArray;
14use crate::ToCanonical;
15use crate::arrays::ListViewArray;
16use crate::builders::builder_with_capacity;
17use crate::compute;
18use crate::scalar::Scalar;
19use crate::vtable::ValidityHelper;
20
21pub enum ListViewRebuildMode {
23 MakeZeroCopyToList,
31
32 TrimElements,
35
36 MakeExact,
43
44 OverlapCompression,
51}
52
53impl ListViewArray {
54 pub fn rebuild(&self, mode: ListViewRebuildMode) -> VortexResult<ListViewArray> {
56 if self.is_empty() {
57 return Ok(self.clone());
58 }
59
60 match mode {
61 ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
62 ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
63 ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
64 ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
65 }
66 }
67
68 fn rebuild_zero_copy_to_list(&self) -> VortexResult<ListViewArray> {
76 if self.is_zero_copy_to_list() {
77 return Ok(self.clone());
79 }
80
81 let offsets_ptype = self.offsets().dtype().as_ptype();
82 let sizes_ptype = self.sizes().dtype().as_ptype();
83
84 match_each_integer_ptype!(sizes_ptype, |S| {
92 match offsets_ptype {
93 PType::U8 => self.naive_rebuild::<u8, u32, S>(),
94 PType::U16 => self.naive_rebuild::<u16, u32, S>(),
95 PType::U32 => self.naive_rebuild::<u32, u32, S>(),
96 PType::U64 => self.naive_rebuild::<u64, u64, S>(),
97 PType::I8 => self.naive_rebuild::<i8, i32, S>(),
98 PType::I16 => self.naive_rebuild::<i16, i32, S>(),
99 PType::I32 => self.naive_rebuild::<i32, i32, S>(),
100 PType::I64 => self.naive_rebuild::<i64, i64, S>(),
101 _ => unreachable!("invalid offsets PType"),
102 }
103 })
104 }
105
106 fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
111 &self,
112 ) -> VortexResult<ListViewArray> {
113 let element_dtype = self
114 .dtype()
115 .as_list_element_opt()
116 .vortex_expect("somehow had a canonical list that was not a list");
117
118 let offsets_canonical = self.offsets().to_primitive();
119 let offsets_slice = offsets_canonical.as_slice::<O>();
120 let sizes_canonical = self.sizes().to_primitive();
121 let sizes_slice = sizes_canonical.as_slice::<S>();
122
123 let len = offsets_slice.len();
124
125 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
126 let mut new_sizes = BufferMut::<S>::with_capacity(len);
131
132 let elements_canonical = self
134 .elements()
135 .to_canonical()
136 .vortex_expect("canonicalize elements for rebuild")
137 .into_array();
138
139 let mut new_elements_builder =
142 builder_with_capacity(element_dtype.as_ref(), self.elements().len());
143
144 let mut n_elements = NewOffset::zero();
145 for index in 0..len {
146 if !self.is_valid(index)? {
147 new_offsets.push(n_elements);
150 new_sizes.push(S::zero());
151 continue;
152 }
153
154 let offset = offsets_slice[index];
155 let size = sizes_slice[index];
156
157 let start = offset.as_();
158 let stop = start + size.as_();
159
160 new_offsets.push(n_elements);
161 new_sizes.push(size);
162 new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
163
164 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
165 }
166
167 let offsets = new_offsets.into_array();
168 let sizes = new_sizes.into_array();
169 let elements = new_elements_builder.finish();
170
171 debug_assert_eq!(
172 n_elements.as_(),
173 elements.len(),
174 "The accumulated elements somehow had the wrong length"
175 );
176
177 Ok(unsafe {
186 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity.clone())
187 .with_zero_copy_to_list(true)
188 })
189 }
190
191 fn rebuild_trim_elements(&self) -> VortexResult<ListViewArray> {
195 let start = if self.is_zero_copy_to_list() {
196 self.offset_at(0)
200 } else {
201 self.offsets().statistics().compute_min().vortex_expect(
202 "[ListViewArray::rebuild]: `offsets` must report min statistic that is a `usize`",
203 )
204 };
205
206 let end = if self.is_zero_copy_to_list() {
207 let last_offset = self.offset_at(self.len() - 1);
210 let last_size = self.size_at(self.len() - 1);
211 last_offset + last_size
212 } else {
213 let min_max = compute::min_max(
214 &compute::add(self.offsets(), self.sizes())
215 .vortex_expect("`offsets + sizes` somehow overflowed"),
216 )
217 .vortex_expect("Something went wrong while computing min and max")
218 .vortex_expect("We checked that the array was not empty in the top-level `rebuild`");
219
220 min_max
221 .max
222 .as_primitive()
223 .as_::<usize>()
224 .vortex_expect("unable to interpret the max `offset + size` as a `usize`")
225 };
226
227 let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
228 let offset = <O as FromPrimitive>::from_usize(start)
229 .vortex_expect("unable to convert the min offset `start` into a `usize`");
230 let scalar = Scalar::primitive(offset, Nullability::NonNullable);
231
232 compute::sub_scalar(self.offsets(), scalar)
233 .vortex_expect("was somehow unable to adjust offsets down by their minimum")
234 });
235
236 let sliced_elements = self.elements().slice(start..end)?;
237
238 Ok(unsafe {
243 ListViewArray::new_unchecked(
244 sliced_elements,
245 adjusted_offsets,
246 self.sizes().clone(),
247 self.validity().clone(),
248 )
249 .with_zero_copy_to_list(self.is_zero_copy_to_list())
250 })
251 }
252
253 fn rebuild_make_exact(&self) -> VortexResult<ListViewArray> {
254 if self.is_zero_copy_to_list() {
255 self.rebuild_trim_elements()
256 } else {
257 self.rebuild_zero_copy_to_list()
260 }
261 }
262}
263
264#[cfg(test)]
265mod tests {
266 use vortex_buffer::BitBuffer;
267 use vortex_dtype::Nullability;
268 use vortex_error::VortexResult;
269
270 use super::ListViewRebuildMode;
271 use crate::IntoArray;
272 use crate::ToCanonical;
273 use crate::arrays::ListViewArray;
274 use crate::arrays::PrimitiveArray;
275 use crate::assert_arrays_eq;
276 use crate::validity::Validity;
277 use crate::vtable::ValidityHelper;
278
279 #[test]
280 fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
281 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
285 let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
286 let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
287
288 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
289
290 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
291
292 assert_eq!(flattened.elements().len(), 5);
295
296 assert_eq!(flattened.offset_at(0), 0);
298 assert_eq!(flattened.size_at(0), 3);
299 assert_eq!(flattened.offset_at(1), 3);
300 assert_eq!(flattened.size_at(1), 2);
301
302 assert_arrays_eq!(
304 flattened.list_elements_at(0).unwrap(),
305 PrimitiveArray::from_iter([1i32, 2, 3])
306 );
307
308 assert_arrays_eq!(
309 flattened.list_elements_at(1).unwrap(),
310 PrimitiveArray::from_iter([2i32, 3])
311 );
312 Ok(())
313 }
314
315 #[test]
316 fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
317 use crate::arrays::BoolArray;
318
319 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
321 let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
322 let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
323 let validity = Validity::Array(
324 BoolArray::new(
325 BitBuffer::from(vec![true, false, true]),
326 Validity::NonNullable,
327 )
328 .into_array(),
329 );
330
331 let listview = ListViewArray::new(elements, offsets, sizes, validity);
332
333 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
334
335 assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
337 assert!(flattened.validity().is_valid(0).unwrap());
338 assert!(!flattened.validity().is_valid(1).unwrap());
339 assert!(flattened.validity().is_valid(2).unwrap());
340
341 assert_arrays_eq!(
343 flattened.list_elements_at(0).unwrap(),
344 PrimitiveArray::from_iter([1i32, 2])
345 );
346
347 assert_arrays_eq!(
348 flattened.list_elements_at(2).unwrap(),
349 PrimitiveArray::from_iter([3i32])
350 );
351 Ok(())
352 }
353
354 #[test]
355 fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
356 let elements =
364 PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
365 let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
366 let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
367
368 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
369
370 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
371
372 assert_eq!(trimmed.elements().len(), 5);
374
375 assert_eq!(trimmed.offset_at(0), 0);
377 assert_eq!(trimmed.size_at(0), 2);
378 assert_eq!(trimmed.offset_at(1), 3);
379 assert_eq!(trimmed.size_at(1), 2);
380
381 assert_arrays_eq!(
383 trimmed.list_elements_at(0).unwrap(),
384 PrimitiveArray::from_iter([1i32, 2])
385 );
386
387 assert_arrays_eq!(
388 trimmed.list_elements_at(1).unwrap(),
389 PrimitiveArray::from_iter([3i32, 4])
390 );
391
392 let all_elements = trimmed.elements().to_primitive();
394 assert_eq!(all_elements.scalar_at(2).unwrap(), 97i32.into());
395 Ok(())
396 }
397
398 #[test]
399 fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
400 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
406 let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
407 let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
408 let validity = Validity::from_iter(vec![true, true, false, false]);
409
410 let listview = ListViewArray::new(elements, offsets, sizes, validity);
411
412 let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
414 assert!(rebuilt.is_zero_copy_to_list());
415
416 assert_eq!(rebuilt.offset_at(0), 0);
419 assert_eq!(rebuilt.offset_at(1), 2);
420 assert_eq!(rebuilt.offset_at(2), 4); assert_eq!(rebuilt.offset_at(3), 4); assert_eq!(rebuilt.size_at(0), 2);
425 assert_eq!(rebuilt.size_at(1), 2);
426 assert_eq!(rebuilt.size_at(2), 0); assert_eq!(rebuilt.size_at(3), 0); let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact)?;
432
433 assert!(exact.is_valid(0).unwrap());
435 assert!(exact.is_valid(1).unwrap());
436 assert!(!exact.is_valid(2).unwrap());
437 assert!(!exact.is_valid(3).unwrap());
438
439 assert_arrays_eq!(
441 exact.list_elements_at(0).unwrap(),
442 PrimitiveArray::from_iter([1i32, 2])
443 );
444
445 assert_arrays_eq!(
446 exact.list_elements_at(1).unwrap(),
447 PrimitiveArray::from_iter([3i32, 4])
448 );
449 Ok(())
450 }
451}