vortex_array/arrays/listview/
rebuild.rs1use vortex_dtype::{IntegerPType, Nullability, match_each_integer_ptype};
5use vortex_error::VortexExpect;
6
7use crate::arrays::ListViewArray;
8use crate::builders::{ArrayBuilder, ListViewBuilder, PrimitiveBuilder, builder_with_capacity};
9use crate::vtable::ValidityHelper;
10use crate::{Array, IntoArray, ToCanonical};
11
12pub enum ListViewRebuildMode {
14 MakeZeroCopyToList,
22
23 RemoveGaps,
28
29 OverlapCompression,
36}
37
38impl ListViewArray {
39 pub fn rebuild(&self, mode: ListViewRebuildMode) -> ListViewArray {
41 match mode {
42 ListViewRebuildMode::RemoveGaps => self.rebuild_remove_gaps(),
43 ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
44 ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
45 }
46 }
47
48 fn rebuild_zero_copy_to_list(&self) -> ListViewArray {
56 let element_dtype = self
57 .dtype()
58 .as_list_element_opt()
59 .vortex_expect("somehow had a canonical list that was not a list");
60
61 let offsets_ptype = self.offsets().dtype().as_ptype();
62 let sizes_ptype = self.sizes().dtype().as_ptype();
63
64 match_each_integer_ptype!(offsets_ptype, |O| {
65 match_each_integer_ptype!(sizes_ptype, |S| {
66 let mut builder = ListViewBuilder::<O, S>::with_capacity(
67 element_dtype.clone(),
68 self.dtype().nullability(),
69 self.elements().len(),
70 self.len(),
71 );
72
73 builder.extend_from_array(self.as_ref());
74 builder.finish_into_listview()
75 })
76 })
77 }
78
79 fn rebuild_remove_gaps(&self) -> ListViewArray {
87 if self.is_empty() {
88 return self.clone();
89 }
90
91 let offset_ptype = self.offsets().dtype().as_ptype();
92 let size_ptype = self.sizes().dtype().as_ptype();
93
94 match_each_integer_ptype!(offset_ptype, |O| {
95 match_each_integer_ptype!(size_ptype, |S| { self.rebuild_remove_gaps_inner::<O, S>() })
96 })
97 }
98
99 fn rebuild_remove_gaps_inner<O, S>(&self) -> ListViewArray
100 where
101 O: IntegerPType,
102 S: IntegerPType,
103 {
104 let offsets_primitive = self.offsets().to_primitive();
105 let sizes_primitive = self.sizes().to_primitive();
106 let offsets_slice = offsets_primitive.as_slice::<O>();
107 let sizes_slice = sizes_primitive.as_slice::<S>();
108
109 let mut referenced_elements = vec![false; self.elements().len()];
111 for i in 0..self.len() {
112 let offset: usize = offsets_slice[i].as_();
113 let size: usize = sizes_slice[i].as_();
114 for j in offset..offset + size {
115 referenced_elements[j] = true;
116 }
117 }
118
119 if referenced_elements
121 .iter()
122 .all(|&is_referenced| is_referenced)
123 {
124 return self.clone();
125 }
126
127 let mut prefix_sum = vec![0; self.elements().len()];
130 let mut cumulative_sum = 0;
131 for i in 0..referenced_elements.len() {
132 prefix_sum[i] = cumulative_sum;
133 if referenced_elements[i] {
134 cumulative_sum += 1;
135 }
136 }
137
138 let mut elements_builder = builder_with_capacity(self.elements().dtype(), cumulative_sum);
140 for (i, &is_referenced) in referenced_elements.iter().enumerate() {
141 if is_referenced {
142 elements_builder
143 .append_scalar(&self.elements().scalar_at(i))
144 .vortex_expect("append scalar");
145 }
146 }
147
148 let mut new_offsets_builder =
150 PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, self.len());
151 for &old_offset in offsets_slice {
152 let new_offset = prefix_sum[old_offset.as_()];
153 let offset_value =
154 O::from_usize(new_offset).vortex_expect("offset must fit in offset type");
155 new_offsets_builder.append_value(offset_value);
156 }
157
158 unsafe {
165 ListViewArray::new_unchecked(
166 elements_builder.finish(),
167 new_offsets_builder.finish().into_array(),
168 self.sizes().clone(),
169 self.validity().clone(),
170 )
171 }
172 }
173}
174
175#[cfg(test)]
176mod tests {
177 use vortex_dtype::Nullability;
178
179 use crate::arrays::{ListViewArray, PrimitiveArray};
180 use crate::validity::Validity;
181 use crate::vtable::ValidityHelper;
182 use crate::{IntoArray, ToCanonical};
183
184 #[test]
185 fn test_rebuild_remove_gaps_with_leading_garbage() {
186 let elements = PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 3]).into_array();
190 let offsets = PrimitiveArray::from_iter(vec![2u32, 3]).into_array();
191 let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
192
193 let listview =
194 ListViewArray::try_new(elements, offsets, sizes, Validity::NonNullable).unwrap();
195
196 let compacted = listview.rebuild_remove_gaps();
197
198 assert_eq!(compacted.elements().len(), 3);
200
201 assert_eq!(compacted.offset_at(0), 0);
203 assert_eq!(compacted.size_at(0), 2);
204 assert_eq!(compacted.offset_at(1), 1);
205 assert_eq!(compacted.size_at(1), 2);
206
207 let list0 = compacted.list_elements_at(0).to_primitive();
209 assert_eq!(list0.as_slice::<i32>(), &[1, 2]);
210
211 let list1 = compacted.list_elements_at(1).to_primitive();
212 assert_eq!(list1.as_slice::<i32>(), &[2, 3]);
213 }
214
215 #[test]
216 fn test_rebuild_remove_gaps_with_trailing_garbage() {
217 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 99, 98]).into_array();
221 let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
222 let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
223
224 let listview =
225 ListViewArray::try_new(elements, offsets, sizes, Validity::NonNullable).unwrap();
226
227 let compacted = listview.rebuild_remove_gaps();
228
229 assert_eq!(compacted.elements().len(), 3);
231
232 assert_eq!(compacted.offset_at(0), 0);
234 assert_eq!(compacted.offset_at(1), 1);
235 }
236
237 #[test]
238 fn test_rebuild_remove_gaps_no_garbage() {
239 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
243 let offsets = PrimitiveArray::from_iter(vec![0u32, 2]).into_array();
244 let sizes = PrimitiveArray::from_iter(vec![2u32, 1]).into_array();
245
246 let listview =
247 ListViewArray::try_new(elements, offsets, sizes, Validity::NonNullable).unwrap();
248
249 let compacted = listview.rebuild_remove_gaps();
250
251 assert_eq!(compacted.elements().len(), 3);
253 assert_eq!(compacted.offset_at(0), 0);
254 assert_eq!(compacted.offset_at(1), 2);
255 }
256
257 #[test]
258 fn test_rebuild_remove_gaps_preserves_overlaps() {
259 let elements = PrimitiveArray::from_iter(vec![99i32, 1, 2, 3, 98]).into_array();
263 let offsets = PrimitiveArray::from_iter(vec![1u32, 2]).into_array();
264 let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
265
266 let listview =
267 ListViewArray::try_new(elements, offsets, sizes, Validity::NonNullable).unwrap();
268
269 let compacted = listview.rebuild_remove_gaps();
270
271 assert_eq!(compacted.elements().len(), 3);
273
274 assert_eq!(compacted.offset_at(0), 0);
277 assert_eq!(compacted.size_at(0), 3);
278 assert_eq!(compacted.offset_at(1), 1);
279 assert_eq!(compacted.size_at(1), 2);
280
281 let list0 = compacted.list_elements_at(0).to_primitive();
283 assert_eq!(list0.as_slice::<i32>(), &[1, 2, 3]);
284
285 let list1 = compacted.list_elements_at(1).to_primitive();
286 assert_eq!(list1.as_slice::<i32>(), &[2, 3]);
287 }
288
289 #[test]
290 fn test_rebuild_remove_gaps_multiple_gaps() {
291 let elements =
299 PrimitiveArray::from_iter(vec![99i32, 1, 2, 98, 3, 4, 97, 5, 6, 96]).into_array();
300 let offsets = PrimitiveArray::from_iter(vec![1u32, 4, 7]).into_array();
301 let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 2]).into_array();
302
303 let listview =
304 ListViewArray::try_new(elements, offsets, sizes, Validity::NonNullable).unwrap();
305
306 let compacted = listview.rebuild_remove_gaps();
307
308 assert_eq!(compacted.elements().len(), 6);
310
311 assert_eq!(compacted.offset_at(0), 0);
316 assert_eq!(compacted.size_at(0), 2);
317 assert_eq!(compacted.offset_at(1), 2);
318 assert_eq!(compacted.size_at(1), 2);
319 assert_eq!(compacted.offset_at(2), 4);
320 assert_eq!(compacted.size_at(2), 2);
321
322 let list0 = compacted.list_elements_at(0).to_primitive();
324 assert_eq!(list0.as_slice::<i32>(), &[1, 2]);
325
326 let list1 = compacted.list_elements_at(1).to_primitive();
327 assert_eq!(list1.as_slice::<i32>(), &[3, 4]);
328
329 let list2 = compacted.list_elements_at(2).to_primitive();
330 assert_eq!(list2.as_slice::<i32>(), &[5, 6]);
331 }
332
333 #[test]
334 fn test_rebuild_flatten_removes_overlaps() {
335 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
339 let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
340 let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
341
342 let listview =
343 ListViewArray::try_new(elements, offsets, sizes, Validity::NonNullable).unwrap();
344
345 let flattened = listview.rebuild_zero_copy_to_list();
346
347 assert_eq!(flattened.elements().len(), 5);
350
351 assert_eq!(flattened.offset_at(0), 0);
353 assert_eq!(flattened.size_at(0), 3);
354 assert_eq!(flattened.offset_at(1), 3);
355 assert_eq!(flattened.size_at(1), 2);
356
357 let list0 = flattened.list_elements_at(0).to_primitive();
359 assert_eq!(list0.as_slice::<i32>(), &[1, 2, 3]);
360
361 let list1 = flattened.list_elements_at(1).to_primitive();
362 assert_eq!(list1.as_slice::<i32>(), &[2, 3]);
363 }
364
365 #[test]
366 fn test_rebuild_flatten_with_nullable() {
367 use arrow_buffer::BooleanBuffer;
368
369 use crate::arrays::BoolArray;
370
371 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
373 let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
374 let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
375 let validity = Validity::Array(
376 BoolArray::from(BooleanBuffer::from(vec![true, false, true])).into_array(),
377 );
378
379 let listview = ListViewArray::try_new(elements, offsets, sizes, validity).unwrap();
380
381 let flattened = listview.rebuild_zero_copy_to_list();
382
383 assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
385 assert!(flattened.validity().is_valid(0));
386 assert!(!flattened.validity().is_valid(1));
387 assert!(flattened.validity().is_valid(2));
388
389 let list0 = flattened.list_elements_at(0).to_primitive();
391 assert_eq!(list0.as_slice::<i32>(), &[1, 2]);
392
393 let list2 = flattened.list_elements_at(2).to_primitive();
394 assert_eq!(list2.as_slice::<i32>(), &[3]);
395 }
396}