vortex_array/arrays/listview/
rebuild.rs1use num_traits::FromPrimitive;
5use vortex_dtype::{IntegerPType, Nullability, match_each_integer_ptype};
6use vortex_error::VortexExpect;
7use vortex_scalar::Scalar;
8
9use crate::arrays::ListViewArray;
10use crate::builders::{ArrayBuilder, ListViewBuilder};
11use crate::vtable::ValidityHelper;
12use crate::{Array, compute};
13
14pub enum ListViewRebuildMode {
16 MakeZeroCopyToList,
24
25 TrimElements,
28
29 MakeExact,
36
37 OverlapCompression,
44}
45
46impl ListViewArray {
47 pub fn rebuild(&self, mode: ListViewRebuildMode) -> ListViewArray {
49 if self.is_empty() {
50 return self.clone();
51 }
52
53 match mode {
54 ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
55 ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
56 ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
57 ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
58 }
59 }
60
61 fn rebuild_zero_copy_to_list(&self) -> ListViewArray {
69 if self.is_zero_copy_to_list() {
70 return self.clone();
72 }
73
74 let offsets_ptype = self.offsets().dtype().as_ptype();
75 let sizes_ptype = self.sizes().dtype().as_ptype();
76
77 match_each_integer_ptype!(offsets_ptype, |O| {
78 match_each_integer_ptype!(sizes_ptype, |S| { self.naive_rebuild::<O, S>() })
79 })
80 }
81
82 fn naive_rebuild<O: IntegerPType, S: IntegerPType>(&self) -> ListViewArray {
85 let element_dtype = self
86 .dtype()
87 .as_list_element_opt()
88 .vortex_expect("somehow had a canonical list that was not a list");
89
90 let mut builder = ListViewBuilder::<O, S>::with_capacity(
91 element_dtype.clone(),
92 self.dtype().nullability(),
93 self.elements().len(),
94 self.len(),
95 );
96
97 for i in 0..self.len() {
98 let list = self.scalar_at(i);
99
100 builder
101 .append_scalar(&list)
102 .vortex_expect("was unable to extend the `ListViewBuilder`")
103 }
104
105 builder.finish_into_listview()
106 }
107
108 fn rebuild_trim_elements(&self) -> ListViewArray {
112 let start = if self.is_zero_copy_to_list() {
113 self.offset_at(0)
117 } else {
118 self.offsets().statistics().compute_min().vortex_expect(
119 "[ListViewArray::rebuild]: `offsets` must report min statistic that is a `usize`",
120 )
121 };
122
123 let end = if self.is_zero_copy_to_list() {
124 let last_offset = self.offset_at(self.len() - 1);
127 let last_size = self.size_at(self.len() - 1);
128 last_offset + last_size
129 } else {
130 let min_max = compute::min_max(
131 &compute::add(self.offsets(), self.sizes())
132 .vortex_expect("`offsets + sizes` somehow overflowed"),
133 )
134 .vortex_expect("Something went wrong while computing min and max")
135 .vortex_expect("We checked that the array was not empty in the top-level `rebuild`");
136
137 min_max
138 .max
139 .as_primitive()
140 .as_::<usize>()
141 .vortex_expect("unable to interpret the max `offset + size` as a `usize`")
142 };
143
144 let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
145 let offset = <O as FromPrimitive>::from_usize(start)
146 .vortex_expect("unable to convert the min offset `start` into a `usize`");
147 let scalar = Scalar::primitive(offset, Nullability::NonNullable);
148
149 compute::sub_scalar(self.offsets(), scalar)
150 .vortex_expect("was somehow unable to adjust offsets down by their minimum")
151 });
152
153 let sliced_elements = self.elements().slice(start..end);
154
155 unsafe {
160 ListViewArray::new_unchecked(
161 sliced_elements,
162 adjusted_offsets,
163 self.sizes().clone(),
164 self.validity().clone(),
165 )
166 .with_zero_copy_to_list(self.is_zero_copy_to_list())
167 }
168 }
169
170 fn rebuild_make_exact(&self) -> ListViewArray {
171 if self.is_zero_copy_to_list() {
172 self.rebuild_trim_elements()
173 } else {
174 self.rebuild_zero_copy_to_list()
177 }
178 }
179}
180
181#[cfg(test)]
182mod tests {
183 use vortex_buffer::BitBuffer;
184 use vortex_dtype::Nullability;
185
186 use super::ListViewRebuildMode;
187 use crate::arrays::{ListViewArray, PrimitiveArray};
188 use crate::validity::Validity;
189 use crate::vtable::ValidityHelper;
190 use crate::{IntoArray, ToCanonical};
191
192 #[test]
193 fn test_rebuild_flatten_removes_overlaps() {
194 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
198 let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
199 let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
200
201 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
202
203 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList);
204
205 assert_eq!(flattened.elements().len(), 5);
208
209 assert_eq!(flattened.offset_at(0), 0);
211 assert_eq!(flattened.size_at(0), 3);
212 assert_eq!(flattened.offset_at(1), 3);
213 assert_eq!(flattened.size_at(1), 2);
214
215 let list0 = flattened.list_elements_at(0).to_primitive();
217 assert_eq!(list0.as_slice::<i32>(), &[1, 2, 3]);
218
219 let list1 = flattened.list_elements_at(1).to_primitive();
220 assert_eq!(list1.as_slice::<i32>(), &[2, 3]);
221 }
222
223 #[test]
224 fn test_rebuild_flatten_with_nullable() {
225 use crate::arrays::BoolArray;
226
227 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
229 let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
230 let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
231 let validity = Validity::Array(
232 BoolArray::from_bit_buffer(
233 BitBuffer::from(vec![true, false, true]),
234 Validity::NonNullable,
235 )
236 .into_array(),
237 );
238
239 let listview = ListViewArray::new(elements, offsets, sizes, validity);
240
241 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList);
242
243 assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
245 assert!(flattened.validity().is_valid(0));
246 assert!(!flattened.validity().is_valid(1));
247 assert!(flattened.validity().is_valid(2));
248
249 let list0 = flattened.list_elements_at(0).to_primitive();
251 assert_eq!(list0.as_slice::<i32>(), &[1, 2]);
252
253 let list2 = flattened.list_elements_at(2).to_primitive();
254 assert_eq!(list2.as_slice::<i32>(), &[3]);
255 }
256
257 #[test]
258 fn test_rebuild_trim_elements_basic() {
259 let elements =
267 PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
268 let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
269 let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
270
271 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
272
273 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements);
274
275 assert_eq!(trimmed.elements().len(), 5);
277
278 assert_eq!(trimmed.offset_at(0), 0);
280 assert_eq!(trimmed.size_at(0), 2);
281 assert_eq!(trimmed.offset_at(1), 3);
282 assert_eq!(trimmed.size_at(1), 2);
283
284 let list0 = trimmed.list_elements_at(0).to_primitive();
286 assert_eq!(list0.as_slice::<i32>(), &[1, 2]);
287
288 let list1 = trimmed.list_elements_at(1).to_primitive();
289 assert_eq!(list1.as_slice::<i32>(), &[3, 4]);
290
291 let all_elements = trimmed.elements().to_primitive();
293 assert_eq!(all_elements.scalar_at(2), 97i32.into());
294 }
295}