vortex_array/arrays/list/
array.rs1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use smallvec::smallvec;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_ensure;
14use vortex_error::vortex_panic;
15
16use crate::ArrayRef;
17use crate::ArraySlots;
18use crate::IntoArray;
19use crate::LEGACY_SESSION;
20use crate::VortexSessionExecute;
21use crate::aggregate_fn::fns::min_max::min_max;
22use crate::array::Array;
23use crate::array::ArrayParts;
24use crate::array::TypedArrayRef;
25use crate::array::child_to_validity;
26use crate::array::validity_to_child;
27use crate::arrays::ConstantArray;
28use crate::arrays::List;
29use crate::arrays::Primitive;
30use crate::builtins::ArrayBuiltins;
31use crate::dtype::DType;
32use crate::dtype::NativePType;
33use crate::match_each_integer_ptype;
34use crate::match_each_native_ptype;
35use crate::scalar_fn::fns::operators::Operator;
36use crate::validity::Validity;
37
38pub(super) const ELEMENTS_SLOT: usize = 0;
40pub(super) const OFFSETS_SLOT: usize = 1;
42pub(super) const VALIDITY_SLOT: usize = 2;
44pub(super) const NUM_SLOTS: usize = 3;
45pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "validity"];
46
47#[derive(Clone, Debug, Default)]
101pub struct ListData;
102
103impl Display for ListData {
104 fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result {
105 Ok(())
106 }
107}
108
109pub struct ListDataParts {
110 pub elements: ArrayRef,
111 pub offsets: ArrayRef,
112 pub validity: Validity,
113 pub dtype: DType,
114}
115
116impl ListData {
117 pub(crate) fn make_slots(
118 elements: &ArrayRef,
119 offsets: &ArrayRef,
120 validity: &Validity,
121 len: usize,
122 ) -> ArraySlots {
123 smallvec![
124 Some(elements.clone()),
125 Some(offsets.clone()),
126 validity_to_child(validity, len),
127 ]
128 }
129
130 pub fn build(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
137 Self::try_build(elements, offsets, validity).vortex_expect("ListArray new")
138 }
139
140 pub(crate) fn try_build(
149 elements: ArrayRef,
150 offsets: ArrayRef,
151 validity: Validity,
152 ) -> VortexResult<Self> {
153 Self::validate(&elements, &offsets, &validity)?;
154
155 Ok(unsafe { Self::new_unchecked() })
157 }
158
159 pub unsafe fn new_unchecked() -> Self {
176 Self
177 }
178
179 pub fn validate(
183 elements: &ArrayRef,
184 offsets: &ArrayRef,
185 validity: &Validity,
186 ) -> VortexResult<()> {
187 vortex_ensure!(
189 !offsets.is_empty(),
190 InvalidArgument: "Offsets must have at least one element, [0] for an empty list"
191 );
192
193 vortex_ensure!(
195 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
196 InvalidArgument: "offsets have invalid type {}",
197 offsets.dtype()
198 );
199
200 let offsets_ptype = offsets.dtype().as_ptype();
202 let mut ctx = LEGACY_SESSION.create_execution_ctx();
203
204 if let Some(is_sorted) = offsets.statistics().compute_is_sorted(&mut ctx) {
206 vortex_ensure!(is_sorted, InvalidArgument: "offsets must be sorted");
207 } else {
208 vortex_bail!(InvalidArgument: "offsets must report is_sorted statistic");
209 }
210
211 if let Some(min_max) = min_max(offsets, &mut ctx)? {
214 match_each_integer_ptype!(offsets_ptype, |P| {
215 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
216 {
217 let max = min_max
218 .max
219 .as_primitive()
220 .as_::<P>()
221 .vortex_expect("offsets type must fit offsets values");
222 let min = min_max
223 .min
224 .as_primitive()
225 .as_::<P>()
226 .vortex_expect("offsets type must fit offsets values");
227
228 vortex_ensure!(
229 min >= 0,
230 InvalidArgument: "offsets minimum {min} outside valid range [0, {max}]"
231 );
232
233 vortex_ensure!(
234 max <= P::try_from(elements.len()).unwrap_or_else(|_| vortex_panic!(
235 "Offsets type {} must be able to fit elements length {}",
236 <P as NativePType>::PTYPE,
237 elements.len()
238 )),
239 InvalidArgument: "Max offset {max} is beyond the length of the elements array {}",
240 elements.len()
241 );
242 }
243 })
244 } else {
245 vortex_bail!(
247 InvalidArgument: "offsets array with encoding {} must support min_max compute function",
248 offsets.encoding_id()
249 );
250 };
251
252 if let Some(validity_len) = validity.maybe_len() {
254 vortex_ensure!(
255 validity_len == offsets.len() - 1,
256 InvalidArgument: "validity with size {validity_len} does not match array size {}",
257 offsets.len() - 1
258 );
259 }
260
261 Ok(())
262 }
263 }
268
269pub trait ListArrayExt: TypedArrayRef<List> {
270 fn nullability(&self) -> crate::dtype::Nullability {
271 match self.as_ref().dtype() {
272 DType::List(_, nullability) => *nullability,
273 _ => unreachable!("ListArrayExt requires a list dtype"),
274 }
275 }
276
277 fn elements(&self) -> &ArrayRef {
278 self.as_ref().slots()[ELEMENTS_SLOT]
279 .as_ref()
280 .vortex_expect("ListArray elements slot")
281 }
282
283 fn offsets(&self) -> &ArrayRef {
284 self.as_ref().slots()[OFFSETS_SLOT]
285 .as_ref()
286 .vortex_expect("ListArray offsets slot")
287 }
288
289 fn list_validity(&self) -> Validity {
290 child_to_validity(
291 self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
292 self.nullability(),
293 )
294 }
295
296 fn offset_at(&self, index: usize) -> VortexResult<usize> {
297 vortex_ensure!(
298 index <= self.as_ref().len(),
299 "Index {index} out of bounds 0..={}",
300 self.as_ref().len()
301 );
302
303 if let Some(p) = self.offsets().as_opt::<Primitive>() {
304 Ok(match_each_native_ptype!(p.ptype(), |P| {
305 p.as_slice::<P>()[index].as_()
306 }))
307 } else {
308 self.offsets()
309 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())?
310 .as_primitive()
311 .as_::<usize>()
312 .ok_or_else(|| vortex_error::vortex_err!("offset value does not fit in usize"))
313 }
314 }
315
316 fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
317 let start = self.offset_at(index)?;
318 let end = self.offset_at(index + 1)?;
319 self.elements().slice(start..end)
320 }
321
322 fn sliced_elements(&self) -> VortexResult<ArrayRef> {
323 let start = self.offset_at(0)?;
324 let end = self.offset_at(self.as_ref().len())?;
325 self.elements().slice(start..end)
326 }
327
328 fn element_dtype(&self) -> &DType {
329 self.elements().dtype()
330 }
331
332 fn reset_offsets(&self, recurse: bool) -> VortexResult<Array<List>> {
333 let mut elements = self.sliced_elements()?;
334 if recurse && elements.is_canonical() {
335 #[expect(deprecated)]
336 let compacted = elements.to_canonical()?.compact()?.into_array();
337 elements = compacted;
338 } else if recurse && let Some(child_list_array) = elements.as_opt::<List>() {
339 elements = child_list_array
340 .into_owned()
341 .reset_offsets(recurse)?
342 .into_array();
343 }
344
345 let offsets = self.offsets();
346 let first_offset = offsets.execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?;
347 let adjusted_offsets = offsets.clone().binary(
348 ConstantArray::new(first_offset, offsets.len()).into_array(),
349 Operator::Sub,
350 )?;
351
352 Array::<List>::try_new(elements, adjusted_offsets, self.list_validity())
353 }
354}
355impl<T: TypedArrayRef<List>> ListArrayExt for T {}
356
357impl Array<List> {
358 pub fn new(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
360 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
361 let len = offsets.len().saturating_sub(1);
362 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
363 let data = ListData::build(elements, offsets, validity);
364 unsafe {
365 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
366 }
367 }
368
369 pub fn try_new(
371 elements: ArrayRef,
372 offsets: ArrayRef,
373 validity: Validity,
374 ) -> VortexResult<Self> {
375 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
376 let len = offsets.len().saturating_sub(1);
377 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
378 let data = ListData::try_build(elements, offsets, validity)?;
379 Ok(unsafe {
380 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
381 })
382 }
383
384 pub unsafe fn new_unchecked(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
390 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
391 let len = offsets.len().saturating_sub(1);
392 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
393 let data = unsafe { ListData::new_unchecked() };
394 unsafe {
395 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
396 }
397 }
398
399 pub fn into_data_parts(self) -> ListDataParts {
400 let dtype = self.dtype().clone();
401 let elements = self.slots()[ELEMENTS_SLOT]
402 .clone()
403 .vortex_expect("ListArray elements slot");
404 let offsets = self.slots()[OFFSETS_SLOT]
405 .clone()
406 .vortex_expect("ListArray offsets slot");
407 let validity = self.list_validity();
408 ListDataParts {
409 elements,
410 offsets,
411 validity,
412 dtype,
413 }
414 }
415}