vortex_array/arrays/list/
array.rs1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use smallvec::smallvec;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_ensure;
14use vortex_error::vortex_panic;
15
16use crate::ArrayRef;
17use crate::ArraySlots;
18use crate::Canonical;
19use crate::ExecutionCtx;
20use crate::IntoArray;
21use crate::LEGACY_SESSION;
22use crate::VortexSessionExecute;
23use crate::aggregate_fn::fns::min_max::min_max;
24use crate::array::Array;
25use crate::array::ArrayParts;
26use crate::array::TypedArrayRef;
27use crate::array::child_to_validity;
28use crate::array::validity_to_child;
29use crate::arrays::ConstantArray;
30use crate::arrays::List;
31use crate::arrays::Primitive;
32use crate::builtins::ArrayBuiltins;
33use crate::dtype::DType;
34use crate::dtype::NativePType;
35use crate::match_each_integer_ptype;
36use crate::match_each_native_ptype;
37use crate::scalar_fn::fns::operators::Operator;
38use crate::validity::Validity;
39
40pub(super) const ELEMENTS_SLOT: usize = 0;
42pub(super) const OFFSETS_SLOT: usize = 1;
44pub(super) const VALIDITY_SLOT: usize = 2;
46pub(super) const NUM_SLOTS: usize = 3;
47pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "validity"];
48
49#[derive(Clone, Debug, Default)]
103pub struct ListData;
104
105impl Display for ListData {
106 fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result {
107 Ok(())
108 }
109}
110
111pub struct ListDataParts {
112 pub elements: ArrayRef,
113 pub offsets: ArrayRef,
114 pub validity: Validity,
115 pub dtype: DType,
116}
117
118impl ListData {
119 pub(crate) fn make_slots(
120 elements: &ArrayRef,
121 offsets: &ArrayRef,
122 validity: &Validity,
123 len: usize,
124 ) -> ArraySlots {
125 smallvec![
126 Some(elements.clone()),
127 Some(offsets.clone()),
128 validity_to_child(validity, len),
129 ]
130 }
131
132 pub fn build(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
139 Self::try_build(elements, offsets, validity).vortex_expect("ListArray new")
140 }
141
142 pub(crate) fn try_build(
151 elements: ArrayRef,
152 offsets: ArrayRef,
153 validity: Validity,
154 ) -> VortexResult<Self> {
155 Self::validate(&elements, &offsets, &validity)?;
156
157 Ok(unsafe { Self::new_unchecked() })
159 }
160
161 pub unsafe fn new_unchecked() -> Self {
178 Self
179 }
180
181 pub fn validate(
185 elements: &ArrayRef,
186 offsets: &ArrayRef,
187 validity: &Validity,
188 ) -> VortexResult<()> {
189 vortex_ensure!(
191 !offsets.is_empty(),
192 InvalidArgument: "Offsets must have at least one element, [0] for an empty list"
193 );
194
195 vortex_ensure!(
197 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
198 InvalidArgument: "offsets have invalid type {}",
199 offsets.dtype()
200 );
201
202 let offsets_ptype = offsets.dtype().as_ptype();
204 let mut ctx = LEGACY_SESSION.create_execution_ctx();
205
206 if let Some(is_sorted) = offsets.statistics().compute_is_sorted(&mut ctx) {
208 vortex_ensure!(is_sorted, InvalidArgument: "offsets must be sorted");
209 } else {
210 vortex_bail!(InvalidArgument: "offsets must report is_sorted statistic");
211 }
212
213 if let Some(min_max) = min_max(offsets, &mut ctx)? {
216 match_each_integer_ptype!(offsets_ptype, |P| {
217 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
218 {
219 let max = min_max
220 .max
221 .as_primitive()
222 .as_::<P>()
223 .vortex_expect("offsets type must fit offsets values");
224 let min = min_max
225 .min
226 .as_primitive()
227 .as_::<P>()
228 .vortex_expect("offsets type must fit offsets values");
229
230 vortex_ensure!(
231 min >= 0,
232 InvalidArgument: "offsets minimum {min} outside valid range [0, {max}]"
233 );
234
235 vortex_ensure!(
236 max <= P::try_from(elements.len()).unwrap_or_else(|_| vortex_panic!(
237 "Offsets type {} must be able to fit elements length {}",
238 <P as NativePType>::PTYPE,
239 elements.len()
240 )),
241 InvalidArgument: "Max offset {max} is beyond the length of the elements array {}",
242 elements.len()
243 );
244 }
245 })
246 } else {
247 vortex_bail!(
249 InvalidArgument: "offsets array with encoding {} must support min_max compute function",
250 offsets.encoding_id()
251 );
252 };
253
254 if let Some(validity_len) = validity.maybe_len() {
256 vortex_ensure!(
257 validity_len == offsets.len() - 1,
258 InvalidArgument: "validity with size {validity_len} does not match array size {}",
259 offsets.len() - 1
260 );
261 }
262
263 Ok(())
264 }
265 }
270
271pub trait ListArrayExt: TypedArrayRef<List> {
272 fn nullability(&self) -> crate::dtype::Nullability {
273 match self.as_ref().dtype() {
274 DType::List(_, nullability) => *nullability,
275 _ => unreachable!("ListArrayExt requires a list dtype"),
276 }
277 }
278
279 fn elements(&self) -> &ArrayRef {
280 self.as_ref().slots()[ELEMENTS_SLOT]
281 .as_ref()
282 .vortex_expect("ListArray elements slot")
283 }
284
285 fn offsets(&self) -> &ArrayRef {
286 self.as_ref().slots()[OFFSETS_SLOT]
287 .as_ref()
288 .vortex_expect("ListArray offsets slot")
289 }
290
291 fn list_validity(&self) -> Validity {
292 child_to_validity(
293 self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
294 self.nullability(),
295 )
296 }
297
298 fn offset_at(&self, index: usize) -> VortexResult<usize> {
299 vortex_ensure!(
300 index <= self.as_ref().len(),
301 "Index {index} out of bounds 0..={}",
302 self.as_ref().len()
303 );
304
305 if let Some(p) = self.offsets().as_opt::<Primitive>() {
306 Ok(match_each_native_ptype!(p.ptype(), |P| {
307 p.as_slice::<P>()[index].as_()
308 }))
309 } else {
310 self.offsets()
311 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())?
312 .as_primitive()
313 .as_::<usize>()
314 .ok_or_else(|| vortex_error::vortex_err!("offset value does not fit in usize"))
315 }
316 }
317
318 fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
319 let start = self.offset_at(index)?;
320 let end = self.offset_at(index + 1)?;
321 self.elements().slice(start..end)
322 }
323
324 fn sliced_elements(&self) -> VortexResult<ArrayRef> {
325 let start = self.offset_at(0)?;
326 let end = self.offset_at(self.as_ref().len())?;
327 self.elements().slice(start..end)
328 }
329
330 fn element_dtype(&self) -> &DType {
331 self.elements().dtype()
332 }
333
334 fn reset_offsets(&self, recurse: bool, ctx: &mut ExecutionCtx) -> VortexResult<Array<List>> {
335 let mut elements = self.sliced_elements()?;
336 if recurse && elements.is_canonical() {
337 let compacted = elements
338 .clone()
339 .execute::<Canonical>(ctx)?
340 .compact(ctx)?
341 .into_array();
342 elements = compacted;
343 } else if recurse && let Some(child_list_array) = elements.as_opt::<List>() {
344 elements = child_list_array
345 .into_owned()
346 .reset_offsets(recurse, ctx)?
347 .into_array();
348 }
349
350 let offsets = self.offsets();
351 let first_offset = offsets.execute_scalar(0, ctx)?;
352 let adjusted_offsets = offsets.clone().binary(
353 ConstantArray::new(first_offset, offsets.len()).into_array(),
354 Operator::Sub,
355 )?;
356
357 Array::<List>::try_new(elements, adjusted_offsets, self.list_validity())
358 }
359}
360impl<T: TypedArrayRef<List>> ListArrayExt for T {}
361
362impl Array<List> {
363 pub fn new(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
365 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
366 let len = offsets.len().saturating_sub(1);
367 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
368 let data = ListData::build(elements, offsets, validity);
369 unsafe {
370 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
371 }
372 }
373
374 pub fn try_new(
376 elements: ArrayRef,
377 offsets: ArrayRef,
378 validity: Validity,
379 ) -> VortexResult<Self> {
380 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
381 let len = offsets.len().saturating_sub(1);
382 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
383 let data = ListData::try_build(elements, offsets, validity)?;
384 Ok(unsafe {
385 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
386 })
387 }
388
389 pub unsafe fn new_unchecked(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
395 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
396 let len = offsets.len().saturating_sub(1);
397 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
398 let data = unsafe { ListData::new_unchecked() };
399 unsafe {
400 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
401 }
402 }
403
404 pub fn into_data_parts(self) -> ListDataParts {
405 let dtype = self.dtype().clone();
406 let elements = self.slots()[ELEMENTS_SLOT]
407 .clone()
408 .vortex_expect("ListArray elements slot");
409 let offsets = self.slots()[OFFSETS_SLOT]
410 .clone()
411 .vortex_expect("ListArray offsets slot");
412 let validity = self.list_validity();
413 ListDataParts {
414 elements,
415 offsets,
416 validity,
417 dtype,
418 }
419 }
420}