vortex_array/arrays/list/
array.rs1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use smallvec::smallvec;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_ensure;
14use vortex_error::vortex_panic;
15
16use crate::ArrayRef;
17use crate::ArraySlots;
18use crate::Canonical;
19use crate::ExecutionCtx;
20use crate::IntoArray;
21use crate::LEGACY_SESSION;
22use crate::VortexSessionExecute;
23use crate::aggregate_fn::NumericalAggregateOpts;
24use crate::aggregate_fn::fns::min_max::min_max;
25use crate::array::Array;
26use crate::array::ArrayParts;
27use crate::array::TypedArrayRef;
28use crate::array::child_to_validity;
29use crate::array::validity_to_child;
30use crate::arrays::ConstantArray;
31use crate::arrays::List;
32use crate::arrays::Primitive;
33use crate::builtins::ArrayBuiltins;
34use crate::dtype::DType;
35use crate::dtype::NativePType;
36use crate::match_each_integer_ptype;
37use crate::match_each_native_ptype;
38use crate::scalar_fn::fns::operators::Operator;
39use crate::validity::Validity;
40
41pub(super) const ELEMENTS_SLOT: usize = 0;
43pub(super) const OFFSETS_SLOT: usize = 1;
45pub(super) const VALIDITY_SLOT: usize = 2;
47pub(super) const NUM_SLOTS: usize = 3;
48pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "validity"];
49
50#[derive(Clone, Debug, Default)]
104pub struct ListData;
105
106impl Display for ListData {
107 fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result {
108 Ok(())
109 }
110}
111
112pub struct ListDataParts {
113 pub elements: ArrayRef,
114 pub offsets: ArrayRef,
115 pub validity: Validity,
116 pub dtype: DType,
117}
118
119impl ListData {
120 pub(crate) fn make_slots(
121 elements: &ArrayRef,
122 offsets: &ArrayRef,
123 validity: &Validity,
124 len: usize,
125 ) -> ArraySlots {
126 smallvec![
127 Some(elements.clone()),
128 Some(offsets.clone()),
129 validity_to_child(validity, len),
130 ]
131 }
132
133 pub fn build(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
140 Self::try_build(elements, offsets, validity).vortex_expect("ListArray new")
141 }
142
143 pub(crate) fn try_build(
152 elements: ArrayRef,
153 offsets: ArrayRef,
154 validity: Validity,
155 ) -> VortexResult<Self> {
156 Self::validate(&elements, &offsets, &validity)?;
157
158 Ok(unsafe { Self::new_unchecked() })
160 }
161
162 pub unsafe fn new_unchecked() -> Self {
179 Self
180 }
181
182 pub fn validate(
186 elements: &ArrayRef,
187 offsets: &ArrayRef,
188 validity: &Validity,
189 ) -> VortexResult<()> {
190 vortex_ensure!(
192 !offsets.is_empty(),
193 InvalidArgument: "Offsets must have at least one element, [0] for an empty list"
194 );
195
196 vortex_ensure!(
198 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
199 InvalidArgument: "offsets have invalid type {}",
200 offsets.dtype()
201 );
202
203 let offsets_ptype = offsets.dtype().as_ptype();
205 let mut ctx = LEGACY_SESSION.create_execution_ctx();
206
207 if let Some(is_sorted) = offsets.statistics().compute_is_sorted(&mut ctx) {
209 vortex_ensure!(is_sorted, InvalidArgument: "offsets must be sorted");
210 } else {
211 vortex_bail!(InvalidArgument: "offsets must report is_sorted statistic");
212 }
213
214 if let Some(min_max) = min_max(offsets, &mut ctx, NumericalAggregateOpts::default())? {
217 match_each_integer_ptype!(offsets_ptype, |P| {
218 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
219 {
220 let max = min_max
221 .max
222 .as_primitive()
223 .as_::<P>()
224 .vortex_expect("offsets type must fit offsets values");
225 let min = min_max
226 .min
227 .as_primitive()
228 .as_::<P>()
229 .vortex_expect("offsets type must fit offsets values");
230
231 vortex_ensure!(
232 min >= 0,
233 InvalidArgument: "offsets minimum {min} outside valid range [0, {max}]"
234 );
235
236 vortex_ensure!(
237 max <= P::try_from(elements.len()).unwrap_or_else(|_| vortex_panic!(
238 "Offsets type {} must be able to fit elements length {}",
239 <P as NativePType>::PTYPE,
240 elements.len()
241 )),
242 InvalidArgument: "Max offset {max} is beyond the length of the elements array {}",
243 elements.len()
244 );
245 }
246 })
247 } else {
248 vortex_bail!(
250 InvalidArgument: "offsets array with encoding {} must support min_max compute function",
251 offsets.encoding_id()
252 );
253 };
254
255 if let Some(validity_len) = validity.maybe_len() {
257 vortex_ensure!(
258 validity_len == offsets.len() - 1,
259 InvalidArgument: "validity with size {validity_len} does not match array size {}",
260 offsets.len() - 1
261 );
262 }
263
264 Ok(())
265 }
266 }
271
272pub trait ListArrayExt: TypedArrayRef<List> {
273 fn nullability(&self) -> crate::dtype::Nullability {
274 match self.as_ref().dtype() {
275 DType::List(_, nullability) => *nullability,
276 _ => unreachable!("ListArrayExt requires a list dtype"),
277 }
278 }
279
280 fn elements(&self) -> &ArrayRef {
281 self.as_ref().slots()[ELEMENTS_SLOT]
282 .as_ref()
283 .vortex_expect("ListArray elements slot")
284 }
285
286 fn offsets(&self) -> &ArrayRef {
287 self.as_ref().slots()[OFFSETS_SLOT]
288 .as_ref()
289 .vortex_expect("ListArray offsets slot")
290 }
291
292 fn list_validity(&self) -> Validity {
293 child_to_validity(
294 self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
295 self.nullability(),
296 )
297 }
298
299 fn offset_at(&self, index: usize) -> VortexResult<usize> {
300 vortex_ensure!(
301 index <= self.as_ref().len(),
302 "Index {index} out of bounds 0..={}",
303 self.as_ref().len()
304 );
305
306 if let Some(p) = self.offsets().as_opt::<Primitive>() {
307 Ok(match_each_native_ptype!(p.ptype(), |P| {
308 p.as_slice::<P>()[index].as_()
309 }))
310 } else {
311 self.offsets()
312 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())?
313 .as_primitive()
314 .as_::<usize>()
315 .ok_or_else(|| vortex_error::vortex_err!("offset value does not fit in usize"))
316 }
317 }
318
319 fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
320 let start = self.offset_at(index)?;
321 let end = self.offset_at(index + 1)?;
322 self.elements().slice(start..end)
323 }
324
325 fn sliced_elements(&self) -> VortexResult<ArrayRef> {
326 let start = self.offset_at(0)?;
327 let end = self.offset_at(self.as_ref().len())?;
328 self.elements().slice(start..end)
329 }
330
331 fn element_dtype(&self) -> &DType {
332 self.elements().dtype()
333 }
334
335 fn reset_offsets(&self, recurse: bool, ctx: &mut ExecutionCtx) -> VortexResult<Array<List>> {
336 let mut elements = self.sliced_elements()?;
337 if recurse && elements.is_canonical() {
338 let compacted = elements
339 .clone()
340 .execute::<Canonical>(ctx)?
341 .compact(ctx)?
342 .into_array();
343 elements = compacted;
344 } else if recurse && let Some(child_list_array) = elements.as_opt::<List>() {
345 elements = child_list_array
346 .into_owned()
347 .reset_offsets(recurse, ctx)?
348 .into_array();
349 }
350
351 let offsets = self.offsets();
352 let first_offset = offsets.execute_scalar(0, ctx)?;
353 let adjusted_offsets = offsets.clone().binary(
354 ConstantArray::new(first_offset, offsets.len()).into_array(),
355 Operator::Sub,
356 )?;
357
358 Array::<List>::try_new(elements, adjusted_offsets, self.list_validity())
359 }
360}
361impl<T: TypedArrayRef<List>> ListArrayExt for T {}
362
363impl Array<List> {
364 pub fn new(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
366 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
367 let len = offsets.len().saturating_sub(1);
368 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
369 let data = ListData::build(elements, offsets, validity);
370 unsafe {
371 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
372 }
373 }
374
375 pub fn try_new(
377 elements: ArrayRef,
378 offsets: ArrayRef,
379 validity: Validity,
380 ) -> VortexResult<Self> {
381 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
382 let len = offsets.len().saturating_sub(1);
383 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
384 let data = ListData::try_build(elements, offsets, validity)?;
385 Ok(unsafe {
386 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
387 })
388 }
389
390 pub unsafe fn new_unchecked(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
396 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
397 let len = offsets.len().saturating_sub(1);
398 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
399 let data = unsafe { ListData::new_unchecked() };
400 unsafe {
401 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
402 }
403 }
404
405 pub fn into_data_parts(self) -> ListDataParts {
406 let dtype = self.dtype().clone();
407 let elements = self.slots()[ELEMENTS_SLOT]
408 .clone()
409 .vortex_expect("ListArray elements slot");
410 let offsets = self.slots()[OFFSETS_SLOT]
411 .clone()
412 .vortex_expect("ListArray offsets slot");
413 let validity = self.list_validity();
414 ListDataParts {
415 elements,
416 offsets,
417 validity,
418 dtype,
419 }
420 }
421}