vortex_array/arrays/list/
array.rs1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use vortex_error::VortexExpect;
10use vortex_error::VortexResult;
11use vortex_error::vortex_bail;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_panic;
14
15use crate::ArrayRef;
16use crate::IntoArray;
17use crate::LEGACY_SESSION;
18use crate::VortexSessionExecute;
19use crate::aggregate_fn::fns::min_max::min_max;
20use crate::array::Array;
21use crate::array::ArrayParts;
22use crate::array::TypedArrayRef;
23use crate::array::child_to_validity;
24use crate::array::validity_to_child;
25use crate::arrays::ConstantArray;
26use crate::arrays::List;
27use crate::arrays::Primitive;
28use crate::builtins::ArrayBuiltins;
29use crate::dtype::DType;
30use crate::dtype::NativePType;
31use crate::match_each_integer_ptype;
32use crate::match_each_native_ptype;
33use crate::scalar_fn::fns::operators::Operator;
34use crate::validity::Validity;
35
36pub(super) const ELEMENTS_SLOT: usize = 0;
38pub(super) const OFFSETS_SLOT: usize = 1;
40pub(super) const VALIDITY_SLOT: usize = 2;
42pub(super) const NUM_SLOTS: usize = 3;
43pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "validity"];
44
45#[derive(Clone, Debug, Default)]
99pub struct ListData;
100
101impl Display for ListData {
102 fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result {
103 Ok(())
104 }
105}
106
107pub struct ListDataParts {
108 pub elements: ArrayRef,
109 pub offsets: ArrayRef,
110 pub validity: Validity,
111 pub dtype: DType,
112}
113
114impl ListData {
115 pub(crate) fn make_slots(
116 elements: &ArrayRef,
117 offsets: &ArrayRef,
118 validity: &Validity,
119 len: usize,
120 ) -> Vec<Option<ArrayRef>> {
121 vec![
122 Some(elements.clone()),
123 Some(offsets.clone()),
124 validity_to_child(validity, len),
125 ]
126 }
127
128 pub fn build(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
135 Self::try_build(elements, offsets, validity).vortex_expect("ListArray new")
136 }
137
138 pub(crate) fn try_build(
147 elements: ArrayRef,
148 offsets: ArrayRef,
149 validity: Validity,
150 ) -> VortexResult<Self> {
151 Self::validate(&elements, &offsets, &validity)?;
152
153 Ok(unsafe { Self::new_unchecked() })
155 }
156
157 pub unsafe fn new_unchecked() -> Self {
174 Self
175 }
176
177 pub fn validate(
181 elements: &ArrayRef,
182 offsets: &ArrayRef,
183 validity: &Validity,
184 ) -> VortexResult<()> {
185 vortex_ensure!(
187 !offsets.is_empty(),
188 InvalidArgument: "Offsets must have at least one element, [0] for an empty list"
189 );
190
191 vortex_ensure!(
193 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
194 InvalidArgument: "offsets have invalid type {}",
195 offsets.dtype()
196 );
197
198 let offsets_ptype = offsets.dtype().as_ptype();
200 let mut ctx = LEGACY_SESSION.create_execution_ctx();
201
202 if let Some(is_sorted) = offsets.statistics().compute_is_sorted(&mut ctx) {
204 vortex_ensure!(is_sorted, InvalidArgument: "offsets must be sorted");
205 } else {
206 vortex_bail!(InvalidArgument: "offsets must report is_sorted statistic");
207 }
208
209 if let Some(min_max) = min_max(offsets, &mut ctx)? {
212 match_each_integer_ptype!(offsets_ptype, |P| {
213 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
214 {
215 let max = min_max
216 .max
217 .as_primitive()
218 .as_::<P>()
219 .vortex_expect("offsets type must fit offsets values");
220 let min = min_max
221 .min
222 .as_primitive()
223 .as_::<P>()
224 .vortex_expect("offsets type must fit offsets values");
225
226 vortex_ensure!(
227 min >= 0,
228 InvalidArgument: "offsets minimum {min} outside valid range [0, {max}]"
229 );
230
231 vortex_ensure!(
232 max <= P::try_from(elements.len()).unwrap_or_else(|_| vortex_panic!(
233 "Offsets type {} must be able to fit elements length {}",
234 <P as NativePType>::PTYPE,
235 elements.len()
236 )),
237 InvalidArgument: "Max offset {max} is beyond the length of the elements array {}",
238 elements.len()
239 );
240 }
241 })
242 } else {
243 vortex_bail!(
245 InvalidArgument: "offsets array with encoding {} must support min_max compute function",
246 offsets.encoding_id()
247 );
248 };
249
250 if let Some(validity_len) = validity.maybe_len() {
252 vortex_ensure!(
253 validity_len == offsets.len() - 1,
254 InvalidArgument: "validity with size {validity_len} does not match array size {}",
255 offsets.len() - 1
256 );
257 }
258
259 Ok(())
260 }
261 }
266
267pub trait ListArrayExt: TypedArrayRef<List> {
268 fn nullability(&self) -> crate::dtype::Nullability {
269 match self.as_ref().dtype() {
270 DType::List(_, nullability) => *nullability,
271 _ => unreachable!("ListArrayExt requires a list dtype"),
272 }
273 }
274
275 fn elements(&self) -> &ArrayRef {
276 self.as_ref().slots()[ELEMENTS_SLOT]
277 .as_ref()
278 .vortex_expect("ListArray elements slot")
279 }
280
281 fn offsets(&self) -> &ArrayRef {
282 self.as_ref().slots()[OFFSETS_SLOT]
283 .as_ref()
284 .vortex_expect("ListArray offsets slot")
285 }
286
287 fn list_validity(&self) -> Validity {
288 child_to_validity(&self.as_ref().slots()[VALIDITY_SLOT], self.nullability())
289 }
290
291 fn offset_at(&self, index: usize) -> VortexResult<usize> {
292 vortex_ensure!(
293 index <= self.as_ref().len(),
294 "Index {index} out of bounds 0..={}",
295 self.as_ref().len()
296 );
297
298 if let Some(p) = self.offsets().as_opt::<Primitive>() {
299 Ok(match_each_native_ptype!(p.ptype(), |P| {
300 p.as_slice::<P>()[index].as_()
301 }))
302 } else {
303 self.offsets()
304 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())?
305 .as_primitive()
306 .as_::<usize>()
307 .ok_or_else(|| vortex_error::vortex_err!("offset value does not fit in usize"))
308 }
309 }
310
311 fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
312 let start = self.offset_at(index)?;
313 let end = self.offset_at(index + 1)?;
314 self.elements().slice(start..end)
315 }
316
317 fn sliced_elements(&self) -> VortexResult<ArrayRef> {
318 let start = self.offset_at(0)?;
319 let end = self.offset_at(self.as_ref().len())?;
320 self.elements().slice(start..end)
321 }
322
323 fn element_dtype(&self) -> &DType {
324 self.elements().dtype()
325 }
326
327 fn reset_offsets(&self, recurse: bool) -> VortexResult<Array<List>> {
328 let mut elements = self.sliced_elements()?;
329 if recurse && elements.is_canonical() {
330 elements = elements.to_canonical()?.compact()?.into_array();
331 } else if recurse && let Some(child_list_array) = elements.as_opt::<List>() {
332 elements = child_list_array
333 .into_owned()
334 .reset_offsets(recurse)?
335 .into_array();
336 }
337
338 let offsets = self.offsets();
339 let first_offset = offsets.execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?;
340 let adjusted_offsets = offsets.clone().binary(
341 ConstantArray::new(first_offset, offsets.len()).into_array(),
342 Operator::Sub,
343 )?;
344
345 Array::<List>::try_new(elements, adjusted_offsets, self.list_validity())
346 }
347}
348impl<T: TypedArrayRef<List>> ListArrayExt for T {}
349
350impl Array<List> {
351 pub fn new(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
353 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
354 let len = offsets.len().saturating_sub(1);
355 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
356 let data = ListData::build(elements, offsets, validity);
357 unsafe {
358 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
359 }
360 }
361
362 pub fn try_new(
364 elements: ArrayRef,
365 offsets: ArrayRef,
366 validity: Validity,
367 ) -> VortexResult<Self> {
368 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
369 let len = offsets.len().saturating_sub(1);
370 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
371 let data = ListData::try_build(elements, offsets, validity)?;
372 Ok(unsafe {
373 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
374 })
375 }
376
377 pub unsafe fn new_unchecked(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
383 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
384 let len = offsets.len().saturating_sub(1);
385 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
386 let data = unsafe { ListData::new_unchecked() };
387 unsafe {
388 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
389 }
390 }
391
392 pub fn into_data_parts(self) -> ListDataParts {
393 let dtype = self.dtype().clone();
394 let elements = self.slots()[ELEMENTS_SLOT]
395 .clone()
396 .vortex_expect("ListArray elements slot");
397 let offsets = self.slots()[OFFSETS_SLOT]
398 .clone()
399 .vortex_expect("ListArray offsets slot");
400 let validity = self.list_validity();
401 ListDataParts {
402 elements,
403 offsets,
404 validity,
405 dtype,
406 }
407 }
408}