vortex_array/arrays/list/
array.rs1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use vortex_error::VortexExpect;
10use vortex_error::VortexResult;
11use vortex_error::vortex_bail;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_panic;
14
15use crate::ArrayRef;
16use crate::IntoArray;
17use crate::LEGACY_SESSION;
18use crate::VortexSessionExecute;
19use crate::aggregate_fn::fns::min_max::min_max;
20use crate::array::Array;
21use crate::array::ArrayParts;
22use crate::array::TypedArrayRef;
23use crate::array::child_to_validity;
24use crate::array::validity_to_child;
25use crate::arrays::ConstantArray;
26use crate::arrays::List;
27use crate::arrays::Primitive;
28use crate::builtins::ArrayBuiltins;
29use crate::dtype::DType;
30use crate::dtype::NativePType;
31use crate::match_each_integer_ptype;
32use crate::match_each_native_ptype;
33use crate::scalar_fn::fns::operators::Operator;
34use crate::validity::Validity;
35
36pub(super) const ELEMENTS_SLOT: usize = 0;
38pub(super) const OFFSETS_SLOT: usize = 1;
40pub(super) const VALIDITY_SLOT: usize = 2;
42pub(super) const NUM_SLOTS: usize = 3;
43pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "validity"];
44
45#[derive(Clone, Debug, Default)]
99pub struct ListData;
100
101impl Display for ListData {
102 fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result {
103 Ok(())
104 }
105}
106
107pub struct ListDataParts {
108 pub elements: ArrayRef,
109 pub offsets: ArrayRef,
110 pub validity: Validity,
111 pub dtype: DType,
112}
113
114impl ListData {
115 pub(crate) fn make_slots(
116 elements: &ArrayRef,
117 offsets: &ArrayRef,
118 validity: &Validity,
119 len: usize,
120 ) -> Vec<Option<ArrayRef>> {
121 vec![
122 Some(elements.clone()),
123 Some(offsets.clone()),
124 validity_to_child(validity, len),
125 ]
126 }
127
128 pub fn build(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
135 Self::try_build(elements, offsets, validity).vortex_expect("ListArray new")
136 }
137
138 pub(crate) fn try_build(
147 elements: ArrayRef,
148 offsets: ArrayRef,
149 validity: Validity,
150 ) -> VortexResult<Self> {
151 Self::validate(&elements, &offsets, &validity)?;
152
153 Ok(unsafe { Self::new_unchecked() })
155 }
156
157 pub unsafe fn new_unchecked() -> Self {
174 Self
175 }
176
177 pub fn validate(
181 elements: &ArrayRef,
182 offsets: &ArrayRef,
183 validity: &Validity,
184 ) -> VortexResult<()> {
185 vortex_ensure!(
187 !offsets.is_empty(),
188 InvalidArgument: "Offsets must have at least one element, [0] for an empty list"
189 );
190
191 vortex_ensure!(
193 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
194 InvalidArgument: "offsets have invalid type {}",
195 offsets.dtype()
196 );
197
198 let offsets_ptype = offsets.dtype().as_ptype();
200
201 if let Some(is_sorted) = offsets.statistics().compute_is_sorted() {
203 vortex_ensure!(is_sorted, InvalidArgument: "offsets must be sorted");
204 } else {
205 vortex_bail!(InvalidArgument: "offsets must report is_sorted statistic");
206 }
207
208 let mut ctx = LEGACY_SESSION.create_execution_ctx();
211 if let Some(min_max) = min_max(offsets, &mut ctx)? {
212 match_each_integer_ptype!(offsets_ptype, |P| {
213 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
214 {
215 let max = min_max
216 .max
217 .as_primitive()
218 .as_::<P>()
219 .vortex_expect("offsets type must fit offsets values");
220 let min = min_max
221 .min
222 .as_primitive()
223 .as_::<P>()
224 .vortex_expect("offsets type must fit offsets values");
225
226 vortex_ensure!(
227 min >= 0,
228 InvalidArgument: "offsets minimum {min} outside valid range [0, {max}]"
229 );
230
231 vortex_ensure!(
232 max <= P::try_from(elements.len()).unwrap_or_else(|_| vortex_panic!(
233 "Offsets type {} must be able to fit elements length {}",
234 <P as NativePType>::PTYPE,
235 elements.len()
236 )),
237 InvalidArgument: "Max offset {max} is beyond the length of the elements array {}",
238 elements.len()
239 );
240 }
241 })
242 } else {
243 vortex_bail!(
245 InvalidArgument: "offsets array with encoding {} must support min_max compute function",
246 offsets.encoding_id()
247 );
248 };
249
250 if let Some(validity_len) = validity.maybe_len() {
252 vortex_ensure!(
253 validity_len == offsets.len() - 1,
254 InvalidArgument: "validity with size {validity_len} does not match array size {}",
255 offsets.len() - 1
256 );
257 }
258
259 Ok(())
260 }
261 }
266
267pub trait ListArrayExt: TypedArrayRef<List> {
268 fn nullability(&self) -> crate::dtype::Nullability {
269 match self.as_ref().dtype() {
270 DType::List(_, nullability) => *nullability,
271 _ => unreachable!("ListArrayExt requires a list dtype"),
272 }
273 }
274
275 fn elements(&self) -> &ArrayRef {
276 self.as_ref().slots()[ELEMENTS_SLOT]
277 .as_ref()
278 .vortex_expect("ListArray elements slot")
279 }
280
281 fn offsets(&self) -> &ArrayRef {
282 self.as_ref().slots()[OFFSETS_SLOT]
283 .as_ref()
284 .vortex_expect("ListArray offsets slot")
285 }
286
287 fn list_validity(&self) -> Validity {
288 child_to_validity(&self.as_ref().slots()[VALIDITY_SLOT], self.nullability())
289 }
290
291 fn list_validity_mask(&self) -> vortex_mask::Mask {
292 self.list_validity().to_mask(self.as_ref().len())
293 }
294
295 fn offset_at(&self, index: usize) -> VortexResult<usize> {
296 vortex_ensure!(
297 index <= self.as_ref().len(),
298 "Index {index} out of bounds 0..={}",
299 self.as_ref().len()
300 );
301
302 if let Some(p) = self.offsets().as_opt::<Primitive>() {
303 Ok(match_each_native_ptype!(p.ptype(), |P| {
304 p.as_slice::<P>()[index].as_()
305 }))
306 } else {
307 self.offsets()
308 .scalar_at(index)?
309 .as_primitive()
310 .as_::<usize>()
311 .ok_or_else(|| vortex_error::vortex_err!("offset value does not fit in usize"))
312 }
313 }
314
315 fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
316 let start = self.offset_at(index)?;
317 let end = self.offset_at(index + 1)?;
318 self.elements().slice(start..end)
319 }
320
321 fn sliced_elements(&self) -> VortexResult<ArrayRef> {
322 let start = self.offset_at(0)?;
323 let end = self.offset_at(self.as_ref().len())?;
324 self.elements().slice(start..end)
325 }
326
327 fn element_dtype(&self) -> &DType {
328 self.elements().dtype()
329 }
330
331 fn reset_offsets(&self, recurse: bool) -> VortexResult<Array<List>> {
332 let mut elements = self.sliced_elements()?;
333 if recurse && elements.is_canonical() {
334 elements = elements.to_canonical()?.compact()?.into_array();
335 } else if recurse && let Some(child_list_array) = elements.as_opt::<List>() {
336 elements = child_list_array
337 .into_owned()
338 .reset_offsets(recurse)?
339 .into_array();
340 }
341
342 let offsets = self.offsets();
343 let first_offset = offsets.scalar_at(0)?;
344 let adjusted_offsets = offsets.clone().binary(
345 ConstantArray::new(first_offset, offsets.len()).into_array(),
346 Operator::Sub,
347 )?;
348
349 Array::<List>::try_new(elements, adjusted_offsets, self.list_validity())
350 }
351}
352impl<T: TypedArrayRef<List>> ListArrayExt for T {}
353
354impl Array<List> {
355 pub fn new(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
357 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
358 let len = offsets.len().saturating_sub(1);
359 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
360 let data = ListData::build(elements, offsets, validity);
361 unsafe {
362 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
363 }
364 }
365
366 pub fn try_new(
368 elements: ArrayRef,
369 offsets: ArrayRef,
370 validity: Validity,
371 ) -> VortexResult<Self> {
372 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
373 let len = offsets.len().saturating_sub(1);
374 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
375 let data = ListData::try_build(elements, offsets, validity)?;
376 Ok(unsafe {
377 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
378 })
379 }
380
381 pub unsafe fn new_unchecked(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
387 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
388 let len = offsets.len().saturating_sub(1);
389 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
390 let data = unsafe { ListData::new_unchecked() };
391 unsafe {
392 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
393 }
394 }
395
396 pub fn into_data_parts(self) -> ListDataParts {
397 let dtype = self.dtype().clone();
398 let elements = self.slots()[ELEMENTS_SLOT]
399 .clone()
400 .vortex_expect("ListArray elements slot");
401 let offsets = self.slots()[OFFSETS_SLOT]
402 .clone()
403 .vortex_expect("ListArray offsets slot");
404 let validity = self.list_validity();
405 ListDataParts {
406 elements,
407 offsets,
408 validity,
409 dtype,
410 }
411 }
412}