vortex_array/arrays/list/
array.rs1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use vortex_error::VortexExpect;
10use vortex_error::VortexResult;
11use vortex_error::vortex_bail;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_panic;
14
15use crate::ArrayRef;
16use crate::IntoArray;
17use crate::LEGACY_SESSION;
18use crate::VortexSessionExecute;
19use crate::aggregate_fn::fns::min_max::min_max;
20use crate::array::Array;
21use crate::array::ArrayParts;
22use crate::array::TypedArrayRef;
23use crate::array::child_to_validity;
24use crate::array::validity_to_child;
25use crate::arrays::ConstantArray;
26use crate::arrays::List;
27use crate::arrays::Primitive;
28use crate::builtins::ArrayBuiltins;
29use crate::dtype::DType;
30use crate::dtype::NativePType;
31use crate::match_each_integer_ptype;
32use crate::match_each_native_ptype;
33use crate::scalar_fn::fns::operators::Operator;
34use crate::validity::Validity;
35
36pub(super) const ELEMENTS_SLOT: usize = 0;
38pub(super) const OFFSETS_SLOT: usize = 1;
40pub(super) const VALIDITY_SLOT: usize = 2;
42pub(super) const NUM_SLOTS: usize = 3;
43pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "validity"];
44
45#[derive(Clone, Debug, Default)]
99pub struct ListData;
100
101impl Display for ListData {
102 fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result {
103 Ok(())
104 }
105}
106
107pub struct ListDataParts {
108 pub elements: ArrayRef,
109 pub offsets: ArrayRef,
110 pub validity: Validity,
111 pub dtype: DType,
112}
113
114impl ListData {
115 pub(crate) fn make_slots(
116 elements: &ArrayRef,
117 offsets: &ArrayRef,
118 validity: &Validity,
119 len: usize,
120 ) -> Vec<Option<ArrayRef>> {
121 vec![
122 Some(elements.clone()),
123 Some(offsets.clone()),
124 validity_to_child(validity, len),
125 ]
126 }
127
128 pub fn build(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
135 Self::try_build(elements, offsets, validity).vortex_expect("ListArray new")
136 }
137
138 pub(crate) fn try_build(
147 elements: ArrayRef,
148 offsets: ArrayRef,
149 validity: Validity,
150 ) -> VortexResult<Self> {
151 Self::validate(&elements, &offsets, &validity)?;
152
153 Ok(unsafe { Self::new_unchecked() })
155 }
156
157 pub unsafe fn new_unchecked() -> Self {
174 Self
175 }
176
177 pub fn validate(
181 elements: &ArrayRef,
182 offsets: &ArrayRef,
183 validity: &Validity,
184 ) -> VortexResult<()> {
185 vortex_ensure!(
187 !offsets.is_empty(),
188 InvalidArgument: "Offsets must have at least one element, [0] for an empty list"
189 );
190
191 vortex_ensure!(
193 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
194 InvalidArgument: "offsets have invalid type {}",
195 offsets.dtype()
196 );
197
198 let offsets_ptype = offsets.dtype().as_ptype();
200 let mut ctx = LEGACY_SESSION.create_execution_ctx();
201
202 if let Some(is_sorted) = offsets.statistics().compute_is_sorted(&mut ctx) {
204 vortex_ensure!(is_sorted, InvalidArgument: "offsets must be sorted");
205 } else {
206 vortex_bail!(InvalidArgument: "offsets must report is_sorted statistic");
207 }
208
209 if let Some(min_max) = min_max(offsets, &mut ctx)? {
212 match_each_integer_ptype!(offsets_ptype, |P| {
213 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
214 {
215 let max = min_max
216 .max
217 .as_primitive()
218 .as_::<P>()
219 .vortex_expect("offsets type must fit offsets values");
220 let min = min_max
221 .min
222 .as_primitive()
223 .as_::<P>()
224 .vortex_expect("offsets type must fit offsets values");
225
226 vortex_ensure!(
227 min >= 0,
228 InvalidArgument: "offsets minimum {min} outside valid range [0, {max}]"
229 );
230
231 vortex_ensure!(
232 max <= P::try_from(elements.len()).unwrap_or_else(|_| vortex_panic!(
233 "Offsets type {} must be able to fit elements length {}",
234 <P as NativePType>::PTYPE,
235 elements.len()
236 )),
237 InvalidArgument: "Max offset {max} is beyond the length of the elements array {}",
238 elements.len()
239 );
240 }
241 })
242 } else {
243 vortex_bail!(
245 InvalidArgument: "offsets array with encoding {} must support min_max compute function",
246 offsets.encoding_id()
247 );
248 };
249
250 if let Some(validity_len) = validity.maybe_len() {
252 vortex_ensure!(
253 validity_len == offsets.len() - 1,
254 InvalidArgument: "validity with size {validity_len} does not match array size {}",
255 offsets.len() - 1
256 );
257 }
258
259 Ok(())
260 }
261 }
266
267pub trait ListArrayExt: TypedArrayRef<List> {
268 fn nullability(&self) -> crate::dtype::Nullability {
269 match self.as_ref().dtype() {
270 DType::List(_, nullability) => *nullability,
271 _ => unreachable!("ListArrayExt requires a list dtype"),
272 }
273 }
274
275 fn elements(&self) -> &ArrayRef {
276 self.as_ref().slots()[ELEMENTS_SLOT]
277 .as_ref()
278 .vortex_expect("ListArray elements slot")
279 }
280
281 fn offsets(&self) -> &ArrayRef {
282 self.as_ref().slots()[OFFSETS_SLOT]
283 .as_ref()
284 .vortex_expect("ListArray offsets slot")
285 }
286
287 fn list_validity(&self) -> Validity {
288 child_to_validity(
289 self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
290 self.nullability(),
291 )
292 }
293
294 fn offset_at(&self, index: usize) -> VortexResult<usize> {
295 vortex_ensure!(
296 index <= self.as_ref().len(),
297 "Index {index} out of bounds 0..={}",
298 self.as_ref().len()
299 );
300
301 if let Some(p) = self.offsets().as_opt::<Primitive>() {
302 Ok(match_each_native_ptype!(p.ptype(), |P| {
303 p.as_slice::<P>()[index].as_()
304 }))
305 } else {
306 self.offsets()
307 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())?
308 .as_primitive()
309 .as_::<usize>()
310 .ok_or_else(|| vortex_error::vortex_err!("offset value does not fit in usize"))
311 }
312 }
313
314 fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
315 let start = self.offset_at(index)?;
316 let end = self.offset_at(index + 1)?;
317 self.elements().slice(start..end)
318 }
319
320 fn sliced_elements(&self) -> VortexResult<ArrayRef> {
321 let start = self.offset_at(0)?;
322 let end = self.offset_at(self.as_ref().len())?;
323 self.elements().slice(start..end)
324 }
325
326 fn element_dtype(&self) -> &DType {
327 self.elements().dtype()
328 }
329
330 fn reset_offsets(&self, recurse: bool) -> VortexResult<Array<List>> {
331 let mut elements = self.sliced_elements()?;
332 if recurse && elements.is_canonical() {
333 #[expect(deprecated)]
334 let compacted = elements.to_canonical()?.compact()?.into_array();
335 elements = compacted;
336 } else if recurse && let Some(child_list_array) = elements.as_opt::<List>() {
337 elements = child_list_array
338 .into_owned()
339 .reset_offsets(recurse)?
340 .into_array();
341 }
342
343 let offsets = self.offsets();
344 let first_offset = offsets.execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?;
345 let adjusted_offsets = offsets.clone().binary(
346 ConstantArray::new(first_offset, offsets.len()).into_array(),
347 Operator::Sub,
348 )?;
349
350 Array::<List>::try_new(elements, adjusted_offsets, self.list_validity())
351 }
352}
353impl<T: TypedArrayRef<List>> ListArrayExt for T {}
354
355impl Array<List> {
356 pub fn new(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
358 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
359 let len = offsets.len().saturating_sub(1);
360 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
361 let data = ListData::build(elements, offsets, validity);
362 unsafe {
363 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
364 }
365 }
366
367 pub fn try_new(
369 elements: ArrayRef,
370 offsets: ArrayRef,
371 validity: Validity,
372 ) -> VortexResult<Self> {
373 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
374 let len = offsets.len().saturating_sub(1);
375 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
376 let data = ListData::try_build(elements, offsets, validity)?;
377 Ok(unsafe {
378 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
379 })
380 }
381
382 pub unsafe fn new_unchecked(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
388 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
389 let len = offsets.len().saturating_sub(1);
390 let slots = ListData::make_slots(&elements, &offsets, &validity, len);
391 let data = unsafe { ListData::new_unchecked() };
392 unsafe {
393 Array::from_parts_unchecked(ArrayParts::new(List, dtype, len, data).with_slots(slots))
394 }
395 }
396
397 pub fn into_data_parts(self) -> ListDataParts {
398 let dtype = self.dtype().clone();
399 let elements = self.slots()[ELEMENTS_SLOT]
400 .clone()
401 .vortex_expect("ListArray elements slot");
402 let offsets = self.slots()[OFFSETS_SLOT]
403 .clone()
404 .vortex_expect("ListArray offsets slot");
405 let validity = self.list_validity();
406 ListDataParts {
407 elements,
408 offsets,
409 validity,
410 dtype,
411 }
412 }
413}