vortex_array/arrays/list/
mod.rs1mod compute;
5mod serde;
6
7use std::ops::Range;
8use std::sync::Arc;
9
10#[cfg(feature = "test-harness")]
11use itertools::Itertools;
12use num_traits::AsPrimitive;
13use vortex_dtype::{DType, match_each_integer_ptype, match_each_native_ptype};
14use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_ensure};
15use vortex_scalar::Scalar;
16
17#[cfg(feature = "test-harness")]
18use crate::OffsetPType;
19use crate::arrays::PrimitiveVTable;
20#[cfg(feature = "test-harness")]
21use crate::builders::{ArrayBuilder, ListBuilder};
22use crate::compute::{min_max, sub_scalar};
23use crate::stats::{ArrayStats, StatsSetRef};
24use crate::validity::Validity;
25use crate::vtable::{
26 ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityHelper,
27 ValidityVTableFromValidityHelper,
28};
29use crate::{Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
30
31vtable!(List);
32
33impl VTable for ListVTable {
34 type Array = ListArray;
35 type Encoding = ListEncoding;
36
37 type ArrayVTable = Self;
38 type CanonicalVTable = Self;
39 type OperationsVTable = Self;
40 type ValidityVTable = ValidityVTableFromValidityHelper;
41 type VisitorVTable = Self;
42 type ComputeVTable = NotSupported;
43 type EncodeVTable = NotSupported;
44 type PipelineVTable = NotSupported;
45 type SerdeVTable = Self;
46
47 fn id(_encoding: &Self::Encoding) -> EncodingId {
48 EncodingId::new_ref("vortex.list")
49 }
50
51 fn encoding(_array: &Self::Array) -> EncodingRef {
52 EncodingRef::new_ref(ListEncoding.as_ref())
53 }
54}
55
56#[derive(Clone, Debug)]
109pub struct ListArray {
110 dtype: DType,
111 elements: ArrayRef,
112 offsets: ArrayRef,
113 validity: Validity,
114 stats_set: ArrayStats,
115}
116
117#[derive(Clone, Debug)]
118pub struct ListEncoding;
119
120impl ListArray {
121 pub fn new(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
128 Self::try_new(elements, offsets, validity).vortex_expect("ListArray new")
129 }
130
131 pub fn try_new(
140 elements: ArrayRef,
141 offsets: ArrayRef,
142 validity: Validity,
143 ) -> VortexResult<Self> {
144 Self::validate(&elements, &offsets, &validity)?;
145
146 Ok(unsafe { Self::new_unchecked(elements, offsets, validity) })
148 }
149
150 pub unsafe fn new_unchecked(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
167 Self {
168 dtype: DType::List(Arc::new(elements.dtype().clone()), validity.nullability()),
169 elements,
170 offsets,
171 validity,
172 stats_set: Default::default(),
173 }
174 }
175
176 pub(crate) fn validate(
180 elements: &dyn Array,
181 offsets: &dyn Array,
182 validity: &Validity,
183 ) -> VortexResult<()> {
184 vortex_ensure!(
186 !offsets.is_empty(),
187 "Offsets must have at least one element, [0] for an empty list"
188 );
189
190 vortex_ensure!(
192 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
193 "offsets have invalid type {}",
194 offsets.dtype()
195 );
196
197 let offsets_ptype = offsets.dtype().as_ptype();
199
200 if let Some(is_sorted) = offsets.statistics().compute_is_sorted() {
202 vortex_ensure!(is_sorted, "offsets must be sorted");
203 } else {
204 vortex_bail!("offsets must report is_sorted statistic");
205 }
206
207 if let Some(min_max) = min_max(offsets)? {
210 match_each_integer_ptype!(offsets_ptype, |P| {
211 let max_offset = P::try_from(offsets.scalar_at(offsets.len() - 1))
212 .vortex_expect("Offsets type must fit offsets values");
213
214 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
215 {
216 if let Some(min) = min_max.min.as_primitive().as_::<P>() {
217 vortex_ensure!(
218 min >= 0 && min <= max_offset,
219 "offsets minimum {min} outside valid range [0, {max_offset}]"
220 );
221 }
222
223 if let Some(max) = min_max.max.as_primitive().as_::<P>() {
224 vortex_ensure!(
225 max >= 0 && max <= max_offset,
226 "offsets maximum {max} outside valid range [0, {max_offset}]"
227 )
228 }
229 }
230
231 vortex_ensure!(
232 max_offset
233 <= P::try_from(elements.len())
234 .vortex_expect("Offsets type must be able to fit elements length"),
235 "Max offset {max_offset} is beyond the length of the elements array {}",
236 elements.len()
237 );
238 })
239 } else {
240 vortex_bail!(
242 "offsets array with encoding {} must support min_max compute function",
243 offsets.encoding_id()
244 );
245 };
246
247 if let Some(validity_len) = validity.maybe_len() {
249 vortex_ensure!(
250 validity_len == offsets.len() - 1,
251 "validity with size {validity_len} does not match array size {}",
252 offsets.len() - 1
253 );
254 }
255
256 Ok(())
257 }
258
259 pub fn offset_at(&self, index: usize) -> usize {
263 assert!(
264 index <= self.len(),
265 "Index {index} out of bounds 0..={}",
266 self.len()
267 );
268
269 self.offsets()
270 .as_opt::<PrimitiveVTable>()
271 .map(|p| match_each_native_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
272 .unwrap_or_else(|| {
273 self.offsets()
274 .scalar_at(index)
275 .as_primitive()
276 .as_::<usize>()
277 .vortex_expect("index must fit in usize")
278 })
279 }
280
281 pub fn list_elements_at(&self, index: usize) -> ArrayRef {
283 let start = self.offset_at(index);
284 let end = self.offset_at(index + 1);
285 self.elements().slice(start..end)
286 }
287
288 pub fn sliced_elements(&self) -> ArrayRef {
293 let start = self.offset_at(0);
294 let end = self.offset_at(self.len());
295 self.elements().slice(start..end)
296 }
297
298 pub fn offsets(&self) -> &ArrayRef {
300 &self.offsets
301 }
302
303 pub fn elements(&self) -> &ArrayRef {
305 &self.elements
306 }
307
308 pub fn reset_offsets(&self) -> VortexResult<Self> {
311 let elements = self.sliced_elements();
312 let offsets = self.offsets();
313 let first_offset = offsets.scalar_at(0);
314 let adjusted_offsets = sub_scalar(offsets, first_offset)?;
315
316 Self::try_new(elements, adjusted_offsets, self.validity.clone())
317 }
318}
319
320impl ArrayVTable<ListVTable> for ListVTable {
321 fn len(array: &ListArray) -> usize {
322 array.offsets.len().saturating_sub(1)
323 }
324
325 fn dtype(array: &ListArray) -> &DType {
326 &array.dtype
327 }
328
329 fn stats(array: &ListArray) -> StatsSetRef<'_> {
330 array.stats_set.to_ref(array.as_ref())
331 }
332}
333
334impl OperationsVTable<ListVTable> for ListVTable {
335 fn slice(array: &ListArray, range: Range<usize>) -> ArrayRef {
336 ListArray::new(
337 array.elements().clone(),
338 array.offsets().slice(range.start..range.end + 1),
339 array.validity().slice(range),
340 )
341 .into_array()
342 }
343
344 fn scalar_at(array: &ListArray, index: usize) -> Scalar {
345 let elems = array.list_elements_at(index);
347 let scalars: Vec<Scalar> = (0..elems.len()).map(|i| elems.scalar_at(i)).collect();
348
349 Scalar::list(
350 Arc::new(elems.dtype().clone()),
351 scalars,
352 array.dtype().nullability(),
353 )
354 }
355}
356
357impl CanonicalVTable<ListVTable> for ListVTable {
358 fn canonicalize(array: &ListArray) -> Canonical {
359 Canonical::List(array.clone())
360 }
361}
362
363impl ValidityHelper for ListArray {
364 fn validity(&self) -> &Validity {
365 &self.validity
366 }
367}
368
369#[cfg(feature = "test-harness")]
370impl ListArray {
371 pub fn from_iter_slow<O: OffsetPType, I: IntoIterator>(
375 iter: I,
376 dtype: Arc<DType>,
377 ) -> VortexResult<ArrayRef>
378 where
379 I::Item: IntoIterator,
380 <I::Item as IntoIterator>::Item: Into<Scalar>,
381 {
382 let iter = iter.into_iter();
383 let mut builder = ListBuilder::<O>::with_capacity(
384 dtype.clone(),
385 vortex_dtype::Nullability::NonNullable,
386 iter.size_hint().0,
387 );
388
389 for v in iter {
390 let elem = Scalar::list(
391 dtype.clone(),
392 v.into_iter().map(|x| x.into()).collect_vec(),
393 dtype.nullability(),
394 );
395 builder.append_value(elem.as_list())?
396 }
397 Ok(builder.finish())
398 }
399
400 pub fn from_iter_opt_slow<O: OffsetPType, I: IntoIterator<Item = Option<T>>, T>(
401 iter: I,
402 dtype: Arc<DType>,
403 ) -> VortexResult<ArrayRef>
404 where
405 T: IntoIterator,
406 T::Item: Into<Scalar>,
407 {
408 let iter = iter.into_iter();
409 let mut builder = ListBuilder::<O>::with_capacity(
410 dtype.clone(),
411 vortex_dtype::Nullability::Nullable,
412 iter.size_hint().0,
413 );
414
415 for v in iter {
416 if let Some(v) = v {
417 let elem = Scalar::list(
418 dtype.clone(),
419 v.into_iter().map(|x| x.into()).collect_vec(),
420 dtype.nullability(),
421 );
422 builder.append_value(elem.as_list())?
423 } else {
424 builder.append_null()
425 }
426 }
427 Ok(builder.finish())
428 }
429}
430
431#[cfg(test)]
432mod tests;