vortex_array/arrays/list/
mod.rs1mod compute;
5mod serde;
6
7use std::ops::Range;
8use std::sync::Arc;
9
10#[cfg(feature = "test-harness")]
11use itertools::Itertools;
12use num_traits::{AsPrimitive, PrimInt};
13use vortex_dtype::{DType, NativePType, match_each_integer_ptype, match_each_native_ptype};
14use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_ensure};
15use vortex_scalar::Scalar;
16
17use crate::arrays::PrimitiveVTable;
18#[cfg(feature = "test-harness")]
19use crate::builders::{ArrayBuilder, ListBuilder};
20use crate::compute::{min_max, sub_scalar};
21use crate::stats::{ArrayStats, StatsSetRef};
22use crate::validity::Validity;
23use crate::vtable::{
24 ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityHelper,
25 ValidityVTableFromValidityHelper,
26};
27use crate::{Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
28
29vtable!(List);
30
31impl VTable for ListVTable {
32 type Array = ListArray;
33 type Encoding = ListEncoding;
34
35 type ArrayVTable = Self;
36 type CanonicalVTable = Self;
37 type OperationsVTable = Self;
38 type ValidityVTable = ValidityVTableFromValidityHelper;
39 type VisitorVTable = Self;
40 type ComputeVTable = NotSupported;
41 type EncodeVTable = NotSupported;
42 type PipelineVTable = NotSupported;
43 type SerdeVTable = Self;
44
45 fn id(_encoding: &Self::Encoding) -> EncodingId {
46 EncodingId::new_ref("vortex.list")
47 }
48
49 fn encoding(_array: &Self::Array) -> EncodingRef {
50 EncodingRef::new_ref(ListEncoding.as_ref())
51 }
52}
53
54#[derive(Clone, Debug)]
106pub struct ListArray {
107 dtype: DType,
108 elements: ArrayRef,
109 offsets: ArrayRef,
110 validity: Validity,
111 stats_set: ArrayStats,
112}
113
114#[derive(Clone, Debug)]
115pub struct ListEncoding;
116
117pub trait OffsetPType: NativePType + PrimInt + AsPrimitive<usize> + Into<Scalar> {}
118
119impl<T> OffsetPType for T where T: NativePType + PrimInt + AsPrimitive<usize> + Into<Scalar> {}
120
121impl ListArray {
122 pub fn new(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
123 Self::try_new(elements, offsets, validity).vortex_expect("ListArray new")
124 }
125
126 pub fn try_new(
127 elements: ArrayRef,
128 offsets: ArrayRef,
129 validity: Validity,
130 ) -> VortexResult<Self> {
131 if !offsets.dtype().is_int() || offsets.dtype().is_nullable() {
132 vortex_bail!(
133 "Expected offsets to be an non-nullable integer type, got {:?}",
134 offsets.dtype()
135 );
136 }
137
138 if offsets.is_empty() {
139 vortex_bail!("Offsets must have at least one element, [0] for an empty list");
140 }
141
142 Self::validate(&elements, &offsets, &validity)?;
143
144 Ok(unsafe { Self::new_unchecked(elements, offsets, validity) })
145 }
146
147 pub unsafe fn new_unchecked(elements: ArrayRef, offsets: ArrayRef, validity: Validity) -> Self {
150 Self {
151 dtype: DType::List(Arc::new(elements.dtype().clone()), validity.nullability()),
152 elements,
153 offsets,
154 validity,
155 stats_set: Default::default(),
156 }
157 }
158
159 pub fn offset_at(&self, index: usize) -> usize {
163 assert!(
164 index <= self.len(),
165 "Index {index} out of bounds 0..={}",
166 self.len()
167 );
168
169 self.offsets()
170 .as_opt::<PrimitiveVTable>()
171 .map(|p| match_each_native_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
172 .unwrap_or_else(|| {
173 self.offsets()
174 .scalar_at(index)
175 .as_primitive()
176 .as_::<usize>()
177 .vortex_expect("index must fit in usize")
178 })
179 }
180
181 pub fn elements_at(&self, index: usize) -> ArrayRef {
183 let start = self.offset_at(index);
184 let end = self.offset_at(index + 1);
185 self.elements().slice(start..end)
186 }
187
188 pub fn sliced_elements(&self) -> ArrayRef {
190 let start = self.offset_at(0);
191 let end = self.offset_at(self.len());
192 self.elements().slice(start..end)
193 }
194
195 pub fn offsets(&self) -> &ArrayRef {
197 &self.offsets
198 }
199
200 pub fn elements(&self) -> &ArrayRef {
202 &self.elements
203 }
204
205 pub fn reset_offsets(&self) -> VortexResult<Self> {
207 let elements = self.sliced_elements();
208 let offsets = self.offsets();
209 let first_offset = offsets.scalar_at(0);
210 let adjusted_offsets = sub_scalar(offsets, first_offset)?;
211
212 Self::try_new(elements, adjusted_offsets, self.validity.clone())
213 }
214
215 fn validate(
223 elements: &dyn Array,
224 offsets: &dyn Array,
225 validity: &Validity,
226 ) -> VortexResult<()> {
227 vortex_ensure!(
229 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
230 "offsets have invalid type {}",
231 offsets.dtype()
232 );
233
234 let offsets_ptype = offsets.dtype().as_ptype();
236
237 if let Some(is_sorted) = offsets.statistics().compute_is_sorted() {
239 vortex_ensure!(is_sorted, "offsets must be sorted");
240 } else {
241 vortex_bail!("offsets must report is_sorted statistic");
242 }
243
244 if let Some(min_max) = min_max(offsets)? {
247 match_each_integer_ptype!(offsets_ptype, |P| {
248 let max_offset = P::try_from(offsets.scalar_at(offsets.len() - 1))
249 .vortex_expect("Offsets type must fit offsets values");
250
251 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
252 {
253 if let Some(min) = min_max.min.as_primitive().as_::<P>() {
254 vortex_ensure!(
255 min >= 0 && min <= max_offset,
256 "offsets minimum {min} outside valid range [0, {max_offset}]"
257 );
258 }
259
260 if let Some(max) = min_max.max.as_primitive().as_::<P>() {
261 vortex_ensure!(
262 max >= 0 && max <= max_offset,
263 "offsets maximum {max} outside valid range [0, {max_offset}]"
264 )
265 }
266 }
267
268 vortex_ensure!(
269 max_offset
270 <= P::try_from(elements.len())
271 .vortex_expect("Offsets type must be able to fit elements length"),
272 "Max offset {max_offset} is beyond the length of the elements array {}",
273 elements.len()
274 );
275 })
276 } else {
277 vortex_bail!(
279 "offsets array with encoding {} must support min_max compute function",
280 offsets.encoding_id()
281 );
282 };
283
284 if let Some(validity_len) = validity.maybe_len() {
286 vortex_ensure!(
287 validity_len == offsets.len() - 1,
288 "validity with size {validity_len} does not match array size {}",
289 offsets.len() - 1
290 );
291 }
292
293 Ok(())
294 }
295}
296
297impl ArrayVTable<ListVTable> for ListVTable {
298 fn len(array: &ListArray) -> usize {
299 array.offsets.len().saturating_sub(1)
300 }
301
302 fn dtype(array: &ListArray) -> &DType {
303 &array.dtype
304 }
305
306 fn stats(array: &ListArray) -> StatsSetRef<'_> {
307 array.stats_set.to_ref(array.as_ref())
308 }
309}
310
311impl OperationsVTable<ListVTable> for ListVTable {
312 fn slice(array: &ListArray, range: Range<usize>) -> ArrayRef {
313 ListArray::new(
314 array.elements().clone(),
315 array.offsets().slice(range.start..range.end + 1),
316 array.validity().slice(range),
317 )
318 .into_array()
319 }
320
321 fn scalar_at(array: &ListArray, index: usize) -> Scalar {
322 let elem = array.elements_at(index);
323 let scalars: Vec<Scalar> = (0..elem.len()).map(|i| elem.scalar_at(i)).collect();
324
325 Scalar::list(
326 Arc::new(elem.dtype().clone()),
327 scalars,
328 array.dtype().nullability(),
329 )
330 }
331}
332
333impl CanonicalVTable<ListVTable> for ListVTable {
334 fn canonicalize(array: &ListArray) -> Canonical {
335 Canonical::List(array.clone())
336 }
337}
338
339impl ValidityHelper for ListArray {
340 fn validity(&self) -> &Validity {
341 &self.validity
342 }
343}
344
345#[cfg(feature = "test-harness")]
346impl ListArray {
347 pub fn from_iter_slow<O: OffsetPType, I: IntoIterator>(
351 iter: I,
352 dtype: Arc<DType>,
353 ) -> VortexResult<ArrayRef>
354 where
355 I::Item: IntoIterator,
356 <I::Item as IntoIterator>::Item: Into<Scalar>,
357 {
358 let iter = iter.into_iter();
359 let mut builder = ListBuilder::<O>::with_capacity(
360 dtype.clone(),
361 vortex_dtype::Nullability::NonNullable,
362 iter.size_hint().0,
363 );
364
365 for v in iter {
366 let elem = Scalar::list(
367 dtype.clone(),
368 v.into_iter().map(|x| x.into()).collect_vec(),
369 dtype.nullability(),
370 );
371 builder.append_value(elem.as_list())?
372 }
373 Ok(builder.finish())
374 }
375
376 pub fn from_iter_opt_slow<O: OffsetPType, I: IntoIterator<Item = Option<T>>, T>(
377 iter: I,
378 dtype: Arc<DType>,
379 ) -> VortexResult<ArrayRef>
380 where
381 T: IntoIterator,
382 T::Item: Into<Scalar>,
383 {
384 let iter = iter.into_iter();
385 let mut builder = ListBuilder::<O>::with_capacity(
386 dtype.clone(),
387 vortex_dtype::Nullability::Nullable,
388 iter.size_hint().0,
389 );
390
391 for v in iter {
392 if let Some(v) = v {
393 let elem = Scalar::list(
394 dtype.clone(),
395 v.into_iter().map(|x| x.into()).collect_vec(),
396 dtype.nullability(),
397 );
398 builder.append_value(elem.as_list())?
399 } else {
400 builder.append_null()
401 }
402 }
403 Ok(builder.finish())
404 }
405}
406
407#[cfg(test)]
408mod tests;