arrow_array/array/
list_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::array::{get_offsets, make_array, print_long_array};
19use crate::builder::{GenericListBuilder, PrimitiveBuilder};
20use crate::{
21    iterator::GenericListArrayIter, new_empty_array, Array, ArrayAccessor, ArrayRef,
22    ArrowPrimitiveType, FixedSizeListArray,
23};
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25use arrow_data::{ArrayData, ArrayDataBuilder};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use num::Integer;
28use std::any::Any;
29use std::sync::Arc;
30
31/// A type that can be used within a variable-size array to encode offset information
32///
33/// See [`ListArray`], [`LargeListArray`], [`BinaryArray`], [`LargeBinaryArray`],
34/// [`StringArray`] and [`LargeStringArray`]
35///
36/// [`BinaryArray`]: crate::array::BinaryArray
37/// [`LargeBinaryArray`]: crate::array::LargeBinaryArray
38/// [`StringArray`]: crate::array::StringArray
39/// [`LargeStringArray`]: crate::array::LargeStringArray
40pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
41    /// True for 64 bit offset size and false for 32 bit offset size
42    const IS_LARGE: bool;
43    /// Prefix for the offset size
44    const PREFIX: &'static str;
45}
46
47impl OffsetSizeTrait for i32 {
48    const IS_LARGE: bool = false;
49    const PREFIX: &'static str = "";
50}
51
52impl OffsetSizeTrait for i64 {
53    const IS_LARGE: bool = true;
54    const PREFIX: &'static str = "Large";
55}
56
57/// An array of [variable length lists], similar to JSON arrays
58/// (e.g. `["A", "B", "C"]`).
59///
60/// Lists are represented using `offsets` into a `values` child
61/// array. Offsets are stored in two adjacent entries of an
62/// [`OffsetBuffer`].
63///
64/// Arrow defines [`ListArray`] with `i32` offsets and
65/// [`LargeListArray`] with `i64` offsets.
66///
67/// Use [`GenericListBuilder`] to construct a [`GenericListArray`].
68///
69/// # Representation
70///
71/// A [`ListArray`] can represent a list of values of any other
72/// supported Arrow type. Each element of the `ListArray` itself is
73/// a list which may be empty, may contain NULL and non-null values,
74/// or may itself be NULL.
75///
76/// For example, the `ListArray` shown in the following diagram stores
77/// lists of strings. Note that `[]` represents an empty (length
78/// 0), but non NULL list.
79///
80/// ```text
81/// ┌─────────────┐
82/// │   [A,B,C]   │
83/// ├─────────────┤
84/// │     []      │
85/// ├─────────────┤
86/// │    NULL     │
87/// ├─────────────┤
88/// │     [D]     │
89/// ├─────────────┤
90/// │  [NULL, F]  │
91/// └─────────────┘
92/// ```
93///
94/// The `values` are stored in a child [`StringArray`] and the offsets
95/// are stored in an [`OffsetBuffer`] as shown in the following
96/// diagram. The logical values and offsets are shown on the left, and
97/// the actual `ListArray` encoding on the right.
98///
99/// ```text
100///                                         ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
101///                                                                 ┌ ─ ─ ─ ─ ─ ─ ┐    │
102///  ┌─────────────┐  ┌───────┐             │     ┌───┐   ┌───┐       ┌───┐ ┌───┐
103///  │   [A,B,C]   │  │ (0,3) │                   │ 1 │   │ 0 │     │ │ 1 │ │ A │ │ 0  │
104///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
105///  │      []     │  │ (3,3) │                   │ 1 │   │ 3 │     │ │ 1 │ │ B │ │ 1  │
106///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
107///  │    NULL     │  │ (3,4) │                   │ 0 │   │ 3 │     │ │ 1 │ │ C │ │ 2  │
108///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
109///  │     [D]     │  │ (4,5) │                   │ 1 │   │ 4 │     │ │ ? │ │ ? │ │ 3  │
110///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
111///  │  [NULL, F]  │  │ (5,7) │                   │ 1 │   │ 5 │     │ │ 1 │ │ D │ │ 4  │
112///  └─────────────┘  └───────┘             │     └───┘   ├───┤       ├───┤ ├───┤
113///                                                       │ 7 │     │ │ 0 │ │ ? │ │ 5  │
114///                                         │  Validity   └───┘       ├───┤ ├───┤
115///     Logical       Logical                  (nulls)   Offsets    │ │ 1 │ │ F │ │ 6  │
116///      Values       Offsets               │                         └───┘ └───┘
117///                                                                 │    Values   │    │
118///                 (offsets[i],            │   ListArray               (Array)
119///                offsets[i+1])                                    └ ─ ─ ─ ─ ─ ─ ┘    │
120///                                         └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
121///
122///
123/// ```
124///
125/// [`StringArray`]: crate::array::StringArray
126/// [variable length lists]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout
127pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
128    data_type: DataType,
129    nulls: Option<NullBuffer>,
130    values: ArrayRef,
131    value_offsets: OffsetBuffer<OffsetSize>,
132}
133
134impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
135    fn clone(&self) -> Self {
136        Self {
137            data_type: self.data_type.clone(),
138            nulls: self.nulls.clone(),
139            values: self.values.clone(),
140            value_offsets: self.value_offsets.clone(),
141        }
142    }
143}
144
145impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
146    /// The data type constructor of list array.
147    /// The input is the schema of the child array and
148    /// the output is the [`DataType`], List or LargeList.
149    pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
150        DataType::LargeList
151    } else {
152        DataType::List
153    };
154
155    /// Create a new [`GenericListArray`] from the provided parts
156    ///
157    /// # Errors
158    ///
159    /// Errors if
160    ///
161    /// * `offsets.len() - 1 != nulls.len()`
162    /// * `offsets.last() > values.len()`
163    /// * `!field.is_nullable() && values.is_nullable()`
164    /// * `field.data_type() != values.data_type()`
165    pub fn try_new(
166        field: FieldRef,
167        offsets: OffsetBuffer<OffsetSize>,
168        values: ArrayRef,
169        nulls: Option<NullBuffer>,
170    ) -> Result<Self, ArrowError> {
171        let len = offsets.len() - 1; // Offsets guaranteed to not be empty
172        let end_offset = offsets.last().unwrap().as_usize();
173        // don't need to check other values of `offsets` because they are checked
174        // during construction of `OffsetBuffer`
175        if end_offset > values.len() {
176            return Err(ArrowError::InvalidArgumentError(format!(
177                "Max offset of {end_offset} exceeds length of values {}",
178                values.len()
179            )));
180        }
181
182        if let Some(n) = nulls.as_ref() {
183            if n.len() != len {
184                return Err(ArrowError::InvalidArgumentError(format!(
185                    "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
186                    OffsetSize::PREFIX,
187                    n.len(),
188                )));
189            }
190        }
191        if !field.is_nullable() && values.is_nullable() {
192            return Err(ArrowError::InvalidArgumentError(format!(
193                "Non-nullable field of {}ListArray {:?} cannot contain nulls",
194                OffsetSize::PREFIX,
195                field.name()
196            )));
197        }
198
199        if field.data_type() != values.data_type() {
200            return Err(ArrowError::InvalidArgumentError(format!(
201                "{}ListArray expected data type {} got {} for {:?}",
202                OffsetSize::PREFIX,
203                field.data_type(),
204                values.data_type(),
205                field.name()
206            )));
207        }
208
209        Ok(Self {
210            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
211            nulls,
212            values,
213            value_offsets: offsets,
214        })
215    }
216
217    /// Create a new [`GenericListArray`] from the provided parts
218    ///
219    /// # Panics
220    ///
221    /// Panics if [`Self::try_new`] returns an error
222    pub fn new(
223        field: FieldRef,
224        offsets: OffsetBuffer<OffsetSize>,
225        values: ArrayRef,
226        nulls: Option<NullBuffer>,
227    ) -> Self {
228        Self::try_new(field, offsets, values, nulls).unwrap()
229    }
230
231    /// Create a new [`GenericListArray`] of length `len` where all values are null
232    pub fn new_null(field: FieldRef, len: usize) -> Self {
233        let values = new_empty_array(field.data_type());
234        Self {
235            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
236            nulls: Some(NullBuffer::new_null(len)),
237            value_offsets: OffsetBuffer::new_zeroed(len),
238            values,
239        }
240    }
241
242    /// Deconstruct this array into its constituent parts
243    pub fn into_parts(
244        self,
245    ) -> (
246        FieldRef,
247        OffsetBuffer<OffsetSize>,
248        ArrayRef,
249        Option<NullBuffer>,
250    ) {
251        let f = match self.data_type {
252            DataType::List(f) | DataType::LargeList(f) => f,
253            _ => unreachable!(),
254        };
255        (f, self.value_offsets, self.values, self.nulls)
256    }
257
258    /// Returns a reference to the offsets of this list
259    ///
260    /// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`]
261    /// allowing for zero-copy cloning
262    #[inline]
263    pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
264        &self.value_offsets
265    }
266
267    /// Returns a reference to the values of this list
268    #[inline]
269    pub fn values(&self) -> &ArrayRef {
270        &self.values
271    }
272
273    /// Returns a clone of the value type of this list.
274    pub fn value_type(&self) -> DataType {
275        self.values.data_type().clone()
276    }
277
278    /// Returns ith value of this list array.
279    /// # Safety
280    /// Caller must ensure that the index is within the array bounds
281    pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
282        let end = self.value_offsets().get_unchecked(i + 1).as_usize();
283        let start = self.value_offsets().get_unchecked(i).as_usize();
284        self.values.slice(start, end - start)
285    }
286
287    /// Returns ith value of this list array.
288    pub fn value(&self, i: usize) -> ArrayRef {
289        let end = self.value_offsets()[i + 1].as_usize();
290        let start = self.value_offsets()[i].as_usize();
291        self.values.slice(start, end - start)
292    }
293
294    /// Returns the offset values in the offsets buffer
295    #[inline]
296    pub fn value_offsets(&self) -> &[OffsetSize] {
297        &self.value_offsets
298    }
299
300    /// Returns the length for value at index `i`.
301    #[inline]
302    pub fn value_length(&self, i: usize) -> OffsetSize {
303        let offsets = self.value_offsets();
304        offsets[i + 1] - offsets[i]
305    }
306
307    /// constructs a new iterator
308    pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
309        GenericListArrayIter::<'a, OffsetSize>::new(self)
310    }
311
312    #[inline]
313    fn get_type(data_type: &DataType) -> Option<&DataType> {
314        match (OffsetSize::IS_LARGE, data_type) {
315            (true, DataType::LargeList(child)) | (false, DataType::List(child)) => {
316                Some(child.data_type())
317            }
318            _ => None,
319        }
320    }
321
322    /// Returns a zero-copy slice of this array with the indicated offset and length.
323    pub fn slice(&self, offset: usize, length: usize) -> Self {
324        Self {
325            data_type: self.data_type.clone(),
326            nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
327            values: self.values.clone(),
328            value_offsets: self.value_offsets.slice(offset, length),
329        }
330    }
331
332    /// Creates a [`GenericListArray`] from an iterator of primitive values
333    /// # Example
334    /// ```
335    /// # use arrow_array::ListArray;
336    /// # use arrow_array::types::Int32Type;
337    ///
338    /// let data = vec![
339    ///    Some(vec![Some(0), Some(1), Some(2)]),
340    ///    None,
341    ///    Some(vec![Some(3), None, Some(5)]),
342    ///    Some(vec![Some(6), Some(7)]),
343    /// ];
344    /// let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
345    /// println!("{:?}", list_array);
346    /// ```
347    pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
348    where
349        T: ArrowPrimitiveType,
350        P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
351        I: IntoIterator<Item = Option<P>>,
352    {
353        let iter = iter.into_iter();
354        let size_hint = iter.size_hint().0;
355        let mut builder =
356            GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
357
358        for i in iter {
359            match i {
360                Some(p) => {
361                    for t in p {
362                        builder.values().append_option(t);
363                    }
364                    builder.append(true);
365                }
366                None => builder.append(false),
367            }
368        }
369        builder.finish()
370    }
371}
372
373impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
374    fn from(data: ArrayData) -> Self {
375        Self::try_new_from_array_data(data)
376            .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
377    }
378}
379
380impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
381    fn from(array: GenericListArray<OffsetSize>) -> Self {
382        let len = array.len();
383        let builder = ArrayDataBuilder::new(array.data_type)
384            .len(len)
385            .nulls(array.nulls)
386            .buffers(vec![array.value_offsets.into_inner().into_inner()])
387            .child_data(vec![array.values.to_data()]);
388
389        unsafe { builder.build_unchecked() }
390    }
391}
392
393impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
394    fn from(value: FixedSizeListArray) -> Self {
395        let (field, size) = match value.data_type() {
396            DataType::FixedSizeList(f, size) => (f, *size as usize),
397            _ => unreachable!(),
398        };
399
400        let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
401
402        Self {
403            data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
404            nulls: value.nulls().cloned(),
405            values: value.values().clone(),
406            value_offsets: offsets,
407        }
408    }
409}
410
411impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
412    fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
413        if data.buffers().len() != 1 {
414            return Err(ArrowError::InvalidArgumentError(format!(
415                "ListArray data should contain a single buffer only (value offsets), had {}",
416                data.buffers().len()
417            )));
418        }
419
420        if data.child_data().len() != 1 {
421            return Err(ArrowError::InvalidArgumentError(format!(
422                "ListArray should contain a single child array (values array), had {}",
423                data.child_data().len()
424            )));
425        }
426
427        let values = data.child_data()[0].clone();
428
429        if let Some(child_data_type) = Self::get_type(data.data_type()) {
430            if values.data_type() != child_data_type {
431                return Err(ArrowError::InvalidArgumentError(format!(
432                    "[Large]ListArray's child datatype {:?} does not \
433                             correspond to the List's datatype {:?}",
434                    values.data_type(),
435                    child_data_type
436                )));
437            }
438        } else {
439            return Err(ArrowError::InvalidArgumentError(format!(
440                "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
441                data.data_type()
442            )));
443        }
444
445        let values = make_array(values);
446        // SAFETY:
447        // ArrayData is valid, and verified type above
448        let value_offsets = unsafe { get_offsets(&data) };
449
450        Ok(Self {
451            data_type: data.data_type().clone(),
452            nulls: data.nulls().cloned(),
453            values,
454            value_offsets,
455        })
456    }
457}
458
459impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
460    fn as_any(&self) -> &dyn Any {
461        self
462    }
463
464    fn to_data(&self) -> ArrayData {
465        self.clone().into()
466    }
467
468    fn into_data(self) -> ArrayData {
469        self.into()
470    }
471
472    fn data_type(&self) -> &DataType {
473        &self.data_type
474    }
475
476    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
477        Arc::new(self.slice(offset, length))
478    }
479
480    fn len(&self) -> usize {
481        self.value_offsets.len() - 1
482    }
483
484    fn is_empty(&self) -> bool {
485        self.value_offsets.len() <= 1
486    }
487
488    fn shrink_to_fit(&mut self) {
489        if let Some(nulls) = &mut self.nulls {
490            nulls.shrink_to_fit();
491        }
492        self.values.shrink_to_fit();
493        self.value_offsets.shrink_to_fit();
494    }
495
496    fn offset(&self) -> usize {
497        0
498    }
499
500    fn nulls(&self) -> Option<&NullBuffer> {
501        self.nulls.as_ref()
502    }
503
504    fn logical_null_count(&self) -> usize {
505        // More efficient that the default implementation
506        self.null_count()
507    }
508
509    fn get_buffer_memory_size(&self) -> usize {
510        let mut size = self.values.get_buffer_memory_size();
511        size += self.value_offsets.inner().inner().capacity();
512        if let Some(n) = self.nulls.as_ref() {
513            size += n.buffer().capacity();
514        }
515        size
516    }
517
518    fn get_array_memory_size(&self) -> usize {
519        let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
520        size += self.value_offsets.inner().inner().capacity();
521        if let Some(n) = self.nulls.as_ref() {
522            size += n.buffer().capacity();
523        }
524        size
525    }
526}
527
528impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
529    type Item = ArrayRef;
530
531    fn value(&self, index: usize) -> Self::Item {
532        GenericListArray::value(self, index)
533    }
534
535    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
536        GenericListArray::value(self, index)
537    }
538}
539
540impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
541    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
542        let prefix = OffsetSize::PREFIX;
543
544        write!(f, "{prefix}ListArray\n[\n")?;
545        print_long_array(self, f, |array, index, f| {
546            std::fmt::Debug::fmt(&array.value(index), f)
547        })?;
548        write!(f, "]")
549    }
550}
551
552/// A [`GenericListArray`] of variable size lists, storing offsets as `i32`.
553///
554// See [`ListBuilder`](crate::builder::ListBuilder) for how to construct a [`ListArray`]
555pub type ListArray = GenericListArray<i32>;
556
557/// A [`GenericListArray`] of variable size lists, storing offsets as `i64`.
558///
559// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`]
560pub type LargeListArray = GenericListArray<i64>;
561
562#[cfg(test)]
563mod tests {
564    use super::*;
565    use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
566    use crate::cast::AsArray;
567    use crate::types::Int32Type;
568    use crate::{Int32Array, Int64Array};
569    use arrow_buffer::{bit_util, Buffer, ScalarBuffer};
570    use arrow_schema::Field;
571
572    fn create_from_buffers() -> ListArray {
573        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
574        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
575        let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
576        let field = Arc::new(Field::new("item", DataType::Int32, true));
577        ListArray::new(field, offsets, Arc::new(values), None)
578    }
579
580    #[test]
581    fn test_from_iter_primitive() {
582        let data = vec![
583            Some(vec![Some(0), Some(1), Some(2)]),
584            Some(vec![Some(3), Some(4), Some(5)]),
585            Some(vec![Some(6), Some(7)]),
586        ];
587        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
588
589        let another = create_from_buffers();
590        assert_eq!(list_array, another)
591    }
592
593    #[test]
594    fn test_empty_list_array() {
595        // Construct an empty value array
596        let value_data = ArrayData::builder(DataType::Int32)
597            .len(0)
598            .add_buffer(Buffer::from([]))
599            .build()
600            .unwrap();
601
602        // Construct an empty offset buffer
603        let value_offsets = Buffer::from([]);
604
605        // Construct a list array from the above two
606        let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
607        let list_data = ArrayData::builder(list_data_type)
608            .len(0)
609            .add_buffer(value_offsets)
610            .add_child_data(value_data)
611            .build()
612            .unwrap();
613
614        let list_array = ListArray::from(list_data);
615        assert_eq!(list_array.len(), 0)
616    }
617
618    #[test]
619    fn test_list_array() {
620        // Construct a value array
621        let value_data = ArrayData::builder(DataType::Int32)
622            .len(8)
623            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
624            .build()
625            .unwrap();
626
627        // Construct a buffer for value offsets, for the nested array:
628        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
629        let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
630
631        // Construct a list array from the above two
632        let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
633        let list_data = ArrayData::builder(list_data_type.clone())
634            .len(3)
635            .add_buffer(value_offsets.clone())
636            .add_child_data(value_data.clone())
637            .build()
638            .unwrap();
639        let list_array = ListArray::from(list_data);
640
641        let values = list_array.values();
642        assert_eq!(value_data, values.to_data());
643        assert_eq!(DataType::Int32, list_array.value_type());
644        assert_eq!(3, list_array.len());
645        assert_eq!(0, list_array.null_count());
646        assert_eq!(6, list_array.value_offsets()[2]);
647        assert_eq!(2, list_array.value_length(2));
648        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
649        assert_eq!(
650            0,
651            unsafe { list_array.value_unchecked(0) }
652                .as_primitive::<Int32Type>()
653                .value(0)
654        );
655        for i in 0..3 {
656            assert!(list_array.is_valid(i));
657            assert!(!list_array.is_null(i));
658        }
659
660        // Now test with a non-zero offset (skip first element)
661        //  [[3, 4, 5], [6, 7]]
662        let list_data = ArrayData::builder(list_data_type)
663            .len(2)
664            .offset(1)
665            .add_buffer(value_offsets)
666            .add_child_data(value_data.clone())
667            .build()
668            .unwrap();
669        let list_array = ListArray::from(list_data);
670
671        let values = list_array.values();
672        assert_eq!(value_data, values.to_data());
673        assert_eq!(DataType::Int32, list_array.value_type());
674        assert_eq!(2, list_array.len());
675        assert_eq!(0, list_array.null_count());
676        assert_eq!(6, list_array.value_offsets()[1]);
677        assert_eq!(2, list_array.value_length(1));
678        assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
679        assert_eq!(
680            3,
681            unsafe { list_array.value_unchecked(0) }
682                .as_primitive::<Int32Type>()
683                .value(0)
684        );
685    }
686
687    #[test]
688    fn test_large_list_array() {
689        // Construct a value array
690        let value_data = ArrayData::builder(DataType::Int32)
691            .len(8)
692            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
693            .build()
694            .unwrap();
695
696        // Construct a buffer for value offsets, for the nested array:
697        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
698        let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);
699
700        // Construct a list array from the above two
701        let list_data_type = DataType::new_large_list(DataType::Int32, false);
702        let list_data = ArrayData::builder(list_data_type.clone())
703            .len(3)
704            .add_buffer(value_offsets.clone())
705            .add_child_data(value_data.clone())
706            .build()
707            .unwrap();
708        let list_array = LargeListArray::from(list_data);
709
710        let values = list_array.values();
711        assert_eq!(value_data, values.to_data());
712        assert_eq!(DataType::Int32, list_array.value_type());
713        assert_eq!(3, list_array.len());
714        assert_eq!(0, list_array.null_count());
715        assert_eq!(6, list_array.value_offsets()[2]);
716        assert_eq!(2, list_array.value_length(2));
717        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
718        assert_eq!(
719            0,
720            unsafe { list_array.value_unchecked(0) }
721                .as_primitive::<Int32Type>()
722                .value(0)
723        );
724        for i in 0..3 {
725            assert!(list_array.is_valid(i));
726            assert!(!list_array.is_null(i));
727        }
728
729        // Now test with a non-zero offset
730        //  [[3, 4, 5], [6, 7]]
731        let list_data = ArrayData::builder(list_data_type)
732            .len(2)
733            .offset(1)
734            .add_buffer(value_offsets)
735            .add_child_data(value_data.clone())
736            .build()
737            .unwrap();
738        let list_array = LargeListArray::from(list_data);
739
740        let values = list_array.values();
741        assert_eq!(value_data, values.to_data());
742        assert_eq!(DataType::Int32, list_array.value_type());
743        assert_eq!(2, list_array.len());
744        assert_eq!(0, list_array.null_count());
745        assert_eq!(6, list_array.value_offsets()[1]);
746        assert_eq!(2, list_array.value_length(1));
747        assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
748        assert_eq!(
749            3,
750            unsafe { list_array.value_unchecked(0) }
751                .as_primitive::<Int32Type>()
752                .value(0)
753        );
754    }
755
756    #[test]
757    fn test_list_array_slice() {
758        // Construct a value array
759        let value_data = ArrayData::builder(DataType::Int32)
760            .len(10)
761            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
762            .build()
763            .unwrap();
764
765        // Construct a buffer for value offsets, for the nested array:
766        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
767        let value_offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
768        // 01011001 00000001
769        let mut null_bits: [u8; 2] = [0; 2];
770        bit_util::set_bit(&mut null_bits, 0);
771        bit_util::set_bit(&mut null_bits, 3);
772        bit_util::set_bit(&mut null_bits, 4);
773        bit_util::set_bit(&mut null_bits, 6);
774        bit_util::set_bit(&mut null_bits, 8);
775
776        // Construct a list array from the above two
777        let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
778        let list_data = ArrayData::builder(list_data_type)
779            .len(9)
780            .add_buffer(value_offsets)
781            .add_child_data(value_data.clone())
782            .null_bit_buffer(Some(Buffer::from(null_bits)))
783            .build()
784            .unwrap();
785        let list_array = ListArray::from(list_data);
786
787        let values = list_array.values();
788        assert_eq!(value_data, values.to_data());
789        assert_eq!(DataType::Int32, list_array.value_type());
790        assert_eq!(9, list_array.len());
791        assert_eq!(4, list_array.null_count());
792        assert_eq!(2, list_array.value_offsets()[3]);
793        assert_eq!(2, list_array.value_length(3));
794
795        let sliced_array = list_array.slice(1, 6);
796        assert_eq!(6, sliced_array.len());
797        assert_eq!(3, sliced_array.null_count());
798
799        for i in 0..sliced_array.len() {
800            if bit_util::get_bit(&null_bits, 1 + i) {
801                assert!(sliced_array.is_valid(i));
802            } else {
803                assert!(sliced_array.is_null(i));
804            }
805        }
806
807        // Check offset and length for each non-null value.
808        let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
809        assert_eq!(2, sliced_list_array.value_offsets()[2]);
810        assert_eq!(2, sliced_list_array.value_length(2));
811        assert_eq!(4, sliced_list_array.value_offsets()[3]);
812        assert_eq!(2, sliced_list_array.value_length(3));
813        assert_eq!(6, sliced_list_array.value_offsets()[5]);
814        assert_eq!(3, sliced_list_array.value_length(5));
815    }
816
817    #[test]
818    fn test_large_list_array_slice() {
819        // Construct a value array
820        let value_data = ArrayData::builder(DataType::Int32)
821            .len(10)
822            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
823            .build()
824            .unwrap();
825
826        // Construct a buffer for value offsets, for the nested array:
827        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
828        let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
829        // 01011001 00000001
830        let mut null_bits: [u8; 2] = [0; 2];
831        bit_util::set_bit(&mut null_bits, 0);
832        bit_util::set_bit(&mut null_bits, 3);
833        bit_util::set_bit(&mut null_bits, 4);
834        bit_util::set_bit(&mut null_bits, 6);
835        bit_util::set_bit(&mut null_bits, 8);
836
837        // Construct a list array from the above two
838        let list_data_type = DataType::new_large_list(DataType::Int32, false);
839        let list_data = ArrayData::builder(list_data_type)
840            .len(9)
841            .add_buffer(value_offsets)
842            .add_child_data(value_data.clone())
843            .null_bit_buffer(Some(Buffer::from(null_bits)))
844            .build()
845            .unwrap();
846        let list_array = LargeListArray::from(list_data);
847
848        let values = list_array.values();
849        assert_eq!(value_data, values.to_data());
850        assert_eq!(DataType::Int32, list_array.value_type());
851        assert_eq!(9, list_array.len());
852        assert_eq!(4, list_array.null_count());
853        assert_eq!(2, list_array.value_offsets()[3]);
854        assert_eq!(2, list_array.value_length(3));
855
856        let sliced_array = list_array.slice(1, 6);
857        assert_eq!(6, sliced_array.len());
858        assert_eq!(3, sliced_array.null_count());
859
860        for i in 0..sliced_array.len() {
861            if bit_util::get_bit(&null_bits, 1 + i) {
862                assert!(sliced_array.is_valid(i));
863            } else {
864                assert!(sliced_array.is_null(i));
865            }
866        }
867
868        // Check offset and length for each non-null value.
869        let sliced_list_array = sliced_array
870            .as_any()
871            .downcast_ref::<LargeListArray>()
872            .unwrap();
873        assert_eq!(2, sliced_list_array.value_offsets()[2]);
874        assert_eq!(2, sliced_list_array.value_length(2));
875        assert_eq!(4, sliced_list_array.value_offsets()[3]);
876        assert_eq!(2, sliced_list_array.value_length(3));
877        assert_eq!(6, sliced_list_array.value_offsets()[5]);
878        assert_eq!(3, sliced_list_array.value_length(5));
879    }
880
881    #[test]
882    #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
883    fn test_list_array_index_out_of_bound() {
884        // Construct a value array
885        let value_data = ArrayData::builder(DataType::Int32)
886            .len(10)
887            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
888            .build()
889            .unwrap();
890
891        // Construct a buffer for value offsets, for the nested array:
892        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
893        let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
894        // 01011001 00000001
895        let mut null_bits: [u8; 2] = [0; 2];
896        bit_util::set_bit(&mut null_bits, 0);
897        bit_util::set_bit(&mut null_bits, 3);
898        bit_util::set_bit(&mut null_bits, 4);
899        bit_util::set_bit(&mut null_bits, 6);
900        bit_util::set_bit(&mut null_bits, 8);
901
902        // Construct a list array from the above two
903        let list_data_type = DataType::new_large_list(DataType::Int32, false);
904        let list_data = ArrayData::builder(list_data_type)
905            .len(9)
906            .add_buffer(value_offsets)
907            .add_child_data(value_data)
908            .null_bit_buffer(Some(Buffer::from(null_bits)))
909            .build()
910            .unwrap();
911        let list_array = LargeListArray::from(list_data);
912        assert_eq!(9, list_array.len());
913
914        list_array.value(10);
915    }
916    #[test]
917    #[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
918    // Different error messages, so skip for now
919    // https://github.com/apache/arrow-rs/issues/1545
920    #[cfg(not(feature = "force_validate"))]
921    fn test_list_array_invalid_buffer_len() {
922        let value_data = unsafe {
923            ArrayData::builder(DataType::Int32)
924                .len(8)
925                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
926                .build_unchecked()
927        };
928        let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
929        let list_data = unsafe {
930            ArrayData::builder(list_data_type)
931                .len(3)
932                .add_child_data(value_data)
933                .build_unchecked()
934        };
935        drop(ListArray::from(list_data));
936    }
937
938    #[test]
939    #[should_panic(expected = "ListArray should contain a single child array (values array)")]
940    // Different error messages, so skip for now
941    // https://github.com/apache/arrow-rs/issues/1545
942    #[cfg(not(feature = "force_validate"))]
943    fn test_list_array_invalid_child_array_len() {
944        let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
945        let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
946        let list_data = unsafe {
947            ArrayData::builder(list_data_type)
948                .len(3)
949                .add_buffer(value_offsets)
950                .build_unchecked()
951        };
952        drop(ListArray::from(list_data));
953    }
954
955    #[test]
956    #[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
957    fn test_from_array_data_validation() {
958        let mut builder = ListBuilder::new(Int32Builder::new());
959        builder.values().append_value(1);
960        builder.append(true);
961        let array = builder.finish();
962        let _ = LargeListArray::from(array.into_data());
963    }
964
965    #[test]
966    fn test_list_array_offsets_need_not_start_at_zero() {
967        let value_data = ArrayData::builder(DataType::Int32)
968            .len(8)
969            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
970            .build()
971            .unwrap();
972
973        let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]);
974
975        let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
976        let list_data = ArrayData::builder(list_data_type)
977            .len(3)
978            .add_buffer(value_offsets)
979            .add_child_data(value_data)
980            .build()
981            .unwrap();
982
983        let list_array = ListArray::from(list_data);
984        assert_eq!(list_array.value_length(0), 0);
985        assert_eq!(list_array.value_length(1), 3);
986        assert_eq!(list_array.value_length(2), 2);
987    }
988
989    #[test]
990    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
991    // Different error messages, so skip for now
992    // https://github.com/apache/arrow-rs/issues/1545
993    #[cfg(not(feature = "force_validate"))]
994    fn test_primitive_array_alignment() {
995        let buf = Buffer::from_slice_ref([0_u64]);
996        let buf2 = buf.slice(1);
997        let array_data = unsafe {
998            ArrayData::builder(DataType::Int32)
999                .add_buffer(buf2)
1000                .build_unchecked()
1001        };
1002        drop(Int32Array::from(array_data));
1003    }
1004
1005    #[test]
1006    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1007    // Different error messages, so skip for now
1008    // https://github.com/apache/arrow-rs/issues/1545
1009    #[cfg(not(feature = "force_validate"))]
1010    fn test_list_array_alignment() {
1011        let buf = Buffer::from_slice_ref([0_u64]);
1012        let buf2 = buf.slice(1);
1013
1014        let values: [i32; 8] = [0; 8];
1015        let value_data = unsafe {
1016            ArrayData::builder(DataType::Int32)
1017                .add_buffer(Buffer::from_slice_ref(values))
1018                .build_unchecked()
1019        };
1020
1021        let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
1022        let list_data = unsafe {
1023            ArrayData::builder(list_data_type)
1024                .add_buffer(buf2)
1025                .add_child_data(value_data)
1026                .build_unchecked()
1027        };
1028        drop(ListArray::from(list_data));
1029    }
1030
1031    #[test]
1032    fn list_array_equality() {
1033        // test scaffold
1034        fn do_comparison(
1035            lhs_data: Vec<Option<Vec<Option<i32>>>>,
1036            rhs_data: Vec<Option<Vec<Option<i32>>>>,
1037            should_equal: bool,
1038        ) {
1039            let lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
1040            let rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
1041            assert_eq!(lhs == rhs, should_equal);
1042
1043            let lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
1044            let rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
1045            assert_eq!(lhs == rhs, should_equal);
1046        }
1047
1048        do_comparison(
1049            vec![
1050                Some(vec![Some(0), Some(1), Some(2)]),
1051                None,
1052                Some(vec![Some(3), None, Some(5)]),
1053                Some(vec![Some(6), Some(7)]),
1054            ],
1055            vec![
1056                Some(vec![Some(0), Some(1), Some(2)]),
1057                None,
1058                Some(vec![Some(3), None, Some(5)]),
1059                Some(vec![Some(6), Some(7)]),
1060            ],
1061            true,
1062        );
1063
1064        do_comparison(
1065            vec![
1066                None,
1067                None,
1068                Some(vec![Some(3), None, Some(5)]),
1069                Some(vec![Some(6), Some(7)]),
1070            ],
1071            vec![
1072                Some(vec![Some(0), Some(1), Some(2)]),
1073                None,
1074                Some(vec![Some(3), None, Some(5)]),
1075                Some(vec![Some(6), Some(7)]),
1076            ],
1077            false,
1078        );
1079
1080        do_comparison(
1081            vec![
1082                None,
1083                None,
1084                Some(vec![Some(3), None, Some(5)]),
1085                Some(vec![Some(6), Some(7)]),
1086            ],
1087            vec![
1088                None,
1089                None,
1090                Some(vec![Some(3), None, Some(5)]),
1091                Some(vec![Some(0), Some(0)]),
1092            ],
1093            false,
1094        );
1095
1096        do_comparison(
1097            vec![None, None, Some(vec![Some(1)])],
1098            vec![None, None, Some(vec![Some(2)])],
1099            false,
1100        );
1101    }
1102
1103    #[test]
1104    fn test_empty_offsets() {
1105        let f = Arc::new(Field::new("element", DataType::Int32, true));
1106        let string = ListArray::from(
1107            ArrayData::builder(DataType::List(f.clone()))
1108                .buffers(vec![Buffer::from(&[])])
1109                .add_child_data(ArrayData::new_empty(&DataType::Int32))
1110                .build()
1111                .unwrap(),
1112        );
1113        assert_eq!(string.value_offsets(), &[0]);
1114        let string = LargeListArray::from(
1115            ArrayData::builder(DataType::LargeList(f))
1116                .buffers(vec![Buffer::from(&[])])
1117                .add_child_data(ArrayData::new_empty(&DataType::Int32))
1118                .build()
1119                .unwrap(),
1120        );
1121        assert_eq!(string.len(), 0);
1122        assert_eq!(string.value_offsets(), &[0]);
1123    }
1124
1125    #[test]
1126    fn test_try_new() {
1127        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1128        let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
1129        let values = Arc::new(values) as ArrayRef;
1130
1131        let field = Arc::new(Field::new("element", DataType::Int32, false));
1132        ListArray::new(field.clone(), offsets.clone(), values.clone(), None);
1133
1134        let nulls = NullBuffer::new_null(3);
1135        ListArray::new(field.clone(), offsets, values.clone(), Some(nulls));
1136
1137        let nulls = NullBuffer::new_null(3);
1138        let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
1139        let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls))
1140            .unwrap_err();
1141
1142        assert_eq!(
1143            err.to_string(),
1144            "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
1145        );
1146
1147        let field = Arc::new(Field::new("element", DataType::Int64, false));
1148        let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
1149            .unwrap_err();
1150
1151        assert_eq!(
1152            err.to_string(),
1153            "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
1154        );
1155
1156        let nulls = NullBuffer::new_null(7);
1157        let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
1158        let values = Arc::new(values);
1159
1160        let err =
1161            LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err();
1162
1163        assert_eq!(
1164            err.to_string(),
1165            "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
1166        );
1167
1168        let field = Arc::new(Field::new("element", DataType::Int64, true));
1169        LargeListArray::new(field.clone(), offsets.clone(), values, None);
1170
1171        let values = Int64Array::new(vec![0; 2].into(), None);
1172        let err = LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err();
1173
1174        assert_eq!(
1175            err.to_string(),
1176            "Invalid argument error: Max offset of 5 exceeds length of values 2"
1177        );
1178    }
1179
1180    #[test]
1181    fn test_from_fixed_size_list() {
1182        let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
1183        builder.values().append_slice(&[1, 2, 3]);
1184        builder.append(true);
1185        builder.values().append_slice(&[0, 0, 0]);
1186        builder.append(false);
1187        builder.values().append_slice(&[4, 5, 6]);
1188        builder.append(true);
1189        let list: ListArray = builder.finish().into();
1190
1191        let values: Vec<_> = list
1192            .iter()
1193            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1194            .collect();
1195        assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])])
1196    }
1197
1198    #[test]
1199    fn test_nullable_union() {
1200        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1201        let mut builder = UnionBuilder::new_dense();
1202        builder.append::<Int32Type>("a", 1).unwrap();
1203        builder.append::<Int32Type>("b", 2).unwrap();
1204        builder.append::<Int32Type>("b", 3).unwrap();
1205        builder.append::<Int32Type>("a", 4).unwrap();
1206        builder.append::<Int32Type>("a", 5).unwrap();
1207        let values = builder.build().unwrap();
1208        let field = Arc::new(Field::new("element", values.data_type().clone(), false));
1209        ListArray::new(field.clone(), offsets, Arc::new(values), None);
1210    }
1211}