arrow_array/array/
list_view_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow_buffer::{NullBuffer, ScalarBuffer};
19use arrow_data::{ArrayData, ArrayDataBuilder};
20use arrow_schema::{ArrowError, DataType, FieldRef};
21use std::any::Any;
22use std::ops::Add;
23use std::sync::Arc;
24
25use crate::array::{make_array, print_long_array};
26use crate::iterator::GenericListViewArrayIter;
27use crate::{new_empty_array, Array, ArrayAccessor, ArrayRef, FixedSizeListArray, OffsetSizeTrait};
28
29/// A [`GenericListViewArray`] of variable size lists, storing offsets as `i32`.
30pub type ListViewArray = GenericListViewArray<i32>;
31
32/// A [`GenericListViewArray`] of variable size lists, storing offsets as `i64`.
33pub type LargeListViewArray = GenericListViewArray<i64>;
34
35/// An array of [variable length lists], specifically in the [list-view layout].
36///
37/// Differs from [`GenericListArray`] (which represents the [list layout]) in that
38/// the sizes of the child arrays are explicitly encoded in a separate buffer, instead
39/// of being derived from the difference between subsequent offsets in the offset buffer.
40///
41/// This allows the offsets (and subsequently child data) to be out of order. It also
42/// allows take / filter operations to be implemented without copying the underlying data.
43///
44/// # Representation
45///
46/// Given the same example array from [`GenericListArray`], it would be represented
47/// as such via a list-view layout array:
48///
49/// ```text
50///                                         ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
51///                                                                         ┌ ─ ─ ─ ─ ─ ─ ┐    │
52///  ┌─────────────┐  ┌───────┐             │     ┌───┐   ┌───┐   ┌───┐       ┌───┐ ┌───┐
53///  │   [A,B,C]   │  │ (0,3) │                   │ 1 │   │ 0 │   │ 3 │     │ │ 1 │ │ A │ │ 0  │
54///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
55///  │      []     │  │ (3,0) │                   │ 1 │   │ 3 │   │ 0 │     │ │ 1 │ │ B │ │ 1  │
56///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
57///  │    NULL     │  │ (?,?) │                   │ 0 │   │ ? │   │ ? │     │ │ 1 │ │ C │ │ 2  │
58///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
59///  │     [D]     │  │ (4,1) │                   │ 1 │   │ 4 │   │ 1 │     │ │ ? │ │ ? │ │ 3  │
60///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
61///  │  [NULL, F]  │  │ (5,2) │                   │ 1 │   │ 5 │   │ 2 │     │ │ 1 │ │ D │ │ 4  │
62///  └─────────────┘  └───────┘             │     └───┘   └───┘   └───┘       ├───┤ ├───┤
63///                                                                         │ │ 0 │ │ ? │ │ 5  │
64///     Logical       Logical               │  Validity  Offsets  Sizes       ├───┤ ├───┤
65///      Values       Offset                   (nulls)                      │ │ 1 │ │ F │ │ 6  │
66///                   & Size                │                                 └───┘ └───┘
67///                                                                         │    Values   │    │
68///                 (offsets[i],            │   ListViewArray                   (Array)
69///                  sizes[i])                                              └ ─ ─ ─ ─ ─ ─ ┘    │
70///                                         └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
71/// ```
72///
73/// Another way of representing the same array but taking advantage of the offsets being out of order:
74///
75/// ```text
76///                                         ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
77///                                                                         ┌ ─ ─ ─ ─ ─ ─ ┐    │
78///  ┌─────────────┐  ┌───────┐             │     ┌───┐   ┌───┐   ┌───┐       ┌───┐ ┌───┐
79///  │   [A,B,C]   │  │ (2,3) │                   │ 1 │   │ 2 │   │ 3 │     │ │ 0 │ │ ? │ │ 0  │
80///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
81///  │      []     │  │ (0,0) │                   │ 1 │   │ 0 │   │ 0 │     │ │ 1 │ │ F │ │ 1  │
82///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
83///  │    NULL     │  │ (?,?) │                   │ 0 │   │ ? │   │ ? │     │ │ 1 │ │ A │ │ 2  │
84///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
85///  │     [D]     │  │ (5,1) │                   │ 1 │   │ 5 │   │ 1 │     │ │ 1 │ │ B │ │ 3  │
86///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
87///  │  [NULL, F]  │  │ (0,2) │                   │ 1 │   │ 0 │   │ 2 │     │ │ 1 │ │ C │ │ 4  │
88///  └─────────────┘  └───────┘             │     └───┘   └───┘   └───┘       ├───┤ ├───┤
89///                                                                         │ │ 1 │ │ D │ │ 5  │
90///     Logical       Logical               │  Validity  Offsets  Sizes       └───┘ └───┘
91///      Values       Offset                   (nulls)                      │    Values   │    │
92///                   & Size                │                                   (Array)  
93///                                                                         └ ─ ─ ─ ─ ─ ─ ┘    │
94///                 (offsets[i],            │   ListViewArray                          
95///                  sizes[i])                                                                 │
96///                                         └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
97/// ```
98///
99/// [`GenericListArray`]: crate::array::GenericListArray
100/// [variable length lists]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout
101/// [list layout]: https://arrow.apache.org/docs/format/Columnar.html#list-layout
102/// [list-view layout]: https://arrow.apache.org/docs/format/Columnar.html#listview-layout
103#[derive(Clone)]
104pub struct GenericListViewArray<OffsetSize: OffsetSizeTrait> {
105    data_type: DataType,
106    nulls: Option<NullBuffer>,
107    values: ArrayRef,
108    // Unlike GenericListArray, we do not use OffsetBuffer here as offsets are not
109    // guaranteed to be monotonically increasing.
110    value_offsets: ScalarBuffer<OffsetSize>,
111    value_sizes: ScalarBuffer<OffsetSize>,
112}
113
114impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
115    /// The data type constructor of listview array.
116    /// The input is the schema of the child array and
117    /// the output is the [`DataType`], ListView or LargeListView.
118    pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
119        DataType::LargeListView
120    } else {
121        DataType::ListView
122    };
123
124    /// Create a new [`GenericListViewArray`] from the provided parts
125    ///
126    /// # Errors
127    ///
128    /// Errors if
129    ///
130    /// * `offsets.len() != sizes.len()`
131    /// * `offsets.len() != nulls.len()`
132    /// * `offsets[i] > values.len()`
133    /// * `!field.is_nullable() && values.is_nullable()`
134    /// * `field.data_type() != values.data_type()`
135    /// * `0 <= offsets[i] <= length of the child array`
136    /// * `0 <= offsets[i] + size[i] <= length of the child array`
137    pub fn try_new(
138        field: FieldRef,
139        offsets: ScalarBuffer<OffsetSize>,
140        sizes: ScalarBuffer<OffsetSize>,
141        values: ArrayRef,
142        nulls: Option<NullBuffer>,
143    ) -> Result<Self, ArrowError> {
144        let len = offsets.len();
145        if let Some(n) = nulls.as_ref() {
146            if n.len() != len {
147                return Err(ArrowError::InvalidArgumentError(format!(
148                    "Incorrect length of null buffer for {}ListViewArray, expected {len} got {}",
149                    OffsetSize::PREFIX,
150                    n.len(),
151                )));
152            }
153        }
154        if len != sizes.len() {
155            return Err(ArrowError::InvalidArgumentError(format!(
156                "Length of offsets buffer and sizes buffer must be equal for {}ListViewArray, got {len} and {}",
157                OffsetSize::PREFIX, sizes.len()
158            )));
159        }
160
161        for (offset, size) in offsets.iter().zip(sizes.iter()) {
162            let offset = offset.as_usize();
163            let size = size.as_usize();
164            if offset.checked_add(size).ok_or_else(|| {
165                ArrowError::InvalidArgumentError(format!(
166                    "Overflow in offset + size for {}ListViewArray",
167                    OffsetSize::PREFIX
168                ))
169            })? > values.len()
170            {
171                return Err(ArrowError::InvalidArgumentError(format!(
172                    "Offset + size for {}ListViewArray must be within the bounds of the child array, got offset: {offset}, size: {size}, child array length: {}",
173                    OffsetSize::PREFIX,
174                    values.len()
175                )));
176            }
177        }
178
179        if !field.is_nullable() && values.is_nullable() {
180            return Err(ArrowError::InvalidArgumentError(format!(
181                "Non-nullable field of {}ListViewArray {:?} cannot contain nulls",
182                OffsetSize::PREFIX,
183                field.name()
184            )));
185        }
186
187        if field.data_type() != values.data_type() {
188            return Err(ArrowError::InvalidArgumentError(format!(
189                "{}ListViewArray expected data type {} got {} for {:?}",
190                OffsetSize::PREFIX,
191                field.data_type(),
192                values.data_type(),
193                field.name()
194            )));
195        }
196
197        Ok(Self {
198            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
199            nulls,
200            values,
201            value_offsets: offsets,
202            value_sizes: sizes,
203        })
204    }
205
206    /// Create a new [`GenericListViewArray`] from the provided parts
207    ///
208    /// # Panics
209    ///
210    /// Panics if [`Self::try_new`] returns an error
211    pub fn new(
212        field: FieldRef,
213        offsets: ScalarBuffer<OffsetSize>,
214        sizes: ScalarBuffer<OffsetSize>,
215        values: ArrayRef,
216        nulls: Option<NullBuffer>,
217    ) -> Self {
218        Self::try_new(field, offsets, sizes, values, nulls).unwrap()
219    }
220
221    /// Create a new [`GenericListViewArray`] of length `len` where all values are null
222    pub fn new_null(field: FieldRef, len: usize) -> Self {
223        let values = new_empty_array(field.data_type());
224        Self {
225            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
226            nulls: Some(NullBuffer::new_null(len)),
227            value_offsets: ScalarBuffer::from(vec![]),
228            value_sizes: ScalarBuffer::from(vec![]),
229            values,
230        }
231    }
232
233    /// Deconstruct this array into its constituent parts
234    pub fn into_parts(
235        self,
236    ) -> (
237        FieldRef,
238        ScalarBuffer<OffsetSize>,
239        ScalarBuffer<OffsetSize>,
240        ArrayRef,
241        Option<NullBuffer>,
242    ) {
243        let f = match self.data_type {
244            DataType::ListView(f) | DataType::LargeListView(f) => f,
245            _ => unreachable!(),
246        };
247        (
248            f,
249            self.value_offsets,
250            self.value_sizes,
251            self.values,
252            self.nulls,
253        )
254    }
255
256    /// Returns a reference to the offsets of this list
257    ///
258    /// Unlike [`Self::value_offsets`] this returns the [`ScalarBuffer`]
259    /// allowing for zero-copy cloning
260    #[inline]
261    pub fn offsets(&self) -> &ScalarBuffer<OffsetSize> {
262        &self.value_offsets
263    }
264
265    /// Returns a reference to the values of this list
266    #[inline]
267    pub fn values(&self) -> &ArrayRef {
268        &self.values
269    }
270
271    /// Returns a reference to the sizes of this list
272    ///
273    /// Unlike [`Self::value_sizes`] this returns the [`ScalarBuffer`]
274    /// allowing for zero-copy cloning
275    #[inline]
276    pub fn sizes(&self) -> &ScalarBuffer<OffsetSize> {
277        &self.value_sizes
278    }
279
280    /// Returns a clone of the value type of this list.
281    pub fn value_type(&self) -> DataType {
282        self.values.data_type().clone()
283    }
284
285    /// Returns ith value of this list view array.
286    ///
287    /// Note: This method does not check for nulls and the value is arbitrary
288    /// if [`is_null`](Self::is_null) returns true for the index.
289    ///
290    /// # Safety
291    /// Caller must ensure that the index is within the array bounds
292    pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
293        let offset = self.value_offsets().get_unchecked(i).as_usize();
294        let length = self.value_sizes().get_unchecked(i).as_usize();
295        self.values.slice(offset, length)
296    }
297
298    /// Returns ith value of this list view array.
299    ///
300    /// Note: This method does not check for nulls and the value is arbitrary
301    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
302    ///
303    /// # Panics
304    /// Panics if the index is out of bounds
305    pub fn value(&self, i: usize) -> ArrayRef {
306        let offset = self.value_offsets()[i].as_usize();
307        let length = self.value_sizes()[i].as_usize();
308        self.values.slice(offset, length)
309    }
310
311    /// Returns the offset values in the offsets buffer
312    #[inline]
313    pub fn value_offsets(&self) -> &[OffsetSize] {
314        &self.value_offsets
315    }
316
317    /// Returns the sizes values in the offsets buffer
318    #[inline]
319    pub fn value_sizes(&self) -> &[OffsetSize] {
320        &self.value_sizes
321    }
322
323    /// Returns the size for value at index `i`.
324    #[inline]
325    pub fn value_size(&self, i: usize) -> OffsetSize {
326        self.value_sizes[i]
327    }
328
329    /// Returns the offset for value at index `i`.
330    pub fn value_offset(&self, i: usize) -> OffsetSize {
331        self.value_offsets[i]
332    }
333
334    /// Constructs a new iterator
335    pub fn iter(&self) -> GenericListViewArrayIter<'_, OffsetSize> {
336        GenericListViewArrayIter::<'_, OffsetSize>::new(self)
337    }
338
339    #[inline]
340    fn get_type(data_type: &DataType) -> Option<&DataType> {
341        match (OffsetSize::IS_LARGE, data_type) {
342            (true, DataType::LargeListView(child)) | (false, DataType::ListView(child)) => {
343                Some(child.data_type())
344            }
345            _ => None,
346        }
347    }
348
349    /// Returns a zero-copy slice of this array with the indicated offset and length.
350    pub fn slice(&self, offset: usize, length: usize) -> Self {
351        Self {
352            data_type: self.data_type.clone(),
353            nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
354            values: self.values.clone(),
355            value_offsets: self.value_offsets.slice(offset, length),
356            value_sizes: self.value_sizes.slice(offset, length),
357        }
358    }
359}
360
361impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListViewArray<OffsetSize> {
362    type Item = ArrayRef;
363
364    fn value(&self, index: usize) -> Self::Item {
365        GenericListViewArray::value(self, index)
366    }
367
368    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
369        GenericListViewArray::value_unchecked(self, index)
370    }
371}
372
373impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {
374    fn as_any(&self) -> &dyn Any {
375        self
376    }
377
378    fn to_data(&self) -> ArrayData {
379        self.clone().into()
380    }
381
382    fn into_data(self) -> ArrayData {
383        self.into()
384    }
385
386    fn data_type(&self) -> &DataType {
387        &self.data_type
388    }
389
390    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
391        Arc::new(self.slice(offset, length))
392    }
393
394    fn len(&self) -> usize {
395        self.sizes().len()
396    }
397
398    fn is_empty(&self) -> bool {
399        self.value_sizes.is_empty()
400    }
401
402    fn shrink_to_fit(&mut self) {
403        if let Some(nulls) = &mut self.nulls {
404            nulls.shrink_to_fit();
405        }
406        self.values.shrink_to_fit();
407        self.value_offsets.shrink_to_fit();
408        self.value_sizes.shrink_to_fit();
409    }
410
411    fn offset(&self) -> usize {
412        0
413    }
414
415    fn nulls(&self) -> Option<&NullBuffer> {
416        self.nulls.as_ref()
417    }
418
419    fn logical_null_count(&self) -> usize {
420        // More efficient that the default implementation
421        self.null_count()
422    }
423
424    fn get_buffer_memory_size(&self) -> usize {
425        let mut size = self.values.get_buffer_memory_size();
426        size += self.value_offsets.inner().capacity();
427        size += self.value_sizes.inner().capacity();
428        if let Some(n) = self.nulls.as_ref() {
429            size += n.buffer().capacity();
430        }
431        size
432    }
433
434    fn get_array_memory_size(&self) -> usize {
435        let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
436        size += self.value_offsets.inner().capacity();
437        size += self.value_sizes.inner().capacity();
438        if let Some(n) = self.nulls.as_ref() {
439            size += n.buffer().capacity();
440        }
441        size
442    }
443}
444
445impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListViewArray<OffsetSize> {
446    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
447        let prefix = OffsetSize::PREFIX;
448        write!(f, "{prefix}ListViewArray\n[\n")?;
449        print_long_array(self, f, |array, index, f| {
450            std::fmt::Debug::fmt(&array.value(index), f)
451        })?;
452        write!(f, "]")
453    }
454}
455
456impl<OffsetSize: OffsetSizeTrait> From<GenericListViewArray<OffsetSize>> for ArrayData {
457    fn from(array: GenericListViewArray<OffsetSize>) -> Self {
458        let len = array.len();
459        let builder = ArrayDataBuilder::new(array.data_type)
460            .len(len)
461            .nulls(array.nulls)
462            .buffers(vec![
463                array.value_offsets.into_inner(),
464                array.value_sizes.into_inner(),
465            ])
466            .child_data(vec![array.values.to_data()]);
467
468        unsafe { builder.build_unchecked() }
469    }
470}
471
472impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListViewArray<OffsetSize> {
473    fn from(data: ArrayData) -> Self {
474        Self::try_new_from_array_data(data)
475            .expect("Expected infallible creation of GenericListViewArray from ArrayDataRef failed")
476    }
477}
478
479impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListViewArray<OffsetSize> {
480    fn from(value: FixedSizeListArray) -> Self {
481        let (field, size) = match value.data_type() {
482            DataType::FixedSizeList(f, size) => (f, *size as usize),
483            _ => unreachable!(),
484        };
485        let mut acc = 0_usize;
486        let iter = std::iter::repeat_n(size, value.len());
487        let mut sizes = Vec::with_capacity(iter.size_hint().0);
488        let mut offsets = Vec::with_capacity(iter.size_hint().0);
489
490        for size in iter {
491            offsets.push(OffsetSize::usize_as(acc));
492            acc = acc.add(size);
493            sizes.push(OffsetSize::usize_as(size));
494        }
495        let sizes = ScalarBuffer::from(sizes);
496        let offsets = ScalarBuffer::from(offsets);
497        Self {
498            data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
499            nulls: value.nulls().cloned(),
500            values: value.values().clone(),
501            value_offsets: offsets,
502            value_sizes: sizes,
503        }
504    }
505}
506
507impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
508    fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
509        if data.buffers().len() != 2 {
510            return Err(ArrowError::InvalidArgumentError(format!(
511                "ListViewArray data should contain two buffers (value offsets & value sizes), had {}",
512                data.buffers().len()
513            )));
514        }
515
516        if data.child_data().len() != 1 {
517            return Err(ArrowError::InvalidArgumentError(format!(
518                "ListViewArray should contain a single child array (values array), had {}",
519                data.child_data().len()
520            )));
521        }
522
523        let values = data.child_data()[0].clone();
524
525        if let Some(child_data_type) = Self::get_type(data.data_type()) {
526            if values.data_type() != child_data_type {
527                return Err(ArrowError::InvalidArgumentError(format!(
528                    "{}ListViewArray's child datatype {:?} does not \
529                             correspond to the List's datatype {:?}",
530                    OffsetSize::PREFIX,
531                    values.data_type(),
532                    child_data_type
533                )));
534            }
535        } else {
536            return Err(ArrowError::InvalidArgumentError(format!(
537                "{}ListViewArray's datatype must be {}ListViewArray(). It is {:?}",
538                OffsetSize::PREFIX,
539                OffsetSize::PREFIX,
540                data.data_type()
541            )));
542        }
543
544        let values = make_array(values);
545        // ArrayData is valid, and verified type above
546        let value_offsets = ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
547        let value_sizes = ScalarBuffer::new(data.buffers()[1].clone(), data.offset(), data.len());
548
549        Ok(Self {
550            data_type: data.data_type().clone(),
551            nulls: data.nulls().cloned(),
552            values,
553            value_offsets,
554            value_sizes,
555        })
556    }
557}
558
559#[cfg(test)]
560mod tests {
561    use arrow_buffer::{bit_util, BooleanBuffer, Buffer, ScalarBuffer};
562    use arrow_schema::Field;
563
564    use crate::builder::{FixedSizeListBuilder, Int32Builder};
565    use crate::cast::AsArray;
566    use crate::types::Int32Type;
567    use crate::{Int32Array, Int64Array};
568
569    use super::*;
570
571    #[test]
572    fn test_empty_list_view_array() {
573        // Construct an empty value array
574        let vec: Vec<i32> = vec![];
575        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
576        let sizes = ScalarBuffer::from(vec![]);
577        let offsets = ScalarBuffer::from(vec![]);
578        let values = Int32Array::from(vec);
579        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);
580
581        assert_eq!(list_array.len(), 0)
582    }
583
584    #[test]
585    fn test_list_view_array() {
586        // Construct a value array
587        let value_data = ArrayData::builder(DataType::Int32)
588            .len(8)
589            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
590            .build()
591            .unwrap();
592
593        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
594        let sizes = ScalarBuffer::from(vec![3i32, 3, 2]);
595        let offsets = ScalarBuffer::from(vec![0i32, 3, 6]);
596        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
597        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);
598
599        let values = list_array.values();
600        assert_eq!(value_data, values.to_data());
601        assert_eq!(DataType::Int32, list_array.value_type());
602        assert_eq!(3, list_array.len());
603        assert_eq!(0, list_array.null_count());
604        assert_eq!(6, list_array.value_offsets()[2]);
605        assert_eq!(2, list_array.value_sizes()[2]);
606        assert_eq!(2, list_array.value_size(2));
607        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
608        assert_eq!(
609            0,
610            unsafe { list_array.value_unchecked(0) }
611                .as_primitive::<Int32Type>()
612                .value(0)
613        );
614        for i in 0..3 {
615            assert!(list_array.is_valid(i));
616            assert!(!list_array.is_null(i));
617        }
618    }
619
620    #[test]
621    fn test_large_list_view_array() {
622        // Construct a value array
623        let value_data = ArrayData::builder(DataType::Int32)
624            .len(8)
625            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
626            .build()
627            .unwrap();
628
629        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
630        let sizes = ScalarBuffer::from(vec![3i64, 3, 2]);
631        let offsets = ScalarBuffer::from(vec![0i64, 3, 6]);
632        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
633        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);
634
635        let values = list_array.values();
636        assert_eq!(value_data, values.to_data());
637        assert_eq!(DataType::Int32, list_array.value_type());
638        assert_eq!(3, list_array.len());
639        assert_eq!(0, list_array.null_count());
640        assert_eq!(6, list_array.value_offsets()[2]);
641        assert_eq!(2, list_array.value_sizes()[2]);
642        assert_eq!(2, list_array.value_size(2));
643        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
644        assert_eq!(
645            0,
646            unsafe { list_array.value_unchecked(0) }
647                .as_primitive::<Int32Type>()
648                .value(0)
649        );
650        for i in 0..3 {
651            assert!(list_array.is_valid(i));
652            assert!(!list_array.is_null(i));
653        }
654    }
655
656    #[test]
657    fn test_list_view_array_slice() {
658        // Construct a value array
659        let value_data = ArrayData::builder(DataType::Int32)
660            .len(10)
661            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
662            .build()
663            .unwrap();
664
665        // 01011001 00000001
666        let mut null_bits: [u8; 2] = [0; 2];
667        bit_util::set_bit(&mut null_bits, 0);
668        bit_util::set_bit(&mut null_bits, 3);
669        bit_util::set_bit(&mut null_bits, 4);
670        bit_util::set_bit(&mut null_bits, 6);
671        bit_util::set_bit(&mut null_bits, 8);
672        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
673        let null_buffer = NullBuffer::new(buffer);
674
675        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
676        let sizes = ScalarBuffer::from(vec![2, 0, 0, 2, 2, 0, 3, 0, 1]);
677        let offsets = ScalarBuffer::from(vec![0, 2, 2, 2, 4, 6, 6, 9, 9]);
678        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
679        let list_array =
680            ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));
681
682        let values = list_array.values();
683        assert_eq!(value_data, values.to_data());
684        assert_eq!(DataType::Int32, list_array.value_type());
685        assert_eq!(9, list_array.len());
686        assert_eq!(4, list_array.null_count());
687        assert_eq!(2, list_array.value_offsets()[3]);
688        assert_eq!(2, list_array.value_sizes()[3]);
689        assert_eq!(2, list_array.value_size(3));
690
691        let sliced_array = list_array.slice(1, 6);
692        assert_eq!(6, sliced_array.len());
693        assert_eq!(3, sliced_array.null_count());
694
695        for i in 0..sliced_array.len() {
696            if bit_util::get_bit(&null_bits, 1 + i) {
697                assert!(sliced_array.is_valid(i));
698            } else {
699                assert!(sliced_array.is_null(i));
700            }
701        }
702
703        // Check offset and length for each non-null value.
704        let sliced_list_array = sliced_array
705            .as_any()
706            .downcast_ref::<ListViewArray>()
707            .unwrap();
708        assert_eq!(2, sliced_list_array.value_offsets()[2]);
709        assert_eq!(2, sliced_list_array.value_sizes()[2]);
710        assert_eq!(2, sliced_list_array.value_size(2));
711
712        assert_eq!(4, sliced_list_array.value_offsets()[3]);
713        assert_eq!(2, sliced_list_array.value_sizes()[3]);
714        assert_eq!(2, sliced_list_array.value_size(3));
715
716        assert_eq!(6, sliced_list_array.value_offsets()[5]);
717        assert_eq!(3, sliced_list_array.value_sizes()[5]);
718        assert_eq!(3, sliced_list_array.value_size(5));
719    }
720
721    #[test]
722    fn test_large_list_view_array_slice() {
723        // Construct a value array
724        let value_data = ArrayData::builder(DataType::Int32)
725            .len(10)
726            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
727            .build()
728            .unwrap();
729
730        // 01011001 00000001
731        let mut null_bits: [u8; 2] = [0; 2];
732        bit_util::set_bit(&mut null_bits, 0);
733        bit_util::set_bit(&mut null_bits, 3);
734        bit_util::set_bit(&mut null_bits, 4);
735        bit_util::set_bit(&mut null_bits, 6);
736        bit_util::set_bit(&mut null_bits, 8);
737        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
738        let null_buffer = NullBuffer::new(buffer);
739
740        // Construct a large list view array from the above two
741        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
742        let sizes = ScalarBuffer::from(vec![2i64, 0, 0, 2, 2, 0, 3, 0, 1]);
743        let offsets = ScalarBuffer::from(vec![0i64, 2, 2, 2, 4, 6, 6, 9, 9]);
744        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
745        let list_array =
746            LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));
747
748        let values = list_array.values();
749        assert_eq!(value_data, values.to_data());
750        assert_eq!(DataType::Int32, list_array.value_type());
751        assert_eq!(9, list_array.len());
752        assert_eq!(4, list_array.null_count());
753        assert_eq!(2, list_array.value_offsets()[3]);
754        assert_eq!(2, list_array.value_sizes()[3]);
755        assert_eq!(2, list_array.value_size(3));
756
757        let sliced_array = list_array.slice(1, 6);
758        assert_eq!(6, sliced_array.len());
759        assert_eq!(3, sliced_array.null_count());
760
761        for i in 0..sliced_array.len() {
762            if bit_util::get_bit(&null_bits, 1 + i) {
763                assert!(sliced_array.is_valid(i));
764            } else {
765                assert!(sliced_array.is_null(i));
766            }
767        }
768
769        // Check offset and length for each non-null value.
770        let sliced_list_array = sliced_array
771            .as_any()
772            .downcast_ref::<LargeListViewArray>()
773            .unwrap();
774        assert_eq!(2, sliced_list_array.value_offsets()[2]);
775        assert_eq!(2, sliced_list_array.value_size(2));
776        assert_eq!(2, sliced_list_array.value_sizes()[2]);
777
778        assert_eq!(4, sliced_list_array.value_offsets()[3]);
779        assert_eq!(2, sliced_list_array.value_size(3));
780        assert_eq!(2, sliced_list_array.value_sizes()[3]);
781
782        assert_eq!(6, sliced_list_array.value_offsets()[5]);
783        assert_eq!(3, sliced_list_array.value_size(5));
784        assert_eq!(2, sliced_list_array.value_sizes()[3]);
785    }
786
787    #[test]
788    #[should_panic(expected = "index out of bounds: the len is 9 but the index is 10")]
789    fn test_list_view_array_index_out_of_bound() {
790        // 01011001 00000001
791        let mut null_bits: [u8; 2] = [0; 2];
792        bit_util::set_bit(&mut null_bits, 0);
793        bit_util::set_bit(&mut null_bits, 3);
794        bit_util::set_bit(&mut null_bits, 4);
795        bit_util::set_bit(&mut null_bits, 6);
796        bit_util::set_bit(&mut null_bits, 8);
797        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
798        let null_buffer = NullBuffer::new(buffer);
799
800        // Construct a buffer for value offsets, for the nested array:
801        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
802        // Construct a list array from the above two
803        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
804        let sizes = ScalarBuffer::from(vec![2i32, 0, 0, 2, 2, 0, 3, 0, 1]);
805        let offsets = ScalarBuffer::from(vec![0i32, 2, 2, 2, 4, 6, 6, 9, 9]);
806        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
807        let list_array =
808            ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));
809
810        assert_eq!(9, list_array.len());
811        list_array.value(10);
812    }
813    #[test]
814    #[should_panic(
815        expected = "ListViewArray data should contain two buffers (value offsets & value sizes), had 0"
816    )]
817    #[cfg(not(feature = "force_validate"))]
818    fn test_list_view_array_invalid_buffer_len() {
819        let value_data = unsafe {
820            ArrayData::builder(DataType::Int32)
821                .len(8)
822                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
823                .build_unchecked()
824        };
825        let list_data_type =
826            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
827        let list_data = unsafe {
828            ArrayData::builder(list_data_type)
829                .len(3)
830                .add_child_data(value_data)
831                .build_unchecked()
832        };
833        drop(ListViewArray::from(list_data));
834    }
835
836    #[test]
837    #[should_panic(
838        expected = "ListViewArray data should contain two buffers (value offsets & value sizes), had 1"
839    )]
840    #[cfg(not(feature = "force_validate"))]
841    fn test_list_view_array_invalid_child_array_len() {
842        let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
843        let list_data_type =
844            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
845        let list_data = unsafe {
846            ArrayData::builder(list_data_type)
847                .len(3)
848                .add_buffer(value_offsets)
849                .build_unchecked()
850        };
851        drop(ListViewArray::from(list_data));
852    }
853
854    #[test]
855    fn test_list_view_array_offsets_need_not_start_at_zero() {
856        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
857        let sizes = ScalarBuffer::from(vec![0i32, 0, 3]);
858        let offsets = ScalarBuffer::from(vec![2i32, 2, 5]);
859        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
860        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);
861
862        assert_eq!(list_array.value_size(0), 0);
863        assert_eq!(list_array.value_size(1), 0);
864        assert_eq!(list_array.value_size(2), 3);
865    }
866
867    #[test]
868    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
869    #[cfg(not(feature = "force_validate"))]
870    fn test_list_view_array_alignment() {
871        let offset_buf = Buffer::from_slice_ref([0_u64]);
872        let offset_buf2 = offset_buf.slice(1);
873
874        let size_buf = Buffer::from_slice_ref([0_u64]);
875        let size_buf2 = size_buf.slice(1);
876
877        let values: [i32; 8] = [0; 8];
878        let value_data = unsafe {
879            ArrayData::builder(DataType::Int32)
880                .add_buffer(Buffer::from_slice_ref(values))
881                .build_unchecked()
882        };
883
884        let list_data_type =
885            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
886        let list_data = unsafe {
887            ArrayData::builder(list_data_type)
888                .add_buffer(offset_buf2)
889                .add_buffer(size_buf2)
890                .add_child_data(value_data)
891                .build_unchecked()
892        };
893        drop(ListViewArray::from(list_data));
894    }
895
896    #[test]
897    fn test_empty_offsets() {
898        let f = Arc::new(Field::new("element", DataType::Int32, true));
899        let string = ListViewArray::from(
900            ArrayData::builder(DataType::ListView(f.clone()))
901                .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
902                .add_child_data(ArrayData::new_empty(&DataType::Int32))
903                .build()
904                .unwrap(),
905        );
906        assert_eq!(string.value_offsets(), &[] as &[i32; 0]);
907        assert_eq!(string.value_sizes(), &[] as &[i32; 0]);
908
909        let string = LargeListViewArray::from(
910            ArrayData::builder(DataType::LargeListView(f))
911                .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
912                .add_child_data(ArrayData::new_empty(&DataType::Int32))
913                .build()
914                .unwrap(),
915        );
916        assert_eq!(string.len(), 0);
917        assert_eq!(string.value_offsets(), &[] as &[i64; 0]);
918        assert_eq!(string.value_sizes(), &[] as &[i64; 0]);
919    }
920
921    #[test]
922    fn test_try_new() {
923        let offsets = ScalarBuffer::from(vec![0, 1, 4, 5]);
924        let sizes = ScalarBuffer::from(vec![1, 3, 1, 0]);
925        let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
926        let values = Arc::new(values) as ArrayRef;
927
928        let field = Arc::new(Field::new("element", DataType::Int32, false));
929        ListViewArray::new(
930            field.clone(),
931            offsets.clone(),
932            sizes.clone(),
933            values.clone(),
934            None,
935        );
936
937        let nulls = NullBuffer::new_null(4);
938        ListViewArray::new(
939            field.clone(),
940            offsets,
941            sizes.clone(),
942            values.clone(),
943            Some(nulls),
944        );
945
946        let nulls = NullBuffer::new_null(4);
947        let offsets = ScalarBuffer::from(vec![0, 1, 2, 3, 4]);
948        let sizes = ScalarBuffer::from(vec![1, 1, 1, 1, 0]);
949        let err = LargeListViewArray::try_new(
950            field,
951            offsets.clone(),
952            sizes.clone(),
953            values.clone(),
954            Some(nulls),
955        )
956        .unwrap_err();
957
958        assert_eq!(
959            err.to_string(),
960            "Invalid argument error: Incorrect length of null buffer for LargeListViewArray, expected 5 got 4"
961        );
962
963        let field = Arc::new(Field::new("element", DataType::Int64, false));
964        let err = LargeListViewArray::try_new(
965            field.clone(),
966            offsets.clone(),
967            sizes.clone(),
968            values.clone(),
969            None,
970        )
971        .unwrap_err();
972
973        assert_eq!(
974            err.to_string(),
975            "Invalid argument error: LargeListViewArray expected data type Int64 got Int32 for \"element\""
976        );
977
978        let nulls = NullBuffer::new_null(7);
979        let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
980        let values = Arc::new(values);
981
982        let err = LargeListViewArray::try_new(
983            field,
984            offsets.clone(),
985            sizes.clone(),
986            values.clone(),
987            None,
988        )
989        .unwrap_err();
990
991        assert_eq!(
992            err.to_string(),
993            "Invalid argument error: Non-nullable field of LargeListViewArray \"element\" cannot contain nulls"
994        );
995    }
996
997    #[test]
998    fn test_from_fixed_size_list() {
999        let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
1000        builder.values().append_slice(&[1, 2, 3]);
1001        builder.append(true);
1002        builder.values().append_slice(&[0, 0, 0]);
1003        builder.append(false);
1004        builder.values().append_slice(&[4, 5, 6]);
1005        builder.append(true);
1006        let list: ListViewArray = builder.finish().into();
1007        let values: Vec<_> = list
1008            .iter()
1009            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1010            .collect();
1011        assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])]);
1012        let offsets = list.value_offsets();
1013        assert_eq!(offsets, &[0, 3, 6]);
1014        let sizes = list.value_sizes();
1015        assert_eq!(sizes, &[3, 3, 3]);
1016    }
1017
1018    #[test]
1019    fn test_list_view_array_overlap_lists() {
1020        let value_data = unsafe {
1021            ArrayData::builder(DataType::Int32)
1022                .len(8)
1023                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
1024                .build_unchecked()
1025        };
1026        let list_data_type =
1027            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
1028        let list_data = unsafe {
1029            ArrayData::builder(list_data_type)
1030                .len(2)
1031                .add_buffer(Buffer::from_slice_ref([0, 3])) // offsets
1032                .add_buffer(Buffer::from_slice_ref([5, 5])) // sizes
1033                .add_child_data(value_data)
1034                .build_unchecked()
1035        };
1036        let array = ListViewArray::from(list_data);
1037
1038        assert_eq!(array.len(), 2);
1039        assert_eq!(array.value_size(0), 5);
1040        assert_eq!(array.value_size(1), 5);
1041
1042        let values: Vec<_> = array
1043            .iter()
1044            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1045            .collect();
1046        assert_eq!(
1047            values,
1048            vec![Some(vec![0, 1, 2, 3, 4]), Some(vec![3, 4, 5, 6, 7])]
1049        );
1050    }
1051
1052    #[test]
1053    fn test_list_view_array_incomplete_offsets() {
1054        let value_data = unsafe {
1055            ArrayData::builder(DataType::Int32)
1056                .len(50)
1057                .add_buffer(Buffer::from_slice_ref((0..50).collect::<Vec<i32>>()))
1058                .build_unchecked()
1059        };
1060        let list_data_type =
1061            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
1062        let list_data = unsafe {
1063            ArrayData::builder(list_data_type)
1064                .len(3)
1065                .add_buffer(Buffer::from_slice_ref([0, 5, 10])) // offsets
1066                .add_buffer(Buffer::from_slice_ref([0, 5, 10])) // sizes
1067                .add_child_data(value_data)
1068                .build_unchecked()
1069        };
1070        let array = ListViewArray::from(list_data);
1071
1072        assert_eq!(array.len(), 3);
1073        assert_eq!(array.value_size(0), 0);
1074        assert_eq!(array.value_size(1), 5);
1075        assert_eq!(array.value_size(2), 10);
1076
1077        let values: Vec<_> = array
1078            .iter()
1079            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1080            .collect();
1081        assert_eq!(
1082            values,
1083            vec![
1084                Some(vec![]),
1085                Some(vec![5, 6, 7, 8, 9]),
1086                Some(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
1087            ]
1088        );
1089    }
1090
1091    #[test]
1092    fn test_list_view_array_empty_lists() {
1093        let value_data = unsafe {
1094            ArrayData::builder(DataType::Int32)
1095                .len(0)
1096                .add_buffer(Buffer::from_slice_ref::<i32, &[_; 0]>(&[]))
1097                .build_unchecked()
1098        };
1099        let list_data_type =
1100            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
1101        let list_data = unsafe {
1102            ArrayData::builder(list_data_type)
1103                .len(3)
1104                .add_buffer(Buffer::from_slice_ref([0, 0, 0])) // offsets
1105                .add_buffer(Buffer::from_slice_ref([0, 0, 0])) // sizes
1106                .add_child_data(value_data)
1107                .build_unchecked()
1108        };
1109        let array = ListViewArray::from(list_data);
1110
1111        assert_eq!(array.len(), 3);
1112        assert_eq!(array.value_size(0), 0);
1113        assert_eq!(array.value_size(1), 0);
1114        assert_eq!(array.value_size(2), 0);
1115
1116        let values: Vec<_> = array
1117            .iter()
1118            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1119            .collect();
1120        assert_eq!(values, vec![Some(vec![]), Some(vec![]), Some(vec![])]);
1121    }
1122}