arrow_array/array/
list_view_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow_buffer::{NullBuffer, ScalarBuffer};
19use arrow_data::{ArrayData, ArrayDataBuilder};
20use arrow_schema::{ArrowError, DataType, FieldRef};
21use std::any::Any;
22use std::ops::Add;
23use std::sync::Arc;
24
25use crate::array::{make_array, print_long_array};
26use crate::iterator::GenericListViewArrayIter;
27use crate::{new_empty_array, Array, ArrayAccessor, ArrayRef, FixedSizeListArray, OffsetSizeTrait};
28
/// A [`GenericListViewArray`] of variable size lists, storing offsets and sizes as `i32`.
pub type ListViewArray = GenericListViewArray<i32>;

/// A [`GenericListViewArray`] of variable size lists, storing offsets and sizes as `i64`.
pub type LargeListViewArray = GenericListViewArray<i64>;
34
/// An array of variable size lists, see [Variable-size List Layout: ListView Layout].
///
/// Different from [`crate::GenericListArray`] as it stores both an offset and length
/// meaning that take / filter operations can be implemented without copying the underlying data.
///
/// [Variable-size List Layout: ListView Layout]: https://arrow.apache.org/docs/format/Columnar.html#listview-layout
#[derive(Clone)]
pub struct GenericListViewArray<OffsetSize: OffsetSizeTrait> {
    /// The `ListView` / `LargeListView` data type of this array, wrapping the child field
    data_type: DataType,
    /// Optional validity buffer with one entry per list
    nulls: Option<NullBuffer>,
    /// The child array that every list is a slice of
    values: ArrayRef,
    /// Start position in `values` of each list
    value_offsets: ScalarBuffer<OffsetSize>,
    /// Number of child elements in each list
    value_sizes: ScalarBuffer<OffsetSize>,
}
48
49impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
50    /// The data type constructor of listview array.
51    /// The input is the schema of the child array and
52    /// the output is the [`DataType`], ListView or LargeListView.
53    pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
54        DataType::LargeListView
55    } else {
56        DataType::ListView
57    };
58
59    /// Create a new [`GenericListViewArray`] from the provided parts
60    ///
61    /// # Errors
62    ///
63    /// Errors if
64    ///
65    /// * `offsets.len() != sizes.len()`
66    /// * `offsets.len() != nulls.len()`
67    /// * `offsets[i] > values.len()`
68    /// * `!field.is_nullable() && values.is_nullable()`
69    /// * `field.data_type() != values.data_type()`
70    /// * `0 <= offsets[i] <= length of the child array`
71    /// * `0 <= offsets[i] + size[i] <= length of the child array`
72    pub fn try_new(
73        field: FieldRef,
74        offsets: ScalarBuffer<OffsetSize>,
75        sizes: ScalarBuffer<OffsetSize>,
76        values: ArrayRef,
77        nulls: Option<NullBuffer>,
78    ) -> Result<Self, ArrowError> {
79        let len = offsets.len();
80        if let Some(n) = nulls.as_ref() {
81            if n.len() != len {
82                return Err(ArrowError::InvalidArgumentError(format!(
83                    "Incorrect length of null buffer for {}ListViewArray, expected {len} got {}",
84                    OffsetSize::PREFIX,
85                    n.len(),
86                )));
87            }
88        }
89        if len != sizes.len() {
90            return Err(ArrowError::InvalidArgumentError(format!(
91                "Length of offsets buffer and sizes buffer must be equal for {}ListViewArray, got {len} and {}",
92                OffsetSize::PREFIX, sizes.len()
93            )));
94        }
95
96        for (offset, size) in offsets.iter().zip(sizes.iter()) {
97            let offset = offset.as_usize();
98            let size = size.as_usize();
99            if offset.checked_add(size).ok_or_else(|| {
100                ArrowError::InvalidArgumentError(format!(
101                    "Overflow in offset + size for {}ListViewArray",
102                    OffsetSize::PREFIX
103                ))
104            })? > values.len()
105            {
106                return Err(ArrowError::InvalidArgumentError(format!(
107                    "Offset + size for {}ListViewArray must be within the bounds of the child array, got offset: {offset}, size: {size}, child array length: {}",
108                    OffsetSize::PREFIX,
109                    values.len()
110                )));
111            }
112        }
113
114        if !field.is_nullable() && values.is_nullable() {
115            return Err(ArrowError::InvalidArgumentError(format!(
116                "Non-nullable field of {}ListViewArray {:?} cannot contain nulls",
117                OffsetSize::PREFIX,
118                field.name()
119            )));
120        }
121
122        if field.data_type() != values.data_type() {
123            return Err(ArrowError::InvalidArgumentError(format!(
124                "{}ListViewArray expected data type {} got {} for {:?}",
125                OffsetSize::PREFIX,
126                field.data_type(),
127                values.data_type(),
128                field.name()
129            )));
130        }
131
132        Ok(Self {
133            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
134            nulls,
135            values,
136            value_offsets: offsets,
137            value_sizes: sizes,
138        })
139    }
140
141    /// Create a new [`GenericListViewArray`] from the provided parts
142    ///
143    /// # Panics
144    ///
145    /// Panics if [`Self::try_new`] returns an error
146    pub fn new(
147        field: FieldRef,
148        offsets: ScalarBuffer<OffsetSize>,
149        sizes: ScalarBuffer<OffsetSize>,
150        values: ArrayRef,
151        nulls: Option<NullBuffer>,
152    ) -> Self {
153        Self::try_new(field, offsets, sizes, values, nulls).unwrap()
154    }
155
156    /// Create a new [`GenericListViewArray`] of length `len` where all values are null
157    pub fn new_null(field: FieldRef, len: usize) -> Self {
158        let values = new_empty_array(field.data_type());
159        Self {
160            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
161            nulls: Some(NullBuffer::new_null(len)),
162            value_offsets: ScalarBuffer::from(vec![]),
163            value_sizes: ScalarBuffer::from(vec![]),
164            values,
165        }
166    }
167
168    /// Deconstruct this array into its constituent parts
169    pub fn into_parts(
170        self,
171    ) -> (
172        FieldRef,
173        ScalarBuffer<OffsetSize>,
174        ScalarBuffer<OffsetSize>,
175        ArrayRef,
176        Option<NullBuffer>,
177    ) {
178        let f = match self.data_type {
179            DataType::ListView(f) | DataType::LargeListView(f) => f,
180            _ => unreachable!(),
181        };
182        (
183            f,
184            self.value_offsets,
185            self.value_sizes,
186            self.values,
187            self.nulls,
188        )
189    }
190
    /// Returns a reference to the offsets of this list
    ///
    /// Unlike [`Self::value_offsets`], which returns a plain slice, this returns
    /// the underlying [`ScalarBuffer`] allowing for zero-copy cloning
    #[inline]
    pub fn offsets(&self) -> &ScalarBuffer<OffsetSize> {
        &self.value_offsets
    }
199
    /// Returns a reference to the child values array that the lists are slices of
    #[inline]
    pub fn values(&self) -> &ArrayRef {
        &self.values
    }
205
    /// Returns a reference to the sizes of this list
    ///
    /// Unlike [`Self::value_sizes`], which returns a plain slice, this returns
    /// the underlying [`ScalarBuffer`] allowing for zero-copy cloning
    #[inline]
    pub fn sizes(&self) -> &ScalarBuffer<OffsetSize> {
        &self.value_sizes
    }
214
    /// Returns a clone of the value type of this list, i.e. the data type of the child array.
    pub fn value_type(&self) -> DataType {
        self.values.data_type().clone()
    }
219
220    /// Returns ith value of this list view array.
221    /// # Safety
222    /// Caller must ensure that the index is within the array bounds
223    pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
224        let offset = self.value_offsets().get_unchecked(i).as_usize();
225        let length = self.value_sizes().get_unchecked(i).as_usize();
226        self.values.slice(offset, length)
227    }
228
229    /// Returns ith value of this list view array.
230    /// # Panics
231    /// Panics if the index is out of bounds
232    pub fn value(&self, i: usize) -> ArrayRef {
233        let offset = self.value_offsets()[i].as_usize();
234        let length = self.value_sizes()[i].as_usize();
235        self.values.slice(offset, length)
236    }
237
    /// Returns the offset values in the offsets buffer as a slice
    #[inline]
    pub fn value_offsets(&self) -> &[OffsetSize] {
        &self.value_offsets
    }
243
    /// Returns the size values in the sizes buffer as a slice
    #[inline]
    pub fn value_sizes(&self) -> &[OffsetSize] {
        &self.value_sizes
    }
249
    /// Returns the size for value at index `i`.
    ///
    /// # Panics
    ///
    /// Panics if `i` is out of bounds of the sizes buffer
    #[inline]
    pub fn value_size(&self, i: usize) -> OffsetSize {
        self.value_sizes[i]
    }
255
    /// Returns the offset for value at index `i`.
    ///
    /// # Panics
    ///
    /// Panics if `i` is out of bounds of the offsets buffer
    pub fn value_offset(&self, i: usize) -> OffsetSize {
        self.value_offsets[i]
    }
260
    /// Constructs a new [`GenericListViewArrayIter`] over this array
    pub fn iter(&self) -> GenericListViewArrayIter<'_, OffsetSize> {
        GenericListViewArrayIter::<'_, OffsetSize>::new(self)
    }
265
266    #[inline]
267    fn get_type(data_type: &DataType) -> Option<&DataType> {
268        match (OffsetSize::IS_LARGE, data_type) {
269            (true, DataType::LargeListView(child)) | (false, DataType::ListView(child)) => {
270                Some(child.data_type())
271            }
272            _ => None,
273        }
274    }
275
276    /// Returns a zero-copy slice of this array with the indicated offset and length.
277    pub fn slice(&self, offset: usize, length: usize) -> Self {
278        Self {
279            data_type: self.data_type.clone(),
280            nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
281            values: self.values.clone(),
282            value_offsets: self.value_offsets.slice(offset, length),
283            value_sizes: self.value_sizes.slice(offset, length),
284        }
285    }
286}
287
/// Element access for `&GenericListViewArray`, where each item is the list at an
/// index returned as an [`ArrayRef`].
impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListViewArray<OffsetSize> {
    type Item = ArrayRef;

    /// Delegates to the inherent [`GenericListViewArray::value`]; the explicit path
    /// makes sure the inherent method is called rather than this trait method.
    fn value(&self, index: usize) -> Self::Item {
        GenericListViewArray::value(self, index)
    }

    /// Delegates to the inherent [`GenericListViewArray::value_unchecked`].
    ///
    /// # Safety
    ///
    /// Same contract as the inherent method: `index` must be within bounds.
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
        GenericListViewArray::value_unchecked(self, index)
    }
}
299
300impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {
301    fn as_any(&self) -> &dyn Any {
302        self
303    }
304
305    fn to_data(&self) -> ArrayData {
306        self.clone().into()
307    }
308
309    fn into_data(self) -> ArrayData {
310        self.into()
311    }
312
313    fn data_type(&self) -> &DataType {
314        &self.data_type
315    }
316
317    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
318        Arc::new(self.slice(offset, length))
319    }
320
321    fn len(&self) -> usize {
322        self.sizes().len()
323    }
324
325    fn is_empty(&self) -> bool {
326        self.value_sizes.is_empty()
327    }
328
329    fn shrink_to_fit(&mut self) {
330        if let Some(nulls) = &mut self.nulls {
331            nulls.shrink_to_fit();
332        }
333        self.values.shrink_to_fit();
334        self.value_offsets.shrink_to_fit();
335        self.value_sizes.shrink_to_fit();
336    }
337
338    fn offset(&self) -> usize {
339        0
340    }
341
342    fn nulls(&self) -> Option<&NullBuffer> {
343        self.nulls.as_ref()
344    }
345
346    fn logical_null_count(&self) -> usize {
347        // More efficient that the default implementation
348        self.null_count()
349    }
350
351    fn get_buffer_memory_size(&self) -> usize {
352        let mut size = self.values.get_buffer_memory_size();
353        size += self.value_offsets.inner().capacity();
354        size += self.value_sizes.inner().capacity();
355        if let Some(n) = self.nulls.as_ref() {
356            size += n.buffer().capacity();
357        }
358        size
359    }
360
361    fn get_array_memory_size(&self) -> usize {
362        let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
363        size += self.value_offsets.inner().capacity();
364        size += self.value_sizes.inner().capacity();
365        if let Some(n) = self.nulls.as_ref() {
366            size += n.buffer().capacity();
367        }
368        size
369    }
370}
371
/// Debug output: a `{prefix}ListViewArray` header followed by the elements in
/// square brackets, with each list formatted via its own `Debug` implementation.
impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListViewArray<OffsetSize> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // The prefix comes from the offset type and distinguishes the two list view widths
        let prefix = OffsetSize::PREFIX;
        write!(f, "{prefix}ListViewArray\n[\n")?;
        // Element printing is delegated to the shared helper; each element is the
        // child slice for that index.
        print_long_array(self, f, |array, index, f| {
            std::fmt::Debug::fmt(&array.value(index), f)
        })?;
        write!(f, "]")
    }
}
382
383impl<OffsetSize: OffsetSizeTrait> From<GenericListViewArray<OffsetSize>> for ArrayData {
384    fn from(array: GenericListViewArray<OffsetSize>) -> Self {
385        let len = array.len();
386        let builder = ArrayDataBuilder::new(array.data_type)
387            .len(len)
388            .nulls(array.nulls)
389            .buffers(vec![
390                array.value_offsets.into_inner(),
391                array.value_sizes.into_inner(),
392            ])
393            .child_data(vec![array.values.to_data()]);
394
395        unsafe { builder.build_unchecked() }
396    }
397}
398
impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListViewArray<OffsetSize> {
    /// Converts [`ArrayData`] into a [`GenericListViewArray`].
    ///
    /// # Panics
    ///
    /// Panics if `try_new_from_array_data` rejects `data`, i.e. on a wrong number of
    /// buffers or children, or on a data type that does not match this offset width.
    fn from(data: ArrayData) -> Self {
        Self::try_new_from_array_data(data)
            .expect("Expected infallible creation of GenericListViewArray from ArrayDataRef failed")
    }
}
405
406impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListViewArray<OffsetSize> {
407    fn from(value: FixedSizeListArray) -> Self {
408        let (field, size) = match value.data_type() {
409            DataType::FixedSizeList(f, size) => (f, *size as usize),
410            _ => unreachable!(),
411        };
412        let mut acc = 0_usize;
413        let iter = std::iter::repeat(size).take(value.len());
414        let mut sizes = Vec::with_capacity(iter.size_hint().0);
415        let mut offsets = Vec::with_capacity(iter.size_hint().0);
416
417        for size in iter {
418            offsets.push(OffsetSize::usize_as(acc));
419            acc = acc.add(size);
420            sizes.push(OffsetSize::usize_as(size));
421        }
422        let sizes = ScalarBuffer::from(sizes);
423        let offsets = ScalarBuffer::from(offsets);
424        Self {
425            data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
426            nulls: value.nulls().cloned(),
427            values: value.values().clone(),
428            value_offsets: offsets,
429            value_sizes: sizes,
430        }
431    }
432}
433
434impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
435    fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
436        if data.buffers().len() != 2 {
437            return Err(ArrowError::InvalidArgumentError(format!(
438                "ListViewArray data should contain two buffers (value offsets & value sizes), had {}",
439                data.buffers().len()
440            )));
441        }
442
443        if data.child_data().len() != 1 {
444            return Err(ArrowError::InvalidArgumentError(format!(
445                "ListViewArray should contain a single child array (values array), had {}",
446                data.child_data().len()
447            )));
448        }
449
450        let values = data.child_data()[0].clone();
451
452        if let Some(child_data_type) = Self::get_type(data.data_type()) {
453            if values.data_type() != child_data_type {
454                return Err(ArrowError::InvalidArgumentError(format!(
455                    "{}ListViewArray's child datatype {:?} does not \
456                             correspond to the List's datatype {:?}",
457                    OffsetSize::PREFIX,
458                    values.data_type(),
459                    child_data_type
460                )));
461            }
462        } else {
463            return Err(ArrowError::InvalidArgumentError(format!(
464                "{}ListViewArray's datatype must be {}ListViewArray(). It is {:?}",
465                OffsetSize::PREFIX,
466                OffsetSize::PREFIX,
467                data.data_type()
468            )));
469        }
470
471        let values = make_array(values);
472        // ArrayData is valid, and verified type above
473        let value_offsets = ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
474        let value_sizes = ScalarBuffer::new(data.buffers()[1].clone(), data.offset(), data.len());
475
476        Ok(Self {
477            data_type: data.data_type().clone(),
478            nulls: data.nulls().cloned(),
479            values,
480            value_offsets,
481            value_sizes,
482        })
483    }
484}
485
486#[cfg(test)]
487mod tests {
488    use arrow_buffer::{bit_util, BooleanBuffer, Buffer, ScalarBuffer};
489    use arrow_schema::Field;
490
491    use crate::builder::{FixedSizeListBuilder, Int32Builder};
492    use crate::cast::AsArray;
493    use crate::types::Int32Type;
494    use crate::{Int32Array, Int64Array};
495
496    use super::*;
497
498    #[test]
499    fn test_empty_list_view_array() {
500        // Construct an empty value array
501        let vec: Vec<i32> = vec![];
502        let field = Arc::new(Field::new("item", DataType::Int32, true));
503        let sizes = ScalarBuffer::from(vec![]);
504        let offsets = ScalarBuffer::from(vec![]);
505        let values = Int32Array::from(vec);
506        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);
507
508        assert_eq!(list_array.len(), 0)
509    }
510
    /// Builds an `i32` list view array of three lists over eight values and checks
    /// the accessors and validity of every slot.
    #[test]
    fn test_list_view_array() {
        // Construct a value array
        let value_data = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
            .build()
            .unwrap();

        // Lists: [0, 1, 2], [3, 4, 5], [6, 7]
        let field = Arc::new(Field::new("item", DataType::Int32, true));
        let sizes = ScalarBuffer::from(vec![3i32, 3, 2]);
        let offsets = ScalarBuffer::from(vec![0i32, 3, 6]);
        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);

        let values = list_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(3, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(6, list_array.value_offsets()[2]);
        assert_eq!(2, list_array.value_sizes()[2]);
        assert_eq!(2, list_array.value_size(2));
        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
        // The unchecked accessor must agree with the checked one
        assert_eq!(
            0,
            unsafe { list_array.value_unchecked(0) }
                .as_primitive::<Int32Type>()
                .value(0)
        );
        // No null buffer was supplied, so every slot is valid
        for i in 0..3 {
            assert!(list_array.is_valid(i));
            assert!(!list_array.is_null(i));
        }
    }
546
    /// Same as `test_list_view_array` but with `i64` offsets and sizes.
    #[test]
    fn test_large_list_view_array() {
        // Construct a value array
        let value_data = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
            .build()
            .unwrap();

        // Lists: [0, 1, 2], [3, 4, 5], [6, 7]
        let field = Arc::new(Field::new("item", DataType::Int32, true));
        let sizes = ScalarBuffer::from(vec![3i64, 3, 2]);
        let offsets = ScalarBuffer::from(vec![0i64, 3, 6]);
        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);

        let values = list_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(3, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(6, list_array.value_offsets()[2]);
        assert_eq!(2, list_array.value_sizes()[2]);
        assert_eq!(2, list_array.value_size(2));
        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
        // The unchecked accessor must agree with the checked one
        assert_eq!(
            0,
            unsafe { list_array.value_unchecked(0) }
                .as_primitive::<Int32Type>()
                .value(0)
        );
        // No null buffer was supplied, so every slot is valid
        for i in 0..3 {
            assert!(list_array.is_valid(i));
            assert!(!list_array.is_null(i));
        }
    }
582
    /// Slices a nullable `i32` list view array and checks that validity, offsets
    /// and sizes of the slice line up with the original starting at index 1.
    #[test]
    fn test_list_view_array_slice() {
        // Construct a value array
        let value_data = ArrayData::builder(DataType::Int32)
            .len(10)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
            .build()
            .unwrap();

        // 01011001 00000001
        // (slots 0, 3, 4, 6 and 8 are valid; the other four of the nine are null)
        let mut null_bits: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut null_bits, 0);
        bit_util::set_bit(&mut null_bits, 3);
        bit_util::set_bit(&mut null_bits, 4);
        bit_util::set_bit(&mut null_bits, 6);
        bit_util::set_bit(&mut null_bits, 8);
        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
        let null_buffer = NullBuffer::new(buffer);

        let field = Arc::new(Field::new("item", DataType::Int32, true));
        let sizes = ScalarBuffer::from(vec![2, 0, 0, 2, 2, 0, 3, 0, 1]);
        let offsets = ScalarBuffer::from(vec![0, 2, 2, 2, 4, 6, 6, 9, 9]);
        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
        let list_array =
            ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));

        let values = list_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(9, list_array.len());
        assert_eq!(4, list_array.null_count());
        assert_eq!(2, list_array.value_offsets()[3]);
        assert_eq!(2, list_array.value_sizes()[3]);
        assert_eq!(2, list_array.value_size(3));

        // Slice covers original slots 1..=6, of which 1, 2 and 5 are null
        let sliced_array = list_array.slice(1, 6);
        assert_eq!(6, sliced_array.len());
        assert_eq!(3, sliced_array.null_count());

        // Slot `i` of the slice corresponds to slot `1 + i` of the original
        for i in 0..sliced_array.len() {
            if bit_util::get_bit(&null_bits, 1 + i) {
                assert!(sliced_array.is_valid(i));
            } else {
                assert!(sliced_array.is_null(i));
            }
        }

        // Check offset and length for each non-null value.
        let sliced_list_array = sliced_array
            .as_any()
            .downcast_ref::<ListViewArray>()
            .unwrap();
        assert_eq!(2, sliced_list_array.value_offsets()[2]);
        assert_eq!(2, sliced_list_array.value_sizes()[2]);
        assert_eq!(2, sliced_list_array.value_size(2));

        assert_eq!(4, sliced_list_array.value_offsets()[3]);
        assert_eq!(2, sliced_list_array.value_sizes()[3]);
        assert_eq!(2, sliced_list_array.value_size(3));

        assert_eq!(6, sliced_list_array.value_offsets()[5]);
        assert_eq!(3, sliced_list_array.value_sizes()[5]);
        assert_eq!(3, sliced_list_array.value_size(5));
    }
647
648    #[test]
649    fn test_large_list_view_array_slice() {
650        // Construct a value array
651        let value_data = ArrayData::builder(DataType::Int32)
652            .len(10)
653            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
654            .build()
655            .unwrap();
656
657        // 01011001 00000001
658        let mut null_bits: [u8; 2] = [0; 2];
659        bit_util::set_bit(&mut null_bits, 0);
660        bit_util::set_bit(&mut null_bits, 3);
661        bit_util::set_bit(&mut null_bits, 4);
662        bit_util::set_bit(&mut null_bits, 6);
663        bit_util::set_bit(&mut null_bits, 8);
664        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
665        let null_buffer = NullBuffer::new(buffer);
666
667        // Construct a large list view array from the above two
668        let field = Arc::new(Field::new("item", DataType::Int32, true));
669        let sizes = ScalarBuffer::from(vec![2i64, 0, 0, 2, 2, 0, 3, 0, 1]);
670        let offsets = ScalarBuffer::from(vec![0i64, 2, 2, 2, 4, 6, 6, 9, 9]);
671        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
672        let list_array =
673            LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));
674
675        let values = list_array.values();
676        assert_eq!(value_data, values.to_data());
677        assert_eq!(DataType::Int32, list_array.value_type());
678        assert_eq!(9, list_array.len());
679        assert_eq!(4, list_array.null_count());
680        assert_eq!(2, list_array.value_offsets()[3]);
681        assert_eq!(2, list_array.value_sizes()[3]);
682        assert_eq!(2, list_array.value_size(3));
683
684        let sliced_array = list_array.slice(1, 6);
685        assert_eq!(6, sliced_array.len());
686        assert_eq!(3, sliced_array.null_count());
687
688        for i in 0..sliced_array.len() {
689            if bit_util::get_bit(&null_bits, 1 + i) {
690                assert!(sliced_array.is_valid(i));
691            } else {
692                assert!(sliced_array.is_null(i));
693            }
694        }
695
696        // Check offset and length for each non-null value.
697        let sliced_list_array = sliced_array
698            .as_any()
699            .downcast_ref::<LargeListViewArray>()
700            .unwrap();
701        assert_eq!(2, sliced_list_array.value_offsets()[2]);
702        assert_eq!(2, sliced_list_array.value_size(2));
703        assert_eq!(2, sliced_list_array.value_sizes()[2]);
704
705        assert_eq!(4, sliced_list_array.value_offsets()[3]);
706        assert_eq!(2, sliced_list_array.value_size(3));
707        assert_eq!(2, sliced_list_array.value_sizes()[3]);
708
709        assert_eq!(6, sliced_list_array.value_offsets()[5]);
710        assert_eq!(3, sliced_list_array.value_size(5));
711        assert_eq!(2, sliced_list_array.value_sizes()[3]);
712    }
713
    /// Accessing a list past the end of the array must panic with the standard
    /// slice index-out-of-bounds message.
    #[test]
    #[should_panic(expected = "index out of bounds: the len is 9 but the index is 10")]
    fn test_list_view_array_index_out_of_bound() {
        // 01011001 00000001
        let mut null_bits: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut null_bits, 0);
        bit_util::set_bit(&mut null_bits, 3);
        bit_util::set_bit(&mut null_bits, 4);
        bit_util::set_bit(&mut null_bits, 6);
        bit_util::set_bit(&mut null_bits, 8);
        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
        let null_buffer = NullBuffer::new(buffer);

        // Construct offsets and sizes buffers for the nested array:
        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
        // Construct a list view array from the above
        let field = Arc::new(Field::new("item", DataType::Int32, true));
        let sizes = ScalarBuffer::from(vec![2i32, 0, 0, 2, 2, 0, 3, 0, 1]);
        let offsets = ScalarBuffer::from(vec![0i32, 2, 2, 2, 4, 6, 6, 9, 9]);
        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
        let list_array =
            ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));

        assert_eq!(9, list_array.len());
        // Index 10 is outside the 9 lists and must trigger the expected panic
        list_array.value(10);
    }
    /// Converting `ArrayData` without any buffers into a list view array must panic
    /// with the "two buffers" error.
    #[test]
    #[should_panic(
        expected = "ListViewArray data should contain two buffers (value offsets & value sizes), had 0"
    )]
    #[cfg(not(feature = "force_validate"))]
    fn test_list_view_array_invalid_buffer_len() {
        let value_data = unsafe {
            ArrayData::builder(DataType::Int32)
                .len(8)
                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
                .build_unchecked()
        };
        let list_data_type =
            DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false)));
        // Deliberately malformed: a child array is attached but no offsets/sizes buffers
        let list_data = unsafe {
            ArrayData::builder(list_data_type)
                .len(3)
                .add_child_data(value_data)
                .build_unchecked()
        };
        drop(ListViewArray::from(list_data));
    }
762
763    #[test]
764    #[should_panic(
765        expected = "ListViewArray data should contain two buffers (value offsets & value sizes), had 1"
766    )]
767    #[cfg(not(feature = "force_validate"))]
768    fn test_list_view_array_invalid_child_array_len() {
769        let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
770        let list_data_type =
771            DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false)));
772        let list_data = unsafe {
773            ArrayData::builder(list_data_type)
774                .len(3)
775                .add_buffer(value_offsets)
776                .build_unchecked()
777        };
778        drop(ListViewArray::from(list_data));
779    }
780
781    #[test]
782    fn test_list_view_array_offsets_need_not_start_at_zero() {
783        let field = Arc::new(Field::new("item", DataType::Int32, true));
784        let sizes = ScalarBuffer::from(vec![0i32, 0, 3]);
785        let offsets = ScalarBuffer::from(vec![2i32, 2, 5]);
786        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
787        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);
788
789        assert_eq!(list_array.value_size(0), 0);
790        assert_eq!(list_array.value_size(1), 0);
791        assert_eq!(list_array.value_size(2), 3);
792    }
793
    /// Converting `ArrayData` whose offsets/sizes buffers are misaligned must panic
    /// with the scalar-type alignment error.
    #[test]
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
    #[cfg(not(feature = "force_validate"))]
    fn test_list_view_array_alignment() {
        // NOTE(review): `slice(1)` presumably advances the buffer by one byte, which
        // is what breaks the alignment expected below; confirm against `Buffer::slice`.
        let offset_buf = Buffer::from_slice_ref([0_u64]);
        let offset_buf2 = offset_buf.slice(1);

        let size_buf = Buffer::from_slice_ref([0_u64]);
        let size_buf2 = size_buf.slice(1);

        let values: [i32; 8] = [0; 8];
        let value_data = unsafe {
            ArrayData::builder(DataType::Int32)
                .add_buffer(Buffer::from_slice_ref(values))
                .build_unchecked()
        };

        let list_data_type =
            DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false)));
        // Built unchecked so the sliced buffers reach the conversion below as-is
        let list_data = unsafe {
            ArrayData::builder(list_data_type)
                .add_buffer(offset_buf2)
                .add_buffer(size_buf2)
                .add_child_data(value_data)
                .build_unchecked()
        };
        drop(ListViewArray::from(list_data));
    }
822
823    #[test]
824    fn test_empty_offsets() {
825        let f = Arc::new(Field::new("element", DataType::Int32, true));
826        let string = ListViewArray::from(
827            ArrayData::builder(DataType::ListView(f.clone()))
828                .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
829                .add_child_data(ArrayData::new_empty(&DataType::Int32))
830                .build()
831                .unwrap(),
832        );
833        assert_eq!(string.value_offsets(), &[]);
834        assert_eq!(string.value_sizes(), &[]);
835
836        let string = LargeListViewArray::from(
837            ArrayData::builder(DataType::LargeListView(f))
838                .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
839                .add_child_data(ArrayData::new_empty(&DataType::Int32))
840                .build()
841                .unwrap(),
842        );
843        assert_eq!(string.len(), 0);
844        assert_eq!(string.value_offsets(), &[]);
845        assert_eq!(string.value_sizes(), &[]);
846    }
847
848    #[test]
849    fn test_try_new() {
850        let offsets = ScalarBuffer::from(vec![0, 1, 4, 5]);
851        let sizes = ScalarBuffer::from(vec![1, 3, 1, 0]);
852        let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
853        let values = Arc::new(values) as ArrayRef;
854
855        let field = Arc::new(Field::new("element", DataType::Int32, false));
856        ListViewArray::new(
857            field.clone(),
858            offsets.clone(),
859            sizes.clone(),
860            values.clone(),
861            None,
862        );
863
864        let nulls = NullBuffer::new_null(4);
865        ListViewArray::new(
866            field.clone(),
867            offsets,
868            sizes.clone(),
869            values.clone(),
870            Some(nulls),
871        );
872
873        let nulls = NullBuffer::new_null(4);
874        let offsets = ScalarBuffer::from(vec![0, 1, 2, 3, 4]);
875        let sizes = ScalarBuffer::from(vec![1, 1, 1, 1, 0]);
876        let err = LargeListViewArray::try_new(
877            field,
878            offsets.clone(),
879            sizes.clone(),
880            values.clone(),
881            Some(nulls),
882        )
883        .unwrap_err();
884
885        assert_eq!(
886            err.to_string(),
887            "Invalid argument error: Incorrect length of null buffer for LargeListViewArray, expected 5 got 4"
888        );
889
890        let field = Arc::new(Field::new("element", DataType::Int64, false));
891        let err = LargeListViewArray::try_new(
892            field.clone(),
893            offsets.clone(),
894            sizes.clone(),
895            values.clone(),
896            None,
897        )
898        .unwrap_err();
899
900        assert_eq!(
901            err.to_string(),
902            "Invalid argument error: LargeListViewArray expected data type Int64 got Int32 for \"element\""
903        );
904
905        let nulls = NullBuffer::new_null(7);
906        let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
907        let values = Arc::new(values);
908
909        let err = LargeListViewArray::try_new(
910            field,
911            offsets.clone(),
912            sizes.clone(),
913            values.clone(),
914            None,
915        )
916        .unwrap_err();
917
918        assert_eq!(
919            err.to_string(),
920            "Invalid argument error: Non-nullable field of LargeListViewArray \"element\" cannot contain nulls"
921        );
922    }
923
924    #[test]
925    fn test_from_fixed_size_list() {
926        let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
927        builder.values().append_slice(&[1, 2, 3]);
928        builder.append(true);
929        builder.values().append_slice(&[0, 0, 0]);
930        builder.append(false);
931        builder.values().append_slice(&[4, 5, 6]);
932        builder.append(true);
933        let list: ListViewArray = builder.finish().into();
934        let values: Vec<_> = list
935            .iter()
936            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
937            .collect();
938        assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])]);
939        let offsets = list.value_offsets();
940        assert_eq!(offsets, &[0, 3, 6]);
941        let sizes = list.value_sizes();
942        assert_eq!(sizes, &[3, 3, 3]);
943    }
944
945    #[test]
946    fn test_list_view_array_overlap_lists() {
947        let value_data = unsafe {
948            ArrayData::builder(DataType::Int32)
949                .len(8)
950                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
951                .build_unchecked()
952        };
953        let list_data_type =
954            DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false)));
955        let list_data = unsafe {
956            ArrayData::builder(list_data_type)
957                .len(2)
958                .add_buffer(Buffer::from_slice_ref([0, 3])) // offsets
959                .add_buffer(Buffer::from_slice_ref([5, 5])) // sizes
960                .add_child_data(value_data)
961                .build_unchecked()
962        };
963        let array = ListViewArray::from(list_data);
964
965        assert_eq!(array.len(), 2);
966        assert_eq!(array.value_size(0), 5);
967        assert_eq!(array.value_size(1), 5);
968
969        let values: Vec<_> = array
970            .iter()
971            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
972            .collect();
973        assert_eq!(
974            values,
975            vec![Some(vec![0, 1, 2, 3, 4]), Some(vec![3, 4, 5, 6, 7])]
976        );
977    }
978
979    #[test]
980    fn test_list_view_array_incomplete_offsets() {
981        let value_data = unsafe {
982            ArrayData::builder(DataType::Int32)
983                .len(50)
984                .add_buffer(Buffer::from_slice_ref((0..50).collect::<Vec<i32>>()))
985                .build_unchecked()
986        };
987        let list_data_type =
988            DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false)));
989        let list_data = unsafe {
990            ArrayData::builder(list_data_type)
991                .len(3)
992                .add_buffer(Buffer::from_slice_ref([0, 5, 10])) // offsets
993                .add_buffer(Buffer::from_slice_ref([0, 5, 10])) // sizes
994                .add_child_data(value_data)
995                .build_unchecked()
996        };
997        let array = ListViewArray::from(list_data);
998
999        assert_eq!(array.len(), 3);
1000        assert_eq!(array.value_size(0), 0);
1001        assert_eq!(array.value_size(1), 5);
1002        assert_eq!(array.value_size(2), 10);
1003
1004        let values: Vec<_> = array
1005            .iter()
1006            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1007            .collect();
1008        assert_eq!(
1009            values,
1010            vec![
1011                Some(vec![]),
1012                Some(vec![5, 6, 7, 8, 9]),
1013                Some(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
1014            ]
1015        );
1016    }
1017
1018    #[test]
1019    fn test_list_view_array_empty_lists() {
1020        let value_data = unsafe {
1021            ArrayData::builder(DataType::Int32)
1022                .len(0)
1023                .add_buffer(Buffer::from_slice_ref::<i32, &[_; 0]>(&[]))
1024                .build_unchecked()
1025        };
1026        let list_data_type =
1027            DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false)));
1028        let list_data = unsafe {
1029            ArrayData::builder(list_data_type)
1030                .len(3)
1031                .add_buffer(Buffer::from_slice_ref([0, 0, 0])) // offsets
1032                .add_buffer(Buffer::from_slice_ref([0, 0, 0])) // sizes
1033                .add_child_data(value_data)
1034                .build_unchecked()
1035        };
1036        let array = ListViewArray::from(list_data);
1037
1038        assert_eq!(array.len(), 3);
1039        assert_eq!(array.value_size(0), 0);
1040        assert_eq!(array.value_size(1), 0);
1041        assert_eq!(array.value_size(2), 0);
1042
1043        let values: Vec<_> = array
1044            .iter()
1045            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1046            .collect();
1047        assert_eq!(values, vec![Some(vec![]), Some(vec![]), Some(vec![])]);
1048    }
1049}