vortex_array/arrays/chunked/
ops.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use itertools::Itertools;
5use vortex_scalar::Scalar;
6
7use crate::arrays::ChunkedVTable;
8use crate::arrays::chunked::ChunkedArray;
9use crate::vtable::OperationsVTable;
10use crate::{Array, ArrayRef, IntoArray};
11
12impl OperationsVTable<ChunkedVTable> for ChunkedVTable {
13    fn slice(array: &ChunkedArray, start: usize, stop: usize) -> ArrayRef {
14        assert!(
15            !array.is_empty() || (start > 0 && stop > 0),
16            "Empty chunked array can't be sliced from {start} to {stop}"
17        );
18
19        if array.is_empty() {
20            // SAFETY: empty chunked array trivially satisfies all validations
21            unsafe {
22                return ChunkedArray::new_unchecked(vec![], array.dtype().clone()).into_array();
23            }
24        }
25
26        let (offset_chunk, offset_in_first_chunk) = array.find_chunk_idx(start);
27        let (length_chunk, length_in_last_chunk) = array.find_chunk_idx(stop);
28
29        if length_chunk == offset_chunk {
30            let chunk = array.chunk(offset_chunk);
31            return chunk.slice(offset_in_first_chunk, length_in_last_chunk);
32        }
33
34        let mut chunks = (offset_chunk..length_chunk + 1)
35            .map(|i| array.chunk(i).clone())
36            .collect_vec();
37        if let Some(c) = chunks.first_mut() {
38            *c = c.slice(offset_in_first_chunk, c.len());
39        }
40
41        if length_in_last_chunk == 0 {
42            chunks.pop();
43        } else if let Some(c) = chunks.last_mut() {
44            *c = c.slice(0, length_in_last_chunk);
45        }
46
47        // SAFETY: all chunks still have same DType
48        unsafe { ChunkedArray::new_unchecked(chunks, array.dtype().clone()).into_array() }
49    }
50
51    fn scalar_at(array: &ChunkedArray, index: usize) -> Scalar {
52        let (chunk_index, chunk_offset) = array.find_chunk_idx(index);
53        array.chunk(chunk_index).scalar_at(chunk_offset)
54    }
55}
56
#[cfg(test)]
mod tests {
    use vortex_buffer::Buffer;
    use vortex_dtype::{DType, NativePType, Nullability, PType};

    use crate::IntoArray;
    use crate::array::Array;
    use crate::arrays::{ChunkedArray, ChunkedVTable, PrimitiveArray};
    use crate::canonical::ToCanonical;

    /// Dtype shared by every `u64` fixture in this module.
    fn u64_dtype() -> DType {
        DType::Primitive(PType::U64, Nullability::NonNullable)
    }

    /// Three chunks of three elements each, holding the values 1 through 9.
    fn chunked_array() -> ChunkedArray {
        let chunks = vec![
            PrimitiveArray::from_iter([1u64, 2, 3]).into_array(),
            PrimitiveArray::from_iter([4u64, 5, 6]).into_array(),
            PrimitiveArray::from_iter([7u64, 8, 9]).into_array(),
        ];
        ChunkedArray::try_new(chunks, u64_dtype()).unwrap()
    }

    /// Flattens `arr` (chunked or not) into a `Vec<T>` and compares it against `slice`.
    fn assert_equal_slices<T: NativePType>(arr: &dyn Array, slice: &[T]) {
        let mut collected = Vec::with_capacity(arr.len());
        match arr.as_opt::<ChunkedVTable>() {
            // Chunked result: concatenate the values of every chunk in order.
            Some(chunked) => {
                for chunk in chunked.chunks().iter() {
                    let primitive = chunk.to_primitive().unwrap();
                    collected.extend_from_slice(primitive.as_slice::<T>());
                }
            }
            // A slice that fell inside one chunk comes back as that chunk's own array.
            None => {
                let primitive = arr.to_primitive().unwrap();
                collected.extend_from_slice(primitive.as_slice::<T>());
            }
        }
        assert_eq!(collected, slice);
    }

    #[test]
    fn slice_middle() {
        let sliced = chunked_array().slice(2, 5);
        assert_equal_slices(&sliced, &[3u64, 4, 5]);
    }

    #[test]
    fn slice_begin() {
        let sliced = chunked_array().slice(1, 3);
        assert_equal_slices(&sliced, &[2u64, 3]);
    }

    #[test]
    fn slice_aligned() {
        let sliced = chunked_array().slice(3, 6);
        assert_equal_slices(&sliced, &[4u64, 5, 6]);
    }

    #[test]
    fn slice_many_aligned() {
        let sliced = chunked_array().slice(0, 6);
        assert_equal_slices(&sliced, &[1u64, 2, 3, 4, 5, 6]);
    }

    #[test]
    fn slice_end() {
        let sliced = chunked_array().slice(7, 8);
        assert_equal_slices(&sliced, &[8u64]);
    }

    #[test]
    fn slice_exactly_end() {
        let sliced = chunked_array().slice(6, 9);
        assert_equal_slices(&sliced, &[7u64, 8, 9]);
    }

    #[test]
    fn slice_empty() {
        let no_chunks = ChunkedArray::try_new(vec![], PType::U32.into()).unwrap();

        assert!(no_chunks.slice(0, 0).is_empty());
    }

    #[test]
    fn scalar_at_empty_children_both_sides() {
        let empty = || Buffer::<u64>::empty().into_array();
        let chunks = vec![
            empty(),
            empty(),
            PrimitiveArray::from_iter([1u64, 2]).into_array(),
            empty(),
            empty(),
        ];
        let array = ChunkedArray::try_new(chunks, u64_dtype()).unwrap();

        // Element `i` holds the value `i + 1`.
        for (index, expected) in (1u64..=2).enumerate() {
            assert_eq!(array.scalar_at(index), expected.into());
        }
    }

    #[test]
    fn scalar_at_empty_children_trailing() {
        let empty = || Buffer::<u64>::empty().into_array();
        let chunks = vec![
            PrimitiveArray::from_iter([1u64, 2]).into_array(),
            empty(),
            empty(),
            PrimitiveArray::from_iter([3u64, 4]).into_array(),
        ];
        let array = ChunkedArray::try_new(chunks, u64_dtype()).unwrap();

        // Element `i` holds the value `i + 1`.
        for (index, expected) in (1u64..=4).enumerate() {
            assert_eq!(array.scalar_at(index), expected.into());
        }
    }

    #[test]
    fn scalar_at_empty_children_leading() {
        let empty = || Buffer::<u64>::empty().into_array();
        let chunks = vec![
            empty(),
            empty(),
            PrimitiveArray::from_iter([1u64, 2]).into_array(),
            PrimitiveArray::from_iter([3u64, 4]).into_array(),
        ];
        let array = ChunkedArray::try_new(chunks, u64_dtype()).unwrap();

        // Element `i` holds the value `i + 1`.
        for (index, expected) in (1u64..=4).enumerate() {
            assert_eq!(array.scalar_at(index), expected.into());
        }
    }
}