vortex_array/arrays/chunked/
ops.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::ops::Range;
5
6use itertools::Itertools;
7use vortex_scalar::Scalar;
8
9use crate::arrays::ChunkedVTable;
10use crate::arrays::chunked::ChunkedArray;
11use crate::vtable::OperationsVTable;
12use crate::{Array, ArrayRef, IntoArray};
13
14impl OperationsVTable<ChunkedVTable> for ChunkedVTable {
15    fn slice(array: &ChunkedArray, range: Range<usize>) -> ArrayRef {
16        assert!(
17            !array.is_empty() || (range.start > 0 && range.end > 0),
18            "Empty chunked array can't be sliced from {} to {}",
19            range.start,
20            range.end
21        );
22
23        if array.is_empty() {
24            // SAFETY: empty chunked array trivially satisfies all validations
25            unsafe {
26                return ChunkedArray::new_unchecked(vec![], array.dtype().clone()).into_array();
27            }
28        }
29
30        let (offset_chunk, offset_in_first_chunk) = array.find_chunk_idx(range.start);
31        let (length_chunk, length_in_last_chunk) = array.find_chunk_idx(range.end);
32
33        if length_chunk == offset_chunk {
34            let chunk = array.chunk(offset_chunk);
35            return chunk.slice(offset_in_first_chunk..length_in_last_chunk);
36        }
37
38        let mut chunks = (offset_chunk..length_chunk + 1)
39            .map(|i| array.chunk(i).clone())
40            .collect_vec();
41        if let Some(c) = chunks.first_mut() {
42            *c = c.slice(offset_in_first_chunk..c.len());
43        }
44
45        if length_in_last_chunk == 0 {
46            chunks.pop();
47        } else if let Some(c) = chunks.last_mut() {
48            *c = c.slice(0..length_in_last_chunk);
49        }
50
51        // SAFETY: all chunks still have same DType
52        unsafe { ChunkedArray::new_unchecked(chunks, array.dtype().clone()).into_array() }
53    }
54
55    fn scalar_at(array: &ChunkedArray, index: usize) -> Scalar {
56        let (chunk_index, chunk_offset) = array.find_chunk_idx(index);
57        array.chunk(chunk_index).scalar_at(chunk_offset)
58    }
59}
60
#[cfg(test)]
mod tests {
    use vortex_buffer::Buffer;
    use vortex_dtype::{DType, NativePType, Nullability, PType};

    use crate::IntoArray;
    use crate::array::Array;
    use crate::arrays::{ChunkedArray, ChunkedVTable, PrimitiveArray};
    use crate::canonical::ToCanonical;

    /// Builds a non-nullable `u64` chunked array holding `1..=9` as three chunks of three values.
    fn chunked_array() -> ChunkedArray {
        let chunk = |values: [u64; 3]| PrimitiveArray::from_iter(values).into_array();
        let chunks = vec![chunk([1, 2, 3]), chunk([4, 5, 6]), chunk([7, 8, 9])];
        let dtype = DType::Primitive(PType::U64, Nullability::NonNullable);
        ChunkedArray::try_new(chunks, dtype).unwrap()
    }

    /// Asserts that the primitive values of `arr` equal `slice`.
    ///
    /// A chunked array is read chunk by chunk; any other array is converted to a primitive
    /// array in one step.
    fn assert_equal_slices<T: NativePType>(arr: &dyn Array, slice: &[T]) {
        let mut collected: Vec<T> = Vec::with_capacity(arr.len());
        match arr.as_opt::<ChunkedVTable>() {
            Some(chunked) => {
                for chunk in chunked.chunks().iter() {
                    let primitive = chunk.to_primitive().unwrap();
                    collected.extend_from_slice(primitive.as_slice::<T>());
                }
            }
            None => {
                let primitive = arr.to_primitive().unwrap();
                collected.extend_from_slice(primitive.as_slice::<T>());
            }
        }
        assert_eq!(collected, slice);
    }

    // Range straddles the boundary between the first and second chunk.
    #[test]
    fn slice_middle() {
        let sliced = chunked_array().slice(2..5);
        assert_equal_slices(&sliced, &[3u64, 4, 5]);
    }

    // Range lies inside the first chunk and ends on its boundary.
    #[test]
    fn slice_begin() {
        let sliced = chunked_array().slice(1..3);
        assert_equal_slices(&sliced, &[2u64, 3]);
    }

    // Range covers exactly the second chunk.
    #[test]
    fn slice_aligned() {
        let sliced = chunked_array().slice(3..6);
        assert_equal_slices(&sliced, &[4u64, 5, 6]);
    }

    // Range covers exactly the first two chunks.
    #[test]
    fn slice_many_aligned() {
        let sliced = chunked_array().slice(0..6);
        assert_equal_slices(&sliced, &[1u64, 2, 3, 4, 5, 6]);
    }

    // Range lies strictly inside the last chunk.
    #[test]
    fn slice_end() {
        let sliced = chunked_array().slice(7..8);
        assert_equal_slices(&sliced, &[8u64]);
    }

    // Range covers exactly the last chunk, ending at the array length.
    #[test]
    fn slice_exactly_end() {
        let sliced = chunked_array().slice(6..9);
        assert_equal_slices(&sliced, &[7u64, 8, 9]);
    }

    // Slicing a chunked array without chunks over `0..0` yields an empty array.
    #[test]
    fn slice_empty() {
        let no_chunks = ChunkedArray::try_new(vec![], PType::U32.into()).unwrap();
        assert!(no_chunks.slice(0..0).is_empty());
    }

    // Empty chunks on both sides of the only populated chunk.
    #[test]
    fn scalar_at_empty_children_both_sides() {
        let empty = || Buffer::<u64>::empty().into_array();
        let filled = |values: [u64; 2]| PrimitiveArray::from_iter(values).into_array();

        let chunks = vec![empty(), empty(), filled([1, 2]), empty(), empty()];
        let dtype = DType::Primitive(PType::U64, Nullability::NonNullable);
        let array = ChunkedArray::try_new(chunks, dtype).unwrap();

        for (idx, expected) in (1u64..=2).enumerate() {
            assert_eq!(array.scalar_at(idx), expected.into());
        }
    }

    // Empty chunks sitting between two populated chunks.
    #[test]
    fn scalar_at_empty_children_trailing() {
        let empty = || Buffer::<u64>::empty().into_array();
        let filled = |values: [u64; 2]| PrimitiveArray::from_iter(values).into_array();

        let chunks = vec![filled([1, 2]), empty(), empty(), filled([3, 4])];
        let dtype = DType::Primitive(PType::U64, Nullability::NonNullable);
        let array = ChunkedArray::try_new(chunks, dtype).unwrap();

        for (idx, expected) in (1u64..=4).enumerate() {
            assert_eq!(array.scalar_at(idx), expected.into());
        }
    }

    // Empty chunks preceding two populated chunks.
    #[test]
    fn scalar_at_empty_children_leading() {
        let empty = || Buffer::<u64>::empty().into_array();
        let filled = |values: [u64; 2]| PrimitiveArray::from_iter(values).into_array();

        let chunks = vec![empty(), empty(), filled([1, 2]), filled([3, 4])];
        let dtype = DType::Primitive(PType::U64, Nullability::NonNullable);
        let array = ChunkedArray::try_new(chunks, dtype).unwrap();

        for (idx, expected) in (1u64..=4).enumerate() {
            assert_eq!(array.scalar_at(idx), expected.into());
        }
    }
}