vortex_array/arrays/chunked/
ops.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::ops::Range;
5
6use itertools::Itertools;
7use vortex_scalar::Scalar;
8
9use crate::arrays::ChunkedVTable;
10use crate::arrays::chunked::ChunkedArray;
11use crate::vtable::OperationsVTable;
12use crate::{Array, ArrayRef, IntoArray};
13
14impl OperationsVTable<ChunkedVTable> for ChunkedVTable {
15    fn slice(array: &ChunkedArray, range: Range<usize>) -> ArrayRef {
16        assert!(
17            !array.is_empty() || (range.start > 0 && range.end > 0),
18            "Empty chunked array can't be sliced from {} to {}",
19            range.start,
20            range.end
21        );
22
23        if array.is_empty() {
24            // SAFETY: empty chunked array trivially satisfies all validations
25            unsafe {
26                return ChunkedArray::new_unchecked(vec![], array.dtype().clone()).into_array();
27            }
28        }
29
30        let (offset_chunk, offset_in_first_chunk) = array.find_chunk_idx(range.start);
31        let (length_chunk, length_in_last_chunk) = array.find_chunk_idx(range.end);
32
33        if length_chunk == offset_chunk {
34            let chunk = array.chunk(offset_chunk);
35            return chunk.slice(offset_in_first_chunk..length_in_last_chunk);
36        }
37
38        let mut chunks = (offset_chunk..length_chunk + 1)
39            .map(|i| array.chunk(i).clone())
40            .collect_vec();
41        if let Some(c) = chunks.first_mut() {
42            *c = c.slice(offset_in_first_chunk..c.len());
43        }
44
45        if length_in_last_chunk == 0 {
46            chunks.pop();
47        } else if let Some(c) = chunks.last_mut() {
48            *c = c.slice(0..length_in_last_chunk);
49        }
50
51        // SAFETY: chunks are slices of the original valid chunks, preserving their dtype.
52        // All chunks maintain the same dtype as the original array.
53        unsafe { ChunkedArray::new_unchecked(chunks, array.dtype().clone()).into_array() }
54    }
55
56    fn scalar_at(array: &ChunkedArray, index: usize) -> Scalar {
57        let (chunk_index, chunk_offset) = array.find_chunk_idx(index);
58        array.chunk(chunk_index).scalar_at(chunk_offset)
59    }
60}
61
#[cfg(test)]
mod tests {
    use vortex_buffer::{Buffer, buffer};
    use vortex_dtype::{DType, NativePType, Nullability, PType};

    use crate::IntoArray;
    use crate::array::Array;
    use crate::arrays::{ChunkedArray, ChunkedVTable};
    use crate::canonical::ToCanonical;

    /// The dtype shared by every `u64` fixture in this module.
    fn non_nullable_u64() -> DType {
        DType::Primitive(PType::U64, Nullability::NonNullable)
    }

    /// Builds a three-chunk array holding the values 1 through 9, three per chunk.
    fn chunked_array() -> ChunkedArray {
        let chunks = vec![
            buffer![1u64, 2, 3].into_array(),
            buffer![4u64, 5, 6].into_array(),
            buffer![7u64, 8, 9].into_array(),
        ];
        ChunkedArray::try_new(chunks, non_nullable_u64()).unwrap()
    }

    /// Asserts that the primitive values of `arr` equal `slice`.
    ///
    /// A chunked array is flattened chunk by chunk; any other array is converted to a
    /// primitive array as a whole.
    fn assert_equal_slices<T: NativePType>(arr: &dyn Array, slice: &[T]) {
        let mut values = Vec::with_capacity(arr.len());
        match arr.as_opt::<ChunkedVTable>() {
            Some(chunked) => {
                for chunk in chunked.chunks().iter() {
                    let primitive = chunk.to_primitive();
                    values.extend_from_slice(primitive.as_slice::<T>());
                }
            }
            None => {
                let primitive = arr.to_primitive();
                values.extend_from_slice(primitive.as_slice::<T>());
            }
        }
        assert_eq!(values, slice);
    }

    #[test]
    fn slice_middle() {
        // Spans the boundary between the first and second chunk.
        let sliced = chunked_array().slice(2..5);
        assert_equal_slices(&sliced, &[3u64, 4, 5]);
    }

    #[test]
    fn slice_begin() {
        // Stays inside the first chunk and ends on its boundary.
        let sliced = chunked_array().slice(1..3);
        assert_equal_slices(&sliced, &[2u64, 3]);
    }

    #[test]
    fn slice_aligned() {
        // Covers exactly the second chunk.
        let sliced = chunked_array().slice(3..6);
        assert_equal_slices(&sliced, &[4u64, 5, 6]);
    }

    #[test]
    fn slice_many_aligned() {
        // Covers exactly the first two chunks.
        let sliced = chunked_array().slice(0..6);
        assert_equal_slices(&sliced, &[1u64, 2, 3, 4, 5, 6]);
    }

    #[test]
    fn slice_end() {
        // A single element from the interior of the last chunk.
        let sliced = chunked_array().slice(7..8);
        assert_equal_slices(&sliced, &[8u64]);
    }

    #[test]
    fn slice_exactly_end() {
        // Covers exactly the last chunk, ending at the array length.
        let sliced = chunked_array().slice(6..9);
        assert_equal_slices(&sliced, &[7u64, 8, 9]);
    }

    #[test]
    fn slice_empty() {
        let chunked = ChunkedArray::try_new(vec![], PType::U32.into()).unwrap();

        let sliced = chunked.slice(0..0);

        assert!(sliced.is_empty());
    }

    #[test]
    fn scalar_at_empty_children_both_sides() {
        let chunks = vec![
            Buffer::<u64>::empty().into_array(),
            Buffer::<u64>::empty().into_array(),
            buffer![1u64, 2].into_array(),
            Buffer::<u64>::empty().into_array(),
            Buffer::<u64>::empty().into_array(),
        ];
        let array = ChunkedArray::try_new(chunks, non_nullable_u64()).unwrap();

        for (index, expected) in [1u64, 2].into_iter().enumerate() {
            assert_eq!(array.scalar_at(index), expected.into());
        }
    }

    #[test]
    fn scalar_at_empty_children_trailing() {
        let chunks = vec![
            buffer![1u64, 2].into_array(),
            Buffer::<u64>::empty().into_array(),
            Buffer::<u64>::empty().into_array(),
            buffer![3u64, 4].into_array(),
        ];
        let array = ChunkedArray::try_new(chunks, non_nullable_u64()).unwrap();

        for (index, expected) in [1u64, 2, 3, 4].into_iter().enumerate() {
            assert_eq!(array.scalar_at(index), expected.into());
        }
    }

    #[test]
    fn scalar_at_empty_children_leading() {
        let chunks = vec![
            Buffer::<u64>::empty().into_array(),
            Buffer::<u64>::empty().into_array(),
            buffer![1u64, 2].into_array(),
            buffer![3u64, 4].into_array(),
        ];
        let array = ChunkedArray::try_new(chunks, non_nullable_u64()).unwrap();

        for (index, expected) in [1u64, 2, 3, 4].into_iter().enumerate() {
            assert_eq!(array.scalar_at(index), expected.into());
        }
    }
}