vortex_array/arrays/primitive/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::iter;
6
7mod accessor;
8
9use arrow_buffer::BooleanBufferBuilder;
10use vortex_buffer::{Alignment, Buffer, BufferMut, ByteBuffer, ByteBufferMut};
11use vortex_dtype::{DType, NativePType, Nullability, PType, match_each_native_ptype};
use vortex_error::{VortexResult, vortex_bail, vortex_panic};
13
14use crate::builders::ArrayBuilder;
15use crate::stats::{ArrayStats, StatsSetRef};
16use crate::validity::Validity;
17use crate::{Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
18
19mod compute;
20mod downcast;
21mod native_value;
22mod ops;
23mod patch;
24mod serde;
25mod top_value;
26
27pub use compute::{IS_CONST_LANE_WIDTH, compute_is_constant};
28pub use native_value::NativeValue;
29
30use crate::vtable::{
31    ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
32    ValidityVTableFromValidityHelper,
33};
34
35vtable!(Primitive);
36
37impl VTable for PrimitiveVTable {
38    type Array = PrimitiveArray;
39    type Encoding = PrimitiveEncoding;
40
41    type ArrayVTable = Self;
42    type CanonicalVTable = Self;
43    type OperationsVTable = Self;
44    type ValidityVTable = ValidityVTableFromValidityHelper;
45    type VisitorVTable = Self;
46    type ComputeVTable = NotSupported;
47    type EncodeVTable = NotSupported;
48    type PipelineVTable = Self;
49    type SerdeVTable = Self;
50
51    fn id(_encoding: &Self::Encoding) -> EncodingId {
52        EncodingId::new_ref("vortex.primitive")
53    }
54
55    fn encoding(_array: &Self::Array) -> EncodingRef {
56        EncodingRef::new_ref(PrimitiveEncoding.as_ref())
57    }
58}
59
/// A primitive array that stores [native types][vortex_dtype::NativePType] in a contiguous buffer
/// of memory, along with an optional validity child.
///
/// This mirrors the Apache Arrow Primitive layout and can be converted into and out of one
/// without allocations or copies.
///
/// The underlying buffer must be natively aligned to the primitive type it represents.
///
/// Values are stored in their native representation with proper alignment.
/// Null values still occupy space in the buffer but are marked invalid in the validity mask.
///
/// # Examples
///
/// ```
/// use vortex_array::arrays::PrimitiveArray;
/// use vortex_array::compute::sum;
///
/// // Create from iterator using FromIterator impl
/// let array: PrimitiveArray = [1i32, 2, 3, 4, 5].into_iter().collect();
///
/// // Slice the array
/// let sliced = array.slice(1, 3);
///
/// // Access individual values
/// let value = sliced.scalar_at(0);
/// assert_eq!(value, 2i32.into());
///
/// // Convert into a type-erased array that can be passed to compute functions.
/// let summed = sum(sliced.as_ref()).unwrap().as_primitive().typed_value::<i64>().unwrap();
/// assert_eq!(summed, 5i64);
/// ```
#[derive(Clone, Debug)]
pub struct PrimitiveArray {
    /// Logical type of the array; `new` sets it to `DType::Primitive` with the validity's
    /// nullability.
    dtype: DType,
    /// Untyped bytes of the values. The array length is this buffer's length divided by the
    /// primitive type's byte width.
    buffer: ByteBuffer,
    /// Validity of the values, as supplied to the constructor.
    validity: Validity,
    /// Statistics holder; starts out as `Default::default()` in `new`.
    stats_set: ArrayStats,
}
98
/// Unit type naming the primitive encoding; it is the `Encoding` associated type of
/// `PrimitiveVTable`.
#[derive(Clone, Debug)]
pub struct PrimitiveEncoding;
101
102impl PrimitiveArray {
103    pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
104        let buffer = buffer.into();
105        if let Some(len) = validity.maybe_len()
106            && buffer.len() != len
107        {
108            vortex_panic!(
109                "Buffer and validity length mismatch: buffer={}, validity={}",
110                buffer.len(),
111                len
112            );
113        }
114
115        Self {
116            dtype: DType::Primitive(T::PTYPE, validity.nullability()),
117            buffer: buffer.into_byte_buffer(),
118            validity,
119            stats_set: Default::default(),
120        }
121    }
122
123    pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
124        Self::new(Buffer::<T>::empty(), nullability.into())
125    }
126
127    pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
128        match_each_native_ptype!(ptype, |T| {
129            Self::new::<T>(Buffer::from_byte_buffer(buffer), validity)
130        })
131    }
132
133    /// Create a PrimitiveArray from an iterator of `T`.
134    /// NOTE: we cannot impl FromIterator trait since it conflicts with `FromIterator<T>`.
135    pub fn from_option_iter<T: NativePType, I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
136        let iter = iter.into_iter();
137        let mut values = BufferMut::with_capacity(iter.size_hint().0);
138        let mut validity = BooleanBufferBuilder::new(values.capacity());
139
140        for i in iter {
141            match i {
142                None => {
143                    validity.append(false);
144                    values.push(T::default());
145                }
146                Some(e) => {
147                    validity.append(true);
148                    values.push(e);
149                }
150            }
151        }
152        Self::new(values.freeze(), Validity::from(validity.finish()))
153    }
154
155    /// Create a PrimitiveArray from a byte buffer containing only the valid elements.
156    pub fn from_values_byte_buffer(
157        valid_elems_buffer: ByteBuffer,
158        ptype: PType,
159        validity: Validity,
160        n_rows: usize,
161    ) -> VortexResult<Self> {
162        let byte_width = ptype.byte_width();
163        let alignment = Alignment::new(byte_width);
164        let buffer = match &validity {
165            Validity::AllValid | Validity::NonNullable => valid_elems_buffer.aligned(alignment),
166            Validity::AllInvalid => ByteBuffer::zeroed_aligned(n_rows * byte_width, alignment),
167            Validity::Array(is_valid) => {
168                let bool_array = is_valid.to_canonical()?.into_bool()?;
169                let bool_buffer = bool_array.boolean_buffer();
170                let mut bytes = ByteBufferMut::zeroed_aligned(n_rows * byte_width, alignment);
171                for (i, valid_i) in bool_buffer.set_indices().enumerate() {
172                    bytes[valid_i * byte_width..(valid_i + 1) * byte_width]
173                        .copy_from_slice(&valid_elems_buffer[i * byte_width..(i + 1) * byte_width])
174                }
175                bytes.freeze()
176            }
177        };
178
179        Ok(Self::from_byte_buffer(buffer, ptype, validity))
180    }
181
182    pub fn ptype(&self) -> PType {
183        self.dtype().as_ptype()
184    }
185
186    pub fn byte_buffer(&self) -> &ByteBuffer {
187        &self.buffer
188    }
189
190    pub fn into_byte_buffer(self) -> ByteBuffer {
191        self.buffer
192    }
193
194    pub fn buffer<T: NativePType>(&self) -> Buffer<T> {
195        if T::PTYPE != self.ptype() {
196            vortex_panic!(
197                "Attempted to get buffer of type {} from array of type {}",
198                T::PTYPE,
199                self.ptype()
200            )
201        }
202        Buffer::from_byte_buffer(self.byte_buffer().clone())
203    }
204
205    pub fn into_buffer<T: NativePType>(self) -> Buffer<T> {
206        if T::PTYPE != self.ptype() {
207            vortex_panic!(
208                "Attempted to get buffer of type {} from array of type {}",
209                T::PTYPE,
210                self.ptype()
211            )
212        }
213        Buffer::from_byte_buffer(self.buffer)
214    }
215
216    /// Extract a mutable buffer from the PrimitiveArray. Attempts to do this with zero-copy
217    /// if the buffer is uniquely owned, otherwise will make a copy.
218    pub fn into_buffer_mut<T: NativePType>(self) -> BufferMut<T> {
219        if T::PTYPE != self.ptype() {
220            vortex_panic!(
221                "Attempted to get buffer_mut of type {} from array of type {}",
222                T::PTYPE,
223                self.ptype()
224            )
225        }
226        self.into_buffer()
227            .try_into_mut()
228            .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
229    }
230
231    /// Try to extract a mutable buffer from the PrimitiveArray with zero copy.
232    #[allow(clippy::panic_in_result_fn)]
233    pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, PrimitiveArray> {
234        if T::PTYPE != self.ptype() {
235            vortex_panic!(
236                "Attempted to get buffer_mut of type {} from array of type {}",
237                T::PTYPE,
238                self.ptype()
239            )
240        }
241        let validity = self.validity().clone();
242        Buffer::<T>::from_byte_buffer(self.into_byte_buffer())
243            .try_into_mut()
244            .map_err(|buffer| PrimitiveArray::new(buffer, validity))
245    }
246
247    /// Map each element in the array to a new value.
248    ///
249    /// This ignores validity and maps over all maybe-null elements.
250    ///
251    /// TODO(ngates): we could be smarter here if validity is sparse and only run the function
252    ///   over the valid elements.
253    pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
254    where
255        T: NativePType,
256        R: NativePType,
257        F: FnMut(T) -> R,
258    {
259        let validity = self.validity().clone();
260        let buffer = match self.try_into_buffer_mut() {
261            Ok(buffer_mut) => buffer_mut.map_each(f),
262            Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
263        };
264        PrimitiveArray::new(buffer.freeze(), validity)
265    }
266
267    /// Map each element in the array to a new value.
268    ///
269    /// This doesn't ignore validity and maps over all maybe-null elements, with a bool true if
270    /// valid and false otherwise.
271    pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
272    where
273        T: NativePType,
274        R: NativePType,
275        F: FnMut((T, bool)) -> R,
276    {
277        let validity = self.validity();
278
279        let buf_iter = self.buffer::<T>().into_iter();
280
281        let buffer = match &validity {
282            Validity::NonNullable | Validity::AllValid => {
283                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
284            }
285            Validity::AllInvalid => {
286                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
287            }
288            Validity::Array(val) => {
289                let val = val.to_canonical()?.into_bool()?;
290                BufferMut::<R>::from_iter(buf_iter.zip(val.boolean_buffer()).map(f))
291            }
292        };
293        Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
294    }
295
296    /// Return a slice of the array's buffer.
297    ///
298    /// NOTE: these values may be nonsense if the validity buffer indicates that the value is null.
299    pub fn as_slice<T: NativePType>(&self) -> &[T] {
300        if T::PTYPE != self.ptype() {
301            vortex_panic!(
302                "Attempted to get slice of type {} from array of type {}",
303                T::PTYPE,
304                self.ptype()
305            )
306        }
307        let raw_slice = self.byte_buffer().as_ptr();
308        // SAFETY: alignment of Buffer is checked on construction
309        unsafe {
310            std::slice::from_raw_parts(raw_slice.cast(), self.byte_buffer().len() / size_of::<T>())
311        }
312    }
313
314    pub fn reinterpret_cast(&self, ptype: PType) -> Self {
315        if self.ptype() == ptype {
316            return self.clone();
317        }
318
319        assert_eq!(
320            self.ptype().byte_width(),
321            ptype.byte_width(),
322            "can't reinterpret cast between integers of two different widths"
323        );
324
325        PrimitiveArray::from_byte_buffer(self.byte_buffer().clone(), ptype, self.validity().clone())
326    }
327}
328
329impl ArrayVTable<PrimitiveVTable> for PrimitiveVTable {
330    fn len(array: &PrimitiveArray) -> usize {
331        array.byte_buffer().len() / array.ptype().byte_width()
332    }
333
334    fn dtype(array: &PrimitiveArray) -> &DType {
335        &array.dtype
336    }
337
338    fn stats(array: &PrimitiveArray) -> StatsSetRef<'_> {
339        array.stats_set.to_ref(array.as_ref())
340    }
341}
342
343impl ValidityHelper for PrimitiveArray {
344    fn validity(&self) -> &Validity {
345        &self.validity
346    }
347}
348
349impl<T: NativePType> FromIterator<T> for PrimitiveArray {
350    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
351        let values = BufferMut::from_iter(iter);
352        PrimitiveArray::new(values.freeze(), Validity::NonNullable)
353    }
354}
355
356impl<T: NativePType> IntoArray for Buffer<T> {
357    fn into_array(self) -> ArrayRef {
358        PrimitiveArray::new(self, Validity::NonNullable).into_array()
359    }
360}
361
362impl<T: NativePType> IntoArray for BufferMut<T> {
363    fn into_array(self) -> ArrayRef {
364        self.freeze().into_array()
365    }
366}
367
368impl CanonicalVTable<PrimitiveVTable> for PrimitiveVTable {
369    fn canonicalize(array: &PrimitiveArray) -> VortexResult<Canonical> {
370        Ok(Canonical::Primitive(array.clone()))
371    }
372
373    fn append_to_builder(
374        array: &PrimitiveArray,
375        builder: &mut dyn ArrayBuilder,
376    ) -> VortexResult<()> {
377        builder.extend_from_array(array.as_ref())
378    }
379}
380
// Unit tests running the shared search-sorted, mask and filter conformance helpers against
// `PrimitiveArray`.
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_scalar::PValue;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::filter::test_filter_conformance;
    use crate::compute::conformance::mask::test_mask_conformance;
    use crate::compute::conformance::search_sorted::rstest_reuse::apply;
    use crate::compute::conformance::search_sorted::{search_sorted_conformance, *};
    use crate::search_sorted::{SearchResult, SearchSorted, SearchSortedSide};
    use crate::validity::Validity;
    use crate::{ArrayRef, IntoArray};

    // Cases are supplied by the `search_sorted_conformance` template; each one searches the
    // given array for `value` on `side` and compares against `expected`.
    #[apply(search_sorted_conformance)]
    fn test_search_sorted_primitive(
        #[case] array: ArrayRef,
        #[case] value: i32,
        #[case] side: SearchSortedSide,
        #[case] expected: SearchResult,
    ) {
        let res = array
            .as_primitive_typed()
            .search_sorted(&Some(PValue::from(value)), side);
        assert_eq!(res, expected);
    }

    // Runs the mask conformance helper once per validity variant.
    #[test]
    fn test_mask_primitive_array() {
        test_mask_conformance(
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::NonNullable).as_ref(),
        );
        test_mask_conformance(
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::AllValid).as_ref(),
        );
        test_mask_conformance(
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::AllInvalid).as_ref(),
        );
        test_mask_conformance(
            PrimitiveArray::new(
                buffer![0, 1, 2, 3, 4],
                Validity::Array(
                    BoolArray::from_iter([true, false, true, false, true]).into_array(),
                ),
            )
            .as_ref(),
        );
    }

    // Runs the filter conformance helper over several array lengths and validity variants.
    #[test]
    fn test_filter_primitive_array() {
        // Test various sizes
        test_filter_conformance(
            PrimitiveArray::new(buffer![42i32], Validity::NonNullable).as_ref(),
        );
        test_filter_conformance(PrimitiveArray::new(buffer![0, 1], Validity::NonNullable).as_ref());
        test_filter_conformance(
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::NonNullable).as_ref(),
        );
        test_filter_conformance(
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4, 5, 6, 7], Validity::NonNullable).as_ref(),
        );

        // Test with validity
        test_filter_conformance(
            PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::AllValid).as_ref(),
        );
        test_filter_conformance(
            PrimitiveArray::new(
                buffer![0, 1, 2, 3, 4, 5],
                Validity::Array(
                    BoolArray::from_iter([true, false, true, false, true, true]).into_array(),
                ),
            )
            .as_ref(),
        );
    }
}
458}