vortex_array/arrays/primitive/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::iter;
6
7mod accessor;
8
9use arrow_buffer::BooleanBufferBuilder;
10use vortex_buffer::{Alignment, Buffer, BufferMut, ByteBuffer, ByteBufferMut};
11use vortex_dtype::{DType, NativePType, Nullability, PType, match_each_native_ptype};
12use vortex_error::{VortexResult, vortex_panic};
13
14use crate::builders::ArrayBuilder;
15use crate::stats::{ArrayStats, StatsSetRef};
16use crate::validity::Validity;
17use crate::{Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
18
19mod compute;
20mod native_value;
21mod ops;
22mod patch;
23mod serde;
24mod top_value;
25
26pub use compute::{IS_CONST_LANE_WIDTH, compute_is_constant};
27pub use native_value::NativeValue;
28
29use crate::vtable::{
30    ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
31    ValidityVTableFromValidityHelper,
32};
33
34vtable!(Primitive);
35
36impl VTable for PrimitiveVTable {
37    type Array = PrimitiveArray;
38    type Encoding = PrimitiveEncoding;
39
40    type ArrayVTable = Self;
41    type CanonicalVTable = Self;
42    type OperationsVTable = Self;
43    type ValidityVTable = ValidityVTableFromValidityHelper;
44    type VisitorVTable = Self;
45    type ComputeVTable = NotSupported;
46    type EncodeVTable = NotSupported;
47    type SerdeVTable = Self;
48
49    fn id(_encoding: &Self::Encoding) -> EncodingId {
50        EncodingId::new_ref("vortex.primitive")
51    }
52
53    fn encoding(_array: &Self::Array) -> EncodingRef {
54        EncodingRef::new_ref(PrimitiveEncoding.as_ref())
55    }
56}
57
58/// A primitive array that stores [native types][vortex_dtype::NativePType] in a contiguous buffer
59/// of memory, along with an optional validity child.
60///
61/// This mirrors the Apache Arrow Primitive layout and can be converted into and out of one
62/// without allocations or copies.
63///
64/// The underlying buffer must be natively aligned to the primitive type they are representing.
65///
66/// Values are stored in their native representation with proper alignment.
67/// Null values still occupy space in the buffer but are marked invalid in the validity mask.
68///
69/// # Examples
70///
71/// ```
72/// use vortex_array::arrays::PrimitiveArray;
73/// use vortex_array::compute::sum;
74/// ///
75/// // Create from iterator using FromIterator impl
76/// let array: PrimitiveArray = [1i32, 2, 3, 4, 5].into_iter().collect();
77///
78/// // Slice the array
79/// let sliced = array.slice(1, 3).unwrap();
80///
81/// // Access individual values
82/// let value = sliced.scalar_at(0).unwrap();
83/// assert_eq!(value, 2i32.into());
84///
85/// // Convert into a type-erased array that can be passed to compute functions.
86/// let summed = sum(sliced.as_ref()).unwrap().as_primitive().typed_value::<i64>().unwrap();
87/// assert_eq!(summed, 5i64);
88/// ```
89#[derive(Clone, Debug)]
90pub struct PrimitiveArray {
91    dtype: DType,
92    buffer: ByteBuffer,
93    validity: Validity,
94    stats_set: ArrayStats,
95}
96
/// Unit type naming the primitive encoding; it is the `Encoding` associated type of
/// `PrimitiveVTable`.
#[derive(Debug, Clone)]
pub struct PrimitiveEncoding;
99
100impl PrimitiveArray {
101    pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
102        let buffer = buffer.into();
103        if let Some(len) = validity.maybe_len() {
104            if buffer.len() != len {
105                vortex_panic!(
106                    "Buffer and validity length mismatch: buffer={}, validity={}",
107                    buffer.len(),
108                    len
109                );
110            }
111        }
112        Self {
113            dtype: DType::Primitive(T::PTYPE, validity.nullability()),
114            buffer: buffer.into_byte_buffer(),
115            validity,
116            stats_set: Default::default(),
117        }
118    }
119
120    pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
121        Self::new(Buffer::<T>::empty(), nullability.into())
122    }
123
124    pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
125        match_each_native_ptype!(ptype, |T| {
126            Self::new::<T>(Buffer::from_byte_buffer(buffer), validity)
127        })
128    }
129
130    /// Create a PrimitiveArray from an iterator of `T`.
131    /// NOTE: we cannot impl FromIterator trait since it conflicts with `FromIterator<T>`.
132    pub fn from_option_iter<T: NativePType, I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
133        let iter = iter.into_iter();
134        let mut values = BufferMut::with_capacity(iter.size_hint().0);
135        let mut validity = BooleanBufferBuilder::new(values.capacity());
136
137        for i in iter {
138            match i {
139                None => {
140                    validity.append(false);
141                    values.push(T::default());
142                }
143                Some(e) => {
144                    validity.append(true);
145                    values.push(e);
146                }
147            }
148        }
149        Self::new(values.freeze(), Validity::from(validity.finish()))
150    }
151
152    /// Create a PrimitiveArray from a byte buffer containing only the valid elements.
153    pub fn from_values_byte_buffer(
154        valid_elems_buffer: ByteBuffer,
155        ptype: PType,
156        validity: Validity,
157        n_rows: usize,
158    ) -> VortexResult<Self> {
159        let byte_width = ptype.byte_width();
160        let alignment = Alignment::new(byte_width);
161        let buffer = match &validity {
162            Validity::AllValid | Validity::NonNullable => valid_elems_buffer.aligned(alignment),
163            Validity::AllInvalid => ByteBuffer::zeroed_aligned(n_rows * byte_width, alignment),
164            Validity::Array(is_valid) => {
165                let bool_array = is_valid.to_canonical()?.into_bool()?;
166                let bool_buffer = bool_array.boolean_buffer();
167                let mut bytes = ByteBufferMut::zeroed_aligned(n_rows * byte_width, alignment);
168                for (i, valid_i) in bool_buffer.set_indices().enumerate() {
169                    bytes[valid_i * byte_width..(valid_i + 1) * byte_width]
170                        .copy_from_slice(&valid_elems_buffer[i * byte_width..(i + 1) * byte_width])
171                }
172                bytes.freeze()
173            }
174        };
175
176        Ok(Self::from_byte_buffer(buffer, ptype, validity))
177    }
178
179    pub fn ptype(&self) -> PType {
180        self.dtype().as_ptype()
181    }
182
183    pub fn byte_buffer(&self) -> &ByteBuffer {
184        &self.buffer
185    }
186
187    pub fn into_byte_buffer(self) -> ByteBuffer {
188        self.buffer
189    }
190
191    pub fn buffer<T: NativePType>(&self) -> Buffer<T> {
192        if T::PTYPE != self.ptype() {
193            vortex_panic!(
194                "Attempted to get buffer of type {} from array of type {}",
195                T::PTYPE,
196                self.ptype()
197            )
198        }
199        Buffer::from_byte_buffer(self.byte_buffer().clone())
200    }
201
202    pub fn into_buffer<T: NativePType>(self) -> Buffer<T> {
203        if T::PTYPE != self.ptype() {
204            vortex_panic!(
205                "Attempted to get buffer of type {} from array of type {}",
206                T::PTYPE,
207                self.ptype()
208            )
209        }
210        Buffer::from_byte_buffer(self.buffer)
211    }
212
213    /// Extract a mutable buffer from the PrimitiveArray. Attempts to do this with zero-copy
214    /// if the buffer is uniquely owned, otherwise will make a copy.
215    pub fn into_buffer_mut<T: NativePType>(self) -> BufferMut<T> {
216        if T::PTYPE != self.ptype() {
217            vortex_panic!(
218                "Attempted to get buffer_mut of type {} from array of type {}",
219                T::PTYPE,
220                self.ptype()
221            )
222        }
223        self.into_buffer()
224            .try_into_mut()
225            .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
226    }
227
228    /// Try to extract a mutable buffer from the PrimitiveArray with zero copy.
229    #[allow(clippy::panic_in_result_fn)]
230    pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, PrimitiveArray> {
231        if T::PTYPE != self.ptype() {
232            vortex_panic!(
233                "Attempted to get buffer_mut of type {} from array of type {}",
234                T::PTYPE,
235                self.ptype()
236            )
237        }
238        let validity = self.validity().clone();
239        Buffer::<T>::from_byte_buffer(self.into_byte_buffer())
240            .try_into_mut()
241            .map_err(|buffer| PrimitiveArray::new(buffer, validity))
242    }
243
244    /// Map each element in the array to a new value.
245    ///
246    /// This ignores validity and maps over all maybe-null elements.
247    ///
248    /// TODO(ngates): we could be smarter here if validity is sparse and only run the function
249    ///   over the valid elements.
250    pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
251    where
252        T: NativePType,
253        R: NativePType,
254        F: FnMut(T) -> R,
255    {
256        let validity = self.validity().clone();
257        let buffer = match self.try_into_buffer_mut() {
258            Ok(buffer_mut) => buffer_mut.map_each(f),
259            Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
260        };
261        PrimitiveArray::new(buffer.freeze(), validity)
262    }
263
264    /// Map each element in the array to a new value.
265    ///
266    /// This doesn't ignore validity and maps over all maybe-null elements, with a bool true if
267    /// valid and false otherwise.
268    pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
269    where
270        T: NativePType,
271        R: NativePType,
272        F: FnMut((T, bool)) -> R,
273    {
274        let validity = self.validity();
275
276        let buf_iter = self.buffer::<T>().into_iter();
277
278        let buffer = match &validity {
279            Validity::NonNullable | Validity::AllValid => {
280                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
281            }
282            Validity::AllInvalid => {
283                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
284            }
285            Validity::Array(val) => {
286                let val = val.to_canonical()?.into_bool()?;
287                BufferMut::<R>::from_iter(buf_iter.zip(val.boolean_buffer()).map(f))
288            }
289        };
290        Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
291    }
292
293    /// Return a slice of the array's buffer.
294    ///
295    /// NOTE: these values may be nonsense if the validity buffer indicates that the value is null.
296    pub fn as_slice<T: NativePType>(&self) -> &[T] {
297        if T::PTYPE != self.ptype() {
298            vortex_panic!(
299                "Attempted to get slice of type {} from array of type {}",
300                T::PTYPE,
301                self.ptype()
302            )
303        }
304        let raw_slice = self.byte_buffer().as_ptr();
305        // SAFETY: alignment of Buffer is checked on construction
306        unsafe {
307            std::slice::from_raw_parts(raw_slice.cast(), self.byte_buffer().len() / size_of::<T>())
308        }
309    }
310
311    pub fn reinterpret_cast(&self, ptype: PType) -> Self {
312        if self.ptype() == ptype {
313            return self.clone();
314        }
315
316        assert_eq!(
317            self.ptype().byte_width(),
318            ptype.byte_width(),
319            "can't reinterpret cast between integers of two different widths"
320        );
321
322        PrimitiveArray::from_byte_buffer(self.byte_buffer().clone(), ptype, self.validity().clone())
323    }
324}
325
326impl ArrayVTable<PrimitiveVTable> for PrimitiveVTable {
327    fn len(array: &PrimitiveArray) -> usize {
328        array.byte_buffer().len() / array.ptype().byte_width()
329    }
330
331    fn dtype(array: &PrimitiveArray) -> &DType {
332        &array.dtype
333    }
334
335    fn stats(array: &PrimitiveArray) -> StatsSetRef<'_> {
336        array.stats_set.to_ref(array.as_ref())
337    }
338}
339
340impl ValidityHelper for PrimitiveArray {
341    fn validity(&self) -> &Validity {
342        &self.validity
343    }
344}
345
346impl<T: NativePType> FromIterator<T> for PrimitiveArray {
347    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
348        let values = BufferMut::from_iter(iter);
349        PrimitiveArray::new(values.freeze(), Validity::NonNullable)
350    }
351}
352
353impl<T: NativePType> IntoArray for Buffer<T> {
354    fn into_array(self) -> ArrayRef {
355        PrimitiveArray::new(self, Validity::NonNullable).into_array()
356    }
357}
358
359impl<T: NativePType> IntoArray for BufferMut<T> {
360    fn into_array(self) -> ArrayRef {
361        self.freeze().into_array()
362    }
363}
364
365impl CanonicalVTable<PrimitiveVTable> for PrimitiveVTable {
366    fn canonicalize(array: &PrimitiveArray) -> VortexResult<Canonical> {
367        Ok(Canonical::Primitive(array.clone()))
368    }
369
370    fn append_to_builder(
371        array: &PrimitiveArray,
372        builder: &mut dyn ArrayBuilder,
373    ) -> VortexResult<()> {
374        builder.extend_from_array(array.as_ref())
375    }
376}
377
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_scalar::PValue;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::filter::test_filter_conformance;
    use crate::compute::conformance::mask::test_mask_conformance;
    use crate::compute::conformance::search_sorted::rstest_reuse::apply;
    use crate::compute::conformance::search_sorted::{search_sorted_conformance, *};
    use crate::search_sorted::{SearchResult, SearchSorted, SearchSortedSide};
    use crate::validity::Validity;
    use crate::{ArrayRef, IntoArray};

    /// Runs the shared search-sorted conformance cases against the primitive encoding.
    #[apply(search_sorted_conformance)]
    fn test_search_sorted_primitive(
        #[case] array: ArrayRef,
        #[case] value: i32,
        #[case] side: SearchSortedSide,
        #[case] expected: SearchResult,
    ) {
        let needle = Some(PValue::from(value));
        let res = array.as_primitive_typed().search_sorted(&needle, side);
        assert_eq!(res, expected);
    }

    /// Runs the mask conformance suite once per validity representation.
    #[test]
    fn test_mask_primitive_array() {
        let validities = [
            Validity::NonNullable,
            Validity::AllValid,
            Validity::AllInvalid,
            Validity::Array(
                BoolArray::from_iter([true, false, true, false, true]).into_array(),
            ),
        ];
        for validity in validities {
            test_mask_conformance(PrimitiveArray::new(buffer![0, 1, 2, 3, 4], validity).as_ref());
        }
    }

    /// Runs the filter conformance suite over several lengths and validity representations.
    #[test]
    fn test_filter_primitive_array() {
        // Test various sizes
        let non_nullable_inputs = [
            buffer![42i32],
            buffer![0, 1],
            buffer![0, 1, 2, 3, 4],
            buffer![0, 1, 2, 3, 4, 5, 6, 7],
        ];
        for values in non_nullable_inputs {
            test_filter_conformance(PrimitiveArray::new(values, Validity::NonNullable).as_ref());
        }

        // Test with validity
        let all_valid = PrimitiveArray::new(buffer![0, 1, 2, 3, 4], Validity::AllValid);
        test_filter_conformance(all_valid.as_ref());

        let mask = BoolArray::from_iter([true, false, true, false, true, true]).into_array();
        let with_mask = PrimitiveArray::new(buffer![0, 1, 2, 3, 4, 5], Validity::Array(mask));
        test_filter_conformance(with_mask.as_ref());
    }
}
453        );
454    }
455}