vortex_array/arrays/primitive/array/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::iter;
5
6use vortex_buffer::Alignment;
7use vortex_buffer::Buffer;
8use vortex_buffer::BufferMut;
9use vortex_buffer::ByteBuffer;
10use vortex_buffer::ByteBufferMut;
11use vortex_dtype::DType;
12use vortex_dtype::NativePType;
13use vortex_dtype::Nullability;
14use vortex_dtype::PType;
15use vortex_dtype::match_each_native_ptype;
16use vortex_error::VortexExpect;
17use vortex_error::VortexResult;
18use vortex_error::vortex_err;
19
20use crate::ToCanonical;
21use crate::stats::ArrayStats;
22use crate::validity::Validity;
23use crate::vtable::ValidityHelper;
24
25mod accessor;
26mod cast;
27mod conversion;
28mod patch;
29mod top_value;
30
31pub use patch::patch_chunk;
32
/// A primitive array that stores [native types][vortex_dtype::NativePType] in a contiguous buffer
/// of memory, along with an optional validity child.
///
/// This mirrors the Apache Arrow Primitive layout and can be converted into and out of one
/// without allocations or copies.
///
/// The underlying buffer must be natively aligned to the primitive type it represents.
///
/// Values are stored in their native representation with proper alignment.
/// Null values still occupy space in the buffer but are marked invalid in the validity mask.
///
/// # Examples
///
/// ```
/// use vortex_array::arrays::PrimitiveArray;
/// use vortex_array::compute::sum;
///
/// // Create from iterator using FromIterator impl
/// let array: PrimitiveArray = [1i32, 2, 3, 4, 5].into_iter().collect();
///
/// // Slice the array
/// let sliced = array.slice(1..3);
///
/// // Access individual values
/// let value = sliced.scalar_at(0);
/// assert_eq!(value, 2i32.into());
///
/// // Convert into a type-erased array that can be passed to compute functions.
/// let summed = sum(sliced.as_ref()).unwrap().as_primitive().typed_value::<i64>().unwrap();
/// assert_eq!(summed, 5i64);
/// ```
#[derive(Clone, Debug)]
pub struct PrimitiveArray {
    /// Logical type of the array. [`PrimitiveArray::new_unchecked`] sets this to
    /// `DType::Primitive` using the element `PType` and the nullability of `validity`.
    pub(super) dtype: DType,
    /// The element values, held as an untyped byte buffer.
    pub(super) buffer: ByteBuffer,
    /// Validity of the elements.
    pub(super) validity: Validity,
    /// Statistics for this array; [`PrimitiveArray::new_unchecked`] starts it at its default.
    pub(super) stats_set: ArrayStats,
}
71
72// TODO(connor): There are a lot of places where we could be using `new_unchecked` in the codebase.
73impl PrimitiveArray {
74    /// Creates a new [`PrimitiveArray`].
75    ///
76    /// # Panics
77    ///
78    /// Panics if the provided components do not satisfy the invariants documented
79    /// in [`PrimitiveArray::new_unchecked`].
80    pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
81        let buffer = buffer.into();
82        Self::try_new(buffer, validity).vortex_expect("PrimitiveArray construction failed")
83    }
84
85    /// Constructs a new `PrimitiveArray`.
86    ///
87    /// See [`PrimitiveArray::new_unchecked`] for more information.
88    ///
89    /// # Errors
90    ///
91    /// Returns an error if the provided components do not satisfy the invariants documented in
92    /// [`PrimitiveArray::new_unchecked`].
93    #[inline]
94    pub fn try_new<T: NativePType>(buffer: Buffer<T>, validity: Validity) -> VortexResult<Self> {
95        Self::validate(&buffer, &validity)?;
96
97        // SAFETY: validate ensures all invariants are met.
98        Ok(unsafe { Self::new_unchecked(buffer, validity) })
99    }
100
101    /// Creates a new [`PrimitiveArray`] without validation from these components:
102    ///
103    /// * `buffer` is a typed buffer containing the primitive values.
104    /// * `validity` holds the null values.
105    ///
106    /// # Safety
107    ///
108    /// The caller must ensure all of the following invariants are satisfied:
109    ///
110    /// ## Validity Requirements
111    ///
112    /// - If `validity` is [`Validity::Array`], its length must exactly equal `buffer.len()`.
113    #[inline]
114    pub unsafe fn new_unchecked<T: NativePType>(buffer: Buffer<T>, validity: Validity) -> Self {
115        #[cfg(debug_assertions)]
116        Self::validate(&buffer, &validity)
117            .vortex_expect("[Debug Assertion]: Invalid `PrimitiveArray` parameters");
118
119        Self {
120            dtype: DType::Primitive(T::PTYPE, validity.nullability()),
121            buffer: buffer.into_byte_buffer(),
122            validity,
123            stats_set: Default::default(),
124        }
125    }
126
127    /// Validates the components that would be used to create a [`PrimitiveArray`].
128    ///
129    /// This function checks all the invariants required by [`PrimitiveArray::new_unchecked`].
130    #[inline]
131    pub fn validate<T: NativePType>(buffer: &Buffer<T>, validity: &Validity) -> VortexResult<()> {
132        if let Some(len) = validity.maybe_len()
133            && buffer.len() != len
134        {
135            return Err(vortex_err!(
136                "Buffer and validity length mismatch: buffer={}, validity={}",
137                buffer.len(),
138                len
139            ));
140        }
141        Ok(())
142    }
143
144    pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
145        Self::new(Buffer::<T>::empty(), nullability.into())
146    }
147
148    pub fn ptype(&self) -> PType {
149        self.dtype().as_ptype()
150    }
151
152    pub fn byte_buffer(&self) -> &ByteBuffer {
153        &self.buffer
154    }
155
156    pub fn into_byte_buffer(self) -> ByteBuffer {
157        self.buffer
158    }
159
160    pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
161        match_each_native_ptype!(ptype, |T| {
162            Self::new::<T>(Buffer::from_byte_buffer(buffer), validity)
163        })
164    }
165
166    /// Create a PrimitiveArray from a byte buffer containing only the valid elements.
167    pub fn from_values_byte_buffer(
168        valid_elems_buffer: ByteBuffer,
169        ptype: PType,
170        validity: Validity,
171        n_rows: usize,
172    ) -> Self {
173        let byte_width = ptype.byte_width();
174        let alignment = Alignment::new(byte_width);
175        let buffer = match &validity {
176            Validity::AllValid | Validity::NonNullable => valid_elems_buffer.aligned(alignment),
177            Validity::AllInvalid => ByteBuffer::zeroed_aligned(n_rows * byte_width, alignment),
178            Validity::Array(is_valid) => {
179                let bool_array = is_valid.to_bool();
180                let bool_buffer = bool_array.bit_buffer();
181                let mut bytes = ByteBufferMut::zeroed_aligned(n_rows * byte_width, alignment);
182                for (i, valid_i) in bool_buffer.set_indices().enumerate() {
183                    bytes[valid_i * byte_width..(valid_i + 1) * byte_width]
184                        .copy_from_slice(&valid_elems_buffer[i * byte_width..(i + 1) * byte_width])
185                }
186                bytes.freeze()
187            }
188        };
189
190        Self::from_byte_buffer(buffer, ptype, validity)
191    }
192
193    /// Map each element in the array to a new value.
194    ///
195    /// This ignores validity and maps over all maybe-null elements.
196    ///
197    /// TODO(ngates): we could be smarter here if validity is sparse and only run the function
198    ///   over the valid elements.
199    pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
200    where
201        T: NativePType,
202        R: NativePType,
203        F: FnMut(T) -> R,
204    {
205        let validity = self.validity().clone();
206        let buffer = match self.try_into_buffer_mut() {
207            Ok(buffer_mut) => buffer_mut.map_each_in_place(f),
208            Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
209        };
210        PrimitiveArray::new(buffer.freeze(), validity)
211    }
212
213    /// Map each element in the array to a new value.
214    ///
215    /// This doesn't ignore validity and maps over all maybe-null elements, with a bool true if
216    /// valid and false otherwise.
217    pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
218    where
219        T: NativePType,
220        R: NativePType,
221        F: FnMut((T, bool)) -> R,
222    {
223        let validity = self.validity();
224
225        let buf_iter = self.buffer::<T>().into_iter();
226
227        let buffer = match &validity {
228            Validity::NonNullable | Validity::AllValid => {
229                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
230            }
231            Validity::AllInvalid => {
232                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
233            }
234            Validity::Array(val) => {
235                let val = val.to_bool();
236                BufferMut::<R>::from_iter(buf_iter.zip(val.bit_buffer()).map(f))
237            }
238        };
239        Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
240    }
241}