Skip to main content

vortex_array/arrays/primitive/array/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::iter;
5
6use vortex_buffer::Alignment;
7use vortex_buffer::Buffer;
8use vortex_buffer::BufferMut;
9use vortex_buffer::ByteBuffer;
10use vortex_buffer::ByteBufferMut;
11use vortex_dtype::DType;
12use vortex_dtype::NativePType;
13use vortex_dtype::Nullability;
14use vortex_dtype::PType;
15use vortex_dtype::match_each_native_ptype;
16use vortex_error::VortexExpect;
17use vortex_error::VortexResult;
18use vortex_error::vortex_err;
19
20use crate::ToCanonical;
21use crate::stats::ArrayStats;
22use crate::validity::Validity;
23use crate::vtable::ValidityHelper;
24
25mod accessor;
26mod cast;
27mod conversion;
28mod patch;
29mod top_value;
30
31pub use patch::chunk_range;
32pub use patch::patch_chunk;
33
34use crate::buffer::BufferHandle;
35
/// A primitive array that stores [native types][vortex_dtype::NativePType] in a contiguous buffer
/// of memory, along with an optional validity child.
///
/// This mirrors the Apache Arrow Primitive layout and can be converted into and out of one
/// without allocations or copies.
///
/// The underlying buffer must be natively aligned to the primitive type it represents.
///
/// Values are stored in their native representation with proper alignment.
/// Null values still occupy space in the buffer but are marked invalid in the validity mask.
///
/// # Examples
///
/// ```
/// # fn main() -> vortex_error::VortexResult<()> {
/// use vortex_array::arrays::PrimitiveArray;
/// use vortex_array::compute::sum;
///
/// // Create from iterator using FromIterator impl
/// let array: PrimitiveArray = [1i32, 2, 3, 4, 5].into_iter().collect();
///
/// // Slice the array
/// let sliced = array.slice(1..3)?;
///
/// // Access individual values
/// let value = sliced.scalar_at(0).unwrap();
/// assert_eq!(value, 2i32.into());
///
/// // Convert into a type-erased array that can be passed to compute functions.
/// let summed = sum(sliced.as_ref()).unwrap().as_primitive().typed_value::<i64>().unwrap();
/// assert_eq!(summed, 5i64);
/// # Ok(())
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct PrimitiveArray {
    /// Logical type of the array. The constructors in this module always set it to
    /// `DType::Primitive`, combining the primitive type with the nullability of `validity`.
    pub(super) dtype: DType,
    /// Handle to the buffer that holds the element values.
    pub(super) buffer: BufferHandle,
    /// Validity of the elements.
    pub(super) validity: Validity,
    /// Statistics attached to this array; the constructors in this module initialise it with
    /// its `Default` value.
    pub(super) stats_set: ArrayStats,
}
77
78pub struct PrimitiveArrayParts {
79    pub ptype: PType,
80    pub buffer: BufferHandle,
81    pub validity: Validity,
82}
83
84// TODO(connor): There are a lot of places where we could be using `new_unchecked` in the codebase.
85impl PrimitiveArray {
86    /// Create a new array from a buffer handle.
87    ///
88    /// # Safety
89    ///
90    /// Should ensure that the provided BufferHandle points at sufficiently large region of aligned
91    /// memory to hold the `ptype` values.
92    pub unsafe fn new_unchecked_from_handle(
93        handle: BufferHandle,
94        ptype: PType,
95        validity: Validity,
96    ) -> Self {
97        Self {
98            buffer: handle,
99            dtype: DType::Primitive(ptype, validity.nullability()),
100            validity,
101            stats_set: ArrayStats::default(),
102        }
103    }
104
105    /// Creates a new [`PrimitiveArray`].
106    ///
107    /// # Panics
108    ///
109    /// Panics if the provided components do not satisfy the invariants documented
110    /// in [`PrimitiveArray::new_unchecked`].
111    pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
112        let buffer = buffer.into();
113        Self::try_new(buffer, validity).vortex_expect("PrimitiveArray construction failed")
114    }
115
116    /// Constructs a new `PrimitiveArray`.
117    ///
118    /// See [`PrimitiveArray::new_unchecked`] for more information.
119    ///
120    /// # Errors
121    ///
122    /// Returns an error if the provided components do not satisfy the invariants documented in
123    /// [`PrimitiveArray::new_unchecked`].
124    #[inline]
125    pub fn try_new<T: NativePType>(buffer: Buffer<T>, validity: Validity) -> VortexResult<Self> {
126        Self::validate(&buffer, &validity)?;
127
128        // SAFETY: validate ensures all invariants are met.
129        Ok(unsafe { Self::new_unchecked(buffer, validity) })
130    }
131
132    /// Creates a new [`PrimitiveArray`] without validation from these components:
133    ///
134    /// * `buffer` is a typed buffer containing the primitive values.
135    /// * `validity` holds the null values.
136    ///
137    /// # Safety
138    ///
139    /// The caller must ensure all of the following invariants are satisfied:
140    ///
141    /// ## Validity Requirements
142    ///
143    /// - If `validity` is [`Validity::Array`], its length must exactly equal `buffer.len()`.
144    #[inline]
145    pub unsafe fn new_unchecked<T: NativePType>(buffer: Buffer<T>, validity: Validity) -> Self {
146        #[cfg(debug_assertions)]
147        Self::validate(&buffer, &validity)
148            .vortex_expect("[Debug Assertion]: Invalid `PrimitiveArray` parameters");
149
150        Self {
151            dtype: DType::Primitive(T::PTYPE, validity.nullability()),
152            buffer: BufferHandle::new_host(buffer.into_byte_buffer()),
153            validity,
154            stats_set: Default::default(),
155        }
156    }
157
158    /// Validates the components that would be used to create a [`PrimitiveArray`].
159    ///
160    /// This function checks all the invariants required by [`PrimitiveArray::new_unchecked`].
161    #[inline]
162    pub fn validate<T: NativePType>(buffer: &Buffer<T>, validity: &Validity) -> VortexResult<()> {
163        if let Some(len) = validity.maybe_len()
164            && buffer.len() != len
165        {
166            return Err(vortex_err!(
167                InvalidArgument:
168                "Buffer and validity length mismatch: buffer={}, validity={}",
169                buffer.len(),
170                len
171            ));
172        }
173        Ok(())
174    }
175
176    pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
177        Self::new(Buffer::<T>::empty(), nullability.into())
178    }
179}
180
181impl PrimitiveArray {
182    /// Consume the primitive array and returns its component parts.
183    pub fn into_parts(self) -> PrimitiveArrayParts {
184        let ptype = self.ptype();
185        PrimitiveArrayParts {
186            ptype,
187            buffer: self.buffer,
188            validity: self.validity,
189        }
190    }
191}
192
193impl PrimitiveArray {
    /// Returns the primitive type of this array, obtained from its dtype via `as_ptype`.
    pub fn ptype(&self) -> PType {
        self.dtype().as_ptype()
    }
197
    /// Returns a reference to the [`BufferHandle`] backing this array's values.
    ///
    /// The handle is borrowed, not cloned.
    pub fn buffer_handle(&self) -> &BufferHandle {
        &self.buffer
    }
202
203    pub fn from_buffer_handle(handle: BufferHandle, ptype: PType, validity: Validity) -> Self {
204        let dtype = DType::Primitive(ptype, validity.nullability());
205        Self {
206            buffer: handle,
207            dtype,
208            validity,
209            stats_set: ArrayStats::default(),
210        }
211    }
212
213    pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
214        match_each_native_ptype!(ptype, |T| {
215            Self::new::<T>(Buffer::from_byte_buffer(buffer), validity)
216        })
217    }
218
219    /// Create a PrimitiveArray from a byte buffer containing only the valid elements.
220    pub fn from_values_byte_buffer(
221        valid_elems_buffer: ByteBuffer,
222        ptype: PType,
223        validity: Validity,
224        n_rows: usize,
225    ) -> Self {
226        let byte_width = ptype.byte_width();
227        let alignment = Alignment::new(byte_width);
228        let buffer = match &validity {
229            Validity::AllValid | Validity::NonNullable => valid_elems_buffer.aligned(alignment),
230            Validity::AllInvalid => ByteBuffer::zeroed_aligned(n_rows * byte_width, alignment),
231            Validity::Array(is_valid) => {
232                let bool_array = is_valid.to_bool();
233                let bool_buffer = bool_array.to_bit_buffer();
234                let mut bytes = ByteBufferMut::zeroed_aligned(n_rows * byte_width, alignment);
235                for (i, valid_i) in bool_buffer.set_indices().enumerate() {
236                    bytes[valid_i * byte_width..(valid_i + 1) * byte_width]
237                        .copy_from_slice(&valid_elems_buffer[i * byte_width..(i + 1) * byte_width])
238                }
239                bytes.freeze()
240            }
241        };
242
243        Self::from_byte_buffer(buffer, ptype, validity)
244    }
245
246    /// Map each element in the array to a new value.
247    ///
248    /// This ignores validity and maps over all maybe-null elements.
249    ///
250    /// TODO(ngates): we could be smarter here if validity is sparse and only run the function
251    ///   over the valid elements.
252    pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
253    where
254        T: NativePType,
255        R: NativePType,
256        F: FnMut(T) -> R,
257    {
258        let validity = self.validity().clone();
259        let buffer = match self.try_into_buffer_mut() {
260            Ok(buffer_mut) => buffer_mut.map_each_in_place(f),
261            Err(buffer) => BufferMut::from_iter(buffer.iter().copied().map(f)),
262        };
263        PrimitiveArray::new(buffer.freeze(), validity)
264    }
265
266    /// Map each element in the array to a new value.
267    ///
268    /// This doesn't ignore validity and maps over all maybe-null elements, with a bool true if
269    /// valid and false otherwise.
270    pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
271    where
272        T: NativePType,
273        R: NativePType,
274        F: FnMut((T, bool)) -> R,
275    {
276        let validity = self.validity();
277
278        let buf_iter = self.to_buffer::<T>().into_iter();
279
280        let buffer = match &validity {
281            Validity::NonNullable | Validity::AllValid => {
282                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
283            }
284            Validity::AllInvalid => {
285                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
286            }
287            Validity::Array(val) => {
288                let val = val.to_bool().into_bit_buffer();
289                BufferMut::<R>::from_iter(buf_iter.zip(val.iter()).map(f))
290            }
291        };
292        Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
293    }
294}