Skip to main content

vortex_array/arrays/primitive/array/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::iter;
5
6use vortex_buffer::Alignment;
7use vortex_buffer::Buffer;
8use vortex_buffer::BufferMut;
9use vortex_buffer::ByteBuffer;
10use vortex_buffer::ByteBufferMut;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_err;
14
15use crate::ToCanonical;
16use crate::dtype::DType;
17use crate::dtype::NativePType;
18use crate::dtype::Nullability;
19use crate::dtype::PType;
20use crate::match_each_native_ptype;
21use crate::stats::ArrayStats;
22use crate::validity::Validity;
23use crate::vtable::ValidityHelper;
24
25mod accessor;
26mod cast;
27mod conversion;
28mod patch;
29mod top_value;
30
31pub use patch::chunk_range;
32pub use patch::patch_chunk;
33
34use crate::buffer::BufferHandle;
35
36/// A primitive array that stores [native types][crate::dtype::NativePType] in a contiguous buffer
37/// of memory, along with an optional validity child.
38///
39/// This mirrors the Apache Arrow Primitive layout and can be converted into and out of one
40/// without allocations or copies.
41///
42/// The underlying buffer must be natively aligned to the primitive type they are representing.
43///
44/// Values are stored in their native representation with proper alignment.
45/// Null values still occupy space in the buffer but are marked invalid in the validity mask.
46///
47/// # Examples
48///
49/// ```
50/// # fn main() -> vortex_error::VortexResult<()> {
51/// use vortex_array::arrays::PrimitiveArray;
52/// use vortex_array::compute::sum;
53///
54/// // Create from iterator using FromIterator impl
55/// let array: PrimitiveArray = [1i32, 2, 3, 4, 5].into_iter().collect();
56///
57/// // Slice the array
58/// let sliced = array.slice(1..3)?;
59///
60/// // Access individual values
61/// let value = sliced.scalar_at(0).unwrap();
62/// assert_eq!(value, 2i32.into());
63///
64/// // Convert into a type-erased array that can be passed to compute functions.
65/// use vortex_array::IntoArray;
66/// let summed = sum(&sliced.into_array()).unwrap().as_primitive().typed_value::<i64>().unwrap();
67/// assert_eq!(summed, 5i64);
68/// # Ok(())
69/// # }
70/// ```
71#[derive(Clone, Debug)]
72pub struct PrimitiveArray {
73    pub(super) dtype: DType,
74    pub(super) buffer: BufferHandle,
75    pub(super) validity: Validity,
76    pub(super) stats_set: ArrayStats,
77}
78
79pub struct PrimitiveArrayParts {
80    pub ptype: PType,
81    pub buffer: BufferHandle,
82    pub validity: Validity,
83}
84
85// TODO(connor): There are a lot of places where we could be using `new_unchecked` in the codebase.
86impl PrimitiveArray {
87    /// Create a new array from a buffer handle.
88    ///
89    /// # Safety
90    ///
91    /// Should ensure that the provided BufferHandle points at sufficiently large region of aligned
92    /// memory to hold the `ptype` values.
93    pub unsafe fn new_unchecked_from_handle(
94        handle: BufferHandle,
95        ptype: PType,
96        validity: Validity,
97    ) -> Self {
98        Self {
99            buffer: handle,
100            dtype: DType::Primitive(ptype, validity.nullability()),
101            validity,
102            stats_set: ArrayStats::default(),
103        }
104    }
105
106    /// Creates a new [`PrimitiveArray`].
107    ///
108    /// # Panics
109    ///
110    /// Panics if the provided components do not satisfy the invariants documented
111    /// in [`PrimitiveArray::new_unchecked`].
112    pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
113        let buffer = buffer.into();
114        Self::try_new(buffer, validity).vortex_expect("PrimitiveArray construction failed")
115    }
116
117    /// Constructs a new `PrimitiveArray`.
118    ///
119    /// See [`PrimitiveArray::new_unchecked`] for more information.
120    ///
121    /// # Errors
122    ///
123    /// Returns an error if the provided components do not satisfy the invariants documented in
124    /// [`PrimitiveArray::new_unchecked`].
125    #[inline]
126    pub fn try_new<T: NativePType>(buffer: Buffer<T>, validity: Validity) -> VortexResult<Self> {
127        Self::validate(&buffer, &validity)?;
128
129        // SAFETY: validate ensures all invariants are met.
130        Ok(unsafe { Self::new_unchecked(buffer, validity) })
131    }
132
133    /// Creates a new [`PrimitiveArray`] without validation from these components:
134    ///
135    /// * `buffer` is a typed buffer containing the primitive values.
136    /// * `validity` holds the null values.
137    ///
138    /// # Safety
139    ///
140    /// The caller must ensure all of the following invariants are satisfied:
141    ///
142    /// ## Validity Requirements
143    ///
144    /// - If `validity` is [`Validity::Array`], its length must exactly equal `buffer.len()`.
145    #[inline]
146    pub unsafe fn new_unchecked<T: NativePType>(buffer: Buffer<T>, validity: Validity) -> Self {
147        #[cfg(debug_assertions)]
148        Self::validate(&buffer, &validity)
149            .vortex_expect("[Debug Assertion]: Invalid `PrimitiveArray` parameters");
150
151        Self {
152            dtype: DType::Primitive(T::PTYPE, validity.nullability()),
153            buffer: BufferHandle::new_host(buffer.into_byte_buffer()),
154            validity,
155            stats_set: Default::default(),
156        }
157    }
158
159    /// Validates the components that would be used to create a [`PrimitiveArray`].
160    ///
161    /// This function checks all the invariants required by [`PrimitiveArray::new_unchecked`].
162    #[inline]
163    pub fn validate<T: NativePType>(buffer: &Buffer<T>, validity: &Validity) -> VortexResult<()> {
164        if let Some(len) = validity.maybe_len()
165            && buffer.len() != len
166        {
167            return Err(vortex_err!(
168                InvalidArgument:
169                "Buffer and validity length mismatch: buffer={}, validity={}",
170                buffer.len(),
171                len
172            ));
173        }
174        Ok(())
175    }
176
177    pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
178        Self::new(Buffer::<T>::empty(), nullability.into())
179    }
180}
181
182impl PrimitiveArray {
183    /// Consume the primitive array and returns its component parts.
184    pub fn into_parts(self) -> PrimitiveArrayParts {
185        let ptype = self.ptype();
186        PrimitiveArrayParts {
187            ptype,
188            buffer: self.buffer,
189            validity: self.validity,
190        }
191    }
192}
193
194impl PrimitiveArray {
195    pub fn ptype(&self) -> PType {
196        self.dtype().as_ptype()
197    }
198
199    /// Get access to the buffer handle backing the array.
200    pub fn buffer_handle(&self) -> &BufferHandle {
201        &self.buffer
202    }
203
204    pub fn from_buffer_handle(handle: BufferHandle, ptype: PType, validity: Validity) -> Self {
205        let dtype = DType::Primitive(ptype, validity.nullability());
206        Self {
207            buffer: handle,
208            dtype,
209            validity,
210            stats_set: ArrayStats::default(),
211        }
212    }
213
214    pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
215        match_each_native_ptype!(ptype, |T| {
216            Self::new::<T>(Buffer::from_byte_buffer(buffer), validity)
217        })
218    }
219
220    /// Create a PrimitiveArray from a byte buffer containing only the valid elements.
221    pub fn from_values_byte_buffer(
222        valid_elems_buffer: ByteBuffer,
223        ptype: PType,
224        validity: Validity,
225        n_rows: usize,
226    ) -> Self {
227        let byte_width = ptype.byte_width();
228        let alignment = Alignment::new(byte_width);
229        let buffer = match &validity {
230            Validity::AllValid | Validity::NonNullable => valid_elems_buffer.aligned(alignment),
231            Validity::AllInvalid => ByteBuffer::zeroed_aligned(n_rows * byte_width, alignment),
232            Validity::Array(is_valid) => {
233                let bool_array = is_valid.to_bool();
234                let bool_buffer = bool_array.to_bit_buffer();
235                let mut bytes = ByteBufferMut::zeroed_aligned(n_rows * byte_width, alignment);
236                for (i, valid_i) in bool_buffer.set_indices().enumerate() {
237                    bytes[valid_i * byte_width..(valid_i + 1) * byte_width]
238                        .copy_from_slice(&valid_elems_buffer[i * byte_width..(i + 1) * byte_width])
239                }
240                bytes.freeze()
241            }
242        };
243
244        Self::from_byte_buffer(buffer, ptype, validity)
245    }
246
247    /// Map each element in the array to a new value.
248    ///
249    /// This ignores validity and maps over all maybe-null elements.
250    ///
251    /// TODO(ngates): we could be smarter here if validity is sparse and only run the function
252    ///   over the valid elements.
253    pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
254    where
255        T: NativePType,
256        R: NativePType,
257        F: FnMut(T) -> R,
258    {
259        let validity = self.validity().clone();
260        let buffer = match self.try_into_buffer_mut() {
261            Ok(buffer_mut) => buffer_mut.map_each_in_place(f),
262            Err(buffer) => BufferMut::from_iter(buffer.iter().copied().map(f)),
263        };
264        PrimitiveArray::new(buffer.freeze(), validity)
265    }
266
267    /// Map each element in the array to a new value.
268    ///
269    /// This doesn't ignore validity and maps over all maybe-null elements, with a bool true if
270    /// valid and false otherwise.
271    pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
272    where
273        T: NativePType,
274        R: NativePType,
275        F: FnMut((T, bool)) -> R,
276    {
277        let validity = self.validity();
278
279        let buf_iter = self.to_buffer::<T>().into_iter();
280
281        let buffer = match &validity {
282            Validity::NonNullable | Validity::AllValid => {
283                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
284            }
285            Validity::AllInvalid => {
286                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
287            }
288            Validity::Array(val) => {
289                let val = val.to_bool().into_bit_buffer();
290                BufferMut::<R>::from_iter(buf_iter.zip(val.iter()).map(f))
291            }
292        };
293        Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
294    }
295}