vortex_array/arrays/primitive/
mod.rs

1use std::fmt::Debug;
2use std::iter;
3
4mod accessor;
5
6use arrow_buffer::BooleanBufferBuilder;
7use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
8use vortex_dtype::{DType, NativePType, Nullability, PType, match_each_native_ptype};
9use vortex_error::{VortexResult, vortex_panic};
10use vortex_mask::Mask;
11
12use crate::array::{ArrayCanonicalImpl, ArrayValidityImpl};
13use crate::builders::ArrayBuilder;
14use crate::stats::{ArrayStats, StatsSetRef};
15use crate::validity::Validity;
16use crate::variants::PrimitiveArrayTrait;
17use crate::vtable::{EncodingVTable, VTableRef};
18use crate::{
19    Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayVariantsImpl, Canonical, EmptyMetadata,
20    Encoding, EncodingId, IntoArray, try_from_array_ref,
21};
22
23mod compute;
24mod patch;
25mod serde;
26mod stats;
27
28#[derive(Clone, Debug)]
29pub struct PrimitiveArray {
30    dtype: DType,
31    buffer: ByteBuffer,
32    validity: Validity,
33    stats_set: ArrayStats,
34}
35
36try_from_array_ref!(PrimitiveArray);
37
38pub struct PrimitiveEncoding;
39impl Encoding for PrimitiveEncoding {
40    type Array = PrimitiveArray;
41    type Metadata = EmptyMetadata;
42}
43
44impl EncodingVTable for PrimitiveEncoding {
45    fn id(&self) -> EncodingId {
46        EncodingId::new_ref("vortex.primitive")
47    }
48}
49
50impl PrimitiveArray {
51    pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
52        let buffer = buffer.into();
53        if let Some(len) = validity.maybe_len() {
54            if buffer.len() != len {
55                vortex_panic!(
56                    "Buffer and validity length mismatch: buffer={}, validity={}",
57                    buffer.len(),
58                    len
59                );
60            }
61        }
62        Self {
63            dtype: DType::Primitive(T::PTYPE, validity.nullability()),
64            buffer: buffer.into_byte_buffer(),
65            validity,
66            stats_set: Default::default(),
67        }
68    }
69
70    pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
71        Self::new(Buffer::<T>::empty(), nullability.into())
72    }
73
74    pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
75        match_each_native_ptype!(ptype, |$T| {
76            Self::new::<$T>(Buffer::from_byte_buffer(buffer), validity)
77        })
78    }
79
80    /// Create a PrimitiveArray from an iterator of `T`.
81    /// NOTE: we cannot impl FromIterator trait since it conflicts with `FromIterator<T>`.
82    pub fn from_option_iter<T: NativePType, I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
83        let iter = iter.into_iter();
84        let mut values = BufferMut::with_capacity(iter.size_hint().0);
85        let mut validity = BooleanBufferBuilder::new(values.capacity());
86
87        for i in iter {
88            match i {
89                None => {
90                    validity.append(false);
91                    values.push(T::default());
92                }
93                Some(e) => {
94                    validity.append(true);
95                    values.push(e);
96                }
97            }
98        }
99        Self::new(values.freeze(), Validity::from(validity.finish()))
100    }
101
102    pub fn validity(&self) -> &Validity {
103        &self.validity
104    }
105
106    pub fn byte_buffer(&self) -> &ByteBuffer {
107        &self.buffer
108    }
109
110    pub fn into_byte_buffer(self) -> ByteBuffer {
111        self.buffer
112    }
113
114    pub fn buffer<T: NativePType>(&self) -> Buffer<T> {
115        if T::PTYPE != self.ptype() {
116            vortex_panic!(
117                "Attempted to get buffer of type {} from array of type {}",
118                T::PTYPE,
119                self.ptype()
120            )
121        }
122        Buffer::from_byte_buffer(self.byte_buffer().clone())
123    }
124
125    pub fn into_buffer<T: NativePType>(self) -> Buffer<T> {
126        if T::PTYPE != self.ptype() {
127            vortex_panic!(
128                "Attempted to get buffer of type {} from array of type {}",
129                T::PTYPE,
130                self.ptype()
131            )
132        }
133        Buffer::from_byte_buffer(self.buffer)
134    }
135
136    /// Extract a mutable buffer from the PrimitiveArray. Attempts to do this with zero-copy
137    /// if the buffer is uniquely owned, otherwise will make a copy.
138    pub fn into_buffer_mut<T: NativePType>(self) -> BufferMut<T> {
139        if T::PTYPE != self.ptype() {
140            vortex_panic!(
141                "Attempted to get buffer_mut of type {} from array of type {}",
142                T::PTYPE,
143                self.ptype()
144            )
145        }
146        self.into_buffer()
147            .try_into_mut()
148            .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
149    }
150
151    /// Try to extract a mutable buffer from the PrimitiveArray with zero copy.
152    #[allow(clippy::panic_in_result_fn)]
153    pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, PrimitiveArray> {
154        if T::PTYPE != self.ptype() {
155            vortex_panic!(
156                "Attempted to get buffer_mut of type {} from array of type {}",
157                T::PTYPE,
158                self.ptype()
159            )
160        }
161        let validity = self.validity().clone();
162        Buffer::<T>::from_byte_buffer(self.into_byte_buffer())
163            .try_into_mut()
164            .map_err(|buffer| PrimitiveArray::new(buffer, validity))
165    }
166
167    /// Map each element in the array to a new value.
168    ///
169    /// This ignores validity and maps over all maybe-null elements.
170    ///
171    /// TODO(ngates): we could be smarter here if validity is sparse and only run the function
172    ///   over the valid elements.
173    pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
174    where
175        T: NativePType,
176        R: NativePType,
177        F: FnMut(T) -> R,
178    {
179        let validity = self.validity().clone();
180        let buffer = match self.try_into_buffer_mut() {
181            Ok(buffer_mut) => buffer_mut.map_each(f),
182            Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
183        };
184        PrimitiveArray::new(buffer.freeze(), validity)
185    }
186
187    /// Map each element in the array to a new value.
188    ///
189    /// This doesn't ignore validity and maps over all maybe-null elements, with a bool true if
190    /// valid and false otherwise.
191    pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
192    where
193        T: NativePType,
194        R: NativePType,
195        F: FnMut((T, bool)) -> R,
196    {
197        let validity = self.validity();
198
199        let buf_iter = self.buffer::<T>().into_iter();
200
201        let buffer = match &validity {
202            Validity::NonNullable | Validity::AllValid => {
203                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
204            }
205            Validity::AllInvalid => {
206                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
207            }
208            Validity::Array(val) => {
209                let val = val.to_canonical()?.into_bool()?;
210                BufferMut::<R>::from_iter(buf_iter.zip(val.boolean_buffer()).map(f))
211            }
212        };
213        Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
214    }
215
216    /// Return a slice of the array's buffer.
217    ///
218    /// NOTE: these values may be nonsense if the validity buffer indicates that the value is null.
219    pub fn as_slice<T: NativePType>(&self) -> &[T] {
220        if T::PTYPE != self.ptype() {
221            vortex_panic!(
222                "Attempted to get slice of type {} from array of type {}",
223                T::PTYPE,
224                self.ptype()
225            )
226        }
227        let length = self.len();
228        let raw_slice = self.byte_buffer().as_slice();
229        debug_assert_eq!(raw_slice.len() / size_of::<T>(), length);
230        // SAFETY: alignment of Buffer is checked on construction
231        unsafe { std::slice::from_raw_parts(raw_slice.as_ptr().cast(), length) }
232    }
233
234    pub fn get_as_cast<T: NativePType>(&self, idx: usize) -> T {
235        match_each_native_ptype!(self.ptype(), |$P| {
236            T::from(self.as_slice::<$P>()[idx]).expect("failed to cast")
237        })
238    }
239
240    pub fn reinterpret_cast(&self, ptype: PType) -> Self {
241        if self.ptype() == ptype {
242            return self.clone();
243        }
244
245        assert_eq!(
246            self.ptype().byte_width(),
247            ptype.byte_width(),
248            "can't reinterpret cast between integers of two different widths"
249        );
250
251        PrimitiveArray::from_byte_buffer(self.byte_buffer().clone(), ptype, self.validity().clone())
252    }
253}
254
255impl ArrayImpl for PrimitiveArray {
256    type Encoding = PrimitiveEncoding;
257
258    fn _len(&self) -> usize {
259        self.byte_buffer().len() / self.ptype().byte_width()
260    }
261
262    fn _dtype(&self) -> &DType {
263        &self.dtype
264    }
265    fn _vtable(&self) -> VTableRef {
266        VTableRef::new_ref(&PrimitiveEncoding)
267    }
268}
269
270impl ArrayStatisticsImpl for PrimitiveArray {
271    fn _stats_ref(&self) -> StatsSetRef<'_> {
272        self.stats_set.to_ref(self)
273    }
274}
275
276impl ArrayVariantsImpl for PrimitiveArray {
277    fn _as_primitive_typed(&self) -> Option<&dyn PrimitiveArrayTrait> {
278        Some(self)
279    }
280}
281
282impl PrimitiveArrayTrait for PrimitiveArray {}
283
284impl<T: NativePType> FromIterator<T> for PrimitiveArray {
285    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
286        let values = BufferMut::from_iter(iter);
287        PrimitiveArray::new(values.freeze(), Validity::NonNullable)
288    }
289}
290
291impl<T: NativePType> IntoArray for Buffer<T> {
292    fn into_array(self) -> ArrayRef {
293        PrimitiveArray::new(self, Validity::NonNullable).into_array()
294    }
295}
296
297impl<T: NativePType> IntoArray for BufferMut<T> {
298    fn into_array(self) -> ArrayRef {
299        self.freeze().into_array()
300    }
301}
302
303impl ArrayCanonicalImpl for PrimitiveArray {
304    fn _to_canonical(&self) -> VortexResult<Canonical> {
305        Ok(Canonical::Primitive(self.clone()))
306    }
307
308    fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
309        builder.extend_from_array(self)
310    }
311}
312
313impl ArrayValidityImpl for PrimitiveArray {
314    fn _is_valid(&self, index: usize) -> VortexResult<bool> {
315        self.validity.is_valid(index)
316    }
317
318    fn _all_valid(&self) -> VortexResult<bool> {
319        self.validity.all_valid()
320    }
321
322    fn _all_invalid(&self) -> VortexResult<bool> {
323        self.validity.all_invalid()
324    }
325
326    fn _validity_mask(&self) -> VortexResult<Mask> {
327        self.validity.to_logical(self.len())
328    }
329}
330
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;

    use crate::array::Array;
    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::test_harness::test_mask;
    use crate::validity::Validity;

    /// Runs the shared mask test harness against one array per validity variant.
    #[test]
    fn test_mask_primitive_array() {
        let validities = [
            Validity::NonNullable,
            Validity::AllValid,
            Validity::AllInvalid,
            Validity::Array(BoolArray::from_iter([true, false, true, false, true]).into_array()),
        ];

        for validity in validities {
            let array = PrimitiveArray::new(buffer![0, 1, 2, 3, 4], validity);
            test_mask(&array);
        }
    }
}
359}