vortex_array/arrays/primitive/
mod.rs

1use std::fmt::Debug;
2use std::iter;
3
4mod accessor;
5
6use arrow_buffer::BooleanBufferBuilder;
7use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
8use vortex_dtype::{DType, NativePType, Nullability, PType, match_each_native_ptype};
9use vortex_error::{VortexResult, vortex_panic};
10
11use crate::builders::ArrayBuilder;
12use crate::stats::{ArrayStats, StatsSetRef};
13use crate::validity::Validity;
14use crate::{Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
15
16mod compute;
17mod native_value;
18mod ops;
19mod patch;
20mod serde;
21mod top_value;
22
23pub use compute::{IS_CONST_LANE_WIDTH, compute_is_constant};
24pub use native_value::NativeValue;
25
26use crate::vtable::{
27    ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
28    ValidityVTableFromValidityHelper,
29};
30
31vtable!(Primitive);
32
33impl VTable for PrimitiveVTable {
34    type Array = PrimitiveArray;
35    type Encoding = PrimitiveEncoding;
36
37    type ArrayVTable = Self;
38    type CanonicalVTable = Self;
39    type OperationsVTable = Self;
40    type ValidityVTable = ValidityVTableFromValidityHelper;
41    type VisitorVTable = Self;
42    type ComputeVTable = NotSupported;
43    type EncodeVTable = NotSupported;
44    type SerdeVTable = Self;
45
46    fn id(_encoding: &Self::Encoding) -> EncodingId {
47        EncodingId::new_ref("vortex.primitive")
48    }
49
50    fn encoding(_array: &Self::Array) -> EncodingRef {
51        EncodingRef::new_ref(PrimitiveEncoding.as_ref())
52    }
53}
54
55#[derive(Clone, Debug)]
56pub struct PrimitiveArray {
57    dtype: DType,
58    buffer: ByteBuffer,
59    validity: Validity,
60    stats_set: ArrayStats,
61}
62
/// Unit marker type for the primitive encoding.
#[derive(Debug, Clone)]
pub struct PrimitiveEncoding;
65
66impl PrimitiveArray {
67    pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
68        let buffer = buffer.into();
69        if let Some(len) = validity.maybe_len() {
70            if buffer.len() != len {
71                vortex_panic!(
72                    "Buffer and validity length mismatch: buffer={}, validity={}",
73                    buffer.len(),
74                    len
75                );
76            }
77        }
78        Self {
79            dtype: DType::Primitive(T::PTYPE, validity.nullability()),
80            buffer: buffer.into_byte_buffer(),
81            validity,
82            stats_set: Default::default(),
83        }
84    }
85
86    pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
87        Self::new(Buffer::<T>::empty(), nullability.into())
88    }
89
90    pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
91        match_each_native_ptype!(ptype, |$T| {
92            Self::new::<$T>(Buffer::from_byte_buffer(buffer), validity)
93        })
94    }
95
96    /// Create a PrimitiveArray from an iterator of `T`.
97    /// NOTE: we cannot impl FromIterator trait since it conflicts with `FromIterator<T>`.
98    pub fn from_option_iter<T: NativePType, I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
99        let iter = iter.into_iter();
100        let mut values = BufferMut::with_capacity(iter.size_hint().0);
101        let mut validity = BooleanBufferBuilder::new(values.capacity());
102
103        for i in iter {
104            match i {
105                None => {
106                    validity.append(false);
107                    values.push(T::default());
108                }
109                Some(e) => {
110                    validity.append(true);
111                    values.push(e);
112                }
113            }
114        }
115        Self::new(values.freeze(), Validity::from(validity.finish()))
116    }
117
118    pub fn ptype(&self) -> PType {
119        self.dtype().to_ptype()
120    }
121
122    pub fn byte_buffer(&self) -> &ByteBuffer {
123        &self.buffer
124    }
125
126    pub fn into_byte_buffer(self) -> ByteBuffer {
127        self.buffer
128    }
129
130    pub fn buffer<T: NativePType>(&self) -> Buffer<T> {
131        if T::PTYPE != self.ptype() {
132            vortex_panic!(
133                "Attempted to get buffer of type {} from array of type {}",
134                T::PTYPE,
135                self.ptype()
136            )
137        }
138        Buffer::from_byte_buffer(self.byte_buffer().clone())
139    }
140
141    pub fn into_buffer<T: NativePType>(self) -> Buffer<T> {
142        if T::PTYPE != self.ptype() {
143            vortex_panic!(
144                "Attempted to get buffer of type {} from array of type {}",
145                T::PTYPE,
146                self.ptype()
147            )
148        }
149        Buffer::from_byte_buffer(self.buffer)
150    }
151
152    /// Extract a mutable buffer from the PrimitiveArray. Attempts to do this with zero-copy
153    /// if the buffer is uniquely owned, otherwise will make a copy.
154    pub fn into_buffer_mut<T: NativePType>(self) -> BufferMut<T> {
155        if T::PTYPE != self.ptype() {
156            vortex_panic!(
157                "Attempted to get buffer_mut of type {} from array of type {}",
158                T::PTYPE,
159                self.ptype()
160            )
161        }
162        self.into_buffer()
163            .try_into_mut()
164            .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
165    }
166
167    /// Try to extract a mutable buffer from the PrimitiveArray with zero copy.
168    #[allow(clippy::panic_in_result_fn)]
169    pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, PrimitiveArray> {
170        if T::PTYPE != self.ptype() {
171            vortex_panic!(
172                "Attempted to get buffer_mut of type {} from array of type {}",
173                T::PTYPE,
174                self.ptype()
175            )
176        }
177        let validity = self.validity().clone();
178        Buffer::<T>::from_byte_buffer(self.into_byte_buffer())
179            .try_into_mut()
180            .map_err(|buffer| PrimitiveArray::new(buffer, validity))
181    }
182
183    /// Map each element in the array to a new value.
184    ///
185    /// This ignores validity and maps over all maybe-null elements.
186    ///
187    /// TODO(ngates): we could be smarter here if validity is sparse and only run the function
188    ///   over the valid elements.
189    pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
190    where
191        T: NativePType,
192        R: NativePType,
193        F: FnMut(T) -> R,
194    {
195        let validity = self.validity().clone();
196        let buffer = match self.try_into_buffer_mut() {
197            Ok(buffer_mut) => buffer_mut.map_each(f),
198            Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
199        };
200        PrimitiveArray::new(buffer.freeze(), validity)
201    }
202
203    /// Map each element in the array to a new value.
204    ///
205    /// This doesn't ignore validity and maps over all maybe-null elements, with a bool true if
206    /// valid and false otherwise.
207    pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
208    where
209        T: NativePType,
210        R: NativePType,
211        F: FnMut((T, bool)) -> R,
212    {
213        let validity = self.validity();
214
215        let buf_iter = self.buffer::<T>().into_iter();
216
217        let buffer = match &validity {
218            Validity::NonNullable | Validity::AllValid => {
219                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
220            }
221            Validity::AllInvalid => {
222                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
223            }
224            Validity::Array(val) => {
225                let val = val.to_canonical()?.into_bool()?;
226                BufferMut::<R>::from_iter(buf_iter.zip(val.boolean_buffer()).map(f))
227            }
228        };
229        Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
230    }
231
232    /// Return a slice of the array's buffer.
233    ///
234    /// NOTE: these values may be nonsense if the validity buffer indicates that the value is null.
235    pub fn as_slice<T: NativePType>(&self) -> &[T] {
236        if T::PTYPE != self.ptype() {
237            vortex_panic!(
238                "Attempted to get slice of type {} from array of type {}",
239                T::PTYPE,
240                self.ptype()
241            )
242        }
243        let raw_slice = self.byte_buffer().as_ptr();
244        // SAFETY: alignment of Buffer is checked on construction
245        unsafe {
246            std::slice::from_raw_parts(raw_slice.cast(), self.byte_buffer().len() / size_of::<T>())
247        }
248    }
249
250    pub fn reinterpret_cast(&self, ptype: PType) -> Self {
251        if self.ptype() == ptype {
252            return self.clone();
253        }
254
255        assert_eq!(
256            self.ptype().byte_width(),
257            ptype.byte_width(),
258            "can't reinterpret cast between integers of two different widths"
259        );
260
261        PrimitiveArray::from_byte_buffer(self.byte_buffer().clone(), ptype, self.validity().clone())
262    }
263}
264
265impl ArrayVTable<PrimitiveVTable> for PrimitiveVTable {
266    fn len(array: &PrimitiveArray) -> usize {
267        array.byte_buffer().len() / array.ptype().byte_width()
268    }
269
270    fn dtype(array: &PrimitiveArray) -> &DType {
271        &array.dtype
272    }
273
274    fn stats(array: &PrimitiveArray) -> StatsSetRef<'_> {
275        array.stats_set.to_ref(array.as_ref())
276    }
277}
278
279impl ValidityHelper for PrimitiveArray {
280    fn validity(&self) -> &Validity {
281        &self.validity
282    }
283}
284
285impl<T: NativePType> FromIterator<T> for PrimitiveArray {
286    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
287        let values = BufferMut::from_iter(iter);
288        PrimitiveArray::new(values.freeze(), Validity::NonNullable)
289    }
290}
291
292impl<T: NativePType> IntoArray for Buffer<T> {
293    fn into_array(self) -> ArrayRef {
294        PrimitiveArray::new(self, Validity::NonNullable).into_array()
295    }
296}
297
298impl<T: NativePType> IntoArray for BufferMut<T> {
299    fn into_array(self) -> ArrayRef {
300        self.freeze().into_array()
301    }
302}
303
304impl CanonicalVTable<PrimitiveVTable> for PrimitiveVTable {
305    fn canonicalize(array: &PrimitiveArray) -> VortexResult<Canonical> {
306        Ok(Canonical::Primitive(array.clone()))
307    }
308
309    fn append_to_builder(
310        array: &PrimitiveArray,
311        builder: &mut dyn ArrayBuilder,
312    ) -> VortexResult<()> {
313        builder.extend_from_array(array.as_ref())
314    }
315}
316
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_scalar::PValue;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::mask::test_mask;
    use crate::compute::conformance::search_sorted::rstest_reuse::apply;
    use crate::compute::conformance::search_sorted::{search_sorted_conformance, *};
    use crate::search_sorted::{SearchResult, SearchSorted, SearchSortedSide};
    use crate::validity::Validity;
    use crate::{ArrayRef, IntoArray};

    /// Runs the cases supplied by the `search_sorted_conformance` template against the
    /// primitive view of each array.
    #[apply(search_sorted_conformance)]
    fn search_sorted_primitive(
        #[case] array: ArrayRef,
        #[case] value: i32,
        #[case] side: SearchSortedSide,
        #[case] expected: SearchResult,
    ) {
        let needle = Some(PValue::from(value));
        let actual = array.as_primitive_typed().search_sorted(&needle, side);
        assert_eq!(actual, expected);
    }

    /// Exercises `test_mask` on the same five values under each kind of validity.
    #[test]
    fn test_mask_primitive_array() {
        let validities = [
            Validity::NonNullable,
            Validity::AllValid,
            Validity::AllInvalid,
            Validity::Array(
                BoolArray::from_iter([true, false, true, false, true]).into_array(),
            ),
        ];

        for validity in validities {
            test_mask(PrimitiveArray::new(buffer![0, 1, 2, 3, 4], validity).as_ref());
        }
    }
}