// vortex_array/arrays/primitive/mod.rs

1use std::fmt::Debug;
2use std::iter;
3
4mod accessor;
5
6use arrow_buffer::BooleanBufferBuilder;
7use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
8use vortex_dtype::{DType, NativePType, Nullability, PType, match_each_native_ptype};
9use vortex_error::{VortexResult, vortex_panic};
10use vortex_mask::Mask;
11
12use crate::array::{ArrayCanonicalImpl, ArrayValidityImpl};
13use crate::builders::ArrayBuilder;
14use crate::stats::{ArrayStats, StatsSetRef};
15use crate::validity::Validity;
16use crate::variants::PrimitiveArrayTrait;
17use crate::vtable::VTableRef;
18use crate::{
19    Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayVariantsImpl, Canonical, EmptyMetadata,
20    Encoding, IntoArray, try_from_array_ref,
21};
22
23mod compute;
24mod native_value;
25mod patch;
26mod serde;
27mod top_value;
28
29pub use compute::{IS_CONST_LANE_WIDTH, compute_is_constant};
30pub use native_value::NativeValue;
31
32#[derive(Clone, Debug)]
33pub struct PrimitiveArray {
34    dtype: DType,
35    buffer: ByteBuffer,
36    validity: Validity,
37    stats_set: ArrayStats,
38}
39
40try_from_array_ref!(PrimitiveArray);
41
42#[derive(Debug)]
43pub struct PrimitiveEncoding;
44impl Encoding for PrimitiveEncoding {
45    type Array = PrimitiveArray;
46    type Metadata = EmptyMetadata;
47}
48
49impl PrimitiveArray {
50    pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
51        let buffer = buffer.into();
52        if let Some(len) = validity.maybe_len() {
53            if buffer.len() != len {
54                vortex_panic!(
55                    "Buffer and validity length mismatch: buffer={}, validity={}",
56                    buffer.len(),
57                    len
58                );
59            }
60        }
61        Self {
62            dtype: DType::Primitive(T::PTYPE, validity.nullability()),
63            buffer: buffer.into_byte_buffer(),
64            validity,
65            stats_set: Default::default(),
66        }
67    }
68
69    pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
70        Self::new(Buffer::<T>::empty(), nullability.into())
71    }
72
73    pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
74        match_each_native_ptype!(ptype, |$T| {
75            Self::new::<$T>(Buffer::from_byte_buffer(buffer), validity)
76        })
77    }
78
79    /// Create a PrimitiveArray from an iterator of `T`.
80    /// NOTE: we cannot impl FromIterator trait since it conflicts with `FromIterator<T>`.
81    pub fn from_option_iter<T: NativePType, I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
82        let iter = iter.into_iter();
83        let mut values = BufferMut::with_capacity(iter.size_hint().0);
84        let mut validity = BooleanBufferBuilder::new(values.capacity());
85
86        for i in iter {
87            match i {
88                None => {
89                    validity.append(false);
90                    values.push(T::default());
91                }
92                Some(e) => {
93                    validity.append(true);
94                    values.push(e);
95                }
96            }
97        }
98        Self::new(values.freeze(), Validity::from(validity.finish()))
99    }
100
101    pub fn validity(&self) -> &Validity {
102        &self.validity
103    }
104
105    pub fn byte_buffer(&self) -> &ByteBuffer {
106        &self.buffer
107    }
108
109    pub fn into_byte_buffer(self) -> ByteBuffer {
110        self.buffer
111    }
112
113    pub fn buffer<T: NativePType>(&self) -> Buffer<T> {
114        if T::PTYPE != self.ptype() {
115            vortex_panic!(
116                "Attempted to get buffer of type {} from array of type {}",
117                T::PTYPE,
118                self.ptype()
119            )
120        }
121        Buffer::from_byte_buffer(self.byte_buffer().clone())
122    }
123
124    pub fn into_buffer<T: NativePType>(self) -> Buffer<T> {
125        if T::PTYPE != self.ptype() {
126            vortex_panic!(
127                "Attempted to get buffer of type {} from array of type {}",
128                T::PTYPE,
129                self.ptype()
130            )
131        }
132        Buffer::from_byte_buffer(self.buffer)
133    }
134
135    /// Extract a mutable buffer from the PrimitiveArray. Attempts to do this with zero-copy
136    /// if the buffer is uniquely owned, otherwise will make a copy.
137    pub fn into_buffer_mut<T: NativePType>(self) -> BufferMut<T> {
138        if T::PTYPE != self.ptype() {
139            vortex_panic!(
140                "Attempted to get buffer_mut of type {} from array of type {}",
141                T::PTYPE,
142                self.ptype()
143            )
144        }
145        self.into_buffer()
146            .try_into_mut()
147            .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
148    }
149
150    /// Try to extract a mutable buffer from the PrimitiveArray with zero copy.
151    #[allow(clippy::panic_in_result_fn)]
152    pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, PrimitiveArray> {
153        if T::PTYPE != self.ptype() {
154            vortex_panic!(
155                "Attempted to get buffer_mut of type {} from array of type {}",
156                T::PTYPE,
157                self.ptype()
158            )
159        }
160        let validity = self.validity().clone();
161        Buffer::<T>::from_byte_buffer(self.into_byte_buffer())
162            .try_into_mut()
163            .map_err(|buffer| PrimitiveArray::new(buffer, validity))
164    }
165
166    /// Map each element in the array to a new value.
167    ///
168    /// This ignores validity and maps over all maybe-null elements.
169    ///
170    /// TODO(ngates): we could be smarter here if validity is sparse and only run the function
171    ///   over the valid elements.
172    pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
173    where
174        T: NativePType,
175        R: NativePType,
176        F: FnMut(T) -> R,
177    {
178        let validity = self.validity().clone();
179        let buffer = match self.try_into_buffer_mut() {
180            Ok(buffer_mut) => buffer_mut.map_each(f),
181            Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
182        };
183        PrimitiveArray::new(buffer.freeze(), validity)
184    }
185
186    /// Map each element in the array to a new value.
187    ///
188    /// This doesn't ignore validity and maps over all maybe-null elements, with a bool true if
189    /// valid and false otherwise.
190    pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
191    where
192        T: NativePType,
193        R: NativePType,
194        F: FnMut((T, bool)) -> R,
195    {
196        let validity = self.validity();
197
198        let buf_iter = self.buffer::<T>().into_iter();
199
200        let buffer = match &validity {
201            Validity::NonNullable | Validity::AllValid => {
202                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
203            }
204            Validity::AllInvalid => {
205                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
206            }
207            Validity::Array(val) => {
208                let val = val.to_canonical()?.into_bool()?;
209                BufferMut::<R>::from_iter(buf_iter.zip(val.boolean_buffer()).map(f))
210            }
211        };
212        Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
213    }
214
215    /// Return a slice of the array's buffer.
216    ///
217    /// NOTE: these values may be nonsense if the validity buffer indicates that the value is null.
218    pub fn as_slice<T: NativePType>(&self) -> &[T] {
219        if T::PTYPE != self.ptype() {
220            vortex_panic!(
221                "Attempted to get slice of type {} from array of type {}",
222                T::PTYPE,
223                self.ptype()
224            )
225        }
226        let length = self.len();
227        let raw_slice = self.byte_buffer().as_slice();
228        debug_assert_eq!(raw_slice.len() / size_of::<T>(), length);
229        // SAFETY: alignment of Buffer is checked on construction
230        unsafe { std::slice::from_raw_parts(raw_slice.as_ptr().cast(), length) }
231    }
232
233    pub fn reinterpret_cast(&self, ptype: PType) -> Self {
234        if self.ptype() == ptype {
235            return self.clone();
236        }
237
238        assert_eq!(
239            self.ptype().byte_width(),
240            ptype.byte_width(),
241            "can't reinterpret cast between integers of two different widths"
242        );
243
244        PrimitiveArray::from_byte_buffer(self.byte_buffer().clone(), ptype, self.validity().clone())
245    }
246}
247
248impl ArrayImpl for PrimitiveArray {
249    type Encoding = PrimitiveEncoding;
250
251    fn _len(&self) -> usize {
252        self.byte_buffer().len() / self.ptype().byte_width()
253    }
254
255    fn _dtype(&self) -> &DType {
256        &self.dtype
257    }
258    fn _vtable(&self) -> VTableRef {
259        VTableRef::new_ref(&PrimitiveEncoding)
260    }
261
262    fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
263        let validity = if self.validity().is_array() {
264            Validity::Array(children[0].clone())
265        } else {
266            self.validity().clone()
267        };
268
269        Ok(Self::from_byte_buffer(
270            self.byte_buffer().clone(),
271            self.ptype(),
272            validity,
273        ))
274    }
275}
276
277impl ArrayStatisticsImpl for PrimitiveArray {
278    fn _stats_ref(&self) -> StatsSetRef<'_> {
279        self.stats_set.to_ref(self)
280    }
281}
282
283impl ArrayVariantsImpl for PrimitiveArray {
284    fn _as_primitive_typed(&self) -> Option<&dyn PrimitiveArrayTrait> {
285        Some(self)
286    }
287}
288
289impl PrimitiveArrayTrait for PrimitiveArray {}
290
291impl<T: NativePType> FromIterator<T> for PrimitiveArray {
292    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
293        let values = BufferMut::from_iter(iter);
294        PrimitiveArray::new(values.freeze(), Validity::NonNullable)
295    }
296}
297
298impl<T: NativePType> IntoArray for Buffer<T> {
299    fn into_array(self) -> ArrayRef {
300        PrimitiveArray::new(self, Validity::NonNullable).into_array()
301    }
302}
303
304impl<T: NativePType> IntoArray for BufferMut<T> {
305    fn into_array(self) -> ArrayRef {
306        self.freeze().into_array()
307    }
308}
309
310impl ArrayCanonicalImpl for PrimitiveArray {
311    fn _to_canonical(&self) -> VortexResult<Canonical> {
312        Ok(Canonical::Primitive(self.clone()))
313    }
314
315    fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
316        builder.extend_from_array(self)
317    }
318}
319
320impl ArrayValidityImpl for PrimitiveArray {
321    fn _is_valid(&self, index: usize) -> VortexResult<bool> {
322        self.validity.is_valid(index)
323    }
324
325    fn _all_valid(&self) -> VortexResult<bool> {
326        self.validity.all_valid()
327    }
328
329    fn _all_invalid(&self) -> VortexResult<bool> {
330        self.validity.all_invalid()
331    }
332
333    fn _validity_mask(&self) -> VortexResult<Mask> {
334        self.validity.to_mask(self.len())
335    }
336}
337
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;

    use crate::array::Array;
    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::mask::test_mask;
    use crate::validity::Validity;

    /// Runs the mask conformance helper against the same five values under each validity
    /// variant.
    #[test]
    fn test_mask_primitive_array() {
        let validities = [
            Validity::NonNullable,
            Validity::AllValid,
            Validity::AllInvalid,
            Validity::Array(BoolArray::from_iter([true, false, true, false, true]).into_array()),
        ];

        for validity in validities {
            test_mask(&PrimitiveArray::new(buffer![0, 1, 2, 3, 4], validity));
        }
    }
}
366}