vortex_array/arrays/primitive/
mod.rs

1use std::fmt::Debug;
2use std::iter;
3
4mod accessor;
5
6use arrow_buffer::BooleanBufferBuilder;
7use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
8use vortex_dtype::{DType, NativePType, Nullability, PType, match_each_native_ptype};
9use vortex_error::{VortexResult, vortex_panic};
10use vortex_mask::Mask;
11
12use crate::array::{ArrayCanonicalImpl, ArrayValidityImpl};
13use crate::builders::ArrayBuilder;
14use crate::stats::{ArrayStats, StatsSetRef};
15use crate::validity::Validity;
16use crate::variants::PrimitiveArrayTrait;
17use crate::vtable::VTableRef;
18use crate::{
19    Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayVariantsImpl, Canonical, EmptyMetadata,
20    Encoding, IntoArray, try_from_array_ref,
21};
22
23mod compute;
24mod native_value;
25mod patch;
26mod serde;
27mod top_value;
28
29pub use native_value::NativeValue;
30
/// An array of primitive values kept as raw bytes together with their validity.
#[derive(Clone, Debug)]
pub struct PrimitiveArray {
    /// Logical type; `PrimitiveArray::new` sets it to `DType::Primitive(ptype, nullability)`.
    dtype: DType,
    /// Element values stored as an untyped byte buffer.
    buffer: ByteBuffer,
    /// Validity of the elements.
    validity: Validity,
    /// Statistics handle; exposed through `ArrayStatisticsImpl::_stats_ref`.
    stats_set: ArrayStats,
}

// NOTE(review): macro is defined elsewhere in the crate; presumably it generates the
// conversion from an `ArrayRef` into `PrimitiveArray` — confirm against its definition.
try_from_array_ref!(PrimitiveArray);
40
/// Marker type identifying the primitive encoding.
pub struct PrimitiveEncoding;
/// Ties the encoding to its array type; the encoding carries no metadata (`EmptyMetadata`).
impl Encoding for PrimitiveEncoding {
    type Array = PrimitiveArray;
    type Metadata = EmptyMetadata;
}
46
47impl PrimitiveArray {
48    pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
49        let buffer = buffer.into();
50        if let Some(len) = validity.maybe_len() {
51            if buffer.len() != len {
52                vortex_panic!(
53                    "Buffer and validity length mismatch: buffer={}, validity={}",
54                    buffer.len(),
55                    len
56                );
57            }
58        }
59        Self {
60            dtype: DType::Primitive(T::PTYPE, validity.nullability()),
61            buffer: buffer.into_byte_buffer(),
62            validity,
63            stats_set: Default::default(),
64        }
65    }
66
    /// Create an array of `T` backed by an empty buffer, with validity converted from
    /// `nullability`.
    pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
        Self::new(Buffer::<T>::empty(), nullability.into())
    }
70
    /// Build an array from untyped bytes, interpreting them as elements of `ptype`.
    ///
    /// Dispatches on `ptype` to the matching native type and delegates to [`Self::new`], so the
    /// same length check against `validity` applies.
    pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
        match_each_native_ptype!(ptype, |$T| {
            Self::new::<$T>(Buffer::from_byte_buffer(buffer), validity)
        })
    }
76
77    /// Create a PrimitiveArray from an iterator of `T`.
78    /// NOTE: we cannot impl FromIterator trait since it conflicts with `FromIterator<T>`.
79    pub fn from_option_iter<T: NativePType, I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
80        let iter = iter.into_iter();
81        let mut values = BufferMut::with_capacity(iter.size_hint().0);
82        let mut validity = BooleanBufferBuilder::new(values.capacity());
83
84        for i in iter {
85            match i {
86                None => {
87                    validity.append(false);
88                    values.push(T::default());
89                }
90                Some(e) => {
91                    validity.append(true);
92                    values.push(e);
93                }
94            }
95        }
96        Self::new(values.freeze(), Validity::from(validity.finish()))
97    }
98
    /// Borrow the array's validity.
    pub fn validity(&self) -> &Validity {
        &self.validity
    }
102
    /// Borrow the underlying untyped byte buffer.
    pub fn byte_buffer(&self) -> &ByteBuffer {
        &self.buffer
    }
106
    /// Consume the array and return its untyped byte buffer; dtype, validity and statistics
    /// are dropped.
    pub fn into_byte_buffer(self) -> ByteBuffer {
        self.buffer
    }
110
111    pub fn buffer<T: NativePType>(&self) -> Buffer<T> {
112        if T::PTYPE != self.ptype() {
113            vortex_panic!(
114                "Attempted to get buffer of type {} from array of type {}",
115                T::PTYPE,
116                self.ptype()
117            )
118        }
119        Buffer::from_byte_buffer(self.byte_buffer().clone())
120    }
121
122    pub fn into_buffer<T: NativePType>(self) -> Buffer<T> {
123        if T::PTYPE != self.ptype() {
124            vortex_panic!(
125                "Attempted to get buffer of type {} from array of type {}",
126                T::PTYPE,
127                self.ptype()
128            )
129        }
130        Buffer::from_byte_buffer(self.buffer)
131    }
132
133    /// Extract a mutable buffer from the PrimitiveArray. Attempts to do this with zero-copy
134    /// if the buffer is uniquely owned, otherwise will make a copy.
135    pub fn into_buffer_mut<T: NativePType>(self) -> BufferMut<T> {
136        if T::PTYPE != self.ptype() {
137            vortex_panic!(
138                "Attempted to get buffer_mut of type {} from array of type {}",
139                T::PTYPE,
140                self.ptype()
141            )
142        }
143        self.into_buffer()
144            .try_into_mut()
145            .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
146    }
147
148    /// Try to extract a mutable buffer from the PrimitiveArray with zero copy.
149    #[allow(clippy::panic_in_result_fn)]
150    pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, PrimitiveArray> {
151        if T::PTYPE != self.ptype() {
152            vortex_panic!(
153                "Attempted to get buffer_mut of type {} from array of type {}",
154                T::PTYPE,
155                self.ptype()
156            )
157        }
158        let validity = self.validity().clone();
159        Buffer::<T>::from_byte_buffer(self.into_byte_buffer())
160            .try_into_mut()
161            .map_err(|buffer| PrimitiveArray::new(buffer, validity))
162    }
163
164    /// Map each element in the array to a new value.
165    ///
166    /// This ignores validity and maps over all maybe-null elements.
167    ///
168    /// TODO(ngates): we could be smarter here if validity is sparse and only run the function
169    ///   over the valid elements.
170    pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
171    where
172        T: NativePType,
173        R: NativePType,
174        F: FnMut(T) -> R,
175    {
176        let validity = self.validity().clone();
177        let buffer = match self.try_into_buffer_mut() {
178            Ok(buffer_mut) => buffer_mut.map_each(f),
179            Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
180        };
181        PrimitiveArray::new(buffer.freeze(), validity)
182    }
183
184    /// Map each element in the array to a new value.
185    ///
186    /// This doesn't ignore validity and maps over all maybe-null elements, with a bool true if
187    /// valid and false otherwise.
188    pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
189    where
190        T: NativePType,
191        R: NativePType,
192        F: FnMut((T, bool)) -> R,
193    {
194        let validity = self.validity();
195
196        let buf_iter = self.buffer::<T>().into_iter();
197
198        let buffer = match &validity {
199            Validity::NonNullable | Validity::AllValid => {
200                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
201            }
202            Validity::AllInvalid => {
203                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
204            }
205            Validity::Array(val) => {
206                let val = val.to_canonical()?.into_bool()?;
207                BufferMut::<R>::from_iter(buf_iter.zip(val.boolean_buffer()).map(f))
208            }
209        };
210        Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
211    }
212
213    /// Return a slice of the array's buffer.
214    ///
215    /// NOTE: these values may be nonsense if the validity buffer indicates that the value is null.
216    pub fn as_slice<T: NativePType>(&self) -> &[T] {
217        if T::PTYPE != self.ptype() {
218            vortex_panic!(
219                "Attempted to get slice of type {} from array of type {}",
220                T::PTYPE,
221                self.ptype()
222            )
223        }
224        let length = self.len();
225        let raw_slice = self.byte_buffer().as_slice();
226        debug_assert_eq!(raw_slice.len() / size_of::<T>(), length);
227        // SAFETY: alignment of Buffer is checked on construction
228        unsafe { std::slice::from_raw_parts(raw_slice.as_ptr().cast(), length) }
229    }
230
231    pub fn reinterpret_cast(&self, ptype: PType) -> Self {
232        if self.ptype() == ptype {
233            return self.clone();
234        }
235
236        assert_eq!(
237            self.ptype().byte_width(),
238            ptype.byte_width(),
239            "can't reinterpret cast between integers of two different widths"
240        );
241
242        PrimitiveArray::from_byte_buffer(self.byte_buffer().clone(), ptype, self.validity().clone())
243    }
244}
245
246impl ArrayImpl for PrimitiveArray {
247    type Encoding = PrimitiveEncoding;
248
249    fn _len(&self) -> usize {
250        self.byte_buffer().len() / self.ptype().byte_width()
251    }
252
253    fn _dtype(&self) -> &DType {
254        &self.dtype
255    }
256    fn _vtable(&self) -> VTableRef {
257        VTableRef::new_ref(&PrimitiveEncoding)
258    }
259
260    fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
261        let validity = if self.validity().is_array() {
262            Validity::Array(children[0].clone())
263        } else {
264            self.validity().clone()
265        };
266
267        Ok(Self::from_byte_buffer(
268            self.byte_buffer().clone(),
269            self.ptype(),
270            validity,
271        ))
272    }
273}
274
/// Exposes the array's statistics.
impl ArrayStatisticsImpl for PrimitiveArray {
    /// Build a statistics reference from the stored `stats_set`, bound to this array.
    fn _stats_ref(&self) -> StatsSetRef<'_> {
        self.stats_set.to_ref(self)
    }
}
280
/// Variant access: a `PrimitiveArray` is itself the primitive-typed view.
impl ArrayVariantsImpl for PrimitiveArray {
    /// Always returns `Some(self)`.
    fn _as_primitive_typed(&self) -> Option<&dyn PrimitiveArrayTrait> {
        Some(self)
    }
}
286
// Empty impl: no methods are overridden here, so any behavior comes from the trait itself.
impl PrimitiveArrayTrait for PrimitiveArray {}
288
289impl<T: NativePType> FromIterator<T> for PrimitiveArray {
290    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
291        let values = BufferMut::from_iter(iter);
292        PrimitiveArray::new(values.freeze(), Validity::NonNullable)
293    }
294}
295
/// Wraps a typed buffer in a non-nullable [`PrimitiveArray`].
impl<T: NativePType> IntoArray for Buffer<T> {
    fn into_array(self) -> ArrayRef {
        PrimitiveArray::new(self, Validity::NonNullable).into_array()
    }
}
301
/// Freezes the mutable buffer, then converts it the same way as `Buffer<T>`.
impl<T: NativePType> IntoArray for BufferMut<T> {
    fn into_array(self) -> ArrayRef {
        self.freeze().into_array()
    }
}
307
/// Canonicalization for [`PrimitiveArray`].
impl ArrayCanonicalImpl for PrimitiveArray {
    /// Returns a clone of `self` wrapped in `Canonical::Primitive`.
    fn _to_canonical(&self) -> VortexResult<Canonical> {
        Ok(Canonical::Primitive(self.clone()))
    }

    /// Forwards to `builder.extend_from_array(self)`.
    fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
        builder.extend_from_array(self)
    }
}
317
/// Validity queries, each delegated to the stored [`Validity`].
impl ArrayValidityImpl for PrimitiveArray {
    /// Whether the element at `index` is valid.
    fn _is_valid(&self, index: usize) -> VortexResult<bool> {
        self.validity.is_valid(index)
    }

    /// Whether every element is valid.
    fn _all_valid(&self) -> VortexResult<bool> {
        self.validity.all_valid()
    }

    /// Whether every element is invalid.
    fn _all_invalid(&self) -> VortexResult<bool> {
        self.validity.all_invalid()
    }

    /// Convert the validity into a `Mask` sized to the array's length.
    fn _validity_mask(&self) -> VortexResult<Mask> {
        self.validity.to_mask(self.len())
    }
}
335
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;

    use crate::array::Array;
    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::mask::test_mask;
    use crate::validity::Validity;

    /// Runs the shared mask conformance check against every validity variant.
    #[test]
    fn test_mask_primitive_array() {
        let validities = [
            Validity::NonNullable,
            Validity::AllValid,
            Validity::AllInvalid,
            Validity::Array(BoolArray::from_iter([true, false, true, false, true]).into_array()),
        ];

        for validity in validities {
            test_mask(&PrimitiveArray::new(buffer![0, 1, 2, 3, 4], validity));
        }
    }
}