vortex_array/arrays/primitive/
mod.rs

1use std::fmt::Debug;
2use std::iter;
3
4mod accessor;
5
6use arrow_buffer::BooleanBufferBuilder;
7use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
8use vortex_dtype::{DType, NativePType, Nullability, PType, match_each_native_ptype};
9use vortex_error::{VortexResult, vortex_panic};
10use vortex_mask::Mask;
11
12use crate::array::{ArrayCanonicalImpl, ArrayValidityImpl};
13use crate::builders::ArrayBuilder;
14use crate::stats::{ArrayStats, StatsSetRef};
15use crate::validity::Validity;
16use crate::variants::PrimitiveArrayTrait;
17use crate::vtable::VTableRef;
18use crate::{
19    Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayVariantsImpl, Canonical, EmptyMetadata,
20    Encoding, IntoArray, try_from_array_ref,
21};
22
23mod compute;
24mod native_value;
25mod patch;
26mod serde;
27mod top_value;
28
29pub use compute::{IS_CONST_LANE_WIDTH, compute_is_constant};
30pub use native_value::NativeValue;
31
/// An array of fixed-width primitive values whose storage is a type-erased byte buffer.
///
/// The element type is not part of the Rust type; it is recorded in `dtype`, and typed access
/// goes through the generic accessors of the inherent impl, which check the requested type.
#[derive(Clone, Debug)]
pub struct PrimitiveArray {
    /// Logical type of the array; constructed as `DType::Primitive(ptype, nullability)`.
    dtype: DType,
    /// Raw bytes backing the values.
    buffer: ByteBuffer,
    /// Validity (null-ness) information for the elements.
    validity: Validity,
    /// Statistics storage, exposed through `ArrayStatisticsImpl::_stats_ref`.
    stats_set: ArrayStats,
}

// NOTE(review): presumably generates the conversion from an `ArrayRef` into `PrimitiveArray`;
// confirm against the definition of `try_from_array_ref!`.
try_from_array_ref!(PrimitiveArray);
41
/// Encoding marker type associated with [`PrimitiveArray`].
pub struct PrimitiveEncoding;
impl Encoding for PrimitiveEncoding {
    // The array type that belongs to this encoding.
    type Array = PrimitiveArray;
    // This encoding uses `EmptyMetadata` as its metadata type.
    type Metadata = EmptyMetadata;
}
47
48impl PrimitiveArray {
49    pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
50        let buffer = buffer.into();
51        if let Some(len) = validity.maybe_len() {
52            if buffer.len() != len {
53                vortex_panic!(
54                    "Buffer and validity length mismatch: buffer={}, validity={}",
55                    buffer.len(),
56                    len
57                );
58            }
59        }
60        Self {
61            dtype: DType::Primitive(T::PTYPE, validity.nullability()),
62            buffer: buffer.into_byte_buffer(),
63            validity,
64            stats_set: Default::default(),
65        }
66    }
67
68    pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
69        Self::new(Buffer::<T>::empty(), nullability.into())
70    }
71
72    pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
73        match_each_native_ptype!(ptype, |$T| {
74            Self::new::<$T>(Buffer::from_byte_buffer(buffer), validity)
75        })
76    }
77
78    /// Create a PrimitiveArray from an iterator of `T`.
79    /// NOTE: we cannot impl FromIterator trait since it conflicts with `FromIterator<T>`.
80    pub fn from_option_iter<T: NativePType, I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
81        let iter = iter.into_iter();
82        let mut values = BufferMut::with_capacity(iter.size_hint().0);
83        let mut validity = BooleanBufferBuilder::new(values.capacity());
84
85        for i in iter {
86            match i {
87                None => {
88                    validity.append(false);
89                    values.push(T::default());
90                }
91                Some(e) => {
92                    validity.append(true);
93                    values.push(e);
94                }
95            }
96        }
97        Self::new(values.freeze(), Validity::from(validity.finish()))
98    }
99
100    pub fn validity(&self) -> &Validity {
101        &self.validity
102    }
103
104    pub fn byte_buffer(&self) -> &ByteBuffer {
105        &self.buffer
106    }
107
108    pub fn into_byte_buffer(self) -> ByteBuffer {
109        self.buffer
110    }
111
112    pub fn buffer<T: NativePType>(&self) -> Buffer<T> {
113        if T::PTYPE != self.ptype() {
114            vortex_panic!(
115                "Attempted to get buffer of type {} from array of type {}",
116                T::PTYPE,
117                self.ptype()
118            )
119        }
120        Buffer::from_byte_buffer(self.byte_buffer().clone())
121    }
122
123    pub fn into_buffer<T: NativePType>(self) -> Buffer<T> {
124        if T::PTYPE != self.ptype() {
125            vortex_panic!(
126                "Attempted to get buffer of type {} from array of type {}",
127                T::PTYPE,
128                self.ptype()
129            )
130        }
131        Buffer::from_byte_buffer(self.buffer)
132    }
133
134    /// Extract a mutable buffer from the PrimitiveArray. Attempts to do this with zero-copy
135    /// if the buffer is uniquely owned, otherwise will make a copy.
136    pub fn into_buffer_mut<T: NativePType>(self) -> BufferMut<T> {
137        if T::PTYPE != self.ptype() {
138            vortex_panic!(
139                "Attempted to get buffer_mut of type {} from array of type {}",
140                T::PTYPE,
141                self.ptype()
142            )
143        }
144        self.into_buffer()
145            .try_into_mut()
146            .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
147    }
148
149    /// Try to extract a mutable buffer from the PrimitiveArray with zero copy.
150    #[allow(clippy::panic_in_result_fn)]
151    pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, PrimitiveArray> {
152        if T::PTYPE != self.ptype() {
153            vortex_panic!(
154                "Attempted to get buffer_mut of type {} from array of type {}",
155                T::PTYPE,
156                self.ptype()
157            )
158        }
159        let validity = self.validity().clone();
160        Buffer::<T>::from_byte_buffer(self.into_byte_buffer())
161            .try_into_mut()
162            .map_err(|buffer| PrimitiveArray::new(buffer, validity))
163    }
164
165    /// Map each element in the array to a new value.
166    ///
167    /// This ignores validity and maps over all maybe-null elements.
168    ///
169    /// TODO(ngates): we could be smarter here if validity is sparse and only run the function
170    ///   over the valid elements.
171    pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
172    where
173        T: NativePType,
174        R: NativePType,
175        F: FnMut(T) -> R,
176    {
177        let validity = self.validity().clone();
178        let buffer = match self.try_into_buffer_mut() {
179            Ok(buffer_mut) => buffer_mut.map_each(f),
180            Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
181        };
182        PrimitiveArray::new(buffer.freeze(), validity)
183    }
184
185    /// Map each element in the array to a new value.
186    ///
187    /// This doesn't ignore validity and maps over all maybe-null elements, with a bool true if
188    /// valid and false otherwise.
189    pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
190    where
191        T: NativePType,
192        R: NativePType,
193        F: FnMut((T, bool)) -> R,
194    {
195        let validity = self.validity();
196
197        let buf_iter = self.buffer::<T>().into_iter();
198
199        let buffer = match &validity {
200            Validity::NonNullable | Validity::AllValid => {
201                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
202            }
203            Validity::AllInvalid => {
204                BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
205            }
206            Validity::Array(val) => {
207                let val = val.to_canonical()?.into_bool()?;
208                BufferMut::<R>::from_iter(buf_iter.zip(val.boolean_buffer()).map(f))
209            }
210        };
211        Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
212    }
213
214    /// Return a slice of the array's buffer.
215    ///
216    /// NOTE: these values may be nonsense if the validity buffer indicates that the value is null.
217    pub fn as_slice<T: NativePType>(&self) -> &[T] {
218        if T::PTYPE != self.ptype() {
219            vortex_panic!(
220                "Attempted to get slice of type {} from array of type {}",
221                T::PTYPE,
222                self.ptype()
223            )
224        }
225        let length = self.len();
226        let raw_slice = self.byte_buffer().as_slice();
227        debug_assert_eq!(raw_slice.len() / size_of::<T>(), length);
228        // SAFETY: alignment of Buffer is checked on construction
229        unsafe { std::slice::from_raw_parts(raw_slice.as_ptr().cast(), length) }
230    }
231
232    pub fn reinterpret_cast(&self, ptype: PType) -> Self {
233        if self.ptype() == ptype {
234            return self.clone();
235        }
236
237        assert_eq!(
238            self.ptype().byte_width(),
239            ptype.byte_width(),
240            "can't reinterpret cast between integers of two different widths"
241        );
242
243        PrimitiveArray::from_byte_buffer(self.byte_buffer().clone(), ptype, self.validity().clone())
244    }
245}
246
247impl ArrayImpl for PrimitiveArray {
248    type Encoding = PrimitiveEncoding;
249
250    fn _len(&self) -> usize {
251        self.byte_buffer().len() / self.ptype().byte_width()
252    }
253
254    fn _dtype(&self) -> &DType {
255        &self.dtype
256    }
257    fn _vtable(&self) -> VTableRef {
258        VTableRef::new_ref(&PrimitiveEncoding)
259    }
260
261    fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
262        let validity = if self.validity().is_array() {
263            Validity::Array(children[0].clone())
264        } else {
265            self.validity().clone()
266        };
267
268        Ok(Self::from_byte_buffer(
269            self.byte_buffer().clone(),
270            self.ptype(),
271            validity,
272        ))
273    }
274}
275
impl ArrayStatisticsImpl for PrimitiveArray {
    /// Returns a statistics reference built from this array's `stats_set`, with the array
    /// itself passed along to `to_ref`.
    fn _stats_ref(&self) -> StatsSetRef<'_> {
        self.stats_set.to_ref(self)
    }
}
281
impl ArrayVariantsImpl for PrimitiveArray {
    /// Always returns `Some`: this array exposes itself as a `PrimitiveArrayTrait` object.
    fn _as_primitive_typed(&self) -> Option<&dyn PrimitiveArrayTrait> {
        Some(self)
    }
}
287
// Nothing is overridden here: only the methods the trait itself provides are used.
impl PrimitiveArrayTrait for PrimitiveArray {}
289
290impl<T: NativePType> FromIterator<T> for PrimitiveArray {
291    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
292        let values = BufferMut::from_iter(iter);
293        PrimitiveArray::new(values.freeze(), Validity::NonNullable)
294    }
295}
296
297impl<T: NativePType> IntoArray for Buffer<T> {
298    fn into_array(self) -> ArrayRef {
299        PrimitiveArray::new(self, Validity::NonNullable).into_array()
300    }
301}
302
303impl<T: NativePType> IntoArray for BufferMut<T> {
304    fn into_array(self) -> ArrayRef {
305        self.freeze().into_array()
306    }
307}
308
309impl ArrayCanonicalImpl for PrimitiveArray {
310    fn _to_canonical(&self) -> VortexResult<Canonical> {
311        Ok(Canonical::Primitive(self.clone()))
312    }
313
314    fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
315        builder.extend_from_array(self)
316    }
317}
318
319impl ArrayValidityImpl for PrimitiveArray {
320    fn _is_valid(&self, index: usize) -> VortexResult<bool> {
321        self.validity.is_valid(index)
322    }
323
324    fn _all_valid(&self) -> VortexResult<bool> {
325        self.validity.all_valid()
326    }
327
328    fn _all_invalid(&self) -> VortexResult<bool> {
329        self.validity.all_invalid()
330    }
331
332    fn _validity_mask(&self) -> VortexResult<Mask> {
333        self.validity.to_mask(self.len())
334    }
335}
336
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;

    use crate::array::Array;
    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::mask::test_mask;
    use crate::validity::Validity;

    /// Runs the mask conformance check once per validity variant over the same five values.
    #[test]
    fn test_mask_primitive_array() {
        let validities = [
            Validity::NonNullable,
            Validity::AllValid,
            Validity::AllInvalid,
            Validity::Array(BoolArray::from_iter([true, false, true, false, true]).into_array()),
        ];

        for validity in validities {
            test_mask(&PrimitiveArray::new(buffer![0, 1, 2, 3, 4], validity));
        }
    }
}