vortex_array/arrays/primitive/
mod.rs1use std::fmt::Debug;
2use std::iter;
3
4mod accessor;
5
6use arrow_buffer::BooleanBufferBuilder;
7use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
8use vortex_dtype::{DType, NativePType, Nullability, PType, match_each_native_ptype};
9use vortex_error::{VortexResult, vortex_panic};
10use vortex_mask::Mask;
11
12use crate::array::{ArrayCanonicalImpl, ArrayValidityImpl};
13use crate::builders::ArrayBuilder;
14use crate::stats::{ArrayStats, StatsSetRef};
15use crate::validity::Validity;
16use crate::variants::PrimitiveArrayTrait;
17use crate::vtable::VTableRef;
18use crate::{
19 Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayVariantsImpl, Canonical, EmptyMetadata,
20 Encoding, IntoArray, try_from_array_ref,
21};
22
23mod compute;
24mod native_value;
25mod patch;
26mod serde;
27mod top_value;
28
29pub use compute::{IS_CONST_LANE_WIDTH, compute_is_constant};
30pub use native_value::NativeValue;
31
32#[derive(Clone, Debug)]
33pub struct PrimitiveArray {
34 dtype: DType,
35 buffer: ByteBuffer,
36 validity: Validity,
37 stats_set: ArrayStats,
38}
39
40try_from_array_ref!(PrimitiveArray);
41
42#[derive(Debug)]
43pub struct PrimitiveEncoding;
44impl Encoding for PrimitiveEncoding {
45 type Array = PrimitiveArray;
46 type Metadata = EmptyMetadata;
47}
48
49impl PrimitiveArray {
50 pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
51 let buffer = buffer.into();
52 if let Some(len) = validity.maybe_len() {
53 if buffer.len() != len {
54 vortex_panic!(
55 "Buffer and validity length mismatch: buffer={}, validity={}",
56 buffer.len(),
57 len
58 );
59 }
60 }
61 Self {
62 dtype: DType::Primitive(T::PTYPE, validity.nullability()),
63 buffer: buffer.into_byte_buffer(),
64 validity,
65 stats_set: Default::default(),
66 }
67 }
68
69 pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
70 Self::new(Buffer::<T>::empty(), nullability.into())
71 }
72
73 pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
74 match_each_native_ptype!(ptype, |$T| {
75 Self::new::<$T>(Buffer::from_byte_buffer(buffer), validity)
76 })
77 }
78
79 pub fn from_option_iter<T: NativePType, I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
82 let iter = iter.into_iter();
83 let mut values = BufferMut::with_capacity(iter.size_hint().0);
84 let mut validity = BooleanBufferBuilder::new(values.capacity());
85
86 for i in iter {
87 match i {
88 None => {
89 validity.append(false);
90 values.push(T::default());
91 }
92 Some(e) => {
93 validity.append(true);
94 values.push(e);
95 }
96 }
97 }
98 Self::new(values.freeze(), Validity::from(validity.finish()))
99 }
100
101 pub fn validity(&self) -> &Validity {
102 &self.validity
103 }
104
105 pub fn byte_buffer(&self) -> &ByteBuffer {
106 &self.buffer
107 }
108
109 pub fn into_byte_buffer(self) -> ByteBuffer {
110 self.buffer
111 }
112
113 pub fn buffer<T: NativePType>(&self) -> Buffer<T> {
114 if T::PTYPE != self.ptype() {
115 vortex_panic!(
116 "Attempted to get buffer of type {} from array of type {}",
117 T::PTYPE,
118 self.ptype()
119 )
120 }
121 Buffer::from_byte_buffer(self.byte_buffer().clone())
122 }
123
124 pub fn into_buffer<T: NativePType>(self) -> Buffer<T> {
125 if T::PTYPE != self.ptype() {
126 vortex_panic!(
127 "Attempted to get buffer of type {} from array of type {}",
128 T::PTYPE,
129 self.ptype()
130 )
131 }
132 Buffer::from_byte_buffer(self.buffer)
133 }
134
135 pub fn into_buffer_mut<T: NativePType>(self) -> BufferMut<T> {
138 if T::PTYPE != self.ptype() {
139 vortex_panic!(
140 "Attempted to get buffer_mut of type {} from array of type {}",
141 T::PTYPE,
142 self.ptype()
143 )
144 }
145 self.into_buffer()
146 .try_into_mut()
147 .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
148 }
149
150 #[allow(clippy::panic_in_result_fn)]
152 pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, PrimitiveArray> {
153 if T::PTYPE != self.ptype() {
154 vortex_panic!(
155 "Attempted to get buffer_mut of type {} from array of type {}",
156 T::PTYPE,
157 self.ptype()
158 )
159 }
160 let validity = self.validity().clone();
161 Buffer::<T>::from_byte_buffer(self.into_byte_buffer())
162 .try_into_mut()
163 .map_err(|buffer| PrimitiveArray::new(buffer, validity))
164 }
165
166 pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
173 where
174 T: NativePType,
175 R: NativePType,
176 F: FnMut(T) -> R,
177 {
178 let validity = self.validity().clone();
179 let buffer = match self.try_into_buffer_mut() {
180 Ok(buffer_mut) => buffer_mut.map_each(f),
181 Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
182 };
183 PrimitiveArray::new(buffer.freeze(), validity)
184 }
185
186 pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
191 where
192 T: NativePType,
193 R: NativePType,
194 F: FnMut((T, bool)) -> R,
195 {
196 let validity = self.validity();
197
198 let buf_iter = self.buffer::<T>().into_iter();
199
200 let buffer = match &validity {
201 Validity::NonNullable | Validity::AllValid => {
202 BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
203 }
204 Validity::AllInvalid => {
205 BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
206 }
207 Validity::Array(val) => {
208 let val = val.to_canonical()?.into_bool()?;
209 BufferMut::<R>::from_iter(buf_iter.zip(val.boolean_buffer()).map(f))
210 }
211 };
212 Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
213 }
214
215 pub fn as_slice<T: NativePType>(&self) -> &[T] {
219 if T::PTYPE != self.ptype() {
220 vortex_panic!(
221 "Attempted to get slice of type {} from array of type {}",
222 T::PTYPE,
223 self.ptype()
224 )
225 }
226 let length = self.len();
227 let raw_slice = self.byte_buffer().as_slice();
228 debug_assert_eq!(raw_slice.len() / size_of::<T>(), length);
229 unsafe { std::slice::from_raw_parts(raw_slice.as_ptr().cast(), length) }
231 }
232
233 pub fn reinterpret_cast(&self, ptype: PType) -> Self {
234 if self.ptype() == ptype {
235 return self.clone();
236 }
237
238 assert_eq!(
239 self.ptype().byte_width(),
240 ptype.byte_width(),
241 "can't reinterpret cast between integers of two different widths"
242 );
243
244 PrimitiveArray::from_byte_buffer(self.byte_buffer().clone(), ptype, self.validity().clone())
245 }
246}
247
248impl ArrayImpl for PrimitiveArray {
249 type Encoding = PrimitiveEncoding;
250
251 fn _len(&self) -> usize {
252 self.byte_buffer().len() / self.ptype().byte_width()
253 }
254
255 fn _dtype(&self) -> &DType {
256 &self.dtype
257 }
258 fn _vtable(&self) -> VTableRef {
259 VTableRef::new_ref(&PrimitiveEncoding)
260 }
261
262 fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
263 let validity = if self.validity().is_array() {
264 Validity::Array(children[0].clone())
265 } else {
266 self.validity().clone()
267 };
268
269 Ok(Self::from_byte_buffer(
270 self.byte_buffer().clone(),
271 self.ptype(),
272 validity,
273 ))
274 }
275}
276
277impl ArrayStatisticsImpl for PrimitiveArray {
278 fn _stats_ref(&self) -> StatsSetRef<'_> {
279 self.stats_set.to_ref(self)
280 }
281}
282
283impl ArrayVariantsImpl for PrimitiveArray {
284 fn _as_primitive_typed(&self) -> Option<&dyn PrimitiveArrayTrait> {
285 Some(self)
286 }
287}
288
289impl PrimitiveArrayTrait for PrimitiveArray {}
290
291impl<T: NativePType> FromIterator<T> for PrimitiveArray {
292 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
293 let values = BufferMut::from_iter(iter);
294 PrimitiveArray::new(values.freeze(), Validity::NonNullable)
295 }
296}
297
298impl<T: NativePType> IntoArray for Buffer<T> {
299 fn into_array(self) -> ArrayRef {
300 PrimitiveArray::new(self, Validity::NonNullable).into_array()
301 }
302}
303
304impl<T: NativePType> IntoArray for BufferMut<T> {
305 fn into_array(self) -> ArrayRef {
306 self.freeze().into_array()
307 }
308}
309
310impl ArrayCanonicalImpl for PrimitiveArray {
311 fn _to_canonical(&self) -> VortexResult<Canonical> {
312 Ok(Canonical::Primitive(self.clone()))
313 }
314
315 fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
316 builder.extend_from_array(self)
317 }
318}
319
320impl ArrayValidityImpl for PrimitiveArray {
321 fn _is_valid(&self, index: usize) -> VortexResult<bool> {
322 self.validity.is_valid(index)
323 }
324
325 fn _all_valid(&self) -> VortexResult<bool> {
326 self.validity.all_valid()
327 }
328
329 fn _all_invalid(&self) -> VortexResult<bool> {
330 self.validity.all_invalid()
331 }
332
333 fn _validity_mask(&self) -> VortexResult<Mask> {
334 self.validity.to_mask(self.len())
335 }
336}
337
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;

    use crate::array::Array;
    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::mask::test_mask;
    use crate::validity::Validity;

    /// Runs the mask conformance check on the same five values under each
    /// kind of validity.
    #[test]
    fn test_mask_primitive_array() {
        let validities = [
            Validity::NonNullable,
            Validity::AllValid,
            Validity::AllInvalid,
            Validity::Array(BoolArray::from_iter([true, false, true, false, true]).into_array()),
        ];

        for validity in validities {
            test_mask(&PrimitiveArray::new(buffer![0, 1, 2, 3, 4], validity));
        }
    }
}