vortex_array/arrays/primitive/
mod.rs1use std::fmt::Debug;
2use std::iter;
3
4mod accessor;
5
6use arrow_buffer::BooleanBufferBuilder;
7use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
8use vortex_dtype::{DType, NativePType, Nullability, PType, match_each_native_ptype};
9use vortex_error::{VortexResult, vortex_panic};
10use vortex_mask::Mask;
11
12use crate::array::{ArrayCanonicalImpl, ArrayValidityImpl};
13use crate::builders::ArrayBuilder;
14use crate::stats::{ArrayStats, StatsSetRef};
15use crate::validity::Validity;
16use crate::variants::PrimitiveArrayTrait;
17use crate::vtable::{EncodingVTable, VTableRef};
18use crate::{
19 Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayVariantsImpl, Canonical, EmptyMetadata,
20 Encoding, EncodingId, IntoArray, try_from_array_ref,
21};
22
23mod compute;
24mod patch;
25mod serde;
26mod stats;
27
28#[derive(Clone, Debug)]
29pub struct PrimitiveArray {
30 dtype: DType,
31 buffer: ByteBuffer,
32 validity: Validity,
33 stats_set: ArrayStats,
34}
35
36try_from_array_ref!(PrimitiveArray);
37
38pub struct PrimitiveEncoding;
39impl Encoding for PrimitiveEncoding {
40 type Array = PrimitiveArray;
41 type Metadata = EmptyMetadata;
42}
43
44impl EncodingVTable for PrimitiveEncoding {
45 fn id(&self) -> EncodingId {
46 EncodingId::new_ref("vortex.primitive")
47 }
48}
49
50impl PrimitiveArray {
51 pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
52 let buffer = buffer.into();
53 if let Some(len) = validity.maybe_len() {
54 if buffer.len() != len {
55 vortex_panic!(
56 "Buffer and validity length mismatch: buffer={}, validity={}",
57 buffer.len(),
58 len
59 );
60 }
61 }
62 Self {
63 dtype: DType::Primitive(T::PTYPE, validity.nullability()),
64 buffer: buffer.into_byte_buffer(),
65 validity,
66 stats_set: Default::default(),
67 }
68 }
69
70 pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
71 Self::new(Buffer::<T>::empty(), nullability.into())
72 }
73
74 pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
75 match_each_native_ptype!(ptype, |$T| {
76 Self::new::<$T>(Buffer::from_byte_buffer(buffer), validity)
77 })
78 }
79
80 pub fn from_option_iter<T: NativePType, I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
83 let iter = iter.into_iter();
84 let mut values = BufferMut::with_capacity(iter.size_hint().0);
85 let mut validity = BooleanBufferBuilder::new(values.capacity());
86
87 for i in iter {
88 match i {
89 None => {
90 validity.append(false);
91 values.push(T::default());
92 }
93 Some(e) => {
94 validity.append(true);
95 values.push(e);
96 }
97 }
98 }
99 Self::new(values.freeze(), Validity::from(validity.finish()))
100 }
101
102 pub fn validity(&self) -> &Validity {
103 &self.validity
104 }
105
106 pub fn byte_buffer(&self) -> &ByteBuffer {
107 &self.buffer
108 }
109
110 pub fn into_byte_buffer(self) -> ByteBuffer {
111 self.buffer
112 }
113
114 pub fn buffer<T: NativePType>(&self) -> Buffer<T> {
115 if T::PTYPE != self.ptype() {
116 vortex_panic!(
117 "Attempted to get buffer of type {} from array of type {}",
118 T::PTYPE,
119 self.ptype()
120 )
121 }
122 Buffer::from_byte_buffer(self.byte_buffer().clone())
123 }
124
125 pub fn into_buffer<T: NativePType>(self) -> Buffer<T> {
126 if T::PTYPE != self.ptype() {
127 vortex_panic!(
128 "Attempted to get buffer of type {} from array of type {}",
129 T::PTYPE,
130 self.ptype()
131 )
132 }
133 Buffer::from_byte_buffer(self.buffer)
134 }
135
136 pub fn into_buffer_mut<T: NativePType>(self) -> BufferMut<T> {
139 if T::PTYPE != self.ptype() {
140 vortex_panic!(
141 "Attempted to get buffer_mut of type {} from array of type {}",
142 T::PTYPE,
143 self.ptype()
144 )
145 }
146 self.into_buffer()
147 .try_into_mut()
148 .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
149 }
150
151 #[allow(clippy::panic_in_result_fn)]
153 pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, PrimitiveArray> {
154 if T::PTYPE != self.ptype() {
155 vortex_panic!(
156 "Attempted to get buffer_mut of type {} from array of type {}",
157 T::PTYPE,
158 self.ptype()
159 )
160 }
161 let validity = self.validity().clone();
162 Buffer::<T>::from_byte_buffer(self.into_byte_buffer())
163 .try_into_mut()
164 .map_err(|buffer| PrimitiveArray::new(buffer, validity))
165 }
166
167 pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
174 where
175 T: NativePType,
176 R: NativePType,
177 F: FnMut(T) -> R,
178 {
179 let validity = self.validity().clone();
180 let buffer = match self.try_into_buffer_mut() {
181 Ok(buffer_mut) => buffer_mut.map_each(f),
182 Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
183 };
184 PrimitiveArray::new(buffer.freeze(), validity)
185 }
186
187 pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
192 where
193 T: NativePType,
194 R: NativePType,
195 F: FnMut((T, bool)) -> R,
196 {
197 let validity = self.validity();
198
199 let buf_iter = self.buffer::<T>().into_iter();
200
201 let buffer = match &validity {
202 Validity::NonNullable | Validity::AllValid => {
203 BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
204 }
205 Validity::AllInvalid => {
206 BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
207 }
208 Validity::Array(val) => {
209 let val = val.to_canonical()?.into_bool()?;
210 BufferMut::<R>::from_iter(buf_iter.zip(val.boolean_buffer()).map(f))
211 }
212 };
213 Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
214 }
215
216 pub fn as_slice<T: NativePType>(&self) -> &[T] {
220 if T::PTYPE != self.ptype() {
221 vortex_panic!(
222 "Attempted to get slice of type {} from array of type {}",
223 T::PTYPE,
224 self.ptype()
225 )
226 }
227 let length = self.len();
228 let raw_slice = self.byte_buffer().as_slice();
229 debug_assert_eq!(raw_slice.len() / size_of::<T>(), length);
230 unsafe { std::slice::from_raw_parts(raw_slice.as_ptr().cast(), length) }
232 }
233
234 pub fn get_as_cast<T: NativePType>(&self, idx: usize) -> T {
235 match_each_native_ptype!(self.ptype(), |$P| {
236 T::from(self.as_slice::<$P>()[idx]).expect("failed to cast")
237 })
238 }
239
240 pub fn reinterpret_cast(&self, ptype: PType) -> Self {
241 if self.ptype() == ptype {
242 return self.clone();
243 }
244
245 assert_eq!(
246 self.ptype().byte_width(),
247 ptype.byte_width(),
248 "can't reinterpret cast between integers of two different widths"
249 );
250
251 PrimitiveArray::from_byte_buffer(self.byte_buffer().clone(), ptype, self.validity().clone())
252 }
253}
254
255impl ArrayImpl for PrimitiveArray {
256 type Encoding = PrimitiveEncoding;
257
258 fn _len(&self) -> usize {
259 self.byte_buffer().len() / self.ptype().byte_width()
260 }
261
262 fn _dtype(&self) -> &DType {
263 &self.dtype
264 }
265 fn _vtable(&self) -> VTableRef {
266 VTableRef::new_ref(&PrimitiveEncoding)
267 }
268}
269
270impl ArrayStatisticsImpl for PrimitiveArray {
271 fn _stats_ref(&self) -> StatsSetRef<'_> {
272 self.stats_set.to_ref(self)
273 }
274}
275
276impl ArrayVariantsImpl for PrimitiveArray {
277 fn _as_primitive_typed(&self) -> Option<&dyn PrimitiveArrayTrait> {
278 Some(self)
279 }
280}
281
282impl PrimitiveArrayTrait for PrimitiveArray {}
283
284impl<T: NativePType> FromIterator<T> for PrimitiveArray {
285 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
286 let values = BufferMut::from_iter(iter);
287 PrimitiveArray::new(values.freeze(), Validity::NonNullable)
288 }
289}
290
291impl<T: NativePType> IntoArray for Buffer<T> {
292 fn into_array(self) -> ArrayRef {
293 PrimitiveArray::new(self, Validity::NonNullable).into_array()
294 }
295}
296
297impl<T: NativePType> IntoArray for BufferMut<T> {
298 fn into_array(self) -> ArrayRef {
299 self.freeze().into_array()
300 }
301}
302
303impl ArrayCanonicalImpl for PrimitiveArray {
304 fn _to_canonical(&self) -> VortexResult<Canonical> {
305 Ok(Canonical::Primitive(self.clone()))
306 }
307
308 fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
309 builder.extend_from_array(self)
310 }
311}
312
313impl ArrayValidityImpl for PrimitiveArray {
314 fn _is_valid(&self, index: usize) -> VortexResult<bool> {
315 self.validity.is_valid(index)
316 }
317
318 fn _all_valid(&self) -> VortexResult<bool> {
319 self.validity.all_valid()
320 }
321
322 fn _all_invalid(&self) -> VortexResult<bool> {
323 self.validity.all_invalid()
324 }
325
326 fn _validity_mask(&self) -> VortexResult<Mask> {
327 self.validity.to_logical(self.len())
328 }
329}
330
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;

    use crate::array::Array;
    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::test_harness::test_mask;
    use crate::validity::Validity;

    /// Runs the shared `test_mask` harness on a five-element array under each
    /// validity variant.
    #[test]
    fn test_mask_primitive_array() {
        let alternating =
            Validity::Array(BoolArray::from_iter([true, false, true, false, true]).into_array());
        let validities = [
            Validity::NonNullable,
            Validity::AllValid,
            Validity::AllInvalid,
            alternating,
        ];

        for validity in validities {
            test_mask(&PrimitiveArray::new(buffer![0, 1, 2, 3, 4], validity));
        }
    }
}