vortex_array/arrays/primitive/
mod.rs1use std::fmt::Debug;
2use std::iter;
3
4mod accessor;
5
6use arrow_buffer::BooleanBufferBuilder;
7use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
8use vortex_dtype::{DType, NativePType, Nullability, PType, match_each_native_ptype};
9use vortex_error::{VortexResult, vortex_panic};
10use vortex_mask::Mask;
11
12use crate::array::{ArrayCanonicalImpl, ArrayValidityImpl};
13use crate::builders::ArrayBuilder;
14use crate::stats::{ArrayStats, StatsSetRef};
15use crate::validity::Validity;
16use crate::variants::PrimitiveArrayTrait;
17use crate::vtable::VTableRef;
18use crate::{
19 Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayVariantsImpl, Canonical, EmptyMetadata,
20 Encoding, IntoArray, try_from_array_ref,
21};
22
23mod compute;
24mod native_value;
25mod patch;
26mod serde;
27mod top_value;
28
29pub use native_value::NativeValue;
30
31#[derive(Clone, Debug)]
32pub struct PrimitiveArray {
33 dtype: DType,
34 buffer: ByteBuffer,
35 validity: Validity,
36 stats_set: ArrayStats,
37}
38
39try_from_array_ref!(PrimitiveArray);
40
41pub struct PrimitiveEncoding;
42impl Encoding for PrimitiveEncoding {
43 type Array = PrimitiveArray;
44 type Metadata = EmptyMetadata;
45}
46
47impl PrimitiveArray {
48 pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
49 let buffer = buffer.into();
50 if let Some(len) = validity.maybe_len() {
51 if buffer.len() != len {
52 vortex_panic!(
53 "Buffer and validity length mismatch: buffer={}, validity={}",
54 buffer.len(),
55 len
56 );
57 }
58 }
59 Self {
60 dtype: DType::Primitive(T::PTYPE, validity.nullability()),
61 buffer: buffer.into_byte_buffer(),
62 validity,
63 stats_set: Default::default(),
64 }
65 }
66
67 pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
68 Self::new(Buffer::<T>::empty(), nullability.into())
69 }
70
71 pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
72 match_each_native_ptype!(ptype, |$T| {
73 Self::new::<$T>(Buffer::from_byte_buffer(buffer), validity)
74 })
75 }
76
77 pub fn from_option_iter<T: NativePType, I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
80 let iter = iter.into_iter();
81 let mut values = BufferMut::with_capacity(iter.size_hint().0);
82 let mut validity = BooleanBufferBuilder::new(values.capacity());
83
84 for i in iter {
85 match i {
86 None => {
87 validity.append(false);
88 values.push(T::default());
89 }
90 Some(e) => {
91 validity.append(true);
92 values.push(e);
93 }
94 }
95 }
96 Self::new(values.freeze(), Validity::from(validity.finish()))
97 }
98
99 pub fn validity(&self) -> &Validity {
100 &self.validity
101 }
102
103 pub fn byte_buffer(&self) -> &ByteBuffer {
104 &self.buffer
105 }
106
107 pub fn into_byte_buffer(self) -> ByteBuffer {
108 self.buffer
109 }
110
111 pub fn buffer<T: NativePType>(&self) -> Buffer<T> {
112 if T::PTYPE != self.ptype() {
113 vortex_panic!(
114 "Attempted to get buffer of type {} from array of type {}",
115 T::PTYPE,
116 self.ptype()
117 )
118 }
119 Buffer::from_byte_buffer(self.byte_buffer().clone())
120 }
121
122 pub fn into_buffer<T: NativePType>(self) -> Buffer<T> {
123 if T::PTYPE != self.ptype() {
124 vortex_panic!(
125 "Attempted to get buffer of type {} from array of type {}",
126 T::PTYPE,
127 self.ptype()
128 )
129 }
130 Buffer::from_byte_buffer(self.buffer)
131 }
132
133 pub fn into_buffer_mut<T: NativePType>(self) -> BufferMut<T> {
136 if T::PTYPE != self.ptype() {
137 vortex_panic!(
138 "Attempted to get buffer_mut of type {} from array of type {}",
139 T::PTYPE,
140 self.ptype()
141 )
142 }
143 self.into_buffer()
144 .try_into_mut()
145 .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
146 }
147
148 #[allow(clippy::panic_in_result_fn)]
150 pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, PrimitiveArray> {
151 if T::PTYPE != self.ptype() {
152 vortex_panic!(
153 "Attempted to get buffer_mut of type {} from array of type {}",
154 T::PTYPE,
155 self.ptype()
156 )
157 }
158 let validity = self.validity().clone();
159 Buffer::<T>::from_byte_buffer(self.into_byte_buffer())
160 .try_into_mut()
161 .map_err(|buffer| PrimitiveArray::new(buffer, validity))
162 }
163
164 pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
171 where
172 T: NativePType,
173 R: NativePType,
174 F: FnMut(T) -> R,
175 {
176 let validity = self.validity().clone();
177 let buffer = match self.try_into_buffer_mut() {
178 Ok(buffer_mut) => buffer_mut.map_each(f),
179 Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
180 };
181 PrimitiveArray::new(buffer.freeze(), validity)
182 }
183
184 pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
189 where
190 T: NativePType,
191 R: NativePType,
192 F: FnMut((T, bool)) -> R,
193 {
194 let validity = self.validity();
195
196 let buf_iter = self.buffer::<T>().into_iter();
197
198 let buffer = match &validity {
199 Validity::NonNullable | Validity::AllValid => {
200 BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
201 }
202 Validity::AllInvalid => {
203 BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
204 }
205 Validity::Array(val) => {
206 let val = val.to_canonical()?.into_bool()?;
207 BufferMut::<R>::from_iter(buf_iter.zip(val.boolean_buffer()).map(f))
208 }
209 };
210 Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
211 }
212
213 pub fn as_slice<T: NativePType>(&self) -> &[T] {
217 if T::PTYPE != self.ptype() {
218 vortex_panic!(
219 "Attempted to get slice of type {} from array of type {}",
220 T::PTYPE,
221 self.ptype()
222 )
223 }
224 let length = self.len();
225 let raw_slice = self.byte_buffer().as_slice();
226 debug_assert_eq!(raw_slice.len() / size_of::<T>(), length);
227 unsafe { std::slice::from_raw_parts(raw_slice.as_ptr().cast(), length) }
229 }
230
231 pub fn reinterpret_cast(&self, ptype: PType) -> Self {
232 if self.ptype() == ptype {
233 return self.clone();
234 }
235
236 assert_eq!(
237 self.ptype().byte_width(),
238 ptype.byte_width(),
239 "can't reinterpret cast between integers of two different widths"
240 );
241
242 PrimitiveArray::from_byte_buffer(self.byte_buffer().clone(), ptype, self.validity().clone())
243 }
244}
245
246impl ArrayImpl for PrimitiveArray {
247 type Encoding = PrimitiveEncoding;
248
249 fn _len(&self) -> usize {
250 self.byte_buffer().len() / self.ptype().byte_width()
251 }
252
253 fn _dtype(&self) -> &DType {
254 &self.dtype
255 }
256 fn _vtable(&self) -> VTableRef {
257 VTableRef::new_ref(&PrimitiveEncoding)
258 }
259
260 fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
261 let validity = if self.validity().is_array() {
262 Validity::Array(children[0].clone())
263 } else {
264 self.validity().clone()
265 };
266
267 Ok(Self::from_byte_buffer(
268 self.byte_buffer().clone(),
269 self.ptype(),
270 validity,
271 ))
272 }
273}
274
275impl ArrayStatisticsImpl for PrimitiveArray {
276 fn _stats_ref(&self) -> StatsSetRef<'_> {
277 self.stats_set.to_ref(self)
278 }
279}
280
281impl ArrayVariantsImpl for PrimitiveArray {
282 fn _as_primitive_typed(&self) -> Option<&dyn PrimitiveArrayTrait> {
283 Some(self)
284 }
285}
286
287impl PrimitiveArrayTrait for PrimitiveArray {}
288
289impl<T: NativePType> FromIterator<T> for PrimitiveArray {
290 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
291 let values = BufferMut::from_iter(iter);
292 PrimitiveArray::new(values.freeze(), Validity::NonNullable)
293 }
294}
295
296impl<T: NativePType> IntoArray for Buffer<T> {
297 fn into_array(self) -> ArrayRef {
298 PrimitiveArray::new(self, Validity::NonNullable).into_array()
299 }
300}
301
302impl<T: NativePType> IntoArray for BufferMut<T> {
303 fn into_array(self) -> ArrayRef {
304 self.freeze().into_array()
305 }
306}
307
308impl ArrayCanonicalImpl for PrimitiveArray {
309 fn _to_canonical(&self) -> VortexResult<Canonical> {
310 Ok(Canonical::Primitive(self.clone()))
311 }
312
313 fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
314 builder.extend_from_array(self)
315 }
316}
317
318impl ArrayValidityImpl for PrimitiveArray {
319 fn _is_valid(&self, index: usize) -> VortexResult<bool> {
320 self.validity.is_valid(index)
321 }
322
323 fn _all_valid(&self) -> VortexResult<bool> {
324 self.validity.all_valid()
325 }
326
327 fn _all_invalid(&self) -> VortexResult<bool> {
328 self.validity.all_invalid()
329 }
330
331 fn _validity_mask(&self) -> VortexResult<Mask> {
332 self.validity.to_mask(self.len())
333 }
334}
335
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;

    use crate::array::Array;
    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::mask::test_mask;
    use crate::validity::Validity;

    /// Runs the mask conformance check over the same five values under each kind of
    /// validity.
    #[test]
    fn test_mask_primitive_array() {
        let validities = [
            Validity::NonNullable,
            Validity::AllValid,
            Validity::AllInvalid,
            Validity::Array(BoolArray::from_iter([true, false, true, false, true]).into_array()),
        ];

        for validity in validities {
            test_mask(&PrimitiveArray::new(buffer![0, 1, 2, 3, 4], validity));
        }
    }
}