vortex_array/arrays/primitive/
mod.rs1use std::fmt::Debug;
2use std::iter;
3
4mod accessor;
5
6use arrow_buffer::BooleanBufferBuilder;
7use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
8use vortex_dtype::{DType, NativePType, Nullability, PType, match_each_native_ptype};
9use vortex_error::{VortexResult, vortex_panic};
10use vortex_mask::Mask;
11
12use crate::array::{ArrayCanonicalImpl, ArrayValidityImpl};
13use crate::builders::ArrayBuilder;
14use crate::stats::{ArrayStats, StatsSetRef};
15use crate::validity::Validity;
16use crate::variants::PrimitiveArrayTrait;
17use crate::vtable::VTableRef;
18use crate::{
19 Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayVariantsImpl, Canonical, EmptyMetadata,
20 Encoding, IntoArray, try_from_array_ref,
21};
22
23mod compute;
24mod native_value;
25mod patch;
26mod serde;
27mod top_value;
28
29pub use compute::{IS_CONST_LANE_WIDTH, compute_is_constant};
30pub use native_value::NativeValue;
31
32#[derive(Clone, Debug)]
33pub struct PrimitiveArray {
34 dtype: DType,
35 buffer: ByteBuffer,
36 validity: Validity,
37 stats_set: ArrayStats,
38}
39
40try_from_array_ref!(PrimitiveArray);
41
42pub struct PrimitiveEncoding;
43impl Encoding for PrimitiveEncoding {
44 type Array = PrimitiveArray;
45 type Metadata = EmptyMetadata;
46}
47
48impl PrimitiveArray {
49 pub fn new<T: NativePType>(buffer: impl Into<Buffer<T>>, validity: Validity) -> Self {
50 let buffer = buffer.into();
51 if let Some(len) = validity.maybe_len() {
52 if buffer.len() != len {
53 vortex_panic!(
54 "Buffer and validity length mismatch: buffer={}, validity={}",
55 buffer.len(),
56 len
57 );
58 }
59 }
60 Self {
61 dtype: DType::Primitive(T::PTYPE, validity.nullability()),
62 buffer: buffer.into_byte_buffer(),
63 validity,
64 stats_set: Default::default(),
65 }
66 }
67
68 pub fn empty<T: NativePType>(nullability: Nullability) -> Self {
69 Self::new(Buffer::<T>::empty(), nullability.into())
70 }
71
72 pub fn from_byte_buffer(buffer: ByteBuffer, ptype: PType, validity: Validity) -> Self {
73 match_each_native_ptype!(ptype, |$T| {
74 Self::new::<$T>(Buffer::from_byte_buffer(buffer), validity)
75 })
76 }
77
78 pub fn from_option_iter<T: NativePType, I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
81 let iter = iter.into_iter();
82 let mut values = BufferMut::with_capacity(iter.size_hint().0);
83 let mut validity = BooleanBufferBuilder::new(values.capacity());
84
85 for i in iter {
86 match i {
87 None => {
88 validity.append(false);
89 values.push(T::default());
90 }
91 Some(e) => {
92 validity.append(true);
93 values.push(e);
94 }
95 }
96 }
97 Self::new(values.freeze(), Validity::from(validity.finish()))
98 }
99
100 pub fn validity(&self) -> &Validity {
101 &self.validity
102 }
103
104 pub fn byte_buffer(&self) -> &ByteBuffer {
105 &self.buffer
106 }
107
108 pub fn into_byte_buffer(self) -> ByteBuffer {
109 self.buffer
110 }
111
112 pub fn buffer<T: NativePType>(&self) -> Buffer<T> {
113 if T::PTYPE != self.ptype() {
114 vortex_panic!(
115 "Attempted to get buffer of type {} from array of type {}",
116 T::PTYPE,
117 self.ptype()
118 )
119 }
120 Buffer::from_byte_buffer(self.byte_buffer().clone())
121 }
122
123 pub fn into_buffer<T: NativePType>(self) -> Buffer<T> {
124 if T::PTYPE != self.ptype() {
125 vortex_panic!(
126 "Attempted to get buffer of type {} from array of type {}",
127 T::PTYPE,
128 self.ptype()
129 )
130 }
131 Buffer::from_byte_buffer(self.buffer)
132 }
133
134 pub fn into_buffer_mut<T: NativePType>(self) -> BufferMut<T> {
137 if T::PTYPE != self.ptype() {
138 vortex_panic!(
139 "Attempted to get buffer_mut of type {} from array of type {}",
140 T::PTYPE,
141 self.ptype()
142 )
143 }
144 self.into_buffer()
145 .try_into_mut()
146 .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
147 }
148
149 #[allow(clippy::panic_in_result_fn)]
151 pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, PrimitiveArray> {
152 if T::PTYPE != self.ptype() {
153 vortex_panic!(
154 "Attempted to get buffer_mut of type {} from array of type {}",
155 T::PTYPE,
156 self.ptype()
157 )
158 }
159 let validity = self.validity().clone();
160 Buffer::<T>::from_byte_buffer(self.into_byte_buffer())
161 .try_into_mut()
162 .map_err(|buffer| PrimitiveArray::new(buffer, validity))
163 }
164
165 pub fn map_each<T, R, F>(self, f: F) -> PrimitiveArray
172 where
173 T: NativePType,
174 R: NativePType,
175 F: FnMut(T) -> R,
176 {
177 let validity = self.validity().clone();
178 let buffer = match self.try_into_buffer_mut() {
179 Ok(buffer_mut) => buffer_mut.map_each(f),
180 Err(parray) => BufferMut::<R>::from_iter(parray.buffer::<T>().iter().copied().map(f)),
181 };
182 PrimitiveArray::new(buffer.freeze(), validity)
183 }
184
185 pub fn map_each_with_validity<T, R, F>(self, f: F) -> VortexResult<PrimitiveArray>
190 where
191 T: NativePType,
192 R: NativePType,
193 F: FnMut((T, bool)) -> R,
194 {
195 let validity = self.validity();
196
197 let buf_iter = self.buffer::<T>().into_iter();
198
199 let buffer = match &validity {
200 Validity::NonNullable | Validity::AllValid => {
201 BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(true)).map(f))
202 }
203 Validity::AllInvalid => {
204 BufferMut::<R>::from_iter(buf_iter.zip(iter::repeat(false)).map(f))
205 }
206 Validity::Array(val) => {
207 let val = val.to_canonical()?.into_bool()?;
208 BufferMut::<R>::from_iter(buf_iter.zip(val.boolean_buffer()).map(f))
209 }
210 };
211 Ok(PrimitiveArray::new(buffer.freeze(), validity.clone()))
212 }
213
214 pub fn as_slice<T: NativePType>(&self) -> &[T] {
218 if T::PTYPE != self.ptype() {
219 vortex_panic!(
220 "Attempted to get slice of type {} from array of type {}",
221 T::PTYPE,
222 self.ptype()
223 )
224 }
225 let length = self.len();
226 let raw_slice = self.byte_buffer().as_slice();
227 debug_assert_eq!(raw_slice.len() / size_of::<T>(), length);
228 unsafe { std::slice::from_raw_parts(raw_slice.as_ptr().cast(), length) }
230 }
231
232 pub fn reinterpret_cast(&self, ptype: PType) -> Self {
233 if self.ptype() == ptype {
234 return self.clone();
235 }
236
237 assert_eq!(
238 self.ptype().byte_width(),
239 ptype.byte_width(),
240 "can't reinterpret cast between integers of two different widths"
241 );
242
243 PrimitiveArray::from_byte_buffer(self.byte_buffer().clone(), ptype, self.validity().clone())
244 }
245}
246
247impl ArrayImpl for PrimitiveArray {
248 type Encoding = PrimitiveEncoding;
249
250 fn _len(&self) -> usize {
251 self.byte_buffer().len() / self.ptype().byte_width()
252 }
253
254 fn _dtype(&self) -> &DType {
255 &self.dtype
256 }
257 fn _vtable(&self) -> VTableRef {
258 VTableRef::new_ref(&PrimitiveEncoding)
259 }
260
261 fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
262 let validity = if self.validity().is_array() {
263 Validity::Array(children[0].clone())
264 } else {
265 self.validity().clone()
266 };
267
268 Ok(Self::from_byte_buffer(
269 self.byte_buffer().clone(),
270 self.ptype(),
271 validity,
272 ))
273 }
274}
275
276impl ArrayStatisticsImpl for PrimitiveArray {
277 fn _stats_ref(&self) -> StatsSetRef<'_> {
278 self.stats_set.to_ref(self)
279 }
280}
281
282impl ArrayVariantsImpl for PrimitiveArray {
283 fn _as_primitive_typed(&self) -> Option<&dyn PrimitiveArrayTrait> {
284 Some(self)
285 }
286}
287
288impl PrimitiveArrayTrait for PrimitiveArray {}
289
290impl<T: NativePType> FromIterator<T> for PrimitiveArray {
291 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
292 let values = BufferMut::from_iter(iter);
293 PrimitiveArray::new(values.freeze(), Validity::NonNullable)
294 }
295}
296
297impl<T: NativePType> IntoArray for Buffer<T> {
298 fn into_array(self) -> ArrayRef {
299 PrimitiveArray::new(self, Validity::NonNullable).into_array()
300 }
301}
302
303impl<T: NativePType> IntoArray for BufferMut<T> {
304 fn into_array(self) -> ArrayRef {
305 self.freeze().into_array()
306 }
307}
308
309impl ArrayCanonicalImpl for PrimitiveArray {
310 fn _to_canonical(&self) -> VortexResult<Canonical> {
311 Ok(Canonical::Primitive(self.clone()))
312 }
313
314 fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
315 builder.extend_from_array(self)
316 }
317}
318
319impl ArrayValidityImpl for PrimitiveArray {
320 fn _is_valid(&self, index: usize) -> VortexResult<bool> {
321 self.validity.is_valid(index)
322 }
323
324 fn _all_valid(&self) -> VortexResult<bool> {
325 self.validity.all_valid()
326 }
327
328 fn _all_invalid(&self) -> VortexResult<bool> {
329 self.validity.all_invalid()
330 }
331
332 fn _validity_mask(&self) -> VortexResult<Mask> {
333 self.validity.to_mask(self.len())
334 }
335}
336
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;

    use crate::array::Array;
    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::mask::test_mask;
    use crate::validity::Validity;

    /// Runs the shared mask conformance check once for each validity representation.
    #[test]
    fn test_mask_primitive_array() {
        let validities = [
            Validity::NonNullable,
            Validity::AllValid,
            Validity::AllInvalid,
            Validity::Array(BoolArray::from_iter([true, false, true, false, true]).into_array()),
        ];

        for validity in validities {
            test_mask(&PrimitiveArray::new(buffer![0, 1, 2, 3, 4], validity));
        }
    }
}