arrow2/array/primitive/mod.rs
1use crate::{
2 bitmap::{
3 utils::{BitmapIter, ZipValidity},
4 Bitmap,
5 },
6 buffer::Buffer,
7 datatypes::*,
8 error::Error,
9 trusted_len::TrustedLen,
10 types::{days_ms, f16, i256, months_days_ns, NativeType},
11};
12
13use super::Array;
14use either::Either;
15
16#[cfg(feature = "arrow")]
17mod data;
18mod ffi;
19pub(super) mod fmt;
20mod from_natural;
21mod iterator;
22pub use iterator::*;
23mod mutable;
24pub use mutable::*;
25
/// A [`PrimitiveArray`] is Arrow's semantic equivalent of an immutable `Vec<Option<T>>` where
/// `T` is a [`NativeType`] (e.g. [`i32`]). It implements [`Array`].
///
/// One way to think about a [`PrimitiveArray`] is `(DataType, Arc<Vec<T>>, Option<Arc<Vec<u8>>>)`
/// where:
/// * the first item is the array's logical type
/// * the second is the immutable values
/// * the third is the immutable validity (a bitmap stating whether each value is null or not).
///
/// The size of this struct is `O(1)`, as all data is stored behind an [`std::sync::Arc`].
/// # Example
/// ```
/// use arrow2::array::PrimitiveArray;
/// use arrow2::bitmap::Bitmap;
/// use arrow2::buffer::Buffer;
///
/// let array = PrimitiveArray::from([Some(1i32), None, Some(10)]);
/// assert_eq!(array.value(0), 1);
/// assert_eq!(array.iter().collect::<Vec<_>>(), vec![Some(&1i32), None, Some(&10)]);
/// assert_eq!(array.values_iter().copied().collect::<Vec<_>>(), vec![1, 0, 10]);
/// // the underlying representation
/// assert_eq!(array.values(), &Buffer::from(vec![1i32, 0, 10]));
/// assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));
///
/// ```
#[derive(Clone)]
pub struct PrimitiveArray<T: NativeType> {
    // Logical type of the array; `check` requires its physical type to be
    // `PhysicalType::Primitive(T::PRIMITIVE)`.
    data_type: DataType,
    // One value per slot, including null slots (whose value is undetermined).
    values: Buffer<T>,
    // Optional validity bitmap; when present, `check` requires its length to equal
    // `values.len()`.
    validity: Option<Bitmap>,
}
57
58pub(super) fn check<T: NativeType>(
59 data_type: &DataType,
60 values: &[T],
61 validity_len: Option<usize>,
62) -> Result<(), Error> {
63 if validity_len.map_or(false, |len| len != values.len()) {
64 return Err(Error::oos(
65 "validity mask length must match the number of values",
66 ));
67 }
68
69 if data_type.to_physical_type() != PhysicalType::Primitive(T::PRIMITIVE) {
70 return Err(Error::oos(
71 "PrimitiveArray can only be initialized with a DataType whose physical type is Primitive",
72 ));
73 }
74 Ok(())
75}
76
77impl<T: NativeType> PrimitiveArray<T> {
78 /// The canonical method to create a [`PrimitiveArray`] out of its internal components.
79 /// # Implementation
80 /// This function is `O(1)`.
81 ///
82 /// # Errors
83 /// This function errors iff:
84 /// * The validity is not `None` and its length is different from `values`'s length
85 /// * The `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive(T::PRIMITIVE)`]
86 pub fn try_new(
87 data_type: DataType,
88 values: Buffer<T>,
89 validity: Option<Bitmap>,
90 ) -> Result<Self, Error> {
91 check(&data_type, &values, validity.as_ref().map(|v| v.len()))?;
92 Ok(Self {
93 data_type,
94 values,
95 validity,
96 })
97 }
98
99 /// Returns a new [`PrimitiveArray`] with a different logical type.
100 ///
101 /// This function is useful to assign a different [`DataType`] to the array.
102 /// Used to change the arrays' logical type (see example).
103 /// # Example
104 /// ```
105 /// use arrow2::array::Int32Array;
106 /// use arrow2::datatypes::DataType;
107 ///
108 /// let array = Int32Array::from(&[Some(1), None, Some(2)]).to(DataType::Date32);
109 /// assert_eq!(
110 /// format!("{:?}", array),
111 /// "Date32[1970-01-02, None, 1970-01-03]"
112 /// );
113 /// ```
114 /// # Panics
115 /// Panics iff the `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive(T::PRIMITIVE)`]
116 #[inline]
117 #[must_use]
118 pub fn to(self, data_type: DataType) -> Self {
119 check(
120 &data_type,
121 &self.values,
122 self.validity.as_ref().map(|v| v.len()),
123 )
124 .unwrap();
125 Self {
126 data_type,
127 values: self.values,
128 validity: self.validity,
129 }
130 }
131
132 /// Creates a (non-null) [`PrimitiveArray`] from a vector of values.
133 /// This function is `O(1)`.
134 /// # Examples
135 /// ```
136 /// use arrow2::array::PrimitiveArray;
137 ///
138 /// let array = PrimitiveArray::from_vec(vec![1, 2, 3]);
139 /// assert_eq!(format!("{:?}", array), "Int32[1, 2, 3]");
140 /// ```
141 pub fn from_vec(values: Vec<T>) -> Self {
142 Self::new(T::PRIMITIVE.into(), values.into(), None)
143 }
144
145 /// Returns an iterator over the values and validity, `Option<&T>`.
146 #[inline]
147 pub fn iter(&self) -> ZipValidity<&T, std::slice::Iter<T>, BitmapIter> {
148 ZipValidity::new_with_validity(self.values().iter(), self.validity())
149 }
150
151 /// Returns an iterator of the values, `&T`, ignoring the arrays' validity.
152 #[inline]
153 pub fn values_iter(&self) -> std::slice::Iter<T> {
154 self.values().iter()
155 }
156
157 /// Returns the length of this array
158 #[inline]
159 pub fn len(&self) -> usize {
160 self.values.len()
161 }
162
163 /// The values [`Buffer`].
164 /// Values on null slots are undetermined (they can be anything).
165 #[inline]
166 pub fn values(&self) -> &Buffer<T> {
167 &self.values
168 }
169
170 /// Returns the optional validity.
171 #[inline]
172 pub fn validity(&self) -> Option<&Bitmap> {
173 self.validity.as_ref()
174 }
175
176 /// Returns the arrays' [`DataType`].
177 #[inline]
178 pub fn data_type(&self) -> &DataType {
179 &self.data_type
180 }
181
182 /// Returns the value at slot `i`.
183 ///
184 /// Equivalent to `self.values()[i]`. The value of a null slot is undetermined (it can be anything).
185 /// # Panic
186 /// This function panics iff `i >= self.len`.
187 #[inline]
188 pub fn value(&self, i: usize) -> T {
189 self.values[i]
190 }
191
192 /// Returns the value at index `i`.
193 /// The value on null slots is undetermined (it can be anything).
194 /// # Safety
195 /// Caller must be sure that `i < self.len()`
196 #[inline]
197 pub unsafe fn value_unchecked(&self, i: usize) -> T {
198 *self.values.get_unchecked(i)
199 }
200
201 /// Returns the element at index `i` or `None` if it is null
202 /// # Panics
203 /// iff `i >= self.len()`
204 #[inline]
205 pub fn get(&self, i: usize) -> Option<T> {
206 if !self.is_null(i) {
207 // soundness: Array::is_null panics if i >= self.len
208 unsafe { Some(self.value_unchecked(i)) }
209 } else {
210 None
211 }
212 }
213
214 /// Slices this [`PrimitiveArray`] by an offset and length.
215 /// # Implementation
216 /// This operation is `O(1)`.
217 #[inline]
218 pub fn slice(&mut self, offset: usize, length: usize) {
219 assert!(
220 offset + length <= self.len(),
221 "offset + length may not exceed length of array"
222 );
223 unsafe { self.slice_unchecked(offset, length) }
224 }
225
226 /// Slices this [`PrimitiveArray`] by an offset and length.
227 /// # Implementation
228 /// This operation is `O(1)`.
229 /// # Safety
230 /// The caller must ensure that `offset + length <= self.len()`.
231 #[inline]
232 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
233 self.validity.as_mut().and_then(|bitmap| {
234 bitmap.slice_unchecked(offset, length);
235 (bitmap.unset_bits() > 0).then(|| bitmap)
236 });
237 self.values.slice_unchecked(offset, length);
238 }
239
240 impl_sliced!();
241 impl_mut_validity!();
242 impl_into_array!();
243
244 /// Returns this [`PrimitiveArray`] with new values.
245 /// # Panics
246 /// This function panics iff `values.len() != self.len()`.
247 #[must_use]
248 pub fn with_values(mut self, values: Buffer<T>) -> Self {
249 self.set_values(values);
250 self
251 }
252
253 /// Update the values of this [`PrimitiveArray`].
254 /// # Panics
255 /// This function panics iff `values.len() != self.len()`.
256 pub fn set_values(&mut self, values: Buffer<T>) {
257 assert_eq!(
258 values.len(),
259 self.len(),
260 "values' length must be equal to this arrays' length"
261 );
262 self.values = values;
263 }
264
265 /// Applies a function `f` to the validity of this array.
266 ///
267 /// This is an API to leverage clone-on-write
268 /// # Panics
269 /// This function panics if the function `f` modifies the length of the [`Bitmap`].
270 pub fn apply_validity<F: FnOnce(Bitmap) -> Bitmap>(&mut self, f: F) {
271 if let Some(validity) = std::mem::take(&mut self.validity) {
272 self.set_validity(Some(f(validity)))
273 }
274 }
275
276 /// Returns an option of a mutable reference to the values of this [`PrimitiveArray`].
277 pub fn get_mut_values(&mut self) -> Option<&mut [T]> {
278 self.values.get_mut_slice()
279 }
280
281 /// Returns its internal representation
282 #[must_use]
283 pub fn into_inner(self) -> (DataType, Buffer<T>, Option<Bitmap>) {
284 let Self {
285 data_type,
286 values,
287 validity,
288 } = self;
289 (data_type, values, validity)
290 }
291
292 /// Creates a `[PrimitiveArray]` from its internal representation.
293 /// This is the inverted from `[PrimitiveArray::into_inner]`
294 pub fn from_inner(
295 data_type: DataType,
296 values: Buffer<T>,
297 validity: Option<Bitmap>,
298 ) -> Result<Self, Error> {
299 check(&data_type, &values, validity.as_ref().map(|v| v.len()))?;
300 Ok(unsafe { Self::from_inner_unchecked(data_type, values, validity) })
301 }
302
303 /// Creates a `[PrimitiveArray]` from its internal representation.
304 /// This is the inverted from `[PrimitiveArray::into_inner]`
305 ///
306 /// # Safety
307 /// Callers must ensure all invariants of this struct are upheld.
308 pub unsafe fn from_inner_unchecked(
309 data_type: DataType,
310 values: Buffer<T>,
311 validity: Option<Bitmap>,
312 ) -> Self {
313 Self {
314 data_type,
315 values,
316 validity,
317 }
318 }
319
320 /// Try to convert this [`PrimitiveArray`] to a [`MutablePrimitiveArray`] via copy-on-write semantics.
321 ///
322 /// A [`PrimitiveArray`] is backed by a [`Buffer`] and [`Bitmap`] which are essentially `Arc<Vec<_>>`.
323 /// This function returns a [`MutablePrimitiveArray`] (via [`std::sync::Arc::get_mut`]) iff both values
324 /// and validity have not been cloned / are unique references to their underlying vectors.
325 ///
326 /// This function is primarily used to re-use memory regions.
327 #[must_use]
328 pub fn into_mut(self) -> Either<Self, MutablePrimitiveArray<T>> {
329 use Either::*;
330
331 if let Some(bitmap) = self.validity {
332 match bitmap.into_mut() {
333 Left(bitmap) => Left(PrimitiveArray::new(
334 self.data_type,
335 self.values,
336 Some(bitmap),
337 )),
338 Right(mutable_bitmap) => match self.values.into_mut() {
339 Right(values) => Right(
340 MutablePrimitiveArray::try_new(
341 self.data_type,
342 values,
343 Some(mutable_bitmap),
344 )
345 .unwrap(),
346 ),
347 Left(values) => Left(PrimitiveArray::new(
348 self.data_type,
349 values,
350 Some(mutable_bitmap.into()),
351 )),
352 },
353 }
354 } else {
355 match self.values.into_mut() {
356 Right(values) => {
357 Right(MutablePrimitiveArray::try_new(self.data_type, values, None).unwrap())
358 }
359 Left(values) => Left(PrimitiveArray::new(self.data_type, values, None)),
360 }
361 }
362 }
363
364 /// Returns a new empty (zero-length) [`PrimitiveArray`].
365 pub fn new_empty(data_type: DataType) -> Self {
366 Self::new(data_type, Buffer::new(), None)
367 }
368
369 /// Returns a new [`PrimitiveArray`] where all slots are null / `None`.
370 #[inline]
371 pub fn new_null(data_type: DataType, length: usize) -> Self {
372 Self::new(
373 data_type,
374 vec![T::default(); length].into(),
375 Some(Bitmap::new_zeroed(length)),
376 )
377 }
378
379 /// Creates a (non-null) [`PrimitiveArray`] from an iterator of values.
380 /// # Implementation
381 /// This does not assume that the iterator has a known length.
382 pub fn from_values<I: IntoIterator<Item = T>>(iter: I) -> Self {
383 Self::new(T::PRIMITIVE.into(), Vec::<T>::from_iter(iter).into(), None)
384 }
385
386 /// Creates a (non-null) [`PrimitiveArray`] from a slice of values.
387 /// # Implementation
388 /// This is essentially a memcopy and is thus `O(N)`
389 pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {
390 Self::new(
391 T::PRIMITIVE.into(),
392 Vec::<T>::from(slice.as_ref()).into(),
393 None,
394 )
395 }
396
397 /// Creates a (non-null) [`PrimitiveArray`] from a [`TrustedLen`] of values.
398 /// # Implementation
399 /// This does not assume that the iterator has a known length.
400 pub fn from_trusted_len_values_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {
401 MutablePrimitiveArray::<T>::from_trusted_len_values_iter(iter).into()
402 }
403
404 /// Creates a new [`PrimitiveArray`] from an iterator over values
405 /// # Safety
406 /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
407 /// I.e. that `size_hint().1` correctly reports its length.
408 pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = T>>(iter: I) -> Self {
409 MutablePrimitiveArray::<T>::from_trusted_len_values_iter_unchecked(iter).into()
410 }
411
412 /// Creates a [`PrimitiveArray`] from a [`TrustedLen`] of optional values.
413 pub fn from_trusted_len_iter<I: TrustedLen<Item = Option<T>>>(iter: I) -> Self {
414 MutablePrimitiveArray::<T>::from_trusted_len_iter(iter).into()
415 }
416
417 /// Creates a [`PrimitiveArray`] from an iterator of optional values.
418 /// # Safety
419 /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
420 /// I.e. that `size_hint().1` correctly reports its length.
421 pub unsafe fn from_trusted_len_iter_unchecked<I: Iterator<Item = Option<T>>>(iter: I) -> Self {
422 MutablePrimitiveArray::<T>::from_trusted_len_iter_unchecked(iter).into()
423 }
424
425 /// Alias for `Self::try_new(..).unwrap()`.
426 /// # Panics
427 /// This function errors iff:
428 /// * The validity is not `None` and its length is different from `values`'s length
429 /// * The `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive`].
430 pub fn new(data_type: DataType, values: Buffer<T>, validity: Option<Bitmap>) -> Self {
431 Self::try_new(data_type, values, validity).unwrap()
432 }
433}
434
/// [`Array`] implementation for [`PrimitiveArray`].
impl<T: NativeType> Array for PrimitiveArray<T> {
    // Macro defined elsewhere in the crate; presumably supplies the remaining `Array`
    // methods shared by all array types (TODO confirm against the macro definition).
    impl_common_array!();

    /// Returns a reference to the stored validity bitmap, if any.
    fn validity(&self) -> Option<&Bitmap> {
        self.validity.as_ref()
    }

    /// Clones this array, applies `validity` to the clone and returns it boxed as `dyn Array`.
    #[inline]
    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
        // NOTE(review): `self.clone()` yields an owned `PrimitiveArray<T>`, so this call is
        // expected to resolve to the inherent by-value `with_validity` (presumably generated by
        // `impl_mut_validity!`) rather than recursing into this trait method — confirm.
        Box::new(self.clone().with_validity(validity))
    }
}
447
/// A type alias of [`PrimitiveArray`] for `i8`
pub type Int8Array = PrimitiveArray<i8>;
/// A type alias of [`PrimitiveArray`] for `i16`
pub type Int16Array = PrimitiveArray<i16>;
/// A type alias of [`PrimitiveArray`] for `i32`
pub type Int32Array = PrimitiveArray<i32>;
/// A type alias of [`PrimitiveArray`] for `i64`
pub type Int64Array = PrimitiveArray<i64>;
/// A type alias of [`PrimitiveArray`] for `i128`
pub type Int128Array = PrimitiveArray<i128>;
/// A type alias of [`PrimitiveArray`] for `i256`
pub type Int256Array = PrimitiveArray<i256>;
/// A type alias of [`PrimitiveArray`] for [`days_ms`]
pub type DaysMsArray = PrimitiveArray<days_ms>;
/// A type alias of [`PrimitiveArray`] for [`months_days_ns`]
pub type MonthsDaysNsArray = PrimitiveArray<months_days_ns>;
/// A type alias of [`PrimitiveArray`] for `f16`
pub type Float16Array = PrimitiveArray<f16>;
/// A type alias of [`PrimitiveArray`] for `f32`
pub type Float32Array = PrimitiveArray<f32>;
/// A type alias of [`PrimitiveArray`] for `f64`
pub type Float64Array = PrimitiveArray<f64>;
/// A type alias of [`PrimitiveArray`] for `u8`
pub type UInt8Array = PrimitiveArray<u8>;
/// A type alias of [`PrimitiveArray`] for `u16`
pub type UInt16Array = PrimitiveArray<u16>;
/// A type alias of [`PrimitiveArray`] for `u32`
pub type UInt32Array = PrimitiveArray<u32>;
/// A type alias of [`PrimitiveArray`] for `u64`
pub type UInt64Array = PrimitiveArray<u64>;
478
/// A type alias of [`MutablePrimitiveArray`] for `i8`
pub type Int8Vec = MutablePrimitiveArray<i8>;
/// A type alias of [`MutablePrimitiveArray`] for `i16`
pub type Int16Vec = MutablePrimitiveArray<i16>;
/// A type alias of [`MutablePrimitiveArray`] for `i32`
pub type Int32Vec = MutablePrimitiveArray<i32>;
/// A type alias of [`MutablePrimitiveArray`] for `i64`
pub type Int64Vec = MutablePrimitiveArray<i64>;
/// A type alias of [`MutablePrimitiveArray`] for `i128`
pub type Int128Vec = MutablePrimitiveArray<i128>;
/// A type alias of [`MutablePrimitiveArray`] for `i256`
pub type Int256Vec = MutablePrimitiveArray<i256>;
/// A type alias of [`MutablePrimitiveArray`] for [`days_ms`]
pub type DaysMsVec = MutablePrimitiveArray<days_ms>;
/// A type alias of [`MutablePrimitiveArray`] for [`months_days_ns`]
pub type MonthsDaysNsVec = MutablePrimitiveArray<months_days_ns>;
/// A type alias of [`MutablePrimitiveArray`] for `f16`
pub type Float16Vec = MutablePrimitiveArray<f16>;
/// A type alias of [`MutablePrimitiveArray`] for `f32`
pub type Float32Vec = MutablePrimitiveArray<f32>;
/// A type alias of [`MutablePrimitiveArray`] for `f64`
pub type Float64Vec = MutablePrimitiveArray<f64>;
/// A type alias of [`MutablePrimitiveArray`] for `u8`
pub type UInt8Vec = MutablePrimitiveArray<u8>;
/// A type alias of [`MutablePrimitiveArray`] for `u16`
pub type UInt16Vec = MutablePrimitiveArray<u16>;
/// A type alias of [`MutablePrimitiveArray`] for `u32`
pub type UInt32Vec = MutablePrimitiveArray<u32>;
/// A type alias of [`MutablePrimitiveArray`] for `u64`
pub type UInt64Vec = MutablePrimitiveArray<u64>;
509
510impl<T: NativeType> Default for PrimitiveArray<T> {
511 fn default() -> Self {
512 PrimitiveArray::new(T::PRIMITIVE.into(), Default::default(), None)
513 }
514}