arrow2/array/primitive/
mod.rs

1use crate::{
2    bitmap::{
3        utils::{BitmapIter, ZipValidity},
4        Bitmap,
5    },
6    buffer::Buffer,
7    datatypes::*,
8    error::Error,
9    trusted_len::TrustedLen,
10    types::{days_ms, f16, i256, months_days_ns, NativeType},
11};
12
13use super::Array;
14use either::Either;
15
16#[cfg(feature = "arrow")]
17mod data;
18mod ffi;
19pub(super) mod fmt;
20mod from_natural;
21mod iterator;
22pub use iterator::*;
23mod mutable;
24pub use mutable::*;
25
/// A [`PrimitiveArray`] is Arrow's semantic equivalent of an immutable `Vec<Option<T>>` where
/// T is [`NativeType`] (e.g. [`i32`]). It implements [`Array`].
///
/// One way to think about a [`PrimitiveArray`] is `(DataType, Arc<Vec<T>>, Option<Arc<Vec<u8>>>)`
/// where:
/// * the first item is the array's logical type
/// * the second is the immutable values
/// * the third is the immutable validity (whether a value is null or not as a bitmap).
///
/// The size of this struct is `O(1)`, as all data is stored behind an [`std::sync::Arc`].
///
/// # Example
/// ```
/// use arrow2::array::PrimitiveArray;
/// use arrow2::bitmap::Bitmap;
/// use arrow2::buffer::Buffer;
///
/// let array = PrimitiveArray::from([Some(1i32), None, Some(10)]);
/// assert_eq!(array.value(0), 1);
/// assert_eq!(array.iter().collect::<Vec<_>>(), vec![Some(&1i32), None, Some(&10)]);
/// assert_eq!(array.values_iter().copied().collect::<Vec<_>>(), vec![1, 0, 10]);
/// // the underlying representation
/// assert_eq!(array.values(), &Buffer::from(vec![1i32, 0, 10]));
/// assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));
///
/// ```
#[derive(Clone)]
pub struct PrimitiveArray<T: NativeType> {
    // The array's logical type.
    data_type: DataType,
    // One value per slot, including null slots.
    values: Buffer<T>,
    // Optional bitmap telling whether each slot is null or not.
    validity: Option<Bitmap>,
}
57
58pub(super) fn check<T: NativeType>(
59    data_type: &DataType,
60    values: &[T],
61    validity_len: Option<usize>,
62) -> Result<(), Error> {
63    if validity_len.map_or(false, |len| len != values.len()) {
64        return Err(Error::oos(
65            "validity mask length must match the number of values",
66        ));
67    }
68
69    if data_type.to_physical_type() != PhysicalType::Primitive(T::PRIMITIVE) {
70        return Err(Error::oos(
71            "PrimitiveArray can only be initialized with a DataType whose physical type is Primitive",
72        ));
73    }
74    Ok(())
75}
76
77impl<T: NativeType> PrimitiveArray<T> {
78    /// The canonical method to create a [`PrimitiveArray`] out of its internal components.
79    /// # Implementation
80    /// This function is `O(1)`.
81    ///
82    /// # Errors
83    /// This function errors iff:
84    /// * The validity is not `None` and its length is different from `values`'s length
85    /// * The `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive(T::PRIMITIVE)`]
86    pub fn try_new(
87        data_type: DataType,
88        values: Buffer<T>,
89        validity: Option<Bitmap>,
90    ) -> Result<Self, Error> {
91        check(&data_type, &values, validity.as_ref().map(|v| v.len()))?;
92        Ok(Self {
93            data_type,
94            values,
95            validity,
96        })
97    }
98
99    /// Returns a new [`PrimitiveArray`] with a different logical type.
100    ///
101    /// This function is useful to assign a different [`DataType`] to the array.
102    /// Used to change the arrays' logical type (see example).
103    /// # Example
104    /// ```
105    /// use arrow2::array::Int32Array;
106    /// use arrow2::datatypes::DataType;
107    ///
108    /// let array = Int32Array::from(&[Some(1), None, Some(2)]).to(DataType::Date32);
109    /// assert_eq!(
110    ///    format!("{:?}", array),
111    ///    "Date32[1970-01-02, None, 1970-01-03]"
112    /// );
113    /// ```
114    /// # Panics
115    /// Panics iff the `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive(T::PRIMITIVE)`]
116    #[inline]
117    #[must_use]
118    pub fn to(self, data_type: DataType) -> Self {
119        check(
120            &data_type,
121            &self.values,
122            self.validity.as_ref().map(|v| v.len()),
123        )
124        .unwrap();
125        Self {
126            data_type,
127            values: self.values,
128            validity: self.validity,
129        }
130    }
131
132    /// Creates a (non-null) [`PrimitiveArray`] from a vector of values.
133    /// This function is `O(1)`.
134    /// # Examples
135    /// ```
136    /// use arrow2::array::PrimitiveArray;
137    ///
138    /// let array = PrimitiveArray::from_vec(vec![1, 2, 3]);
139    /// assert_eq!(format!("{:?}", array), "Int32[1, 2, 3]");
140    /// ```
141    pub fn from_vec(values: Vec<T>) -> Self {
142        Self::new(T::PRIMITIVE.into(), values.into(), None)
143    }
144
145    /// Returns an iterator over the values and validity, `Option<&T>`.
146    #[inline]
147    pub fn iter(&self) -> ZipValidity<&T, std::slice::Iter<T>, BitmapIter> {
148        ZipValidity::new_with_validity(self.values().iter(), self.validity())
149    }
150
151    /// Returns an iterator of the values, `&T`, ignoring the arrays' validity.
152    #[inline]
153    pub fn values_iter(&self) -> std::slice::Iter<T> {
154        self.values().iter()
155    }
156
157    /// Returns the length of this array
158    #[inline]
159    pub fn len(&self) -> usize {
160        self.values.len()
161    }
162
163    /// The values [`Buffer`].
164    /// Values on null slots are undetermined (they can be anything).
165    #[inline]
166    pub fn values(&self) -> &Buffer<T> {
167        &self.values
168    }
169
170    /// Returns the optional validity.
171    #[inline]
172    pub fn validity(&self) -> Option<&Bitmap> {
173        self.validity.as_ref()
174    }
175
176    /// Returns the arrays' [`DataType`].
177    #[inline]
178    pub fn data_type(&self) -> &DataType {
179        &self.data_type
180    }
181
182    /// Returns the value at slot `i`.
183    ///
184    /// Equivalent to `self.values()[i]`. The value of a null slot is undetermined (it can be anything).
185    /// # Panic
186    /// This function panics iff `i >= self.len`.
187    #[inline]
188    pub fn value(&self, i: usize) -> T {
189        self.values[i]
190    }
191
192    /// Returns the value at index `i`.
193    /// The value on null slots is undetermined (it can be anything).
194    /// # Safety
195    /// Caller must be sure that `i < self.len()`
196    #[inline]
197    pub unsafe fn value_unchecked(&self, i: usize) -> T {
198        *self.values.get_unchecked(i)
199    }
200
201    /// Returns the element at index `i` or `None` if it is null
202    /// # Panics
203    /// iff `i >= self.len()`
204    #[inline]
205    pub fn get(&self, i: usize) -> Option<T> {
206        if !self.is_null(i) {
207            // soundness: Array::is_null panics if i >= self.len
208            unsafe { Some(self.value_unchecked(i)) }
209        } else {
210            None
211        }
212    }
213
214    /// Slices this [`PrimitiveArray`] by an offset and length.
215    /// # Implementation
216    /// This operation is `O(1)`.
217    #[inline]
218    pub fn slice(&mut self, offset: usize, length: usize) {
219        assert!(
220            offset + length <= self.len(),
221            "offset + length may not exceed length of array"
222        );
223        unsafe { self.slice_unchecked(offset, length) }
224    }
225
226    /// Slices this [`PrimitiveArray`] by an offset and length.
227    /// # Implementation
228    /// This operation is `O(1)`.
229    /// # Safety
230    /// The caller must ensure that `offset + length <= self.len()`.
231    #[inline]
232    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
233        self.validity.as_mut().and_then(|bitmap| {
234            bitmap.slice_unchecked(offset, length);
235            (bitmap.unset_bits() > 0).then(|| bitmap)
236        });
237        self.values.slice_unchecked(offset, length);
238    }
239
240    impl_sliced!();
241    impl_mut_validity!();
242    impl_into_array!();
243
244    /// Returns this [`PrimitiveArray`] with new values.
245    /// # Panics
246    /// This function panics iff `values.len() != self.len()`.
247    #[must_use]
248    pub fn with_values(mut self, values: Buffer<T>) -> Self {
249        self.set_values(values);
250        self
251    }
252
253    /// Update the values of this [`PrimitiveArray`].
254    /// # Panics
255    /// This function panics iff `values.len() != self.len()`.
256    pub fn set_values(&mut self, values: Buffer<T>) {
257        assert_eq!(
258            values.len(),
259            self.len(),
260            "values' length must be equal to this arrays' length"
261        );
262        self.values = values;
263    }
264
265    /// Applies a function `f` to the validity of this array.
266    ///
267    /// This is an API to leverage clone-on-write
268    /// # Panics
269    /// This function panics if the function `f` modifies the length of the [`Bitmap`].
270    pub fn apply_validity<F: FnOnce(Bitmap) -> Bitmap>(&mut self, f: F) {
271        if let Some(validity) = std::mem::take(&mut self.validity) {
272            self.set_validity(Some(f(validity)))
273        }
274    }
275
276    /// Returns an option of a mutable reference to the values of this [`PrimitiveArray`].
277    pub fn get_mut_values(&mut self) -> Option<&mut [T]> {
278        self.values.get_mut_slice()
279    }
280
281    /// Returns its internal representation
282    #[must_use]
283    pub fn into_inner(self) -> (DataType, Buffer<T>, Option<Bitmap>) {
284        let Self {
285            data_type,
286            values,
287            validity,
288        } = self;
289        (data_type, values, validity)
290    }
291
292    /// Creates a `[PrimitiveArray]` from its internal representation.
293    /// This is the inverted from `[PrimitiveArray::into_inner]`
294    pub fn from_inner(
295        data_type: DataType,
296        values: Buffer<T>,
297        validity: Option<Bitmap>,
298    ) -> Result<Self, Error> {
299        check(&data_type, &values, validity.as_ref().map(|v| v.len()))?;
300        Ok(unsafe { Self::from_inner_unchecked(data_type, values, validity) })
301    }
302
303    /// Creates a `[PrimitiveArray]` from its internal representation.
304    /// This is the inverted from `[PrimitiveArray::into_inner]`
305    ///
306    /// # Safety
307    /// Callers must ensure all invariants of this struct are upheld.
308    pub unsafe fn from_inner_unchecked(
309        data_type: DataType,
310        values: Buffer<T>,
311        validity: Option<Bitmap>,
312    ) -> Self {
313        Self {
314            data_type,
315            values,
316            validity,
317        }
318    }
319
320    /// Try to convert this [`PrimitiveArray`] to a [`MutablePrimitiveArray`] via copy-on-write semantics.
321    ///
322    /// A [`PrimitiveArray`] is backed by a [`Buffer`] and [`Bitmap`] which are essentially `Arc<Vec<_>>`.
323    /// This function returns a [`MutablePrimitiveArray`] (via [`std::sync::Arc::get_mut`]) iff both values
324    /// and validity have not been cloned / are unique references to their underlying vectors.
325    ///
326    /// This function is primarily used to re-use memory regions.
327    #[must_use]
328    pub fn into_mut(self) -> Either<Self, MutablePrimitiveArray<T>> {
329        use Either::*;
330
331        if let Some(bitmap) = self.validity {
332            match bitmap.into_mut() {
333                Left(bitmap) => Left(PrimitiveArray::new(
334                    self.data_type,
335                    self.values,
336                    Some(bitmap),
337                )),
338                Right(mutable_bitmap) => match self.values.into_mut() {
339                    Right(values) => Right(
340                        MutablePrimitiveArray::try_new(
341                            self.data_type,
342                            values,
343                            Some(mutable_bitmap),
344                        )
345                        .unwrap(),
346                    ),
347                    Left(values) => Left(PrimitiveArray::new(
348                        self.data_type,
349                        values,
350                        Some(mutable_bitmap.into()),
351                    )),
352                },
353            }
354        } else {
355            match self.values.into_mut() {
356                Right(values) => {
357                    Right(MutablePrimitiveArray::try_new(self.data_type, values, None).unwrap())
358                }
359                Left(values) => Left(PrimitiveArray::new(self.data_type, values, None)),
360            }
361        }
362    }
363
364    /// Returns a new empty (zero-length) [`PrimitiveArray`].
365    pub fn new_empty(data_type: DataType) -> Self {
366        Self::new(data_type, Buffer::new(), None)
367    }
368
369    /// Returns a new [`PrimitiveArray`] where all slots are null / `None`.
370    #[inline]
371    pub fn new_null(data_type: DataType, length: usize) -> Self {
372        Self::new(
373            data_type,
374            vec![T::default(); length].into(),
375            Some(Bitmap::new_zeroed(length)),
376        )
377    }
378
379    /// Creates a (non-null) [`PrimitiveArray`] from an iterator of values.
380    /// # Implementation
381    /// This does not assume that the iterator has a known length.
382    pub fn from_values<I: IntoIterator<Item = T>>(iter: I) -> Self {
383        Self::new(T::PRIMITIVE.into(), Vec::<T>::from_iter(iter).into(), None)
384    }
385
386    /// Creates a (non-null) [`PrimitiveArray`] from a slice of values.
387    /// # Implementation
388    /// This is essentially a memcopy and is thus `O(N)`
389    pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {
390        Self::new(
391            T::PRIMITIVE.into(),
392            Vec::<T>::from(slice.as_ref()).into(),
393            None,
394        )
395    }
396
397    /// Creates a (non-null) [`PrimitiveArray`] from a [`TrustedLen`] of values.
398    /// # Implementation
399    /// This does not assume that the iterator has a known length.
400    pub fn from_trusted_len_values_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {
401        MutablePrimitiveArray::<T>::from_trusted_len_values_iter(iter).into()
402    }
403
404    /// Creates a new [`PrimitiveArray`] from an iterator over values
405    /// # Safety
406    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
407    /// I.e. that `size_hint().1` correctly reports its length.
408    pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = T>>(iter: I) -> Self {
409        MutablePrimitiveArray::<T>::from_trusted_len_values_iter_unchecked(iter).into()
410    }
411
412    /// Creates a [`PrimitiveArray`] from a [`TrustedLen`] of optional values.
413    pub fn from_trusted_len_iter<I: TrustedLen<Item = Option<T>>>(iter: I) -> Self {
414        MutablePrimitiveArray::<T>::from_trusted_len_iter(iter).into()
415    }
416
417    /// Creates a [`PrimitiveArray`] from an iterator of optional values.
418    /// # Safety
419    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
420    /// I.e. that `size_hint().1` correctly reports its length.
421    pub unsafe fn from_trusted_len_iter_unchecked<I: Iterator<Item = Option<T>>>(iter: I) -> Self {
422        MutablePrimitiveArray::<T>::from_trusted_len_iter_unchecked(iter).into()
423    }
424
425    /// Alias for `Self::try_new(..).unwrap()`.
426    /// # Panics
427    /// This function errors iff:
428    /// * The validity is not `None` and its length is different from `values`'s length
429    /// * The `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive`].
430    pub fn new(data_type: DataType, values: Buffer<T>, validity: Option<Bitmap>) -> Self {
431        Self::try_new(data_type, values, validity).unwrap()
432    }
433}
434
435impl<T: NativeType> Array for PrimitiveArray<T> {
436    impl_common_array!();
437
438    fn validity(&self) -> Option<&Bitmap> {
439        self.validity.as_ref()
440    }
441
442    #[inline]
443    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
444        Box::new(self.clone().with_validity(validity))
445    }
446}
447
/// A type alias of [`PrimitiveArray`] for `i8`
pub type Int8Array = PrimitiveArray<i8>;
/// A type alias of [`PrimitiveArray`] for `i16`
pub type Int16Array = PrimitiveArray<i16>;
/// A type alias of [`PrimitiveArray`] for `i32`
pub type Int32Array = PrimitiveArray<i32>;
/// A type alias of [`PrimitiveArray`] for `i64`
pub type Int64Array = PrimitiveArray<i64>;
/// A type alias of [`PrimitiveArray`] for `i128`
pub type Int128Array = PrimitiveArray<i128>;
/// A type alias of [`PrimitiveArray`] for `i256`
pub type Int256Array = PrimitiveArray<i256>;
/// A type alias of [`PrimitiveArray`] for [`days_ms`]
pub type DaysMsArray = PrimitiveArray<days_ms>;
/// A type alias of [`PrimitiveArray`] for [`months_days_ns`]
pub type MonthsDaysNsArray = PrimitiveArray<months_days_ns>;
/// A type alias of [`PrimitiveArray`] for `f16`
pub type Float16Array = PrimitiveArray<f16>;
/// A type alias of [`PrimitiveArray`] for `f32`
pub type Float32Array = PrimitiveArray<f32>;
/// A type alias of [`PrimitiveArray`] for `f64`
pub type Float64Array = PrimitiveArray<f64>;
/// A type alias of [`PrimitiveArray`] for `u8`
pub type UInt8Array = PrimitiveArray<u8>;
/// A type alias of [`PrimitiveArray`] for `u16`
pub type UInt16Array = PrimitiveArray<u16>;
/// A type alias of [`PrimitiveArray`] for `u32`
pub type UInt32Array = PrimitiveArray<u32>;
/// A type alias of [`PrimitiveArray`] for `u64`
pub type UInt64Array = PrimitiveArray<u64>;
478
/// A type alias of [`MutablePrimitiveArray`] for `i8`
pub type Int8Vec = MutablePrimitiveArray<i8>;
/// A type alias of [`MutablePrimitiveArray`] for `i16`
pub type Int16Vec = MutablePrimitiveArray<i16>;
/// A type alias of [`MutablePrimitiveArray`] for `i32`
pub type Int32Vec = MutablePrimitiveArray<i32>;
/// A type alias of [`MutablePrimitiveArray`] for `i64`
pub type Int64Vec = MutablePrimitiveArray<i64>;
/// A type alias of [`MutablePrimitiveArray`] for `i128`
pub type Int128Vec = MutablePrimitiveArray<i128>;
/// A type alias of [`MutablePrimitiveArray`] for `i256`
pub type Int256Vec = MutablePrimitiveArray<i256>;
/// A type alias of [`MutablePrimitiveArray`] for [`days_ms`]
pub type DaysMsVec = MutablePrimitiveArray<days_ms>;
/// A type alias of [`MutablePrimitiveArray`] for [`months_days_ns`]
pub type MonthsDaysNsVec = MutablePrimitiveArray<months_days_ns>;
/// A type alias of [`MutablePrimitiveArray`] for `f16`
pub type Float16Vec = MutablePrimitiveArray<f16>;
/// A type alias of [`MutablePrimitiveArray`] for `f32`
pub type Float32Vec = MutablePrimitiveArray<f32>;
/// A type alias of [`MutablePrimitiveArray`] for `f64`
pub type Float64Vec = MutablePrimitiveArray<f64>;
/// A type alias of [`MutablePrimitiveArray`] for `u8`
pub type UInt8Vec = MutablePrimitiveArray<u8>;
/// A type alias of [`MutablePrimitiveArray`] for `u16`
pub type UInt16Vec = MutablePrimitiveArray<u16>;
/// A type alias of [`MutablePrimitiveArray`] for `u32`
pub type UInt32Vec = MutablePrimitiveArray<u32>;
/// A type alias of [`MutablePrimitiveArray`] for `u64`
pub type UInt64Vec = MutablePrimitiveArray<u64>;
509
510impl<T: NativeType> Default for PrimitiveArray<T> {
511    fn default() -> Self {
512        PrimitiveArray::new(T::PRIMITIVE.into(), Default::default(), None)
513    }
514}