polars_arrow/array/primitive/
mod.rs1use std::ops::Range;
2
3use either::Either;
4use polars_buffer::Buffer;
5use polars_utils::float16::pf16;
6
7use super::{Array, Splitable};
8use crate::array::iterator::NonNullValuesIter;
9use crate::bitmap::Bitmap;
10use crate::bitmap::utils::{BitmapIter, ZipValidity};
11use crate::datatypes::*;
12use crate::trusted_len::TrustedLen;
13use crate::types::{NativeType, days_ms, i256, months_days_ns};
14
15mod ffi;
16pub(super) mod fmt;
17mod from_natural;
18pub mod iterator;
19#[cfg(feature = "proptest")]
20pub mod proptest;
21
22mod mutable;
23pub use mutable::*;
24mod builder;
25pub use builder::*;
26use polars_error::{PolarsResult, polars_bail};
27use polars_utils::index::{Bounded, Indexable, NullCount};
28use polars_utils::slice::SliceAble;
29
/// An array of fixed-width native values of type `T` with an optional validity mask.
///
/// The constructors in this module validate (via `check`) that
/// * the validity mask, when present, has exactly as many bits as there are values, and
/// * the physical type of `dtype` is `PhysicalType::Primitive(T::PRIMITIVE)`.
///
/// An unset validity bit marks a null slot (`new_null` builds an all-null array
/// from a zeroed bitmap); `None` means that no validity mask is stored.
#[derive(Clone)]
pub struct PrimitiveArray<T: NativeType> {
    /// Logical Arrow data type attached to the values.
    dtype: ArrowDataType,
    /// The values, one per slot (null slots still occupy an entry).
    values: Buffer<T>,
    /// Optional validity mask, one bit per slot.
    validity: Option<Bitmap>,
}
61
62pub(super) fn check<T: NativeType>(
63 dtype: &ArrowDataType,
64 values: &[T],
65 validity_len: Option<usize>,
66) -> PolarsResult<()> {
67 if validity_len.is_some_and(|len| len != values.len()) {
68 polars_bail!(ComputeError: "validity mask length must match the number of values")
69 }
70
71 if dtype.to_physical_type() != PhysicalType::Primitive(T::PRIMITIVE) {
72 polars_bail!(ComputeError: "PrimitiveArray can only be initialized with a DataType whose physical type is Primitive")
73 }
74 Ok(())
75}
76
77impl<T: NativeType> PrimitiveArray<T> {
78 pub fn try_new(
87 dtype: ArrowDataType,
88 values: Buffer<T>,
89 validity: Option<Bitmap>,
90 ) -> PolarsResult<Self> {
91 check(&dtype, &values, validity.as_ref().map(|v| v.len()))?;
92 Ok(Self {
93 dtype,
94 values,
95 validity,
96 })
97 }
98
99 pub unsafe fn new_unchecked(
102 dtype: ArrowDataType,
103 values: Buffer<T>,
104 validity: Option<Bitmap>,
105 ) -> Self {
106 if cfg!(debug_assertions) {
107 check(&dtype, &values, validity.as_ref().map(|v| v.len())).unwrap();
108 }
109
110 Self {
111 dtype,
112 values,
113 validity,
114 }
115 }
116
117 #[inline]
135 #[must_use]
136 pub fn to(self, dtype: ArrowDataType) -> Self {
137 check(
138 &dtype,
139 &self.values,
140 self.validity.as_ref().map(|v| v.len()),
141 )
142 .unwrap();
143 Self {
144 dtype,
145 values: self.values,
146 validity: self.validity,
147 }
148 }
149
150 pub fn from_vec(values: Vec<T>) -> Self {
160 Self::new(T::PRIMITIVE.into(), values.into(), None)
161 }
162
163 #[inline]
165 pub fn iter(&self) -> ZipValidity<&T, std::slice::Iter<'_, T>, BitmapIter<'_>> {
166 ZipValidity::new_with_validity(self.values().iter(), self.validity())
167 }
168
169 #[inline]
171 pub fn values_iter(&self) -> std::slice::Iter<'_, T> {
172 self.values().iter()
173 }
174
175 #[inline]
177 pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, [T]> {
178 NonNullValuesIter::new(self.values(), self.validity())
179 }
180
181 #[inline]
183 pub fn len(&self) -> usize {
184 self.values.len()
185 }
186
187 #[inline]
190 pub fn values(&self) -> &Buffer<T> {
191 &self.values
192 }
193
194 #[inline]
196 pub fn validity(&self) -> Option<&Bitmap> {
197 self.validity.as_ref()
198 }
199
200 #[inline]
202 pub fn dtype(&self) -> &ArrowDataType {
203 &self.dtype
204 }
205
206 #[inline]
212 pub fn value(&self, i: usize) -> T {
213 self.values[i]
214 }
215
216 #[inline]
222 pub unsafe fn value_unchecked(&self, i: usize) -> T {
223 *self.values.get_unchecked(i)
224 }
225
226 #[inline]
243 pub fn slice(&mut self, offset: usize, length: usize) {
244 assert!(
245 offset + length <= self.len(),
246 "offset + length may not exceed length of array"
247 );
248 unsafe { self.slice_unchecked(offset, length) }
249 }
250
251 #[inline]
258 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
259 self.validity = self
260 .validity
261 .take()
262 .map(|bitmap| bitmap.sliced_unchecked(offset, length))
263 .filter(|bitmap| bitmap.unset_bits() > 0);
264 self.values
265 .slice_in_place_unchecked(offset..offset + length);
266 }
267
268 impl_sliced!();
269 impl_mut_validity!();
270 impl_into_array!();
271
272 #[must_use]
276 pub fn with_values(mut self, values: Buffer<T>) -> Self {
277 self.set_values(values);
278 self
279 }
280
281 pub fn set_values(&mut self, values: Buffer<T>) {
285 assert_eq!(
286 values.len(),
287 self.len(),
288 "values' length must be equal to this arrays' length"
289 );
290 self.values = values;
291 }
292
293 pub fn apply_validity<F: FnOnce(Bitmap) -> Bitmap>(&mut self, f: F) {
298 if let Some(validity) = std::mem::take(&mut self.validity) {
299 self.set_validity(Some(f(validity)))
300 }
301 }
302
303 pub fn with_values_mut<F: FnOnce(&mut [T])>(&mut self, f: F) {
306 if let Some(slice) = self.values.get_mut_slice() {
307 f(slice)
308 } else {
309 let mut values = self.values.as_slice().to_vec();
310 f(&mut values);
311 self.values = Buffer::from(values);
312 }
313 }
314
315 pub fn get_mut_values(&mut self) -> Option<&mut [T]> {
317 self.values.get_mut_slice()
318 }
319
320 #[must_use]
322 pub fn into_inner(self) -> (ArrowDataType, Buffer<T>, Option<Bitmap>) {
323 let Self {
324 dtype,
325 values,
326 validity,
327 } = self;
328 (dtype, values, validity)
329 }
330
331 pub fn from_inner(
334 dtype: ArrowDataType,
335 values: Buffer<T>,
336 validity: Option<Bitmap>,
337 ) -> PolarsResult<Self> {
338 check(&dtype, &values, validity.as_ref().map(|v| v.len()))?;
339 Ok(unsafe { Self::from_inner_unchecked(dtype, values, validity) })
340 }
341
342 pub unsafe fn from_inner_unchecked(
348 dtype: ArrowDataType,
349 values: Buffer<T>,
350 validity: Option<Bitmap>,
351 ) -> Self {
352 Self {
353 dtype,
354 values,
355 validity,
356 }
357 }
358
359 #[must_use]
367 pub fn into_mut(self) -> Either<Self, MutablePrimitiveArray<T>> {
368 use Either::*;
369
370 if let Some(bitmap) = self.validity {
371 match bitmap.into_mut() {
372 Left(bitmap) => Left(PrimitiveArray::new(self.dtype, self.values, Some(bitmap))),
373 Right(mutable_bitmap) => match self.values.into_mut() {
374 Right(values) => Right(
375 MutablePrimitiveArray::try_new(self.dtype, values, Some(mutable_bitmap))
376 .unwrap(),
377 ),
378 Left(values) => Left(PrimitiveArray::new(
379 self.dtype,
380 values,
381 Some(mutable_bitmap.into()),
382 )),
383 },
384 }
385 } else {
386 match self.values.into_mut() {
387 Right(values) => {
388 Right(MutablePrimitiveArray::try_new(self.dtype, values, None).unwrap())
389 },
390 Left(values) => Left(PrimitiveArray::new(self.dtype, values, None)),
391 }
392 }
393 }
394
395 pub fn new_empty(dtype: ArrowDataType) -> Self {
397 Self::new(dtype, Buffer::new(), None)
398 }
399
400 #[inline]
402 pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
403 Self::new(
404 dtype,
405 vec![T::default(); length].into(),
406 Some(Bitmap::new_zeroed(length)),
407 )
408 }
409
410 pub fn from_values<I: IntoIterator<Item = T>>(iter: I) -> Self {
414 Self::new(T::PRIMITIVE.into(), Vec::<T>::from_iter(iter).into(), None)
415 }
416
417 pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {
421 Self::new(
422 T::PRIMITIVE.into(),
423 Vec::<T>::from(slice.as_ref()).into(),
424 None,
425 )
426 }
427
428 pub fn from_trusted_len_values_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {
432 MutablePrimitiveArray::<T>::from_trusted_len_values_iter(iter).into()
433 }
434
435 pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = T>>(iter: I) -> Self {
441 MutablePrimitiveArray::<T>::from_trusted_len_values_iter_unchecked(iter).into()
442 }
443
444 pub fn from_trusted_len_iter<I: TrustedLen<Item = Option<T>>>(iter: I) -> Self {
446 MutablePrimitiveArray::<T>::from_trusted_len_iter(iter).into()
447 }
448
449 pub unsafe fn from_trusted_len_iter_unchecked<I: Iterator<Item = Option<T>>>(iter: I) -> Self {
455 MutablePrimitiveArray::<T>::from_trusted_len_iter_unchecked(iter).into()
456 }
457
458 pub fn new(dtype: ArrowDataType, values: Buffer<T>, validity: Option<Bitmap>) -> Self {
464 Self::try_new(dtype, values, validity).unwrap()
465 }
466
467 pub fn transmute<U: NativeType>(self) -> PrimitiveArray<U> {
471 let PrimitiveArray {
472 values, validity, ..
473 } = self;
474 PrimitiveArray::new(
475 U::PRIMITIVE.into(),
476 Buffer::try_transmute::<U>(values).unwrap(),
477 validity,
478 )
479 }
480
481 pub fn fill_with(mut self, value: T) -> Self {
485 if let Some(values) = self.get_mut_values() {
486 for x in values.iter_mut() {
487 *x = value;
488 }
489 self
490 } else {
491 let values = vec![value; self.len()];
492 Self::new(T::PRIMITIVE.into(), values.into(), self.validity)
493 }
494 }
495}
496
497impl<T: NativeType> Array for PrimitiveArray<T> {
498 impl_common_array!();
499
500 fn validity(&self) -> Option<&Bitmap> {
501 self.validity.as_ref()
502 }
503
504 #[inline]
505 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
506 Box::new(self.clone().with_validity(validity))
507 }
508}
509
510impl<T: NativeType> Splitable for PrimitiveArray<T> {
511 #[inline(always)]
512 fn check_bound(&self, offset: usize) -> bool {
513 offset <= self.len()
514 }
515
516 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
517 let (lhs_values, rhs_values) = unsafe { self.values.split_at_unchecked(offset) };
518 let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
519
520 (
521 Self {
522 dtype: self.dtype.clone(),
523 values: lhs_values,
524 validity: lhs_validity,
525 },
526 Self {
527 dtype: self.dtype.clone(),
528 values: rhs_values,
529 validity: rhs_validity,
530 },
531 )
532 }
533}
534
535impl<T: NativeType> SliceAble for PrimitiveArray<T> {
536 unsafe fn slice_unchecked(&self, range: Range<usize>) -> Self {
537 self.clone().sliced_unchecked(range.start, range.len())
538 }
539
540 fn slice(&self, range: Range<usize>) -> Self {
541 self.clone().sliced(range.start, range.len())
542 }
543}
544
545impl<T: NativeType> Indexable for PrimitiveArray<T> {
546 type Item = Option<T>;
547
548 fn get(&self, i: usize) -> Self::Item {
549 if !self.is_null(i) {
550 unsafe { Some(self.value_unchecked(i)) }
552 } else {
553 None
554 }
555 }
556
557 unsafe fn get_unchecked(&self, i: usize) -> Self::Item {
558 if !self.is_null_unchecked(i) {
559 Some(self.value_unchecked(i))
560 } else {
561 None
562 }
563 }
564}
565
/// A [`PrimitiveArray`] of `i8`.
pub type Int8Array = PrimitiveArray<i8>;
/// A [`PrimitiveArray`] of `i16`.
pub type Int16Array = PrimitiveArray<i16>;
/// A [`PrimitiveArray`] of `i32`.
pub type Int32Array = PrimitiveArray<i32>;
/// A [`PrimitiveArray`] of `i64`.
pub type Int64Array = PrimitiveArray<i64>;
/// A [`PrimitiveArray`] of `i128`.
pub type Int128Array = PrimitiveArray<i128>;
/// A [`PrimitiveArray`] of [`i256`].
pub type Int256Array = PrimitiveArray<i256>;
/// A [`PrimitiveArray`] of [`days_ms`].
pub type DaysMsArray = PrimitiveArray<days_ms>;
/// A [`PrimitiveArray`] of [`months_days_ns`].
pub type MonthsDaysNsArray = PrimitiveArray<months_days_ns>;
/// A [`PrimitiveArray`] of [`pf16`].
pub type Float16Array = PrimitiveArray<pf16>;
/// A [`PrimitiveArray`] of `f32`.
pub type Float32Array = PrimitiveArray<f32>;
/// A [`PrimitiveArray`] of `f64`.
pub type Float64Array = PrimitiveArray<f64>;
/// A [`PrimitiveArray`] of `u8`.
pub type UInt8Array = PrimitiveArray<u8>;
/// A [`PrimitiveArray`] of `u16`.
pub type UInt16Array = PrimitiveArray<u16>;
/// A [`PrimitiveArray`] of `u32`.
pub type UInt32Array = PrimitiveArray<u32>;
/// A [`PrimitiveArray`] of `u64`.
pub type UInt64Array = PrimitiveArray<u64>;
/// A [`PrimitiveArray`] of `u128`.
pub type UInt128Array = PrimitiveArray<u128>;
598
/// A [`MutablePrimitiveArray`] of `i8`.
pub type Int8Vec = MutablePrimitiveArray<i8>;
/// A [`MutablePrimitiveArray`] of `i16`.
pub type Int16Vec = MutablePrimitiveArray<i16>;
/// A [`MutablePrimitiveArray`] of `i32`.
pub type Int32Vec = MutablePrimitiveArray<i32>;
/// A [`MutablePrimitiveArray`] of `i64`.
pub type Int64Vec = MutablePrimitiveArray<i64>;
/// A [`MutablePrimitiveArray`] of `i128`.
pub type Int128Vec = MutablePrimitiveArray<i128>;
/// A [`MutablePrimitiveArray`] of [`i256`].
pub type Int256Vec = MutablePrimitiveArray<i256>;
/// A [`MutablePrimitiveArray`] of [`days_ms`].
pub type DaysMsVec = MutablePrimitiveArray<days_ms>;
/// A [`MutablePrimitiveArray`] of [`months_days_ns`].
pub type MonthsDaysNsVec = MutablePrimitiveArray<months_days_ns>;
/// A [`MutablePrimitiveArray`] of [`pf16`].
pub type Float16Vec = MutablePrimitiveArray<pf16>;
/// A [`MutablePrimitiveArray`] of `f32`.
pub type Float32Vec = MutablePrimitiveArray<f32>;
/// A [`MutablePrimitiveArray`] of `f64`.
pub type Float64Vec = MutablePrimitiveArray<f64>;
/// A [`MutablePrimitiveArray`] of `u8`.
pub type UInt8Vec = MutablePrimitiveArray<u8>;
/// A [`MutablePrimitiveArray`] of `u16`.
pub type UInt16Vec = MutablePrimitiveArray<u16>;
/// A [`MutablePrimitiveArray`] of `u32`.
pub type UInt32Vec = MutablePrimitiveArray<u32>;
/// A [`MutablePrimitiveArray`] of `u64`.
pub type UInt64Vec = MutablePrimitiveArray<u64>;
/// A [`MutablePrimitiveArray`] of `u128`.
pub type UInt128Vec = MutablePrimitiveArray<u128>;
631
632impl<T: NativeType> Default for PrimitiveArray<T> {
633 fn default() -> Self {
634 PrimitiveArray::new(T::PRIMITIVE.into(), Default::default(), None)
635 }
636}
637
638impl<T: NativeType> Bounded for PrimitiveArray<T> {
639 fn len(&self) -> usize {
640 self.values.len()
641 }
642}
643
644impl<T: NativeType> NullCount for PrimitiveArray<T> {
645 fn null_count(&self) -> usize {
646 <Self as Array>::null_count(self)
647 }
648}