polars_arrow/array/primitive/
mod.rs1use std::ops::Range;
2
3use either::Either;
4use polars_buffer::Buffer;
5use polars_utils::float16::pf16;
6
7use super::{Array, Splitable};
8use crate::array::iterator::NonNullValuesIter;
9use crate::bitmap::Bitmap;
10use crate::bitmap::utils::{BitmapIter, ZipValidity};
11use crate::datatypes::*;
12use crate::trusted_len::TrustedLen;
13use crate::types::{NativeType, days_ms, i256, months_days_ns};
14
15mod ffi;
16pub(super) mod fmt;
17mod from_natural;
18pub mod iterator;
19#[cfg(feature = "proptest")]
20pub mod proptest;
21
22mod mutable;
23pub use mutable::*;
24mod builder;
25pub use builder::*;
26use polars_error::{PolarsResult, polars_bail};
27use polars_utils::index::{Bounded, Indexable, NullCount};
28use polars_utils::slice::SliceAble;
29
30#[derive(Clone)]
56pub struct PrimitiveArray<T: NativeType> {
57 dtype: ArrowDataType,
58 values: Buffer<T>,
59 validity: Option<Bitmap>,
60}
61
62pub(super) fn check<T: NativeType>(
63 dtype: &ArrowDataType,
64 values: &[T],
65 validity_len: Option<usize>,
66) -> PolarsResult<()> {
67 if validity_len.is_some_and(|len| len != values.len()) {
68 polars_bail!(ComputeError: "validity mask length must match the number of values")
69 }
70
71 if dtype.to_physical_type() != PhysicalType::Primitive(T::PRIMITIVE) {
72 polars_bail!(ComputeError: "PrimitiveArray can only be initialized with a DataType whose physical type is Primitive")
73 }
74 Ok(())
75}
76
77impl<T: NativeType> PrimitiveArray<T> {
78 pub fn try_new(
87 dtype: ArrowDataType,
88 values: Buffer<T>,
89 validity: Option<Bitmap>,
90 ) -> PolarsResult<Self> {
91 check(&dtype, &values, validity.as_ref().map(|v| v.len()))?;
92 Ok(Self {
93 dtype,
94 values,
95 validity,
96 })
97 }
98
99 pub unsafe fn new_unchecked(
102 dtype: ArrowDataType,
103 values: Buffer<T>,
104 validity: Option<Bitmap>,
105 ) -> Self {
106 if cfg!(debug_assertions) {
107 check(&dtype, &values, validity.as_ref().map(|v| v.len())).unwrap();
108 }
109
110 Self {
111 dtype,
112 values,
113 validity,
114 }
115 }
116
117 #[inline]
135 #[must_use]
136 pub fn to(self, dtype: ArrowDataType) -> Self {
137 check(
138 &dtype,
139 &self.values,
140 self.validity.as_ref().map(|v| v.len()),
141 )
142 .unwrap();
143 Self {
144 dtype,
145 values: self.values,
146 validity: self.validity,
147 }
148 }
149
150 pub fn from_vec(values: Vec<T>) -> Self {
160 Self::new(T::PRIMITIVE.into(), values.into(), None)
161 }
162
163 #[inline]
165 pub fn iter(&self) -> ZipValidity<&T, std::slice::Iter<'_, T>, BitmapIter<'_>> {
166 ZipValidity::new_with_validity(self.values().iter(), self.validity())
167 }
168
169 #[inline]
171 pub fn values_iter(&self) -> std::slice::Iter<'_, T> {
172 self.values().iter()
173 }
174
175 #[inline]
177 pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, [T]> {
178 NonNullValuesIter::new(self.values(), self.validity())
179 }
180
181 #[inline]
183 pub fn len(&self) -> usize {
184 self.values.len()
185 }
186
187 #[inline]
190 pub fn values(&self) -> &Buffer<T> {
191 &self.values
192 }
193
194 #[inline]
196 pub fn validity(&self) -> Option<&Bitmap> {
197 self.validity.as_ref()
198 }
199
200 #[inline]
202 pub fn dtype(&self) -> &ArrowDataType {
203 &self.dtype
204 }
205
206 #[inline]
212 pub fn value(&self, i: usize) -> T {
213 self.values[i]
214 }
215
216 #[inline]
222 pub unsafe fn value_unchecked(&self, i: usize) -> T {
223 *self.values.get_unchecked(i)
224 }
225
226 #[inline]
243 pub fn slice(&mut self, offset: usize, length: usize) {
244 assert!(
245 offset + length <= self.len(),
246 "offset + length may not exceed length of array"
247 );
248 unsafe { self.slice_unchecked(offset, length) }
249 }
250
251 #[inline]
258 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
259 self.validity = self
260 .validity
261 .take()
262 .map(|bitmap| bitmap.sliced_unchecked(offset, length))
263 .filter(|bitmap| bitmap.unset_bits() > 0);
264 self.values
265 .slice_in_place_unchecked(offset..offset + length);
266 }
267
268 impl_sliced!();
269 impl_mut_validity!();
270 impl_into_array!();
271
272 #[must_use]
276 pub fn with_values(mut self, values: Buffer<T>) -> Self {
277 self.set_values(values);
278 self
279 }
280
281 pub fn set_values(&mut self, values: Buffer<T>) {
285 assert_eq!(
286 values.len(),
287 self.len(),
288 "values' length must be equal to this arrays' length"
289 );
290 self.values = values;
291 }
292
293 pub fn apply_validity<F: FnOnce(Bitmap) -> Bitmap>(&mut self, f: F) {
298 if let Some(validity) = std::mem::take(&mut self.validity) {
299 self.set_validity(Some(f(validity)))
300 }
301 }
302
303 pub fn with_values_mut<F: FnOnce(&mut [T])>(&mut self, f: F) {
306 if let Some(slice) = self.values.get_mut_slice() {
307 f(slice)
308 } else {
309 let mut values = self.values.as_slice().to_vec();
310 f(&mut values);
311 self.values = Buffer::from(values);
312 }
313 }
314
315 pub fn get_mut_values(&mut self) -> Option<&mut [T]> {
317 self.values.get_mut_slice()
318 }
319
320 #[must_use]
322 pub fn into_inner(self) -> (ArrowDataType, Buffer<T>, Option<Bitmap>) {
323 let Self {
324 dtype,
325 values,
326 validity,
327 } = self;
328 (dtype, values, validity)
329 }
330
331 pub fn from_inner(
334 dtype: ArrowDataType,
335 values: Buffer<T>,
336 validity: Option<Bitmap>,
337 ) -> PolarsResult<Self> {
338 check(&dtype, &values, validity.as_ref().map(|v| v.len()))?;
339 Ok(unsafe { Self::from_inner_unchecked(dtype, values, validity) })
340 }
341
342 pub unsafe fn from_inner_unchecked(
348 dtype: ArrowDataType,
349 values: Buffer<T>,
350 validity: Option<Bitmap>,
351 ) -> Self {
352 Self {
353 dtype,
354 values,
355 validity,
356 }
357 }
358
359 #[must_use]
367 pub fn into_mut(self) -> Either<Self, MutablePrimitiveArray<T>> {
368 use Either::*;
369
370 if let Some(bitmap) = self.validity {
371 match bitmap.into_mut() {
372 Left(bitmap) => Left(PrimitiveArray::new(self.dtype, self.values, Some(bitmap))),
373 Right(mutable_bitmap) => match self.values.into_mut() {
374 Right(values) => Right(
375 MutablePrimitiveArray::try_new(self.dtype, values, Some(mutable_bitmap))
376 .unwrap(),
377 ),
378 Left(values) => Left(PrimitiveArray::new(
379 self.dtype,
380 values,
381 Some(mutable_bitmap.into()),
382 )),
383 },
384 }
385 } else {
386 match self.values.into_mut() {
387 Right(values) => {
388 Right(MutablePrimitiveArray::try_new(self.dtype, values, None).unwrap())
389 },
390 Left(values) => Left(PrimitiveArray::new(self.dtype, values, None)),
391 }
392 }
393 }
394
395 pub fn new_empty(dtype: ArrowDataType) -> Self {
397 Self::new(dtype, Buffer::new(), None)
398 }
399
400 #[inline]
402 pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
403 Self::new(
404 dtype,
405 vec![T::default(); length].into(),
406 Some(Bitmap::new_zeroed(length)),
407 )
408 }
409
410 pub fn from_values<I: IntoIterator<Item = T>>(iter: I) -> Self {
414 Self::new(T::PRIMITIVE.into(), Vec::<T>::from_iter(iter).into(), None)
415 }
416
417 pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {
421 Self::new(
422 T::PRIMITIVE.into(),
423 Vec::<T>::from(slice.as_ref()).into(),
424 None,
425 )
426 }
427
428 pub fn with_slice<R, F: FnOnce(PrimitiveArray<T>) -> R>(slice: &[T], f: F) -> R {
432 Buffer::with_slice(slice, |buf| f(Self::new(T::PRIMITIVE.into(), buf, None)))
433 }
434
435 pub fn from_trusted_len_values_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {
439 MutablePrimitiveArray::<T>::from_trusted_len_values_iter(iter).into()
440 }
441
442 pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = T>>(iter: I) -> Self {
448 MutablePrimitiveArray::<T>::from_trusted_len_values_iter_unchecked(iter).into()
449 }
450
451 pub fn from_trusted_len_iter<I: TrustedLen<Item = Option<T>>>(iter: I) -> Self {
453 MutablePrimitiveArray::<T>::from_trusted_len_iter(iter).into()
454 }
455
456 pub unsafe fn from_trusted_len_iter_unchecked<I: Iterator<Item = Option<T>>>(iter: I) -> Self {
462 MutablePrimitiveArray::<T>::from_trusted_len_iter_unchecked(iter).into()
463 }
464
465 pub fn new(dtype: ArrowDataType, values: Buffer<T>, validity: Option<Bitmap>) -> Self {
471 Self::try_new(dtype, values, validity).unwrap()
472 }
473
474 pub fn transmute<U: NativeType>(self) -> PrimitiveArray<U> {
478 let PrimitiveArray {
479 values, validity, ..
480 } = self;
481 PrimitiveArray::new(
482 U::PRIMITIVE.into(),
483 Buffer::try_transmute::<U>(values).unwrap(),
484 validity,
485 )
486 }
487
488 pub fn fill_with(mut self, value: T) -> Self {
492 if let Some(values) = self.get_mut_values() {
493 for x in values.iter_mut() {
494 *x = value;
495 }
496 self
497 } else {
498 let values = vec![value; self.len()];
499 Self::new(T::PRIMITIVE.into(), values.into(), self.validity)
500 }
501 }
502}
503
504impl<T: NativeType> Array for PrimitiveArray<T> {
505 impl_common_array!();
506
507 fn validity(&self) -> Option<&Bitmap> {
508 self.validity.as_ref()
509 }
510
511 #[inline]
512 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
513 Box::new(self.clone().with_validity(validity))
514 }
515}
516
517impl<T: NativeType> Splitable for PrimitiveArray<T> {
518 #[inline(always)]
519 fn check_bound(&self, offset: usize) -> bool {
520 offset <= self.len()
521 }
522
523 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
524 let (lhs_values, rhs_values) = unsafe { self.values.split_at_unchecked(offset) };
525 let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
526
527 (
528 Self {
529 dtype: self.dtype.clone(),
530 values: lhs_values,
531 validity: lhs_validity,
532 },
533 Self {
534 dtype: self.dtype.clone(),
535 values: rhs_values,
536 validity: rhs_validity,
537 },
538 )
539 }
540}
541
542impl<T: NativeType> SliceAble for PrimitiveArray<T> {
543 unsafe fn slice_unchecked(&self, range: Range<usize>) -> Self {
544 self.clone().sliced_unchecked(range.start, range.len())
545 }
546
547 fn slice(&self, range: Range<usize>) -> Self {
548 self.clone().sliced(range.start, range.len())
549 }
550}
551
552impl<T: NativeType> Indexable for PrimitiveArray<T> {
553 type Item = Option<T>;
554
555 fn get(&self, i: usize) -> Self::Item {
556 if !self.is_null(i) {
557 unsafe { Some(self.value_unchecked(i)) }
559 } else {
560 None
561 }
562 }
563
564 unsafe fn get_unchecked(&self, i: usize) -> Self::Item {
565 if !self.is_null_unchecked(i) {
566 Some(self.value_unchecked(i))
567 } else {
568 None
569 }
570 }
571}
572
573pub type Int8Array = PrimitiveArray<i8>;
575pub type Int16Array = PrimitiveArray<i16>;
577pub type Int32Array = PrimitiveArray<i32>;
579pub type Int64Array = PrimitiveArray<i64>;
581pub type Int128Array = PrimitiveArray<i128>;
583pub type Int256Array = PrimitiveArray<i256>;
585pub type DaysMsArray = PrimitiveArray<days_ms>;
587pub type MonthsDaysNsArray = PrimitiveArray<months_days_ns>;
589pub type Float16Array = PrimitiveArray<pf16>;
591pub type Float32Array = PrimitiveArray<f32>;
593pub type Float64Array = PrimitiveArray<f64>;
595pub type UInt8Array = PrimitiveArray<u8>;
597pub type UInt16Array = PrimitiveArray<u16>;
599pub type UInt32Array = PrimitiveArray<u32>;
601pub type UInt64Array = PrimitiveArray<u64>;
603pub type UInt128Array = PrimitiveArray<u128>;
605
606pub type Int8Vec = MutablePrimitiveArray<i8>;
608pub type Int16Vec = MutablePrimitiveArray<i16>;
610pub type Int32Vec = MutablePrimitiveArray<i32>;
612pub type Int64Vec = MutablePrimitiveArray<i64>;
614pub type Int128Vec = MutablePrimitiveArray<i128>;
616pub type Int256Vec = MutablePrimitiveArray<i256>;
618pub type DaysMsVec = MutablePrimitiveArray<days_ms>;
620pub type MonthsDaysNsVec = MutablePrimitiveArray<months_days_ns>;
622pub type Float16Vec = MutablePrimitiveArray<pf16>;
624pub type Float32Vec = MutablePrimitiveArray<f32>;
626pub type Float64Vec = MutablePrimitiveArray<f64>;
628pub type UInt8Vec = MutablePrimitiveArray<u8>;
630pub type UInt16Vec = MutablePrimitiveArray<u16>;
632pub type UInt32Vec = MutablePrimitiveArray<u32>;
634pub type UInt64Vec = MutablePrimitiveArray<u64>;
636pub type UInt128Vec = MutablePrimitiveArray<u128>;
638
639impl<T: NativeType> Default for PrimitiveArray<T> {
640 fn default() -> Self {
641 PrimitiveArray::new(T::PRIMITIVE.into(), Default::default(), None)
642 }
643}
644
645impl<T: NativeType> Bounded for PrimitiveArray<T> {
646 fn len(&self) -> usize {
647 self.values.len()
648 }
649}
650
651impl<T: NativeType> NullCount for PrimitiveArray<T> {
652 fn null_count(&self) -> usize {
653 <Self as Array>::null_count(self)
654 }
655}