1#![allow(unsafe_op_in_unsafe_fn)]
21use std::any::Any;
22use std::sync::Arc;
23
24use crate::bitmap::{Bitmap, MutableBitmap};
25use crate::datatypes::ArrowDataType;
26
27pub mod physical_binary;
28#[cfg(feature = "proptest")]
29pub mod proptest;
30
/// A value that can be divided into a left part and a right part at an offset.
///
/// Implementors provide [`Splitable::check_bound`] and
/// [`Splitable::_split_at_unchecked`]; the checked and unchecked entry points
/// are layered on top of those two.
pub trait Splitable: Sized {
    /// Returns `true` when `offset` is an acceptable split position for `self`.
    fn check_bound(&self, offset: usize) -> bool;

    /// Splits `self` at `offset`, returning the two resulting parts.
    ///
    /// # Panics
    ///
    /// Panics when [`Splitable::check_bound`] rejects `offset`.
    #[inline]
    #[must_use]
    fn split_at(&self, offset: usize) -> (Self, Self) {
        assert!(self.check_bound(offset));
        // SAFETY: the assertion above established that `offset` is in bounds.
        unsafe { Self::_split_at_unchecked(self, offset) }
    }

    /// Splits `self` at `offset` without a release-mode bound check.
    ///
    /// The bound is still verified with a `debug_assert!` in debug builds.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `self.check_bound(offset)` holds.
    #[inline]
    #[must_use]
    unsafe fn split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        debug_assert!(self.check_bound(offset));
        // SAFETY: forwarded from this function's own contract.
        unsafe { Self::_split_at_unchecked(self, offset) }
    }

    /// Implementation hook that performs the actual split.
    ///
    /// Callers should go through [`Splitable::split_at`] or
    /// [`Splitable::split_at_unchecked`] rather than calling this directly.
    ///
    /// # Safety
    ///
    /// Both provided callers only reach this after `check_bound(offset)` has
    /// been asserted or promised by their caller, so implementations may
    /// rely on that condition.
    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self);
}
62
63impl<T> Splitable for Buffer<T> {
64 fn check_bound(&self, offset: usize) -> bool {
65 offset <= self.len()
66 }
67
68 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
69 let left = self.clone().sliced_unchecked(..offset);
70 let right = self.clone().sliced_unchecked(offset..);
71 (left, right)
72 }
73}
74
75pub trait Array: Send + Sync + dyn_clone::DynClone + 'static {
78 fn as_any(&self) -> &dyn Any;
80
81 fn as_any_mut(&mut self) -> &mut dyn Any;
83
84 fn len(&self) -> usize;
87
88 fn is_empty(&self) -> bool {
90 self.len() == 0
91 }
92
93 fn dtype(&self) -> &ArrowDataType;
96
97 fn dtype_mut(&mut self) -> &mut ArrowDataType;
98
99 fn validity(&self) -> Option<&Bitmap>;
103
104 #[inline]
108 fn null_count(&self) -> usize {
109 if self.dtype() == &ArrowDataType::Null {
110 return self.len();
111 };
112 self.validity()
113 .as_ref()
114 .map(|x| x.unset_bits())
115 .unwrap_or(0)
116 }
117
118 #[inline]
119 fn has_nulls(&self) -> bool {
120 self.null_count() > 0
121 }
122
123 #[inline]
127 fn is_null(&self, i: usize) -> bool {
128 assert!(i < self.len());
129 unsafe { self.is_null_unchecked(i) }
130 }
131
132 #[inline]
137 unsafe fn is_null_unchecked(&self, i: usize) -> bool {
138 self.validity()
139 .as_ref()
140 .map(|x| !x.get_bit_unchecked(i))
141 .unwrap_or(false)
142 }
143
144 #[inline]
148 fn is_valid(&self, i: usize) -> bool {
149 !self.is_null(i)
150 }
151
152 #[must_use]
154 fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>);
155
156 #[must_use]
162 unsafe fn split_at_boxed_unchecked(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>);
163
164 fn slice(&mut self, offset: usize, length: usize);
170
171 unsafe fn slice_unchecked(&mut self, offset: usize, length: usize);
178
179 #[must_use]
185 fn sliced(&self, offset: usize, length: usize) -> Box<dyn Array> {
186 if length == 0 {
187 return new_empty_array(self.dtype().clone());
188 }
189 let mut new = self.to_boxed();
190 new.slice(offset, length);
191 new
192 }
193
194 #[must_use]
202 unsafe fn sliced_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
203 debug_assert!(offset + length <= self.len());
204 let mut new = self.to_boxed();
205 new.slice_unchecked(offset, length);
206 new
207 }
208
209 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array>;
213
214 fn to_boxed(&self) -> Box<dyn Array>;
216}
217
// Generates the `Clone` implementation for `Box<dyn Array>`; it relies on the
// `dyn_clone::DynClone` supertrait declared on `Array`.
dyn_clone::clone_trait_object!(Array);
219
/// Conversion of an owned value into a `Box<dyn Array>`.
pub trait IntoBoxedArray {
    /// Consumes `self` and returns it as a boxed, dynamically typed array.
    fn into_boxed(self) -> Box<dyn Array>;
}
223
224impl<A: Array> IntoBoxedArray for A {
225 #[inline(always)]
226 fn into_boxed(self) -> Box<dyn Array> {
227 Box::new(self) as _
228 }
229}
/// An already boxed array is returned unchanged.
impl IntoBoxedArray for Box<dyn Array> {
    #[inline(always)]
    fn into_boxed(self) -> Box<dyn Array> {
        self
    }
}
236
237pub trait MutableArray: std::fmt::Debug + Send + Sync {
243 fn dtype(&self) -> &ArrowDataType;
245
246 fn len(&self) -> usize;
248
249 fn is_empty(&self) -> bool {
251 self.len() == 0
252 }
253
254 fn validity(&self) -> Option<&MutableBitmap>;
256
257 fn as_box(&mut self) -> Box<dyn Array>;
259
260 fn as_arc(&mut self) -> std::sync::Arc<dyn Array> {
265 self.as_box().into()
266 }
267
268 fn as_any(&self) -> &dyn Any;
270
271 fn as_mut_any(&mut self) -> &mut dyn Any;
273
274 fn push_null(&mut self);
276
277 #[inline]
281 fn is_valid(&self, index: usize) -> bool {
282 self.validity()
283 .as_ref()
284 .map(|x| x.get(index))
285 .unwrap_or(true)
286 }
287
288 fn reserve(&mut self, additional: usize);
290
291 fn shrink_to_fit(&mut self);
293}
294
295impl MutableArray for Box<dyn MutableArray> {
296 fn len(&self) -> usize {
297 self.as_ref().len()
298 }
299
300 fn validity(&self) -> Option<&MutableBitmap> {
301 self.as_ref().validity()
302 }
303
304 fn as_box(&mut self) -> Box<dyn Array> {
305 self.as_mut().as_box()
306 }
307
308 fn as_arc(&mut self) -> Arc<dyn Array> {
309 self.as_mut().as_arc()
310 }
311
312 fn dtype(&self) -> &ArrowDataType {
313 self.as_ref().dtype()
314 }
315
316 fn as_any(&self) -> &dyn std::any::Any {
317 self.as_ref().as_any()
318 }
319
320 fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
321 self.as_mut().as_mut_any()
322 }
323
324 #[inline]
325 fn push_null(&mut self) {
326 self.as_mut().push_null()
327 }
328
329 fn shrink_to_fit(&mut self) {
330 self.as_mut().shrink_to_fit();
331 }
332
333 fn reserve(&mut self, additional: usize) {
334 self.as_mut().reserve(additional);
335 }
336}
337
/// Downcasts `$array` (through its `as_any()`) to `$ty` and applies `$f` to
/// the resulting reference.
///
/// Panics if the value behind `$array` is not a `$ty`.
macro_rules! general_dyn {
    ($array:expr, $ty:ty, $f:expr) => {{
        // The caller promises the concrete type; a mismatch is a bug, hence `unwrap`.
        let concrete = $array.as_any().downcast_ref::<$ty>().unwrap();
        ($f)(concrete)
    }};
}
344
/// Downcasts `$array` to `$ty` and calls `fmt` on it with the formatter `$f`.
///
/// Panics (inside `general_dyn!`) if the value behind `$array` is not a `$ty`.
macro_rules! fmt_dyn {
    ($array:expr, $ty:ty, $f:expr) => {{
        let mut write_concrete = |concrete: &$ty| concrete.fmt($f);
        general_dyn!($array, $ty, write_concrete)
    }};
}
351
352impl std::fmt::Debug for dyn Array + '_ {
353 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
354 use crate::datatypes::PhysicalType::*;
355 match self.dtype().to_physical_type() {
356 Null => fmt_dyn!(self, NullArray, f),
357 Boolean => fmt_dyn!(self, BooleanArray, f),
358 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
359 fmt_dyn!(self, PrimitiveArray<$T>, f)
360 }),
361 BinaryView => fmt_dyn!(self, BinaryViewArray, f),
362 Utf8View => fmt_dyn!(self, Utf8ViewArray, f),
363 Binary => fmt_dyn!(self, BinaryArray<i32>, f),
364 LargeBinary => fmt_dyn!(self, BinaryArray<i64>, f),
365 FixedSizeBinary => fmt_dyn!(self, FixedSizeBinaryArray, f),
366 Utf8 => fmt_dyn!(self, Utf8Array::<i32>, f),
367 LargeUtf8 => fmt_dyn!(self, Utf8Array::<i64>, f),
368 List => fmt_dyn!(self, ListArray::<i32>, f),
369 LargeList => fmt_dyn!(self, ListArray::<i64>, f),
370 FixedSizeList => fmt_dyn!(self, FixedSizeListArray, f),
371 Struct => fmt_dyn!(self, StructArray, f),
372 Union => fmt_dyn!(self, UnionArray, f),
373 Dictionary(key_type) => {
374 match_integer_type!(key_type, |$T| {
375 fmt_dyn!(self, DictionaryArray::<$T>, f)
376 })
377 },
378 Map => fmt_dyn!(self, MapArray, f),
379 }
380 }
381}
382
383pub fn new_empty_array(dtype: ArrowDataType) -> Box<dyn Array> {
385 use crate::datatypes::PhysicalType::*;
386 match dtype.to_physical_type() {
387 Null => Box::new(NullArray::new_empty(dtype)),
388 Boolean => Box::new(BooleanArray::new_empty(dtype)),
389 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
390 Box::new(PrimitiveArray::<$T>::new_empty(dtype))
391 }),
392 Binary => Box::new(BinaryArray::<i32>::new_empty(dtype)),
393 LargeBinary => Box::new(BinaryArray::<i64>::new_empty(dtype)),
394 FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_empty(dtype)),
395 Utf8 => Box::new(Utf8Array::<i32>::new_empty(dtype)),
396 LargeUtf8 => Box::new(Utf8Array::<i64>::new_empty(dtype)),
397 List => Box::new(ListArray::<i32>::new_empty(dtype)),
398 LargeList => Box::new(ListArray::<i64>::new_empty(dtype)),
399 FixedSizeList => Box::new(FixedSizeListArray::new_empty(dtype)),
400 Struct => Box::new(StructArray::new_empty(dtype)),
401 Union => Box::new(UnionArray::new_empty(dtype)),
402 Map => Box::new(MapArray::new_empty(dtype)),
403 Utf8View => Box::new(Utf8ViewArray::new_empty(dtype)),
404 BinaryView => Box::new(BinaryViewArray::new_empty(dtype)),
405 Dictionary(key_type) => {
406 match_integer_type!(key_type, |$T| {
407 Box::new(DictionaryArray::<$T>::new_empty(dtype))
408 })
409 },
410 }
411}
412
413pub fn new_null_array(dtype: ArrowDataType, length: usize) -> Box<dyn Array> {
418 use crate::datatypes::PhysicalType::*;
419 match dtype.to_physical_type() {
420 Null => Box::new(NullArray::new_null(dtype, length)),
421 Boolean => Box::new(BooleanArray::new_null(dtype, length)),
422 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
423 Box::new(PrimitiveArray::<$T>::new_null(dtype, length))
424 }),
425 Binary => Box::new(BinaryArray::<i32>::new_null(dtype, length)),
426 LargeBinary => Box::new(BinaryArray::<i64>::new_null(dtype, length)),
427 FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_null(dtype, length)),
428 Utf8 => Box::new(Utf8Array::<i32>::new_null(dtype, length)),
429 LargeUtf8 => Box::new(Utf8Array::<i64>::new_null(dtype, length)),
430 List => Box::new(ListArray::<i32>::new_null(dtype, length)),
431 LargeList => Box::new(ListArray::<i64>::new_null(dtype, length)),
432 FixedSizeList => Box::new(FixedSizeListArray::new_null(dtype, length)),
433 Struct => Box::new(StructArray::new_null(dtype, length)),
434 Union => Box::new(UnionArray::new_null(dtype, length)),
435 Map => Box::new(MapArray::new_null(dtype, length)),
436 BinaryView => Box::new(BinaryViewArray::new_null(dtype, length)),
437 Utf8View => Box::new(Utf8ViewArray::new_null(dtype, length)),
438 Dictionary(key_type) => {
439 match_integer_type!(key_type, |$T| {
440 Box::new(DictionaryArray::<$T>::new_null(dtype, length))
441 })
442 },
443 }
444}
445
/// Downcasts `$array` to `$ty` and returns a boxed clone of the concrete value.
///
/// Panics (inside `general_dyn!`) if the value behind `$array` is not a `$ty`.
macro_rules! clone_dyn {
    ($array:expr, $ty:ty) => {{
        let duplicate = |concrete: &$ty| Box::new(concrete.clone());
        general_dyn!($array, $ty, duplicate)
    }};
}
452
/// Generates the by-value `sliced` / `sliced_unchecked` inherent methods.
///
/// The surrounding type must provide `len(&self) -> usize` and an
/// `unsafe fn slice_unchecked(&mut self, offset, length)`.
macro_rules! impl_sliced {
    () => {
        /// Consumes `self` and returns it sliced by `offset` and `length`.
        ///
        /// # Panics
        ///
        /// Panics if `offset + length` overflows or exceeds `self.len()`.
        #[inline]
        #[must_use]
        pub fn sliced(self, offset: usize, length: usize) -> Self {
            let end = offset
                .checked_add(length)
                .expect("offset + length overflowed");
            assert!(
                end <= self.len(),
                "the offset of the new Buffer cannot exceed the existing length"
            );
            // SAFETY: `offset + length <= self.len()` was asserted just above.
            unsafe { Self::sliced_unchecked(self, offset, length) }
        }

        /// Consumes `self` and returns it sliced, without bound checks.
        ///
        /// # Safety
        ///
        /// The caller must ensure `offset + length <= self.len()`, which is
        /// the condition the checked `sliced` asserts before calling this.
        #[inline]
        #[must_use]
        pub unsafe fn sliced_unchecked(mut self, offset: usize, length: usize) -> Self {
            Self::slice_unchecked(&mut self, offset, length);
            self
        }
    };
}
488
/// Generates validity accessors for a type that has a
/// `validity: Option<Bitmap>` field and a `len(&self) -> usize` method.
macro_rules! impl_mut_validity {
    () => {
        /// Consumes `self` and returns it with `validity` installed.
        ///
        /// # Panics
        ///
        /// Panics if `validity` is `Some` and its length differs from `self.len()`.
        #[must_use]
        #[inline]
        pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
            self.set_validity(validity);
            self
        }

        /// Replaces the validity bitmap in place.
        ///
        /// # Panics
        ///
        /// Panics if `validity` is `Some` and its length differs from `self.len()`.
        #[inline]
        pub fn set_validity(&mut self, validity: Option<Bitmap>) {
            if let Some(bitmap) = &validity {
                if bitmap.len() != self.len() {
                    panic!("validity must be equal to the array's length")
                }
            }
            self.validity = validity;
        }

        /// Removes and returns the validity bitmap, leaving `None` behind.
        #[inline]
        pub fn take_validity(&mut self) -> Option<Bitmap> {
            std::mem::take(&mut self.validity)
        }
    }
}
520
/// Generates validity accessors for a type that has a
/// `validity: Option<MutableBitmap>` field and a `len(&self) -> usize` method.
macro_rules! impl_mutable_array_mut_validity {
    () => {
        /// Consumes `self` and returns it with `validity` installed.
        ///
        /// # Panics
        ///
        /// Panics if `validity` is `Some` and its length differs from `self.len()`.
        #[must_use]
        #[inline]
        pub fn with_validity(mut self, validity: Option<MutableBitmap>) -> Self {
            self.set_validity(validity);
            self
        }

        /// Replaces the validity bitmap in place.
        ///
        /// # Panics
        ///
        /// Panics if `validity` is `Some` and its length differs from `self.len()`.
        #[inline]
        pub fn set_validity(&mut self, validity: Option<MutableBitmap>) {
            if let Some(bitmap) = &validity {
                if bitmap.len() != self.len() {
                    panic!("validity must be equal to the array's length")
                }
            }
            self.validity = validity;
        }

        /// Rewrites the existing validity bitmap through `f`.
        ///
        /// Does nothing (and does not call `f`) when there is no bitmap. The
        /// bitmap is moved out before `f` runs, so the field is `None` while
        /// `f` executes.
        ///
        /// # Panics
        ///
        /// Panics if the bitmap returned by `f` has a length different from
        /// `self.len()`.
        #[inline]
        pub fn apply_validity<F: FnOnce(MutableBitmap) -> MutableBitmap>(&mut self, f: F) {
            if let Some(current) = self.validity.take() {
                self.set_validity(Some(f(current)))
            }
        }

    }
}
559
/// Generates `boxed` / `arced` inherent methods that wrap the concrete array
/// into a `Box<dyn Array>` or an `Arc<dyn Array>`.
macro_rules! impl_into_array {
    () => {
        /// Consumes `self` and returns it as a `Box<dyn Array>`.
        pub fn boxed(self) -> Box<dyn Array> {
            Box::new(self)
        }

        /// Consumes `self` and returns it as an `Arc<dyn Array>`.
        pub fn arced(self) -> std::sync::Arc<dyn Array> {
            std::sync::Arc::new(self)
        }
    };
}
574
/// Generates the `Array` trait methods that are identical for every concrete
/// array.
///
/// The surrounding type must have a `dtype` field, implement `Clone` and
/// `Splitable`, and provide `len`, `slice` and `slice_unchecked` methods that
/// the calls below resolve to.
macro_rules! impl_common_array {
    () => {
        #[inline]
        fn as_any(&self) -> &dyn std::any::Any {
            self
        }

        #[inline]
        fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
            self
        }

        #[inline]
        fn len(&self) -> usize {
            self.len()
        }

        #[inline]
        fn dtype(&self) -> &ArrowDataType {
            &self.dtype
        }

        #[inline]
        fn dtype_mut(&mut self) -> &mut ArrowDataType {
            &mut self.dtype
        }

        // Checked split: `Splitable::split_at` panics on an out-of-bounds offset.
        #[inline]
        fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
            let (left, right) = $crate::array::Splitable::split_at(self, offset);
            (Box::new(left), Box::new(right))
        }

        // Unchecked split: the bound is the caller's responsibility.
        #[inline]
        unsafe fn split_at_boxed_unchecked(
            &self,
            offset: usize,
        ) -> (Box<dyn Array>, Box<dyn Array>) {
            let (left, right) =
                unsafe { $crate::array::Splitable::split_at_unchecked(self, offset) };
            (Box::new(left), Box::new(right))
        }

        #[inline]
        fn slice(&mut self, offset: usize, length: usize) {
            self.slice(offset, length);
        }

        #[inline]
        unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
            self.slice_unchecked(offset, length);
        }

        #[inline]
        fn to_boxed(&self) -> Box<dyn Array> {
            Box::new(self.clone())
        }
    };
}
634
635pub fn clone(array: &dyn Array) -> Box<dyn Array> {
640 use crate::datatypes::PhysicalType::*;
641 match array.dtype().to_physical_type() {
642 Null => clone_dyn!(array, NullArray),
643 Boolean => clone_dyn!(array, BooleanArray),
644 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
645 clone_dyn!(array, PrimitiveArray<$T>)
646 }),
647 Binary => clone_dyn!(array, BinaryArray<i32>),
648 LargeBinary => clone_dyn!(array, BinaryArray<i64>),
649 FixedSizeBinary => clone_dyn!(array, FixedSizeBinaryArray),
650 Utf8 => clone_dyn!(array, Utf8Array::<i32>),
651 LargeUtf8 => clone_dyn!(array, Utf8Array::<i64>),
652 List => clone_dyn!(array, ListArray::<i32>),
653 LargeList => clone_dyn!(array, ListArray::<i64>),
654 FixedSizeList => clone_dyn!(array, FixedSizeListArray),
655 Struct => clone_dyn!(array, StructArray),
656 Union => clone_dyn!(array, UnionArray),
657 Map => clone_dyn!(array, MapArray),
658 BinaryView => clone_dyn!(array, BinaryViewArray),
659 Utf8View => clone_dyn!(array, Utf8ViewArray),
660 Dictionary(key_type) => {
661 match_integer_type!(key_type, |$T| {
662 clone_dyn!(array, DictionaryArray::<$T>)
663 })
664 },
665 }
666}
667
/// Lets a `dyn Array` be used where an `AsRef<dyn Array>` is expected; the
/// conversion returns `self` unchanged.
impl<'a> AsRef<dyn Array + 'a> for dyn Array {
    fn as_ref(&self) -> &(dyn Array + 'a) {
        self
    }
}
675
676mod binary;
677mod boolean;
678pub mod builder;
679mod dictionary;
680mod fixed_size_binary;
681mod fixed_size_list;
682mod list;
683pub use list::LIST_VALUES_NAME;
684mod map;
685mod null;
686mod primitive;
687pub mod specification;
688mod static_array;
689mod static_array_collect;
690mod struct_;
691mod total_ord;
692mod union;
693mod utf8;
694
695mod equal;
696mod ffi;
697mod fmt;
698#[doc(hidden)]
699pub mod indexable;
700pub mod iterator;
701
702mod binview;
703mod values;
704
705pub use binary::{
706 BinaryArray, BinaryArrayBuilder, BinaryValueIter, MutableBinaryArray, MutableBinaryValuesArray,
707};
708pub use binview::{
709 BinaryViewArray, BinaryViewArrayBuilder, BinaryViewArrayGeneric, BinaryViewArrayGenericBuilder,
710 MutableBinaryViewArray, MutablePlBinary, MutablePlString, Utf8ViewArray, Utf8ViewArrayBuilder,
711 View, ViewType,
712};
713pub use boolean::{BooleanArray, BooleanArrayBuilder, MutableBooleanArray};
714pub use dictionary::{DictionaryArray, DictionaryKey, MutableDictionaryArray};
715pub use equal::equal;
716pub use fixed_size_binary::{
717 FixedSizeBinaryArray, FixedSizeBinaryArrayBuilder, MutableFixedSizeBinaryArray,
718};
719pub use fixed_size_list::{
720 FixedSizeListArray, FixedSizeListArrayBuilder, MutableFixedSizeListArray,
721};
722pub use fmt::{get_display, get_value_display};
723pub(crate) use iterator::ArrayAccessor;
724pub use iterator::ArrayValuesIter;
725pub use list::{ListArray, ListArrayBuilder, ListValuesIter, MutableListArray};
726pub use map::MapArray;
727pub use null::{MutableNullArray, NullArray, NullArrayBuilder};
728use polars_buffer::Buffer;
729use polars_error::PolarsResult;
730pub use primitive::*;
731pub use static_array::{ParameterFreeDtypeStaticArray, StaticArray};
732pub use static_array_collect::{ArrayCollectIterExt, ArrayFromIter, ArrayFromIterDtype};
733pub use struct_::{StructArray, StructArrayBuilder};
734pub use union::UnionArray;
735pub use utf8::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array, Utf8ValuesIter};
736pub use values::ValueSize;
737
738#[cfg(feature = "proptest")]
739pub use self::boolean::proptest::boolean_array;
740pub(crate) use self::ffi::{FromFfi, ToFfi, offset_buffers_children_dictionary};
741use crate::{match_integer_type, with_match_primitive_type_full};
742
743pub trait TryExtend<A> {
746 fn try_extend<I: IntoIterator<Item = A>>(&mut self, iter: I) -> PolarsResult<()>;
748}
749
/// Fallible push of a single item.
pub trait TryPush<A> {
    /// Pushes `item` onto `self`, returning an error if the implementation
    /// cannot accept it.
    fn try_push(&mut self, item: A) -> PolarsResult<()>;
}
755
/// Push of a single item without the checks a safe push would perform.
pub trait PushUnchecked<A> {
    /// Pushes `item` onto `self` without checks.
    ///
    /// # Safety
    ///
    /// NOTE(review): the precise precondition is defined by each implementor
    /// and is not visible here — presumably sufficient capacity must already
    /// be reserved; confirm against the implementations.
    unsafe fn push_unchecked(&mut self, item: A);
}
765
/// Fallible extension of a container from another value of the same type.
pub trait TryExtendFromSelf {
    /// Extends `self` with the contents of `other`, returning an error if the
    /// implementation cannot complete the extension.
    fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()>;
}
772
/// Common read access to arrays that store bytes behind offsets of type `O`.
///
/// # Safety
///
/// NOTE(review): the invariants an implementor must uphold are not visible
/// here — presumably `offsets` must index validly into `values`; confirm
/// before adding a new implementation.
pub unsafe trait GenericBinaryArray<O: crate::offset::Offset>: Array {
    /// The underlying bytes.
    fn values(&self) -> &[u8];
    /// The offsets slice.
    fn offsets(&self) -> &[O];
}
785
/// Shorthand for an owned, dynamically typed array.
pub type ArrayRef = Box<dyn Array>;
787
788impl Splitable for Option<Bitmap> {
789 #[inline(always)]
790 fn check_bound(&self, offset: usize) -> bool {
791 self.as_ref().is_none_or(|v| offset <= v.len())
792 }
793
794 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
795 self.as_ref().map_or((None, None), |bm| {
796 let (lhs, rhs) = unsafe { bm.split_at_unchecked(offset) };
797 (
798 (lhs.unset_bits() > 0).then_some(lhs),
799 (rhs.unset_bits() > 0).then_some(rhs),
800 )
801 })
802 }
803}