1#![allow(unsafe_op_in_unsafe_fn)]
21use std::any::Any;
22use std::sync::Arc;
23
24use crate::bitmap::{Bitmap, MutableBitmap};
25use crate::datatypes::ArrowDataType;
26
27pub mod physical_binary;
28
/// A value that can be cut into two values of the same type at an offset.
///
/// Implementors supply [`Splitable::check_bound`] and the raw
/// [`Splitable::_split_at_unchecked`]; the checked and unchecked public entry
/// points are provided on top of those two.
pub trait Splitable: Sized {
    /// Returns `true` when `offset` is an acceptable split position for `self`.
    fn check_bound(&self, offset: usize) -> bool;

    /// Splits `self` at `offset`, returning the two resulting halves.
    ///
    /// # Panics
    ///
    /// Panics if [`Splitable::check_bound`] rejects `offset`.
    #[inline]
    #[must_use]
    fn split_at(&self, offset: usize) -> (Self, Self) {
        assert!(self.check_bound(offset));
        // SAFETY: the assertion above validated `offset`.
        unsafe { Self::_split_at_unchecked(self, offset) }
    }

    /// Splits `self` at `offset`; the bound is only verified in debug builds.
    ///
    /// # Safety
    ///
    /// `offset` must be accepted by [`Splitable::check_bound`].
    #[inline]
    #[must_use]
    unsafe fn split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        debug_assert!(self.check_bound(offset));
        // SAFETY: the caller upholds this function's own contract.
        unsafe { Self::_split_at_unchecked(self, offset) }
    }

    /// Implementation hook performing the actual split without any checks.
    ///
    /// Call [`Splitable::split_at`] or [`Splitable::split_at_unchecked`]
    /// rather than this method.
    ///
    /// # Safety
    ///
    /// `offset` must be accepted by [`Splitable::check_bound`].
    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self);
}
60
61pub trait Array: Send + Sync + dyn_clone::DynClone + 'static {
64 fn as_any(&self) -> &dyn Any;
66
67 fn as_any_mut(&mut self) -> &mut dyn Any;
69
70 fn len(&self) -> usize;
73
74 fn is_empty(&self) -> bool {
76 self.len() == 0
77 }
78
79 fn dtype(&self) -> &ArrowDataType;
82
83 fn validity(&self) -> Option<&Bitmap>;
87
88 #[inline]
92 fn null_count(&self) -> usize {
93 if self.dtype() == &ArrowDataType::Null {
94 return self.len();
95 };
96 self.validity()
97 .as_ref()
98 .map(|x| x.unset_bits())
99 .unwrap_or(0)
100 }
101
102 #[inline]
103 fn has_nulls(&self) -> bool {
104 self.null_count() > 0
105 }
106
107 #[inline]
111 fn is_null(&self, i: usize) -> bool {
112 assert!(i < self.len());
113 unsafe { self.is_null_unchecked(i) }
114 }
115
116 #[inline]
121 unsafe fn is_null_unchecked(&self, i: usize) -> bool {
122 self.validity()
123 .as_ref()
124 .map(|x| !x.get_bit_unchecked(i))
125 .unwrap_or(false)
126 }
127
128 #[inline]
132 fn is_valid(&self, i: usize) -> bool {
133 !self.is_null(i)
134 }
135
136 #[must_use]
138 fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>);
139
140 #[must_use]
146 unsafe fn split_at_boxed_unchecked(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>);
147
148 fn slice(&mut self, offset: usize, length: usize);
154
155 unsafe fn slice_unchecked(&mut self, offset: usize, length: usize);
162
163 #[must_use]
169 fn sliced(&self, offset: usize, length: usize) -> Box<dyn Array> {
170 if length == 0 {
171 return new_empty_array(self.dtype().clone());
172 }
173 let mut new = self.to_boxed();
174 new.slice(offset, length);
175 new
176 }
177
178 #[must_use]
186 unsafe fn sliced_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
187 debug_assert!(offset + length <= self.len());
188 let mut new = self.to_boxed();
189 new.slice_unchecked(offset, length);
190 new
191 }
192
193 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array>;
197
198 fn to_boxed(&self) -> Box<dyn Array>;
200}
201
202dyn_clone::clone_trait_object!(Array);
203
204pub trait IntoBoxedArray {
205 fn into_boxed(self) -> Box<dyn Array>;
206}
207
208impl<A: Array> IntoBoxedArray for A {
209 #[inline(always)]
210 fn into_boxed(self) -> Box<dyn Array> {
211 Box::new(self) as _
212 }
213}
214impl IntoBoxedArray for Box<dyn Array> {
215 #[inline(always)]
216 fn into_boxed(self) -> Box<dyn Array> {
217 self
218 }
219}
220
221pub trait MutableArray: std::fmt::Debug + Send + Sync {
227 fn dtype(&self) -> &ArrowDataType;
229
230 fn len(&self) -> usize;
232
233 fn is_empty(&self) -> bool {
235 self.len() == 0
236 }
237
238 fn validity(&self) -> Option<&MutableBitmap>;
240
241 fn as_box(&mut self) -> Box<dyn Array>;
243
244 fn as_arc(&mut self) -> std::sync::Arc<dyn Array> {
249 self.as_box().into()
250 }
251
252 fn as_any(&self) -> &dyn Any;
254
255 fn as_mut_any(&mut self) -> &mut dyn Any;
257
258 fn push_null(&mut self);
260
261 #[inline]
265 fn is_valid(&self, index: usize) -> bool {
266 self.validity()
267 .as_ref()
268 .map(|x| x.get(index))
269 .unwrap_or(true)
270 }
271
272 fn reserve(&mut self, additional: usize);
274
275 fn shrink_to_fit(&mut self);
277}
278
279impl MutableArray for Box<dyn MutableArray> {
280 fn len(&self) -> usize {
281 self.as_ref().len()
282 }
283
284 fn validity(&self) -> Option<&MutableBitmap> {
285 self.as_ref().validity()
286 }
287
288 fn as_box(&mut self) -> Box<dyn Array> {
289 self.as_mut().as_box()
290 }
291
292 fn as_arc(&mut self) -> Arc<dyn Array> {
293 self.as_mut().as_arc()
294 }
295
296 fn dtype(&self) -> &ArrowDataType {
297 self.as_ref().dtype()
298 }
299
300 fn as_any(&self) -> &dyn std::any::Any {
301 self.as_ref().as_any()
302 }
303
304 fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
305 self.as_mut().as_mut_any()
306 }
307
308 #[inline]
309 fn push_null(&mut self) {
310 self.as_mut().push_null()
311 }
312
313 fn shrink_to_fit(&mut self) {
314 self.as_mut().shrink_to_fit();
315 }
316
317 fn reserve(&mut self, additional: usize) {
318 self.as_mut().reserve(additional);
319 }
320}
321
/// Downcasts `$array` (anything exposing `as_any()`) to the concrete type
/// `$ty` and applies the callable `$f` to the resulting reference.
///
/// Panics (via `unwrap`) when `$array` is not actually a `$ty`.
macro_rules! general_dyn {
    ($array:expr, $ty:ty, $f:expr) => {{
        let concrete: &$ty = $array.as_any().downcast_ref::<$ty>().unwrap();
        ($f)(concrete)
    }};
}
328
/// Formats `$array` by downcasting it to `$ty` (through `general_dyn!`) and
/// calling that type's `fmt` with the formatter `$f`.
macro_rules! fmt_dyn {
    ($array:expr, $ty:ty, $f:expr) => {{
        // `mut` is required: the closure borrows the formatter mutably.
        let mut write_concrete = |concrete: &$ty| concrete.fmt($f);
        general_dyn!($array, $ty, write_concrete)
    }};
}
335
336impl std::fmt::Debug for dyn Array + '_ {
337 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
338 use crate::datatypes::PhysicalType::*;
339 match self.dtype().to_physical_type() {
340 Null => fmt_dyn!(self, NullArray, f),
341 Boolean => fmt_dyn!(self, BooleanArray, f),
342 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
343 fmt_dyn!(self, PrimitiveArray<$T>, f)
344 }),
345 BinaryView => fmt_dyn!(self, BinaryViewArray, f),
346 Utf8View => fmt_dyn!(self, Utf8ViewArray, f),
347 Binary => fmt_dyn!(self, BinaryArray<i32>, f),
348 LargeBinary => fmt_dyn!(self, BinaryArray<i64>, f),
349 FixedSizeBinary => fmt_dyn!(self, FixedSizeBinaryArray, f),
350 Utf8 => fmt_dyn!(self, Utf8Array::<i32>, f),
351 LargeUtf8 => fmt_dyn!(self, Utf8Array::<i64>, f),
352 List => fmt_dyn!(self, ListArray::<i32>, f),
353 LargeList => fmt_dyn!(self, ListArray::<i64>, f),
354 FixedSizeList => fmt_dyn!(self, FixedSizeListArray, f),
355 Struct => fmt_dyn!(self, StructArray, f),
356 Union => fmt_dyn!(self, UnionArray, f),
357 Dictionary(key_type) => {
358 match_integer_type!(key_type, |$T| {
359 fmt_dyn!(self, DictionaryArray::<$T>, f)
360 })
361 },
362 Map => fmt_dyn!(self, MapArray, f),
363 }
364 }
365}
366
367pub fn new_empty_array(dtype: ArrowDataType) -> Box<dyn Array> {
369 use crate::datatypes::PhysicalType::*;
370 match dtype.to_physical_type() {
371 Null => Box::new(NullArray::new_empty(dtype)),
372 Boolean => Box::new(BooleanArray::new_empty(dtype)),
373 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
374 Box::new(PrimitiveArray::<$T>::new_empty(dtype))
375 }),
376 Binary => Box::new(BinaryArray::<i32>::new_empty(dtype)),
377 LargeBinary => Box::new(BinaryArray::<i64>::new_empty(dtype)),
378 FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_empty(dtype)),
379 Utf8 => Box::new(Utf8Array::<i32>::new_empty(dtype)),
380 LargeUtf8 => Box::new(Utf8Array::<i64>::new_empty(dtype)),
381 List => Box::new(ListArray::<i32>::new_empty(dtype)),
382 LargeList => Box::new(ListArray::<i64>::new_empty(dtype)),
383 FixedSizeList => Box::new(FixedSizeListArray::new_empty(dtype)),
384 Struct => Box::new(StructArray::new_empty(dtype)),
385 Union => Box::new(UnionArray::new_empty(dtype)),
386 Map => Box::new(MapArray::new_empty(dtype)),
387 Utf8View => Box::new(Utf8ViewArray::new_empty(dtype)),
388 BinaryView => Box::new(BinaryViewArray::new_empty(dtype)),
389 Dictionary(key_type) => {
390 match_integer_type!(key_type, |$T| {
391 Box::new(DictionaryArray::<$T>::new_empty(dtype))
392 })
393 },
394 }
395}
396
397pub fn new_null_array(dtype: ArrowDataType, length: usize) -> Box<dyn Array> {
402 use crate::datatypes::PhysicalType::*;
403 match dtype.to_physical_type() {
404 Null => Box::new(NullArray::new_null(dtype, length)),
405 Boolean => Box::new(BooleanArray::new_null(dtype, length)),
406 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
407 Box::new(PrimitiveArray::<$T>::new_null(dtype, length))
408 }),
409 Binary => Box::new(BinaryArray::<i32>::new_null(dtype, length)),
410 LargeBinary => Box::new(BinaryArray::<i64>::new_null(dtype, length)),
411 FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_null(dtype, length)),
412 Utf8 => Box::new(Utf8Array::<i32>::new_null(dtype, length)),
413 LargeUtf8 => Box::new(Utf8Array::<i64>::new_null(dtype, length)),
414 List => Box::new(ListArray::<i32>::new_null(dtype, length)),
415 LargeList => Box::new(ListArray::<i64>::new_null(dtype, length)),
416 FixedSizeList => Box::new(FixedSizeListArray::new_null(dtype, length)),
417 Struct => Box::new(StructArray::new_null(dtype, length)),
418 Union => Box::new(UnionArray::new_null(dtype, length)),
419 Map => Box::new(MapArray::new_null(dtype, length)),
420 BinaryView => Box::new(BinaryViewArray::new_null(dtype, length)),
421 Utf8View => Box::new(Utf8ViewArray::new_null(dtype, length)),
422 Dictionary(key_type) => {
423 match_integer_type!(key_type, |$T| {
424 Box::new(DictionaryArray::<$T>::new_null(dtype, length))
425 })
426 },
427 }
428}
429
/// Clones `$array` after downcasting it to `$ty` (through `general_dyn!`),
/// yielding the copy in a `Box<$ty>`.
macro_rules! clone_dyn {
    ($array:expr, $ty:ty) => {{
        let duplicate = |concrete: &$ty| Box::new(concrete.clone());
        general_dyn!($array, $ty, duplicate)
    }};
}
436
/// Expands to the by-value `sliced` / `sliced_unchecked` methods.
///
/// The surrounding `impl` must provide `len(&self) -> usize` and an unsafe
/// `slice_unchecked(&mut self, offset, length)`.
macro_rules! impl_sliced {
    () => {
        /// Returns `self` narrowed to `length` elements starting at `offset`.
        ///
        /// # Panics
        ///
        /// Panics if `offset + length` overflows or exceeds `self.len()`.
        #[inline]
        #[must_use]
        pub fn sliced(self, offset: usize, length: usize) -> Self {
            let end = offset
                .checked_add(length)
                .expect("offset + length overflowed");
            assert!(
                end <= self.len(),
                "the offset of the new Buffer cannot exceed the existing length"
            );
            // SAFETY: `offset + length <= self.len()` was just asserted.
            unsafe { self.sliced_unchecked(offset, length) }
        }

        /// Returns `self` narrowed to `length` elements starting at `offset`,
        /// without any bounds checks.
        ///
        /// # Safety
        ///
        /// The caller must ensure `offset + length <= self.len()`.
        #[inline]
        #[must_use]
        pub unsafe fn sliced_unchecked(mut self, offset: usize, length: usize) -> Self {
            // SAFETY: forwarded from this function's own contract.
            unsafe { Self::slice_unchecked(&mut self, offset, length) };
            self
        }
    };
}
472
/// Expands to validity accessors for a type with a
/// `validity: Option<Bitmap>` field and a `len(&self)` method.
macro_rules! impl_mut_validity {
    () => {
        /// Returns `self` with its validity replaced by `validity`.
        ///
        /// # Panics
        ///
        /// Panics if `validity` is `Some` and its length differs from
        /// `self.len()`.
        #[must_use]
        #[inline]
        pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
            self.set_validity(validity);
            self
        }

        /// Replaces the validity of this value.
        ///
        /// # Panics
        ///
        /// Panics if `validity` is `Some` and its length differs from
        /// `self.len()`.
        #[inline]
        pub fn set_validity(&mut self, validity: Option<Bitmap>) {
            if let Some(bitmap) = &validity {
                if bitmap.len() != self.len() {
                    panic!("validity must be equal to the array's length")
                }
            }
            self.validity = validity;
        }

        /// Removes and returns the validity, leaving `None` in its place.
        #[inline]
        pub fn take_validity(&mut self) -> Option<Bitmap> {
            std::mem::take(&mut self.validity)
        }
    }
}
504
/// Expands to validity accessors for a type with a
/// `validity: Option<MutableBitmap>` field and a `len(&self)` method.
macro_rules! impl_mutable_array_mut_validity {
    () => {
        /// Returns `self` with its validity replaced by `validity`.
        ///
        /// # Panics
        ///
        /// Panics if `validity` is `Some` and its length differs from
        /// `self.len()`.
        #[must_use]
        #[inline]
        pub fn with_validity(mut self, validity: Option<MutableBitmap>) -> Self {
            self.set_validity(validity);
            self
        }

        /// Replaces the validity of this value.
        ///
        /// # Panics
        ///
        /// Panics if `validity` is `Some` and its length differs from
        /// `self.len()`.
        #[inline]
        pub fn set_validity(&mut self, validity: Option<MutableBitmap>) {
            if let Some(bitmap) = &validity {
                if bitmap.len() != self.len() {
                    panic!("validity must be equal to the array's length")
                }
            }
            self.validity = validity;
        }

        /// Transforms the current validity with `f`; does nothing (and does
        /// not call `f`) when there is no validity.
        ///
        /// # Panics
        ///
        /// Panics if the bitmap returned by `f` has a length different from
        /// `self.len()`; the validity has already been taken out at that point.
        #[inline]
        pub fn apply_validity<F: FnOnce(MutableBitmap) -> MutableBitmap>(&mut self, f: F) {
            if let Some(current) = self.validity.take() {
                let updated = f(current);
                self.set_validity(Some(updated))
            }
        }

    }
}
543
/// Expands to `boxed` / `arced`, which move a concrete array behind a
/// type-erased `Box<dyn Array>` or `Arc<dyn Array>`.
macro_rules! impl_into_array {
    () => {
        /// Moves `self` into a `Box<dyn Array>`.
        pub fn boxed(self) -> Box<dyn Array> {
            Box::new(self) as Box<dyn Array>
        }

        /// Moves `self` into an `Arc<dyn Array>`.
        pub fn arced(self) -> std::sync::Arc<dyn Array> {
            std::sync::Arc::new(self) as std::sync::Arc<dyn Array>
        }
    };
}
558
/// Expands to the [`Array`] trait methods that are written the same way for
/// every concrete array; meant to be invoked inside an `impl Array for T`.
///
/// Several bodies call a method with the same name as the trait method being
/// defined (`len`, `slice`, `slice_unchecked`). They rely on the implementing
/// type having inherent methods of those names, which method resolution
/// prefers over the trait method; without them the calls would recurse.
macro_rules! impl_common_array {
    () => {
        #[inline]
        fn as_any(&self) -> &dyn std::any::Any {
            self
        }

        #[inline]
        fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
            self
        }

        #[inline]
        fn len(&self) -> usize {
            // Inherent `len`, see the macro documentation.
            self.len()
        }

        #[inline]
        fn dtype(&self) -> &ArrowDataType {
            &self.dtype
        }

        #[inline]
        fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
            // Checked split: `Splitable::split_at` asserts the bound.
            let (head, tail) = $crate::array::Splitable::split_at(self, offset);
            (Box::new(head), Box::new(tail))
        }

        #[inline]
        unsafe fn split_at_boxed_unchecked(
            &self,
            offset: usize,
        ) -> (Box<dyn Array>, Box<dyn Array>) {
            // SAFETY: forwarded from this function's own contract.
            let (head, tail) =
                unsafe { $crate::array::Splitable::split_at_unchecked(self, offset) };
            (Box::new(head), Box::new(tail))
        }

        #[inline]
        fn slice(&mut self, offset: usize, length: usize) {
            // Inherent `slice`, see the macro documentation.
            self.slice(offset, length);
        }

        #[inline]
        unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
            // Inherent `slice_unchecked`, see the macro documentation.
            // SAFETY: forwarded from this function's own contract.
            unsafe { self.slice_unchecked(offset, length) };
        }

        #[inline]
        fn to_boxed(&self) -> Box<dyn Array> {
            Box::new(self.clone())
        }
    };
}
613
614pub fn clone(array: &dyn Array) -> Box<dyn Array> {
619 use crate::datatypes::PhysicalType::*;
620 match array.dtype().to_physical_type() {
621 Null => clone_dyn!(array, NullArray),
622 Boolean => clone_dyn!(array, BooleanArray),
623 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
624 clone_dyn!(array, PrimitiveArray<$T>)
625 }),
626 Binary => clone_dyn!(array, BinaryArray<i32>),
627 LargeBinary => clone_dyn!(array, BinaryArray<i64>),
628 FixedSizeBinary => clone_dyn!(array, FixedSizeBinaryArray),
629 Utf8 => clone_dyn!(array, Utf8Array::<i32>),
630 LargeUtf8 => clone_dyn!(array, Utf8Array::<i64>),
631 List => clone_dyn!(array, ListArray::<i32>),
632 LargeList => clone_dyn!(array, ListArray::<i64>),
633 FixedSizeList => clone_dyn!(array, FixedSizeListArray),
634 Struct => clone_dyn!(array, StructArray),
635 Union => clone_dyn!(array, UnionArray),
636 Map => clone_dyn!(array, MapArray),
637 BinaryView => clone_dyn!(array, BinaryViewArray),
638 Utf8View => clone_dyn!(array, Utf8ViewArray),
639 Dictionary(key_type) => {
640 match_integer_type!(key_type, |$T| {
641 clone_dyn!(array, DictionaryArray::<$T>)
642 })
643 },
644 }
645}
646
/// Identity conversion so that a `dyn Array` can be passed wherever an
/// `AsRef<dyn Array>` is expected.
impl<'a> AsRef<(dyn Array + 'a)> for dyn Array {
    fn as_ref(&self) -> &(dyn Array + 'a) {
        self
    }
}
654
655mod binary;
656mod boolean;
657pub mod builder;
658mod dictionary;
659mod fixed_size_binary;
660mod fixed_size_list;
661mod list;
662mod map;
663mod null;
664mod primitive;
665pub mod specification;
666mod static_array;
667mod static_array_collect;
668mod struct_;
669mod total_ord;
670mod union;
671mod utf8;
672
673mod equal;
674mod ffi;
675mod fmt;
676#[doc(hidden)]
677pub mod indexable;
678pub mod iterator;
679
680mod binview;
681mod values;
682
683pub use binary::{
684 BinaryArray, BinaryArrayBuilder, BinaryValueIter, MutableBinaryArray, MutableBinaryValuesArray,
685};
686pub use binview::{
687 BinaryViewArray, BinaryViewArrayGeneric, BinaryViewArrayGenericBuilder, MutableBinaryViewArray,
688 MutablePlBinary, MutablePlString, Utf8ViewArray, View, ViewType,
689};
690pub use boolean::{BooleanArray, BooleanArrayBuilder, MutableBooleanArray};
691pub use dictionary::{DictionaryArray, DictionaryKey, MutableDictionaryArray};
692pub use equal::equal;
693pub use fixed_size_binary::{
694 FixedSizeBinaryArray, FixedSizeBinaryArrayBuilder, MutableFixedSizeBinaryArray,
695};
696pub use fixed_size_list::{
697 FixedSizeListArray, FixedSizeListArrayBuilder, MutableFixedSizeListArray,
698};
699pub use fmt::{get_display, get_value_display};
700pub(crate) use iterator::ArrayAccessor;
701pub use iterator::ArrayValuesIter;
702pub use list::{ListArray, ListArrayBuilder, ListValuesIter, MutableListArray};
703pub use map::MapArray;
704pub use null::{MutableNullArray, NullArray, NullArrayBuilder};
705use polars_error::PolarsResult;
706pub use primitive::*;
707pub use static_array::{ParameterFreeDtypeStaticArray, StaticArray};
708pub use static_array_collect::{ArrayCollectIterExt, ArrayFromIter, ArrayFromIterDtype};
709pub use struct_::{StructArray, StructArrayBuilder};
710pub use union::UnionArray;
711pub use utf8::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array, Utf8ValuesIter};
712pub use values::ValueSize;
713
714pub(crate) use self::ffi::{FromFfi, ToFfi, offset_buffers_children_dictionary};
715use crate::{match_integer_type, with_match_primitive_type_full};
716
/// Fallible counterpart of [`Extend`]: appends every item of an iterator to
/// a container, reporting failure through [`PolarsResult`].
pub trait TryExtend<A> {
    /// Tries to append all items yielded by `iter`.
    ///
    /// # Errors
    ///
    /// The error conditions are defined by the implementor.
    fn try_extend<I: IntoIterator<Item = A>>(&mut self, iter: I) -> PolarsResult<()>;
}
723
/// Fallible push of a single item into a container.
pub trait TryPush<A> {
    /// Tries to append `item`.
    ///
    /// # Errors
    ///
    /// The error conditions are defined by the implementor.
    fn try_push(&mut self, item: A) -> PolarsResult<()>;
}
729
/// Push of a single item into a container that skips the implementor's
/// safety checks.
pub trait PushUnchecked<A> {
    /// Appends `item` without performing the implementor's checks.
    ///
    /// # Safety
    ///
    /// The precondition is defined by the implementor.
    /// NOTE(review): presumably sufficient capacity must already be
    /// reserved — confirm against the implementors.
    unsafe fn push_unchecked(&mut self, item: A);
}
739
/// Fallible extension of a container with the contents of another container
/// of the same type.
pub trait TryExtendFromSelf {
    /// Tries to append the contents of `other` to `self`.
    ///
    /// # Errors
    ///
    /// The error conditions are defined by the implementor.
    fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()>;
}
746
/// Read access to the raw `values` bytes and `offsets` of an [`Array`] whose
/// offsets have type `O`.
///
/// # Safety
///
/// This is an `unsafe trait`: implementors must uphold invariants on
/// `values` and `offsets` that other code may rely on.
/// NOTE(review): the exact invariants are not visible in this file section —
/// confirm and document them here.
pub unsafe trait GenericBinaryArray<O: crate::offset::Offset>: Array {
    /// The values buffer as a byte slice.
    fn values(&self) -> &[u8];
    /// The offsets as a slice.
    fn offsets(&self) -> &[O];
}
759
/// Shorthand for an owned, type-erased array.
pub type ArrayRef = Box<dyn Array>;
761
762impl Splitable for Option<Bitmap> {
763 #[inline(always)]
764 fn check_bound(&self, offset: usize) -> bool {
765 self.as_ref().is_none_or(|v| offset <= v.len())
766 }
767
768 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
769 self.as_ref().map_or((None, None), |bm| {
770 let (lhs, rhs) = unsafe { bm.split_at_unchecked(offset) };
771 (
772 (lhs.unset_bits() > 0).then_some(lhs),
773 (rhs.unset_bits() > 0).then_some(rhs),
774 )
775 })
776 }
777}