1mod visitor;
5
6use std::any::Any;
7use std::fmt::Debug;
8use std::fmt::Formatter;
9use std::hash::Hash;
10use std::hash::Hasher;
11use std::ops::Deref;
12use std::ops::Range;
13use std::sync::Arc;
14
15pub use visitor::*;
16use vortex_buffer::ByteBuffer;
17use vortex_dtype::DType;
18use vortex_dtype::Nullability;
19use vortex_error::VortexExpect;
20use vortex_error::VortexResult;
21use vortex_error::vortex_ensure;
22use vortex_error::vortex_err;
23use vortex_error::vortex_panic;
24use vortex_mask::Mask;
25use vortex_scalar::Scalar;
26
27use crate::ArrayEq;
28use crate::ArrayHash;
29use crate::Canonical;
30use crate::DynArrayEq;
31use crate::DynArrayHash;
32use crate::arrays::BoolVTable;
33use crate::arrays::ConstantVTable;
34use crate::arrays::DecimalVTable;
35use crate::arrays::DictArray;
36use crate::arrays::ExtensionVTable;
37use crate::arrays::FilterArray;
38use crate::arrays::FixedSizeListVTable;
39use crate::arrays::ListViewVTable;
40use crate::arrays::NullVTable;
41use crate::arrays::PrimitiveVTable;
42use crate::arrays::StructVTable;
43use crate::arrays::VarBinVTable;
44use crate::arrays::VarBinViewVTable;
45use crate::builders::ArrayBuilder;
46use crate::compute::ComputeFn;
47use crate::compute::Cost;
48use crate::compute::InvocationArgs;
49use crate::compute::IsConstantOpts;
50use crate::compute::Output;
51use crate::compute::is_constant_opts;
52use crate::expr::stats::Precision;
53use crate::expr::stats::Stat;
54use crate::expr::stats::StatsProviderExt;
55use crate::hash;
56use crate::optimizer::ArrayOptimizer;
57use crate::stats::StatsSetRef;
58use crate::validity::Validity;
59use crate::vtable::ArrayId;
60use crate::vtable::ArrayVTable;
61use crate::vtable::BaseArrayVTable;
62use crate::vtable::CanonicalVTable;
63use crate::vtable::ComputeVTable;
64use crate::vtable::OperationsVTable;
65use crate::vtable::VTable;
66use crate::vtable::ValidityVTable;
67use crate::vtable::VisitorVTable;
68
69pub trait Array:
71 'static + private::Sealed + Send + Sync + Debug + DynArrayEq + DynArrayHash + ArrayVisitor
72{
73 fn as_any(&self) -> &dyn Any;
75
76 fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;
78
79 fn to_array(&self) -> ArrayRef;
81
82 fn len(&self) -> usize;
84
85 fn is_empty(&self) -> bool {
87 self.len() == 0
88 }
89
90 fn dtype(&self) -> &DType;
92
93 fn encoding(&self) -> ArrayVTable;
95
96 fn encoding_id(&self) -> ArrayId;
98
99 fn slice(&self, range: Range<usize>) -> ArrayRef;
101
102 fn filter(&self, mask: Mask) -> VortexResult<ArrayRef>;
104
105 fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef>;
107
108 fn scalar_at(&self, index: usize) -> Scalar;
112
113 fn is_encoding(&self, encoding: ArrayId) -> bool {
115 self.encoding_id() == encoding
116 }
117
118 fn is_arrow(&self) -> bool {
121 self.is_encoding(NullVTable.id())
122 || self.is_encoding(BoolVTable.id())
123 || self.is_encoding(PrimitiveVTable.id())
124 || self.is_encoding(VarBinVTable.id())
125 || self.is_encoding(VarBinViewVTable.id())
126 }
127
128 fn is_canonical(&self) -> bool {
131 self.is_encoding(NullVTable.id())
132 || self.is_encoding(BoolVTable.id())
133 || self.is_encoding(PrimitiveVTable.id())
134 || self.is_encoding(DecimalVTable.id())
135 || self.is_encoding(StructVTable.id())
136 || self.is_encoding(ListViewVTable.id())
137 || self.is_encoding(FixedSizeListVTable.id())
138 || self.is_encoding(VarBinViewVTable.id())
139 || self.is_encoding(ExtensionVTable.id())
140 }
141
142 fn is_valid(&self, index: usize) -> bool;
144
145 fn is_invalid(&self, index: usize) -> bool;
147
148 fn all_valid(&self) -> bool;
152
153 fn all_invalid(&self) -> bool;
157
158 fn valid_count(&self) -> usize;
160
161 fn invalid_count(&self) -> usize;
163
164 fn validity(&self) -> VortexResult<Validity>;
166
167 fn validity_mask(&self) -> Mask;
169
170 fn to_canonical(&self) -> Canonical;
172
173 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder);
177
178 fn statistics(&self) -> StatsSetRef<'_>;
181
182 fn with_children(&self, children: Vec<ArrayRef>) -> VortexResult<ArrayRef>;
184
185 fn invoke(&self, compute_fn: &ComputeFn, args: &InvocationArgs)
202 -> VortexResult<Option<Output>>;
203}
204
205impl Array for Arc<dyn Array> {
206 #[inline]
207 fn as_any(&self) -> &dyn Any {
208 self.as_ref().as_any()
209 }
210
211 fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
212 self
213 }
214
215 #[inline]
216 fn to_array(&self) -> ArrayRef {
217 self.clone()
218 }
219
220 #[inline]
221 fn len(&self) -> usize {
222 self.as_ref().len()
223 }
224
225 #[inline]
226 fn dtype(&self) -> &DType {
227 self.as_ref().dtype()
228 }
229
230 #[inline]
231 fn encoding(&self) -> ArrayVTable {
232 self.as_ref().encoding()
233 }
234
235 #[inline]
236 fn encoding_id(&self) -> ArrayId {
237 self.as_ref().encoding_id()
238 }
239
240 #[inline]
241 fn slice(&self, range: Range<usize>) -> ArrayRef {
242 self.as_ref().slice(range)
243 }
244
245 fn filter(&self, mask: Mask) -> VortexResult<ArrayRef> {
246 self.as_ref().filter(mask)
247 }
248
249 fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef> {
250 self.as_ref().take(indices)
251 }
252
253 #[inline]
254 fn scalar_at(&self, index: usize) -> Scalar {
255 self.as_ref().scalar_at(index)
256 }
257
258 #[inline]
259 fn is_valid(&self, index: usize) -> bool {
260 self.as_ref().is_valid(index)
261 }
262
263 #[inline]
264 fn is_invalid(&self, index: usize) -> bool {
265 self.as_ref().is_invalid(index)
266 }
267
268 #[inline]
269 fn all_valid(&self) -> bool {
270 self.as_ref().all_valid()
271 }
272
273 #[inline]
274 fn all_invalid(&self) -> bool {
275 self.as_ref().all_invalid()
276 }
277
278 #[inline]
279 fn valid_count(&self) -> usize {
280 self.as_ref().valid_count()
281 }
282
283 #[inline]
284 fn invalid_count(&self) -> usize {
285 self.as_ref().invalid_count()
286 }
287
288 #[inline]
289 fn validity(&self) -> VortexResult<Validity> {
290 self.as_ref().validity()
291 }
292
293 #[inline]
294 fn validity_mask(&self) -> Mask {
295 self.as_ref().validity_mask()
296 }
297
298 fn to_canonical(&self) -> Canonical {
299 self.as_ref().to_canonical()
300 }
301
302 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) {
303 self.as_ref().append_to_builder(builder)
304 }
305
306 fn statistics(&self) -> StatsSetRef<'_> {
307 self.as_ref().statistics()
308 }
309
310 fn with_children(&self, children: Vec<ArrayRef>) -> VortexResult<ArrayRef> {
311 self.as_ref().with_children(children)
312 }
313
314 fn invoke(
315 &self,
316 compute_fn: &ComputeFn,
317 args: &InvocationArgs,
318 ) -> VortexResult<Option<Output>> {
319 self.as_ref().invoke(compute_fn, args)
320 }
321}
322
/// A reference-counted, type-erased handle to an [`Array`].
pub type ArrayRef = Arc<dyn Array>;
325
326impl ToOwned for dyn Array {
327 type Owned = ArrayRef;
328
329 fn to_owned(&self) -> Self::Owned {
330 self.to_array()
331 }
332}
333
334impl dyn Array + '_ {
335 pub fn as_<V: VTable>(&self) -> &V::Array {
337 self.as_opt::<V>().vortex_expect("Failed to downcast")
338 }
339
340 pub fn as_opt<V: VTable>(&self) -> Option<&V::Array> {
342 self.as_any()
343 .downcast_ref::<ArrayAdapter<V>>()
344 .map(|array_adapter| &array_adapter.0)
345 }
346
347 pub fn try_into<V: VTable>(self: Arc<Self>) -> Result<V::Array, Arc<Self>> {
349 match self.is::<V>() {
350 true => {
351 let arc = self
352 .as_any_arc()
353 .downcast::<ArrayAdapter<V>>()
354 .map_err(|_| vortex_err!("failed to downcast"))
355 .vortex_expect("Failed to downcast");
356 Ok(match Arc::try_unwrap(arc) {
357 Ok(array) => array.0,
358 Err(arc) => arc.deref().0.clone(),
359 })
360 }
361 false => Err(self),
362 }
363 }
364
365 pub fn is<V: VTable>(&self) -> bool {
367 self.as_opt::<V>().is_some()
368 }
369
370 pub fn is_constant(&self) -> bool {
371 let opts = IsConstantOpts {
372 cost: Cost::Specialized,
373 };
374 is_constant_opts(self, &opts)
375 .inspect_err(|e| tracing::warn!("Failed to compute IsConstant: {e}"))
376 .ok()
377 .flatten()
378 .unwrap_or_default()
379 }
380
381 pub fn is_constant_opts(&self, cost: Cost) -> bool {
382 let opts = IsConstantOpts { cost };
383 is_constant_opts(self, &opts)
384 .inspect_err(|e| tracing::warn!("Failed to compute IsConstant: {e}"))
385 .ok()
386 .flatten()
387 .unwrap_or_default()
388 }
389
390 pub fn as_constant(&self) -> Option<Scalar> {
391 self.is_constant().then(|| self.scalar_at(0))
392 }
393
394 pub fn nbytes(&self) -> u64 {
396 let mut nbytes = 0;
397 for array in self.depth_first_traversal() {
398 for buffer in array.buffers() {
399 nbytes += buffer.len() as u64;
400 }
401 }
402 nbytes
403 }
404}
405
/// Conversion of an owned value into an [`ArrayRef`].
pub trait IntoArray {
    /// Consumes `self` and returns it as an [`ArrayRef`].
    fn into_array(self) -> ArrayRef;
}
410
/// An [`ArrayRef`] is already an array handle, so the conversion is the identity.
impl IntoArray for ArrayRef {
    fn into_array(self) -> ArrayRef {
        self
    }
}
416
417mod private {
418 use super::*;
419
420 pub trait Sealed {}
421
422 impl<V: VTable> Sealed for ArrayAdapter<V> {}
423 impl Sealed for Arc<dyn Array> {}
424}
425
/// Newtype around a vtable's concrete array type (`V::Array`).
///
/// This wrapper is what implements [`Array`], `ArrayHash`, `ArrayEq` and `ArrayVisitor` in
/// this file, forwarding to the vtables associated with `V`. `repr(transparent)` gives it the
/// same layout as the wrapped `V::Array`.
#[repr(transparent)]
pub struct ArrayAdapter<V: VTable>(V::Array);
434
435impl<V: VTable> ArrayAdapter<V> {
436 pub fn as_inner(&self) -> &V::Array {
438 &self.0
439 }
440}
441
442impl<V: VTable> Debug for ArrayAdapter<V> {
443 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
444 self.0.fmt(f)
445 }
446}
447
448impl<V: VTable> Array for ArrayAdapter<V> {
449 fn as_any(&self) -> &dyn Any {
450 self
451 }
452
453 fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
454 self
455 }
456
457 fn to_array(&self) -> ArrayRef {
458 Arc::new(ArrayAdapter::<V>(self.0.clone()))
459 }
460
461 fn len(&self) -> usize {
462 <V::ArrayVTable as BaseArrayVTable<V>>::len(&self.0)
463 }
464
465 fn dtype(&self) -> &DType {
466 <V::ArrayVTable as BaseArrayVTable<V>>::dtype(&self.0)
467 }
468
469 fn encoding(&self) -> ArrayVTable {
470 V::encoding(&self.0)
471 }
472
473 fn encoding_id(&self) -> ArrayId {
474 V::encoding(&self.0).id()
475 }
476
477 fn slice(&self, range: Range<usize>) -> ArrayRef {
478 let start = range.start;
479 let stop = range.end;
480
481 if start == 0 && stop == self.len() {
482 return self.to_array();
483 }
484
485 assert!(
486 start <= self.len(),
487 "OutOfBounds: start {start} > length {}",
488 self.len()
489 );
490 assert!(
491 stop <= self.len(),
492 "OutOfBounds: stop {stop} > length {}",
493 self.len()
494 );
495
496 assert!(start <= stop, "start ({start}) must be <= stop ({stop})");
497
498 if start == stop {
499 return Canonical::empty(self.dtype()).into_array();
500 }
501
502 let sliced = <V::OperationsVTable as OperationsVTable<V>>::slice(&self.0, range);
503
504 assert_eq!(
505 sliced.len(),
506 stop - start,
507 "Slice length mismatch {}",
508 self.encoding_id()
509 );
510
511 debug_assert_eq!(
513 sliced.dtype(),
514 self.dtype(),
515 "Slice dtype mismatch {}",
516 self.encoding_id()
517 );
518
519 if !sliced.is::<ConstantVTable>() {
521 self.statistics().with_iter(|iter| {
522 sliced.statistics().inherit(iter.filter(|(stat, value)| {
523 matches!(
524 stat,
525 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
526 ) && value.as_ref().as_exact().is_some_and(|v| {
527 Scalar::new(DType::Bool(Nullability::NonNullable), v.clone())
528 .as_bool()
529 .value()
530 .unwrap_or_default()
531 })
532 }));
533 });
534 }
535
536 sliced
537 }
538
539 fn filter(&self, mask: Mask) -> VortexResult<ArrayRef> {
540 vortex_ensure!(self.len() == mask.len(), "Filter mask length mismatch");
541 FilterArray::new(self.to_array(), mask)
542 .into_array()
543 .optimize()
544 }
545
546 fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef> {
547 DictArray::try_new(indices, self.to_array())?
548 .into_array()
549 .optimize()
550 }
551
552 fn scalar_at(&self, index: usize) -> Scalar {
553 assert!(index < self.len(), "index {index} out of bounds");
554 if self.is_invalid(index) {
555 return Scalar::null(self.dtype().clone());
556 }
557 let scalar = <V::OperationsVTable as OperationsVTable<V>>::scalar_at(&self.0, index);
558 assert_eq!(self.dtype(), scalar.dtype(), "Scalar dtype mismatch");
559 scalar
560 }
561
562 fn is_valid(&self, index: usize) -> bool {
563 if index >= self.len() {
564 vortex_panic!(OutOfBounds: index, 0, self.len());
565 }
566 <V::ValidityVTable as ValidityVTable<V>>::is_valid(&self.0, index)
567 }
568
569 fn is_invalid(&self, index: usize) -> bool {
570 !self.is_valid(index)
571 }
572
573 fn all_valid(&self) -> bool {
574 <V::ValidityVTable as ValidityVTable<V>>::all_valid(&self.0)
575 }
576
577 fn all_invalid(&self) -> bool {
578 <V::ValidityVTable as ValidityVTable<V>>::all_invalid(&self.0)
579 }
580
581 fn valid_count(&self) -> usize {
582 if let Some(Precision::Exact(invalid_count)) =
583 self.statistics().get_as::<usize>(Stat::NullCount)
584 {
585 return self.len() - invalid_count;
586 }
587
588 let count = <V::ValidityVTable as ValidityVTable<V>>::valid_count(&self.0);
589 assert!(count <= self.len(), "Valid count exceeds array length");
590
591 self.statistics()
592 .set(Stat::NullCount, Precision::exact(self.len() - count));
593
594 count
595 }
596
597 fn invalid_count(&self) -> usize {
598 if let Some(Precision::Exact(invalid_count)) =
599 self.statistics().get_as::<usize>(Stat::NullCount)
600 {
601 return invalid_count;
602 }
603
604 let count = <V::ValidityVTable as ValidityVTable<V>>::invalid_count(&self.0);
605 assert!(count <= self.len(), "Invalid count exceeds array length");
606
607 self.statistics()
608 .set(Stat::NullCount, Precision::exact(count));
609
610 count
611 }
612
613 fn validity(&self) -> VortexResult<Validity> {
614 if self.dtype().is_nullable() {
615 <V::ValidityVTable as ValidityVTable<V>>::validity(&self.0)
616 } else {
617 Ok(Validity::NonNullable)
618 }
619 }
620
621 fn validity_mask(&self) -> Mask {
622 let mask = <V::ValidityVTable as ValidityVTable<V>>::validity_mask(&self.0);
623 assert_eq!(mask.len(), self.len(), "Validity mask length mismatch");
624 mask
625 }
626
627 fn to_canonical(&self) -> Canonical {
628 let canonical = <V::CanonicalVTable as CanonicalVTable<V>>::canonicalize(&self.0);
629 assert_eq!(
630 self.len(),
631 canonical.as_ref().len(),
632 "Canonical length mismatch {}. Expected {} but encoded into {}.",
633 self.encoding_id(),
634 self.len(),
635 canonical.as_ref().len()
636 );
637 assert_eq!(
638 self.dtype(),
639 canonical.as_ref().dtype(),
640 "Canonical dtype mismatch {}. Expected {} but encoded into {}.",
641 self.encoding_id(),
642 self.dtype(),
643 canonical.as_ref().dtype()
644 );
645 canonical
646 .as_ref()
647 .statistics()
648 .inherit_from(self.statistics());
649 canonical
650 }
651
652 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) {
653 if builder.dtype() != self.dtype() {
654 vortex_panic!(
655 "Builder dtype mismatch: expected {}, got {}",
656 self.dtype(),
657 builder.dtype(),
658 );
659 }
660 let len = builder.len();
661
662 <V::CanonicalVTable as CanonicalVTable<V>>::append_to_builder(&self.0, builder);
663 assert_eq!(
664 len + self.len(),
665 builder.len(),
666 "Builder length mismatch after writing array for encoding {}",
667 self.encoding_id(),
668 );
669 }
670
671 fn statistics(&self) -> StatsSetRef<'_> {
672 <V::ArrayVTable as BaseArrayVTable<V>>::stats(&self.0)
673 }
674
675 fn with_children(&self, children: Vec<ArrayRef>) -> VortexResult<ArrayRef> {
676 self.encoding().as_dyn().with_children(self, children)
677 }
678
679 fn invoke(
680 &self,
681 compute_fn: &ComputeFn,
682 args: &InvocationArgs,
683 ) -> VortexResult<Option<Output>> {
684 <V::ComputeVTable as ComputeVTable<V>>::invoke(&self.0, compute_fn, args)
685 }
686}
687
688impl<V: VTable> ArrayHash for ArrayAdapter<V> {
689 fn array_hash<H: Hasher>(&self, state: &mut H, precision: hash::Precision) {
690 self.0.encoding_id().hash(state);
691 <V::ArrayVTable as BaseArrayVTable<V>>::array_hash(&self.0, state, precision);
692 }
693}
694
695impl<V: VTable> ArrayEq for ArrayAdapter<V> {
696 fn array_eq(&self, other: &Self, precision: hash::Precision) -> bool {
697 <V::ArrayVTable as BaseArrayVTable<V>>::array_eq(&self.0, &other.0, precision)
698 }
699}
700
701impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
702 fn children(&self) -> Vec<ArrayRef> {
703 struct ChildrenCollector {
704 children: Vec<ArrayRef>,
705 }
706
707 impl ArrayChildVisitor for ChildrenCollector {
708 fn visit_child(&mut self, _name: &str, array: &ArrayRef) {
709 self.children.push(array.clone());
710 }
711 }
712
713 let mut collector = ChildrenCollector {
714 children: Vec::new(),
715 };
716 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
717 collector.children
718 }
719
720 fn nchildren(&self) -> usize {
721 <V::VisitorVTable as VisitorVTable<V>>::nchildren(&self.0)
722 }
723
724 fn children_names(&self) -> Vec<String> {
725 struct ChildNameCollector {
726 names: Vec<String>,
727 }
728
729 impl ArrayChildVisitor for ChildNameCollector {
730 fn visit_child(&mut self, name: &str, _array: &ArrayRef) {
731 self.names.push(name.to_string());
732 }
733 }
734
735 let mut collector = ChildNameCollector { names: Vec::new() };
736 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
737 collector.names
738 }
739
740 fn named_children(&self) -> Vec<(String, ArrayRef)> {
741 struct NamedChildrenCollector {
742 children: Vec<(String, ArrayRef)>,
743 }
744
745 impl ArrayChildVisitor for NamedChildrenCollector {
746 fn visit_child(&mut self, name: &str, array: &ArrayRef) {
747 self.children.push((name.to_string(), array.to_array()));
748 }
749 }
750
751 let mut collector = NamedChildrenCollector {
752 children: Vec::new(),
753 };
754
755 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
756 collector.children
757 }
758
759 fn buffers(&self) -> Vec<ByteBuffer> {
760 struct BufferCollector {
761 buffers: Vec<ByteBuffer>,
762 }
763
764 impl ArrayBufferVisitor for BufferCollector {
765 fn visit_buffer(&mut self, buffer: &ByteBuffer) {
766 self.buffers.push(buffer.clone());
767 }
768 }
769
770 let mut collector = BufferCollector {
771 buffers: Vec::new(),
772 };
773 <V::VisitorVTable as VisitorVTable<V>>::visit_buffers(&self.0, &mut collector);
774 collector.buffers
775 }
776
777 fn nbuffers(&self) -> usize {
778 <V::VisitorVTable as VisitorVTable<V>>::nbuffers(&self.0)
779 }
780
781 fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
782 V::serialize(V::metadata(&self.0)?)
783 }
784
785 fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
786 match V::metadata(&self.0) {
787 Err(e) => write!(f, "<serde error: {e}>"),
788 Ok(metadata) => Debug::fmt(&metadata, f),
789 }
790 }
791}