1mod visitor;
5
6use std::any::Any;
7use std::fmt::Debug;
8use std::fmt::Formatter;
9use std::hash::Hash;
10use std::hash::Hasher;
11use std::ops::Range;
12use std::sync::Arc;
13
14pub use visitor::*;
15use vortex_buffer::ByteBuffer;
16use vortex_dtype::DType;
17use vortex_dtype::Nullability;
18use vortex_error::VortexExpect;
19use vortex_error::VortexResult;
20use vortex_error::vortex_bail;
21use vortex_error::vortex_ensure;
22use vortex_error::vortex_panic;
23use vortex_mask::Mask;
24use vortex_scalar::Scalar;
25use vortex_session::VortexSession;
26use vortex_vector::Vector;
27use vortex_vector::VectorOps;
28
29use crate::ArrayEq;
30use crate::ArrayHash;
31use crate::Canonical;
32use crate::DynArrayEq;
33use crate::DynArrayHash;
34use crate::arrays::BoolVTable;
35use crate::arrays::ConstantVTable;
36use crate::arrays::DecimalVTable;
37use crate::arrays::ExtensionVTable;
38use crate::arrays::FixedSizeListVTable;
39use crate::arrays::ListViewVTable;
40use crate::arrays::NullVTable;
41use crate::arrays::PrimitiveVTable;
42use crate::arrays::StructVTable;
43use crate::arrays::VarBinVTable;
44use crate::arrays::VarBinViewVTable;
45use crate::builders::ArrayBuilder;
46use crate::compute::ComputeFn;
47use crate::compute::Cost;
48use crate::compute::InvocationArgs;
49use crate::compute::IsConstantOpts;
50use crate::compute::Output;
51use crate::compute::is_constant_opts;
52use crate::execution::ExecutionCtx;
53use crate::expr::stats::Precision;
54use crate::expr::stats::Stat;
55use crate::expr::stats::StatsProviderExt;
56use crate::hash;
57use crate::serde::ArrayChildren;
58use crate::stats::StatsSetRef;
59use crate::vtable::ArrayId;
60use crate::vtable::ArrayVTable;
61use crate::vtable::BaseArrayVTable;
62use crate::vtable::CanonicalVTable;
63use crate::vtable::ComputeVTable;
64use crate::vtable::OperationsVTable;
65use crate::vtable::VTable;
66use crate::vtable::ValidityVTable;
67use crate::vtable::VisitorVTable;
68
69pub trait Array:
71 'static + private::Sealed + Send + Sync + Debug + DynArrayEq + DynArrayHash + ArrayVisitor
72{
73 fn as_any(&self) -> &dyn Any;
75
76 fn to_array(&self) -> ArrayRef;
78
79 fn len(&self) -> usize;
81
82 fn is_empty(&self) -> bool {
84 self.len() == 0
85 }
86
87 fn dtype(&self) -> &DType;
89
90 fn encoding(&self) -> ArrayVTable;
92
93 fn encoding_id(&self) -> ArrayId;
95
96 fn slice(&self, range: Range<usize>) -> ArrayRef;
98
99 fn scalar_at(&self, index: usize) -> Scalar;
103
104 fn is_encoding(&self, encoding: ArrayId) -> bool {
106 self.encoding_id() == encoding
107 }
108
109 fn is_arrow(&self) -> bool {
112 self.is_encoding(NullVTable.id())
113 || self.is_encoding(BoolVTable.id())
114 || self.is_encoding(PrimitiveVTable.id())
115 || self.is_encoding(VarBinVTable.id())
116 || self.is_encoding(VarBinViewVTable.id())
117 }
118
119 fn is_canonical(&self) -> bool {
122 self.is_encoding(NullVTable.id())
123 || self.is_encoding(BoolVTable.id())
124 || self.is_encoding(PrimitiveVTable.id())
125 || self.is_encoding(DecimalVTable.id())
126 || self.is_encoding(StructVTable.id())
127 || self.is_encoding(ListViewVTable.id())
128 || self.is_encoding(FixedSizeListVTable.id())
129 || self.is_encoding(VarBinViewVTable.id())
130 || self.is_encoding(ExtensionVTable.id())
131 }
132
133 fn is_valid(&self, index: usize) -> bool;
135
136 fn is_invalid(&self, index: usize) -> bool;
138
139 fn all_valid(&self) -> bool;
143
144 fn all_invalid(&self) -> bool;
148
149 fn valid_count(&self) -> usize;
151
152 fn invalid_count(&self) -> usize;
154
155 fn validity_mask(&self) -> Mask;
157
158 fn to_canonical(&self) -> Canonical;
160
161 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder);
165
166 fn statistics(&self) -> StatsSetRef<'_>;
169
170 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef>;
172
173 fn invoke(&self, compute_fn: &ComputeFn, args: &InvocationArgs)
190 -> VortexResult<Option<Output>>;
191
192 fn batch_execute(&self, ctx: &mut ExecutionCtx) -> VortexResult<Vector>;
194}
195
196impl Array for Arc<dyn Array> {
197 #[inline]
198 fn as_any(&self) -> &dyn Any {
199 self.as_ref().as_any()
200 }
201
202 #[inline]
203 fn to_array(&self) -> ArrayRef {
204 self.clone()
205 }
206
207 #[inline]
208 fn len(&self) -> usize {
209 self.as_ref().len()
210 }
211
212 #[inline]
213 fn dtype(&self) -> &DType {
214 self.as_ref().dtype()
215 }
216
217 #[inline]
218 fn encoding(&self) -> ArrayVTable {
219 self.as_ref().encoding()
220 }
221
222 #[inline]
223 fn encoding_id(&self) -> ArrayId {
224 self.as_ref().encoding_id()
225 }
226
227 #[inline]
228 fn slice(&self, range: Range<usize>) -> ArrayRef {
229 self.as_ref().slice(range)
230 }
231
232 #[inline]
233 fn scalar_at(&self, index: usize) -> Scalar {
234 self.as_ref().scalar_at(index)
235 }
236
237 #[inline]
238 fn is_valid(&self, index: usize) -> bool {
239 self.as_ref().is_valid(index)
240 }
241
242 #[inline]
243 fn is_invalid(&self, index: usize) -> bool {
244 self.as_ref().is_invalid(index)
245 }
246
247 #[inline]
248 fn all_valid(&self) -> bool {
249 self.as_ref().all_valid()
250 }
251
252 #[inline]
253 fn all_invalid(&self) -> bool {
254 self.as_ref().all_invalid()
255 }
256
257 #[inline]
258 fn valid_count(&self) -> usize {
259 self.as_ref().valid_count()
260 }
261
262 #[inline]
263 fn invalid_count(&self) -> usize {
264 self.as_ref().invalid_count()
265 }
266
267 #[inline]
268 fn validity_mask(&self) -> Mask {
269 self.as_ref().validity_mask()
270 }
271
272 fn to_canonical(&self) -> Canonical {
273 self.as_ref().to_canonical()
274 }
275
276 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) {
277 self.as_ref().append_to_builder(builder)
278 }
279
280 fn statistics(&self) -> StatsSetRef<'_> {
281 self.as_ref().statistics()
282 }
283
284 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
286 self.as_ref().with_children(children)
287 }
288
289 fn invoke(
290 &self,
291 compute_fn: &ComputeFn,
292 args: &InvocationArgs,
293 ) -> VortexResult<Option<Output>> {
294 self.as_ref().invoke(compute_fn, args)
295 }
296
297 fn batch_execute(&self, ctx: &mut ExecutionCtx) -> VortexResult<Vector> {
298 self.as_ref().batch_execute(ctx)
299 }
300}
301
/// A reference-counted, type-erased handle to an [`Array`].
pub type ArrayRef = Arc<dyn Array>;
304
305impl ToOwned for dyn Array {
306 type Owned = ArrayRef;
307
308 fn to_owned(&self) -> Self::Owned {
309 self.to_array()
310 }
311}
312
313impl dyn Array + '_ {
314 pub fn as_<V: VTable>(&self) -> &V::Array {
316 self.as_opt::<V>().vortex_expect("Failed to downcast")
317 }
318
319 pub fn as_opt<V: VTable>(&self) -> Option<&V::Array> {
321 self.as_any()
322 .downcast_ref::<ArrayAdapter<V>>()
323 .map(|array_adapter| &array_adapter.0)
324 }
325
326 pub fn is<V: VTable>(&self) -> bool {
328 self.as_opt::<V>().is_some()
329 }
330
331 pub fn is_constant(&self) -> bool {
332 let opts = IsConstantOpts {
333 cost: Cost::Specialized,
334 };
335 is_constant_opts(self, &opts)
336 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
337 .ok()
338 .flatten()
339 .unwrap_or_default()
340 }
341
342 pub fn is_constant_opts(&self, cost: Cost) -> bool {
343 let opts = IsConstantOpts { cost };
344 is_constant_opts(self, &opts)
345 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
346 .ok()
347 .flatten()
348 .unwrap_or_default()
349 }
350
351 pub fn as_constant(&self) -> Option<Scalar> {
352 self.is_constant().then(|| self.scalar_at(0))
353 }
354
355 pub fn nbytes(&self) -> u64 {
357 let mut nbytes = 0;
358 for array in self.depth_first_traversal() {
359 for buffer in array.buffers() {
360 nbytes += buffer.len() as u64;
361 }
362 }
363 nbytes
364 }
365
366 pub fn execute(&self, session: &VortexSession) -> VortexResult<Vector> {
370 let mut ctx = ExecutionCtx::new(session.clone());
371 self.batch_execute(&mut ctx)
372 }
373}
374
/// Conversion of an owned value into an [`ArrayRef`].
pub trait IntoArray {
    /// Consumes `self` and returns it as an [`ArrayRef`].
    fn into_array(self) -> ArrayRef;
}
379
/// An [`ArrayRef`] is already an array handle, so the conversion is the identity.
impl IntoArray for ArrayRef {
    fn into_array(self) -> ArrayRef {
        self
    }
}
385
/// Holds the sealing trait that restricts which types may implement [`Array`].
mod private {
    use super::*;

    /// Marker supertrait of [`Array`]. Because the enclosing module is private, code that
    /// cannot name this trait cannot implement [`Array`].
    pub trait Sealed {}

    impl<V: VTable> Sealed for ArrayAdapter<V> {}
    impl Sealed for Arc<dyn Array> {}
}
394
/// Wraps the concrete array type of encoding `V` (`V::Array`) and implements [`Array`] for it
/// by dispatching to `V`'s vtables.
///
/// The wrapper is `#[repr(transparent)]`, so it has the same layout as the wrapped `V::Array`.
#[repr(transparent)]
pub struct ArrayAdapter<V: VTable>(V::Array);
403
404impl<V: VTable> ArrayAdapter<V> {
405 pub fn as_inner(&self) -> &V::Array {
407 &self.0
408 }
409
410 pub fn into_inner(self) -> V::Array {
412 self.0
413 }
414}
415
416impl<V: VTable> Debug for ArrayAdapter<V> {
417 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
418 self.0.fmt(f)
419 }
420}
421
422impl<V: VTable> Array for ArrayAdapter<V> {
423 fn as_any(&self) -> &dyn Any {
424 self
425 }
426
427 fn to_array(&self) -> ArrayRef {
428 Arc::new(ArrayAdapter::<V>(self.0.clone()))
429 }
430
431 fn len(&self) -> usize {
432 <V::ArrayVTable as BaseArrayVTable<V>>::len(&self.0)
433 }
434
435 fn dtype(&self) -> &DType {
436 <V::ArrayVTable as BaseArrayVTable<V>>::dtype(&self.0)
437 }
438
439 fn encoding(&self) -> ArrayVTable {
440 V::encoding(&self.0)
441 }
442
443 fn encoding_id(&self) -> ArrayId {
444 V::encoding(&self.0).id()
445 }
446
447 fn slice(&self, range: Range<usize>) -> ArrayRef {
448 let start = range.start;
449 let stop = range.end;
450
451 if start == 0 && stop == self.len() {
452 return self.to_array();
453 }
454
455 assert!(
456 start <= self.len(),
457 "OutOfBounds: start {start} > length {}",
458 self.len()
459 );
460 assert!(
461 stop <= self.len(),
462 "OutOfBounds: stop {stop} > length {}",
463 self.len()
464 );
465
466 assert!(start <= stop, "start ({start}) must be <= stop ({stop})");
467
468 if start == stop {
469 return Canonical::empty(self.dtype()).into_array();
470 }
471
472 let sliced = <V::OperationsVTable as OperationsVTable<V>>::slice(&self.0, range);
473
474 assert_eq!(
475 sliced.len(),
476 stop - start,
477 "Slice length mismatch {}",
478 self.encoding_id()
479 );
480
481 debug_assert_eq!(
483 sliced.dtype(),
484 self.dtype(),
485 "Slice dtype mismatch {}",
486 self.encoding_id()
487 );
488
489 if !sliced.is::<ConstantVTable>() {
491 self.statistics().with_iter(|iter| {
492 sliced.statistics().inherit(iter.filter(|(stat, value)| {
493 matches!(
494 stat,
495 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
496 ) && value.as_ref().as_exact().is_some_and(|v| {
497 Scalar::new(DType::Bool(Nullability::NonNullable), v.clone())
498 .as_bool()
499 .value()
500 .unwrap_or_default()
501 })
502 }));
503 });
504 }
505
506 sliced
507 }
508
509 fn scalar_at(&self, index: usize) -> Scalar {
510 assert!(index < self.len(), "index {index} out of bounds");
511 if self.is_invalid(index) {
512 return Scalar::null(self.dtype().clone());
513 }
514 let scalar = <V::OperationsVTable as OperationsVTable<V>>::scalar_at(&self.0, index);
515 assert_eq!(self.dtype(), scalar.dtype(), "Scalar dtype mismatch");
516 scalar
517 }
518
519 fn is_valid(&self, index: usize) -> bool {
520 if index >= self.len() {
521 vortex_panic!(OutOfBounds: index, 0, self.len());
522 }
523 <V::ValidityVTable as ValidityVTable<V>>::is_valid(&self.0, index)
524 }
525
526 fn is_invalid(&self, index: usize) -> bool {
527 !self.is_valid(index)
528 }
529
530 fn all_valid(&self) -> bool {
531 <V::ValidityVTable as ValidityVTable<V>>::all_valid(&self.0)
532 }
533
534 fn all_invalid(&self) -> bool {
535 <V::ValidityVTable as ValidityVTable<V>>::all_invalid(&self.0)
536 }
537
538 fn valid_count(&self) -> usize {
539 if let Some(Precision::Exact(invalid_count)) =
540 self.statistics().get_as::<usize>(Stat::NullCount)
541 {
542 return self.len() - invalid_count;
543 }
544
545 let count = <V::ValidityVTable as ValidityVTable<V>>::valid_count(&self.0);
546 assert!(count <= self.len(), "Valid count exceeds array length");
547
548 self.statistics()
549 .set(Stat::NullCount, Precision::exact(self.len() - count));
550
551 count
552 }
553
554 fn invalid_count(&self) -> usize {
555 if let Some(Precision::Exact(invalid_count)) =
556 self.statistics().get_as::<usize>(Stat::NullCount)
557 {
558 return invalid_count;
559 }
560
561 let count = <V::ValidityVTable as ValidityVTable<V>>::invalid_count(&self.0);
562 assert!(count <= self.len(), "Invalid count exceeds array length");
563
564 self.statistics()
565 .set(Stat::NullCount, Precision::exact(count));
566
567 count
568 }
569
570 fn validity_mask(&self) -> Mask {
571 let mask = <V::ValidityVTable as ValidityVTable<V>>::validity_mask(&self.0);
572 assert_eq!(mask.len(), self.len(), "Validity mask length mismatch");
573 mask
574 }
575
576 fn to_canonical(&self) -> Canonical {
577 let canonical = <V::CanonicalVTable as CanonicalVTable<V>>::canonicalize(&self.0);
578 assert_eq!(
579 self.len(),
580 canonical.as_ref().len(),
581 "Canonical length mismatch {}. Expected {} but encoded into {}.",
582 self.encoding_id(),
583 self.len(),
584 canonical.as_ref().len()
585 );
586 assert_eq!(
587 self.dtype(),
588 canonical.as_ref().dtype(),
589 "Canonical dtype mismatch {}. Expected {} but encoded into {}.",
590 self.encoding_id(),
591 self.dtype(),
592 canonical.as_ref().dtype()
593 );
594 canonical
595 .as_ref()
596 .statistics()
597 .inherit_from(self.statistics());
598 canonical
599 }
600
601 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) {
602 if builder.dtype() != self.dtype() {
603 vortex_panic!(
604 "Builder dtype mismatch: expected {}, got {}",
605 self.dtype(),
606 builder.dtype(),
607 );
608 }
609 let len = builder.len();
610
611 <V::CanonicalVTable as CanonicalVTable<V>>::append_to_builder(&self.0, builder);
612 assert_eq!(
613 len + self.len(),
614 builder.len(),
615 "Builder length mismatch after writing array for encoding {}",
616 self.encoding_id(),
617 );
618 }
619
620 fn statistics(&self) -> StatsSetRef<'_> {
621 <V::ArrayVTable as BaseArrayVTable<V>>::stats(&self.0)
622 }
623
624 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
625 struct ReplacementChildren<'a> {
626 children: &'a [ArrayRef],
627 }
628
629 impl ArrayChildren for ReplacementChildren<'_> {
630 fn get(&self, index: usize, dtype: &DType, len: usize) -> VortexResult<ArrayRef> {
631 if index >= self.children.len() {
632 vortex_bail!(OutOfBounds: index, 0, self.children.len());
633 }
634 let child = &self.children[index];
635 if child.len() != len {
636 vortex_bail!(
637 "Child length mismatch: expected {}, got {}",
638 len,
639 child.len()
640 );
641 }
642 if child.dtype() != dtype {
643 vortex_bail!(
644 "Child dtype mismatch: expected {}, got {}",
645 dtype,
646 child.dtype()
647 );
648 }
649 Ok(child.clone())
650 }
651
652 fn len(&self) -> usize {
653 self.children.len()
654 }
655 }
656
657 self.encoding()
659 .with_children(self, &ReplacementChildren { children })
660 }
661
662 fn invoke(
663 &self,
664 compute_fn: &ComputeFn,
665 args: &InvocationArgs,
666 ) -> VortexResult<Option<Output>> {
667 <V::ComputeVTable as ComputeVTable<V>>::invoke(&self.0, compute_fn, args)
668 }
669
670 fn batch_execute(&self, ctx: &mut ExecutionCtx) -> VortexResult<Vector> {
671 let result = V::batch_execute(&self.0, ctx)?;
672
673 vortex_ensure!(result.len() == self.len(), "Result length mismatch");
675 #[cfg(debug_assertions)]
676 vortex_ensure!(
677 vortex_vector::vector_matches_dtype(&result, self.dtype()),
678 "Executed vector dtype mismatch",
679 );
680
681 Ok(result)
682 }
683}
684
685impl<V: VTable> ArrayHash for ArrayAdapter<V> {
686 fn array_hash<H: Hasher>(&self, state: &mut H, precision: hash::Precision) {
687 self.0.encoding_id().hash(state);
688 <V::ArrayVTable as BaseArrayVTable<V>>::array_hash(&self.0, state, precision);
689 }
690}
691
692impl<V: VTable> ArrayEq for ArrayAdapter<V> {
693 fn array_eq(&self, other: &Self, precision: hash::Precision) -> bool {
694 <V::ArrayVTable as BaseArrayVTable<V>>::array_eq(&self.0, &other.0, precision)
695 }
696}
697
698impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
699 fn children(&self) -> Vec<ArrayRef> {
700 struct ChildrenCollector {
701 children: Vec<ArrayRef>,
702 }
703
704 impl ArrayChildVisitor for ChildrenCollector {
705 fn visit_child(&mut self, _name: &str, array: &dyn Array) {
706 self.children.push(array.to_array());
707 }
708 }
709
710 let mut collector = ChildrenCollector {
711 children: Vec::new(),
712 };
713 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
714 collector.children
715 }
716
717 fn nchildren(&self) -> usize {
718 <V::VisitorVTable as VisitorVTable<V>>::nchildren(&self.0)
719 }
720
721 fn children_names(&self) -> Vec<String> {
722 struct ChildNameCollector {
723 names: Vec<String>,
724 }
725
726 impl ArrayChildVisitor for ChildNameCollector {
727 fn visit_child(&mut self, name: &str, _array: &dyn Array) {
728 self.names.push(name.to_string());
729 }
730 }
731
732 let mut collector = ChildNameCollector { names: Vec::new() };
733 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
734 collector.names
735 }
736
737 fn named_children(&self) -> Vec<(String, ArrayRef)> {
738 struct NamedChildrenCollector {
739 children: Vec<(String, ArrayRef)>,
740 }
741
742 impl ArrayChildVisitor for NamedChildrenCollector {
743 fn visit_child(&mut self, name: &str, array: &dyn Array) {
744 self.children.push((name.to_string(), array.to_array()));
745 }
746 }
747
748 let mut collector = NamedChildrenCollector {
749 children: Vec::new(),
750 };
751
752 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
753 collector.children
754 }
755
756 fn buffers(&self) -> Vec<ByteBuffer> {
757 struct BufferCollector {
758 buffers: Vec<ByteBuffer>,
759 }
760
761 impl ArrayBufferVisitor for BufferCollector {
762 fn visit_buffer(&mut self, buffer: &ByteBuffer) {
763 self.buffers.push(buffer.clone());
764 }
765 }
766
767 let mut collector = BufferCollector {
768 buffers: Vec::new(),
769 };
770 <V::VisitorVTable as VisitorVTable<V>>::visit_buffers(&self.0, &mut collector);
771 collector.buffers
772 }
773
774 fn nbuffers(&self) -> usize {
775 <V::VisitorVTable as VisitorVTable<V>>::nbuffers(&self.0)
776 }
777
778 fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
779 V::serialize(V::metadata(&self.0)?)
780 }
781
782 fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
783 match V::metadata(&self.0) {
784 Err(e) => write!(f, "<serde error: {e}>"),
785 Ok(metadata) => Debug::fmt(&metadata, f),
786 }
787 }
788}