1mod visitor;
5
6use std::any::Any;
7use std::fmt::Debug;
8use std::fmt::Formatter;
9use std::hash::Hash;
10use std::hash::Hasher;
11use std::ops::Deref;
12use std::ops::Range;
13use std::sync::Arc;
14
15pub use visitor::*;
16use vortex_buffer::ByteBuffer;
17use vortex_error::VortexExpect;
18use vortex_error::VortexResult;
19use vortex_error::vortex_ensure;
20use vortex_error::vortex_err;
21use vortex_error::vortex_panic;
22use vortex_mask::Mask;
23
24use crate::AnyCanonical;
25use crate::ArrayEq;
26use crate::ArrayHash;
27use crate::Canonical;
28use crate::DynArrayEq;
29use crate::DynArrayHash;
30use crate::ExecutionCtx;
31use crate::LEGACY_SESSION;
32use crate::ToCanonical;
33use crate::VortexSessionExecute;
34use crate::arrays::BoolVTable;
35use crate::arrays::ConstantVTable;
36use crate::arrays::DictArray;
37use crate::arrays::FilterArray;
38use crate::arrays::NullVTable;
39use crate::arrays::PrimitiveVTable;
40use crate::arrays::ScalarFnVTable;
41use crate::arrays::SliceArray;
42use crate::arrays::VarBinVTable;
43use crate::arrays::VarBinViewVTable;
44use crate::buffer::BufferHandle;
45use crate::builders::ArrayBuilder;
46use crate::compute;
47use crate::dtype::DType;
48use crate::dtype::Nullability;
49use crate::expr::stats::Precision;
50use crate::expr::stats::Stat;
51use crate::expr::stats::StatsProviderExt;
52use crate::hash;
53use crate::matcher::Matcher;
54use crate::optimizer::ArrayOptimizer;
55use crate::scalar::Scalar;
56use crate::scalar_fn::ReduceNode;
57use crate::scalar_fn::ReduceNodeRef;
58use crate::scalar_fn::ScalarFnRef;
59use crate::stats::StatsSetRef;
60use crate::validity::Validity;
61use crate::vtable::ArrayId;
62use crate::vtable::ArrayVTableExt;
63use crate::vtable::DynVTable;
64use crate::vtable::OperationsVTable;
65use crate::vtable::VTable;
66use crate::vtable::ValidityVTable;
67
68pub trait DynArray:
70 'static
71 + private::Sealed
72 + Send
73 + Sync
74 + Debug
75 + DynArrayEq
76 + DynArrayHash
77 + ArrayVisitor
78 + ReduceNode
79{
80 fn as_any(&self) -> &dyn Any;
82
83 fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;
85
86 fn to_array(&self) -> ArrayRef;
88
89 fn len(&self) -> usize;
91
92 fn is_empty(&self) -> bool {
94 self.len() == 0
95 }
96
97 fn dtype(&self) -> &DType;
99
100 fn vtable(&self) -> &dyn DynVTable;
102
103 fn encoding_id(&self) -> ArrayId;
105
106 fn slice(&self, range: Range<usize>) -> VortexResult<ArrayRef>;
108
109 fn filter(&self, mask: Mask) -> VortexResult<ArrayRef>;
111
112 fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef>;
114
115 fn scalar_at(&self, index: usize) -> VortexResult<Scalar>;
119
120 fn is_valid(&self, index: usize) -> VortexResult<bool>;
122
123 fn is_invalid(&self, index: usize) -> VortexResult<bool>;
125
126 fn all_valid(&self) -> VortexResult<bool>;
131
132 fn all_invalid(&self) -> VortexResult<bool>;
137
138 fn valid_count(&self) -> VortexResult<usize>;
140
141 fn invalid_count(&self) -> VortexResult<usize>;
143
144 fn validity(&self) -> VortexResult<Validity>;
146
147 fn validity_mask(&self) -> VortexResult<Mask>;
149
150 fn to_canonical(&self) -> VortexResult<Canonical>;
152
153 fn append_to_builder(
157 &self,
158 builder: &mut dyn ArrayBuilder,
159 ctx: &mut ExecutionCtx,
160 ) -> VortexResult<()>;
161
162 fn statistics(&self) -> StatsSetRef<'_>;
165
166 fn with_children(&self, children: Vec<ArrayRef>) -> VortexResult<ArrayRef>;
168}
169
170impl DynArray for Arc<dyn DynArray> {
171 #[inline]
172 fn as_any(&self) -> &dyn Any {
173 DynArray::as_any(self.as_ref())
174 }
175
176 fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
177 self
178 }
179
180 #[inline]
181 fn to_array(&self) -> ArrayRef {
182 self.clone()
183 }
184
185 #[inline]
186 fn len(&self) -> usize {
187 self.as_ref().len()
188 }
189
190 #[inline]
191 fn dtype(&self) -> &DType {
192 self.as_ref().dtype()
193 }
194
195 fn vtable(&self) -> &dyn DynVTable {
196 self.as_ref().vtable()
197 }
198
199 #[inline]
200 fn encoding_id(&self) -> ArrayId {
201 self.as_ref().encoding_id()
202 }
203
204 #[inline]
205 fn slice(&self, range: Range<usize>) -> VortexResult<ArrayRef> {
206 self.as_ref().slice(range)
207 }
208
209 fn filter(&self, mask: Mask) -> VortexResult<ArrayRef> {
210 self.as_ref().filter(mask)
211 }
212
213 fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef> {
214 self.as_ref().take(indices)
215 }
216
217 #[inline]
218 fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
219 self.as_ref().scalar_at(index)
220 }
221
222 #[inline]
223 fn is_valid(&self, index: usize) -> VortexResult<bool> {
224 self.as_ref().is_valid(index)
225 }
226
227 #[inline]
228 fn is_invalid(&self, index: usize) -> VortexResult<bool> {
229 self.as_ref().is_invalid(index)
230 }
231
232 #[inline]
233 fn all_valid(&self) -> VortexResult<bool> {
234 self.as_ref().all_valid()
235 }
236
237 #[inline]
238 fn all_invalid(&self) -> VortexResult<bool> {
239 self.as_ref().all_invalid()
240 }
241
242 #[inline]
243 fn valid_count(&self) -> VortexResult<usize> {
244 self.as_ref().valid_count()
245 }
246
247 #[inline]
248 fn invalid_count(&self) -> VortexResult<usize> {
249 self.as_ref().invalid_count()
250 }
251
252 #[inline]
253 fn validity(&self) -> VortexResult<Validity> {
254 self.as_ref().validity()
255 }
256
257 #[inline]
258 fn validity_mask(&self) -> VortexResult<Mask> {
259 self.as_ref().validity_mask()
260 }
261
262 fn to_canonical(&self) -> VortexResult<Canonical> {
263 self.as_ref().to_canonical()
264 }
265
266 fn append_to_builder(
267 &self,
268 builder: &mut dyn ArrayBuilder,
269 ctx: &mut ExecutionCtx,
270 ) -> VortexResult<()> {
271 self.as_ref().append_to_builder(builder, ctx)
272 }
273
274 fn statistics(&self) -> StatsSetRef<'_> {
275 self.as_ref().statistics()
276 }
277
278 fn with_children(&self, children: Vec<ArrayRef>) -> VortexResult<ArrayRef> {
279 self.as_ref().with_children(children)
280 }
281}
282
283pub type ArrayRef = Arc<dyn DynArray>;
285
286impl ToOwned for dyn DynArray {
287 type Owned = ArrayRef;
288
289 fn to_owned(&self) -> Self::Owned {
290 self.to_array()
291 }
292}
293
294impl dyn DynArray + '_ {
295 pub fn is<M: Matcher>(&self) -> bool {
297 M::matches(self)
298 }
299
300 pub fn as_<M: Matcher>(&self) -> M::Match<'_> {
302 self.as_opt::<M>().vortex_expect("Failed to downcast")
303 }
304
305 pub fn as_opt<M: Matcher>(&self) -> Option<M::Match<'_>> {
307 M::try_match(self)
308 }
309
310 pub fn try_into<V: VTable>(self: Arc<Self>) -> Result<V::Array, Arc<Self>> {
312 match self.is::<V>() {
313 true => {
314 let arc = self
315 .as_any_arc()
316 .downcast::<ArrayAdapter<V>>()
317 .map_err(|_| vortex_err!("failed to downcast"))
318 .vortex_expect("Failed to downcast");
319 Ok(match Arc::try_unwrap(arc) {
320 Ok(array) => array.0,
321 Err(arc) => arc.deref().0.clone(),
322 })
323 }
324 false => Err(self),
325 }
326 }
327
328 pub fn as_constant(&self) -> Option<Scalar> {
329 self.as_opt::<ConstantVTable>().map(|a| a.scalar().clone())
330 }
331
332 pub fn nbytes(&self) -> u64 {
334 let mut nbytes = 0;
335 for array in self.depth_first_traversal() {
336 for buffer in array.buffers() {
337 nbytes += buffer.len() as u64;
338 }
339 }
340 nbytes
341 }
342
343 pub fn is_arrow(&self) -> bool {
345 self.is::<NullVTable>()
346 || self.is::<BoolVTable>()
347 || self.is::<PrimitiveVTable>()
348 || self.is::<VarBinVTable>()
349 || self.is::<VarBinViewVTable>()
350 }
351
352 pub fn is_canonical(&self) -> bool {
354 self.is::<AnyCanonical>()
355 }
356
357 pub fn with_child(&self, child_idx: usize, replacement: ArrayRef) -> VortexResult<ArrayRef> {
359 let mut children: Vec<ArrayRef> = self.children();
360 vortex_ensure!(
361 child_idx < children.len(),
362 "child index {} out of bounds for array with {} children",
363 child_idx,
364 children.len()
365 );
366 children[child_idx] = replacement;
367 self.with_children(children)
368 }
369}
370
371pub trait IntoArray {
373 fn into_array(self) -> ArrayRef;
374}
375
376impl IntoArray for ArrayRef {
377 fn into_array(self) -> ArrayRef {
378 self
379 }
380}
381
382mod private {
383 use super::*;
384
385 pub trait Sealed {}
386
387 impl<V: VTable> Sealed for ArrayAdapter<V> {}
388 impl Sealed for Arc<dyn DynArray> {}
389}
390
391#[repr(transparent)]
398pub struct ArrayAdapter<V: VTable>(V::Array);
399
400impl<V: VTable> ArrayAdapter<V> {
401 pub fn as_inner(&self) -> &V::Array {
403 &self.0
404 }
405}
406
407impl<V: VTable> Debug for ArrayAdapter<V> {
408 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
409 self.0.fmt(f)
410 }
411}
412
413impl<V: VTable> ReduceNode for ArrayAdapter<V> {
414 fn as_any(&self) -> &dyn Any {
415 self
416 }
417
418 fn node_dtype(&self) -> VortexResult<DType> {
419 Ok(V::dtype(&self.0).clone())
420 }
421
422 fn scalar_fn(&self) -> Option<&ScalarFnRef> {
423 self.0.as_opt::<ScalarFnVTable>().map(|a| a.scalar_fn())
424 }
425
426 fn child(&self, idx: usize) -> ReduceNodeRef {
427 self.nth_child(idx)
428 .unwrap_or_else(|| vortex_panic!("Child index out of bounds: {}", idx))
429 }
430
431 fn child_count(&self) -> usize {
432 self.nchildren()
433 }
434}
435
436impl<V: VTable> DynArray for ArrayAdapter<V> {
437 fn as_any(&self) -> &dyn Any {
438 self
439 }
440
441 fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
442 self
443 }
444
445 fn to_array(&self) -> ArrayRef {
446 Arc::new(ArrayAdapter::<V>(self.0.clone()))
447 }
448
449 fn len(&self) -> usize {
450 V::len(&self.0)
451 }
452
453 fn dtype(&self) -> &DType {
454 V::dtype(&self.0)
455 }
456
457 fn vtable(&self) -> &dyn DynVTable {
458 V::vtable()
459 }
460
461 fn encoding_id(&self) -> ArrayId {
462 V::id(&self.0)
463 }
464
465 fn slice(&self, range: Range<usize>) -> VortexResult<ArrayRef> {
466 let start = range.start;
467 let stop = range.end;
468
469 if start == 0 && stop == self.len() {
470 return Ok(self.to_array());
471 }
472
473 vortex_ensure!(
474 start <= self.len(),
475 "OutOfBounds: start {start} > length {}",
476 self.len()
477 );
478 vortex_ensure!(
479 stop <= self.len(),
480 "OutOfBounds: stop {stop} > length {}",
481 self.len()
482 );
483
484 vortex_ensure!(start <= stop, "start ({start}) must be <= stop ({stop})");
485
486 if start == stop {
487 return Ok(Canonical::empty(self.dtype()).into_array());
488 }
489
490 let sliced = SliceArray::try_new(self.to_array(), range)?
491 .into_array()
492 .optimize()?;
493
494 if !sliced.is::<ConstantVTable>() {
496 self.statistics().with_iter(|iter| {
497 sliced.statistics().inherit(iter.filter(|(stat, value)| {
498 matches!(
499 stat,
500 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
501 ) && value.as_ref().as_exact().is_some_and(|v| {
502 Scalar::try_new(DType::Bool(Nullability::NonNullable), Some(v.clone()))
503 .vortex_expect("A stat that was expected to be a boolean stat was not")
504 .as_bool()
505 .value()
506 .unwrap_or_default()
507 })
508 }));
509 });
510 }
511
512 Ok(sliced)
513 }
514
515 fn filter(&self, mask: Mask) -> VortexResult<ArrayRef> {
516 FilterArray::try_new(self.to_array(), mask)?
517 .into_array()
518 .optimize()
519 }
520
521 fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef> {
522 DictArray::try_new(indices, self.to_array())?
523 .into_array()
524 .optimize()
525 }
526
527 fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
528 vortex_ensure!(index < self.len(), OutOfBounds: index, 0, self.len());
529 if self.is_invalid(index)? {
530 return Ok(Scalar::null(self.dtype().clone()));
531 }
532 let scalar = <V::OperationsVTable as OperationsVTable<V>>::scalar_at(&self.0, index)?;
533 vortex_ensure!(self.dtype() == scalar.dtype(), "Scalar dtype mismatch");
534 Ok(scalar)
535 }
536
537 fn is_valid(&self, index: usize) -> VortexResult<bool> {
538 vortex_ensure!(index < self.len(), OutOfBounds: index, 0, self.len());
539 match self.validity()? {
540 Validity::NonNullable | Validity::AllValid => Ok(true),
541 Validity::AllInvalid => Ok(false),
542 Validity::Array(a) => a
543 .scalar_at(index)?
544 .as_bool()
545 .value()
546 .ok_or_else(|| vortex_err!("validity value at index {} is null", index)),
547 }
548 }
549
550 fn is_invalid(&self, index: usize) -> VortexResult<bool> {
551 Ok(!self.is_valid(index)?)
552 }
553
554 fn all_valid(&self) -> VortexResult<bool> {
555 match self.validity()? {
556 Validity::NonNullable | Validity::AllValid => Ok(true),
557 Validity::AllInvalid => Ok(false),
558 Validity::Array(a) => Ok(a.statistics().compute_min::<bool>().unwrap_or(false)),
559 }
560 }
561
562 fn all_invalid(&self) -> VortexResult<bool> {
563 match self.validity()? {
564 Validity::NonNullable | Validity::AllValid => Ok(false),
565 Validity::AllInvalid => Ok(true),
566 Validity::Array(a) => Ok(!a.statistics().compute_max::<bool>().unwrap_or(true)),
567 }
568 }
569
570 fn valid_count(&self) -> VortexResult<usize> {
571 if let Some(Precision::Exact(invalid_count)) =
572 self.statistics().get_as::<usize>(Stat::NullCount)
573 {
574 return Ok(self.len() - invalid_count);
575 }
576
577 let count = match self.validity()? {
578 Validity::NonNullable | Validity::AllValid => self.len(),
579 Validity::AllInvalid => 0,
580 Validity::Array(a) => {
581 let sum = compute::sum(&a)?;
582 sum.as_primitive()
583 .as_::<usize>()
584 .ok_or_else(|| vortex_err!("sum of validity array is null"))?
585 }
586 };
587 vortex_ensure!(count <= self.len(), "Valid count exceeds array length");
588
589 self.statistics()
590 .set(Stat::NullCount, Precision::exact(self.len() - count));
591
592 Ok(count)
593 }
594
595 fn invalid_count(&self) -> VortexResult<usize> {
596 Ok(self.len() - self.valid_count()?)
597 }
598
599 fn validity(&self) -> VortexResult<Validity> {
600 if self.dtype().is_nullable() {
601 let validity = <V::ValidityVTable as ValidityVTable<V>>::validity(&self.0)?;
602 if let Validity::Array(array) = &validity {
603 vortex_ensure!(array.len() == self.len(), "Validity array length mismatch");
604 vortex_ensure!(
605 matches!(array.dtype(), DType::Bool(Nullability::NonNullable)),
606 "Validity array is not non-nullable boolean: {}",
607 self.encoding_id(),
608 );
609 }
610 Ok(validity)
611 } else {
612 Ok(Validity::NonNullable)
613 }
614 }
615
616 fn validity_mask(&self) -> VortexResult<Mask> {
617 match self.validity()? {
618 Validity::NonNullable | Validity::AllValid => Ok(Mask::new_true(self.len())),
619 Validity::AllInvalid => Ok(Mask::new_false(self.len())),
620 Validity::Array(a) => Ok(a.to_bool().to_mask()),
621 }
622 }
623
624 fn to_canonical(&self) -> VortexResult<Canonical> {
625 self.to_array()
626 .execute(&mut LEGACY_SESSION.create_execution_ctx())
627 }
628
629 fn append_to_builder(
630 &self,
631 builder: &mut dyn ArrayBuilder,
632 ctx: &mut ExecutionCtx,
633 ) -> VortexResult<()> {
634 if builder.dtype() != self.dtype() {
635 vortex_panic!(
636 "Builder dtype mismatch: expected {}, got {}",
637 self.dtype(),
638 builder.dtype(),
639 );
640 }
641 let len = builder.len();
642
643 V::append_to_builder(&self.0, builder, ctx)?;
644
645 assert_eq!(
646 len + self.len(),
647 builder.len(),
648 "Builder length mismatch after writing array for encoding {}",
649 self.encoding_id(),
650 );
651 Ok(())
652 }
653
654 fn statistics(&self) -> StatsSetRef<'_> {
655 V::stats(&self.0)
656 }
657
658 fn with_children(&self, children: Vec<ArrayRef>) -> VortexResult<ArrayRef> {
659 let mut this = self.0.clone();
660 V::with_children(&mut this, children)?;
661 Ok(this.into_array())
662 }
663}
664
665impl<V: VTable> ArrayHash for ArrayAdapter<V> {
666 fn array_hash<H: Hasher>(&self, state: &mut H, precision: hash::Precision) {
667 self.0.encoding_id().hash(state);
668 V::array_hash(&self.0, state, precision);
669 }
670}
671
672impl<V: VTable> ArrayEq for ArrayAdapter<V> {
673 fn array_eq(&self, other: &Self, precision: hash::Precision) -> bool {
674 V::array_eq(&self.0, &other.0, precision)
675 }
676}
677
678impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
679 fn children(&self) -> Vec<ArrayRef> {
680 (0..V::nchildren(&self.0))
681 .map(|i| V::child(&self.0, i))
682 .collect()
683 }
684
685 fn nchildren(&self) -> usize {
686 V::nchildren(&self.0)
687 }
688
689 fn nth_child(&self, idx: usize) -> Option<ArrayRef> {
690 (idx < V::nchildren(&self.0)).then(|| V::child(&self.0, idx))
691 }
692
693 fn children_names(&self) -> Vec<String> {
694 (0..V::nchildren(&self.0))
695 .map(|i| V::child_name(&self.0, i))
696 .collect()
697 }
698
699 fn named_children(&self) -> Vec<(String, ArrayRef)> {
700 (0..V::nchildren(&self.0))
701 .map(|i| (V::child_name(&self.0, i), V::child(&self.0, i)))
702 .collect()
703 }
704
705 fn buffers(&self) -> Vec<ByteBuffer> {
706 (0..V::nbuffers(&self.0))
707 .map(|i| V::buffer(&self.0, i).to_host_sync())
708 .collect()
709 }
710
711 fn buffer_handles(&self) -> Vec<BufferHandle> {
712 (0..V::nbuffers(&self.0))
713 .map(|i| V::buffer(&self.0, i))
714 .collect()
715 }
716
717 fn buffer_names(&self) -> Vec<String> {
718 (0..V::nbuffers(&self.0))
719 .filter_map(|i| V::buffer_name(&self.0, i))
720 .collect()
721 }
722
723 fn named_buffers(&self) -> Vec<(String, BufferHandle)> {
724 (0..V::nbuffers(&self.0))
725 .filter_map(|i| V::buffer_name(&self.0, i).map(|name| (name, V::buffer(&self.0, i))))
726 .collect()
727 }
728
729 fn nbuffers(&self) -> usize {
730 V::nbuffers(&self.0)
731 }
732
733 fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
734 V::serialize(V::metadata(&self.0)?)
735 }
736
737 fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
738 match V::metadata(&self.0) {
739 Err(e) => write!(f, "<serde error: {e}>"),
740 Ok(metadata) => Debug::fmt(&metadata, f),
741 }
742 }
743
744 fn is_host(&self) -> bool {
745 for array in self.depth_first_traversal() {
746 if !array.buffer_handles().iter().all(BufferHandle::is_on_host) {
747 return false;
748 }
749 }
750
751 true
752 }
753}
754
755impl<V: VTable> Matcher for V {
757 type Match<'a> = &'a V::Array;
758
759 fn matches(array: &dyn DynArray) -> bool {
760 DynArray::as_any(array).is::<ArrayAdapter<V>>()
761 }
762
763 fn try_match<'a>(array: &'a dyn DynArray) -> Option<Self::Match<'a>> {
764 DynArray::as_any(array)
765 .downcast_ref::<ArrayAdapter<V>>()
766 .map(|array_adapter| &array_adapter.0)
767 }
768}