1mod visitor;
5
6use std::any::Any;
7use std::fmt::Debug;
8use std::fmt::Formatter;
9use std::hash::Hash;
10use std::hash::Hasher;
11use std::ops::Deref;
12use std::ops::Range;
13use std::sync::Arc;
14
15pub use visitor::*;
16use vortex_buffer::ByteBuffer;
17use vortex_error::VortexExpect;
18use vortex_error::VortexResult;
19use vortex_error::vortex_ensure;
20use vortex_error::vortex_err;
21use vortex_error::vortex_panic;
22use vortex_mask::Mask;
23
24use crate::AnyCanonical;
25use crate::ArrayEq;
26use crate::ArrayHash;
27use crate::Canonical;
28use crate::DynArrayEq;
29use crate::DynArrayHash;
30use crate::ExecutionCtx;
31use crate::LEGACY_SESSION;
32use crate::ToCanonical;
33use crate::VortexSessionExecute;
34use crate::arrays::BoolVTable;
35use crate::arrays::ConstantVTable;
36use crate::arrays::DictArray;
37use crate::arrays::FilterArray;
38use crate::arrays::NullVTable;
39use crate::arrays::PrimitiveVTable;
40use crate::arrays::ScalarFnVTable;
41use crate::arrays::SliceArray;
42use crate::arrays::VarBinVTable;
43use crate::arrays::VarBinViewVTable;
44use crate::buffer::BufferHandle;
45use crate::builders::ArrayBuilder;
46use crate::compute;
47use crate::dtype::DType;
48use crate::dtype::Nullability;
49use crate::expr::stats::Precision;
50use crate::expr::stats::Stat;
51use crate::expr::stats::StatsProviderExt;
52use crate::hash;
53use crate::matcher::Matcher;
54use crate::optimizer::ArrayOptimizer;
55use crate::scalar::Scalar;
56use crate::scalar_fn::ReduceNode;
57use crate::scalar_fn::ReduceNodeRef;
58use crate::scalar_fn::ScalarFnRef;
59use crate::stats::StatsSetRef;
60use crate::validity::Validity;
61use crate::vtable::ArrayId;
62use crate::vtable::ArrayVTableExt;
63use crate::vtable::DynVTable;
64use crate::vtable::OperationsVTable;
65use crate::vtable::VTable;
66use crate::vtable::ValidityVTable;
67
/// The dynamically-dispatched interface shared by all arrays.
///
/// The trait is sealed through the `private::Sealed` supertrait; the implementations in this
/// file are `ArrayAdapter<V>` (one per [`VTable`]) and `Arc<dyn Array>`.
pub trait Array:
    'static
    + private::Sealed
    + Send
    + Sync
    + Debug
    + DynArrayEq
    + DynArrayHash
    + ArrayVisitor
    + ReduceNode
{
    /// Returns the array as a `&dyn Any` so it can be downcast to a concrete type.
    fn as_any(&self) -> &dyn Any;

    /// Converts an `Arc` holding this array into an `Arc<dyn Any + Send + Sync>`.
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;

    /// Returns the array as an owned [`ArrayRef`].
    fn to_array(&self) -> ArrayRef;

    /// Returns the length of the array.
    fn len(&self) -> usize;

    /// Returns `true` when [`Array::len`] is zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the [`DType`] of the array.
    fn dtype(&self) -> &DType;

    /// Returns the [`DynVTable`] associated with this array.
    fn vtable(&self) -> &dyn DynVTable;

    /// Returns the [`ArrayId`] of this array's encoding.
    fn encoding_id(&self) -> ArrayId;

    /// Returns the part of the array covered by `range`.
    fn slice(&self, range: Range<usize>) -> VortexResult<ArrayRef>;

    /// Returns the array filtered by `mask`.
    fn filter(&self, mask: Mask) -> VortexResult<ArrayRef>;

    /// Returns the array obtained by taking the positions listed in `indices`.
    fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef>;

    /// Returns the [`Scalar`] stored at `index`.
    fn scalar_at(&self, index: usize) -> VortexResult<Scalar>;

    /// Returns whether the element at `index` is valid.
    fn is_valid(&self, index: usize) -> VortexResult<bool>;

    /// Returns whether the element at `index` is invalid.
    fn is_invalid(&self, index: usize) -> VortexResult<bool>;

    /// Returns whether every element of the array is valid.
    fn all_valid(&self) -> VortexResult<bool>;

    /// Returns whether every element of the array is invalid.
    fn all_invalid(&self) -> VortexResult<bool>;

    /// Returns the number of valid elements.
    fn valid_count(&self) -> VortexResult<usize>;

    /// Returns the number of invalid elements.
    fn invalid_count(&self) -> VortexResult<usize>;

    /// Returns the [`Validity`] of the array.
    fn validity(&self) -> VortexResult<Validity>;

    /// Returns the validity of the array as a [`Mask`].
    fn validity_mask(&self) -> VortexResult<Mask>;

    /// Converts the array into its [`Canonical`] form.
    fn to_canonical(&self) -> VortexResult<Canonical>;

    /// Appends the contents of this array to `builder`, using `ctx` for execution.
    fn append_to_builder(
        &self,
        builder: &mut dyn ArrayBuilder,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<()>;

    /// Returns a handle to the statistics of this array.
    fn statistics(&self) -> StatsSetRef<'_>;

    /// Returns a new array built from this one with its children replaced by `children`.
    fn with_children(&self, children: Vec<ArrayRef>) -> VortexResult<ArrayRef>;
}
169
170impl Array for Arc<dyn Array> {
171 #[inline]
172 fn as_any(&self) -> &dyn Any {
173 Array::as_any(self.as_ref())
174 }
175
176 fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
177 self
178 }
179
180 #[inline]
181 fn to_array(&self) -> ArrayRef {
182 self.clone()
183 }
184
185 #[inline]
186 fn len(&self) -> usize {
187 self.as_ref().len()
188 }
189
190 #[inline]
191 fn dtype(&self) -> &DType {
192 self.as_ref().dtype()
193 }
194
195 fn vtable(&self) -> &dyn DynVTable {
196 self.as_ref().vtable()
197 }
198
199 #[inline]
200 fn encoding_id(&self) -> ArrayId {
201 self.as_ref().encoding_id()
202 }
203
204 #[inline]
205 fn slice(&self, range: Range<usize>) -> VortexResult<ArrayRef> {
206 self.as_ref().slice(range)
207 }
208
209 fn filter(&self, mask: Mask) -> VortexResult<ArrayRef> {
210 self.as_ref().filter(mask)
211 }
212
213 fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef> {
214 self.as_ref().take(indices)
215 }
216
217 #[inline]
218 fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
219 self.as_ref().scalar_at(index)
220 }
221
222 #[inline]
223 fn is_valid(&self, index: usize) -> VortexResult<bool> {
224 self.as_ref().is_valid(index)
225 }
226
227 #[inline]
228 fn is_invalid(&self, index: usize) -> VortexResult<bool> {
229 self.as_ref().is_invalid(index)
230 }
231
232 #[inline]
233 fn all_valid(&self) -> VortexResult<bool> {
234 self.as_ref().all_valid()
235 }
236
237 #[inline]
238 fn all_invalid(&self) -> VortexResult<bool> {
239 self.as_ref().all_invalid()
240 }
241
242 #[inline]
243 fn valid_count(&self) -> VortexResult<usize> {
244 self.as_ref().valid_count()
245 }
246
247 #[inline]
248 fn invalid_count(&self) -> VortexResult<usize> {
249 self.as_ref().invalid_count()
250 }
251
252 #[inline]
253 fn validity(&self) -> VortexResult<Validity> {
254 self.as_ref().validity()
255 }
256
257 #[inline]
258 fn validity_mask(&self) -> VortexResult<Mask> {
259 self.as_ref().validity_mask()
260 }
261
262 fn to_canonical(&self) -> VortexResult<Canonical> {
263 self.as_ref().to_canonical()
264 }
265
266 fn append_to_builder(
267 &self,
268 builder: &mut dyn ArrayBuilder,
269 ctx: &mut ExecutionCtx,
270 ) -> VortexResult<()> {
271 self.as_ref().append_to_builder(builder, ctx)
272 }
273
274 fn statistics(&self) -> StatsSetRef<'_> {
275 self.as_ref().statistics()
276 }
277
278 fn with_children(&self, children: Vec<ArrayRef>) -> VortexResult<ArrayRef> {
279 self.as_ref().with_children(children)
280 }
281}
282
/// A reference-counted, type-erased handle to an [`Array`].
pub type ArrayRef = Arc<dyn Array>;
285
286impl ToOwned for dyn Array {
287 type Owned = ArrayRef;
288
289 fn to_owned(&self) -> Self::Owned {
290 self.to_array()
291 }
292}
293
294impl dyn Array + '_ {
295 pub fn is<M: Matcher>(&self) -> bool {
297 M::matches(self)
298 }
299
300 pub fn as_<M: Matcher>(&self) -> M::Match<'_> {
302 self.as_opt::<M>().vortex_expect("Failed to downcast")
303 }
304
305 pub fn as_opt<M: Matcher>(&self) -> Option<M::Match<'_>> {
307 M::try_match(self)
308 }
309
310 pub fn try_into<V: VTable>(self: Arc<Self>) -> Result<V::Array, Arc<Self>> {
312 match self.is::<V>() {
313 true => {
314 let arc = self
315 .as_any_arc()
316 .downcast::<ArrayAdapter<V>>()
317 .map_err(|_| vortex_err!("failed to downcast"))
318 .vortex_expect("Failed to downcast");
319 Ok(match Arc::try_unwrap(arc) {
320 Ok(array) => array.0,
321 Err(arc) => arc.deref().0.clone(),
322 })
323 }
324 false => Err(self),
325 }
326 }
327
328 pub fn as_constant(&self) -> Option<Scalar> {
329 self.as_opt::<ConstantVTable>().map(|a| a.scalar().clone())
330 }
331
332 pub fn nbytes(&self) -> u64 {
334 let mut nbytes = 0;
335 for array in self.depth_first_traversal() {
336 for buffer in array.buffers() {
337 nbytes += buffer.len() as u64;
338 }
339 }
340 nbytes
341 }
342
343 pub fn is_arrow(&self) -> bool {
345 self.is::<NullVTable>()
346 || self.is::<BoolVTable>()
347 || self.is::<PrimitiveVTable>()
348 || self.is::<VarBinVTable>()
349 || self.is::<VarBinViewVTable>()
350 }
351
352 pub fn is_canonical(&self) -> bool {
354 self.is::<AnyCanonical>()
355 }
356}
357
/// Conversion of a value into an [`ArrayRef`], consuming the value.
pub trait IntoArray {
    /// Consumes `self` and returns it as an [`ArrayRef`].
    fn into_array(self) -> ArrayRef;
}
362
/// An [`ArrayRef`] already is the target type, so the conversion is the identity.
impl IntoArray for ArrayRef {
    fn into_array(self) -> ArrayRef {
        self
    }
}
368
/// Holds the `Sealed` marker trait that [`Array`] requires as a supertrait.
mod private {
    use super::*;

    /// Marker trait; only the types listed below implement it.
    pub trait Sealed {}

    impl<V: VTable> Sealed for ArrayAdapter<V> {}
    impl Sealed for Arc<dyn Array> {}
}
377
/// Newtype around `V::Array` that carries the [`Array`] implementation for the encoding `V`.
///
/// `#[repr(transparent)]` gives the adapter the same layout as the wrapped `V::Array`.
#[repr(transparent)]
pub struct ArrayAdapter<V: VTable>(V::Array);
386
impl<V: VTable> ArrayAdapter<V> {
    /// Returns a reference to the wrapped `V::Array`.
    pub fn as_inner(&self) -> &V::Array {
        &self.0
    }
}
393
394impl<V: VTable> Debug for ArrayAdapter<V> {
395 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
396 self.0.fmt(f)
397 }
398}
399
400impl<V: VTable> ReduceNode for ArrayAdapter<V> {
401 fn as_any(&self) -> &dyn Any {
402 self
403 }
404
405 fn node_dtype(&self) -> VortexResult<DType> {
406 Ok(V::dtype(&self.0).clone())
407 }
408
409 fn scalar_fn(&self) -> Option<&ScalarFnRef> {
410 self.0.as_opt::<ScalarFnVTable>().map(|a| a.scalar_fn())
411 }
412
413 fn child(&self, idx: usize) -> ReduceNodeRef {
414 self.nth_child(idx)
415 .unwrap_or_else(|| vortex_panic!("Child index out of bounds: {}", idx))
416 }
417
418 fn child_count(&self) -> usize {
419 self.nchildren()
420 }
421}
422
423impl<V: VTable> Array for ArrayAdapter<V> {
424 fn as_any(&self) -> &dyn Any {
425 self
426 }
427
428 fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
429 self
430 }
431
432 fn to_array(&self) -> ArrayRef {
433 Arc::new(ArrayAdapter::<V>(self.0.clone()))
434 }
435
436 fn len(&self) -> usize {
437 V::len(&self.0)
438 }
439
440 fn dtype(&self) -> &DType {
441 V::dtype(&self.0)
442 }
443
444 fn vtable(&self) -> &dyn DynVTable {
445 V::vtable()
446 }
447
448 fn encoding_id(&self) -> ArrayId {
449 V::id(&self.0)
450 }
451
452 fn slice(&self, range: Range<usize>) -> VortexResult<ArrayRef> {
453 let start = range.start;
454 let stop = range.end;
455
456 if start == 0 && stop == self.len() {
457 return Ok(self.to_array());
458 }
459
460 vortex_ensure!(
461 start <= self.len(),
462 "OutOfBounds: start {start} > length {}",
463 self.len()
464 );
465 vortex_ensure!(
466 stop <= self.len(),
467 "OutOfBounds: stop {stop} > length {}",
468 self.len()
469 );
470
471 vortex_ensure!(start <= stop, "start ({start}) must be <= stop ({stop})");
472
473 if start == stop {
474 return Ok(Canonical::empty(self.dtype()).into_array());
475 }
476
477 let sliced = SliceArray::try_new(self.to_array(), range)?
478 .into_array()
479 .optimize()?;
480
481 if !sliced.is::<ConstantVTable>() {
483 self.statistics().with_iter(|iter| {
484 sliced.statistics().inherit(iter.filter(|(stat, value)| {
485 matches!(
486 stat,
487 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
488 ) && value.as_ref().as_exact().is_some_and(|v| {
489 Scalar::try_new(DType::Bool(Nullability::NonNullable), Some(v.clone()))
490 .vortex_expect("A stat that was expected to be a boolean stat was not")
491 .as_bool()
492 .value()
493 .unwrap_or_default()
494 })
495 }));
496 });
497 }
498
499 Ok(sliced)
500 }
501
502 fn filter(&self, mask: Mask) -> VortexResult<ArrayRef> {
503 FilterArray::try_new(self.to_array(), mask)?
504 .into_array()
505 .optimize()
506 }
507
508 fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef> {
509 DictArray::try_new(indices, self.to_array())?
510 .into_array()
511 .optimize()
512 }
513
514 fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
515 vortex_ensure!(index < self.len(), OutOfBounds: index, 0, self.len());
516 if self.is_invalid(index)? {
517 return Ok(Scalar::null(self.dtype().clone()));
518 }
519 let scalar = <V::OperationsVTable as OperationsVTable<V>>::scalar_at(&self.0, index)?;
520 vortex_ensure!(self.dtype() == scalar.dtype(), "Scalar dtype mismatch");
521 Ok(scalar)
522 }
523
524 fn is_valid(&self, index: usize) -> VortexResult<bool> {
525 vortex_ensure!(index < self.len(), OutOfBounds: index, 0, self.len());
526 match self.validity()? {
527 Validity::NonNullable | Validity::AllValid => Ok(true),
528 Validity::AllInvalid => Ok(false),
529 Validity::Array(a) => a
530 .scalar_at(index)?
531 .as_bool()
532 .value()
533 .ok_or_else(|| vortex_err!("validity value at index {} is null", index)),
534 }
535 }
536
537 fn is_invalid(&self, index: usize) -> VortexResult<bool> {
538 Ok(!self.is_valid(index)?)
539 }
540
541 fn all_valid(&self) -> VortexResult<bool> {
542 match self.validity()? {
543 Validity::NonNullable | Validity::AllValid => Ok(true),
544 Validity::AllInvalid => Ok(false),
545 Validity::Array(a) => Ok(a.statistics().compute_min::<bool>().unwrap_or(false)),
546 }
547 }
548
549 fn all_invalid(&self) -> VortexResult<bool> {
550 match self.validity()? {
551 Validity::NonNullable | Validity::AllValid => Ok(false),
552 Validity::AllInvalid => Ok(true),
553 Validity::Array(a) => Ok(!a.statistics().compute_max::<bool>().unwrap_or(true)),
554 }
555 }
556
557 fn valid_count(&self) -> VortexResult<usize> {
558 if let Some(Precision::Exact(invalid_count)) =
559 self.statistics().get_as::<usize>(Stat::NullCount)
560 {
561 return Ok(self.len() - invalid_count);
562 }
563
564 let count = match self.validity()? {
565 Validity::NonNullable | Validity::AllValid => self.len(),
566 Validity::AllInvalid => 0,
567 Validity::Array(a) => {
568 let sum = compute::sum(&a)?;
569 sum.as_primitive()
570 .as_::<usize>()
571 .ok_or_else(|| vortex_err!("sum of validity array is null"))?
572 }
573 };
574 vortex_ensure!(count <= self.len(), "Valid count exceeds array length");
575
576 self.statistics()
577 .set(Stat::NullCount, Precision::exact(self.len() - count));
578
579 Ok(count)
580 }
581
582 fn invalid_count(&self) -> VortexResult<usize> {
583 Ok(self.len() - self.valid_count()?)
584 }
585
586 fn validity(&self) -> VortexResult<Validity> {
587 if self.dtype().is_nullable() {
588 let validity = <V::ValidityVTable as ValidityVTable<V>>::validity(&self.0)?;
589 if let Validity::Array(array) = &validity {
590 vortex_ensure!(array.len() == self.len(), "Validity array length mismatch");
591 vortex_ensure!(
592 matches!(array.dtype(), DType::Bool(Nullability::NonNullable)),
593 "Validity array is not non-nullable boolean: {}",
594 self.encoding_id(),
595 );
596 }
597 Ok(validity)
598 } else {
599 Ok(Validity::NonNullable)
600 }
601 }
602
603 fn validity_mask(&self) -> VortexResult<Mask> {
604 match self.validity()? {
605 Validity::NonNullable | Validity::AllValid => Ok(Mask::new_true(self.len())),
606 Validity::AllInvalid => Ok(Mask::new_false(self.len())),
607 Validity::Array(a) => Ok(a.to_bool().to_mask()),
608 }
609 }
610
611 fn to_canonical(&self) -> VortexResult<Canonical> {
612 self.to_array()
613 .execute(&mut LEGACY_SESSION.create_execution_ctx())
614 }
615
616 fn append_to_builder(
617 &self,
618 builder: &mut dyn ArrayBuilder,
619 ctx: &mut ExecutionCtx,
620 ) -> VortexResult<()> {
621 if builder.dtype() != self.dtype() {
622 vortex_panic!(
623 "Builder dtype mismatch: expected {}, got {}",
624 self.dtype(),
625 builder.dtype(),
626 );
627 }
628 let len = builder.len();
629
630 V::append_to_builder(&self.0, builder, ctx)?;
631
632 assert_eq!(
633 len + self.len(),
634 builder.len(),
635 "Builder length mismatch after writing array for encoding {}",
636 self.encoding_id(),
637 );
638 Ok(())
639 }
640
641 fn statistics(&self) -> StatsSetRef<'_> {
642 V::stats(&self.0)
643 }
644
645 fn with_children(&self, children: Vec<ArrayRef>) -> VortexResult<ArrayRef> {
646 let mut this = self.0.clone();
647 V::with_children(&mut this, children)?;
648 Ok(this.into_array())
649 }
650}
651
652impl<V: VTable> ArrayHash for ArrayAdapter<V> {
653 fn array_hash<H: Hasher>(&self, state: &mut H, precision: hash::Precision) {
654 self.0.encoding_id().hash(state);
655 V::array_hash(&self.0, state, precision);
656 }
657}
658
659impl<V: VTable> ArrayEq for ArrayAdapter<V> {
660 fn array_eq(&self, other: &Self, precision: hash::Precision) -> bool {
661 V::array_eq(&self.0, &other.0, precision)
662 }
663}
664
665impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
666 fn children(&self) -> Vec<ArrayRef> {
667 (0..V::nchildren(&self.0))
668 .map(|i| V::child(&self.0, i))
669 .collect()
670 }
671
672 fn nchildren(&self) -> usize {
673 V::nchildren(&self.0)
674 }
675
676 fn nth_child(&self, idx: usize) -> Option<ArrayRef> {
677 (idx < V::nchildren(&self.0)).then(|| V::child(&self.0, idx))
678 }
679
680 fn children_names(&self) -> Vec<String> {
681 (0..V::nchildren(&self.0))
682 .map(|i| V::child_name(&self.0, i))
683 .collect()
684 }
685
686 fn named_children(&self) -> Vec<(String, ArrayRef)> {
687 (0..V::nchildren(&self.0))
688 .map(|i| (V::child_name(&self.0, i), V::child(&self.0, i)))
689 .collect()
690 }
691
692 fn buffers(&self) -> Vec<ByteBuffer> {
693 (0..V::nbuffers(&self.0))
694 .map(|i| V::buffer(&self.0, i).to_host_sync())
695 .collect()
696 }
697
698 fn buffer_handles(&self) -> Vec<BufferHandle> {
699 (0..V::nbuffers(&self.0))
700 .map(|i| V::buffer(&self.0, i))
701 .collect()
702 }
703
704 fn buffer_names(&self) -> Vec<String> {
705 (0..V::nbuffers(&self.0))
706 .filter_map(|i| V::buffer_name(&self.0, i))
707 .collect()
708 }
709
710 fn named_buffers(&self) -> Vec<(String, BufferHandle)> {
711 (0..V::nbuffers(&self.0))
712 .filter_map(|i| V::buffer_name(&self.0, i).map(|name| (name, V::buffer(&self.0, i))))
713 .collect()
714 }
715
716 fn nbuffers(&self) -> usize {
717 V::nbuffers(&self.0)
718 }
719
720 fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
721 V::serialize(V::metadata(&self.0)?)
722 }
723
724 fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
725 match V::metadata(&self.0) {
726 Err(e) => write!(f, "<serde error: {e}>"),
727 Ok(metadata) => Debug::fmt(&metadata, f),
728 }
729 }
730
731 fn is_host(&self) -> bool {
732 for array in self.depth_first_traversal() {
733 if !array.buffer_handles().iter().all(BufferHandle::is_on_host) {
734 return false;
735 }
736 }
737
738 true
739 }
740}
741
742impl<V: VTable> Matcher for V {
744 type Match<'a> = &'a V::Array;
745
746 fn matches(array: &dyn Array) -> bool {
747 Array::as_any(array).is::<ArrayAdapter<V>>()
748 }
749
750 fn try_match<'a>(array: &'a dyn Array) -> Option<Self::Match<'a>> {
751 Array::as_any(array)
752 .downcast_ref::<ArrayAdapter<V>>()
753 .map(|array_adapter| &array_adapter.0)
754 }
755}