1pub mod display;
5mod operator;
6mod visitor;
7
8use std::any::Any;
9use std::fmt::{Debug, Formatter};
10use std::hash::{Hash, Hasher};
11use std::ops::Range;
12use std::sync::Arc;
13
14pub use operator::*;
15pub use visitor::*;
16use vortex_buffer::ByteBuffer;
17use vortex_dtype::{DType, Nullability};
18use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err, vortex_panic};
19use vortex_mask::Mask;
20use vortex_scalar::Scalar;
21
22use crate::arrays::{
23 BoolEncoding, ConstantVTable, DecimalEncoding, ExtensionEncoding, FixedSizeListEncoding,
24 ListViewEncoding, NullEncoding, PrimitiveEncoding, StructEncoding, VarBinEncoding,
25 VarBinViewEncoding,
26};
27use crate::builders::ArrayBuilder;
28use crate::compute::{ComputeFn, Cost, InvocationArgs, IsConstantOpts, Output, is_constant_opts};
29use crate::serde::ArrayChildren;
30use crate::stats::{Precision, Stat, StatsProviderExt, StatsSetRef};
31use crate::vtable::{
32 ArrayVTable, CanonicalVTable, ComputeVTable, OperationsVTable, SerdeVTable, VTable,
33 ValidityVTable, VisitorVTable,
34};
35use crate::{
36 ArrayEq, ArrayHash, Canonical, DynArrayEq, DynArrayHash, EncodingId, EncodingRef,
37 SerializeMetadata, hash,
38};
39
40pub trait Array:
42 'static
43 + private::Sealed
44 + Send
45 + Sync
46 + Debug
47 + DynArrayEq
48 + DynArrayHash
49 + ArrayVisitor
50 + ArrayOperator
51{
52 fn as_any(&self) -> &dyn Any;
54
55 fn to_array(&self) -> ArrayRef;
57
58 fn len(&self) -> usize;
60
61 fn is_empty(&self) -> bool {
63 self.len() == 0
64 }
65
66 fn dtype(&self) -> &DType;
68
69 fn encoding(&self) -> EncodingRef;
71
72 fn encoding_id(&self) -> EncodingId;
74
75 fn slice(&self, range: Range<usize>) -> ArrayRef;
77
78 fn scalar_at(&self, index: usize) -> Scalar;
82
83 fn is_encoding(&self, encoding: EncodingId) -> bool {
85 self.encoding_id() == encoding
86 }
87
88 fn is_arrow(&self) -> bool {
91 self.is_encoding(NullEncoding.id())
92 || self.is_encoding(BoolEncoding.id())
93 || self.is_encoding(PrimitiveEncoding.id())
94 || self.is_encoding(VarBinEncoding.id())
95 || self.is_encoding(VarBinViewEncoding.id())
96 }
97
98 fn is_canonical(&self) -> bool {
101 self.is_encoding(NullEncoding.id())
102 || self.is_encoding(BoolEncoding.id())
103 || self.is_encoding(PrimitiveEncoding.id())
104 || self.is_encoding(DecimalEncoding.id())
105 || self.is_encoding(StructEncoding.id())
106 || self.is_encoding(ListViewEncoding.id())
107 || self.is_encoding(FixedSizeListEncoding.id())
108 || self.is_encoding(VarBinViewEncoding.id())
109 || self.is_encoding(ExtensionEncoding.id())
110 }
111
112 fn is_valid(&self, index: usize) -> bool;
114
115 fn is_invalid(&self, index: usize) -> bool;
117
118 fn all_valid(&self) -> bool;
122
123 fn all_invalid(&self) -> bool;
127
128 fn valid_count(&self) -> usize;
130
131 fn invalid_count(&self) -> usize;
133
134 fn validity_mask(&self) -> Mask;
136
137 fn to_canonical(&self) -> Canonical;
139
140 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder);
144
145 fn statistics(&self) -> StatsSetRef<'_>;
148
149 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef>;
151
152 fn invoke(&self, compute_fn: &ComputeFn, args: &InvocationArgs)
169 -> VortexResult<Option<Output>>;
170}
171
172impl Array for Arc<dyn Array> {
173 #[inline]
174 fn as_any(&self) -> &dyn Any {
175 self.as_ref().as_any()
176 }
177
178 #[inline]
179 fn to_array(&self) -> ArrayRef {
180 self.clone()
181 }
182
183 #[inline]
184 fn len(&self) -> usize {
185 self.as_ref().len()
186 }
187
188 #[inline]
189 fn dtype(&self) -> &DType {
190 self.as_ref().dtype()
191 }
192
193 #[inline]
194 fn encoding(&self) -> EncodingRef {
195 self.as_ref().encoding()
196 }
197
198 #[inline]
199 fn encoding_id(&self) -> EncodingId {
200 self.as_ref().encoding_id()
201 }
202
203 #[inline]
204 fn slice(&self, range: Range<usize>) -> ArrayRef {
205 self.as_ref().slice(range)
206 }
207
208 #[inline]
209 fn scalar_at(&self, index: usize) -> Scalar {
210 self.as_ref().scalar_at(index)
211 }
212
213 #[inline]
214 fn is_valid(&self, index: usize) -> bool {
215 self.as_ref().is_valid(index)
216 }
217
218 #[inline]
219 fn is_invalid(&self, index: usize) -> bool {
220 self.as_ref().is_invalid(index)
221 }
222
223 #[inline]
224 fn all_valid(&self) -> bool {
225 self.as_ref().all_valid()
226 }
227
228 #[inline]
229 fn all_invalid(&self) -> bool {
230 self.as_ref().all_invalid()
231 }
232
233 #[inline]
234 fn valid_count(&self) -> usize {
235 self.as_ref().valid_count()
236 }
237
238 #[inline]
239 fn invalid_count(&self) -> usize {
240 self.as_ref().invalid_count()
241 }
242
243 #[inline]
244 fn validity_mask(&self) -> Mask {
245 self.as_ref().validity_mask()
246 }
247
248 fn to_canonical(&self) -> Canonical {
249 self.as_ref().to_canonical()
250 }
251
252 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) {
253 self.as_ref().append_to_builder(builder)
254 }
255
256 fn statistics(&self) -> StatsSetRef<'_> {
257 self.as_ref().statistics()
258 }
259
260 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
262 self.as_ref().with_children(children)
263 }
264
265 fn invoke(
266 &self,
267 compute_fn: &ComputeFn,
268 args: &InvocationArgs,
269 ) -> VortexResult<Option<Output>> {
270 self.as_ref().invoke(compute_fn, args)
271 }
272}
273
/// A reference-counted pointer to a dynamically typed [`Array`].
pub type ArrayRef = Arc<dyn Array>;
276
277impl ToOwned for dyn Array {
278 type Owned = ArrayRef;
279
280 fn to_owned(&self) -> Self::Owned {
281 self.to_array()
282 }
283}
284
285impl dyn Array + '_ {
286 pub fn as_<V: VTable>(&self) -> &V::Array {
288 self.as_opt::<V>().vortex_expect("Failed to downcast")
289 }
290
291 pub fn as_opt<V: VTable>(&self) -> Option<&V::Array> {
293 self.as_any()
294 .downcast_ref::<ArrayAdapter<V>>()
295 .map(|array_adapter| &array_adapter.0)
296 }
297
298 pub fn is<V: VTable>(&self) -> bool {
300 self.as_opt::<V>().is_some()
301 }
302
303 pub fn is_constant(&self) -> bool {
304 let opts = IsConstantOpts {
305 cost: Cost::Specialized,
306 };
307 is_constant_opts(self, &opts)
308 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
309 .ok()
310 .flatten()
311 .unwrap_or_default()
312 }
313
314 pub fn is_constant_opts(&self, cost: Cost) -> bool {
315 let opts = IsConstantOpts { cost };
316 is_constant_opts(self, &opts)
317 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
318 .ok()
319 .flatten()
320 .unwrap_or_default()
321 }
322
323 pub fn as_constant(&self) -> Option<Scalar> {
324 self.is_constant().then(|| self.scalar_at(0))
325 }
326
327 pub fn nbytes(&self) -> u64 {
329 let mut nbytes = 0;
330 for array in self.depth_first_traversal() {
331 for buffer in array.buffers() {
332 nbytes += buffer.len() as u64;
333 }
334 }
335 nbytes
336 }
337}
338
/// Conversion of a value into an [`ArrayRef`], consuming the value.
pub trait IntoArray {
    /// Consumes `self` and returns it as an [`ArrayRef`].
    fn into_array(self) -> ArrayRef;
}
343
/// An [`ArrayRef`] already is the target type, so the conversion is the identity.
impl IntoArray for ArrayRef {
    /// Returns `self` unchanged.
    fn into_array(self) -> ArrayRef {
        self
    }
}
349
/// Private module holding the [`Sealed`] marker trait that [`Array`] requires as a supertrait.
mod private {
    use super::*;

    /// Marker trait without methods; a type must implement it before it can implement `Array`.
    pub trait Sealed {}

    // The two implementors of `Array` defined in this file.
    impl<V: VTable> Sealed for ArrayAdapter<V> {}
    impl Sealed for Arc<dyn Array> {}
}
358
/// Newtype around the concrete array type of the vtable `V`.
///
/// This file implements [`Array`] for the adapter by dispatching to the vtable traits of `V`.
/// `#[repr(transparent)]` gives the adapter the same layout as the wrapped `V::Array`.
#[repr(transparent)]
pub struct ArrayAdapter<V: VTable>(V::Array);
367
368impl<V: VTable> ArrayAdapter<V> {
369 pub fn as_inner(&self) -> &V::Array {
371 &self.0
372 }
373
374 pub fn into_inner(self) -> V::Array {
376 self.0
377 }
378}
379
380impl<V: VTable> Debug for ArrayAdapter<V> {
381 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
382 self.0.fmt(f)
383 }
384}
385
386impl<V: VTable> Array for ArrayAdapter<V> {
387 fn as_any(&self) -> &dyn Any {
388 self
389 }
390
391 fn to_array(&self) -> ArrayRef {
392 Arc::new(ArrayAdapter::<V>(self.0.clone()))
393 }
394
395 fn len(&self) -> usize {
396 <V::ArrayVTable as ArrayVTable<V>>::len(&self.0)
397 }
398
399 fn dtype(&self) -> &DType {
400 <V::ArrayVTable as ArrayVTable<V>>::dtype(&self.0)
401 }
402
403 fn encoding(&self) -> EncodingRef {
404 V::encoding(&self.0)
405 }
406
407 fn encoding_id(&self) -> EncodingId {
408 V::encoding(&self.0).id()
409 }
410
411 fn slice(&self, range: Range<usize>) -> ArrayRef {
412 let start = range.start;
413 let stop = range.end;
414
415 if start == 0 && stop == self.len() {
416 return self.to_array();
417 }
418
419 assert!(
420 start <= self.len(),
421 "OutOfBounds: start {start} > length {}",
422 self.len()
423 );
424 assert!(
425 stop <= self.len(),
426 "OutOfBounds: stop {stop} > length {}",
427 self.len()
428 );
429
430 assert!(start <= stop, "start ({start}) must be <= stop ({stop})");
431
432 if start == stop {
433 return Canonical::empty(self.dtype()).into_array();
434 }
435
436 let sliced = <V::OperationsVTable as OperationsVTable<V>>::slice(&self.0, range);
437
438 assert_eq!(
439 sliced.len(),
440 stop - start,
441 "Slice length mismatch {}",
442 self.encoding_id()
443 );
444
445 debug_assert_eq!(
447 sliced.dtype(),
448 self.dtype(),
449 "Slice dtype mismatch {}",
450 self.encoding_id()
451 );
452
453 if !sliced.is::<ConstantVTable>() {
455 self.statistics().with_iter(|iter| {
456 sliced.statistics().inherit(iter.filter(|(stat, value)| {
457 matches!(
458 stat,
459 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
460 ) && value.as_ref().as_exact().is_some_and(|v| {
461 Scalar::new(DType::Bool(Nullability::NonNullable), v.clone())
462 .as_bool()
463 .value()
464 .unwrap_or_default()
465 })
466 }));
467 });
468 }
469
470 sliced
471 }
472
473 fn scalar_at(&self, index: usize) -> Scalar {
474 assert!(index < self.len(), "index {index} out of bounds");
475 if self.is_invalid(index) {
476 return Scalar::null(self.dtype().clone());
477 }
478 let scalar = <V::OperationsVTable as OperationsVTable<V>>::scalar_at(&self.0, index);
479 assert_eq!(self.dtype(), scalar.dtype(), "Scalar dtype mismatch");
480 scalar
481 }
482
483 fn is_valid(&self, index: usize) -> bool {
484 if index >= self.len() {
485 vortex_panic!(OutOfBounds: index, 0, self.len());
486 }
487 <V::ValidityVTable as ValidityVTable<V>>::is_valid(&self.0, index)
488 }
489
490 fn is_invalid(&self, index: usize) -> bool {
491 !self.is_valid(index)
492 }
493
494 fn all_valid(&self) -> bool {
495 <V::ValidityVTable as ValidityVTable<V>>::all_valid(&self.0)
496 }
497
498 fn all_invalid(&self) -> bool {
499 <V::ValidityVTable as ValidityVTable<V>>::all_invalid(&self.0)
500 }
501
502 fn valid_count(&self) -> usize {
503 if let Some(Precision::Exact(invalid_count)) =
504 self.statistics().get_as::<usize>(Stat::NullCount)
505 {
506 return self.len() - invalid_count;
507 }
508
509 let count = <V::ValidityVTable as ValidityVTable<V>>::valid_count(&self.0);
510 assert!(count <= self.len(), "Valid count exceeds array length");
511
512 self.statistics()
513 .set(Stat::NullCount, Precision::exact(self.len() - count));
514
515 count
516 }
517
518 fn invalid_count(&self) -> usize {
519 if let Some(Precision::Exact(invalid_count)) =
520 self.statistics().get_as::<usize>(Stat::NullCount)
521 {
522 return invalid_count;
523 }
524
525 let count = <V::ValidityVTable as ValidityVTable<V>>::invalid_count(&self.0);
526 assert!(count <= self.len(), "Invalid count exceeds array length");
527
528 self.statistics()
529 .set(Stat::NullCount, Precision::exact(count));
530
531 count
532 }
533
534 fn validity_mask(&self) -> Mask {
535 let mask = <V::ValidityVTable as ValidityVTable<V>>::validity_mask(&self.0);
536 assert_eq!(mask.len(), self.len(), "Validity mask length mismatch");
537 mask
538 }
539
540 fn to_canonical(&self) -> Canonical {
541 let canonical = <V::CanonicalVTable as CanonicalVTable<V>>::canonicalize(&self.0);
542 assert_eq!(
543 self.len(),
544 canonical.as_ref().len(),
545 "Canonical length mismatch {}. Expected {} but encoded into {}.",
546 self.encoding_id(),
547 self.len(),
548 canonical.as_ref().len()
549 );
550 assert_eq!(
551 self.dtype(),
552 canonical.as_ref().dtype(),
553 "Canonical dtype mismatch {}. Expected {} but encoded into {}.",
554 self.encoding_id(),
555 self.dtype(),
556 canonical.as_ref().dtype()
557 );
558 canonical
559 .as_ref()
560 .statistics()
561 .inherit_from(self.statistics());
562 canonical
563 }
564
565 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) {
566 if builder.dtype() != self.dtype() {
567 vortex_panic!(
568 "Builder dtype mismatch: expected {}, got {}",
569 self.dtype(),
570 builder.dtype(),
571 );
572 }
573 let len = builder.len();
574
575 <V::CanonicalVTable as CanonicalVTable<V>>::append_to_builder(&self.0, builder);
576 assert_eq!(
577 len + self.len(),
578 builder.len(),
579 "Builder length mismatch after writing array for encoding {}",
580 self.encoding_id(),
581 );
582 }
583
584 fn statistics(&self) -> StatsSetRef<'_> {
585 <V::ArrayVTable as ArrayVTable<V>>::stats(&self.0)
586 }
587
588 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
589 struct ReplacementChildren<'a> {
590 children: &'a [ArrayRef],
591 }
592
593 impl ArrayChildren for ReplacementChildren<'_> {
594 fn get(&self, index: usize, dtype: &DType, len: usize) -> VortexResult<ArrayRef> {
595 if index >= self.children.len() {
596 vortex_bail!(OutOfBounds: index, 0, self.children.len());
597 }
598 let child = &self.children[index];
599 if child.len() != len {
600 vortex_bail!(
601 "Child length mismatch: expected {}, got {}",
602 len,
603 child.len()
604 );
605 }
606 if child.dtype() != dtype {
607 vortex_bail!(
608 "Child dtype mismatch: expected {}, got {}",
609 dtype,
610 child.dtype()
611 );
612 }
613 Ok(child.clone())
614 }
615
616 fn len(&self) -> usize {
617 self.children.len()
618 }
619 }
620
621 let metadata = self.metadata()?.ok_or_else(|| {
622 vortex_err!("Cannot replace children for arrays that do not support serialization")
623 })?;
624
625 self.encoding().build(
627 self.dtype(),
628 self.len(),
629 &metadata,
630 &self.buffers(),
631 &ReplacementChildren { children },
632 )
633 }
634
635 fn invoke(
636 &self,
637 compute_fn: &ComputeFn,
638 args: &InvocationArgs,
639 ) -> VortexResult<Option<Output>> {
640 <V::ComputeVTable as ComputeVTable<V>>::invoke(&self.0, compute_fn, args)
641 }
642}
643
644impl<V: VTable> ArrayHash for ArrayAdapter<V> {
645 fn array_hash<H: Hasher>(&self, state: &mut H, precision: hash::Precision) {
646 self.0.encoding_id().hash(state);
647 <V::ArrayVTable as ArrayVTable<V>>::array_hash(&self.0, state, precision);
648 }
649}
650
651impl<V: VTable> ArrayEq for ArrayAdapter<V> {
652 fn array_eq(&self, other: &Self, precision: hash::Precision) -> bool {
653 <V::ArrayVTable as ArrayVTable<V>>::array_eq(&self.0, &other.0, precision)
654 }
655}
656
657impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
658 fn children(&self) -> Vec<ArrayRef> {
659 struct ChildrenCollector {
660 children: Vec<ArrayRef>,
661 }
662
663 impl ArrayChildVisitor for ChildrenCollector {
664 fn visit_child(&mut self, _name: &str, array: &dyn Array) {
665 self.children.push(array.to_array());
666 }
667 }
668
669 let mut collector = ChildrenCollector {
670 children: Vec::new(),
671 };
672 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
673 collector.children
674 }
675
676 fn nchildren(&self) -> usize {
677 <V::VisitorVTable as VisitorVTable<V>>::nchildren(&self.0)
678 }
679
680 fn children_names(&self) -> Vec<String> {
681 struct ChildNameCollector {
682 names: Vec<String>,
683 }
684
685 impl ArrayChildVisitor for ChildNameCollector {
686 fn visit_child(&mut self, name: &str, _array: &dyn Array) {
687 self.names.push(name.to_string());
688 }
689 }
690
691 let mut collector = ChildNameCollector { names: Vec::new() };
692 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
693 collector.names
694 }
695
696 fn named_children(&self) -> Vec<(String, ArrayRef)> {
697 struct NamedChildrenCollector {
698 children: Vec<(String, ArrayRef)>,
699 }
700
701 impl ArrayChildVisitor for NamedChildrenCollector {
702 fn visit_child(&mut self, name: &str, array: &dyn Array) {
703 self.children.push((name.to_string(), array.to_array()));
704 }
705 }
706
707 let mut collector = NamedChildrenCollector {
708 children: Vec::new(),
709 };
710
711 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
712 collector.children
713 }
714
715 fn buffers(&self) -> Vec<ByteBuffer> {
716 struct BufferCollector {
717 buffers: Vec<ByteBuffer>,
718 }
719
720 impl ArrayBufferVisitor for BufferCollector {
721 fn visit_buffer(&mut self, buffer: &ByteBuffer) {
722 self.buffers.push(buffer.clone());
723 }
724 }
725
726 let mut collector = BufferCollector {
727 buffers: Vec::new(),
728 };
729 <V::VisitorVTable as VisitorVTable<V>>::visit_buffers(&self.0, &mut collector);
730 collector.buffers
731 }
732
733 fn nbuffers(&self) -> usize {
734 <V::VisitorVTable as VisitorVTable<V>>::nbuffers(&self.0)
735 }
736
737 fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
738 Ok(<V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0)?.map(|m| m.serialize()))
739 }
740
741 fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
742 match <V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0) {
743 Err(e) => write!(f, "<serde error: {e}>"),
744 Ok(None) => write!(f, "<serde not supported>"),
745 Ok(Some(metadata)) => Debug::fmt(&metadata, f),
746 }
747 }
748}