1pub mod display;
5mod visitor;
6
7use std::any::Any;
8use std::fmt::{Debug, Formatter};
9use std::ops::Range;
10use std::sync::Arc;
11
12pub use visitor::*;
13use vortex_buffer::ByteBuffer;
14use vortex_dtype::{DType, Nullability};
15use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err, vortex_panic};
16use vortex_mask::Mask;
17use vortex_scalar::Scalar;
18
19use crate::arrays::{
20 BoolEncoding, ConstantVTable, DecimalEncoding, ExtensionEncoding, FixedSizeListEncoding,
21 ListEncoding, NullEncoding, PrimitiveEncoding, StructEncoding, VarBinEncoding,
22 VarBinViewEncoding,
23};
24use crate::builders::ArrayBuilder;
25use crate::compute::{ComputeFn, Cost, InvocationArgs, IsConstantOpts, Output, is_constant_opts};
26use crate::operator::OperatorRef;
27use crate::serde::ArrayChildren;
28use crate::stats::{Precision, Stat, StatsProviderExt, StatsSetRef};
29use crate::vtable::{
30 ArrayVTable, CanonicalVTable, ComputeVTable, OperationsVTable, PipelineVTable, SerdeVTable,
31 VTable, ValidityVTable, VisitorVTable,
32};
33use crate::{Canonical, EncodingId, EncodingRef, SerializeMetadata};
34
35pub trait Array: 'static + private::Sealed + Send + Sync + Debug + ArrayVisitor {
37 fn as_any(&self) -> &dyn Any;
39
40 fn to_array(&self) -> ArrayRef;
42
43 fn len(&self) -> usize;
45
46 fn is_empty(&self) -> bool {
48 self.len() == 0
49 }
50
51 fn dtype(&self) -> &DType;
53
54 fn encoding(&self) -> EncodingRef;
56
57 fn encoding_id(&self) -> EncodingId;
59
60 fn slice(&self, range: Range<usize>) -> ArrayRef;
62
63 fn scalar_at(&self, index: usize) -> Scalar;
67
68 fn is_encoding(&self, encoding: EncodingId) -> bool {
70 self.encoding_id() == encoding
71 }
72
73 fn is_arrow(&self) -> bool {
76 self.is_encoding(NullEncoding.id())
77 || self.is_encoding(BoolEncoding.id())
78 || self.is_encoding(PrimitiveEncoding.id())
79 || self.is_encoding(VarBinEncoding.id())
80 || self.is_encoding(VarBinViewEncoding.id())
81 }
82
83 fn is_canonical(&self) -> bool {
86 self.is_encoding(NullEncoding.id())
87 || self.is_encoding(BoolEncoding.id())
88 || self.is_encoding(PrimitiveEncoding.id())
89 || self.is_encoding(DecimalEncoding.id())
90 || self.is_encoding(StructEncoding.id())
91 || self.is_encoding(ListEncoding.id())
92 || self.is_encoding(FixedSizeListEncoding.id())
93 || self.is_encoding(VarBinViewEncoding.id())
94 || self.is_encoding(ExtensionEncoding.id())
95 }
96
97 fn is_valid(&self, index: usize) -> bool;
99
100 fn is_invalid(&self, index: usize) -> bool;
102
103 fn all_valid(&self) -> bool;
107
108 fn all_invalid(&self) -> bool;
112
113 fn valid_count(&self) -> usize;
115
116 fn invalid_count(&self) -> usize;
118
119 fn validity_mask(&self) -> Mask;
121
122 fn to_canonical(&self) -> Canonical;
124
125 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder);
129
130 fn statistics(&self) -> StatsSetRef<'_>;
133
134 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef>;
136
137 fn invoke(&self, compute_fn: &ComputeFn, args: &InvocationArgs)
154 -> VortexResult<Option<Output>>;
155
156 fn to_operator(&self) -> VortexResult<Option<OperatorRef>>;
160}
161
162impl Array for Arc<dyn Array> {
163 #[inline]
164 fn as_any(&self) -> &dyn Any {
165 self.as_ref().as_any()
166 }
167
168 #[inline]
169 fn to_array(&self) -> ArrayRef {
170 self.clone()
171 }
172
173 #[inline]
174 fn len(&self) -> usize {
175 self.as_ref().len()
176 }
177
178 #[inline]
179 fn dtype(&self) -> &DType {
180 self.as_ref().dtype()
181 }
182
183 #[inline]
184 fn encoding(&self) -> EncodingRef {
185 self.as_ref().encoding()
186 }
187
188 #[inline]
189 fn encoding_id(&self) -> EncodingId {
190 self.as_ref().encoding_id()
191 }
192
193 #[inline]
194 fn slice(&self, range: Range<usize>) -> ArrayRef {
195 self.as_ref().slice(range)
196 }
197
198 #[inline]
199 fn scalar_at(&self, index: usize) -> Scalar {
200 self.as_ref().scalar_at(index)
201 }
202
203 #[inline]
204 fn is_valid(&self, index: usize) -> bool {
205 self.as_ref().is_valid(index)
206 }
207
208 #[inline]
209 fn is_invalid(&self, index: usize) -> bool {
210 self.as_ref().is_invalid(index)
211 }
212
213 #[inline]
214 fn all_valid(&self) -> bool {
215 self.as_ref().all_valid()
216 }
217
218 #[inline]
219 fn all_invalid(&self) -> bool {
220 self.as_ref().all_invalid()
221 }
222
223 #[inline]
224 fn valid_count(&self) -> usize {
225 self.as_ref().valid_count()
226 }
227
228 #[inline]
229 fn invalid_count(&self) -> usize {
230 self.as_ref().invalid_count()
231 }
232
233 #[inline]
234 fn validity_mask(&self) -> Mask {
235 self.as_ref().validity_mask()
236 }
237
238 fn to_canonical(&self) -> Canonical {
239 self.as_ref().to_canonical()
240 }
241
242 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) {
243 self.as_ref().append_to_builder(builder)
244 }
245
246 fn statistics(&self) -> StatsSetRef<'_> {
247 self.as_ref().statistics()
248 }
249
250 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
251 self.as_ref().with_children(children)
252 }
253
254 fn invoke(
255 &self,
256 compute_fn: &ComputeFn,
257 args: &InvocationArgs,
258 ) -> VortexResult<Option<Output>> {
259 self.as_ref().invoke(compute_fn, args)
260 }
261
262 fn to_operator(&self) -> VortexResult<Option<OperatorRef>> {
263 self.as_ref().to_operator()
264 }
265}
266
267pub type ArrayRef = Arc<dyn Array>;
269
270impl ToOwned for dyn Array {
271 type Owned = ArrayRef;
272
273 fn to_owned(&self) -> Self::Owned {
274 self.to_array()
275 }
276}
277
278impl dyn Array + '_ {
279 pub fn as_<V: VTable>(&self) -> &V::Array {
281 self.as_opt::<V>().vortex_expect("Failed to downcast")
282 }
283
284 pub fn as_opt<V: VTable>(&self) -> Option<&V::Array> {
286 self.as_any()
287 .downcast_ref::<ArrayAdapter<V>>()
288 .map(|array_adapter| &array_adapter.0)
289 }
290
291 pub fn is<V: VTable>(&self) -> bool {
293 self.as_opt::<V>().is_some()
294 }
295
296 pub fn is_constant(&self) -> bool {
297 let opts = IsConstantOpts {
298 cost: Cost::Specialized,
299 };
300 is_constant_opts(self, &opts)
301 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
302 .ok()
303 .flatten()
304 .unwrap_or_default()
305 }
306
307 pub fn is_constant_opts(&self, cost: Cost) -> bool {
308 let opts = IsConstantOpts { cost };
309 is_constant_opts(self, &opts)
310 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
311 .ok()
312 .flatten()
313 .unwrap_or_default()
314 }
315
316 pub fn as_constant(&self) -> Option<Scalar> {
317 self.is_constant().then(|| self.scalar_at(0))
318 }
319
320 pub fn nbytes(&self) -> u64 {
322 let mut nbytes = 0;
323 for array in self.depth_first_traversal() {
324 for buffer in array.buffers() {
325 nbytes += buffer.len() as u64;
326 }
327 }
328 nbytes
329 }
330}
331
332pub trait IntoArray {
334 fn into_array(self) -> ArrayRef;
335}
336
337impl IntoArray for ArrayRef {
338 fn into_array(self) -> ArrayRef {
339 self
340 }
341}
342
343mod private {
344 use super::*;
345
346 pub trait Sealed {}
347
348 impl<V: VTable> Sealed for ArrayAdapter<V> {}
349 impl Sealed for Arc<dyn Array> {}
350}
351
352#[repr(transparent)]
359pub struct ArrayAdapter<V: VTable>(V::Array);
360
361impl<V: VTable> ArrayAdapter<V> {
362 pub fn as_inner(&self) -> &V::Array {
364 &self.0
365 }
366
367 pub fn into_inner(self) -> V::Array {
369 self.0
370 }
371}
372
373impl<V: VTable> Debug for ArrayAdapter<V> {
374 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
375 self.0.fmt(f)
376 }
377}
378
379impl<V: VTable> Array for ArrayAdapter<V> {
380 fn as_any(&self) -> &dyn Any {
381 self
382 }
383
384 fn to_array(&self) -> ArrayRef {
385 Arc::new(ArrayAdapter::<V>(self.0.clone()))
386 }
387
388 fn len(&self) -> usize {
389 <V::ArrayVTable as ArrayVTable<V>>::len(&self.0)
390 }
391
392 fn dtype(&self) -> &DType {
393 <V::ArrayVTable as ArrayVTable<V>>::dtype(&self.0)
394 }
395
396 fn encoding(&self) -> EncodingRef {
397 V::encoding(&self.0)
398 }
399
400 fn encoding_id(&self) -> EncodingId {
401 V::encoding(&self.0).id()
402 }
403
404 fn slice(&self, range: Range<usize>) -> ArrayRef {
405 let start = range.start;
406 let stop = range.end;
407
408 if start == 0 && stop == self.len() {
409 return self.to_array();
410 }
411
412 assert!(
413 start <= self.len(),
414 "OutOfBounds: start {start} > length {}",
415 self.len()
416 );
417 assert!(
418 stop <= self.len(),
419 "OutOfBounds: stop {stop} > length {}",
420 self.len()
421 );
422
423 assert!(start <= stop, "start ({start}) must be <= stop ({stop})");
424
425 if start == stop {
426 return Canonical::empty(self.dtype()).into_array();
427 }
428
429 let sliced = <V::OperationsVTable as OperationsVTable<V>>::slice(&self.0, range);
430
431 assert_eq!(
432 sliced.len(),
433 stop - start,
434 "Slice length mismatch {}",
435 self.encoding_id()
436 );
437
438 debug_assert_eq!(
440 sliced.dtype(),
441 self.dtype(),
442 "Slice dtype mismatch {}",
443 self.encoding_id()
444 );
445
446 if !sliced.is::<ConstantVTable>() {
448 self.statistics().with_iter(|iter| {
449 sliced.statistics().inherit(iter.filter(|(stat, value)| {
450 matches!(
451 stat,
452 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
453 ) && value.as_ref().as_exact().is_some_and(|v| {
454 Scalar::new(DType::Bool(Nullability::NonNullable), v.clone())
455 .as_bool()
456 .value()
457 .unwrap_or_default()
458 })
459 }));
460 });
461 }
462
463 sliced
464 }
465
466 fn scalar_at(&self, index: usize) -> Scalar {
467 assert!(index < self.len(), "index {index} out of bounds");
468 if self.is_invalid(index) {
469 return Scalar::null(self.dtype().clone());
470 }
471 let scalar = <V::OperationsVTable as OperationsVTable<V>>::scalar_at(&self.0, index);
472 assert_eq!(self.dtype(), scalar.dtype(), "Scalar dtype mismatch");
473 scalar
474 }
475
476 fn is_valid(&self, index: usize) -> bool {
477 if index >= self.len() {
478 vortex_panic!(OutOfBounds: index, 0, self.len());
479 }
480 <V::ValidityVTable as ValidityVTable<V>>::is_valid(&self.0, index)
481 }
482
483 fn is_invalid(&self, index: usize) -> bool {
484 !self.is_valid(index)
485 }
486
487 fn all_valid(&self) -> bool {
488 <V::ValidityVTable as ValidityVTable<V>>::all_valid(&self.0)
489 }
490
491 fn all_invalid(&self) -> bool {
492 <V::ValidityVTable as ValidityVTable<V>>::all_invalid(&self.0)
493 }
494
495 fn valid_count(&self) -> usize {
496 if let Some(Precision::Exact(invalid_count)) =
497 self.statistics().get_as::<usize>(Stat::NullCount)
498 {
499 return self.len() - invalid_count;
500 }
501
502 let count = <V::ValidityVTable as ValidityVTable<V>>::valid_count(&self.0);
503 assert!(count <= self.len(), "Valid count exceeds array length");
504
505 self.statistics()
506 .set(Stat::NullCount, Precision::exact(self.len() - count));
507
508 count
509 }
510
511 fn invalid_count(&self) -> usize {
512 if let Some(Precision::Exact(invalid_count)) =
513 self.statistics().get_as::<usize>(Stat::NullCount)
514 {
515 return invalid_count;
516 }
517
518 let count = <V::ValidityVTable as ValidityVTable<V>>::invalid_count(&self.0);
519 assert!(count <= self.len(), "Invalid count exceeds array length");
520
521 self.statistics()
522 .set(Stat::NullCount, Precision::exact(count));
523
524 count
525 }
526
527 fn validity_mask(&self) -> Mask {
528 let mask = <V::ValidityVTable as ValidityVTable<V>>::validity_mask(&self.0);
529 assert_eq!(mask.len(), self.len(), "Validity mask length mismatch");
530 mask
531 }
532
533 fn to_canonical(&self) -> Canonical {
534 let canonical = <V::CanonicalVTable as CanonicalVTable<V>>::canonicalize(&self.0);
535 assert_eq!(
536 self.len(),
537 canonical.as_ref().len(),
538 "Canonical length mismatch {}. Expected {} but encoded into {}.",
539 self.encoding_id(),
540 self.len(),
541 canonical.as_ref().len()
542 );
543 assert_eq!(
544 self.dtype(),
545 canonical.as_ref().dtype(),
546 "Canonical dtype mismatch {}. Expected {} but encoded into {}.",
547 self.encoding_id(),
548 self.dtype(),
549 canonical.as_ref().dtype()
550 );
551 canonical
552 .as_ref()
553 .statistics()
554 .inherit_from(self.statistics());
555 canonical
556 }
557
558 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) {
559 if builder.dtype() != self.dtype() {
560 vortex_panic!(
561 "Builder dtype mismatch: expected {}, got {}",
562 self.dtype(),
563 builder.dtype(),
564 );
565 }
566 let len = builder.len();
567
568 <V::CanonicalVTable as CanonicalVTable<V>>::append_to_builder(&self.0, builder);
569 assert_eq!(
570 len + self.len(),
571 builder.len(),
572 "Builder length mismatch after writing array for encoding {}",
573 self.encoding_id(),
574 );
575 }
576
577 fn statistics(&self) -> StatsSetRef<'_> {
578 <V::ArrayVTable as ArrayVTable<V>>::stats(&self.0)
579 }
580
581 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
582 struct ReplacementChildren<'a> {
583 children: &'a [ArrayRef],
584 }
585
586 impl ArrayChildren for ReplacementChildren<'_> {
587 fn get(&self, index: usize, dtype: &DType, len: usize) -> VortexResult<ArrayRef> {
588 if index >= self.children.len() {
589 vortex_bail!(OutOfBounds: index, 0, self.children.len());
590 }
591 let child = &self.children[index];
592 if child.len() != len {
593 vortex_bail!(
594 "Child length mismatch: expected {}, got {}",
595 len,
596 child.len()
597 );
598 }
599 if child.dtype() != dtype {
600 vortex_bail!(
601 "Child dtype mismatch: expected {}, got {}",
602 dtype,
603 child.dtype()
604 );
605 }
606 Ok(child.clone())
607 }
608
609 fn len(&self) -> usize {
610 self.children.len()
611 }
612 }
613
614 let metadata = self.metadata()?.ok_or_else(|| {
615 vortex_err!("Cannot replace children for arrays that do not support serialization")
616 })?;
617
618 self.encoding().build(
620 self.dtype(),
621 self.len(),
622 &metadata,
623 &self.buffers(),
624 &ReplacementChildren { children },
625 )
626 }
627
628 fn invoke(
629 &self,
630 compute_fn: &ComputeFn,
631 args: &InvocationArgs,
632 ) -> VortexResult<Option<Output>> {
633 <V::ComputeVTable as ComputeVTable<V>>::invoke(&self.0, compute_fn, args)
634 }
635
636 fn to_operator(&self) -> VortexResult<Option<OperatorRef>> {
637 <V::PipelineVTable as PipelineVTable<V>>::to_operator(&self.0)
638 }
639}
640
641impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
642 fn children(&self) -> Vec<ArrayRef> {
643 struct ChildrenCollector {
644 children: Vec<ArrayRef>,
645 }
646
647 impl ArrayChildVisitor for ChildrenCollector {
648 fn visit_child(&mut self, _name: &str, array: &dyn Array) {
649 self.children.push(array.to_array());
650 }
651 }
652
653 let mut collector = ChildrenCollector {
654 children: Vec::new(),
655 };
656 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
657 collector.children
658 }
659
660 fn nchildren(&self) -> usize {
661 <V::VisitorVTable as VisitorVTable<V>>::nchildren(&self.0)
662 }
663
664 fn children_names(&self) -> Vec<String> {
665 struct ChildNameCollector {
666 names: Vec<String>,
667 }
668
669 impl ArrayChildVisitor for ChildNameCollector {
670 fn visit_child(&mut self, name: &str, _array: &dyn Array) {
671 self.names.push(name.to_string());
672 }
673 }
674
675 let mut collector = ChildNameCollector { names: Vec::new() };
676 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
677 collector.names
678 }
679
680 fn named_children(&self) -> Vec<(String, ArrayRef)> {
681 struct NamedChildrenCollector {
682 children: Vec<(String, ArrayRef)>,
683 }
684
685 impl ArrayChildVisitor for NamedChildrenCollector {
686 fn visit_child(&mut self, name: &str, array: &dyn Array) {
687 self.children.push((name.to_string(), array.to_array()));
688 }
689 }
690
691 let mut collector = NamedChildrenCollector {
692 children: Vec::new(),
693 };
694
695 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
696 collector.children
697 }
698
699 fn buffers(&self) -> Vec<ByteBuffer> {
700 struct BufferCollector {
701 buffers: Vec<ByteBuffer>,
702 }
703
704 impl ArrayBufferVisitor for BufferCollector {
705 fn visit_buffer(&mut self, buffer: &ByteBuffer) {
706 self.buffers.push(buffer.clone());
707 }
708 }
709
710 let mut collector = BufferCollector {
711 buffers: Vec::new(),
712 };
713 <V::VisitorVTable as VisitorVTable<V>>::visit_buffers(&self.0, &mut collector);
714 collector.buffers
715 }
716
717 fn nbuffers(&self) -> usize {
718 <V::VisitorVTable as VisitorVTable<V>>::nbuffers(&self.0)
719 }
720
721 fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
722 Ok(<V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0)?.map(|m| m.serialize()))
723 }
724
725 fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
726 match <V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0) {
727 Err(e) => write!(f, "<serde error: {e}>"),
728 Ok(None) => write!(f, "<serde not supported>"),
729 Ok(Some(metadata)) => Debug::fmt(&metadata, f),
730 }
731 }
732}