1pub mod display;
5mod visitor;
6
7use std::any::Any;
8use std::fmt::{Debug, Formatter};
9use std::ops::Range;
10use std::sync::Arc;
11
12pub use visitor::*;
13use vortex_buffer::ByteBuffer;
14use vortex_dtype::{DType, Nullability};
15use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err, vortex_panic};
16use vortex_mask::Mask;
17use vortex_scalar::Scalar;
18
19use crate::arrays::{
20 BoolEncoding, ConstantVTable, DecimalEncoding, ExtensionEncoding, ListEncoding, NullEncoding,
21 PrimitiveEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
22};
23use crate::builders::ArrayBuilder;
24use crate::compute::{ComputeFn, Cost, InvocationArgs, IsConstantOpts, Output, is_constant_opts};
25use crate::pipeline::{OperatorRef, PipelineVTable};
26use crate::serde::ArrayChildren;
27use crate::stats::{Precision, Stat, StatsProviderExt, StatsSetRef};
28use crate::vtable::{
29 ArrayVTable, CanonicalVTable, ComputeVTable, OperationsVTable, SerdeVTable, VTable,
30 ValidityVTable, VisitorVTable,
31};
32use crate::{Canonical, EncodingId, EncodingRef, SerializeMetadata};
33
34pub trait Array: 'static + private::Sealed + Send + Sync + Debug + ArrayVisitor {
36 fn as_any(&self) -> &dyn Any;
38
39 fn to_array(&self) -> ArrayRef;
41
42 fn len(&self) -> usize;
44
45 fn is_empty(&self) -> bool {
47 self.len() == 0
48 }
49
50 fn dtype(&self) -> &DType;
52
53 fn encoding(&self) -> EncodingRef;
55
56 fn encoding_id(&self) -> EncodingId;
58
59 fn slice(&self, range: Range<usize>) -> ArrayRef;
61
62 fn scalar_at(&self, index: usize) -> Scalar;
66
67 fn is_encoding(&self, encoding: EncodingId) -> bool {
69 self.encoding_id() == encoding
70 }
71
72 fn is_arrow(&self) -> bool {
75 self.is_encoding(NullEncoding.id())
76 || self.is_encoding(BoolEncoding.id())
77 || self.is_encoding(PrimitiveEncoding.id())
78 || self.is_encoding(VarBinEncoding.id())
79 || self.is_encoding(VarBinViewEncoding.id())
80 }
81
82 fn is_canonical(&self) -> bool {
85 self.is_encoding(NullEncoding.id())
86 || self.is_encoding(BoolEncoding.id())
87 || self.is_encoding(PrimitiveEncoding.id())
88 || self.is_encoding(DecimalEncoding.id())
89 || self.is_encoding(StructEncoding.id())
90 || self.is_encoding(ListEncoding.id())
91 || self.is_encoding(VarBinViewEncoding.id())
92 || self.is_encoding(ExtensionEncoding.id())
93 }
94
95 fn is_valid(&self, index: usize) -> bool;
97
98 fn is_invalid(&self, index: usize) -> bool;
100
101 fn all_valid(&self) -> bool;
105
106 fn all_invalid(&self) -> bool;
110
111 fn valid_count(&self) -> usize;
113
114 fn invalid_count(&self) -> usize;
116
117 fn validity_mask(&self) -> Mask;
119
120 fn to_canonical(&self) -> Canonical;
122
123 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder);
127
128 fn statistics(&self) -> StatsSetRef<'_>;
131
132 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef>;
134
135 fn invoke(&self, compute_fn: &ComputeFn, args: &InvocationArgs)
152 -> VortexResult<Option<Output>>;
153
154 fn to_operator(&self) -> VortexResult<Option<OperatorRef>>;
158}
159
160impl Array for Arc<dyn Array> {
161 #[inline]
162 fn as_any(&self) -> &dyn Any {
163 self.as_ref().as_any()
164 }
165
166 #[inline]
167 fn to_array(&self) -> ArrayRef {
168 self.clone()
169 }
170
171 #[inline]
172 fn len(&self) -> usize {
173 self.as_ref().len()
174 }
175
176 #[inline]
177 fn dtype(&self) -> &DType {
178 self.as_ref().dtype()
179 }
180
181 #[inline]
182 fn encoding(&self) -> EncodingRef {
183 self.as_ref().encoding()
184 }
185
186 #[inline]
187 fn encoding_id(&self) -> EncodingId {
188 self.as_ref().encoding_id()
189 }
190
191 #[inline]
192 fn slice(&self, range: Range<usize>) -> ArrayRef {
193 self.as_ref().slice(range)
194 }
195
196 #[inline]
197 fn scalar_at(&self, index: usize) -> Scalar {
198 self.as_ref().scalar_at(index)
199 }
200
201 #[inline]
202 fn is_valid(&self, index: usize) -> bool {
203 self.as_ref().is_valid(index)
204 }
205
206 #[inline]
207 fn is_invalid(&self, index: usize) -> bool {
208 self.as_ref().is_invalid(index)
209 }
210
211 #[inline]
212 fn all_valid(&self) -> bool {
213 self.as_ref().all_valid()
214 }
215
216 #[inline]
217 fn all_invalid(&self) -> bool {
218 self.as_ref().all_invalid()
219 }
220
221 #[inline]
222 fn valid_count(&self) -> usize {
223 self.as_ref().valid_count()
224 }
225
226 #[inline]
227 fn invalid_count(&self) -> usize {
228 self.as_ref().invalid_count()
229 }
230
231 #[inline]
232 fn validity_mask(&self) -> Mask {
233 self.as_ref().validity_mask()
234 }
235
236 fn to_canonical(&self) -> Canonical {
237 self.as_ref().to_canonical()
238 }
239
240 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) {
241 self.as_ref().append_to_builder(builder)
242 }
243
244 fn statistics(&self) -> StatsSetRef<'_> {
245 self.as_ref().statistics()
246 }
247
248 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
249 self.as_ref().with_children(children)
250 }
251
252 fn invoke(
253 &self,
254 compute_fn: &ComputeFn,
255 args: &InvocationArgs,
256 ) -> VortexResult<Option<Output>> {
257 self.as_ref().invoke(compute_fn, args)
258 }
259
260 fn to_operator(&self) -> VortexResult<Option<OperatorRef>> {
261 self.as_ref().to_operator()
262 }
263}
264
/// A reference-counted, type-erased handle to an [`Array`].
pub type ArrayRef = Arc<dyn Array>;
267
268impl ToOwned for dyn Array {
269 type Owned = ArrayRef;
270
271 fn to_owned(&self) -> Self::Owned {
272 self.to_array()
273 }
274}
275
276impl dyn Array + '_ {
277 pub fn as_<V: VTable>(&self) -> &V::Array {
279 self.as_opt::<V>().vortex_expect("Failed to downcast")
280 }
281
282 pub fn as_opt<V: VTable>(&self) -> Option<&V::Array> {
284 self.as_any()
285 .downcast_ref::<ArrayAdapter<V>>()
286 .map(|array_adapter| &array_adapter.0)
287 }
288
289 pub fn is<V: VTable>(&self) -> bool {
291 self.as_opt::<V>().is_some()
292 }
293
294 pub fn is_constant(&self) -> bool {
295 let opts = IsConstantOpts {
296 cost: Cost::Specialized,
297 };
298 is_constant_opts(self, &opts)
299 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
300 .ok()
301 .flatten()
302 .unwrap_or_default()
303 }
304
305 pub fn is_constant_opts(&self, cost: Cost) -> bool {
306 let opts = IsConstantOpts { cost };
307 is_constant_opts(self, &opts)
308 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
309 .ok()
310 .flatten()
311 .unwrap_or_default()
312 }
313
314 pub fn as_constant(&self) -> Option<Scalar> {
315 self.is_constant().then(|| self.scalar_at(0))
316 }
317
318 pub fn nbytes(&self) -> u64 {
320 let mut nbytes = 0;
321 for array in self.depth_first_traversal() {
322 for buffer in array.buffers() {
323 nbytes += buffer.len() as u64;
324 }
325 }
326 nbytes
327 }
328}
329
/// Conversion of an owned value into an [`ArrayRef`].
pub trait IntoArray {
    /// Consumes `self` and returns it as an [`ArrayRef`].
    fn into_array(self) -> ArrayRef;
}
334
/// An [`ArrayRef`] is already in the target form.
impl IntoArray for ArrayRef {
    /// Returns `self` unchanged.
    fn into_array(self) -> ArrayRef {
        self
    }
}
340
/// Private module holding the marker trait that seals [`Array`].
mod private {
    use super::*;

    /// Marker supertrait of [`Array`]; a type must implement it to implement `Array`.
    pub trait Sealed {}

    // The marker is implemented for the adapter type and for the shared array handle.
    impl<V: VTable> Sealed for ArrayAdapter<V> {}
    impl Sealed for Arc<dyn Array> {}
}
349
/// Newtype around a `V::Array` on which the [`Array`] and [`ArrayVisitor`] traits are
/// implemented for any [`VTable`] `V`.
///
/// `#[repr(transparent)]` gives the adapter the same layout as the wrapped `V::Array`.
#[repr(transparent)]
pub struct ArrayAdapter<V: VTable>(V::Array);
358
359impl<V: VTable> ArrayAdapter<V> {
360 pub fn as_inner(&self) -> &V::Array {
362 &self.0
363 }
364
365 pub fn into_inner(self) -> V::Array {
367 self.0
368 }
369}
370
371impl<V: VTable> Debug for ArrayAdapter<V> {
372 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
373 self.0.fmt(f)
374 }
375}
376
377impl<V: VTable> Array for ArrayAdapter<V> {
378 fn as_any(&self) -> &dyn Any {
379 self
380 }
381
382 fn to_array(&self) -> ArrayRef {
383 Arc::new(ArrayAdapter::<V>(self.0.clone()))
384 }
385
386 fn len(&self) -> usize {
387 <V::ArrayVTable as ArrayVTable<V>>::len(&self.0)
388 }
389
390 fn dtype(&self) -> &DType {
391 <V::ArrayVTable as ArrayVTable<V>>::dtype(&self.0)
392 }
393
394 fn encoding(&self) -> EncodingRef {
395 V::encoding(&self.0)
396 }
397
398 fn encoding_id(&self) -> EncodingId {
399 V::encoding(&self.0).id()
400 }
401
402 fn slice(&self, range: Range<usize>) -> ArrayRef {
403 let start = range.start;
404 let stop = range.end;
405
406 if start == 0 && stop == self.len() {
407 return self.to_array();
408 }
409
410 assert!(
411 start <= self.len(),
412 "OutOfBounds: start {start} > length {}",
413 self.len()
414 );
415 assert!(
416 stop <= self.len(),
417 "OutOfBounds: stop {stop} > length {}",
418 self.len()
419 );
420
421 assert!(start <= stop, "start ({start}) must be <= stop ({stop})");
422
423 if start == stop {
424 return Canonical::empty(self.dtype()).into_array();
425 }
426
427 let sliced = <V::OperationsVTable as OperationsVTable<V>>::slice(&self.0, range);
428
429 assert_eq!(
430 sliced.len(),
431 stop - start,
432 "Slice length mismatch {}",
433 self.encoding_id()
434 );
435
436 debug_assert_eq!(
438 sliced.dtype(),
439 self.dtype(),
440 "Slice dtype mismatch {}",
441 self.encoding_id()
442 );
443
444 if !sliced.is::<ConstantVTable>() {
446 self.statistics().with_iter(|iter| {
447 sliced.statistics().inherit(iter.filter(|(stat, value)| {
448 matches!(
449 stat,
450 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
451 ) && value.as_ref().as_exact().is_some_and(|v| {
452 Scalar::new(DType::Bool(Nullability::NonNullable), v.clone())
453 .as_bool()
454 .value()
455 .unwrap_or_default()
456 })
457 }));
458 });
459 }
460
461 sliced
462 }
463
464 fn scalar_at(&self, index: usize) -> Scalar {
465 assert!(index < self.len(), "index {index} out of bounds");
466 if self.is_invalid(index) {
467 return Scalar::null(self.dtype().clone());
468 }
469 let scalar = <V::OperationsVTable as OperationsVTable<V>>::scalar_at(&self.0, index);
470 assert_eq!(self.dtype(), scalar.dtype(), "Scalar dtype mismatch");
471 scalar
472 }
473
474 fn is_valid(&self, index: usize) -> bool {
475 if index >= self.len() {
476 vortex_panic!(OutOfBounds: index, 0, self.len());
477 }
478 <V::ValidityVTable as ValidityVTable<V>>::is_valid(&self.0, index)
479 }
480
481 fn is_invalid(&self, index: usize) -> bool {
482 !self.is_valid(index)
483 }
484
485 fn all_valid(&self) -> bool {
486 <V::ValidityVTable as ValidityVTable<V>>::all_valid(&self.0)
487 }
488
489 fn all_invalid(&self) -> bool {
490 <V::ValidityVTable as ValidityVTable<V>>::all_invalid(&self.0)
491 }
492
493 fn valid_count(&self) -> usize {
494 if let Some(Precision::Exact(invalid_count)) =
495 self.statistics().get_as::<usize>(Stat::NullCount)
496 {
497 return self.len() - invalid_count;
498 }
499
500 let count = <V::ValidityVTable as ValidityVTable<V>>::valid_count(&self.0);
501 assert!(count <= self.len(), "Valid count exceeds array length");
502
503 self.statistics()
504 .set(Stat::NullCount, Precision::exact(self.len() - count));
505
506 count
507 }
508
509 fn invalid_count(&self) -> usize {
510 if let Some(Precision::Exact(invalid_count)) =
511 self.statistics().get_as::<usize>(Stat::NullCount)
512 {
513 return invalid_count;
514 }
515
516 let count = <V::ValidityVTable as ValidityVTable<V>>::invalid_count(&self.0);
517 assert!(count <= self.len(), "Invalid count exceeds array length");
518
519 self.statistics()
520 .set(Stat::NullCount, Precision::exact(count));
521
522 count
523 }
524
525 fn validity_mask(&self) -> Mask {
526 let mask = <V::ValidityVTable as ValidityVTable<V>>::validity_mask(&self.0);
527 assert_eq!(mask.len(), self.len(), "Validity mask length mismatch");
528 mask
529 }
530
531 fn to_canonical(&self) -> Canonical {
532 let canonical = <V::CanonicalVTable as CanonicalVTable<V>>::canonicalize(&self.0);
533 assert_eq!(
534 self.len(),
535 canonical.as_ref().len(),
536 "Canonical length mismatch {}. Expected {} but encoded into {}.",
537 self.encoding_id(),
538 self.len(),
539 canonical.as_ref().len()
540 );
541 assert_eq!(
542 self.dtype(),
543 canonical.as_ref().dtype(),
544 "Canonical dtype mismatch {}. Expected {} but encoded into {}.",
545 self.encoding_id(),
546 self.dtype(),
547 canonical.as_ref().dtype()
548 );
549 canonical
550 .as_ref()
551 .statistics()
552 .inherit_from(self.statistics());
553 canonical
554 }
555
556 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) {
557 if builder.dtype() != self.dtype() {
558 vortex_panic!(
559 "Builder dtype mismatch: expected {}, got {}",
560 self.dtype(),
561 builder.dtype(),
562 );
563 }
564 let len = builder.len();
565
566 <V::CanonicalVTable as CanonicalVTable<V>>::append_to_builder(&self.0, builder);
567 assert_eq!(
568 len + self.len(),
569 builder.len(),
570 "Builder length mismatch after writing array for encoding {}",
571 self.encoding_id(),
572 );
573 }
574
575 fn statistics(&self) -> StatsSetRef<'_> {
576 <V::ArrayVTable as ArrayVTable<V>>::stats(&self.0)
577 }
578
579 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
580 struct ReplacementChildren<'a> {
581 children: &'a [ArrayRef],
582 }
583
584 impl ArrayChildren for ReplacementChildren<'_> {
585 fn get(&self, index: usize, dtype: &DType, len: usize) -> VortexResult<ArrayRef> {
586 if index >= self.children.len() {
587 vortex_bail!(OutOfBounds: index, 0, self.children.len());
588 }
589 let child = &self.children[index];
590 if child.len() != len {
591 vortex_bail!(
592 "Child length mismatch: expected {}, got {}",
593 len,
594 child.len()
595 );
596 }
597 if child.dtype() != dtype {
598 vortex_bail!(
599 "Child dtype mismatch: expected {}, got {}",
600 dtype,
601 child.dtype()
602 );
603 }
604 Ok(child.clone())
605 }
606
607 fn len(&self) -> usize {
608 self.children.len()
609 }
610 }
611
612 let metadata = self.metadata()?.ok_or_else(|| {
613 vortex_err!("Cannot replace children for arrays that do not support serialization")
614 })?;
615
616 self.encoding().build(
618 self.dtype(),
619 self.len(),
620 &metadata,
621 &self.buffers(),
622 &ReplacementChildren { children },
623 )
624 }
625
626 fn invoke(
627 &self,
628 compute_fn: &ComputeFn,
629 args: &InvocationArgs,
630 ) -> VortexResult<Option<Output>> {
631 <V::ComputeVTable as ComputeVTable<V>>::invoke(&self.0, compute_fn, args)
632 }
633
634 fn to_operator(&self) -> VortexResult<Option<OperatorRef>> {
635 <V::PipelineVTable as PipelineVTable<V>>::to_operator(&self.0)
636 }
637}
638
639impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
640 fn children(&self) -> Vec<ArrayRef> {
641 struct ChildrenCollector {
642 children: Vec<ArrayRef>,
643 }
644
645 impl ArrayChildVisitor for ChildrenCollector {
646 fn visit_child(&mut self, _name: &str, array: &dyn Array) {
647 self.children.push(array.to_array());
648 }
649 }
650
651 let mut collector = ChildrenCollector {
652 children: Vec::new(),
653 };
654 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
655 collector.children
656 }
657
658 fn nchildren(&self) -> usize {
659 <V::VisitorVTable as VisitorVTable<V>>::nchildren(&self.0)
660 }
661
662 fn children_names(&self) -> Vec<String> {
663 struct ChildNameCollector {
664 names: Vec<String>,
665 }
666
667 impl ArrayChildVisitor for ChildNameCollector {
668 fn visit_child(&mut self, name: &str, _array: &dyn Array) {
669 self.names.push(name.to_string());
670 }
671 }
672
673 let mut collector = ChildNameCollector { names: Vec::new() };
674 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
675 collector.names
676 }
677
678 fn named_children(&self) -> Vec<(String, ArrayRef)> {
679 struct NamedChildrenCollector {
680 children: Vec<(String, ArrayRef)>,
681 }
682
683 impl ArrayChildVisitor for NamedChildrenCollector {
684 fn visit_child(&mut self, name: &str, array: &dyn Array) {
685 self.children.push((name.to_string(), array.to_array()));
686 }
687 }
688
689 let mut collector = NamedChildrenCollector {
690 children: Vec::new(),
691 };
692
693 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
694 collector.children
695 }
696
697 fn buffers(&self) -> Vec<ByteBuffer> {
698 struct BufferCollector {
699 buffers: Vec<ByteBuffer>,
700 }
701
702 impl ArrayBufferVisitor for BufferCollector {
703 fn visit_buffer(&mut self, buffer: &ByteBuffer) {
704 self.buffers.push(buffer.clone());
705 }
706 }
707
708 let mut collector = BufferCollector {
709 buffers: Vec::new(),
710 };
711 <V::VisitorVTable as VisitorVTable<V>>::visit_buffers(&self.0, &mut collector);
712 collector.buffers
713 }
714
715 fn nbuffers(&self) -> usize {
716 <V::VisitorVTable as VisitorVTable<V>>::nbuffers(&self.0)
717 }
718
719 fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
720 Ok(<V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0)?.map(|m| m.serialize()))
721 }
722
723 fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
724 match <V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0) {
725 Err(e) => write!(f, "<serde error: {e}>"),
726 Ok(None) => write!(f, "<serde not supported>"),
727 Ok(Some(metadata)) => Debug::fmt(&metadata, f),
728 }
729 }
730}