1pub mod display;
5mod visitor;
6
7use std::any::Any;
8use std::fmt::{Debug, Formatter};
9use std::ops::Range;
10use std::sync::Arc;
11
12pub use visitor::*;
13use vortex_buffer::ByteBuffer;
14use vortex_dtype::{DType, Nullability};
15use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err, vortex_panic};
16use vortex_mask::Mask;
17use vortex_scalar::Scalar;
18
19use crate::arrays::{
20 BoolEncoding, ConstantVTable, DecimalEncoding, ExtensionEncoding, ListEncoding, NullEncoding,
21 PrimitiveEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
22};
23use crate::builders::ArrayBuilder;
24use crate::compute::{ComputeFn, Cost, InvocationArgs, IsConstantOpts, Output, is_constant_opts};
25use crate::pipeline::{OperatorRef, PipelineVTable};
26use crate::serde::ArrayChildren;
27use crate::stats::{Precision, Stat, StatsProviderExt, StatsSetRef};
28use crate::vtable::{
29 ArrayVTable, CanonicalVTable, ComputeVTable, OperationsVTable, SerdeVTable, VTable,
30 ValidityVTable, VisitorVTable,
31};
32use crate::{Canonical, EncodingId, EncodingRef, SerializeMetadata};
33
34pub trait Array: 'static + private::Sealed + Send + Sync + Debug + ArrayVisitor {
36 fn as_any(&self) -> &dyn Any;
38
39 fn to_array(&self) -> ArrayRef;
41
42 fn len(&self) -> usize;
44
45 fn is_empty(&self) -> bool {
47 self.len() == 0
48 }
49
50 fn dtype(&self) -> &DType;
52
53 fn encoding(&self) -> EncodingRef;
55
56 fn encoding_id(&self) -> EncodingId;
58
59 fn slice(&self, range: Range<usize>) -> ArrayRef;
61
62 fn scalar_at(&self, index: usize) -> Scalar;
66
67 fn is_encoding(&self, encoding: EncodingId) -> bool {
69 self.encoding_id() == encoding
70 }
71
72 fn is_arrow(&self) -> bool {
75 self.is_encoding(NullEncoding.id())
76 || self.is_encoding(BoolEncoding.id())
77 || self.is_encoding(PrimitiveEncoding.id())
78 || self.is_encoding(VarBinEncoding.id())
79 || self.is_encoding(VarBinViewEncoding.id())
80 }
81
82 fn is_canonical(&self) -> bool {
85 self.is_encoding(NullEncoding.id())
86 || self.is_encoding(BoolEncoding.id())
87 || self.is_encoding(PrimitiveEncoding.id())
88 || self.is_encoding(DecimalEncoding.id())
89 || self.is_encoding(StructEncoding.id())
90 || self.is_encoding(ListEncoding.id())
91 || self.is_encoding(VarBinViewEncoding.id())
92 || self.is_encoding(ExtensionEncoding.id())
93 }
94
95 fn is_valid(&self, index: usize) -> bool;
97
98 fn is_invalid(&self, index: usize) -> bool;
100
101 fn all_valid(&self) -> bool;
105
106 fn all_invalid(&self) -> bool;
110
111 fn valid_count(&self) -> usize;
113
114 fn invalid_count(&self) -> usize;
116
117 fn validity_mask(&self) -> Mask;
119
120 fn to_canonical(&self) -> VortexResult<Canonical>;
122
123 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()>;
127
128 fn statistics(&self) -> StatsSetRef<'_>;
131
132 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef>;
134
135 fn invoke(&self, compute_fn: &ComputeFn, args: &InvocationArgs)
152 -> VortexResult<Option<Output>>;
153
154 fn to_operator(&self) -> VortexResult<Option<OperatorRef>>;
158}
159
160impl Array for Arc<dyn Array> {
161 fn as_any(&self) -> &dyn Any {
162 self.as_ref().as_any()
163 }
164
165 fn to_array(&self) -> ArrayRef {
166 self.clone()
167 }
168
169 fn len(&self) -> usize {
170 self.as_ref().len()
171 }
172
173 fn dtype(&self) -> &DType {
174 self.as_ref().dtype()
175 }
176
177 fn encoding(&self) -> EncodingRef {
178 self.as_ref().encoding()
179 }
180
181 fn encoding_id(&self) -> EncodingId {
182 self.as_ref().encoding_id()
183 }
184
185 fn slice(&self, range: Range<usize>) -> ArrayRef {
186 self.as_ref().slice(range)
187 }
188
189 fn scalar_at(&self, index: usize) -> Scalar {
190 self.as_ref().scalar_at(index)
191 }
192
193 fn is_valid(&self, index: usize) -> bool {
194 self.as_ref().is_valid(index)
195 }
196
197 fn is_invalid(&self, index: usize) -> bool {
198 self.as_ref().is_invalid(index)
199 }
200
201 fn all_valid(&self) -> bool {
202 self.as_ref().all_valid()
203 }
204
205 fn all_invalid(&self) -> bool {
206 self.as_ref().all_invalid()
207 }
208
209 fn valid_count(&self) -> usize {
210 self.as_ref().valid_count()
211 }
212
213 fn invalid_count(&self) -> usize {
214 self.as_ref().invalid_count()
215 }
216
217 fn validity_mask(&self) -> Mask {
218 self.as_ref().validity_mask()
219 }
220
221 fn to_canonical(&self) -> VortexResult<Canonical> {
222 self.as_ref().to_canonical()
223 }
224
225 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
226 self.as_ref().append_to_builder(builder)
227 }
228
229 fn statistics(&self) -> StatsSetRef<'_> {
230 self.as_ref().statistics()
231 }
232
233 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
234 self.as_ref().with_children(children)
235 }
236
237 fn invoke(
238 &self,
239 compute_fn: &ComputeFn,
240 args: &InvocationArgs,
241 ) -> VortexResult<Option<Output>> {
242 self.as_ref().invoke(compute_fn, args)
243 }
244
245 fn to_operator(&self) -> VortexResult<Option<OperatorRef>> {
246 self.as_ref().to_operator()
247 }
248}
249
/// A reference-counted, type-erased handle to an [`Array`].
pub type ArrayRef = Arc<dyn Array>;
252
253impl ToOwned for dyn Array {
254 type Owned = ArrayRef;
255
256 fn to_owned(&self) -> Self::Owned {
257 self.to_array()
258 }
259}
260
261impl dyn Array + '_ {
262 pub fn as_<V: VTable>(&self) -> &V::Array {
264 self.as_opt::<V>().vortex_expect("Failed to downcast")
265 }
266
267 pub fn as_opt<V: VTable>(&self) -> Option<&V::Array> {
269 self.as_any()
270 .downcast_ref::<ArrayAdapter<V>>()
271 .map(|array_adapter| &array_adapter.0)
272 }
273
274 pub fn is<V: VTable>(&self) -> bool {
276 self.as_opt::<V>().is_some()
277 }
278
279 pub fn is_constant(&self) -> bool {
280 let opts = IsConstantOpts {
281 cost: Cost::Specialized,
282 };
283 is_constant_opts(self, &opts)
284 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
285 .ok()
286 .flatten()
287 .unwrap_or_default()
288 }
289
290 pub fn is_constant_opts(&self, cost: Cost) -> bool {
291 let opts = IsConstantOpts { cost };
292 is_constant_opts(self, &opts)
293 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
294 .ok()
295 .flatten()
296 .unwrap_or_default()
297 }
298
299 pub fn as_constant(&self) -> Option<Scalar> {
300 self.is_constant().then(|| self.scalar_at(0))
301 }
302
303 pub fn nbytes(&self) -> u64 {
305 let mut nbytes = 0;
306 for array in self.depth_first_traversal() {
307 for buffer in array.buffers() {
308 nbytes += buffer.len() as u64;
309 }
310 }
311 nbytes
312 }
313}
314
/// Conversion of an owned value into an [`ArrayRef`].
pub trait IntoArray {
    /// Consumes `self` and returns it as an [`ArrayRef`].
    fn into_array(self) -> ArrayRef;
}
319
/// An [`ArrayRef`] is already an array handle, so the conversion is the identity.
impl IntoArray for ArrayRef {
    /// Returns `self` unchanged.
    fn into_array(self) -> ArrayRef {
        self
    }
}
325
326mod private {
327 use super::*;
328
329 pub trait Sealed {}
330
331 impl<V: VTable> Sealed for ArrayAdapter<V> {}
332 impl Sealed for Arc<dyn Array> {}
333}
334
/// Newtype around an encoding's concrete array type (`V::Array`).
///
/// The [`Array`] and [`ArrayVisitor`] implementations in this file are written against this
/// wrapper and forward to the vtables named by `V`.
///
/// `#[repr(transparent)]` gives the wrapper the same layout as the `V::Array` it holds.
#[repr(transparent)]
pub struct ArrayAdapter<V: VTable>(V::Array);
343
344impl<V: VTable> ArrayAdapter<V> {
345 pub fn as_inner(&self) -> &V::Array {
347 &self.0
348 }
349
350 pub fn into_inner(self) -> V::Array {
352 self.0
353 }
354}
355
356impl<V: VTable> Debug for ArrayAdapter<V> {
357 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
358 self.0.fmt(f)
359 }
360}
361
362impl<V: VTable> Array for ArrayAdapter<V> {
363 fn as_any(&self) -> &dyn Any {
364 self
365 }
366
367 fn to_array(&self) -> ArrayRef {
368 Arc::new(ArrayAdapter::<V>(self.0.clone()))
369 }
370
371 fn len(&self) -> usize {
372 <V::ArrayVTable as ArrayVTable<V>>::len(&self.0)
373 }
374
375 fn dtype(&self) -> &DType {
376 <V::ArrayVTable as ArrayVTable<V>>::dtype(&self.0)
377 }
378
379 fn encoding(&self) -> EncodingRef {
380 V::encoding(&self.0)
381 }
382
383 fn encoding_id(&self) -> EncodingId {
384 V::encoding(&self.0).id()
385 }
386
387 fn slice(&self, range: Range<usize>) -> ArrayRef {
388 let start = range.start;
389 let stop = range.end;
390
391 if start == 0 && stop == self.len() {
392 return self.to_array();
393 }
394
395 assert!(
396 start <= self.len(),
397 "OutOfBounds: start {start} > length {}",
398 self.len()
399 );
400 assert!(
401 stop <= self.len(),
402 "OutOfBounds: stop {stop} > length {}",
403 self.len()
404 );
405
406 assert!(start <= stop, "start ({start}) must be <= stop ({stop})");
407
408 if start == stop {
409 return Canonical::empty(self.dtype()).into_array();
410 }
411
412 let sliced = <V::OperationsVTable as OperationsVTable<V>>::slice(&self.0, range);
413
414 assert_eq!(
415 sliced.len(),
416 stop - start,
417 "Slice length mismatch {}",
418 self.encoding_id()
419 );
420
421 debug_assert_eq!(
423 sliced.dtype(),
424 self.dtype(),
425 "Slice dtype mismatch {}",
426 self.encoding_id()
427 );
428
429 if !sliced.is::<ConstantVTable>() {
431 self.statistics().with_iter(|iter| {
432 sliced.statistics().inherit(iter.filter(|(stat, value)| {
433 matches!(
434 stat,
435 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
436 ) && value.as_ref().as_exact().is_some_and(|v| {
437 Scalar::new(DType::Bool(Nullability::NonNullable), v.clone())
438 .as_bool()
439 .value()
440 .unwrap_or_default()
441 })
442 }));
443 });
444 }
445
446 sliced
447 }
448
449 fn scalar_at(&self, index: usize) -> Scalar {
450 assert!(index < self.len(), "index {index} out of bounds");
451 if self.is_invalid(index) {
452 return Scalar::null(self.dtype().clone());
453 }
454 let scalar = <V::OperationsVTable as OperationsVTable<V>>::scalar_at(&self.0, index);
455 assert_eq!(self.dtype(), scalar.dtype(), "Scalar dtype mismatch");
456 scalar
457 }
458
459 fn is_valid(&self, index: usize) -> bool {
460 if index >= self.len() {
461 vortex_panic!(OutOfBounds: index, 0, self.len());
462 }
463 <V::ValidityVTable as ValidityVTable<V>>::is_valid(&self.0, index)
464 }
465
466 fn is_invalid(&self, index: usize) -> bool {
467 !self.is_valid(index)
468 }
469
470 fn all_valid(&self) -> bool {
471 <V::ValidityVTable as ValidityVTable<V>>::all_valid(&self.0)
472 }
473
474 fn all_invalid(&self) -> bool {
475 <V::ValidityVTable as ValidityVTable<V>>::all_invalid(&self.0)
476 }
477
478 fn valid_count(&self) -> usize {
479 if let Some(Precision::Exact(invalid_count)) =
480 self.statistics().get_as::<usize>(Stat::NullCount)
481 {
482 return self.len() - invalid_count;
483 }
484
485 let count = <V::ValidityVTable as ValidityVTable<V>>::valid_count(&self.0);
486 assert!(count <= self.len(), "Valid count exceeds array length");
487
488 self.statistics()
489 .set(Stat::NullCount, Precision::exact(self.len() - count));
490
491 count
492 }
493
494 fn invalid_count(&self) -> usize {
495 if let Some(Precision::Exact(invalid_count)) =
496 self.statistics().get_as::<usize>(Stat::NullCount)
497 {
498 return invalid_count;
499 }
500
501 let count = <V::ValidityVTable as ValidityVTable<V>>::invalid_count(&self.0);
502 assert!(count <= self.len(), "Invalid count exceeds array length");
503
504 self.statistics()
505 .set(Stat::NullCount, Precision::exact(count));
506
507 count
508 }
509
510 fn validity_mask(&self) -> Mask {
511 let mask = <V::ValidityVTable as ValidityVTable<V>>::validity_mask(&self.0);
512 assert_eq!(mask.len(), self.len(), "Validity mask length mismatch");
513 mask
514 }
515
516 fn to_canonical(&self) -> VortexResult<Canonical> {
517 let canonical = <V::CanonicalVTable as CanonicalVTable<V>>::canonicalize(&self.0)?;
518 assert_eq!(
519 self.len(),
520 canonical.as_ref().len(),
521 "Canonical length mismatch {}. Expected {} but encoded into {}.",
522 self.encoding_id(),
523 self.len(),
524 canonical.as_ref().len()
525 );
526 assert_eq!(
527 self.dtype(),
528 canonical.as_ref().dtype(),
529 "Canonical dtype mismatch {}. Expected {} but encoded into {}.",
530 self.encoding_id(),
531 self.dtype(),
532 canonical.as_ref().dtype()
533 );
534 canonical
535 .as_ref()
536 .statistics()
537 .inherit_from(self.statistics());
538 Ok(canonical)
539 }
540
541 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
542 if builder.dtype() != self.dtype() {
543 vortex_bail!(
544 "Builder dtype mismatch: expected {}, got {}",
545 self.dtype(),
546 builder.dtype(),
547 );
548 }
549 let len = builder.len();
550
551 <V::CanonicalVTable as CanonicalVTable<V>>::append_to_builder(&self.0, builder)?;
552 assert_eq!(
553 len + self.len(),
554 builder.len(),
555 "Builder length mismatch after writing array for encoding {}",
556 self.encoding_id(),
557 );
558 Ok(())
559 }
560
561 fn statistics(&self) -> StatsSetRef<'_> {
562 <V::ArrayVTable as ArrayVTable<V>>::stats(&self.0)
563 }
564
565 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
566 struct ReplacementChildren<'a> {
567 children: &'a [ArrayRef],
568 }
569
570 impl ArrayChildren for ReplacementChildren<'_> {
571 fn get(&self, index: usize, dtype: &DType, len: usize) -> VortexResult<ArrayRef> {
572 if index >= self.children.len() {
573 vortex_bail!(OutOfBounds: index, 0, self.children.len());
574 }
575 let child = &self.children[index];
576 if child.len() != len {
577 vortex_bail!(
578 "Child length mismatch: expected {}, got {}",
579 len,
580 child.len()
581 );
582 }
583 if child.dtype() != dtype {
584 vortex_bail!(
585 "Child dtype mismatch: expected {}, got {}",
586 dtype,
587 child.dtype()
588 );
589 }
590 Ok(child.clone())
591 }
592
593 fn len(&self) -> usize {
594 self.children.len()
595 }
596 }
597
598 let metadata = self.metadata()?.ok_or_else(|| {
599 vortex_err!("Cannot replace children for arrays that do not support serialization")
600 })?;
601
602 self.encoding().build(
604 self.dtype(),
605 self.len(),
606 &metadata,
607 &self.buffers(),
608 &ReplacementChildren { children },
609 )
610 }
611
612 fn invoke(
613 &self,
614 compute_fn: &ComputeFn,
615 args: &InvocationArgs,
616 ) -> VortexResult<Option<Output>> {
617 <V::ComputeVTable as ComputeVTable<V>>::invoke(&self.0, compute_fn, args)
618 }
619
620 fn to_operator(&self) -> VortexResult<Option<OperatorRef>> {
621 <V::PipelineVTable as PipelineVTable<V>>::to_operator(&self.0)
622 }
623}
624
625impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
626 fn children(&self) -> Vec<ArrayRef> {
627 struct ChildrenCollector {
628 children: Vec<ArrayRef>,
629 }
630
631 impl ArrayChildVisitor for ChildrenCollector {
632 fn visit_child(&mut self, _name: &str, array: &dyn Array) {
633 self.children.push(array.to_array());
634 }
635 }
636
637 let mut collector = ChildrenCollector {
638 children: Vec::new(),
639 };
640 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
641 collector.children
642 }
643
644 fn nchildren(&self) -> usize {
645 <V::VisitorVTable as VisitorVTable<V>>::nchildren(&self.0)
646 }
647
648 fn children_names(&self) -> Vec<String> {
649 struct ChildNameCollector {
650 names: Vec<String>,
651 }
652
653 impl ArrayChildVisitor for ChildNameCollector {
654 fn visit_child(&mut self, name: &str, _array: &dyn Array) {
655 self.names.push(name.to_string());
656 }
657 }
658
659 let mut collector = ChildNameCollector { names: Vec::new() };
660 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
661 collector.names
662 }
663
664 fn named_children(&self) -> Vec<(String, ArrayRef)> {
665 struct NamedChildrenCollector {
666 children: Vec<(String, ArrayRef)>,
667 }
668
669 impl ArrayChildVisitor for NamedChildrenCollector {
670 fn visit_child(&mut self, name: &str, array: &dyn Array) {
671 self.children.push((name.to_string(), array.to_array()));
672 }
673 }
674
675 let mut collector = NamedChildrenCollector {
676 children: Vec::new(),
677 };
678
679 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
680 collector.children
681 }
682
683 fn buffers(&self) -> Vec<ByteBuffer> {
684 struct BufferCollector {
685 buffers: Vec<ByteBuffer>,
686 }
687
688 impl ArrayBufferVisitor for BufferCollector {
689 fn visit_buffer(&mut self, buffer: &ByteBuffer) {
690 self.buffers.push(buffer.clone());
691 }
692 }
693
694 let mut collector = BufferCollector {
695 buffers: Vec::new(),
696 };
697 <V::VisitorVTable as VisitorVTable<V>>::visit_buffers(&self.0, &mut collector);
698 collector.buffers
699 }
700
701 fn nbuffers(&self) -> usize {
702 <V::VisitorVTable as VisitorVTable<V>>::nbuffers(&self.0)
703 }
704
705 fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
706 Ok(<V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0)?.map(|m| m.serialize()))
707 }
708
709 fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
710 match <V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0) {
711 Err(e) => write!(f, "<serde error: {e}>"),
712 Ok(None) => write!(f, "<serde not supported>"),
713 Ok(Some(metadata)) => Debug::fmt(&metadata, f),
714 }
715 }
716}