1pub mod display;
5mod visitor;
6
7use std::any::Any;
8use std::fmt::{Debug, Formatter};
9use std::rc::Rc;
10use std::sync::Arc;
11
12pub use visitor::*;
13use vortex_buffer::ByteBuffer;
14use vortex_dtype::{DType, Nullability};
15use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
16use vortex_mask::Mask;
17use vortex_scalar::Scalar;
18
19use crate::arrays::{
20 BoolEncoding, ConstantVTable, DecimalEncoding, ExtensionEncoding, ListEncoding, NullEncoding,
21 PrimitiveEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
22};
23use crate::builders::ArrayBuilder;
24use crate::compute::{ComputeFn, Cost, InvocationArgs, IsConstantOpts, Output, is_constant_opts};
25use crate::pipeline::{Operator, PipelineVTable};
26use crate::serde::ArrayChildren;
27use crate::stats::{Precision, Stat, StatsProviderExt, StatsSetRef};
28use crate::vtable::{
29 ArrayVTable, CanonicalVTable, ComputeVTable, OperationsVTable, SerdeVTable, VTable,
30 ValidityVTable, VisitorVTable,
31};
32use crate::{Canonical, EncodingId, EncodingRef, SerializeMetadata};
33
34pub trait Array: 'static + private::Sealed + Send + Sync + Debug + ArrayVisitor {
36 fn as_any(&self) -> &dyn Any;
38
39 fn to_array(&self) -> ArrayRef;
41
42 fn len(&self) -> usize;
44
45 fn is_empty(&self) -> bool {
47 self.len() == 0
48 }
49
50 fn dtype(&self) -> &DType;
52
53 fn encoding(&self) -> EncodingRef;
55
56 fn encoding_id(&self) -> EncodingId;
58
59 fn slice(&self, start: usize, end: usize) -> ArrayRef;
61
62 fn scalar_at(&self, index: usize) -> Scalar;
66
67 fn is_encoding(&self, encoding: EncodingId) -> bool {
69 self.encoding_id() == encoding
70 }
71
72 fn is_arrow(&self) -> bool {
75 self.is_encoding(NullEncoding.id())
76 || self.is_encoding(BoolEncoding.id())
77 || self.is_encoding(PrimitiveEncoding.id())
78 || self.is_encoding(VarBinEncoding.id())
79 || self.is_encoding(VarBinViewEncoding.id())
80 }
81
82 fn is_canonical(&self) -> bool {
85 self.is_encoding(NullEncoding.id())
86 || self.is_encoding(BoolEncoding.id())
87 || self.is_encoding(PrimitiveEncoding.id())
88 || self.is_encoding(DecimalEncoding.id())
89 || self.is_encoding(StructEncoding.id())
90 || self.is_encoding(ListEncoding.id())
91 || self.is_encoding(VarBinViewEncoding.id())
92 || self.is_encoding(ExtensionEncoding.id())
93 }
94
95 fn is_valid(&self, index: usize) -> VortexResult<bool>;
97
98 fn is_invalid(&self, index: usize) -> VortexResult<bool>;
100
101 fn all_valid(&self) -> VortexResult<bool>;
105
106 fn all_invalid(&self) -> VortexResult<bool>;
110
111 fn valid_count(&self) -> VortexResult<usize>;
113
114 fn invalid_count(&self) -> VortexResult<usize>;
116
117 fn validity_mask(&self) -> VortexResult<Mask>;
119
120 fn to_canonical(&self) -> VortexResult<Canonical>;
122
123 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()>;
127
128 fn statistics(&self) -> StatsSetRef<'_>;
131
132 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef>;
134
135 fn invoke(&self, compute_fn: &ComputeFn, args: &InvocationArgs)
152 -> VortexResult<Option<Output>>;
153
154 fn to_operator(&self) -> VortexResult<Option<Rc<dyn Operator>>>;
158}
159
160impl Array for Arc<dyn Array> {
161 fn as_any(&self) -> &dyn Any {
162 self.as_ref().as_any()
163 }
164
165 fn to_array(&self) -> ArrayRef {
166 self.clone()
167 }
168
169 fn len(&self) -> usize {
170 self.as_ref().len()
171 }
172
173 fn dtype(&self) -> &DType {
174 self.as_ref().dtype()
175 }
176
177 fn encoding(&self) -> EncodingRef {
178 self.as_ref().encoding()
179 }
180
181 fn encoding_id(&self) -> EncodingId {
182 self.as_ref().encoding_id()
183 }
184
185 fn slice(&self, start: usize, end: usize) -> ArrayRef {
186 self.as_ref().slice(start, end)
187 }
188
189 fn scalar_at(&self, index: usize) -> Scalar {
190 self.as_ref().scalar_at(index)
191 }
192
193 fn is_valid(&self, index: usize) -> VortexResult<bool> {
194 self.as_ref().is_valid(index)
195 }
196
197 fn is_invalid(&self, index: usize) -> VortexResult<bool> {
198 self.as_ref().is_invalid(index)
199 }
200
201 fn all_valid(&self) -> VortexResult<bool> {
202 self.as_ref().all_valid()
203 }
204
205 fn all_invalid(&self) -> VortexResult<bool> {
206 self.as_ref().all_invalid()
207 }
208
209 fn valid_count(&self) -> VortexResult<usize> {
210 self.as_ref().valid_count()
211 }
212
213 fn invalid_count(&self) -> VortexResult<usize> {
214 self.as_ref().invalid_count()
215 }
216
217 fn validity_mask(&self) -> VortexResult<Mask> {
218 self.as_ref().validity_mask()
219 }
220
221 fn to_canonical(&self) -> VortexResult<Canonical> {
222 self.as_ref().to_canonical()
223 }
224
225 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
226 self.as_ref().append_to_builder(builder)
227 }
228
229 fn statistics(&self) -> StatsSetRef<'_> {
230 self.as_ref().statistics()
231 }
232
233 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
234 self.as_ref().with_children(children)
235 }
236
237 fn invoke(
238 &self,
239 compute_fn: &ComputeFn,
240 args: &InvocationArgs,
241 ) -> VortexResult<Option<Output>> {
242 self.as_ref().invoke(compute_fn, args)
243 }
244
245 fn to_operator(&self) -> VortexResult<Option<Rc<dyn Operator>>> {
246 self.as_ref().to_operator()
247 }
248}
249
250pub type ArrayRef = Arc<dyn Array>;
252
253impl ToOwned for dyn Array {
254 type Owned = ArrayRef;
255
256 fn to_owned(&self) -> Self::Owned {
257 self.to_array()
258 }
259}
260
261impl dyn Array + '_ {
262 pub fn as_<V: VTable>(&self) -> &V::Array {
264 self.as_opt::<V>().vortex_expect("Failed to downcast")
265 }
266
267 pub fn as_opt<V: VTable>(&self) -> Option<&V::Array> {
269 self.as_any()
270 .downcast_ref::<ArrayAdapter<V>>()
271 .map(|array_adapter| &array_adapter.0)
272 }
273
274 pub fn is<V: VTable>(&self) -> bool {
276 self.as_opt::<V>().is_some()
277 }
278
279 pub fn is_constant(&self) -> bool {
280 let opts = IsConstantOpts {
281 cost: Cost::Specialized,
282 };
283 is_constant_opts(self, &opts)
284 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
285 .ok()
286 .flatten()
287 .unwrap_or_default()
288 }
289
290 pub fn is_constant_opts(&self, cost: Cost) -> bool {
291 let opts = IsConstantOpts { cost };
292 is_constant_opts(self, &opts)
293 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
294 .ok()
295 .flatten()
296 .unwrap_or_default()
297 }
298
299 pub fn as_constant(&self) -> Option<Scalar> {
300 self.is_constant().then(|| self.scalar_at(0))
301 }
302
303 pub fn nbytes(&self) -> u64 {
305 let mut nbytes = 0;
306 for array in self.depth_first_traversal() {
307 for buffer in array.buffers() {
308 nbytes += buffer.len() as u64;
309 }
310 }
311 nbytes
312 }
313}
314
315pub trait IntoArray {
317 fn into_array(self) -> ArrayRef;
318}
319
320impl IntoArray for ArrayRef {
321 fn into_array(self) -> ArrayRef {
322 self
323 }
324}
325
326mod private {
327 use super::*;
328
329 pub trait Sealed {}
330
331 impl<V: VTable> Sealed for ArrayAdapter<V> {}
332 impl Sealed for Arc<dyn Array> {}
333}
334
335#[repr(transparent)]
342pub struct ArrayAdapter<V: VTable>(V::Array);
343
344impl<V: VTable> ArrayAdapter<V> {
345 pub fn as_inner(&self) -> &V::Array {
347 &self.0
348 }
349
350 pub fn into_inner(self) -> V::Array {
352 self.0
353 }
354}
355
356impl<V: VTable> Debug for ArrayAdapter<V> {
357 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
358 self.0.fmt(f)
359 }
360}
361
362impl<V: VTable> Array for ArrayAdapter<V> {
363 fn as_any(&self) -> &dyn Any {
364 self
365 }
366
367 fn to_array(&self) -> ArrayRef {
368 Arc::new(ArrayAdapter::<V>(self.0.clone()))
369 }
370
371 fn len(&self) -> usize {
372 <V::ArrayVTable as ArrayVTable<V>>::len(&self.0)
373 }
374
375 fn dtype(&self) -> &DType {
376 <V::ArrayVTable as ArrayVTable<V>>::dtype(&self.0)
377 }
378
379 fn encoding(&self) -> EncodingRef {
380 V::encoding(&self.0)
381 }
382
383 fn encoding_id(&self) -> EncodingId {
384 V::encoding(&self.0).id()
385 }
386
387 fn slice(&self, start: usize, stop: usize) -> ArrayRef {
388 if start == 0 && stop == self.len() {
389 return self.to_array();
390 }
391
392 assert!(
393 start <= self.len(),
394 "OutOfBounds: start {start} > length {}",
395 self.len()
396 );
397 assert!(
398 stop <= self.len(),
399 "OutOfBounds: stop {stop} > length {}",
400 self.len()
401 );
402
403 assert!(start <= stop, "start ({start}) must be <= stop ({stop})");
404
405 if start == stop {
406 return Canonical::empty(self.dtype()).into_array();
407 }
408
409 let sliced = <V::OperationsVTable as OperationsVTable<V>>::slice(&self.0, start, stop);
410
411 assert_eq!(
412 sliced.len(),
413 stop - start,
414 "Slice length mismatch {}",
415 self.encoding_id()
416 );
417
418 debug_assert_eq!(
420 sliced.dtype(),
421 self.dtype(),
422 "Slice dtype mismatch {}",
423 self.encoding_id()
424 );
425
426 if !sliced.is::<ConstantVTable>() {
428 self.statistics().with_iter(|iter| {
429 sliced.statistics().inherit(iter.filter(|(stat, value)| {
430 matches!(
431 stat,
432 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
433 ) && value.as_ref().as_exact().is_some_and(|v| {
434 Scalar::new(DType::Bool(Nullability::NonNullable), v.clone())
435 .as_bool()
436 .value()
437 .unwrap_or_default()
438 })
439 }));
440 });
441 }
442
443 sliced
444 }
445
446 fn scalar_at(&self, index: usize) -> Scalar {
447 assert!(index < self.len(), "index {index} out of bounds");
448 if self.is_invalid(index).vortex_expect("index out of bounds") {
449 return Scalar::null(self.dtype().clone());
450 }
451 let scalar = <V::OperationsVTable as OperationsVTable<V>>::scalar_at(&self.0, index);
452 assert_eq!(self.dtype(), scalar.dtype(), "Scalar dtype mismatch");
453 scalar
454 }
455
456 fn is_valid(&self, index: usize) -> VortexResult<bool> {
457 if index >= self.len() {
458 vortex_bail!(OutOfBounds: index, 0, self.len());
459 }
460 <V::ValidityVTable as ValidityVTable<V>>::is_valid(&self.0, index)
461 }
462
463 fn is_invalid(&self, index: usize) -> VortexResult<bool> {
464 self.is_valid(index).map(|valid| !valid)
465 }
466
467 fn all_valid(&self) -> VortexResult<bool> {
468 <V::ValidityVTable as ValidityVTable<V>>::all_valid(&self.0)
469 }
470
471 fn all_invalid(&self) -> VortexResult<bool> {
472 <V::ValidityVTable as ValidityVTable<V>>::all_invalid(&self.0)
473 }
474
475 fn valid_count(&self) -> VortexResult<usize> {
476 if let Some(Precision::Exact(invalid_count)) =
477 self.statistics().get_as::<usize>(Stat::NullCount)
478 {
479 return Ok(self.len() - invalid_count);
480 }
481
482 let count = <V::ValidityVTable as ValidityVTable<V>>::valid_count(&self.0)?;
483 assert!(count <= self.len(), "Valid count exceeds array length");
484
485 self.statistics()
486 .set(Stat::NullCount, Precision::exact(self.len() - count));
487
488 Ok(count)
489 }
490
491 fn invalid_count(&self) -> VortexResult<usize> {
492 if let Some(Precision::Exact(invalid_count)) =
493 self.statistics().get_as::<usize>(Stat::NullCount)
494 {
495 return Ok(invalid_count);
496 }
497
498 let count = <V::ValidityVTable as ValidityVTable<V>>::invalid_count(&self.0)?;
499 assert!(count <= self.len(), "Invalid count exceeds array length");
500
501 self.statistics()
502 .set(Stat::NullCount, Precision::exact(count));
503
504 Ok(count)
505 }
506
507 fn validity_mask(&self) -> VortexResult<Mask> {
508 let mask = <V::ValidityVTable as ValidityVTable<V>>::validity_mask(&self.0)?;
509 assert_eq!(mask.len(), self.len(), "Validity mask length mismatch");
510 Ok(mask)
511 }
512
513 fn to_canonical(&self) -> VortexResult<Canonical> {
514 let canonical = <V::CanonicalVTable as CanonicalVTable<V>>::canonicalize(&self.0)?;
515 assert_eq!(
516 self.len(),
517 canonical.as_ref().len(),
518 "Canonical length mismatch {}. Expected {} but encoded into {}.",
519 self.encoding_id(),
520 self.len(),
521 canonical.as_ref().len()
522 );
523 assert_eq!(
524 self.dtype(),
525 canonical.as_ref().dtype(),
526 "Canonical dtype mismatch {}. Expected {} but encoded into {}.",
527 self.encoding_id(),
528 self.dtype(),
529 canonical.as_ref().dtype()
530 );
531 canonical
532 .as_ref()
533 .statistics()
534 .inherit_from(self.statistics());
535 Ok(canonical)
536 }
537
538 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
539 if builder.dtype() != self.dtype() {
540 vortex_bail!(
541 "Builder dtype mismatch: expected {}, got {}",
542 self.dtype(),
543 builder.dtype(),
544 );
545 }
546 let len = builder.len();
547
548 <V::CanonicalVTable as CanonicalVTable<V>>::append_to_builder(&self.0, builder)?;
549 assert_eq!(
550 len + self.len(),
551 builder.len(),
552 "Builder length mismatch after writing array for encoding {}",
553 self.encoding_id(),
554 );
555 Ok(())
556 }
557
558 fn statistics(&self) -> StatsSetRef<'_> {
559 <V::ArrayVTable as ArrayVTable<V>>::stats(&self.0)
560 }
561
562 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
563 struct ReplacementChildren<'a> {
564 children: &'a [ArrayRef],
565 }
566
567 impl ArrayChildren for ReplacementChildren<'_> {
568 fn get(&self, index: usize, dtype: &DType, len: usize) -> VortexResult<ArrayRef> {
569 if index >= self.children.len() {
570 vortex_bail!(OutOfBounds: index, 0, self.children.len());
571 }
572 let child = &self.children[index];
573 if child.len() != len {
574 vortex_bail!(
575 "Child length mismatch: expected {}, got {}",
576 len,
577 child.len()
578 );
579 }
580 if child.dtype() != dtype {
581 vortex_bail!(
582 "Child dtype mismatch: expected {}, got {}",
583 dtype,
584 child.dtype()
585 );
586 }
587 Ok(child.clone())
588 }
589
590 fn len(&self) -> usize {
591 self.children.len()
592 }
593 }
594
595 let metadata = self.metadata()?.ok_or_else(|| {
596 vortex_err!("Cannot replace children for arrays that do not support serialization")
597 })?;
598
599 self.encoding().build(
601 self.dtype(),
602 self.len(),
603 &metadata,
604 &self.buffers(),
605 &ReplacementChildren { children },
606 )
607 }
608
609 fn invoke(
610 &self,
611 compute_fn: &ComputeFn,
612 args: &InvocationArgs,
613 ) -> VortexResult<Option<Output>> {
614 <V::ComputeVTable as ComputeVTable<V>>::invoke(&self.0, compute_fn, args)
615 }
616
617 fn to_operator(&self) -> VortexResult<Option<Rc<dyn Operator>>> {
618 <V::PipelineVTable as PipelineVTable<V>>::to_operator(&self.0)
619 }
620}
621
622impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
623 fn children(&self) -> Vec<ArrayRef> {
624 struct ChildrenCollector {
625 children: Vec<ArrayRef>,
626 }
627
628 impl ArrayChildVisitor for ChildrenCollector {
629 fn visit_child(&mut self, _name: &str, array: &dyn Array) {
630 self.children.push(array.to_array());
631 }
632 }
633
634 let mut collector = ChildrenCollector {
635 children: Vec::new(),
636 };
637 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
638 collector.children
639 }
640
641 fn nchildren(&self) -> usize {
642 <V::VisitorVTable as VisitorVTable<V>>::nchildren(&self.0)
643 }
644
645 fn children_names(&self) -> Vec<String> {
646 struct ChildNameCollector {
647 names: Vec<String>,
648 }
649
650 impl ArrayChildVisitor for ChildNameCollector {
651 fn visit_child(&mut self, name: &str, _array: &dyn Array) {
652 self.names.push(name.to_string());
653 }
654 }
655
656 let mut collector = ChildNameCollector { names: Vec::new() };
657 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
658 collector.names
659 }
660
661 fn named_children(&self) -> Vec<(String, ArrayRef)> {
662 struct NamedChildrenCollector {
663 children: Vec<(String, ArrayRef)>,
664 }
665
666 impl ArrayChildVisitor for NamedChildrenCollector {
667 fn visit_child(&mut self, name: &str, array: &dyn Array) {
668 self.children.push((name.to_string(), array.to_array()));
669 }
670 }
671
672 let mut collector = NamedChildrenCollector {
673 children: Vec::new(),
674 };
675
676 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
677 collector.children
678 }
679
680 fn buffers(&self) -> Vec<ByteBuffer> {
681 struct BufferCollector {
682 buffers: Vec<ByteBuffer>,
683 }
684
685 impl ArrayBufferVisitor for BufferCollector {
686 fn visit_buffer(&mut self, buffer: &ByteBuffer) {
687 self.buffers.push(buffer.clone());
688 }
689 }
690
691 let mut collector = BufferCollector {
692 buffers: Vec::new(),
693 };
694 <V::VisitorVTable as VisitorVTable<V>>::visit_buffers(&self.0, &mut collector);
695 collector.buffers
696 }
697
698 fn nbuffers(&self) -> usize {
699 <V::VisitorVTable as VisitorVTable<V>>::nbuffers(&self.0)
700 }
701
702 fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
703 Ok(<V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0)?.map(|m| m.serialize()))
704 }
705
706 fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
707 match <V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0) {
708 Err(e) => write!(f, "<serde error: {e}>"),
709 Ok(None) => write!(f, "<serde not supported>"),
710 Ok(Some(metadata)) => Debug::fmt(&metadata, f),
711 }
712 }
713}