1pub mod display;
5mod visitor;
6
7use std::any::Any;
8use std::fmt::{Debug, Formatter};
9use std::sync::Arc;
10
11pub use visitor::*;
12use vortex_buffer::ByteBuffer;
13use vortex_dtype::{DType, Nullability};
14use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
15use vortex_mask::Mask;
16use vortex_scalar::Scalar;
17
18use crate::arrays::{
19 BoolEncoding, ConstantVTable, DecimalEncoding, ExtensionEncoding, ListEncoding, NullEncoding,
20 PrimitiveEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
21};
22use crate::builders::ArrayBuilder;
23use crate::compute::{ComputeFn, Cost, InvocationArgs, IsConstantOpts, Output, is_constant_opts};
24use crate::serde::ArrayChildren;
25use crate::stats::{Precision, Stat, StatsProviderExt, StatsSetRef};
26use crate::vtable::{
27 ArrayVTable, CanonicalVTable, ComputeVTable, OperationsVTable, SerdeVTable, VTable,
28 ValidityVTable, VisitorVTable,
29};
30use crate::{Canonical, EncodingId, EncodingRef, SerializeMetadata};
31
32pub trait Array: 'static + private::Sealed + Send + Sync + Debug + ArrayVisitor {
34 fn as_any(&self) -> &dyn Any;
36
37 fn to_array(&self) -> ArrayRef;
39
40 fn len(&self) -> usize;
42
43 fn is_empty(&self) -> bool {
45 self.len() == 0
46 }
47
48 fn dtype(&self) -> &DType;
50
51 fn encoding(&self) -> EncodingRef;
53
54 fn encoding_id(&self) -> EncodingId;
56
57 fn slice(&self, start: usize, end: usize) -> ArrayRef;
59
60 fn scalar_at(&self, index: usize) -> Scalar;
64
65 fn is_encoding(&self, encoding: EncodingId) -> bool {
67 self.encoding_id() == encoding
68 }
69
70 fn is_arrow(&self) -> bool {
73 self.is_encoding(NullEncoding.id())
74 || self.is_encoding(BoolEncoding.id())
75 || self.is_encoding(PrimitiveEncoding.id())
76 || self.is_encoding(VarBinEncoding.id())
77 || self.is_encoding(VarBinViewEncoding.id())
78 }
79
80 fn is_canonical(&self) -> bool {
83 self.is_encoding(NullEncoding.id())
84 || self.is_encoding(BoolEncoding.id())
85 || self.is_encoding(PrimitiveEncoding.id())
86 || self.is_encoding(DecimalEncoding.id())
87 || self.is_encoding(StructEncoding.id())
88 || self.is_encoding(ListEncoding.id())
89 || self.is_encoding(VarBinViewEncoding.id())
90 || self.is_encoding(ExtensionEncoding.id())
91 }
92
93 fn is_valid(&self, index: usize) -> VortexResult<bool>;
95
96 fn is_invalid(&self, index: usize) -> VortexResult<bool>;
98
99 fn all_valid(&self) -> VortexResult<bool>;
103
104 fn all_invalid(&self) -> VortexResult<bool>;
108
109 fn valid_count(&self) -> VortexResult<usize>;
111
112 fn invalid_count(&self) -> VortexResult<usize>;
114
115 fn validity_mask(&self) -> VortexResult<Mask>;
117
118 fn to_canonical(&self) -> VortexResult<Canonical>;
120
121 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()>;
125
126 fn statistics(&self) -> StatsSetRef<'_>;
129
130 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef>;
132
133 fn invoke(&self, compute_fn: &ComputeFn, args: &InvocationArgs)
150 -> VortexResult<Option<Output>>;
151}
152
153impl Array for Arc<dyn Array> {
154 fn as_any(&self) -> &dyn Any {
155 self.as_ref().as_any()
156 }
157
158 fn to_array(&self) -> ArrayRef {
159 self.clone()
160 }
161
162 fn len(&self) -> usize {
163 self.as_ref().len()
164 }
165
166 fn dtype(&self) -> &DType {
167 self.as_ref().dtype()
168 }
169
170 fn encoding(&self) -> EncodingRef {
171 self.as_ref().encoding()
172 }
173
174 fn encoding_id(&self) -> EncodingId {
175 self.as_ref().encoding_id()
176 }
177
178 fn slice(&self, start: usize, end: usize) -> ArrayRef {
179 self.as_ref().slice(start, end)
180 }
181
182 fn scalar_at(&self, index: usize) -> Scalar {
183 self.as_ref().scalar_at(index)
184 }
185
186 fn is_valid(&self, index: usize) -> VortexResult<bool> {
187 self.as_ref().is_valid(index)
188 }
189
190 fn is_invalid(&self, index: usize) -> VortexResult<bool> {
191 self.as_ref().is_invalid(index)
192 }
193
194 fn all_valid(&self) -> VortexResult<bool> {
195 self.as_ref().all_valid()
196 }
197
198 fn all_invalid(&self) -> VortexResult<bool> {
199 self.as_ref().all_invalid()
200 }
201
202 fn valid_count(&self) -> VortexResult<usize> {
203 self.as_ref().valid_count()
204 }
205
206 fn invalid_count(&self) -> VortexResult<usize> {
207 self.as_ref().invalid_count()
208 }
209
210 fn validity_mask(&self) -> VortexResult<Mask> {
211 self.as_ref().validity_mask()
212 }
213
214 fn to_canonical(&self) -> VortexResult<Canonical> {
215 self.as_ref().to_canonical()
216 }
217
218 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
219 self.as_ref().append_to_builder(builder)
220 }
221
222 fn statistics(&self) -> StatsSetRef<'_> {
223 self.as_ref().statistics()
224 }
225
226 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
227 self.as_ref().with_children(children)
228 }
229
230 fn invoke(
231 &self,
232 compute_fn: &ComputeFn,
233 args: &InvocationArgs,
234 ) -> VortexResult<Option<Output>> {
235 self.as_ref().invoke(compute_fn, args)
236 }
237}
238
/// A reference-counted handle to a dynamically typed [`Array`].
pub type ArrayRef = Arc<dyn Array>;
241
242impl ToOwned for dyn Array {
243 type Owned = ArrayRef;
244
245 fn to_owned(&self) -> Self::Owned {
246 self.to_array()
247 }
248}
249
250impl dyn Array + '_ {
251 pub fn as_<V: VTable>(&self) -> &V::Array {
253 self.as_opt::<V>().vortex_expect("Failed to downcast")
254 }
255
256 pub fn as_opt<V: VTable>(&self) -> Option<&V::Array> {
258 self.as_any()
259 .downcast_ref::<ArrayAdapter<V>>()
260 .map(|array_adapter| &array_adapter.0)
261 }
262
263 pub fn is<V: VTable>(&self) -> bool {
265 self.as_opt::<V>().is_some()
266 }
267
268 pub fn is_constant(&self) -> bool {
269 let opts = IsConstantOpts {
270 cost: Cost::Specialized,
271 };
272 is_constant_opts(self, &opts)
273 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
274 .ok()
275 .flatten()
276 .unwrap_or_default()
277 }
278
279 pub fn is_constant_opts(&self, cost: Cost) -> bool {
280 let opts = IsConstantOpts { cost };
281 is_constant_opts(self, &opts)
282 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
283 .ok()
284 .flatten()
285 .unwrap_or_default()
286 }
287
288 pub fn as_constant(&self) -> Option<Scalar> {
289 self.is_constant().then(|| self.scalar_at(0))
290 }
291
292 pub fn nbytes(&self) -> u64 {
294 let mut nbytes = 0;
295 for array in self.depth_first_traversal() {
296 for buffer in array.buffers() {
297 nbytes += buffer.len() as u64;
298 }
299 }
300 nbytes
301 }
302}
303
/// A conversion that consumes a value and produces an [`ArrayRef`].
pub trait IntoArray {
    /// Consumes `self` and returns it as an [`ArrayRef`].
    fn into_array(self) -> ArrayRef;
}
308
/// An [`ArrayRef`] already is the target type, so the conversion is the identity.
impl IntoArray for ArrayRef {
    fn into_array(self) -> ArrayRef {
        self
    }
}
314
/// Private module holding the marker trait that seals [`Array`].
mod private {
    use super::*;

    /// Supertrait of `Array`. It lives in a private module, so only the types given an
    /// implementation below can implement `Array`.
    pub trait Sealed {}

    impl<V: VTable> Sealed for ArrayAdapter<V> {}
    impl Sealed for Arc<dyn Array> {}
}
323
/// Wraps a `V::Array` so that it can implement [`Array`] by forwarding to the vtables of `V`.
///
/// `#[repr(transparent)]` gives the adapter the same layout as the wrapped `V::Array`.
#[repr(transparent)]
pub struct ArrayAdapter<V: VTable>(V::Array);
332
333impl<V: VTable> ArrayAdapter<V> {
334 pub fn as_inner(&self) -> &V::Array {
336 &self.0
337 }
338
339 pub fn into_inner(self) -> V::Array {
341 self.0
342 }
343}
344
345impl<V: VTable> Debug for ArrayAdapter<V> {
346 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
347 self.0.fmt(f)
348 }
349}
350
351impl<V: VTable> Array for ArrayAdapter<V> {
352 fn as_any(&self) -> &dyn Any {
353 self
354 }
355
356 fn to_array(&self) -> ArrayRef {
357 Arc::new(ArrayAdapter::<V>(self.0.clone()))
358 }
359
360 fn len(&self) -> usize {
361 <V::ArrayVTable as ArrayVTable<V>>::len(&self.0)
362 }
363
364 fn dtype(&self) -> &DType {
365 <V::ArrayVTable as ArrayVTable<V>>::dtype(&self.0)
366 }
367
368 fn encoding(&self) -> EncodingRef {
369 V::encoding(&self.0)
370 }
371
372 fn encoding_id(&self) -> EncodingId {
373 V::encoding(&self.0).id()
374 }
375
376 fn slice(&self, start: usize, stop: usize) -> ArrayRef {
377 if start == 0 && stop == self.len() {
378 return self.to_array();
379 }
380
381 assert!(
382 start <= self.len(),
383 "OutOfBounds: start {start} > length {}",
384 self.len()
385 );
386 assert!(
387 stop <= self.len(),
388 "OutOfBounds: stop {stop} > length {}",
389 self.len()
390 );
391
392 assert!(start <= stop, "start ({start}) must be <= stop ({stop})");
393
394 if start == stop {
395 return Canonical::empty(self.dtype()).into_array();
396 }
397
398 let sliced = <V::OperationsVTable as OperationsVTable<V>>::slice(&self.0, start, stop);
399
400 assert_eq!(
401 sliced.len(),
402 stop - start,
403 "Slice length mismatch {}",
404 self.encoding_id()
405 );
406
407 debug_assert_eq!(
409 sliced.dtype(),
410 self.dtype(),
411 "Slice dtype mismatch {}",
412 self.encoding_id()
413 );
414
415 if !sliced.is::<ConstantVTable>() {
417 self.statistics().with_iter(|iter| {
418 sliced.statistics().inherit(iter.filter(|(stat, value)| {
419 matches!(
420 stat,
421 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
422 ) && value.as_ref().as_exact().is_some_and(|v| {
423 Scalar::new(DType::Bool(Nullability::NonNullable), v.clone())
424 .as_bool()
425 .value()
426 .unwrap_or_default()
427 })
428 }));
429 });
430 }
431
432 sliced
433 }
434
435 fn scalar_at(&self, index: usize) -> Scalar {
436 assert!(index < self.len(), "index {index} out of bounds");
437 if self.is_invalid(index).vortex_expect("index out of bounds") {
438 return Scalar::null(self.dtype().clone());
439 }
440 let scalar = <V::OperationsVTable as OperationsVTable<V>>::scalar_at(&self.0, index);
441 assert_eq!(self.dtype(), scalar.dtype(), "Scalar dtype mismatch");
442 scalar
443 }
444
445 fn is_valid(&self, index: usize) -> VortexResult<bool> {
446 if index >= self.len() {
447 vortex_bail!(OutOfBounds: index, 0, self.len());
448 }
449 <V::ValidityVTable as ValidityVTable<V>>::is_valid(&self.0, index)
450 }
451
452 fn is_invalid(&self, index: usize) -> VortexResult<bool> {
453 self.is_valid(index).map(|valid| !valid)
454 }
455
456 fn all_valid(&self) -> VortexResult<bool> {
457 <V::ValidityVTable as ValidityVTable<V>>::all_valid(&self.0)
458 }
459
460 fn all_invalid(&self) -> VortexResult<bool> {
461 <V::ValidityVTable as ValidityVTable<V>>::all_invalid(&self.0)
462 }
463
464 fn valid_count(&self) -> VortexResult<usize> {
465 if let Some(Precision::Exact(invalid_count)) =
466 self.statistics().get_as::<usize>(Stat::NullCount)
467 {
468 return Ok(self.len() - invalid_count);
469 }
470
471 let count = <V::ValidityVTable as ValidityVTable<V>>::valid_count(&self.0)?;
472 assert!(count <= self.len(), "Valid count exceeds array length");
473
474 self.statistics()
475 .set(Stat::NullCount, Precision::exact(self.len() - count));
476
477 Ok(count)
478 }
479
480 fn invalid_count(&self) -> VortexResult<usize> {
481 if let Some(Precision::Exact(invalid_count)) =
482 self.statistics().get_as::<usize>(Stat::NullCount)
483 {
484 return Ok(invalid_count);
485 }
486
487 let count = <V::ValidityVTable as ValidityVTable<V>>::invalid_count(&self.0)?;
488 assert!(count <= self.len(), "Invalid count exceeds array length");
489
490 self.statistics()
491 .set(Stat::NullCount, Precision::exact(count));
492
493 Ok(count)
494 }
495
496 fn validity_mask(&self) -> VortexResult<Mask> {
497 let mask = <V::ValidityVTable as ValidityVTable<V>>::validity_mask(&self.0)?;
498 assert_eq!(mask.len(), self.len(), "Validity mask length mismatch");
499 Ok(mask)
500 }
501
502 fn to_canonical(&self) -> VortexResult<Canonical> {
503 let canonical = <V::CanonicalVTable as CanonicalVTable<V>>::canonicalize(&self.0)?;
504 assert_eq!(
505 self.len(),
506 canonical.as_ref().len(),
507 "Canonical length mismatch {}. Expected {} but encoded into {}.",
508 self.encoding_id(),
509 self.len(),
510 canonical.as_ref().len()
511 );
512 assert_eq!(
513 self.dtype(),
514 canonical.as_ref().dtype(),
515 "Canonical dtype mismatch {}. Expected {} but encoded into {}.",
516 self.encoding_id(),
517 self.dtype(),
518 canonical.as_ref().dtype()
519 );
520 canonical
521 .as_ref()
522 .statistics()
523 .inherit_from(self.statistics());
524 Ok(canonical)
525 }
526
527 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
528 if builder.dtype() != self.dtype() {
529 vortex_bail!(
530 "Builder dtype mismatch: expected {}, got {}",
531 self.dtype(),
532 builder.dtype(),
533 );
534 }
535 let len = builder.len();
536
537 <V::CanonicalVTable as CanonicalVTable<V>>::append_to_builder(&self.0, builder)?;
538 assert_eq!(
539 len + self.len(),
540 builder.len(),
541 "Builder length mismatch after writing array for encoding {}",
542 self.encoding_id(),
543 );
544 Ok(())
545 }
546
547 fn statistics(&self) -> StatsSetRef<'_> {
548 <V::ArrayVTable as ArrayVTable<V>>::stats(&self.0)
549 }
550
551 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
552 struct ReplacementChildren<'a> {
553 children: &'a [ArrayRef],
554 }
555
556 impl ArrayChildren for ReplacementChildren<'_> {
557 fn get(&self, index: usize, dtype: &DType, len: usize) -> VortexResult<ArrayRef> {
558 if index >= self.children.len() {
559 vortex_bail!(OutOfBounds: index, 0, self.children.len());
560 }
561 let child = &self.children[index];
562 if child.len() != len {
563 vortex_bail!(
564 "Child length mismatch: expected {}, got {}",
565 len,
566 child.len()
567 );
568 }
569 if child.dtype() != dtype {
570 vortex_bail!(
571 "Child dtype mismatch: expected {}, got {}",
572 dtype,
573 child.dtype()
574 );
575 }
576 Ok(child.clone())
577 }
578
579 fn len(&self) -> usize {
580 self.children.len()
581 }
582 }
583
584 let metadata = self.metadata()?.ok_or_else(|| {
585 vortex_err!("Cannot replace children for arrays that do not support serialization")
586 })?;
587
588 self.encoding().build(
590 self.dtype(),
591 self.len(),
592 &metadata,
593 &self.buffers(),
594 &ReplacementChildren { children },
595 )
596 }
597
598 fn invoke(
599 &self,
600 compute_fn: &ComputeFn,
601 args: &InvocationArgs,
602 ) -> VortexResult<Option<Output>> {
603 <V::ComputeVTable as ComputeVTable<V>>::invoke(&self.0, compute_fn, args)
604 }
605}
606
607impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
608 fn children(&self) -> Vec<ArrayRef> {
609 struct ChildrenCollector {
610 children: Vec<ArrayRef>,
611 }
612
613 impl ArrayChildVisitor for ChildrenCollector {
614 fn visit_child(&mut self, _name: &str, array: &dyn Array) {
615 self.children.push(array.to_array());
616 }
617 }
618
619 let mut collector = ChildrenCollector {
620 children: Vec::new(),
621 };
622 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
623 collector.children
624 }
625
626 fn nchildren(&self) -> usize {
627 <V::VisitorVTable as VisitorVTable<V>>::nchildren(&self.0)
628 }
629
630 fn children_names(&self) -> Vec<String> {
631 struct ChildNameCollector {
632 names: Vec<String>,
633 }
634
635 impl ArrayChildVisitor for ChildNameCollector {
636 fn visit_child(&mut self, name: &str, _array: &dyn Array) {
637 self.names.push(name.to_string());
638 }
639 }
640
641 let mut collector = ChildNameCollector { names: Vec::new() };
642 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
643 collector.names
644 }
645
646 fn named_children(&self) -> Vec<(String, ArrayRef)> {
647 struct NamedChildrenCollector {
648 children: Vec<(String, ArrayRef)>,
649 }
650
651 impl ArrayChildVisitor for NamedChildrenCollector {
652 fn visit_child(&mut self, name: &str, array: &dyn Array) {
653 self.children.push((name.to_string(), array.to_array()));
654 }
655 }
656
657 let mut collector = NamedChildrenCollector {
658 children: Vec::new(),
659 };
660
661 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
662 collector.children
663 }
664
665 fn buffers(&self) -> Vec<ByteBuffer> {
666 struct BufferCollector {
667 buffers: Vec<ByteBuffer>,
668 }
669
670 impl ArrayBufferVisitor for BufferCollector {
671 fn visit_buffer(&mut self, buffer: &ByteBuffer) {
672 self.buffers.push(buffer.clone());
673 }
674 }
675
676 let mut collector = BufferCollector {
677 buffers: Vec::new(),
678 };
679 <V::VisitorVTable as VisitorVTable<V>>::visit_buffers(&self.0, &mut collector);
680 collector.buffers
681 }
682
683 fn nbuffers(&self) -> usize {
684 <V::VisitorVTable as VisitorVTable<V>>::nbuffers(&self.0)
685 }
686
687 fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
688 Ok(<V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0)?.map(|m| m.serialize()))
689 }
690
691 fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
692 match <V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0) {
693 Err(e) => write!(f, "<serde error: {e}>"),
694 Ok(None) => write!(f, "<serde not supported>"),
695 Ok(Some(metadata)) => Debug::fmt(&metadata, f),
696 }
697 }
698}