1pub mod display;
5mod visitor;
6
7use std::any::Any;
8use std::fmt::{Debug, Formatter};
9use std::sync::Arc;
10
11pub use visitor::*;
12use vortex_buffer::ByteBuffer;
13use vortex_dtype::{DType, Nullability};
14use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
15use vortex_mask::Mask;
16use vortex_scalar::Scalar;
17
18use crate::arrays::{
19 BoolEncoding, ConstantVTable, DecimalEncoding, ExtensionEncoding, ListEncoding, NullEncoding,
20 PrimitiveEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
21};
22use crate::builders::ArrayBuilder;
23use crate::compute::{ComputeFn, Cost, InvocationArgs, IsConstantOpts, Output, is_constant_opts};
24use crate::serde::ArrayChildren;
25use crate::stats::{Precision, Stat, StatsSetRef};
26use crate::vtable::{
27 ArrayVTable, CanonicalVTable, ComputeVTable, OperationsVTable, SerdeVTable, VTable,
28 ValidityVTable, VisitorVTable,
29};
30use crate::{Canonical, EncodingId, EncodingRef, SerializeMetadata};
31
32pub trait Array: 'static + private::Sealed + Send + Sync + Debug + ArrayVisitor {
34 fn as_any(&self) -> &dyn Any;
36
37 fn to_array(&self) -> ArrayRef;
39
40 fn len(&self) -> usize;
42
43 fn is_empty(&self) -> bool {
45 self.len() == 0
46 }
47
48 fn dtype(&self) -> &DType;
50
51 fn encoding(&self) -> EncodingRef;
53
54 fn encoding_id(&self) -> EncodingId;
56
57 fn slice(&self, start: usize, end: usize) -> VortexResult<ArrayRef>;
59
60 fn scalar_at(&self, index: usize) -> VortexResult<Scalar>;
62
63 fn is_encoding(&self, encoding: EncodingId) -> bool {
65 self.encoding_id() == encoding
66 }
67
68 fn is_arrow(&self) -> bool {
71 self.is_encoding(NullEncoding.id())
72 || self.is_encoding(BoolEncoding.id())
73 || self.is_encoding(PrimitiveEncoding.id())
74 || self.is_encoding(VarBinEncoding.id())
75 || self.is_encoding(VarBinViewEncoding.id())
76 }
77
78 fn is_canonical(&self) -> bool {
81 self.is_encoding(NullEncoding.id())
82 || self.is_encoding(BoolEncoding.id())
83 || self.is_encoding(PrimitiveEncoding.id())
84 || self.is_encoding(DecimalEncoding.id())
85 || self.is_encoding(StructEncoding.id())
86 || self.is_encoding(ListEncoding.id())
87 || self.is_encoding(VarBinViewEncoding.id())
88 || self.is_encoding(ExtensionEncoding.id())
89 }
90
91 fn is_valid(&self, index: usize) -> VortexResult<bool>;
93
94 fn is_invalid(&self, index: usize) -> VortexResult<bool>;
96
97 fn all_valid(&self) -> VortexResult<bool>;
101
102 fn all_invalid(&self) -> VortexResult<bool>;
106
107 fn valid_count(&self) -> VortexResult<usize>;
109
110 fn invalid_count(&self) -> VortexResult<usize>;
112
113 fn validity_mask(&self) -> VortexResult<Mask>;
115
116 fn to_canonical(&self) -> VortexResult<Canonical>;
118
119 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()>;
123
124 fn statistics(&self) -> StatsSetRef<'_>;
127
128 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef>;
130
131 fn invoke(&self, compute_fn: &ComputeFn, args: &InvocationArgs)
148 -> VortexResult<Option<Output>>;
149}
150
151impl Array for Arc<dyn Array> {
152 fn as_any(&self) -> &dyn Any {
153 self.as_ref().as_any()
154 }
155
156 fn to_array(&self) -> ArrayRef {
157 self.clone()
158 }
159
160 fn len(&self) -> usize {
161 self.as_ref().len()
162 }
163
164 fn dtype(&self) -> &DType {
165 self.as_ref().dtype()
166 }
167
168 fn encoding(&self) -> EncodingRef {
169 self.as_ref().encoding()
170 }
171
172 fn encoding_id(&self) -> EncodingId {
173 self.as_ref().encoding_id()
174 }
175
176 fn slice(&self, start: usize, end: usize) -> VortexResult<ArrayRef> {
177 self.as_ref().slice(start, end)
178 }
179
180 fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
181 self.as_ref().scalar_at(index)
182 }
183
184 fn is_valid(&self, index: usize) -> VortexResult<bool> {
185 self.as_ref().is_valid(index)
186 }
187
188 fn is_invalid(&self, index: usize) -> VortexResult<bool> {
189 self.as_ref().is_invalid(index)
190 }
191
192 fn all_valid(&self) -> VortexResult<bool> {
193 self.as_ref().all_valid()
194 }
195
196 fn all_invalid(&self) -> VortexResult<bool> {
197 self.as_ref().all_invalid()
198 }
199
200 fn valid_count(&self) -> VortexResult<usize> {
201 self.as_ref().valid_count()
202 }
203
204 fn invalid_count(&self) -> VortexResult<usize> {
205 self.as_ref().invalid_count()
206 }
207
208 fn validity_mask(&self) -> VortexResult<Mask> {
209 self.as_ref().validity_mask()
210 }
211
212 fn to_canonical(&self) -> VortexResult<Canonical> {
213 self.as_ref().to_canonical()
214 }
215
216 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
217 self.as_ref().append_to_builder(builder)
218 }
219
220 fn statistics(&self) -> StatsSetRef<'_> {
221 self.as_ref().statistics()
222 }
223
224 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
225 self.as_ref().with_children(children)
226 }
227
228 fn invoke(
229 &self,
230 compute_fn: &ComputeFn,
231 args: &InvocationArgs,
232 ) -> VortexResult<Option<Output>> {
233 self.as_ref().invoke(compute_fn, args)
234 }
235}
236
/// A reference-counted pointer to a type-erased [`Array`].
pub type ArrayRef = Arc<dyn Array>;
239
240impl ToOwned for dyn Array {
241 type Owned = ArrayRef;
242
243 fn to_owned(&self) -> Self::Owned {
244 self.to_array()
245 }
246}
247
248impl dyn Array + '_ {
249 pub fn as_<V: VTable>(&self) -> &V::Array {
251 self.as_opt::<V>().vortex_expect("Failed to downcast")
252 }
253
254 pub fn as_opt<V: VTable>(&self) -> Option<&V::Array> {
256 self.as_any()
257 .downcast_ref::<ArrayAdapter<V>>()
258 .map(|array_adapter| &array_adapter.0)
259 }
260
261 pub fn is<V: VTable>(&self) -> bool {
263 self.as_opt::<V>().is_some()
264 }
265
266 pub fn is_constant(&self) -> bool {
267 let opts = IsConstantOpts {
268 cost: Cost::Specialized,
269 };
270 is_constant_opts(self, &opts)
271 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
272 .ok()
273 .flatten()
274 .unwrap_or_default()
275 }
276
277 pub fn is_constant_opts(&self, cost: Cost) -> bool {
278 let opts = IsConstantOpts { cost };
279 is_constant_opts(self, &opts)
280 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
281 .ok()
282 .flatten()
283 .unwrap_or_default()
284 }
285
286 pub fn as_constant(&self) -> Option<Scalar> {
287 self.is_constant().then(|| self.scalar_at(0).ok()).flatten()
288 }
289
290 pub fn nbytes(&self) -> u64 {
292 let mut nbytes = 0;
293 for array in self.depth_first_traversal() {
294 for buffer in array.buffers() {
295 nbytes += buffer.len() as u64;
296 }
297 }
298 nbytes
299 }
300}
301
/// Conversion of a value into an [`ArrayRef`], consuming the value.
pub trait IntoArray {
    /// Consumes `self` and returns it as an [`ArrayRef`].
    fn into_array(self) -> ArrayRef;
}
306
307impl IntoArray for ArrayRef {
308 fn into_array(self) -> ArrayRef {
309 self
310 }
311}
312
313mod private {
314 use super::*;
315
316 pub trait Sealed {}
317
318 impl<V: VTable> Sealed for ArrayAdapter<V> {}
319 impl Sealed for Arc<dyn Array> {}
320}
321
/// Newtype around an encoding's concrete array type (`V::Array`).
///
/// The [`Array`] and [`ArrayVisitor`] implementations for this wrapper dispatch to the vtable
/// components of `V`.
#[repr(transparent)]
pub struct ArrayAdapter<V: VTable>(V::Array);
330
331impl<V: VTable> ArrayAdapter<V> {
332 pub fn as_inner(&self) -> &V::Array {
334 &self.0
335 }
336
337 pub fn into_inner(self) -> V::Array {
339 self.0
340 }
341}
342
343impl<V: VTable> Debug for ArrayAdapter<V> {
344 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
345 self.0.fmt(f)
346 }
347}
348
349impl<V: VTable> Array for ArrayAdapter<V> {
350 fn as_any(&self) -> &dyn Any {
351 self
352 }
353
354 fn to_array(&self) -> ArrayRef {
355 Arc::new(ArrayAdapter::<V>(self.0.clone()))
356 }
357
358 fn len(&self) -> usize {
359 <V::ArrayVTable as ArrayVTable<V>>::len(&self.0)
360 }
361
362 fn dtype(&self) -> &DType {
363 <V::ArrayVTable as ArrayVTable<V>>::dtype(&self.0)
364 }
365
366 fn encoding(&self) -> EncodingRef {
367 V::encoding(&self.0)
368 }
369
370 fn encoding_id(&self) -> EncodingId {
371 V::encoding(&self.0).id()
372 }
373
374 fn slice(&self, start: usize, stop: usize) -> VortexResult<ArrayRef> {
375 if start == 0 && stop == self.len() {
376 return Ok(self.to_array());
377 }
378
379 if start > self.len() {
380 vortex_bail!(OutOfBounds: start, 0, self.len());
381 }
382 if stop > self.len() {
383 vortex_bail!(OutOfBounds: stop, 0, self.len());
384 }
385 if start > stop {
386 vortex_bail!("start ({start}) must be <= stop ({stop})");
387 }
388 if start == stop {
389 return Ok(Canonical::empty(self.dtype()).into_array());
390 }
391
392 let sliced = <V::OperationsVTable as OperationsVTable<V>>::slice(&self.0, start, stop)?;
393
394 assert_eq!(
395 sliced.len(),
396 stop - start,
397 "Slice length mismatch {}",
398 self.encoding_id()
399 );
400
401 debug_assert_eq!(
403 sliced.dtype(),
404 self.dtype(),
405 "Slice dtype mismatch {}",
406 self.encoding_id()
407 );
408
409 if !sliced.is::<ConstantVTable>() {
411 self.statistics().with_iter(|iter| {
412 sliced.statistics().inherit(iter.filter(|(stat, value)| {
413 matches!(
414 stat,
415 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
416 ) && value.as_ref().as_exact().is_some_and(|v| {
417 Scalar::new(DType::Bool(Nullability::NonNullable), v.clone())
418 .as_bool()
419 .value()
420 .unwrap_or_default()
421 })
422 }));
423 });
424 }
425
426 Ok(sliced)
427 }
428
429 fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
430 if index >= self.len() {
431 vortex_bail!(OutOfBounds: index, 0, self.len());
432 }
433 if self.is_invalid(index)? {
434 return Ok(Scalar::null(self.dtype().clone()));
435 }
436 let scalar = <V::OperationsVTable as OperationsVTable<V>>::scalar_at(&self.0, index)?;
437 assert_eq!(self.dtype(), scalar.dtype(), "Scalar dtype mismatch");
438 Ok(scalar)
439 }
440
441 fn is_valid(&self, index: usize) -> VortexResult<bool> {
442 if index >= self.len() {
443 vortex_bail!(OutOfBounds: index, 0, self.len());
444 }
445 <V::ValidityVTable as ValidityVTable<V>>::is_valid(&self.0, index)
446 }
447
448 fn is_invalid(&self, index: usize) -> VortexResult<bool> {
449 self.is_valid(index).map(|valid| !valid)
450 }
451
452 fn all_valid(&self) -> VortexResult<bool> {
453 <V::ValidityVTable as ValidityVTable<V>>::all_valid(&self.0)
454 }
455
456 fn all_invalid(&self) -> VortexResult<bool> {
457 <V::ValidityVTable as ValidityVTable<V>>::all_invalid(&self.0)
458 }
459
460 fn valid_count(&self) -> VortexResult<usize> {
461 if let Some(Precision::Exact(invalid_count)) =
462 self.statistics().get_as::<usize>(Stat::NullCount)
463 {
464 return Ok(self.len() - invalid_count);
465 }
466
467 let count = <V::ValidityVTable as ValidityVTable<V>>::valid_count(&self.0)?;
468 assert!(count <= self.len(), "Valid count exceeds array length");
469
470 self.statistics()
471 .set(Stat::NullCount, Precision::exact(self.len() - count));
472
473 Ok(count)
474 }
475
476 fn invalid_count(&self) -> VortexResult<usize> {
477 if let Some(Precision::Exact(invalid_count)) =
478 self.statistics().get_as::<usize>(Stat::NullCount)
479 {
480 return Ok(invalid_count);
481 }
482
483 let count = <V::ValidityVTable as ValidityVTable<V>>::invalid_count(&self.0)?;
484 assert!(count <= self.len(), "Invalid count exceeds array length");
485
486 self.statistics()
487 .set(Stat::NullCount, Precision::exact(count));
488
489 Ok(count)
490 }
491
492 fn validity_mask(&self) -> VortexResult<Mask> {
493 let mask = <V::ValidityVTable as ValidityVTable<V>>::validity_mask(&self.0)?;
494 assert_eq!(mask.len(), self.len(), "Validity mask length mismatch");
495 Ok(mask)
496 }
497
498 fn to_canonical(&self) -> VortexResult<Canonical> {
499 let canonical = <V::CanonicalVTable as CanonicalVTable<V>>::canonicalize(&self.0)?;
500 assert_eq!(
501 self.len(),
502 canonical.as_ref().len(),
503 "Canonical length mismatch {}. Expected {} but encoded into {}.",
504 self.encoding_id(),
505 self.len(),
506 canonical.as_ref().len()
507 );
508 assert_eq!(
509 self.dtype(),
510 canonical.as_ref().dtype(),
511 "Canonical dtype mismatch {}. Expected {} but encoded into {}.",
512 self.encoding_id(),
513 self.dtype(),
514 canonical.as_ref().dtype()
515 );
516 canonical
517 .as_ref()
518 .statistics()
519 .replace(self.statistics().to_owned());
520 Ok(canonical)
521 }
522
523 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
524 if builder.dtype() != self.dtype() {
525 vortex_bail!(
526 "Builder dtype mismatch: expected {}, got {}",
527 self.dtype(),
528 builder.dtype(),
529 );
530 }
531 let len = builder.len();
532
533 <V::CanonicalVTable as CanonicalVTable<V>>::append_to_builder(&self.0, builder)?;
534 assert_eq!(
535 len + self.len(),
536 builder.len(),
537 "Builder length mismatch after writing array for encoding {}",
538 self.encoding_id(),
539 );
540 Ok(())
541 }
542
543 fn statistics(&self) -> StatsSetRef<'_> {
544 <V::ArrayVTable as ArrayVTable<V>>::stats(&self.0)
545 }
546
547 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
548 struct ReplacementChildren<'a> {
549 children: &'a [ArrayRef],
550 }
551
552 impl ArrayChildren for ReplacementChildren<'_> {
553 fn get(&self, index: usize, dtype: &DType, len: usize) -> VortexResult<ArrayRef> {
554 if index >= self.children.len() {
555 vortex_bail!(OutOfBounds: index, 0, self.children.len());
556 }
557 let child = &self.children[index];
558 if child.len() != len {
559 vortex_bail!(
560 "Child length mismatch: expected {}, got {}",
561 len,
562 child.len()
563 );
564 }
565 if child.dtype() != dtype {
566 vortex_bail!(
567 "Child dtype mismatch: expected {}, got {}",
568 dtype,
569 child.dtype()
570 );
571 }
572 Ok(child.clone())
573 }
574
575 fn len(&self) -> usize {
576 self.children.len()
577 }
578 }
579
580 let metadata = self.metadata()?.ok_or_else(|| {
581 vortex_err!("Cannot replace children for arrays that do not support serialization")
582 })?;
583
584 self.encoding().build(
586 self.dtype(),
587 self.len(),
588 &metadata,
589 &self.buffers(),
590 &ReplacementChildren { children },
591 )
592 }
593
594 fn invoke(
595 &self,
596 compute_fn: &ComputeFn,
597 args: &InvocationArgs,
598 ) -> VortexResult<Option<Output>> {
599 <V::ComputeVTable as ComputeVTable<V>>::invoke(&self.0, compute_fn, args)
600 }
601}
602
603impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
604 fn children(&self) -> Vec<ArrayRef> {
605 struct ChildrenCollector {
606 children: Vec<ArrayRef>,
607 }
608
609 impl ArrayChildVisitor for ChildrenCollector {
610 fn visit_child(&mut self, _name: &str, array: &dyn Array) {
611 self.children.push(array.to_array());
612 }
613 }
614
615 let mut collector = ChildrenCollector {
616 children: Vec::new(),
617 };
618 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
619 collector.children
620 }
621
622 fn nchildren(&self) -> usize {
623 <V::VisitorVTable as VisitorVTable<V>>::nchildren(&self.0)
624 }
625
626 fn children_names(&self) -> Vec<String> {
627 struct ChildNameCollector {
628 names: Vec<String>,
629 }
630
631 impl ArrayChildVisitor for ChildNameCollector {
632 fn visit_child(&mut self, name: &str, _array: &dyn Array) {
633 self.names.push(name.to_string());
634 }
635 }
636
637 let mut collector = ChildNameCollector { names: Vec::new() };
638 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
639 collector.names
640 }
641
642 fn named_children(&self) -> Vec<(String, ArrayRef)> {
643 struct NamedChildrenCollector {
644 children: Vec<(String, ArrayRef)>,
645 }
646
647 impl ArrayChildVisitor for NamedChildrenCollector {
648 fn visit_child(&mut self, name: &str, array: &dyn Array) {
649 self.children.push((name.to_string(), array.to_array()));
650 }
651 }
652
653 let mut collector = NamedChildrenCollector {
654 children: Vec::new(),
655 };
656
657 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
658 collector.children
659 }
660
661 fn buffers(&self) -> Vec<ByteBuffer> {
662 struct BufferCollector {
663 buffers: Vec<ByteBuffer>,
664 }
665
666 impl ArrayBufferVisitor for BufferCollector {
667 fn visit_buffer(&mut self, buffer: &ByteBuffer) {
668 self.buffers.push(buffer.clone());
669 }
670 }
671
672 let mut collector = BufferCollector {
673 buffers: Vec::new(),
674 };
675 <V::VisitorVTable as VisitorVTable<V>>::visit_buffers(&self.0, &mut collector);
676 collector.buffers
677 }
678
679 fn nbuffers(&self) -> usize {
680 <V::VisitorVTable as VisitorVTable<V>>::nbuffers(&self.0)
681 }
682
683 fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
684 Ok(<V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0)?.map(|m| m.serialize()))
685 }
686
687 fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
688 match <V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0) {
689 Err(e) => write!(f, "<serde error: {e}>"),
690 Ok(None) => write!(f, "<serde not supported>"),
691 Ok(Some(metadata)) => Debug::fmt(&metadata, f),
692 }
693 }
694}