1mod convert;
5pub mod display;
6mod statistics;
7mod visitor;
8
9use std::any::Any;
10use std::fmt::{Debug, Formatter};
11use std::sync::Arc;
12
13pub use convert::*;
14pub use visitor::*;
15use vortex_buffer::ByteBuffer;
16use vortex_dtype::DType;
17use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
18use vortex_mask::Mask;
19use vortex_scalar::Scalar;
20
21use crate::arrays::{
22 BoolEncoding, DecimalEncoding, ExtensionEncoding, ListEncoding, NullEncoding,
23 PrimitiveEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
24};
25use crate::builders::ArrayBuilder;
26use crate::compute::{ComputeFn, Cost, InvocationArgs, Output};
27use crate::serde::ArrayChildren;
28use crate::stats::{Precision, Stat, StatsProviderExt, StatsSetRef};
29use crate::vtable::{
30 ArrayVTable, CanonicalVTable, ComputeVTable, OperationsVTable, SerdeVTable, VTable,
31 ValidityVTable, VisitorVTable,
32};
33use crate::{Canonical, EncodingId, EncodingRef, SerializeMetadata};
34
35pub trait Array: 'static + private::Sealed + Send + Sync + Debug + ArrayVisitor {
37 fn as_any(&self) -> &dyn Any;
39
40 fn to_array(&self) -> ArrayRef;
42
43 fn len(&self) -> usize;
45
46 fn is_empty(&self) -> bool {
48 self.len() == 0
49 }
50
51 fn dtype(&self) -> &DType;
53
54 fn encoding(&self) -> EncodingRef;
56
57 fn encoding_id(&self) -> EncodingId;
59
60 fn slice(&self, start: usize, end: usize) -> VortexResult<ArrayRef>;
62
63 fn scalar_at(&self, index: usize) -> VortexResult<Scalar>;
65
66 fn is_encoding(&self, encoding: EncodingId) -> bool {
68 self.encoding_id() == encoding
69 }
70
71 fn is_arrow(&self) -> bool {
74 self.is_encoding(NullEncoding.id())
75 || self.is_encoding(BoolEncoding.id())
76 || self.is_encoding(PrimitiveEncoding.id())
77 || self.is_encoding(VarBinEncoding.id())
78 || self.is_encoding(VarBinViewEncoding.id())
79 }
80
81 fn is_canonical(&self) -> bool {
84 self.is_encoding(NullEncoding.id())
85 || self.is_encoding(BoolEncoding.id())
86 || self.is_encoding(PrimitiveEncoding.id())
87 || self.is_encoding(DecimalEncoding.id())
88 || self.is_encoding(StructEncoding.id())
89 || self.is_encoding(ListEncoding.id())
90 || self.is_encoding(VarBinViewEncoding.id())
91 || self.is_encoding(ExtensionEncoding.id())
92 }
93
94 fn is_valid(&self, index: usize) -> VortexResult<bool>;
96
97 fn is_invalid(&self, index: usize) -> VortexResult<bool>;
99
100 fn all_valid(&self) -> VortexResult<bool>;
104
105 fn all_invalid(&self) -> VortexResult<bool>;
109
110 fn valid_count(&self) -> VortexResult<usize>;
112
113 fn invalid_count(&self) -> VortexResult<usize>;
115
116 fn validity_mask(&self) -> VortexResult<Mask>;
118
119 fn to_canonical(&self) -> VortexResult<Canonical>;
121
122 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()>;
126
127 fn statistics(&self) -> StatsSetRef<'_>;
130
131 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef>;
133
134 fn invoke(&self, compute_fn: &ComputeFn, args: &InvocationArgs)
151 -> VortexResult<Option<Output>>;
152}
153
154impl Array for Arc<dyn Array> {
155 fn as_any(&self) -> &dyn Any {
156 self.as_ref().as_any()
157 }
158
159 fn to_array(&self) -> ArrayRef {
160 self.clone()
161 }
162
163 fn len(&self) -> usize {
164 self.as_ref().len()
165 }
166
167 fn dtype(&self) -> &DType {
168 self.as_ref().dtype()
169 }
170
171 fn encoding(&self) -> EncodingRef {
172 self.as_ref().encoding()
173 }
174
175 fn encoding_id(&self) -> EncodingId {
176 self.as_ref().encoding_id()
177 }
178
179 fn slice(&self, start: usize, end: usize) -> VortexResult<ArrayRef> {
180 self.as_ref().slice(start, end)
181 }
182
183 fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
184 self.as_ref().scalar_at(index)
185 }
186
187 fn is_valid(&self, index: usize) -> VortexResult<bool> {
188 self.as_ref().is_valid(index)
189 }
190
191 fn is_invalid(&self, index: usize) -> VortexResult<bool> {
192 self.as_ref().is_invalid(index)
193 }
194
195 fn all_valid(&self) -> VortexResult<bool> {
196 self.as_ref().all_valid()
197 }
198
199 fn all_invalid(&self) -> VortexResult<bool> {
200 self.as_ref().all_invalid()
201 }
202
203 fn valid_count(&self) -> VortexResult<usize> {
204 self.as_ref().valid_count()
205 }
206
207 fn invalid_count(&self) -> VortexResult<usize> {
208 self.as_ref().invalid_count()
209 }
210
211 fn validity_mask(&self) -> VortexResult<Mask> {
212 self.as_ref().validity_mask()
213 }
214
215 fn to_canonical(&self) -> VortexResult<Canonical> {
216 self.as_ref().to_canonical()
217 }
218
219 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
220 self.as_ref().append_to_builder(builder)
221 }
222
223 fn statistics(&self) -> StatsSetRef<'_> {
224 self.as_ref().statistics()
225 }
226
227 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
228 self.as_ref().with_children(children)
229 }
230
231 fn invoke(
232 &self,
233 compute_fn: &ComputeFn,
234 args: &InvocationArgs,
235 ) -> VortexResult<Option<Output>> {
236 self.as_ref().invoke(compute_fn, args)
237 }
238}
239
240pub type ArrayRef = Arc<dyn Array>;
242
243impl ToOwned for dyn Array {
244 type Owned = ArrayRef;
245
246 fn to_owned(&self) -> Self::Owned {
247 self.to_array()
248 }
249}
250
251impl dyn Array + '_ {
252 pub fn as_<V: VTable>(&self) -> &V::Array {
254 self.as_opt::<V>().vortex_expect("Failed to downcast")
255 }
256
257 pub fn as_opt<V: VTable>(&self) -> Option<&V::Array> {
259 self.as_any()
260 .downcast_ref::<ArrayAdapter<V>>()
261 .map(|array_adapter| &array_adapter.0)
262 }
263
264 pub fn is<V: VTable>(&self) -> bool {
266 self.as_opt::<V>().is_some()
267 }
268}
269
270impl dyn Array + '_ {
271 pub fn nbytes(&self) -> u64 {
273 let mut nbytes = 0;
274 for array in self.depth_first_traversal() {
275 for buffer in array.buffers() {
276 nbytes += buffer.len() as u64;
277 }
278 }
279 nbytes
280 }
281}
282
283mod private {
284 use super::*;
285
286 pub trait Sealed {}
287
288 impl<V: VTable> Sealed for ArrayAdapter<V> {}
289 impl Sealed for Arc<dyn Array> {}
290}
291
292#[repr(transparent)]
299pub struct ArrayAdapter<V: VTable>(V::Array);
300
301impl<V: VTable> Debug for ArrayAdapter<V> {
302 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
303 self.0.fmt(f)
304 }
305}
306
307impl<V: VTable> Array for ArrayAdapter<V> {
308 fn as_any(&self) -> &dyn Any {
309 self
310 }
311
312 fn to_array(&self) -> ArrayRef {
313 Arc::new(ArrayAdapter::<V>(self.0.clone()))
314 }
315
316 fn len(&self) -> usize {
317 <V::ArrayVTable as ArrayVTable<V>>::len(&self.0)
318 }
319
320 fn dtype(&self) -> &DType {
321 <V::ArrayVTable as ArrayVTable<V>>::dtype(&self.0)
322 }
323
324 fn encoding(&self) -> EncodingRef {
325 V::encoding(&self.0)
326 }
327
328 fn encoding_id(&self) -> EncodingId {
329 V::encoding(&self.0).id()
330 }
331
332 fn slice(&self, start: usize, stop: usize) -> VortexResult<ArrayRef> {
333 if start == 0 && stop == self.len() {
334 return Ok(self.to_array());
335 }
336
337 if start > self.len() {
338 vortex_bail!(OutOfBounds: start, 0, self.len());
339 }
340 if stop > self.len() {
341 vortex_bail!(OutOfBounds: stop, 0, self.len());
342 }
343 if start > stop {
344 vortex_bail!("start ({start}) must be <= stop ({stop})");
345 }
346
347 if start == stop {
348 return Ok(Canonical::empty(self.dtype()).into_array());
349 }
350
351 let derived_stats = (!self.0.is_constant_opts(Cost::Negligible)).then(|| {
356 let stats = self.statistics().to_owned();
357
358 let is_constant = stats.get_as::<bool>(Stat::IsConstant);
360 let is_sorted = stats.get_as::<bool>(Stat::IsSorted);
361 let is_strict_sorted = stats.get_as::<bool>(Stat::IsStrictSorted);
362
363 let mut stats = stats.keep_inexact_stats(&[
364 Stat::Max,
365 Stat::Min,
366 Stat::NullCount,
367 Stat::UncompressedSizeInBytes,
368 ]);
369
370 if is_constant == Some(Precision::Exact(true)) {
371 stats.set(Stat::IsConstant, Precision::exact(true));
372 }
373 if is_sorted == Some(Precision::Exact(true)) {
374 stats.set(Stat::IsSorted, Precision::exact(true));
375 }
376 if is_strict_sorted == Some(Precision::Exact(true)) {
377 stats.set(Stat::IsStrictSorted, Precision::exact(true));
378 }
379
380 stats
381 });
382
383 let sliced = <V::OperationsVTable as OperationsVTable<V>>::slice(&self.0, start, stop)?;
384
385 assert_eq!(
386 sliced.len(),
387 stop - start,
388 "Slice length mismatch {}",
389 self.encoding_id()
390 );
391 assert_eq!(
392 sliced.dtype(),
393 self.dtype(),
394 "Slice dtype mismatch {}",
395 self.encoding_id()
396 );
397
398 if let Some(derived_stats) = derived_stats {
399 let mut stats = sliced.statistics().to_owned();
400 stats.combine_sets(&derived_stats, self.dtype())?;
401 for (stat, val) in stats.into_iter() {
402 sliced.statistics().set(stat, val)
403 }
404 }
405
406 Ok(sliced)
407 }
408
409 fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
410 if index >= self.len() {
411 vortex_bail!(OutOfBounds: index, 0, self.len());
412 }
413 if self.is_invalid(index)? {
414 return Ok(Scalar::null(self.dtype().clone()));
415 }
416 let scalar = <V::OperationsVTable as OperationsVTable<V>>::scalar_at(&self.0, index)?;
417 assert_eq!(self.dtype(), scalar.dtype(), "Scalar dtype mismatch");
418 Ok(scalar)
419 }
420
421 fn is_valid(&self, index: usize) -> VortexResult<bool> {
422 if index >= self.len() {
423 vortex_bail!(OutOfBounds: index, 0, self.len());
424 }
425 <V::ValidityVTable as ValidityVTable<V>>::is_valid(&self.0, index)
426 }
427
428 fn is_invalid(&self, index: usize) -> VortexResult<bool> {
429 self.is_valid(index).map(|valid| !valid)
430 }
431
432 fn all_valid(&self) -> VortexResult<bool> {
433 <V::ValidityVTable as ValidityVTable<V>>::all_valid(&self.0)
434 }
435
436 fn all_invalid(&self) -> VortexResult<bool> {
437 <V::ValidityVTable as ValidityVTable<V>>::all_invalid(&self.0)
438 }
439
440 fn valid_count(&self) -> VortexResult<usize> {
441 if let Some(Precision::Exact(invalid_count)) =
442 self.statistics().get_as::<usize>(Stat::NullCount)
443 {
444 return Ok(self.len() - invalid_count);
445 }
446
447 let count = <V::ValidityVTable as ValidityVTable<V>>::valid_count(&self.0)?;
448 assert!(count <= self.len(), "Valid count exceeds array length");
449
450 self.statistics()
451 .set(Stat::NullCount, Precision::exact(self.len() - count));
452
453 Ok(count)
454 }
455
456 fn invalid_count(&self) -> VortexResult<usize> {
457 if let Some(Precision::Exact(invalid_count)) =
458 self.statistics().get_as::<usize>(Stat::NullCount)
459 {
460 return Ok(invalid_count);
461 }
462
463 let count = <V::ValidityVTable as ValidityVTable<V>>::invalid_count(&self.0)?;
464 assert!(count <= self.len(), "Invalid count exceeds array length");
465
466 self.statistics()
467 .set(Stat::NullCount, Precision::exact(count));
468
469 Ok(count)
470 }
471
472 fn validity_mask(&self) -> VortexResult<Mask> {
473 let mask = <V::ValidityVTable as ValidityVTable<V>>::validity_mask(&self.0)?;
474 assert_eq!(mask.len(), self.len(), "Validity mask length mismatch");
475 Ok(mask)
476 }
477
478 fn to_canonical(&self) -> VortexResult<Canonical> {
479 let canonical = <V::CanonicalVTable as CanonicalVTable<V>>::canonicalize(&self.0)?;
480 assert_eq!(
481 self.len(),
482 canonical.as_ref().len(),
483 "Canonical length mismatch {}. Expected {} but encoded into {}.",
484 self.encoding_id(),
485 self.len(),
486 canonical.as_ref().len()
487 );
488 assert_eq!(
489 self.dtype(),
490 canonical.as_ref().dtype(),
491 "Canonical dtype mismatch {}. Expected {} but encoded into {}.",
492 self.encoding_id(),
493 self.dtype(),
494 canonical.as_ref().dtype()
495 );
496 canonical.as_ref().statistics().inherit(self.statistics());
497 Ok(canonical)
498 }
499
500 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
501 if builder.dtype() != self.dtype() {
502 vortex_bail!(
503 "Builder dtype mismatch: expected {}, got {}",
504 self.dtype(),
505 builder.dtype(),
506 );
507 }
508 let len = builder.len();
509
510 <V::CanonicalVTable as CanonicalVTable<V>>::append_to_builder(&self.0, builder)?;
511 assert_eq!(
512 len + self.len(),
513 builder.len(),
514 "Builder length mismatch after writing array for encoding {}",
515 self.encoding_id(),
516 );
517 Ok(())
518 }
519
520 fn statistics(&self) -> StatsSetRef<'_> {
521 <V::ArrayVTable as ArrayVTable<V>>::stats(&self.0)
522 }
523
524 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
525 struct ReplacementChildren<'a> {
526 children: &'a [ArrayRef],
527 }
528
529 impl ArrayChildren for ReplacementChildren<'_> {
530 fn get(&self, index: usize, dtype: &DType, len: usize) -> VortexResult<ArrayRef> {
531 if index >= self.children.len() {
532 vortex_bail!(OutOfBounds: index, 0, self.children.len());
533 }
534 let child = &self.children[index];
535 if child.len() != len {
536 vortex_bail!(
537 "Child length mismatch: expected {}, got {}",
538 len,
539 child.len()
540 );
541 }
542 if child.dtype() != dtype {
543 vortex_bail!(
544 "Child dtype mismatch: expected {}, got {}",
545 dtype,
546 child.dtype()
547 );
548 }
549 Ok(child.clone())
550 }
551
552 fn len(&self) -> usize {
553 self.children.len()
554 }
555 }
556
557 let metadata = self.metadata()?.ok_or_else(|| {
558 vortex_err!("Cannot replace children for arrays that do not support serialization")
559 })?;
560
561 self.encoding().build(
563 self.dtype(),
564 self.len(),
565 &metadata,
566 &self.buffers(),
567 &ReplacementChildren { children },
568 )
569 }
570
571 fn invoke(
572 &self,
573 compute_fn: &ComputeFn,
574 args: &InvocationArgs,
575 ) -> VortexResult<Option<Output>> {
576 <V::ComputeVTable as ComputeVTable<V>>::invoke(&self.0, compute_fn, args)
577 }
578}
579
580impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
581 fn children(&self) -> Vec<ArrayRef> {
582 struct ChildrenCollector {
583 children: Vec<ArrayRef>,
584 }
585
586 impl ArrayChildVisitor for ChildrenCollector {
587 fn visit_child(&mut self, _name: &str, array: &dyn Array) {
588 self.children.push(array.to_array());
589 }
590 }
591
592 let mut collector = ChildrenCollector {
593 children: Vec::new(),
594 };
595 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
596 collector.children
597 }
598
599 fn nchildren(&self) -> usize {
600 <V::VisitorVTable as VisitorVTable<V>>::nchildren(&self.0)
601 }
602
603 fn children_names(&self) -> Vec<String> {
604 struct ChildNameCollector {
605 names: Vec<String>,
606 }
607
608 impl ArrayChildVisitor for ChildNameCollector {
609 fn visit_child(&mut self, name: &str, _array: &dyn Array) {
610 self.names.push(name.to_string());
611 }
612 }
613
614 let mut collector = ChildNameCollector { names: Vec::new() };
615 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
616 collector.names
617 }
618
619 fn named_children(&self) -> Vec<(String, ArrayRef)> {
620 struct NamedChildrenCollector {
621 children: Vec<(String, ArrayRef)>,
622 }
623
624 impl ArrayChildVisitor for NamedChildrenCollector {
625 fn visit_child(&mut self, name: &str, array: &dyn Array) {
626 self.children.push((name.to_string(), array.to_array()));
627 }
628 }
629
630 let mut collector = NamedChildrenCollector {
631 children: Vec::new(),
632 };
633
634 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
635 collector.children
636 }
637
638 fn buffers(&self) -> Vec<ByteBuffer> {
639 struct BufferCollector {
640 buffers: Vec<ByteBuffer>,
641 }
642
643 impl ArrayBufferVisitor for BufferCollector {
644 fn visit_buffer(&mut self, buffer: &ByteBuffer) {
645 self.buffers.push(buffer.clone());
646 }
647 }
648
649 let mut collector = BufferCollector {
650 buffers: Vec::new(),
651 };
652 <V::VisitorVTable as VisitorVTable<V>>::visit_buffers(&self.0, &mut collector);
653 collector.buffers
654 }
655
656 fn nbuffers(&self) -> usize {
657 <V::VisitorVTable as VisitorVTable<V>>::nbuffers(&self.0)
658 }
659
660 fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
661 Ok(<V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0)?.map(|m| m.serialize()))
662 }
663
664 fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
665 match <V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0) {
666 Err(e) => write!(f, "<serde error: {e}>"),
667 Ok(None) => write!(f, "<serde not supported>"),
668 Ok(Some(metadata)) => Debug::fmt(&metadata, f),
669 }
670 }
671}