1pub mod display;
5mod visitor;
6
7use std::any::Any;
8use std::fmt::{Debug, Formatter};
9use std::sync::Arc;
10
11pub use visitor::*;
12use vortex_buffer::ByteBuffer;
13use vortex_dtype::DType;
14use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
15use vortex_mask::Mask;
16use vortex_scalar::Scalar;
17
18use crate::arrays::{
19 BoolEncoding, ConstantVTable, DecimalEncoding, ExtensionEncoding, ListEncoding, NullEncoding,
20 PrimitiveEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
21};
22use crate::builders::ArrayBuilder;
23use crate::compute::{ComputeFn, Cost, InvocationArgs, IsConstantOpts, Output, is_constant_opts};
24use crate::serde::ArrayChildren;
25use crate::stats::{Precision, Stat, StatsSetRef};
26use crate::vtable::{
27 ArrayVTable, CanonicalVTable, ComputeVTable, OperationsVTable, SerdeVTable, VTable,
28 ValidityVTable, VisitorVTable,
29};
30use crate::{Canonical, EncodingId, EncodingRef, SerializeMetadata};
31
32pub trait Array: 'static + private::Sealed + Send + Sync + Debug + ArrayVisitor {
34 fn as_any(&self) -> &dyn Any;
36
37 fn to_array(&self) -> ArrayRef;
39
40 fn len(&self) -> usize;
42
43 fn is_empty(&self) -> bool {
45 self.len() == 0
46 }
47
48 fn dtype(&self) -> &DType;
50
51 fn encoding(&self) -> EncodingRef;
53
54 fn encoding_id(&self) -> EncodingId;
56
57 fn slice(&self, start: usize, end: usize) -> VortexResult<ArrayRef>;
59
60 fn scalar_at(&self, index: usize) -> VortexResult<Scalar>;
62
63 fn is_encoding(&self, encoding: EncodingId) -> bool {
65 self.encoding_id() == encoding
66 }
67
68 fn is_arrow(&self) -> bool {
71 self.is_encoding(NullEncoding.id())
72 || self.is_encoding(BoolEncoding.id())
73 || self.is_encoding(PrimitiveEncoding.id())
74 || self.is_encoding(VarBinEncoding.id())
75 || self.is_encoding(VarBinViewEncoding.id())
76 }
77
78 fn is_canonical(&self) -> bool {
81 self.is_encoding(NullEncoding.id())
82 || self.is_encoding(BoolEncoding.id())
83 || self.is_encoding(PrimitiveEncoding.id())
84 || self.is_encoding(DecimalEncoding.id())
85 || self.is_encoding(StructEncoding.id())
86 || self.is_encoding(ListEncoding.id())
87 || self.is_encoding(VarBinViewEncoding.id())
88 || self.is_encoding(ExtensionEncoding.id())
89 }
90
91 fn is_valid(&self, index: usize) -> VortexResult<bool>;
93
94 fn is_invalid(&self, index: usize) -> VortexResult<bool>;
96
97 fn all_valid(&self) -> VortexResult<bool>;
101
102 fn all_invalid(&self) -> VortexResult<bool>;
106
107 fn valid_count(&self) -> VortexResult<usize>;
109
110 fn invalid_count(&self) -> VortexResult<usize>;
112
113 fn validity_mask(&self) -> VortexResult<Mask>;
115
116 fn to_canonical(&self) -> VortexResult<Canonical>;
118
119 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()>;
123
124 fn statistics(&self) -> StatsSetRef<'_>;
127
128 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef>;
130
131 fn invoke(&self, compute_fn: &ComputeFn, args: &InvocationArgs)
148 -> VortexResult<Option<Output>>;
149}
150
151impl Array for Arc<dyn Array> {
152 fn as_any(&self) -> &dyn Any {
153 self.as_ref().as_any()
154 }
155
156 fn to_array(&self) -> ArrayRef {
157 self.clone()
158 }
159
160 fn len(&self) -> usize {
161 self.as_ref().len()
162 }
163
164 fn dtype(&self) -> &DType {
165 self.as_ref().dtype()
166 }
167
168 fn encoding(&self) -> EncodingRef {
169 self.as_ref().encoding()
170 }
171
172 fn encoding_id(&self) -> EncodingId {
173 self.as_ref().encoding_id()
174 }
175
176 fn slice(&self, start: usize, end: usize) -> VortexResult<ArrayRef> {
177 self.as_ref().slice(start, end)
178 }
179
180 fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
181 self.as_ref().scalar_at(index)
182 }
183
184 fn is_valid(&self, index: usize) -> VortexResult<bool> {
185 self.as_ref().is_valid(index)
186 }
187
188 fn is_invalid(&self, index: usize) -> VortexResult<bool> {
189 self.as_ref().is_invalid(index)
190 }
191
192 fn all_valid(&self) -> VortexResult<bool> {
193 self.as_ref().all_valid()
194 }
195
196 fn all_invalid(&self) -> VortexResult<bool> {
197 self.as_ref().all_invalid()
198 }
199
200 fn valid_count(&self) -> VortexResult<usize> {
201 self.as_ref().valid_count()
202 }
203
204 fn invalid_count(&self) -> VortexResult<usize> {
205 self.as_ref().invalid_count()
206 }
207
208 fn validity_mask(&self) -> VortexResult<Mask> {
209 self.as_ref().validity_mask()
210 }
211
212 fn to_canonical(&self) -> VortexResult<Canonical> {
213 self.as_ref().to_canonical()
214 }
215
216 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
217 self.as_ref().append_to_builder(builder)
218 }
219
220 fn statistics(&self) -> StatsSetRef<'_> {
221 self.as_ref().statistics()
222 }
223
224 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
225 self.as_ref().with_children(children)
226 }
227
228 fn invoke(
229 &self,
230 compute_fn: &ComputeFn,
231 args: &InvocationArgs,
232 ) -> VortexResult<Option<Output>> {
233 self.as_ref().invoke(compute_fn, args)
234 }
235}
236
/// A reference-counted pointer to a dynamically-typed [`Array`].
pub type ArrayRef = Arc<dyn Array>;
239
240impl ToOwned for dyn Array {
241 type Owned = ArrayRef;
242
243 fn to_owned(&self) -> Self::Owned {
244 self.to_array()
245 }
246}
247
248impl dyn Array + '_ {
249 pub fn as_<V: VTable>(&self) -> &V::Array {
251 self.as_opt::<V>().vortex_expect("Failed to downcast")
252 }
253
254 pub fn as_opt<V: VTable>(&self) -> Option<&V::Array> {
256 self.as_any()
257 .downcast_ref::<ArrayAdapter<V>>()
258 .map(|array_adapter| &array_adapter.0)
259 }
260
261 pub fn is<V: VTable>(&self) -> bool {
263 self.as_opt::<V>().is_some()
264 }
265
266 pub fn is_constant(&self) -> bool {
267 let opts = IsConstantOpts {
268 cost: Cost::Specialized,
269 };
270 is_constant_opts(self, &opts)
271 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
272 .ok()
273 .flatten()
274 .unwrap_or_default()
275 }
276
277 pub fn is_constant_opts(&self, cost: Cost) -> bool {
278 let opts = IsConstantOpts { cost };
279 is_constant_opts(self, &opts)
280 .inspect_err(|e| log::warn!("Failed to compute IsConstant: {e}"))
281 .ok()
282 .flatten()
283 .unwrap_or_default()
284 }
285
286 pub fn as_constant(&self) -> Option<Scalar> {
287 self.is_constant().then(|| self.scalar_at(0).ok()).flatten()
288 }
289
290 pub fn nbytes(&self) -> u64 {
292 let mut nbytes = 0;
293 for array in self.depth_first_traversal() {
294 for buffer in array.buffers() {
295 nbytes += buffer.len() as u64;
296 }
297 }
298 nbytes
299 }
300}
301
/// Conversion of an owned value into an [`ArrayRef`].
pub trait IntoArray {
    /// Consumes `self` and returns it as an [`ArrayRef`].
    fn into_array(self) -> ArrayRef;
}
306
/// An [`ArrayRef`] already is the target type, so the conversion returns it unchanged.
impl IntoArray for ArrayRef {
    fn into_array(self) -> ArrayRef {
        self
    }
}
312
/// Private module holding the `Sealed` marker that `Array` requires as a supertrait.
mod private {
    use super::*;

    /// Marker trait with no methods. Because this module is private, code that cannot name
    /// `Sealed` cannot implement it, and so cannot implement `Array` either.
    pub trait Sealed {}

    // The two kinds of type that are allowed to implement `Array`.
    impl<V: VTable> Sealed for ArrayAdapter<V> {}
    impl Sealed for Arc<dyn Array> {}
}
321
/// Newtype around `V::Array`, the concrete array type of an encoding `V`.
///
/// `Array` and `ArrayVisitor` are implemented on this wrapper by forwarding to the vtables
/// associated with `V`. `#[repr(transparent)]` gives the wrapper the same layout as the wrapped
/// `V::Array`.
#[repr(transparent)]
pub struct ArrayAdapter<V: VTable>(V::Array);
330
331impl<V: VTable> Debug for ArrayAdapter<V> {
332 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
333 self.0.fmt(f)
334 }
335}
336
337impl<V: VTable> Array for ArrayAdapter<V> {
338 fn as_any(&self) -> &dyn Any {
339 self
340 }
341
342 fn to_array(&self) -> ArrayRef {
343 Arc::new(ArrayAdapter::<V>(self.0.clone()))
344 }
345
346 fn len(&self) -> usize {
347 <V::ArrayVTable as ArrayVTable<V>>::len(&self.0)
348 }
349
350 fn dtype(&self) -> &DType {
351 <V::ArrayVTable as ArrayVTable<V>>::dtype(&self.0)
352 }
353
354 fn encoding(&self) -> EncodingRef {
355 V::encoding(&self.0)
356 }
357
358 fn encoding_id(&self) -> EncodingId {
359 V::encoding(&self.0).id()
360 }
361
362 fn slice(&self, start: usize, stop: usize) -> VortexResult<ArrayRef> {
363 if start == 0 && stop == self.len() {
364 return Ok(self.to_array());
365 }
366
367 if start > self.len() {
368 vortex_bail!(OutOfBounds: start, 0, self.len());
369 }
370 if stop > self.len() {
371 vortex_bail!(OutOfBounds: stop, 0, self.len());
372 }
373 if start > stop {
374 vortex_bail!("start ({start}) must be <= stop ({stop})");
375 }
376 if start == stop {
377 return Ok(Canonical::empty(self.dtype()).into_array());
378 }
379
380 let sliced = <V::OperationsVTable as OperationsVTable<V>>::slice(&self.0, start, stop)?;
381
382 assert_eq!(
383 sliced.len(),
384 stop - start,
385 "Slice length mismatch {}",
386 self.encoding_id()
387 );
388
389 debug_assert_eq!(
391 sliced.dtype(),
392 self.dtype(),
393 "Slice dtype mismatch {}",
394 self.encoding_id()
395 );
396
397 if !sliced.is::<ConstantVTable>() {
399 self.statistics().with_iter(|iter| {
400 sliced.statistics().inherit(iter.filter(|(stat, value)| {
401 matches!(
402 stat,
403 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
404 ) && value.as_ref().as_exact().is_some_and(|v| {
405 v.as_bool()
406 .vortex_expect("must be a bool")
407 .unwrap_or_default()
408 })
409 }));
410 });
411 }
412
413 Ok(sliced)
414 }
415
416 fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
417 if index >= self.len() {
418 vortex_bail!(OutOfBounds: index, 0, self.len());
419 }
420 if self.is_invalid(index)? {
421 return Ok(Scalar::null(self.dtype().clone()));
422 }
423 let scalar = <V::OperationsVTable as OperationsVTable<V>>::scalar_at(&self.0, index)?;
424 assert_eq!(self.dtype(), scalar.dtype(), "Scalar dtype mismatch");
425 Ok(scalar)
426 }
427
428 fn is_valid(&self, index: usize) -> VortexResult<bool> {
429 if index >= self.len() {
430 vortex_bail!(OutOfBounds: index, 0, self.len());
431 }
432 <V::ValidityVTable as ValidityVTable<V>>::is_valid(&self.0, index)
433 }
434
435 fn is_invalid(&self, index: usize) -> VortexResult<bool> {
436 self.is_valid(index).map(|valid| !valid)
437 }
438
439 fn all_valid(&self) -> VortexResult<bool> {
440 <V::ValidityVTable as ValidityVTable<V>>::all_valid(&self.0)
441 }
442
443 fn all_invalid(&self) -> VortexResult<bool> {
444 <V::ValidityVTable as ValidityVTable<V>>::all_invalid(&self.0)
445 }
446
447 fn valid_count(&self) -> VortexResult<usize> {
448 if let Some(Precision::Exact(invalid_count)) =
449 self.statistics().get_as::<usize>(Stat::NullCount)
450 {
451 return Ok(self.len() - invalid_count);
452 }
453
454 let count = <V::ValidityVTable as ValidityVTable<V>>::valid_count(&self.0)?;
455 assert!(count <= self.len(), "Valid count exceeds array length");
456
457 self.statistics()
458 .set(Stat::NullCount, Precision::exact(self.len() - count));
459
460 Ok(count)
461 }
462
463 fn invalid_count(&self) -> VortexResult<usize> {
464 if let Some(Precision::Exact(invalid_count)) =
465 self.statistics().get_as::<usize>(Stat::NullCount)
466 {
467 return Ok(invalid_count);
468 }
469
470 let count = <V::ValidityVTable as ValidityVTable<V>>::invalid_count(&self.0)?;
471 assert!(count <= self.len(), "Invalid count exceeds array length");
472
473 self.statistics()
474 .set(Stat::NullCount, Precision::exact(count));
475
476 Ok(count)
477 }
478
479 fn validity_mask(&self) -> VortexResult<Mask> {
480 let mask = <V::ValidityVTable as ValidityVTable<V>>::validity_mask(&self.0)?;
481 assert_eq!(mask.len(), self.len(), "Validity mask length mismatch");
482 Ok(mask)
483 }
484
485 fn to_canonical(&self) -> VortexResult<Canonical> {
486 let canonical = <V::CanonicalVTable as CanonicalVTable<V>>::canonicalize(&self.0)?;
487 assert_eq!(
488 self.len(),
489 canonical.as_ref().len(),
490 "Canonical length mismatch {}. Expected {} but encoded into {}.",
491 self.encoding_id(),
492 self.len(),
493 canonical.as_ref().len()
494 );
495 assert_eq!(
496 self.dtype(),
497 canonical.as_ref().dtype(),
498 "Canonical dtype mismatch {}. Expected {} but encoded into {}.",
499 self.encoding_id(),
500 self.dtype(),
501 canonical.as_ref().dtype()
502 );
503 canonical
504 .as_ref()
505 .statistics()
506 .replace(self.statistics().to_owned());
507 Ok(canonical)
508 }
509
510 fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
511 if builder.dtype() != self.dtype() {
512 vortex_bail!(
513 "Builder dtype mismatch: expected {}, got {}",
514 self.dtype(),
515 builder.dtype(),
516 );
517 }
518 let len = builder.len();
519
520 <V::CanonicalVTable as CanonicalVTable<V>>::append_to_builder(&self.0, builder)?;
521 assert_eq!(
522 len + self.len(),
523 builder.len(),
524 "Builder length mismatch after writing array for encoding {}",
525 self.encoding_id(),
526 );
527 Ok(())
528 }
529
530 fn statistics(&self) -> StatsSetRef<'_> {
531 <V::ArrayVTable as ArrayVTable<V>>::stats(&self.0)
532 }
533
534 fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
535 struct ReplacementChildren<'a> {
536 children: &'a [ArrayRef],
537 }
538
539 impl ArrayChildren for ReplacementChildren<'_> {
540 fn get(&self, index: usize, dtype: &DType, len: usize) -> VortexResult<ArrayRef> {
541 if index >= self.children.len() {
542 vortex_bail!(OutOfBounds: index, 0, self.children.len());
543 }
544 let child = &self.children[index];
545 if child.len() != len {
546 vortex_bail!(
547 "Child length mismatch: expected {}, got {}",
548 len,
549 child.len()
550 );
551 }
552 if child.dtype() != dtype {
553 vortex_bail!(
554 "Child dtype mismatch: expected {}, got {}",
555 dtype,
556 child.dtype()
557 );
558 }
559 Ok(child.clone())
560 }
561
562 fn len(&self) -> usize {
563 self.children.len()
564 }
565 }
566
567 let metadata = self.metadata()?.ok_or_else(|| {
568 vortex_err!("Cannot replace children for arrays that do not support serialization")
569 })?;
570
571 self.encoding().build(
573 self.dtype(),
574 self.len(),
575 &metadata,
576 &self.buffers(),
577 &ReplacementChildren { children },
578 )
579 }
580
581 fn invoke(
582 &self,
583 compute_fn: &ComputeFn,
584 args: &InvocationArgs,
585 ) -> VortexResult<Option<Output>> {
586 <V::ComputeVTable as ComputeVTable<V>>::invoke(&self.0, compute_fn, args)
587 }
588}
589
590impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
591 fn children(&self) -> Vec<ArrayRef> {
592 struct ChildrenCollector {
593 children: Vec<ArrayRef>,
594 }
595
596 impl ArrayChildVisitor for ChildrenCollector {
597 fn visit_child(&mut self, _name: &str, array: &dyn Array) {
598 self.children.push(array.to_array());
599 }
600 }
601
602 let mut collector = ChildrenCollector {
603 children: Vec::new(),
604 };
605 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
606 collector.children
607 }
608
609 fn nchildren(&self) -> usize {
610 <V::VisitorVTable as VisitorVTable<V>>::nchildren(&self.0)
611 }
612
613 fn children_names(&self) -> Vec<String> {
614 struct ChildNameCollector {
615 names: Vec<String>,
616 }
617
618 impl ArrayChildVisitor for ChildNameCollector {
619 fn visit_child(&mut self, name: &str, _array: &dyn Array) {
620 self.names.push(name.to_string());
621 }
622 }
623
624 let mut collector = ChildNameCollector { names: Vec::new() };
625 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
626 collector.names
627 }
628
629 fn named_children(&self) -> Vec<(String, ArrayRef)> {
630 struct NamedChildrenCollector {
631 children: Vec<(String, ArrayRef)>,
632 }
633
634 impl ArrayChildVisitor for NamedChildrenCollector {
635 fn visit_child(&mut self, name: &str, array: &dyn Array) {
636 self.children.push((name.to_string(), array.to_array()));
637 }
638 }
639
640 let mut collector = NamedChildrenCollector {
641 children: Vec::new(),
642 };
643
644 <V::VisitorVTable as VisitorVTable<V>>::visit_children(&self.0, &mut collector);
645 collector.children
646 }
647
648 fn buffers(&self) -> Vec<ByteBuffer> {
649 struct BufferCollector {
650 buffers: Vec<ByteBuffer>,
651 }
652
653 impl ArrayBufferVisitor for BufferCollector {
654 fn visit_buffer(&mut self, buffer: &ByteBuffer) {
655 self.buffers.push(buffer.clone());
656 }
657 }
658
659 let mut collector = BufferCollector {
660 buffers: Vec::new(),
661 };
662 <V::VisitorVTable as VisitorVTable<V>>::visit_buffers(&self.0, &mut collector);
663 collector.buffers
664 }
665
666 fn nbuffers(&self) -> usize {
667 <V::VisitorVTable as VisitorVTable<V>>::nbuffers(&self.0)
668 }
669
670 fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
671 Ok(<V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0)?.map(|m| m.serialize()))
672 }
673
674 fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
675 match <V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0) {
676 Err(e) => write!(f, "<serde error: {e}>"),
677 Ok(None) => write!(f, "<serde not supported>"),
678 Ok(Some(metadata)) => Debug::fmt(&metadata, f),
679 }
680 }
681}