1use std::any::type_name;
5use std::fmt::Debug;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::hash::Hasher;
9use std::ops::Range;
10use std::sync::Arc;
11
12use vortex_buffer::ByteBuffer;
13use vortex_error::VortexExpect;
14use vortex_error::VortexResult;
15use vortex_error::vortex_ensure;
16use vortex_error::vortex_err;
17use vortex_error::vortex_panic;
18use vortex_mask::Mask;
19
20use crate::AnyCanonical;
21use crate::Array;
22use crate::ArrayEq;
23use crate::ArrayHash;
24use crate::ArrayView;
25use crate::Canonical;
26use crate::ExecutionCtx;
27use crate::ExecutionResult;
28use crate::IntoArray;
29use crate::LEGACY_SESSION;
30use crate::VTable;
31use crate::VortexSessionExecute;
32use crate::aggregate_fn::fns::sum::sum;
33use crate::array::ArrayData;
34use crate::array::ArrayId;
35use crate::array::ArrayInner;
36use crate::array::ArraySlots;
37use crate::array::DynArrayData;
38use crate::arrays::Bool;
39use crate::arrays::Constant;
40use crate::arrays::DictArray;
41use crate::arrays::FilterArray;
42use crate::arrays::Null;
43use crate::arrays::Primitive;
44use crate::arrays::SliceArray;
45use crate::arrays::VarBin;
46use crate::arrays::VarBinView;
47use crate::buffer::BufferHandle;
48use crate::builders::ArrayBuilder;
49use crate::dtype::DType;
50use crate::dtype::Nullability;
51use crate::expr::stats::Precision;
52use crate::expr::stats::Stat;
53use crate::expr::stats::StatsProviderExt;
54use crate::matcher::Matcher;
55use crate::optimizer::ArrayOptimizer;
56use crate::scalar::Scalar;
57use crate::stats::StatsSetRef;
58use crate::validity::Validity;
59
60pub struct DepthFirstArrayIterator {
62 stack: Vec<ArrayRef>,
63}
64
65impl Iterator for DepthFirstArrayIterator {
66 type Item = ArrayRef;
67
68 fn next(&mut self) -> Option<Self::Item> {
69 let next = self.stack.pop()?;
70 for child in next.children().into_iter().rev() {
71 self.stack.push(child);
72 }
73 Some(next)
74 }
75}
76
77#[derive(Clone)]
83pub struct ArrayRef(Arc<ArrayInner<dyn DynArrayData>>);
84
85impl ArrayRef {
86 pub(crate) fn from_inner<D: DynArrayData>(inner: Arc<ArrayInner<D>>) -> Self {
88 Self(inner)
89 }
90
91 #[inline(always)]
93 pub(crate) fn dyn_array(&self) -> &dyn DynArrayData {
94 &self.0.data
95 }
96
97 #[inline(always)]
99 pub(crate) fn inner_mut(&mut self) -> Option<&mut ArrayInner<dyn DynArrayData>> {
100 Arc::get_mut(&mut self.0)
101 }
102
103 #[doc(hidden)]
106 pub fn addr(&self) -> usize {
107 Arc::as_ptr(&self.0).addr()
108 }
109
110 #[allow(dead_code)]
114 pub(crate) fn downcast_inner<V: VTable>(self) -> Result<Arc<ArrayInner<ArrayData<V>>>, Self> {
115 if self.0.data.as_any().is::<ArrayData<V>>() {
117 Ok(unsafe { self.downcast_inner_unchecked() })
118 } else {
119 Err(self)
120 }
121 }
122
123 #[inline(always)]
128 pub(crate) unsafe fn downcast_inner_unchecked<V: VTable>(
129 self,
130 ) -> Arc<ArrayInner<ArrayData<V>>> {
131 debug_assert!(self.0.data.as_any().is::<ArrayData<V>>());
132 let raw = Arc::into_raw(self.0);
136 unsafe { Arc::from_raw(raw.cast::<ArrayInner<ArrayData<V>>>()) }
138 }
139
140 pub fn ptr_eq(this: &ArrayRef, other: &ArrayRef) -> bool {
142 Arc::ptr_eq(&this.0, &other.0)
143 }
144}
145
146impl Debug for ArrayRef {
147 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
148 f.debug_struct("Array")
149 .field("encoding", &self.0.encoding_id)
150 .field("dtype", &self.0.dtype)
151 .field("len", &self.0.len)
152 .field("data", &self.0.data)
153 .finish()
154 }
155}
156
157impl ArrayHash for ArrayRef {
158 fn array_hash<H: Hasher>(&self, state: &mut H, precision: crate::Precision) {
159 self.0.len.hash(state);
160 self.0.dtype.hash(state);
161 self.0.encoding_id.hash(state);
162 self.0.slots.len().hash(state);
163 for slot in &self.0.slots {
164 slot.array_hash(state, precision);
165 }
166 self.0
167 .data
168 .dyn_array_hash(state as &mut dyn Hasher, precision);
169 }
170}
171
172impl ArrayEq for ArrayRef {
173 fn array_eq(&self, other: &Self, precision: crate::Precision) -> bool {
174 self.0.len == other.0.len
175 && self.0.dtype == other.0.dtype
176 && self.0.encoding_id == other.0.encoding_id
177 && self.0.slots.len() == other.0.slots.len()
178 && self
179 .0
180 .slots
181 .iter()
182 .zip(other.0.slots.iter())
183 .all(|(slot, other_slot)| slot.array_eq(other_slot, precision))
184 && self.0.data.dyn_array_eq(other, precision)
185 }
186}
187impl ArrayRef {
188 #[inline]
190 pub fn len(&self) -> usize {
191 self.0.len
192 }
193
194 #[inline]
196 pub fn is_empty(&self) -> bool {
197 self.0.len == 0
198 }
199
200 #[inline]
202 pub fn dtype(&self) -> &DType {
203 &self.0.dtype
204 }
205
206 #[inline]
208 pub fn encoding_id(&self) -> ArrayId {
209 self.0.encoding_id
210 }
211
212 pub fn slice(&self, range: Range<usize>) -> VortexResult<ArrayRef> {
214 let len = self.len();
215 let start = range.start;
216 let stop = range.end;
217
218 if start == 0 && stop == len {
219 return Ok(self.clone());
220 }
221
222 vortex_ensure!(start <= len, "OutOfBounds: start {start} > length {}", len);
223 vortex_ensure!(stop <= len, "OutOfBounds: stop {stop} > length {}", len);
224
225 vortex_ensure!(start <= stop, "start ({start}) must be <= stop ({stop})");
226
227 if start == stop {
228 return Ok(Canonical::empty(self.dtype()).into_array());
229 }
230
231 let sliced = SliceArray::try_new(self.clone(), range)?
232 .into_array()
233 .optimize()?;
234
235 if !sliced.is::<Constant>() {
237 self.statistics().with_iter(|iter| {
238 sliced.statistics().inherit(iter.filter(|(stat, value)| {
239 matches!(
240 stat,
241 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
242 ) && value.as_ref().as_exact().is_some_and(|v| {
243 Scalar::try_new(DType::Bool(Nullability::NonNullable), Some(v.clone()))
244 .vortex_expect("A stat that was expected to be a boolean stat was not")
245 .as_bool()
246 .value()
247 .unwrap_or_default()
248 })
249 }));
250 });
251 }
252
253 Ok(sliced)
254 }
255
256 pub fn filter(&self, mask: Mask) -> VortexResult<ArrayRef> {
258 FilterArray::try_new(self.clone(), mask)?
259 .into_array()
260 .optimize()
261 }
262
263 pub fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef> {
265 DictArray::try_new(indices, self.clone())?
266 .into_array()
267 .optimize()
268 }
269
270 #[deprecated(
272 note = "Use `execute_scalar` instead, which allows passing an execution context for more \
273 efficient execution when fetching multiple scalars from the same array."
274 )]
275 pub fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
276 self.execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
277 }
278
279 pub fn execute_scalar(&self, index: usize, ctx: &mut ExecutionCtx) -> VortexResult<Scalar> {
281 vortex_ensure!(index < self.len(), OutOfBounds: index, 0, self.len());
282 if self.dtype().is_nullable() && self.is_invalid(index, ctx)? {
283 return Ok(Scalar::null(self.dtype().clone()));
284 }
285 let scalar = self.0.data.execute_scalar(self, index, ctx)?;
286 debug_assert_eq!(self.dtype(), scalar.dtype(), "Scalar dtype mismatch");
287 Ok(scalar)
288 }
289
290 pub fn is_valid(&self, index: usize, ctx: &mut ExecutionCtx) -> VortexResult<bool> {
292 vortex_ensure!(index < self.len(), OutOfBounds: index, 0, self.len());
293 match self.validity()? {
294 Validity::NonNullable | Validity::AllValid => Ok(true),
295 Validity::AllInvalid => Ok(false),
296 Validity::Array(a) => a
297 .execute_scalar(index, ctx)?
298 .as_bool()
299 .value()
300 .ok_or_else(|| vortex_err!("validity value at index {} is null", index)),
301 }
302 }
303
304 pub fn is_invalid(&self, index: usize, ctx: &mut ExecutionCtx) -> VortexResult<bool> {
306 Ok(!self.is_valid(index, ctx)?)
307 }
308
309 pub fn all_valid(&self, ctx: &mut ExecutionCtx) -> VortexResult<bool> {
311 match self.validity()? {
312 Validity::NonNullable | Validity::AllValid => Ok(true),
313 Validity::AllInvalid => Ok(false),
314 Validity::Array(a) => Ok(a.statistics().compute_min::<bool>(ctx).unwrap_or(false)),
315 }
316 }
317
318 pub fn all_invalid(&self, ctx: &mut ExecutionCtx) -> VortexResult<bool> {
320 match self.validity()? {
321 Validity::NonNullable | Validity::AllValid => Ok(false),
322 Validity::AllInvalid => Ok(true),
323 Validity::Array(a) => Ok(!a.statistics().compute_max::<bool>(ctx).unwrap_or(true)),
324 }
325 }
326
327 pub fn valid_count(&self, ctx: &mut ExecutionCtx) -> VortexResult<usize> {
329 let len = self.len();
330 if let Precision::Exact(invalid_count) = self.statistics().get_as::<usize>(Stat::NullCount)
331 {
332 return Ok(len - invalid_count);
333 }
334
335 let count = match self.validity()? {
336 Validity::NonNullable | Validity::AllValid => len,
337 Validity::AllInvalid => 0,
338 Validity::Array(a) => {
339 let array_sum = sum(&a, ctx)?;
340 array_sum
341 .as_primitive()
342 .as_::<usize>()
343 .ok_or_else(|| vortex_err!("sum of validity array is null"))?
344 }
345 };
346 vortex_ensure!(count <= len, "Valid count exceeds array length");
347
348 self.statistics()
349 .set(Stat::NullCount, Precision::exact(len - count));
350
351 Ok(count)
352 }
353
354 pub fn invalid_count(&self, ctx: &mut ExecutionCtx) -> VortexResult<usize> {
356 Ok(self.len() - self.valid_count(ctx)?)
357 }
358
359 pub fn validity(&self) -> VortexResult<Validity> {
361 self.0.data.validity(self)
362 }
363
364 #[deprecated(note = "use `array.execute::<Canonical>(ctx)` instead")]
366 pub fn into_canonical(self) -> VortexResult<Canonical> {
367 self.execute(&mut LEGACY_SESSION.create_execution_ctx())
368 }
369
370 #[deprecated(note = "use `array.execute::<Canonical>(ctx)` instead")]
372 pub fn to_canonical(&self) -> VortexResult<Canonical> {
373 #[expect(deprecated)]
374 let result = self.clone().into_canonical();
375 result
376 }
377
378 pub fn append_to_builder(
380 &self,
381 builder: &mut dyn ArrayBuilder,
382 ctx: &mut ExecutionCtx,
383 ) -> VortexResult<()> {
384 self.0.data.append_to_builder(self, builder, ctx)
385 }
386
387 pub fn statistics(&self) -> StatsSetRef<'_> {
389 self.0.stats.to_ref(self)
390 }
391
392 pub fn is<M: Matcher>(&self) -> bool {
394 M::matches(self)
395 }
396
397 pub fn as_<M: Matcher>(&self) -> M::Match<'_> {
399 self.as_opt::<M>().vortex_expect("Failed to downcast")
400 }
401
402 pub fn as_opt<M: Matcher>(&self) -> Option<M::Match<'_>> {
404 M::try_match(self)
405 }
406
407 pub fn try_downcast<V: VTable>(self) -> Result<Array<V>, ArrayRef> {
409 Array::<V>::try_from_array_ref(self)
410 }
411
412 pub fn downcast<V: VTable>(self) -> Array<V> {
418 Self::try_downcast(self)
419 .unwrap_or_else(|_| vortex_panic!("Failed to downcast to {}", type_name::<V>()))
420 }
421
422 pub fn as_typed<V: VTable>(&self) -> Option<ArrayView<'_, V>> {
424 let inner = self.0.data.as_any().downcast_ref::<ArrayData<V>>()?;
425 Some(unsafe { ArrayView::new_unchecked(self, &inner.data) })
426 }
427
428 pub fn as_constant(&self) -> Option<Scalar> {
430 self.as_opt::<Constant>().map(|a| a.scalar().clone())
431 }
432
433 pub fn nbytes(&self) -> u64 {
435 let mut nbytes = 0;
436 for array in self.depth_first_traversal() {
437 for buffer in array.buffers() {
438 nbytes += buffer.len() as u64;
439 }
440 }
441 nbytes
442 }
443
444 pub fn is_arrow(&self) -> bool {
446 self.is::<Null>()
447 || self.is::<Bool>()
448 || self.is::<Primitive>()
449 || self.is::<VarBin>()
450 || self.is::<VarBinView>()
451 }
452
453 pub fn is_canonical(&self) -> bool {
455 self.is::<AnyCanonical>()
456 }
457
458 pub fn with_slot(self, slot_idx: usize, replacement: ArrayRef) -> VortexResult<ArrayRef> {
465 let mut slots: ArraySlots = self.slots().iter().cloned().collect();
466 let nslots = slots.len();
467 vortex_ensure!(
468 slot_idx < nslots,
469 "slot index {} out of bounds for array with {} slots",
470 slot_idx,
471 nslots
472 );
473 let existing = slots[slot_idx]
474 .as_ref()
475 .vortex_expect("with_slot cannot replace an absent slot");
476 vortex_ensure!(
477 existing.dtype() == replacement.dtype(),
478 "slot {} dtype changed from {} to {} during physical rewrite",
479 slot_idx,
480 existing.dtype(),
481 replacement.dtype()
482 );
483 vortex_ensure!(
484 existing.len() == replacement.len(),
485 "slot {} len changed from {} to {} during physical rewrite",
486 slot_idx,
487 existing.len(),
488 replacement.len()
489 );
490 slots[slot_idx] = Some(replacement);
491 self.with_slots(slots)
492 }
493
494 pub(crate) unsafe fn take_slot_unchecked(
506 mut self,
507 slot_idx: usize,
508 ) -> VortexResult<(ArrayRef, ArrayRef)> {
509 if let Some(inner) = Arc::get_mut(&mut self.0) {
510 let child = inner.slots[slot_idx]
511 .take()
512 .vortex_expect("take_slot_unchecked cannot take an absent slot");
513 return Ok((self, child));
514 }
515
516 let child = self.slots()[slot_idx]
519 .as_ref()
520 .vortex_expect("take_slot_unchecked cannot take an absent slot")
521 .clone();
522
523 let mut new_slots: ArraySlots = self.slots().iter().cloned().collect();
524 new_slots[slot_idx] = None;
525
526 let new_parent = unsafe { self.0.data.with_slots_unchecked(&self, new_slots) };
529 Ok((new_parent, child))
530 }
531
532 pub(crate) unsafe fn put_slot_unchecked(
540 mut self,
541 slot_idx: usize,
542 replacement: ArrayRef,
543 ) -> VortexResult<ArrayRef> {
544 if let Some(inner) = Arc::get_mut(&mut self.0) {
545 inner.slots[slot_idx] = Some(replacement);
546 return Ok(self);
547 }
548
549 let mut slots: ArraySlots = self.slots().iter().cloned().collect();
550 slots[slot_idx] = Some(replacement);
551 self.0.data.with_slots(&self, slots)
552 }
553
554 pub fn with_slots(self, slots: ArraySlots) -> VortexResult<ArrayRef> {
559 let old_slots = self.slots();
560 vortex_ensure!(
561 old_slots.len() == slots.len(),
562 "slot count changed from {} to {} during physical rewrite",
563 old_slots.len(),
564 slots.len()
565 );
566 for (idx, (old_slot, new_slot)) in old_slots.iter().zip(slots.iter()).enumerate() {
567 vortex_ensure!(
568 old_slot.is_some() == new_slot.is_some(),
569 "slot {} presence changed during physical rewrite",
570 idx
571 );
572 if let (Some(old_slot), Some(new_slot)) = (old_slot.as_ref(), new_slot.as_ref()) {
573 vortex_ensure!(
574 old_slot.dtype() == new_slot.dtype(),
575 "slot {} dtype changed from {} to {} during physical rewrite",
576 idx,
577 old_slot.dtype(),
578 new_slot.dtype()
579 );
580 vortex_ensure!(
581 old_slot.len() == new_slot.len(),
582 "slot {} len changed from {} to {} during physical rewrite",
583 idx,
584 old_slot.len(),
585 new_slot.len()
586 );
587 }
588 }
589 self.0.data.with_slots(&self, slots)
590 }
591
592 pub fn reduce(&self) -> VortexResult<Option<ArrayRef>> {
593 self.0.data.reduce(self)
594 }
595
596 pub fn reduce_parent(
597 &self,
598 parent: &ArrayRef,
599 child_idx: usize,
600 ) -> VortexResult<Option<ArrayRef>> {
601 self.0.data.reduce_parent(self, parent, child_idx)
602 }
603
604 pub(crate) fn execute_encoding(self, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
605 let inner = Arc::as_ptr(&self.0);
606 unsafe { (&*inner).data.execute(self, ctx) }
608 }
609
610 pub(crate) fn execute_encoding_unchecked(
616 self,
617 ctx: &mut ExecutionCtx,
618 ) -> VortexResult<ExecutionResult> {
619 let inner = Arc::as_ptr(&self.0);
620 unsafe { (&*inner).data.execute_unchecked(self, ctx) }
624 }
625
626 pub fn execute_parent(
627 &self,
628 parent: &ArrayRef,
629 child_idx: usize,
630 ctx: &mut ExecutionCtx,
631 ) -> VortexResult<Option<ArrayRef>> {
632 self.0.data.execute_parent(self, parent, child_idx, ctx)
633 }
634
635 pub fn children(&self) -> Vec<ArrayRef> {
639 self.0.data.children(self)
640 }
641
642 pub fn nchildren(&self) -> usize {
644 self.0.data.nchildren(self)
645 }
646
647 pub fn nth_child(&self, idx: usize) -> Option<ArrayRef> {
649 self.0.data.nth_child(self, idx)
650 }
651
652 pub fn children_names(&self) -> Vec<String> {
654 self.0.data.children_names(self)
655 }
656
657 pub fn named_children(&self) -> Vec<(String, ArrayRef)> {
659 self.0.data.named_children(self)
660 }
661
662 pub fn buffers(&self) -> Vec<ByteBuffer> {
664 self.0.data.buffers(self)
665 }
666
667 pub fn buffer_handles(&self) -> Vec<BufferHandle> {
669 self.0.data.buffer_handles(self)
670 }
671
672 pub fn buffer_names(&self) -> Vec<String> {
674 self.0.data.buffer_names(self)
675 }
676
677 pub fn named_buffers(&self) -> Vec<(String, BufferHandle)> {
679 self.0.data.named_buffers(self)
680 }
681
682 pub fn nbuffers(&self) -> usize {
684 self.0.data.nbuffers(self)
685 }
686
687 pub fn slots(&self) -> &[Option<ArrayRef>] {
689 &self.0.slots
690 }
691
692 pub fn slot_name(&self, idx: usize) -> String {
694 self.0.data.slot_name(self, idx)
695 }
696
697 pub fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
699 self.0.data.metadata_fmt(f)
700 }
701
702 pub fn is_host(&self) -> bool {
704 for array in self.depth_first_traversal() {
705 if !array.buffer_handles().iter().all(BufferHandle::is_on_host) {
706 return false;
707 }
708 }
709 true
710 }
711
712 pub fn nbuffers_recursive(&self) -> usize {
716 self.children()
717 .iter()
718 .map(|c| c.nbuffers_recursive())
719 .sum::<usize>()
720 + self.nbuffers()
721 }
722
723 pub fn depth_first_traversal(&self) -> DepthFirstArrayIterator {
725 DepthFirstArrayIterator {
726 stack: vec![self.clone()],
727 }
728 }
729}
730
731impl IntoArray for ArrayRef {
732 #[inline(always)]
733 fn into_array(self) -> ArrayRef {
734 self
735 }
736}
737
738impl<V: VTable> Matcher for V {
739 type Match<'a> = ArrayView<'a, V>;
740
741 fn matches(array: &ArrayRef) -> bool {
742 array.0.data.as_any().is::<ArrayData<V>>()
743 }
744
745 fn try_match(array: &'_ ArrayRef) -> Option<ArrayView<'_, V>> {
746 let inner = array.0.data.as_any().downcast_ref::<ArrayData<V>>()?;
747 Some(unsafe { ArrayView::new_unchecked(array, &inner.data) })
749 }
750}