1use std::any::type_name;
5use std::fmt::Debug;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::hash::Hasher;
9use std::ops::Range;
10use std::sync::Arc;
11
12use vortex_buffer::ByteBuffer;
13use vortex_error::VortexExpect;
14use vortex_error::VortexResult;
15use vortex_error::vortex_ensure;
16use vortex_error::vortex_err;
17use vortex_error::vortex_panic;
18use vortex_mask::Mask;
19
20use crate::AnyCanonical;
21use crate::Array;
22use crate::ArrayEq;
23use crate::ArrayHash;
24use crate::ArrayView;
25use crate::Canonical;
26use crate::ExecutionCtx;
27use crate::ExecutionResult;
28use crate::IntoArray;
29use crate::LEGACY_SESSION;
30use crate::VTable;
31use crate::VortexSessionExecute;
32use crate::aggregate_fn::fns::sum::sum;
33use crate::array::ArrayData;
34use crate::array::ArrayId;
35use crate::array::ArrayInner;
36use crate::array::ArraySlots;
37use crate::array::DynArrayData;
38use crate::arrays::Bool;
39use crate::arrays::Constant;
40use crate::arrays::DictArray;
41use crate::arrays::FilterArray;
42use crate::arrays::Null;
43use crate::arrays::Primitive;
44use crate::arrays::SliceArray;
45use crate::arrays::VarBin;
46use crate::arrays::VarBinView;
47use crate::buffer::BufferHandle;
48use crate::builders::ArrayBuilder;
49use crate::dtype::DType;
50use crate::dtype::Nullability;
51use crate::expr::stats::Precision;
52use crate::expr::stats::Stat;
53use crate::expr::stats::StatsProviderExt;
54use crate::matcher::Matcher;
55use crate::optimizer::ArrayOptimizer;
56use crate::scalar::Scalar;
57use crate::stats::StatsSetRef;
58use crate::validity::Validity;
59
60pub struct DepthFirstArrayIterator {
62 stack: Vec<ArrayRef>,
63}
64
65impl Iterator for DepthFirstArrayIterator {
66 type Item = ArrayRef;
67
68 fn next(&mut self) -> Option<Self::Item> {
69 let next = self.stack.pop()?;
70 for child in next.children().into_iter().rev() {
71 self.stack.push(child);
72 }
73 Some(next)
74 }
75}
76
77#[derive(Clone)]
83pub struct ArrayRef(Arc<ArrayInner<dyn DynArrayData>>);
84
85impl ArrayRef {
86 pub(crate) fn from_inner<D: DynArrayData>(inner: Arc<ArrayInner<D>>) -> Self {
88 Self(inner)
89 }
90
91 #[inline(always)]
93 pub(crate) fn dyn_array(&self) -> &dyn DynArrayData {
94 &self.0.data
95 }
96
97 #[inline(always)]
99 pub(crate) fn inner_mut(&mut self) -> Option<&mut ArrayInner<dyn DynArrayData>> {
100 Arc::get_mut(&mut self.0)
101 }
102
103 #[doc(hidden)]
106 pub fn addr(&self) -> usize {
107 Arc::as_ptr(&self.0).addr()
108 }
109
110 #[allow(dead_code)]
114 pub(crate) fn downcast_inner<V: VTable>(self) -> Result<Arc<ArrayInner<ArrayData<V>>>, Self> {
115 if self.0.data.as_any().is::<ArrayData<V>>() {
117 Ok(unsafe { self.downcast_inner_unchecked() })
118 } else {
119 Err(self)
120 }
121 }
122
123 #[inline(always)]
128 pub(crate) unsafe fn downcast_inner_unchecked<V: VTable>(
129 self,
130 ) -> Arc<ArrayInner<ArrayData<V>>> {
131 debug_assert!(self.0.data.as_any().is::<ArrayData<V>>());
132 let raw = Arc::into_raw(self.0);
136 unsafe { Arc::from_raw(raw.cast::<ArrayInner<ArrayData<V>>>()) }
138 }
139
140 pub fn ptr_eq(this: &ArrayRef, other: &ArrayRef) -> bool {
142 Arc::ptr_eq(&this.0, &other.0)
143 }
144}
145
146impl Debug for ArrayRef {
147 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
148 f.debug_struct("Array")
149 .field("encoding", &self.0.encoding_id)
150 .field("dtype", &self.0.dtype)
151 .field("len", &self.0.len)
152 .field("data", &self.0.data)
153 .finish()
154 }
155}
156
157impl ArrayHash for ArrayRef {
158 fn array_hash<H: Hasher>(&self, state: &mut H, precision: crate::Precision) {
159 self.0.len.hash(state);
160 self.0.dtype.hash(state);
161 self.0.encoding_id.hash(state);
162 self.0.slots.len().hash(state);
163 for slot in &self.0.slots {
164 slot.array_hash(state, precision);
165 }
166 self.0
167 .data
168 .dyn_array_hash(state as &mut dyn Hasher, precision);
169 }
170}
171
172impl ArrayEq for ArrayRef {
173 fn array_eq(&self, other: &Self, precision: crate::Precision) -> bool {
174 self.0.len == other.0.len
175 && self.0.dtype == other.0.dtype
176 && self.0.encoding_id == other.0.encoding_id
177 && self.0.slots.len() == other.0.slots.len()
178 && self
179 .0
180 .slots
181 .iter()
182 .zip(other.0.slots.iter())
183 .all(|(slot, other_slot)| slot.array_eq(other_slot, precision))
184 && self.0.data.dyn_array_eq(other, precision)
185 }
186}
187impl ArrayRef {
188 #[inline]
190 pub fn len(&self) -> usize {
191 self.0.len
192 }
193
194 #[inline]
196 pub fn is_empty(&self) -> bool {
197 self.0.len == 0
198 }
199
200 #[inline]
202 pub fn dtype(&self) -> &DType {
203 &self.0.dtype
204 }
205
206 #[inline]
208 pub fn encoding_id(&self) -> ArrayId {
209 self.0.encoding_id
210 }
211
212 pub fn slice(&self, range: Range<usize>) -> VortexResult<ArrayRef> {
214 let len = self.len();
215 let start = range.start;
216 let stop = range.end;
217
218 if start == 0 && stop == len {
219 return Ok(self.clone());
220 }
221
222 vortex_ensure!(start <= len, "OutOfBounds: start {start} > length {}", len);
223 vortex_ensure!(stop <= len, "OutOfBounds: stop {stop} > length {}", len);
224
225 vortex_ensure!(start <= stop, "start ({start}) must be <= stop ({stop})");
226
227 if start == stop {
228 return Ok(Canonical::empty(self.dtype()).into_array());
229 }
230
231 let sliced = SliceArray::try_new(self.clone(), range)?
232 .into_array()
233 .optimize()?;
234
235 if !sliced.is::<Constant>() {
237 self.statistics().with_iter(|iter| {
238 sliced.statistics().inherit(iter.filter(|(stat, value)| {
239 matches!(
240 stat,
241 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
242 ) && value.as_ref().as_exact().is_some_and(|v| {
243 Scalar::try_new(DType::Bool(Nullability::NonNullable), Some(v.clone()))
244 .vortex_expect("A stat that was expected to be a boolean stat was not")
245 .as_bool()
246 .value()
247 .unwrap_or_default()
248 })
249 }));
250 });
251 }
252
253 Ok(sliced)
254 }
255
256 pub fn filter(&self, mask: Mask) -> VortexResult<ArrayRef> {
258 FilterArray::try_new(self.clone(), mask)?
259 .into_array()
260 .optimize()
261 }
262
263 pub fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef> {
265 DictArray::try_new(indices, self.clone())?
266 .into_array()
267 .optimize()
268 }
269
270 #[deprecated(
272 note = "Use `execute_scalar` instead, which allows passing an execution context for more \
273 efficient execution when fetching multiple scalars from the same array."
274 )]
275 pub fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
276 self.execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
277 }
278
279 pub fn execute_scalar(&self, index: usize, ctx: &mut ExecutionCtx) -> VortexResult<Scalar> {
281 vortex_ensure!(index < self.len(), OutOfBounds: index, 0, self.len());
282 if self.dtype().is_nullable() && self.is_invalid(index, ctx)? {
283 return Ok(Scalar::null(self.dtype().clone()));
284 }
285 let scalar = self.0.data.execute_scalar(self, index, ctx)?;
286 debug_assert_eq!(self.dtype(), scalar.dtype(), "Scalar dtype mismatch");
287 Ok(scalar)
288 }
289
290 pub fn is_valid(&self, index: usize, ctx: &mut ExecutionCtx) -> VortexResult<bool> {
292 vortex_ensure!(index < self.len(), OutOfBounds: index, 0, self.len());
293 match self.validity()? {
294 Validity::NonNullable | Validity::AllValid => Ok(true),
295 Validity::AllInvalid => Ok(false),
296 Validity::Array(a) => a
297 .execute_scalar(index, ctx)?
298 .as_bool()
299 .value()
300 .ok_or_else(|| vortex_err!("validity value at index {} is null", index)),
301 }
302 }
303
304 pub fn is_invalid(&self, index: usize, ctx: &mut ExecutionCtx) -> VortexResult<bool> {
306 Ok(!self.is_valid(index, ctx)?)
307 }
308
309 pub fn all_valid(&self, ctx: &mut ExecutionCtx) -> VortexResult<bool> {
311 match self.validity()? {
312 Validity::NonNullable | Validity::AllValid => Ok(true),
313 Validity::AllInvalid => Ok(false),
314 Validity::Array(a) => Ok(a.statistics().compute_min::<bool>(ctx).unwrap_or(false)),
315 }
316 }
317
318 pub fn all_invalid(&self, ctx: &mut ExecutionCtx) -> VortexResult<bool> {
320 match self.validity()? {
321 Validity::NonNullable | Validity::AllValid => Ok(false),
322 Validity::AllInvalid => Ok(true),
323 Validity::Array(a) => Ok(!a.statistics().compute_max::<bool>(ctx).unwrap_or(true)),
324 }
325 }
326
327 pub fn valid_count(&self, ctx: &mut ExecutionCtx) -> VortexResult<usize> {
329 let len = self.len();
330 if let Some(Precision::Exact(invalid_count)) =
331 self.statistics().get_as::<usize>(Stat::NullCount)
332 {
333 return Ok(len - invalid_count);
334 }
335
336 let count = match self.validity()? {
337 Validity::NonNullable | Validity::AllValid => len,
338 Validity::AllInvalid => 0,
339 Validity::Array(a) => {
340 let array_sum = sum(&a, ctx)?;
341 array_sum
342 .as_primitive()
343 .as_::<usize>()
344 .ok_or_else(|| vortex_err!("sum of validity array is null"))?
345 }
346 };
347 vortex_ensure!(count <= len, "Valid count exceeds array length");
348
349 self.statistics()
350 .set(Stat::NullCount, Precision::exact(len - count));
351
352 Ok(count)
353 }
354
355 pub fn invalid_count(&self, ctx: &mut ExecutionCtx) -> VortexResult<usize> {
357 Ok(self.len() - self.valid_count(ctx)?)
358 }
359
360 pub fn validity(&self) -> VortexResult<Validity> {
362 self.0.data.validity(self)
363 }
364
365 #[deprecated(note = "use `array.execute::<Canonical>(ctx)` instead")]
367 pub fn into_canonical(self) -> VortexResult<Canonical> {
368 self.execute(&mut LEGACY_SESSION.create_execution_ctx())
369 }
370
371 #[deprecated(note = "use `array.execute::<Canonical>(ctx)` instead")]
373 pub fn to_canonical(&self) -> VortexResult<Canonical> {
374 #[expect(deprecated)]
375 let result = self.clone().into_canonical();
376 result
377 }
378
379 pub fn append_to_builder(
381 &self,
382 builder: &mut dyn ArrayBuilder,
383 ctx: &mut ExecutionCtx,
384 ) -> VortexResult<()> {
385 self.0.data.append_to_builder(self, builder, ctx)
386 }
387
388 pub fn statistics(&self) -> StatsSetRef<'_> {
390 self.0.stats.to_ref(self)
391 }
392
393 pub fn is<M: Matcher>(&self) -> bool {
395 M::matches(self)
396 }
397
398 pub fn as_<M: Matcher>(&self) -> M::Match<'_> {
400 self.as_opt::<M>().vortex_expect("Failed to downcast")
401 }
402
403 pub fn as_opt<M: Matcher>(&self) -> Option<M::Match<'_>> {
405 M::try_match(self)
406 }
407
408 pub fn try_downcast<V: VTable>(self) -> Result<Array<V>, ArrayRef> {
410 Array::<V>::try_from_array_ref(self)
411 }
412
413 pub fn downcast<V: VTable>(self) -> Array<V> {
419 Self::try_downcast(self)
420 .unwrap_or_else(|_| vortex_panic!("Failed to downcast to {}", type_name::<V>()))
421 }
422
423 pub fn as_typed<V: VTable>(&self) -> Option<ArrayView<'_, V>> {
425 let inner = self.0.data.as_any().downcast_ref::<ArrayData<V>>()?;
426 Some(unsafe { ArrayView::new_unchecked(self, &inner.data) })
427 }
428
429 pub fn as_constant(&self) -> Option<Scalar> {
431 self.as_opt::<Constant>().map(|a| a.scalar().clone())
432 }
433
434 pub fn nbytes(&self) -> u64 {
436 let mut nbytes = 0;
437 for array in self.depth_first_traversal() {
438 for buffer in array.buffers() {
439 nbytes += buffer.len() as u64;
440 }
441 }
442 nbytes
443 }
444
445 pub fn is_arrow(&self) -> bool {
447 self.is::<Null>()
448 || self.is::<Bool>()
449 || self.is::<Primitive>()
450 || self.is::<VarBin>()
451 || self.is::<VarBinView>()
452 }
453
454 pub fn is_canonical(&self) -> bool {
456 self.is::<AnyCanonical>()
457 }
458
459 pub fn with_slot(self, slot_idx: usize, replacement: ArrayRef) -> VortexResult<ArrayRef> {
466 let mut slots: ArraySlots = self.slots().iter().cloned().collect();
467 let nslots = slots.len();
468 vortex_ensure!(
469 slot_idx < nslots,
470 "slot index {} out of bounds for array with {} slots",
471 slot_idx,
472 nslots
473 );
474 let existing = slots[slot_idx]
475 .as_ref()
476 .vortex_expect("with_slot cannot replace an absent slot");
477 vortex_ensure!(
478 existing.dtype() == replacement.dtype(),
479 "slot {} dtype changed from {} to {} during physical rewrite",
480 slot_idx,
481 existing.dtype(),
482 replacement.dtype()
483 );
484 vortex_ensure!(
485 existing.len() == replacement.len(),
486 "slot {} len changed from {} to {} during physical rewrite",
487 slot_idx,
488 existing.len(),
489 replacement.len()
490 );
491 slots[slot_idx] = Some(replacement);
492 self.with_slots(slots)
493 }
494
495 pub(crate) unsafe fn take_slot_unchecked(
507 mut self,
508 slot_idx: usize,
509 ) -> VortexResult<(ArrayRef, ArrayRef)> {
510 if let Some(inner) = Arc::get_mut(&mut self.0) {
511 let child = inner.slots[slot_idx]
512 .take()
513 .vortex_expect("take_slot_unchecked cannot take an absent slot");
514 return Ok((self, child));
515 }
516
517 let child = self.slots()[slot_idx]
520 .as_ref()
521 .vortex_expect("take_slot_unchecked cannot take an absent slot")
522 .clone();
523
524 let mut new_slots: ArraySlots = self.slots().iter().cloned().collect();
525 new_slots[slot_idx] = None;
526
527 let new_parent = unsafe { self.0.data.with_slots_unchecked(&self, new_slots) };
530 Ok((new_parent, child))
531 }
532
533 pub(crate) unsafe fn put_slot_unchecked(
541 mut self,
542 slot_idx: usize,
543 replacement: ArrayRef,
544 ) -> VortexResult<ArrayRef> {
545 if let Some(inner) = Arc::get_mut(&mut self.0) {
546 inner.slots[slot_idx] = Some(replacement);
547 return Ok(self);
548 }
549
550 let mut slots: ArraySlots = self.slots().iter().cloned().collect();
551 slots[slot_idx] = Some(replacement);
552 self.0.data.with_slots(&self, slots)
553 }
554
555 pub fn with_slots(self, slots: ArraySlots) -> VortexResult<ArrayRef> {
560 let old_slots = self.slots();
561 vortex_ensure!(
562 old_slots.len() == slots.len(),
563 "slot count changed from {} to {} during physical rewrite",
564 old_slots.len(),
565 slots.len()
566 );
567 for (idx, (old_slot, new_slot)) in old_slots.iter().zip(slots.iter()).enumerate() {
568 vortex_ensure!(
569 old_slot.is_some() == new_slot.is_some(),
570 "slot {} presence changed during physical rewrite",
571 idx
572 );
573 if let (Some(old_slot), Some(new_slot)) = (old_slot.as_ref(), new_slot.as_ref()) {
574 vortex_ensure!(
575 old_slot.dtype() == new_slot.dtype(),
576 "slot {} dtype changed from {} to {} during physical rewrite",
577 idx,
578 old_slot.dtype(),
579 new_slot.dtype()
580 );
581 vortex_ensure!(
582 old_slot.len() == new_slot.len(),
583 "slot {} len changed from {} to {} during physical rewrite",
584 idx,
585 old_slot.len(),
586 new_slot.len()
587 );
588 }
589 }
590 self.0.data.with_slots(&self, slots)
591 }
592
593 pub fn reduce(&self) -> VortexResult<Option<ArrayRef>> {
594 self.0.data.reduce(self)
595 }
596
597 pub fn reduce_parent(
598 &self,
599 parent: &ArrayRef,
600 child_idx: usize,
601 ) -> VortexResult<Option<ArrayRef>> {
602 self.0.data.reduce_parent(self, parent, child_idx)
603 }
604
605 pub(crate) fn execute_encoding(self, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
606 let inner = Arc::as_ptr(&self.0);
607 unsafe { (&*inner).data.execute(self, ctx) }
609 }
610
611 pub(crate) fn execute_encoding_unchecked(
617 self,
618 ctx: &mut ExecutionCtx,
619 ) -> VortexResult<ExecutionResult> {
620 let inner = Arc::as_ptr(&self.0);
621 unsafe { (&*inner).data.execute_unchecked(self, ctx) }
625 }
626
627 pub fn execute_parent(
628 &self,
629 parent: &ArrayRef,
630 child_idx: usize,
631 ctx: &mut ExecutionCtx,
632 ) -> VortexResult<Option<ArrayRef>> {
633 self.0.data.execute_parent(self, parent, child_idx, ctx)
634 }
635
636 pub fn children(&self) -> Vec<ArrayRef> {
640 self.0.data.children(self)
641 }
642
643 pub fn nchildren(&self) -> usize {
645 self.0.data.nchildren(self)
646 }
647
648 pub fn nth_child(&self, idx: usize) -> Option<ArrayRef> {
650 self.0.data.nth_child(self, idx)
651 }
652
653 pub fn children_names(&self) -> Vec<String> {
655 self.0.data.children_names(self)
656 }
657
658 pub fn named_children(&self) -> Vec<(String, ArrayRef)> {
660 self.0.data.named_children(self)
661 }
662
663 pub fn buffers(&self) -> Vec<ByteBuffer> {
665 self.0.data.buffers(self)
666 }
667
668 pub fn buffer_handles(&self) -> Vec<BufferHandle> {
670 self.0.data.buffer_handles(self)
671 }
672
673 pub fn buffer_names(&self) -> Vec<String> {
675 self.0.data.buffer_names(self)
676 }
677
678 pub fn named_buffers(&self) -> Vec<(String, BufferHandle)> {
680 self.0.data.named_buffers(self)
681 }
682
683 pub fn nbuffers(&self) -> usize {
685 self.0.data.nbuffers(self)
686 }
687
688 pub fn slots(&self) -> &[Option<ArrayRef>] {
690 &self.0.slots
691 }
692
693 pub fn slot_name(&self, idx: usize) -> String {
695 self.0.data.slot_name(self, idx)
696 }
697
698 pub fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
700 self.0.data.metadata_fmt(f)
701 }
702
703 pub fn is_host(&self) -> bool {
705 for array in self.depth_first_traversal() {
706 if !array.buffer_handles().iter().all(BufferHandle::is_on_host) {
707 return false;
708 }
709 }
710 true
711 }
712
713 pub fn nbuffers_recursive(&self) -> usize {
717 self.children()
718 .iter()
719 .map(|c| c.nbuffers_recursive())
720 .sum::<usize>()
721 + self.nbuffers()
722 }
723
724 pub fn depth_first_traversal(&self) -> DepthFirstArrayIterator {
726 DepthFirstArrayIterator {
727 stack: vec![self.clone()],
728 }
729 }
730}
731
/// Identity conversion: an [`ArrayRef`] already is the type-erased array handle.
impl IntoArray for ArrayRef {
    /// Returns `self` unchanged.
    #[inline(always)]
    fn into_array(self) -> ArrayRef {
        self
    }
}
738
739impl<V: VTable> Matcher for V {
740 type Match<'a> = ArrayView<'a, V>;
741
742 fn matches(array: &ArrayRef) -> bool {
743 array.0.data.as_any().is::<ArrayData<V>>()
744 }
745
746 fn try_match(array: &'_ ArrayRef) -> Option<ArrayView<'_, V>> {
747 let inner = array.0.data.as_any().downcast_ref::<ArrayData<V>>()?;
748 Some(unsafe { ArrayView::new_unchecked(array, &inner.data) })
750 }
751}