1use std::{marker::PhantomData, ptr::NonNull, slice, str, sync::Arc};
2
3use arrow_array::{
4 Array, ArrayRef, BinaryArray, BooleanArray, DictionaryArray, FixedSizeBinaryArray,
5 FixedSizeListArray, Float32Array, Float64Array, Int8Array, Int16Array, Int32Array, Int64Array,
6 LargeBinaryArray, LargeListArray, LargeStringArray, ListArray, MapArray, PrimitiveArray,
7 StringArray, StructArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, UnionArray,
8 types::{
9 Date32Type, Date64Type, DurationMicrosecondType, DurationMillisecondType,
10 DurationNanosecondType, DurationSecondType, Int8Type, Int16Type, Int32Type, Int64Type,
11 Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType,
12 TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
13 TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
14 },
15};
16use arrow_schema::{DataType, Field};
17
18use super::{
19 path::Path,
20 projection::{FieldProjector, StructProjection},
21 raw::{
22 DynFixedSizeListViewRaw, DynListViewRaw, DynMapViewRaw, DynStructViewRaw, DynUnionViewRaw,
23 },
24 views::{DynFixedSizeListView, DynListView, DynMapView, DynStructView, DynUnionView},
25};
26use crate::{DynViewError, cell::DynCell};
27
28impl DynCell {
29 #[must_use]
34 pub fn as_ref(&self) -> Option<DynCellRef<'_>> {
35 use DynCell::*;
36 Some(match self {
37 Null => DynCellRef::null(),
38 Bool(v) => DynCellRef::from_raw(DynCellRaw::Bool(*v)),
39 I8(v) => DynCellRef::from_raw(DynCellRaw::I8(*v)),
40 I16(v) => DynCellRef::from_raw(DynCellRaw::I16(*v)),
41 I32(v) => DynCellRef::from_raw(DynCellRaw::I32(*v)),
42 I64(v) => DynCellRef::from_raw(DynCellRaw::I64(*v)),
43 U8(v) => DynCellRef::from_raw(DynCellRaw::U8(*v)),
44 U16(v) => DynCellRef::from_raw(DynCellRaw::U16(*v)),
45 U32(v) => DynCellRef::from_raw(DynCellRaw::U32(*v)),
46 U64(v) => DynCellRef::from_raw(DynCellRaw::U64(*v)),
47 F32(v) => DynCellRef::from_raw(DynCellRaw::F32(*v)),
48 F64(v) => DynCellRef::from_raw(DynCellRaw::F64(*v)),
49 Str(s) => DynCellRef::from_raw(DynCellRaw::from_str(s)),
50 Bin(b) => DynCellRef::from_raw(DynCellRaw::from_bin(b)),
51 Struct(_) | List(_) | FixedSizeList(_) | Map(_) | Union { .. } => return None,
52 })
53 }
54}
55
// Generates three methods per listed primitive variant inside the surrounding
// `impl`: a crate-private constructor, a borrowing getter, and a consuming
// getter.
//
// Each entry is `(Variant, ctor, getter, into, Type, "arrow name", "description")`.
// The two string literals are only spliced into the generated rustdoc.
macro_rules! dyn_cell_primitive_methods {
    ($(($variant:ident, $ctor:ident, $getter:ident, $into:ident, $ty:ty, $arrow:literal, $desc:literal)),* $(,)?) => {
        $(
            #[doc = concat!("Constructs a dynamic cell wrapping an ", $arrow, " value.")]
            pub(crate) fn $ctor(value: $ty) -> Self {
                let raw = DynCellRaw::$variant(value);
                Self::from_raw(raw)
            }

            #[doc = concat!("Returns the ", $desc, " value if this cell stores an ", $arrow, ".")]
            pub fn $getter(&self) -> Option<$ty> {
                if let DynCellRaw::$variant(inner) = self.raw {
                    Some(inner)
                } else {
                    None
                }
            }

            #[doc = concat!("Consumes the cell and returns the ", $desc, " value if it stores an ", $arrow, ".")]
            pub fn $into(self) -> Option<$ty> {
                // The payload is a plain copyable scalar, so the borrowing
                // getter already yields the value being asked for.
                self.$getter()
            }
        )*
    };
}
82
/// A cell value stored as a [`DynCellRaw`] payload and tagged with the
/// lifetime `'a`.
///
/// The raw payload itself carries no lifetime parameter; the `PhantomData`
/// marker is what attaches `'a` to this wrapper.
#[derive(Clone)]
pub struct DynCellRef<'a> {
    /// The lifetime-free payload backing this reference.
    raw: DynCellRaw,
    /// Zero-sized marker that makes the struct generic over `'a`.
    _marker: PhantomData<&'a ()>,
}
89
90impl<'a> DynCellRef<'a> {
91 pub fn from_raw(raw: DynCellRaw) -> Self {
93 Self {
94 raw,
95 _marker: PhantomData,
96 }
97 }
98
99 pub fn as_raw(&self) -> &DynCellRaw {
101 &self.raw
102 }
103
104 pub fn into_raw(self) -> DynCellRaw {
106 self.raw
107 }
108
109 pub fn into_owned(self) -> Result<DynCell, DynViewError> {
111 self.raw.into_owned()
112 }
113
114 pub fn to_owned(&self) -> Result<DynCell, DynViewError> {
116 self.clone().into_owned()
117 }
118
119 pub fn is_null(&self) -> bool {
121 matches!(self.raw, DynCellRaw::Null)
122 }
123
124 pub(crate) fn null() -> Self {
126 Self::from_raw(DynCellRaw::Null)
127 }
128
129 dyn_cell_primitive_methods! {
130 (Bool, bool, as_bool, into_bool, bool, "Arrow boolean", "boolean"),
131 (I8, i8, as_i8, into_i8, i8, "Arrow Int8", "`i8`"),
132 (I16, i16, as_i16, into_i16, i16, "Arrow Int16", "`i16`"),
133 (I32, i32, as_i32, into_i32, i32, "Arrow Int32", "`i32`"),
134 (I64, i64, as_i64, into_i64, i64, "Arrow Int64", "`i64`"),
135 (U8, u8, as_u8, into_u8, u8, "Arrow UInt8", "`u8`"),
136 (U16, u16, as_u16, into_u16, u16, "Arrow UInt16", "`u16`"),
137 (U32, u32, as_u32, into_u32, u32, "Arrow UInt32", "`u32`"),
138 (U64, u64, as_u64, into_u64, u64, "Arrow UInt64", "`u64`"),
139 (F32, f32, as_f32, into_f32, f32, "Arrow Float32", "`f32`"),
140 (F64, f64, as_f64, into_f64, f64, "Arrow Float64", "`f64`")
141 }
142
143 pub(crate) fn string(value: &'a str) -> Self {
145 Self::from_raw(DynCellRaw::from_str(value))
146 }
147
148 pub(crate) fn binary(value: &'a [u8]) -> Self {
150 Self::from_raw(DynCellRaw::from_bin(value))
151 }
152
153 pub(crate) fn structure(view: DynStructView<'a>) -> Self {
155 Self::from_raw(DynCellRaw::from_struct(view))
156 }
157
158 pub(crate) fn list(view: DynListView<'a>) -> Self {
160 Self::from_raw(DynCellRaw::from_list(view))
161 }
162
163 pub(crate) fn fixed_size_list(view: DynFixedSizeListView<'a>) -> Self {
165 Self::from_raw(DynCellRaw::from_fixed_size_list(view))
166 }
167
168 pub(crate) fn map(view: DynMapView<'a>) -> Self {
170 Self::from_raw(DynCellRaw::from_map(view))
171 }
172
173 pub(crate) fn union(view: DynUnionView<'a>) -> Self {
175 Self::from_raw(DynCellRaw::from_union(view))
176 }
177
178 pub fn as_str(&self) -> Option<&'a str> {
180 match &self.raw {
181 DynCellRaw::Str { ptr, len } => unsafe {
182 let bytes = slice::from_raw_parts(ptr.as_ptr() as *const u8, *len);
183 Some(str::from_utf8_unchecked(bytes))
184 },
185 _ => None,
186 }
187 }
188
189 pub fn as_bin(&self) -> Option<&'a [u8]> {
192 match &self.raw {
193 DynCellRaw::Bin { ptr, len } => unsafe {
194 Some(slice::from_raw_parts(ptr.as_ptr() as *const u8, *len))
195 },
196 _ => None,
197 }
198 }
199
200 pub fn as_struct(&self) -> Option<DynStructView<'a>> {
202 match &self.raw {
203 DynCellRaw::Struct(raw) => unsafe { Some(raw.as_view()) },
204 _ => None,
205 }
206 }
207
208 pub fn as_list(&self) -> Option<DynListView<'a>> {
210 match &self.raw {
211 DynCellRaw::List(raw) => unsafe { Some(raw.as_view()) },
212 _ => None,
213 }
214 }
215
216 pub fn as_fixed_size_list(&self) -> Option<DynFixedSizeListView<'a>> {
218 match &self.raw {
219 DynCellRaw::FixedSizeList(raw) => unsafe { Some(raw.as_view()) },
220 _ => None,
221 }
222 }
223
224 pub fn as_map(&self) -> Option<DynMapView<'a>> {
226 match &self.raw {
227 DynCellRaw::Map(raw) => unsafe { Some(raw.as_view()) },
228 _ => None,
229 }
230 }
231
232 pub fn as_union(&self) -> Option<DynUnionView<'a>> {
234 match &self.raw {
235 DynCellRaw::Union(raw) => unsafe { Some(raw.as_view()) },
236 _ => None,
237 }
238 }
239
240 pub fn into_str(self) -> Option<&'a str> {
243 match self.raw {
244 DynCellRaw::Str { ptr, len } => unsafe {
245 let bytes = slice::from_raw_parts(ptr.as_ptr() as *const u8, len);
246 Some(str::from_utf8_unchecked(bytes))
247 },
248 _ => None,
249 }
250 }
251
252 pub fn into_bin(self) -> Option<&'a [u8]> {
255 match self.raw {
256 DynCellRaw::Bin { ptr, len } => unsafe {
257 Some(slice::from_raw_parts(ptr.as_ptr() as *const u8, len))
258 },
259 _ => None,
260 }
261 }
262
263 pub fn into_struct(self) -> Option<DynStructView<'a>> {
265 match self.raw {
266 DynCellRaw::Struct(raw) => unsafe { Some(raw.into_view()) },
267 _ => None,
268 }
269 }
270
271 pub fn into_list(self) -> Option<DynListView<'a>> {
273 match self.raw {
274 DynCellRaw::List(raw) => unsafe { Some(raw.into_view()) },
275 _ => None,
276 }
277 }
278
279 pub fn into_fixed_size_list(self) -> Option<DynFixedSizeListView<'a>> {
281 match self.raw {
282 DynCellRaw::FixedSizeList(raw) => unsafe { Some(raw.into_view()) },
283 _ => None,
284 }
285 }
286
287 pub fn into_map(self) -> Option<DynMapView<'a>> {
289 match self.raw {
290 DynCellRaw::Map(raw) => unsafe { Some(raw.into_view()) },
291 _ => None,
292 }
293 }
294
295 pub fn into_union(self) -> Option<DynUnionView<'a>> {
297 match self.raw {
298 DynCellRaw::Union(raw) => unsafe { Some(raw.into_view()) },
299 _ => None,
300 }
301 }
302}
303
304impl<'a> From<DynCellRaw> for DynCellRef<'a> {
305 fn from(raw: DynCellRaw) -> Self {
306 Self::from_raw(raw)
307 }
308}
309
310impl<'a> std::fmt::Debug for DynCellRef<'a> {
311 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
312 self.as_raw().fmt(f)
313 }
314}
315
/// Lifetime-free representation of a single cell value.
///
/// Primitive variants hold their value inline. `Str` and `Bin` hold a raw
/// pointer plus a byte length, and the nested variants hold raw view types;
/// none of them carry a lifetime, so validity of the referenced data is not
/// tracked by the type system.
#[derive(Clone)]
pub enum DynCellRaw {
    /// A null cell.
    Null,
    /// A boolean value.
    Bool(bool),
    /// An `i8` value.
    I8(i8),
    /// An `i16` value.
    I16(i16),
    /// An `i32` value.
    I32(i32),
    /// An `i64` value.
    I64(i64),
    /// A `u8` value.
    U8(u8),
    /// A `u16` value.
    U16(u16),
    /// A `u32` value.
    U32(u32),
    /// A `u64` value.
    U64(u64),
    /// An `f32` value.
    F32(f32),
    /// An `f64` value.
    F64(f64),
    /// String bytes; readers in this file reinterpret them as UTF-8 without
    /// validation.
    Str {
        /// Pointer to the first byte of the string.
        ptr: NonNull<u8>,
        /// Length of the string in bytes.
        len: usize,
    },
    /// Binary bytes.
    Bin {
        /// Pointer to the first byte of the buffer.
        ptr: NonNull<u8>,
        /// Length of the buffer in bytes.
        len: usize,
    },
    /// Raw form of a struct view.
    Struct(DynStructViewRaw),
    /// Raw form of a list view.
    List(DynListViewRaw),
    /// Raw form of a fixed-size list view.
    FixedSizeList(DynFixedSizeListViewRaw),
    /// Raw form of a map view.
    Map(DynMapViewRaw),
    /// Raw form of a union view.
    Union(DynUnionViewRaw),
}
372
// `NonNull<u8>` is neither `Send` nor `Sync`, so the `Str`/`Bin` variants keep
// `DynCellRaw` from receiving these auto traits; they are asserted manually.
//
// SAFETY / NOTE(review): within this file the stored pointers are only ever
// read (via `slice::from_raw_parts`), never written through. Soundness also
// depends on the nested raw view types being safe to send and share, which is
// not visible from here — confirm against their definitions.
unsafe impl Send for DynCellRaw {}
unsafe impl Sync for DynCellRaw {}
378
379impl DynCellRaw {
380 pub fn from_ref(cell: DynCellRef<'_>) -> Self {
382 cell.into_raw()
383 }
384
385 pub fn into_owned(self) -> Result<DynCell, DynViewError> {
387 match self {
388 DynCellRaw::Null => Ok(DynCell::Null),
389 DynCellRaw::Bool(value) => Ok(DynCell::Bool(value)),
390 DynCellRaw::I8(value) => Ok(DynCell::I8(value)),
391 DynCellRaw::I16(value) => Ok(DynCell::I16(value)),
392 DynCellRaw::I32(value) => Ok(DynCell::I32(value)),
393 DynCellRaw::I64(value) => Ok(DynCell::I64(value)),
394 DynCellRaw::U8(value) => Ok(DynCell::U8(value)),
395 DynCellRaw::U16(value) => Ok(DynCell::U16(value)),
396 DynCellRaw::U32(value) => Ok(DynCell::U32(value)),
397 DynCellRaw::U64(value) => Ok(DynCell::U64(value)),
398 DynCellRaw::F32(value) => Ok(DynCell::F32(value)),
399 DynCellRaw::F64(value) => Ok(DynCell::F64(value)),
400 DynCellRaw::Str { ptr, len } => {
401 let bytes = unsafe { slice::from_raw_parts(ptr.as_ptr(), len) };
402 let owned = unsafe { String::from_utf8_unchecked(bytes.to_vec()) };
403 Ok(DynCell::Str(owned))
404 }
405 DynCellRaw::Bin { ptr, len } => {
406 let bytes = unsafe { slice::from_raw_parts(ptr.as_ptr(), len) };
407 Ok(DynCell::Bin(bytes.to_vec()))
408 }
409 DynCellRaw::Struct(raw) => {
410 let values = Self::collect_struct(raw)?;
411 Ok(DynCell::Struct(values))
412 }
413 DynCellRaw::List(raw) => {
414 let items = Self::collect_list(raw)?;
415 Ok(DynCell::List(items))
416 }
417 DynCellRaw::FixedSizeList(raw) => {
418 let items = Self::collect_fixed_size_list(raw)?;
419 Ok(DynCell::FixedSizeList(items))
420 }
421 DynCellRaw::Map(raw) => {
422 let entries = Self::collect_map(raw)?;
423 Ok(DynCell::Map(entries))
424 }
425 DynCellRaw::Union(raw) => Self::collect_union(raw),
426 }
427 }
428
429 pub(super) fn from_str(value: &str) -> Self {
430 Self::Str {
431 ptr: non_null_from_bytes(value.as_bytes()),
432 len: value.len(),
433 }
434 }
435
436 pub(super) fn from_bin(value: &[u8]) -> Self {
437 Self::Bin {
438 ptr: non_null_from_bytes(value),
439 len: value.len(),
440 }
441 }
442
443 fn from_struct(view: DynStructView<'_>) -> Self {
444 Self::Struct(DynStructViewRaw::from_view(view))
445 }
446
447 fn from_list(view: DynListView<'_>) -> Self {
448 Self::List(DynListViewRaw::from_view(view))
449 }
450
451 fn from_fixed_size_list(view: DynFixedSizeListView<'_>) -> Self {
452 Self::FixedSizeList(DynFixedSizeListViewRaw::from_view(view))
453 }
454
455 fn from_map(view: DynMapView<'_>) -> Self {
456 Self::Map(DynMapViewRaw::from_view(view))
457 }
458
459 fn from_union(view: DynUnionView<'_>) -> Self {
460 Self::Union(DynUnionViewRaw::from_view(view))
461 }
462
463 pub unsafe fn as_ref<'a>(&self) -> DynCellRef<'a> {
469 DynCellRef::from_raw(self.clone())
470 }
471
472 fn cell_opt_into_owned(cell: Option<DynCellRef<'_>>) -> Result<Option<DynCell>, DynViewError> {
473 cell.map(DynCellRef::into_owned).transpose()
474 }
475
476 fn collect_struct(raw: DynStructViewRaw) -> Result<Vec<Option<DynCell>>, DynViewError> {
477 let view = unsafe { raw.into_view() };
478 let mut values = Vec::with_capacity(view.len());
479 for idx in 0..view.len() {
480 let value = view.get(idx)?;
481 values.push(Self::cell_opt_into_owned(value)?);
482 }
483 Ok(values)
484 }
485
486 fn collect_list(raw: DynListViewRaw) -> Result<Vec<Option<DynCell>>, DynViewError> {
487 let view = unsafe { raw.into_view() };
488 let mut items = Vec::with_capacity(view.len());
489 for idx in 0..view.len() {
490 let item = view.get(idx)?;
491 items.push(Self::cell_opt_into_owned(item)?);
492 }
493 Ok(items)
494 }
495
496 fn collect_fixed_size_list(
497 raw: DynFixedSizeListViewRaw,
498 ) -> Result<Vec<Option<DynCell>>, DynViewError> {
499 let view = unsafe { raw.into_view() };
500 let mut items = Vec::with_capacity(view.len());
501 for idx in 0..view.len() {
502 let item = view.get(idx)?;
503 items.push(Self::cell_opt_into_owned(item)?);
504 }
505 Ok(items)
506 }
507
508 fn collect_map(raw: DynMapViewRaw) -> Result<Vec<(DynCell, Option<DynCell>)>, DynViewError> {
509 let view = unsafe { raw.into_view() };
510 let mut entries = Vec::with_capacity(view.len());
511 for idx in 0..view.len() {
512 let (key, value) = view.get(idx)?;
513 let owned_key = key.into_owned()?;
514 let owned_value = Self::cell_opt_into_owned(value)?;
515 entries.push((owned_key, owned_value));
516 }
517 Ok(entries)
518 }
519
520 fn collect_union(raw: DynUnionViewRaw) -> Result<DynCell, DynViewError> {
521 let view = unsafe { raw.into_view() };
522 let type_id = view.type_id();
523 let payload = view
524 .value()?
525 .map(|cell| cell.into_owned().map(Box::new))
526 .transpose()?;
527 Ok(DynCell::Union {
528 type_id,
529 value: payload,
530 })
531 }
532}
533
534impl std::fmt::Debug for DynCellRaw {
535 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
536 unsafe { self.as_ref() }.fmt(f)
537 }
538}
539
/// Returns a `NonNull` pointer to the first byte of `bytes`.
///
/// Falls back to a dangling (but non-null, aligned) pointer should the slice
/// pointer ever be null.
fn non_null_from_bytes(bytes: &[u8]) -> NonNull<u8> {
    match NonNull::new(bytes.as_ptr().cast_mut()) {
        Some(first_byte) => first_byte,
        None => NonNull::dangling(),
    }
}
545
546fn view_cell_identity<'a>(
547 path: &Path,
548 field: &Field,
549 array: &'a dyn Array,
550 index: usize,
551) -> Result<Option<DynCellRef<'a>>, DynViewError> {
552 if index >= array.len() {
553 return Err(DynViewError::RowOutOfBounds {
554 row: index,
555 len: array.len(),
556 });
557 }
558 if array.is_null(index) {
559 return Ok(None);
560 }
561 Ok(Some(view_non_null(path, field, array, index)?))
562}
563
564pub(super) fn view_cell_with_projector<'a>(
565 path: &Path,
566 field: &Field,
567 projector: Option<&FieldProjector>,
568 array: &'a dyn Array,
569 index: usize,
570) -> Result<Option<DynCellRef<'a>>, DynViewError> {
571 match projector {
572 None | Some(FieldProjector::Identity) => view_cell_identity(path, field, array, index),
573 Some(projector) => view_cell_projected(path, field, projector, array, index),
574 }
575}
576
577fn view_cell_projected<'a>(
578 path: &Path,
579 field: &Field,
580 projector: &FieldProjector,
581 array: &'a dyn Array,
582 index: usize,
583) -> Result<Option<DynCellRef<'a>>, DynViewError> {
584 if index >= array.len() {
585 return Err(DynViewError::RowOutOfBounds {
586 row: index,
587 len: array.len(),
588 });
589 }
590 if array.is_null(index) {
591 return Ok(None);
592 }
593 let value = match projector {
594 FieldProjector::Identity => view_non_null(path, field, array, index)?,
595 FieldProjector::Struct(struct_proj) => {
596 view_struct_projected(path, field, struct_proj, array, index)?
597 }
598 FieldProjector::List(item_proj) => {
599 view_list_projected(path, field, item_proj, array, index)?
600 }
601 FieldProjector::LargeList(item_proj) => {
602 view_large_list_projected(path, field, item_proj, array, index)?
603 }
604 FieldProjector::FixedSizeList(item_proj) => {
605 view_fixed_size_list_projected(path, field, item_proj, array, index)?
606 }
607 FieldProjector::Map(entry_proj) => {
608 view_map_projected(path, field, entry_proj, array, index)?
609 }
610 };
611 Ok(Some(value))
612}
613
614fn view_struct_projected<'a>(
615 path: &Path,
616 field: &Field,
617 projection: &Arc<StructProjection>,
618 array: &'a dyn Array,
619 index: usize,
620) -> Result<DynCellRef<'a>, DynViewError> {
621 let DataType::Struct(children) = field.data_type() else {
622 return Err(DynViewError::Invalid {
623 column: path.column,
624 path: path.path.clone(),
625 message: "expected struct field for projected struct".to_string(),
626 });
627 };
628 let arr = array
629 .as_any()
630 .downcast_ref::<StructArray>()
631 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
632 let view = DynStructView {
633 array: arr,
634 fields: children.clone(),
635 row: index,
636 base_path: path.clone(),
637 projection: Some(Arc::clone(projection)),
638 };
639 Ok(DynCellRef::structure(view))
640}
641
642fn view_list_projected<'a>(
643 path: &Path,
644 field: &Field,
645 item_projector: &FieldProjector,
646 array: &'a dyn Array,
647 index: usize,
648) -> Result<DynCellRef<'a>, DynViewError> {
649 let DataType::List(item_field) = field.data_type() else {
650 return Err(DynViewError::Invalid {
651 column: path.column,
652 path: path.path.clone(),
653 message: "expected list field for projected list".to_string(),
654 });
655 };
656 let arr = array
657 .as_any()
658 .downcast_ref::<ListArray>()
659 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
660 let view = DynListView::new_list(
661 arr,
662 item_field.clone(),
663 path.clone(),
664 index,
665 Some(item_projector.clone()),
666 )?;
667 Ok(DynCellRef::list(view))
668}
669
670fn view_large_list_projected<'a>(
671 path: &Path,
672 field: &Field,
673 item_projector: &FieldProjector,
674 array: &'a dyn Array,
675 index: usize,
676) -> Result<DynCellRef<'a>, DynViewError> {
677 let DataType::LargeList(item_field) = field.data_type() else {
678 return Err(DynViewError::Invalid {
679 column: path.column,
680 path: path.path.clone(),
681 message: "expected large list field for projected list".to_string(),
682 });
683 };
684 let arr = array
685 .as_any()
686 .downcast_ref::<LargeListArray>()
687 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
688 let view = DynListView::new_large_list(
689 arr,
690 item_field.clone(),
691 path.clone(),
692 index,
693 Some(item_projector.clone()),
694 )?;
695 Ok(DynCellRef::list(view))
696}
697
698fn view_fixed_size_list_projected<'a>(
699 path: &Path,
700 field: &Field,
701 item_projector: &FieldProjector,
702 array: &'a dyn Array,
703 index: usize,
704) -> Result<DynCellRef<'a>, DynViewError> {
705 let DataType::FixedSizeList(item_field, len) = field.data_type() else {
706 return Err(DynViewError::Invalid {
707 column: path.column,
708 path: path.path.clone(),
709 message: "expected fixed-size list field for projection".to_string(),
710 });
711 };
712 let arr = array
713 .as_any()
714 .downcast_ref::<FixedSizeListArray>()
715 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
716 let view = DynFixedSizeListView::new(
717 arr,
718 item_field.clone(),
719 *len as usize,
720 path.clone(),
721 index,
722 Some(item_projector.clone()),
723 )?;
724 Ok(DynCellRef::fixed_size_list(view))
725}
726
727fn view_map_projected<'a>(
728 path: &Path,
729 field: &Field,
730 entry_projection: &Arc<StructProjection>,
731 array: &'a dyn Array,
732 index: usize,
733) -> Result<DynCellRef<'a>, DynViewError> {
734 let DataType::Map(entry_field, _) = field.data_type() else {
735 return Err(DynViewError::Invalid {
736 column: path.column,
737 path: path.path.clone(),
738 message: "expected map field for projection".to_string(),
739 });
740 };
741 let arr = array
742 .as_any()
743 .downcast_ref::<MapArray>()
744 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
745 let entry_fields = match entry_field.data_type() {
746 DataType::Struct(children) => children.clone(),
747 other => {
748 return Err(DynViewError::Invalid {
749 column: path.column,
750 path: path.path.clone(),
751 message: format!("map entry must be struct, found {other:?}"),
752 });
753 }
754 };
755 let view = DynMapView::with_projection(
756 arr,
757 entry_fields,
758 path.clone(),
759 index,
760 Some(Arc::clone(entry_projection)),
761 )?;
762 Ok(DynCellRef::map(view))
763}
764
765fn view_non_null<'a>(
766 path: &Path,
767 field: &Field,
768 array: &'a dyn Array,
769 index: usize,
770) -> Result<DynCellRef<'a>, DynViewError> {
771 let dt = field.data_type();
772 match dt {
773 DataType::Null => Ok(DynCellRef::null()),
774 DataType::Boolean => {
775 let arr = as_bool(array, path)?;
776 Ok(DynCellRef::bool(arr.value(index)))
777 }
778 DataType::Int8 => {
779 let arr = as_primitive::<Int8Type>(array, path, dt)?;
780 Ok(DynCellRef::i8(arr.value(index)))
781 }
782 DataType::Int16 => {
783 let arr = as_primitive::<Int16Type>(array, path, dt)?;
784 Ok(DynCellRef::i16(arr.value(index)))
785 }
786 DataType::Int32 => {
787 let arr = as_primitive::<Int32Type>(array, path, dt)?;
788 Ok(DynCellRef::i32(arr.value(index)))
789 }
790 DataType::Date32 => {
791 let arr = as_primitive::<Date32Type>(array, path, dt)?;
792 Ok(DynCellRef::i32(arr.value(index)))
793 }
794 DataType::Time32(unit) => match unit {
795 arrow_schema::TimeUnit::Second => {
796 let arr = as_primitive::<Time32SecondType>(array, path, dt)?;
797 Ok(DynCellRef::i32(arr.value(index)))
798 }
799 arrow_schema::TimeUnit::Millisecond => {
800 let arr = as_primitive::<Time32MillisecondType>(array, path, dt)?;
801 Ok(DynCellRef::i32(arr.value(index)))
802 }
803 other => Err(DynViewError::Invalid {
804 column: path.column,
805 path: path.path.clone(),
806 message: format!("unsupported Time32 unit {other:?}"),
807 }),
808 },
809 DataType::Int64 => {
810 let arr = as_primitive::<Int64Type>(array, path, dt)?;
811 Ok(DynCellRef::i64(arr.value(index)))
812 }
813 DataType::Date64 => {
814 let arr = as_primitive::<Date64Type>(array, path, dt)?;
815 Ok(DynCellRef::i64(arr.value(index)))
816 }
817 DataType::Timestamp(unit, _) => match unit {
818 arrow_schema::TimeUnit::Second => {
819 let arr = as_primitive::<TimestampSecondType>(array, path, dt)?;
820 Ok(DynCellRef::i64(arr.value(index)))
821 }
822 arrow_schema::TimeUnit::Millisecond => {
823 let arr = as_primitive::<TimestampMillisecondType>(array, path, dt)?;
824 Ok(DynCellRef::i64(arr.value(index)))
825 }
826 arrow_schema::TimeUnit::Microsecond => {
827 let arr = as_primitive::<TimestampMicrosecondType>(array, path, dt)?;
828 Ok(DynCellRef::i64(arr.value(index)))
829 }
830 arrow_schema::TimeUnit::Nanosecond => {
831 let arr = as_primitive::<TimestampNanosecondType>(array, path, dt)?;
832 Ok(DynCellRef::i64(arr.value(index)))
833 }
834 },
835 DataType::Time64(unit) => match unit {
836 arrow_schema::TimeUnit::Microsecond => {
837 let arr = as_primitive::<Time64MicrosecondType>(array, path, dt)?;
838 Ok(DynCellRef::i64(arr.value(index)))
839 }
840 arrow_schema::TimeUnit::Nanosecond => {
841 let arr = as_primitive::<Time64NanosecondType>(array, path, dt)?;
842 Ok(DynCellRef::i64(arr.value(index)))
843 }
844 other => Err(DynViewError::Invalid {
845 column: path.column,
846 path: path.path.clone(),
847 message: format!("unsupported Time64 unit {other:?}"),
848 }),
849 },
850 DataType::Duration(unit) => match unit {
851 arrow_schema::TimeUnit::Second => {
852 let arr = as_primitive::<DurationSecondType>(array, path, dt)?;
853 Ok(DynCellRef::i64(arr.value(index)))
854 }
855 arrow_schema::TimeUnit::Millisecond => {
856 let arr = as_primitive::<DurationMillisecondType>(array, path, dt)?;
857 Ok(DynCellRef::i64(arr.value(index)))
858 }
859 arrow_schema::TimeUnit::Microsecond => {
860 let arr = as_primitive::<DurationMicrosecondType>(array, path, dt)?;
861 Ok(DynCellRef::i64(arr.value(index)))
862 }
863 arrow_schema::TimeUnit::Nanosecond => {
864 let arr = as_primitive::<DurationNanosecondType>(array, path, dt)?;
865 Ok(DynCellRef::i64(arr.value(index)))
866 }
867 },
868 DataType::UInt8 => {
869 let arr = as_primitive::<UInt8Type>(array, path, dt)?;
870 Ok(DynCellRef::u8(arr.value(index)))
871 }
872 DataType::UInt16 => {
873 let arr = as_primitive::<UInt16Type>(array, path, dt)?;
874 Ok(DynCellRef::u16(arr.value(index)))
875 }
876 DataType::UInt32 => {
877 let arr = as_primitive::<UInt32Type>(array, path, dt)?;
878 Ok(DynCellRef::u32(arr.value(index)))
879 }
880 DataType::UInt64 => {
881 let arr = as_primitive::<UInt64Type>(array, path, dt)?;
882 Ok(DynCellRef::u64(arr.value(index)))
883 }
884 DataType::Float32 => {
885 let arr = array
886 .as_any()
887 .downcast_ref::<Float32Array>()
888 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
889 Ok(DynCellRef::f32(arr.value(index)))
890 }
891 DataType::Float64 => {
892 let arr = array
893 .as_any()
894 .downcast_ref::<Float64Array>()
895 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
896 Ok(DynCellRef::f64(arr.value(index)))
897 }
898 DataType::Utf8 => {
899 let arr = array
900 .as_any()
901 .downcast_ref::<StringArray>()
902 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
903 Ok(DynCellRef::string(arr.value(index)))
904 }
905 DataType::LargeUtf8 => {
906 let arr = array
907 .as_any()
908 .downcast_ref::<LargeStringArray>()
909 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
910 Ok(DynCellRef::string(arr.value(index)))
911 }
912 DataType::Binary => {
913 let arr = array
914 .as_any()
915 .downcast_ref::<BinaryArray>()
916 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
917 Ok(DynCellRef::binary(arr.value(index)))
918 }
919 DataType::LargeBinary => {
920 let arr = array
921 .as_any()
922 .downcast_ref::<LargeBinaryArray>()
923 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
924 Ok(DynCellRef::binary(arr.value(index)))
925 }
926 DataType::FixedSizeBinary(_) => {
927 let arr = array
928 .as_any()
929 .downcast_ref::<FixedSizeBinaryArray>()
930 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
931 Ok(DynCellRef::binary(arr.value(index)))
932 }
933 DataType::Struct(children) => {
934 let arr = array
935 .as_any()
936 .downcast_ref::<StructArray>()
937 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
938 let view = DynStructView {
939 array: arr,
940 fields: children.clone(),
941 row: index,
942 base_path: path.clone(),
943 projection: None,
944 };
945 Ok(DynCellRef::structure(view))
946 }
947 DataType::List(item) => {
948 let arr = array
949 .as_any()
950 .downcast_ref::<ListArray>()
951 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
952 let view = DynListView::new_list(arr, item.clone(), path.clone(), index, None)?;
953 Ok(DynCellRef::list(view))
954 }
955 DataType::LargeList(item) => {
956 let arr = array
957 .as_any()
958 .downcast_ref::<LargeListArray>()
959 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
960 let view = DynListView::new_large_list(arr, item.clone(), path.clone(), index, None)?;
961 Ok(DynCellRef::list(view))
962 }
963 DataType::FixedSizeList(item, len) => {
964 let arr = array
965 .as_any()
966 .downcast_ref::<FixedSizeListArray>()
967 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
968 let view = DynFixedSizeListView::new(
969 arr,
970 item.clone(),
971 *len as usize,
972 path.clone(),
973 index,
974 None,
975 )?;
976 Ok(DynCellRef::fixed_size_list(view))
977 }
978 DataType::Map(_, _) => {
979 let arr = array
980 .as_any()
981 .downcast_ref::<MapArray>()
982 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
983 let view = DynMapView::new(arr, path.clone(), index)?;
984 Ok(DynCellRef::map(view))
985 }
986 DataType::Union(fields, mode) => {
987 let arr = array
988 .as_any()
989 .downcast_ref::<UnionArray>()
990 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
991 let view = DynUnionView::new(arr, fields.clone(), *mode, path.clone(), index)?;
992 Ok(DynCellRef::union(view))
993 }
994 DataType::Dictionary(key_type, value_type) => dictionary_value(
995 path,
996 field,
997 array,
998 index,
999 key_type.as_ref(),
1000 value_type.as_ref(),
1001 ),
1002 other => Err(DynViewError::Invalid {
1003 column: path.column,
1004 path: path.path.clone(),
1005 message: format!("unsupported data type {other:?}"),
1006 }),
1007 }
1008}
1009
1010fn dictionary_value<'a>(
1011 path: &Path,
1012 field: &Field,
1013 array: &'a dyn Array,
1014 index: usize,
1015 key_type: &DataType,
1016 value_type: &DataType,
1017) -> Result<DynCellRef<'a>, DynViewError> {
1018 macro_rules! match_dict {
1019 ($key_ty:ty) => {{
1020 let dict = array
1021 .as_any()
1022 .downcast_ref::<DictionaryArray<$key_ty>>()
1023 .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
1024 dict_value(
1025 path,
1026 dict.keys().value(index) as usize,
1027 dict.values(),
1028 value_type,
1029 )
1030 }};
1031 }
1032
1033 match key_type {
1034 DataType::Int8 => match_dict!(Int8Type),
1035 DataType::Int16 => match_dict!(Int16Type),
1036 DataType::Int32 => match_dict!(Int32Type),
1037 DataType::Int64 => match_dict!(Int64Type),
1038 DataType::UInt8 => match_dict!(UInt8Type),
1039 DataType::UInt16 => match_dict!(UInt16Type),
1040 DataType::UInt32 => match_dict!(UInt32Type),
1041 DataType::UInt64 => match_dict!(UInt64Type),
1042 other => Err(DynViewError::Invalid {
1043 column: path.column,
1044 path: path.path.clone(),
1045 message: format!("unsupported dictionary key type {other:?}"),
1046 }),
1047 }
1048}
1049
1050fn dict_value<'a>(
1051 path: &Path,
1052 key_index: usize,
1053 values: &'a ArrayRef,
1054 value_type: &DataType,
1055) -> Result<DynCellRef<'a>, DynViewError> {
1056 if key_index >= values.len() {
1057 return Err(DynViewError::Invalid {
1058 column: path.column,
1059 path: path.path.clone(),
1060 message: format!(
1061 "dictionary key index {} out of bounds for {}",
1062 key_index,
1063 values.len()
1064 ),
1065 });
1066 }
1067 if values.is_null(key_index) {
1068 return Err(DynViewError::UnexpectedNull {
1069 column: path.column,
1070 path: path.path.clone(),
1071 });
1072 }
1073 match value_type {
1074 DataType::Utf8 => {
1075 let arr = values
1076 .as_any()
1077 .downcast_ref::<StringArray>()
1078 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1079 Ok(DynCellRef::string(arr.value(key_index)))
1080 }
1081 DataType::LargeUtf8 => {
1082 let arr = values
1083 .as_any()
1084 .downcast_ref::<LargeStringArray>()
1085 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1086 Ok(DynCellRef::string(arr.value(key_index)))
1087 }
1088 DataType::Binary => {
1089 let arr = values
1090 .as_any()
1091 .downcast_ref::<BinaryArray>()
1092 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1093 Ok(DynCellRef::binary(arr.value(key_index)))
1094 }
1095 DataType::LargeBinary => {
1096 let arr = values
1097 .as_any()
1098 .downcast_ref::<LargeBinaryArray>()
1099 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1100 Ok(DynCellRef::binary(arr.value(key_index)))
1101 }
1102 DataType::FixedSizeBinary(_) => {
1103 let arr = values
1104 .as_any()
1105 .downcast_ref::<FixedSizeBinaryArray>()
1106 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1107 Ok(DynCellRef::binary(arr.value(key_index)))
1108 }
1109 DataType::Int8 => {
1110 let arr = values
1111 .as_any()
1112 .downcast_ref::<Int8Array>()
1113 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1114 Ok(DynCellRef::i8(arr.value(key_index)))
1115 }
1116 DataType::Int16 => {
1117 let arr = values
1118 .as_any()
1119 .downcast_ref::<Int16Array>()
1120 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1121 Ok(DynCellRef::i16(arr.value(key_index)))
1122 }
1123 DataType::Int32 => {
1124 let arr = values
1125 .as_any()
1126 .downcast_ref::<Int32Array>()
1127 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1128 Ok(DynCellRef::i32(arr.value(key_index)))
1129 }
1130 DataType::Int64 => {
1131 let arr = values
1132 .as_any()
1133 .downcast_ref::<Int64Array>()
1134 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1135 Ok(DynCellRef::i64(arr.value(key_index)))
1136 }
1137 DataType::UInt8 => {
1138 let arr = values
1139 .as_any()
1140 .downcast_ref::<UInt8Array>()
1141 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1142 Ok(DynCellRef::u8(arr.value(key_index)))
1143 }
1144 DataType::UInt16 => {
1145 let arr = values
1146 .as_any()
1147 .downcast_ref::<UInt16Array>()
1148 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1149 Ok(DynCellRef::u16(arr.value(key_index)))
1150 }
1151 DataType::UInt32 => {
1152 let arr = values
1153 .as_any()
1154 .downcast_ref::<UInt32Array>()
1155 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1156 Ok(DynCellRef::u32(arr.value(key_index)))
1157 }
1158 DataType::UInt64 => {
1159 let arr = values
1160 .as_any()
1161 .downcast_ref::<UInt64Array>()
1162 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1163 Ok(DynCellRef::u64(arr.value(key_index)))
1164 }
1165 DataType::Float32 => {
1166 let arr = values
1167 .as_any()
1168 .downcast_ref::<Float32Array>()
1169 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1170 Ok(DynCellRef::f32(arr.value(key_index)))
1171 }
1172 DataType::Float64 => {
1173 let arr = values
1174 .as_any()
1175 .downcast_ref::<Float64Array>()
1176 .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1177 Ok(DynCellRef::f64(arr.value(key_index)))
1178 }
1179 other => Err(DynViewError::Invalid {
1180 column: path.column,
1181 path: path.path.clone(),
1182 message: format!("unsupported dictionary value type {other:?}"),
1183 }),
1184 }
1185}
1186
1187pub(super) fn type_mismatch(path: &Path, expected: DataType, actual: &DataType) -> DynViewError {
1188 DynViewError::TypeMismatch {
1189 column: path.column,
1190 path: path.path.clone(),
1191 expected,
1192 actual: actual.clone(),
1193 }
1194}
1195
1196fn as_bool<'a>(array: &'a dyn Array, path: &Path) -> Result<&'a BooleanArray, DynViewError> {
1197 array
1198 .as_any()
1199 .downcast_ref::<BooleanArray>()
1200 .ok_or_else(|| type_mismatch(path, DataType::Boolean, array.data_type()))
1201}
1202
1203fn as_primitive<'a, T>(
1204 array: &'a dyn Array,
1205 path: &Path,
1206 expected: &DataType,
1207) -> Result<&'a PrimitiveArray<T>, DynViewError>
1208where
1209 T: arrow_array::types::ArrowPrimitiveType,
1210{
1211 array
1212 .as_any()
1213 .downcast_ref::<PrimitiveArray<T>>()
1214 .ok_or_else(|| type_mismatch(path, expected.clone(), array.data_type()))
1215}