typed_arrow_dyn/view/
cell.rs

1use std::{marker::PhantomData, ptr::NonNull, slice, str, sync::Arc};
2
3use arrow_array::{
4    Array, ArrayRef, BinaryArray, BooleanArray, DictionaryArray, FixedSizeBinaryArray,
5    FixedSizeListArray, Float32Array, Float64Array, Int8Array, Int16Array, Int32Array, Int64Array,
6    LargeBinaryArray, LargeListArray, LargeStringArray, ListArray, MapArray, PrimitiveArray,
7    StringArray, StructArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, UnionArray,
8    types::{
9        Date32Type, Date64Type, DurationMicrosecondType, DurationMillisecondType,
10        DurationNanosecondType, DurationSecondType, Int8Type, Int16Type, Int32Type, Int64Type,
11        Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType,
12        TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
13        TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
14    },
15};
16use arrow_schema::{DataType, Field};
17
18use super::{
19    path::Path,
20    projection::{FieldProjector, StructProjection},
21    raw::{
22        DynFixedSizeListViewRaw, DynListViewRaw, DynMapViewRaw, DynStructViewRaw, DynUnionViewRaw,
23    },
24    views::{DynFixedSizeListView, DynListView, DynMapView, DynStructView, DynUnionView},
25};
26use crate::{DynViewError, cell::DynCell};
27
28impl DynCell {
29    /// Borrow this owned cell as a [`DynCellRef`] without cloning underlying buffers.
30    ///
31    /// Container variants (`Struct`, `List`, `FixedSizeList`, `Map`, `Union`) are not supported
32    /// because their borrowed form requires array-backed views; in those cases this returns `None`.
33    #[must_use]
34    pub fn as_ref(&self) -> Option<DynCellRef<'_>> {
35        use DynCell::*;
36        Some(match self {
37            Null => DynCellRef::null(),
38            Bool(v) => DynCellRef::from_raw(DynCellRaw::Bool(*v)),
39            I8(v) => DynCellRef::from_raw(DynCellRaw::I8(*v)),
40            I16(v) => DynCellRef::from_raw(DynCellRaw::I16(*v)),
41            I32(v) => DynCellRef::from_raw(DynCellRaw::I32(*v)),
42            I64(v) => DynCellRef::from_raw(DynCellRaw::I64(*v)),
43            U8(v) => DynCellRef::from_raw(DynCellRaw::U8(*v)),
44            U16(v) => DynCellRef::from_raw(DynCellRaw::U16(*v)),
45            U32(v) => DynCellRef::from_raw(DynCellRaw::U32(*v)),
46            U64(v) => DynCellRef::from_raw(DynCellRaw::U64(*v)),
47            F32(v) => DynCellRef::from_raw(DynCellRaw::F32(*v)),
48            F64(v) => DynCellRef::from_raw(DynCellRaw::F64(*v)),
49            Str(s) => DynCellRef::from_raw(DynCellRaw::from_str(s)),
50            Bin(b) => DynCellRef::from_raw(DynCellRaw::from_bin(b)),
51            Struct(_) | List(_) | FixedSizeList(_) | Map(_) | Union { .. } => return None,
52        })
53    }
54}
55
// Generates, for each scalar variant of `DynCellRaw`, a crate-private constructor plus a
// borrowing and a consuming accessor. Each input tuple has the shape
// `(Variant, ctor_name, as_getter, into_getter, RustType, "Arrow type name", "value description")`.
// The macro must be invoked inside an `impl` whose `Self` has a `raw: DynCellRaw` field and a
// `from_raw` constructor.
macro_rules! dyn_cell_primitive_methods {
    ($(($var:ident, $make:ident, $peek:ident, $take:ident, $prim:ty, $arrow_name:literal, $what:literal)),* $(,)?) => {
        $(
            #[doc = concat!("Constructs a dynamic cell wrapping an ", $arrow_name, " value.")]
            pub(crate) fn $make(value: $prim) -> Self {
                Self::from_raw(DynCellRaw::$var(value))
            }

            #[doc = concat!("Returns the ", $what, " value if this cell stores an ", $arrow_name, ".")]
            pub fn $peek(&self) -> Option<$prim> {
                // The payload is a plain scalar, so it is copied out of the borrowed cell.
                if let DynCellRaw::$var(inner) = self.raw {
                    Some(inner)
                } else {
                    None
                }
            }

            #[doc = concat!("Consumes the cell and returns the ", $what, " value if it stores an ", $arrow_name, ".")]
            pub fn $take(self) -> Option<$prim> {
                self.$peek()
            }
        )*
    };
}
82
83/// Borrowed representation of a single value backed by a raw pointer payload.
84#[derive(Clone)]
85pub struct DynCellRef<'a> {
86    raw: DynCellRaw,
87    _marker: PhantomData<&'a ()>,
88}
89
90impl<'a> DynCellRef<'a> {
91    /// Create a new borrowed cell from its raw lifetime-erased payload.
92    pub fn from_raw(raw: DynCellRaw) -> Self {
93        Self {
94            raw,
95            _marker: PhantomData,
96        }
97    }
98
99    /// Access the underlying raw representation.
100    pub fn as_raw(&self) -> &DynCellRaw {
101        &self.raw
102    }
103
104    /// Consume this reference, yielding the raw payload.
105    pub fn into_raw(self) -> DynCellRaw {
106        self.raw
107    }
108
109    /// Convert this borrowed cell into an owned [`DynCell`], cloning any backing data as needed.
110    pub fn into_owned(self) -> Result<DynCell, DynViewError> {
111        self.raw.into_owned()
112    }
113
114    /// Clone this borrowed cell into an owned [`DynCell`] without consuming the reference.
115    pub fn to_owned(&self) -> Result<DynCell, DynViewError> {
116        self.clone().into_owned()
117    }
118
119    /// Returns true if this cell represents Arrow `Null`.
120    pub fn is_null(&self) -> bool {
121        matches!(self.raw, DynCellRaw::Null)
122    }
123
124    /// Constructs a dynamic cell representing Arrow `Null`.
125    pub(crate) fn null() -> Self {
126        Self::from_raw(DynCellRaw::Null)
127    }
128
129    dyn_cell_primitive_methods! {
130        (Bool, bool, as_bool, into_bool, bool, "Arrow boolean", "boolean"),
131        (I8, i8, as_i8, into_i8, i8, "Arrow Int8", "`i8`"),
132        (I16, i16, as_i16, into_i16, i16, "Arrow Int16", "`i16`"),
133        (I32, i32, as_i32, into_i32, i32, "Arrow Int32", "`i32`"),
134        (I64, i64, as_i64, into_i64, i64, "Arrow Int64", "`i64`"),
135        (U8, u8, as_u8, into_u8, u8, "Arrow UInt8", "`u8`"),
136        (U16, u16, as_u16, into_u16, u16, "Arrow UInt16", "`u16`"),
137        (U32, u32, as_u32, into_u32, u32, "Arrow UInt32", "`u32`"),
138        (U64, u64, as_u64, into_u64, u64, "Arrow UInt64", "`u64`"),
139        (F32, f32, as_f32, into_f32, f32, "Arrow Float32", "`f32`"),
140        (F64, f64, as_f64, into_f64, f64, "Arrow Float64", "`f64`")
141    }
142
143    /// Constructs a dynamic cell wrapping an Arrow UTF-8 string slice.
144    pub(crate) fn string(value: &'a str) -> Self {
145        Self::from_raw(DynCellRaw::from_str(value))
146    }
147
148    /// Constructs a dynamic cell wrapping an Arrow binary slice.
149    pub(crate) fn binary(value: &'a [u8]) -> Self {
150        Self::from_raw(DynCellRaw::from_bin(value))
151    }
152
153    /// Constructs a dynamic cell wrapping a struct view.
154    pub(crate) fn structure(view: DynStructView<'a>) -> Self {
155        Self::from_raw(DynCellRaw::from_struct(view))
156    }
157
158    /// Constructs a dynamic cell wrapping a list view.
159    pub(crate) fn list(view: DynListView<'a>) -> Self {
160        Self::from_raw(DynCellRaw::from_list(view))
161    }
162
163    /// Constructs a dynamic cell wrapping a fixed-size list view.
164    pub(crate) fn fixed_size_list(view: DynFixedSizeListView<'a>) -> Self {
165        Self::from_raw(DynCellRaw::from_fixed_size_list(view))
166    }
167
168    /// Constructs a dynamic cell wrapping a map view.
169    pub(crate) fn map(view: DynMapView<'a>) -> Self {
170        Self::from_raw(DynCellRaw::from_map(view))
171    }
172
173    /// Constructs a dynamic cell wrapping a union view.
174    pub(crate) fn union(view: DynUnionView<'a>) -> Self {
175        Self::from_raw(DynCellRaw::from_union(view))
176    }
177
178    /// Returns the UTF-8 string slice if this cell stores Arrow `Utf8` or `LargeUtf8`.
179    pub fn as_str(&self) -> Option<&'a str> {
180        match &self.raw {
181            DynCellRaw::Str { ptr, len } => unsafe {
182                let bytes = slice::from_raw_parts(ptr.as_ptr() as *const u8, *len);
183                Some(str::from_utf8_unchecked(bytes))
184            },
185            _ => None,
186        }
187    }
188
189    /// Returns the binary slice if this cell stores Arrow `Binary`, `LargeBinary`, or
190    /// `FixedSizeBinary`.
191    pub fn as_bin(&self) -> Option<&'a [u8]> {
192        match &self.raw {
193            DynCellRaw::Bin { ptr, len } => unsafe {
194                Some(slice::from_raw_parts(ptr.as_ptr() as *const u8, *len))
195            },
196            _ => None,
197        }
198    }
199
200    /// Returns a struct view if this cell stores Arrow `Struct`.
201    pub fn as_struct(&self) -> Option<DynStructView<'a>> {
202        match &self.raw {
203            DynCellRaw::Struct(raw) => unsafe { Some(raw.as_view()) },
204            _ => None,
205        }
206    }
207
208    /// Returns a list view if this cell stores Arrow `List` or `LargeList`.
209    pub fn as_list(&self) -> Option<DynListView<'a>> {
210        match &self.raw {
211            DynCellRaw::List(raw) => unsafe { Some(raw.as_view()) },
212            _ => None,
213        }
214    }
215
216    /// Returns a fixed-size list view if this cell stores Arrow `FixedSizeList`.
217    pub fn as_fixed_size_list(&self) -> Option<DynFixedSizeListView<'a>> {
218        match &self.raw {
219            DynCellRaw::FixedSizeList(raw) => unsafe { Some(raw.as_view()) },
220            _ => None,
221        }
222    }
223
224    /// Returns a map view if this cell stores Arrow `Map`.
225    pub fn as_map(&self) -> Option<DynMapView<'a>> {
226        match &self.raw {
227            DynCellRaw::Map(raw) => unsafe { Some(raw.as_view()) },
228            _ => None,
229        }
230    }
231
232    /// Returns a union view if this cell stores Arrow `Union`.
233    pub fn as_union(&self) -> Option<DynUnionView<'a>> {
234        match &self.raw {
235            DynCellRaw::Union(raw) => unsafe { Some(raw.as_view()) },
236            _ => None,
237        }
238    }
239
240    /// Consumes the cell and returns the UTF-8 string slice if it stores Arrow `Utf8` or
241    /// `LargeUtf8`.
242    pub fn into_str(self) -> Option<&'a str> {
243        match self.raw {
244            DynCellRaw::Str { ptr, len } => unsafe {
245                let bytes = slice::from_raw_parts(ptr.as_ptr() as *const u8, len);
246                Some(str::from_utf8_unchecked(bytes))
247            },
248            _ => None,
249        }
250    }
251
252    /// Consumes the cell and returns the binary slice if it stores Arrow `Binary`, `LargeBinary`,
253    /// or `FixedSizeBinary`.
254    pub fn into_bin(self) -> Option<&'a [u8]> {
255        match self.raw {
256            DynCellRaw::Bin { ptr, len } => unsafe {
257                Some(slice::from_raw_parts(ptr.as_ptr() as *const u8, len))
258            },
259            _ => None,
260        }
261    }
262
263    /// Consumes the cell and returns a struct view if it stores Arrow `Struct`.
264    pub fn into_struct(self) -> Option<DynStructView<'a>> {
265        match self.raw {
266            DynCellRaw::Struct(raw) => unsafe { Some(raw.into_view()) },
267            _ => None,
268        }
269    }
270
271    /// Consumes the cell and returns a list view if it stores Arrow `List` or `LargeList`.
272    pub fn into_list(self) -> Option<DynListView<'a>> {
273        match self.raw {
274            DynCellRaw::List(raw) => unsafe { Some(raw.into_view()) },
275            _ => None,
276        }
277    }
278
279    /// Consumes the cell and returns a fixed-size list view if it stores Arrow `FixedSizeList`.
280    pub fn into_fixed_size_list(self) -> Option<DynFixedSizeListView<'a>> {
281        match self.raw {
282            DynCellRaw::FixedSizeList(raw) => unsafe { Some(raw.into_view()) },
283            _ => None,
284        }
285    }
286
287    /// Consumes the cell and returns a map view if it stores Arrow `Map`.
288    pub fn into_map(self) -> Option<DynMapView<'a>> {
289        match self.raw {
290            DynCellRaw::Map(raw) => unsafe { Some(raw.into_view()) },
291            _ => None,
292        }
293    }
294
295    /// Consumes the cell and returns a union view if it stores Arrow `Union`.
296    pub fn into_union(self) -> Option<DynUnionView<'a>> {
297        match self.raw {
298            DynCellRaw::Union(raw) => unsafe { Some(raw.into_view()) },
299            _ => None,
300        }
301    }
302}
303
304impl<'a> From<DynCellRaw> for DynCellRef<'a> {
305    fn from(raw: DynCellRaw) -> Self {
306        Self::from_raw(raw)
307    }
308}
309
310impl<'a> std::fmt::Debug for DynCellRef<'a> {
311    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
312        self.as_raw().fmt(f)
313    }
314}
315
316/// Lifetime-erased counterpart to [`DynCellRef`].
317///
318/// This representation stores raw pointers in place of borrowed references. Callers must ensure the
319/// backing Arrow arrays and batches remain alive while the raw cell (and any derived views) are in
320/// use.
321#[derive(Clone)]
322pub enum DynCellRaw {
323    /// Arrow `Null` value.
324    Null,
325    /// Boolean scalar.
326    Bool(bool),
327    /// 8-bit signed integer.
328    I8(i8),
329    /// 16-bit signed integer.
330    I16(i16),
331    /// 32-bit signed integer.
332    I32(i32),
333    /// 64-bit signed integer.
334    I64(i64),
335    /// 8-bit unsigned integer.
336    U8(u8),
337    /// 16-bit unsigned integer.
338    U16(u16),
339    /// 32-bit unsigned integer.
340    U32(u32),
341    /// 64-bit unsigned integer.
342    U64(u64),
343    /// 32-bit floating-point number.
344    F32(f32),
345    /// 64-bit floating-point number.
346    F64(f64),
347    /// Borrowed UTF-8 string slice.
348    Str {
349        /// Pointer to the first byte of the UTF-8 value.
350        ptr: NonNull<u8>,
351        /// Length in bytes of the UTF-8 value.
352        len: usize,
353    },
354    /// Borrowed binary slice.
355    Bin {
356        /// Pointer to the first byte of the binary value.
357        ptr: NonNull<u8>,
358        /// Length in bytes of the binary value.
359        len: usize,
360    },
361    /// Borrowed struct view.
362    Struct(DynStructViewRaw),
363    /// Borrowed variable-sized list view.
364    List(DynListViewRaw),
365    /// Borrowed fixed-size list view.
366    FixedSizeList(DynFixedSizeListViewRaw),
367    /// Borrowed map view.
368    Map(DynMapViewRaw),
369    /// Borrowed union view.
370    Union(DynUnionViewRaw),
371}
372
373// Safety: the raw variants only carry pointer/address data to Arrow arrays or plain POD values.
374// Arrow arrays themselves are `Send + Sync`; forwarding these markers relies on callers upholding
375// the documented lifetime requirement that the backing arrays outlive any raw handles.
376unsafe impl Send for DynCellRaw {}
377unsafe impl Sync for DynCellRaw {}
378
379impl DynCellRaw {
380    /// Convert a borrowed dynamic cell into its lifetime-erased form.
381    pub fn from_ref(cell: DynCellRef<'_>) -> Self {
382        cell.into_raw()
383    }
384
385    /// Convert this raw cell into an owned [`DynCell`] by cloning any referenced data.
386    pub fn into_owned(self) -> Result<DynCell, DynViewError> {
387        match self {
388            DynCellRaw::Null => Ok(DynCell::Null),
389            DynCellRaw::Bool(value) => Ok(DynCell::Bool(value)),
390            DynCellRaw::I8(value) => Ok(DynCell::I8(value)),
391            DynCellRaw::I16(value) => Ok(DynCell::I16(value)),
392            DynCellRaw::I32(value) => Ok(DynCell::I32(value)),
393            DynCellRaw::I64(value) => Ok(DynCell::I64(value)),
394            DynCellRaw::U8(value) => Ok(DynCell::U8(value)),
395            DynCellRaw::U16(value) => Ok(DynCell::U16(value)),
396            DynCellRaw::U32(value) => Ok(DynCell::U32(value)),
397            DynCellRaw::U64(value) => Ok(DynCell::U64(value)),
398            DynCellRaw::F32(value) => Ok(DynCell::F32(value)),
399            DynCellRaw::F64(value) => Ok(DynCell::F64(value)),
400            DynCellRaw::Str { ptr, len } => {
401                let bytes = unsafe { slice::from_raw_parts(ptr.as_ptr(), len) };
402                let owned = unsafe { String::from_utf8_unchecked(bytes.to_vec()) };
403                Ok(DynCell::Str(owned))
404            }
405            DynCellRaw::Bin { ptr, len } => {
406                let bytes = unsafe { slice::from_raw_parts(ptr.as_ptr(), len) };
407                Ok(DynCell::Bin(bytes.to_vec()))
408            }
409            DynCellRaw::Struct(raw) => {
410                let values = Self::collect_struct(raw)?;
411                Ok(DynCell::Struct(values))
412            }
413            DynCellRaw::List(raw) => {
414                let items = Self::collect_list(raw)?;
415                Ok(DynCell::List(items))
416            }
417            DynCellRaw::FixedSizeList(raw) => {
418                let items = Self::collect_fixed_size_list(raw)?;
419                Ok(DynCell::FixedSizeList(items))
420            }
421            DynCellRaw::Map(raw) => {
422                let entries = Self::collect_map(raw)?;
423                Ok(DynCell::Map(entries))
424            }
425            DynCellRaw::Union(raw) => Self::collect_union(raw),
426        }
427    }
428
429    pub(super) fn from_str(value: &str) -> Self {
430        Self::Str {
431            ptr: non_null_from_bytes(value.as_bytes()),
432            len: value.len(),
433        }
434    }
435
436    pub(super) fn from_bin(value: &[u8]) -> Self {
437        Self::Bin {
438            ptr: non_null_from_bytes(value),
439            len: value.len(),
440        }
441    }
442
443    fn from_struct(view: DynStructView<'_>) -> Self {
444        Self::Struct(DynStructViewRaw::from_view(view))
445    }
446
447    fn from_list(view: DynListView<'_>) -> Self {
448        Self::List(DynListViewRaw::from_view(view))
449    }
450
451    fn from_fixed_size_list(view: DynFixedSizeListView<'_>) -> Self {
452        Self::FixedSizeList(DynFixedSizeListViewRaw::from_view(view))
453    }
454
455    fn from_map(view: DynMapView<'_>) -> Self {
456        Self::Map(DynMapViewRaw::from_view(view))
457    }
458
459    fn from_union(view: DynUnionView<'_>) -> Self {
460        Self::Union(DynUnionViewRaw::from_view(view))
461    }
462
463    /// Reborrow this raw cell as a scoped [`DynCellRef`].
464    ///
465    /// # Safety
466    /// The caller must guarantee that all underlying Arrow data structures outlive the returned
467    /// reference.
468    pub unsafe fn as_ref<'a>(&self) -> DynCellRef<'a> {
469        DynCellRef::from_raw(self.clone())
470    }
471
472    fn cell_opt_into_owned(cell: Option<DynCellRef<'_>>) -> Result<Option<DynCell>, DynViewError> {
473        cell.map(DynCellRef::into_owned).transpose()
474    }
475
476    fn collect_struct(raw: DynStructViewRaw) -> Result<Vec<Option<DynCell>>, DynViewError> {
477        let view = unsafe { raw.into_view() };
478        let mut values = Vec::with_capacity(view.len());
479        for idx in 0..view.len() {
480            let value = view.get(idx)?;
481            values.push(Self::cell_opt_into_owned(value)?);
482        }
483        Ok(values)
484    }
485
486    fn collect_list(raw: DynListViewRaw) -> Result<Vec<Option<DynCell>>, DynViewError> {
487        let view = unsafe { raw.into_view() };
488        let mut items = Vec::with_capacity(view.len());
489        for idx in 0..view.len() {
490            let item = view.get(idx)?;
491            items.push(Self::cell_opt_into_owned(item)?);
492        }
493        Ok(items)
494    }
495
496    fn collect_fixed_size_list(
497        raw: DynFixedSizeListViewRaw,
498    ) -> Result<Vec<Option<DynCell>>, DynViewError> {
499        let view = unsafe { raw.into_view() };
500        let mut items = Vec::with_capacity(view.len());
501        for idx in 0..view.len() {
502            let item = view.get(idx)?;
503            items.push(Self::cell_opt_into_owned(item)?);
504        }
505        Ok(items)
506    }
507
508    fn collect_map(raw: DynMapViewRaw) -> Result<Vec<(DynCell, Option<DynCell>)>, DynViewError> {
509        let view = unsafe { raw.into_view() };
510        let mut entries = Vec::with_capacity(view.len());
511        for idx in 0..view.len() {
512            let (key, value) = view.get(idx)?;
513            let owned_key = key.into_owned()?;
514            let owned_value = Self::cell_opt_into_owned(value)?;
515            entries.push((owned_key, owned_value));
516        }
517        Ok(entries)
518    }
519
520    fn collect_union(raw: DynUnionViewRaw) -> Result<DynCell, DynViewError> {
521        let view = unsafe { raw.into_view() };
522        let type_id = view.type_id();
523        let payload = view
524            .value()?
525            .map(|cell| cell.into_owned().map(Box::new))
526            .transpose()?;
527        Ok(DynCell::Union {
528            type_id,
529            value: payload,
530        })
531    }
532}
533
534impl std::fmt::Debug for DynCellRaw {
535    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
536        unsafe { self.as_ref() }.fmt(f)
537    }
538}
539
/// Returns a non-null pointer to the first byte of `bytes`.
///
/// Should the slice pointer ever be null, a dangling (but non-null) pointer is returned instead.
fn non_null_from_bytes(bytes: &[u8]) -> NonNull<u8> {
    match NonNull::new(bytes.as_ptr() as *mut u8) {
        Some(first) => first,
        // `NonNull::dangling` is acceptable for zero-length slices/strings.
        None => NonNull::dangling(),
    }
}
545
546fn view_cell_identity<'a>(
547    path: &Path,
548    field: &Field,
549    array: &'a dyn Array,
550    index: usize,
551) -> Result<Option<DynCellRef<'a>>, DynViewError> {
552    if index >= array.len() {
553        return Err(DynViewError::RowOutOfBounds {
554            row: index,
555            len: array.len(),
556        });
557    }
558    if array.is_null(index) {
559        return Ok(None);
560    }
561    Ok(Some(view_non_null(path, field, array, index)?))
562}
563
564pub(super) fn view_cell_with_projector<'a>(
565    path: &Path,
566    field: &Field,
567    projector: Option<&FieldProjector>,
568    array: &'a dyn Array,
569    index: usize,
570) -> Result<Option<DynCellRef<'a>>, DynViewError> {
571    match projector {
572        None | Some(FieldProjector::Identity) => view_cell_identity(path, field, array, index),
573        Some(projector) => view_cell_projected(path, field, projector, array, index),
574    }
575}
576
577fn view_cell_projected<'a>(
578    path: &Path,
579    field: &Field,
580    projector: &FieldProjector,
581    array: &'a dyn Array,
582    index: usize,
583) -> Result<Option<DynCellRef<'a>>, DynViewError> {
584    if index >= array.len() {
585        return Err(DynViewError::RowOutOfBounds {
586            row: index,
587            len: array.len(),
588        });
589    }
590    if array.is_null(index) {
591        return Ok(None);
592    }
593    let value = match projector {
594        FieldProjector::Identity => view_non_null(path, field, array, index)?,
595        FieldProjector::Struct(struct_proj) => {
596            view_struct_projected(path, field, struct_proj, array, index)?
597        }
598        FieldProjector::List(item_proj) => {
599            view_list_projected(path, field, item_proj, array, index)?
600        }
601        FieldProjector::LargeList(item_proj) => {
602            view_large_list_projected(path, field, item_proj, array, index)?
603        }
604        FieldProjector::FixedSizeList(item_proj) => {
605            view_fixed_size_list_projected(path, field, item_proj, array, index)?
606        }
607        FieldProjector::Map(entry_proj) => {
608            view_map_projected(path, field, entry_proj, array, index)?
609        }
610    };
611    Ok(Some(value))
612}
613
614fn view_struct_projected<'a>(
615    path: &Path,
616    field: &Field,
617    projection: &Arc<StructProjection>,
618    array: &'a dyn Array,
619    index: usize,
620) -> Result<DynCellRef<'a>, DynViewError> {
621    let DataType::Struct(children) = field.data_type() else {
622        return Err(DynViewError::Invalid {
623            column: path.column,
624            path: path.path.clone(),
625            message: "expected struct field for projected struct".to_string(),
626        });
627    };
628    let arr = array
629        .as_any()
630        .downcast_ref::<StructArray>()
631        .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
632    let view = DynStructView {
633        array: arr,
634        fields: children.clone(),
635        row: index,
636        base_path: path.clone(),
637        projection: Some(Arc::clone(projection)),
638    };
639    Ok(DynCellRef::structure(view))
640}
641
642fn view_list_projected<'a>(
643    path: &Path,
644    field: &Field,
645    item_projector: &FieldProjector,
646    array: &'a dyn Array,
647    index: usize,
648) -> Result<DynCellRef<'a>, DynViewError> {
649    let DataType::List(item_field) = field.data_type() else {
650        return Err(DynViewError::Invalid {
651            column: path.column,
652            path: path.path.clone(),
653            message: "expected list field for projected list".to_string(),
654        });
655    };
656    let arr = array
657        .as_any()
658        .downcast_ref::<ListArray>()
659        .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
660    let view = DynListView::new_list(
661        arr,
662        item_field.clone(),
663        path.clone(),
664        index,
665        Some(item_projector.clone()),
666    )?;
667    Ok(DynCellRef::list(view))
668}
669
670fn view_large_list_projected<'a>(
671    path: &Path,
672    field: &Field,
673    item_projector: &FieldProjector,
674    array: &'a dyn Array,
675    index: usize,
676) -> Result<DynCellRef<'a>, DynViewError> {
677    let DataType::LargeList(item_field) = field.data_type() else {
678        return Err(DynViewError::Invalid {
679            column: path.column,
680            path: path.path.clone(),
681            message: "expected large list field for projected list".to_string(),
682        });
683    };
684    let arr = array
685        .as_any()
686        .downcast_ref::<LargeListArray>()
687        .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
688    let view = DynListView::new_large_list(
689        arr,
690        item_field.clone(),
691        path.clone(),
692        index,
693        Some(item_projector.clone()),
694    )?;
695    Ok(DynCellRef::list(view))
696}
697
698fn view_fixed_size_list_projected<'a>(
699    path: &Path,
700    field: &Field,
701    item_projector: &FieldProjector,
702    array: &'a dyn Array,
703    index: usize,
704) -> Result<DynCellRef<'a>, DynViewError> {
705    let DataType::FixedSizeList(item_field, len) = field.data_type() else {
706        return Err(DynViewError::Invalid {
707            column: path.column,
708            path: path.path.clone(),
709            message: "expected fixed-size list field for projection".to_string(),
710        });
711    };
712    let arr = array
713        .as_any()
714        .downcast_ref::<FixedSizeListArray>()
715        .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
716    let view = DynFixedSizeListView::new(
717        arr,
718        item_field.clone(),
719        *len as usize,
720        path.clone(),
721        index,
722        Some(item_projector.clone()),
723    )?;
724    Ok(DynCellRef::fixed_size_list(view))
725}
726
727fn view_map_projected<'a>(
728    path: &Path,
729    field: &Field,
730    entry_projection: &Arc<StructProjection>,
731    array: &'a dyn Array,
732    index: usize,
733) -> Result<DynCellRef<'a>, DynViewError> {
734    let DataType::Map(entry_field, _) = field.data_type() else {
735        return Err(DynViewError::Invalid {
736            column: path.column,
737            path: path.path.clone(),
738            message: "expected map field for projection".to_string(),
739        });
740    };
741    let arr = array
742        .as_any()
743        .downcast_ref::<MapArray>()
744        .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
745    let entry_fields = match entry_field.data_type() {
746        DataType::Struct(children) => children.clone(),
747        other => {
748            return Err(DynViewError::Invalid {
749                column: path.column,
750                path: path.path.clone(),
751                message: format!("map entry must be struct, found {other:?}"),
752            });
753        }
754    };
755    let view = DynMapView::with_projection(
756        arr,
757        entry_fields,
758        path.clone(),
759        index,
760        Some(Arc::clone(entry_projection)),
761    )?;
762    Ok(DynCellRef::map(view))
763}
764
765fn view_non_null<'a>(
766    path: &Path,
767    field: &Field,
768    array: &'a dyn Array,
769    index: usize,
770) -> Result<DynCellRef<'a>, DynViewError> {
771    let dt = field.data_type();
772    match dt {
773        DataType::Null => Ok(DynCellRef::null()),
774        DataType::Boolean => {
775            let arr = as_bool(array, path)?;
776            Ok(DynCellRef::bool(arr.value(index)))
777        }
778        DataType::Int8 => {
779            let arr = as_primitive::<Int8Type>(array, path, dt)?;
780            Ok(DynCellRef::i8(arr.value(index)))
781        }
782        DataType::Int16 => {
783            let arr = as_primitive::<Int16Type>(array, path, dt)?;
784            Ok(DynCellRef::i16(arr.value(index)))
785        }
786        DataType::Int32 => {
787            let arr = as_primitive::<Int32Type>(array, path, dt)?;
788            Ok(DynCellRef::i32(arr.value(index)))
789        }
790        DataType::Date32 => {
791            let arr = as_primitive::<Date32Type>(array, path, dt)?;
792            Ok(DynCellRef::i32(arr.value(index)))
793        }
794        DataType::Time32(unit) => match unit {
795            arrow_schema::TimeUnit::Second => {
796                let arr = as_primitive::<Time32SecondType>(array, path, dt)?;
797                Ok(DynCellRef::i32(arr.value(index)))
798            }
799            arrow_schema::TimeUnit::Millisecond => {
800                let arr = as_primitive::<Time32MillisecondType>(array, path, dt)?;
801                Ok(DynCellRef::i32(arr.value(index)))
802            }
803            other => Err(DynViewError::Invalid {
804                column: path.column,
805                path: path.path.clone(),
806                message: format!("unsupported Time32 unit {other:?}"),
807            }),
808        },
809        DataType::Int64 => {
810            let arr = as_primitive::<Int64Type>(array, path, dt)?;
811            Ok(DynCellRef::i64(arr.value(index)))
812        }
813        DataType::Date64 => {
814            let arr = as_primitive::<Date64Type>(array, path, dt)?;
815            Ok(DynCellRef::i64(arr.value(index)))
816        }
817        DataType::Timestamp(unit, _) => match unit {
818            arrow_schema::TimeUnit::Second => {
819                let arr = as_primitive::<TimestampSecondType>(array, path, dt)?;
820                Ok(DynCellRef::i64(arr.value(index)))
821            }
822            arrow_schema::TimeUnit::Millisecond => {
823                let arr = as_primitive::<TimestampMillisecondType>(array, path, dt)?;
824                Ok(DynCellRef::i64(arr.value(index)))
825            }
826            arrow_schema::TimeUnit::Microsecond => {
827                let arr = as_primitive::<TimestampMicrosecondType>(array, path, dt)?;
828                Ok(DynCellRef::i64(arr.value(index)))
829            }
830            arrow_schema::TimeUnit::Nanosecond => {
831                let arr = as_primitive::<TimestampNanosecondType>(array, path, dt)?;
832                Ok(DynCellRef::i64(arr.value(index)))
833            }
834        },
835        DataType::Time64(unit) => match unit {
836            arrow_schema::TimeUnit::Microsecond => {
837                let arr = as_primitive::<Time64MicrosecondType>(array, path, dt)?;
838                Ok(DynCellRef::i64(arr.value(index)))
839            }
840            arrow_schema::TimeUnit::Nanosecond => {
841                let arr = as_primitive::<Time64NanosecondType>(array, path, dt)?;
842                Ok(DynCellRef::i64(arr.value(index)))
843            }
844            other => Err(DynViewError::Invalid {
845                column: path.column,
846                path: path.path.clone(),
847                message: format!("unsupported Time64 unit {other:?}"),
848            }),
849        },
850        DataType::Duration(unit) => match unit {
851            arrow_schema::TimeUnit::Second => {
852                let arr = as_primitive::<DurationSecondType>(array, path, dt)?;
853                Ok(DynCellRef::i64(arr.value(index)))
854            }
855            arrow_schema::TimeUnit::Millisecond => {
856                let arr = as_primitive::<DurationMillisecondType>(array, path, dt)?;
857                Ok(DynCellRef::i64(arr.value(index)))
858            }
859            arrow_schema::TimeUnit::Microsecond => {
860                let arr = as_primitive::<DurationMicrosecondType>(array, path, dt)?;
861                Ok(DynCellRef::i64(arr.value(index)))
862            }
863            arrow_schema::TimeUnit::Nanosecond => {
864                let arr = as_primitive::<DurationNanosecondType>(array, path, dt)?;
865                Ok(DynCellRef::i64(arr.value(index)))
866            }
867        },
868        DataType::UInt8 => {
869            let arr = as_primitive::<UInt8Type>(array, path, dt)?;
870            Ok(DynCellRef::u8(arr.value(index)))
871        }
872        DataType::UInt16 => {
873            let arr = as_primitive::<UInt16Type>(array, path, dt)?;
874            Ok(DynCellRef::u16(arr.value(index)))
875        }
876        DataType::UInt32 => {
877            let arr = as_primitive::<UInt32Type>(array, path, dt)?;
878            Ok(DynCellRef::u32(arr.value(index)))
879        }
880        DataType::UInt64 => {
881            let arr = as_primitive::<UInt64Type>(array, path, dt)?;
882            Ok(DynCellRef::u64(arr.value(index)))
883        }
884        DataType::Float32 => {
885            let arr = array
886                .as_any()
887                .downcast_ref::<Float32Array>()
888                .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
889            Ok(DynCellRef::f32(arr.value(index)))
890        }
891        DataType::Float64 => {
892            let arr = array
893                .as_any()
894                .downcast_ref::<Float64Array>()
895                .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
896            Ok(DynCellRef::f64(arr.value(index)))
897        }
898        DataType::Utf8 => {
899            let arr = array
900                .as_any()
901                .downcast_ref::<StringArray>()
902                .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
903            Ok(DynCellRef::string(arr.value(index)))
904        }
905        DataType::LargeUtf8 => {
906            let arr = array
907                .as_any()
908                .downcast_ref::<LargeStringArray>()
909                .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
910            Ok(DynCellRef::string(arr.value(index)))
911        }
912        DataType::Binary => {
913            let arr = array
914                .as_any()
915                .downcast_ref::<BinaryArray>()
916                .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
917            Ok(DynCellRef::binary(arr.value(index)))
918        }
919        DataType::LargeBinary => {
920            let arr = array
921                .as_any()
922                .downcast_ref::<LargeBinaryArray>()
923                .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
924            Ok(DynCellRef::binary(arr.value(index)))
925        }
926        DataType::FixedSizeBinary(_) => {
927            let arr = array
928                .as_any()
929                .downcast_ref::<FixedSizeBinaryArray>()
930                .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
931            Ok(DynCellRef::binary(arr.value(index)))
932        }
933        DataType::Struct(children) => {
934            let arr = array
935                .as_any()
936                .downcast_ref::<StructArray>()
937                .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
938            let view = DynStructView {
939                array: arr,
940                fields: children.clone(),
941                row: index,
942                base_path: path.clone(),
943                projection: None,
944            };
945            Ok(DynCellRef::structure(view))
946        }
947        DataType::List(item) => {
948            let arr = array
949                .as_any()
950                .downcast_ref::<ListArray>()
951                .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
952            let view = DynListView::new_list(arr, item.clone(), path.clone(), index, None)?;
953            Ok(DynCellRef::list(view))
954        }
955        DataType::LargeList(item) => {
956            let arr = array
957                .as_any()
958                .downcast_ref::<LargeListArray>()
959                .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
960            let view = DynListView::new_large_list(arr, item.clone(), path.clone(), index, None)?;
961            Ok(DynCellRef::list(view))
962        }
963        DataType::FixedSizeList(item, len) => {
964            let arr = array
965                .as_any()
966                .downcast_ref::<FixedSizeListArray>()
967                .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
968            let view = DynFixedSizeListView::new(
969                arr,
970                item.clone(),
971                *len as usize,
972                path.clone(),
973                index,
974                None,
975            )?;
976            Ok(DynCellRef::fixed_size_list(view))
977        }
978        DataType::Map(_, _) => {
979            let arr = array
980                .as_any()
981                .downcast_ref::<MapArray>()
982                .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
983            let view = DynMapView::new(arr, path.clone(), index)?;
984            Ok(DynCellRef::map(view))
985        }
986        DataType::Union(fields, mode) => {
987            let arr = array
988                .as_any()
989                .downcast_ref::<UnionArray>()
990                .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
991            let view = DynUnionView::new(arr, fields.clone(), *mode, path.clone(), index)?;
992            Ok(DynCellRef::union(view))
993        }
994        DataType::Dictionary(key_type, value_type) => dictionary_value(
995            path,
996            field,
997            array,
998            index,
999            key_type.as_ref(),
1000            value_type.as_ref(),
1001        ),
1002        other => Err(DynViewError::Invalid {
1003            column: path.column,
1004            path: path.path.clone(),
1005            message: format!("unsupported data type {other:?}"),
1006        }),
1007    }
1008}
1009
1010fn dictionary_value<'a>(
1011    path: &Path,
1012    field: &Field,
1013    array: &'a dyn Array,
1014    index: usize,
1015    key_type: &DataType,
1016    value_type: &DataType,
1017) -> Result<DynCellRef<'a>, DynViewError> {
1018    macro_rules! match_dict {
1019        ($key_ty:ty) => {{
1020            let dict = array
1021                .as_any()
1022                .downcast_ref::<DictionaryArray<$key_ty>>()
1023                .ok_or_else(|| type_mismatch(path, field.data_type().clone(), array.data_type()))?;
1024            dict_value(
1025                path,
1026                dict.keys().value(index) as usize,
1027                dict.values(),
1028                value_type,
1029            )
1030        }};
1031    }
1032
1033    match key_type {
1034        DataType::Int8 => match_dict!(Int8Type),
1035        DataType::Int16 => match_dict!(Int16Type),
1036        DataType::Int32 => match_dict!(Int32Type),
1037        DataType::Int64 => match_dict!(Int64Type),
1038        DataType::UInt8 => match_dict!(UInt8Type),
1039        DataType::UInt16 => match_dict!(UInt16Type),
1040        DataType::UInt32 => match_dict!(UInt32Type),
1041        DataType::UInt64 => match_dict!(UInt64Type),
1042        other => Err(DynViewError::Invalid {
1043            column: path.column,
1044            path: path.path.clone(),
1045            message: format!("unsupported dictionary key type {other:?}"),
1046        }),
1047    }
1048}
1049
1050fn dict_value<'a>(
1051    path: &Path,
1052    key_index: usize,
1053    values: &'a ArrayRef,
1054    value_type: &DataType,
1055) -> Result<DynCellRef<'a>, DynViewError> {
1056    if key_index >= values.len() {
1057        return Err(DynViewError::Invalid {
1058            column: path.column,
1059            path: path.path.clone(),
1060            message: format!(
1061                "dictionary key index {} out of bounds for {}",
1062                key_index,
1063                values.len()
1064            ),
1065        });
1066    }
1067    if values.is_null(key_index) {
1068        return Err(DynViewError::UnexpectedNull {
1069            column: path.column,
1070            path: path.path.clone(),
1071        });
1072    }
1073    match value_type {
1074        DataType::Utf8 => {
1075            let arr = values
1076                .as_any()
1077                .downcast_ref::<StringArray>()
1078                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1079            Ok(DynCellRef::string(arr.value(key_index)))
1080        }
1081        DataType::LargeUtf8 => {
1082            let arr = values
1083                .as_any()
1084                .downcast_ref::<LargeStringArray>()
1085                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1086            Ok(DynCellRef::string(arr.value(key_index)))
1087        }
1088        DataType::Binary => {
1089            let arr = values
1090                .as_any()
1091                .downcast_ref::<BinaryArray>()
1092                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1093            Ok(DynCellRef::binary(arr.value(key_index)))
1094        }
1095        DataType::LargeBinary => {
1096            let arr = values
1097                .as_any()
1098                .downcast_ref::<LargeBinaryArray>()
1099                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1100            Ok(DynCellRef::binary(arr.value(key_index)))
1101        }
1102        DataType::FixedSizeBinary(_) => {
1103            let arr = values
1104                .as_any()
1105                .downcast_ref::<FixedSizeBinaryArray>()
1106                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1107            Ok(DynCellRef::binary(arr.value(key_index)))
1108        }
1109        DataType::Int8 => {
1110            let arr = values
1111                .as_any()
1112                .downcast_ref::<Int8Array>()
1113                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1114            Ok(DynCellRef::i8(arr.value(key_index)))
1115        }
1116        DataType::Int16 => {
1117            let arr = values
1118                .as_any()
1119                .downcast_ref::<Int16Array>()
1120                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1121            Ok(DynCellRef::i16(arr.value(key_index)))
1122        }
1123        DataType::Int32 => {
1124            let arr = values
1125                .as_any()
1126                .downcast_ref::<Int32Array>()
1127                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1128            Ok(DynCellRef::i32(arr.value(key_index)))
1129        }
1130        DataType::Int64 => {
1131            let arr = values
1132                .as_any()
1133                .downcast_ref::<Int64Array>()
1134                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1135            Ok(DynCellRef::i64(arr.value(key_index)))
1136        }
1137        DataType::UInt8 => {
1138            let arr = values
1139                .as_any()
1140                .downcast_ref::<UInt8Array>()
1141                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1142            Ok(DynCellRef::u8(arr.value(key_index)))
1143        }
1144        DataType::UInt16 => {
1145            let arr = values
1146                .as_any()
1147                .downcast_ref::<UInt16Array>()
1148                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1149            Ok(DynCellRef::u16(arr.value(key_index)))
1150        }
1151        DataType::UInt32 => {
1152            let arr = values
1153                .as_any()
1154                .downcast_ref::<UInt32Array>()
1155                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1156            Ok(DynCellRef::u32(arr.value(key_index)))
1157        }
1158        DataType::UInt64 => {
1159            let arr = values
1160                .as_any()
1161                .downcast_ref::<UInt64Array>()
1162                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1163            Ok(DynCellRef::u64(arr.value(key_index)))
1164        }
1165        DataType::Float32 => {
1166            let arr = values
1167                .as_any()
1168                .downcast_ref::<Float32Array>()
1169                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1170            Ok(DynCellRef::f32(arr.value(key_index)))
1171        }
1172        DataType::Float64 => {
1173            let arr = values
1174                .as_any()
1175                .downcast_ref::<Float64Array>()
1176                .ok_or_else(|| type_mismatch(path, value_type.clone(), values.data_type()))?;
1177            Ok(DynCellRef::f64(arr.value(key_index)))
1178        }
1179        other => Err(DynViewError::Invalid {
1180            column: path.column,
1181            path: path.path.clone(),
1182            message: format!("unsupported dictionary value type {other:?}"),
1183        }),
1184    }
1185}
1186
1187pub(super) fn type_mismatch(path: &Path, expected: DataType, actual: &DataType) -> DynViewError {
1188    DynViewError::TypeMismatch {
1189        column: path.column,
1190        path: path.path.clone(),
1191        expected,
1192        actual: actual.clone(),
1193    }
1194}
1195
1196fn as_bool<'a>(array: &'a dyn Array, path: &Path) -> Result<&'a BooleanArray, DynViewError> {
1197    array
1198        .as_any()
1199        .downcast_ref::<BooleanArray>()
1200        .ok_or_else(|| type_mismatch(path, DataType::Boolean, array.data_type()))
1201}
1202
1203fn as_primitive<'a, T>(
1204    array: &'a dyn Array,
1205    path: &Path,
1206    expected: &DataType,
1207) -> Result<&'a PrimitiveArray<T>, DynViewError>
1208where
1209    T: arrow_array::types::ArrowPrimitiveType,
1210{
1211    array
1212        .as_any()
1213        .downcast_ref::<PrimitiveArray<T>>()
1214        .ok_or_else(|| type_mismatch(path, expected.clone(), array.data_type()))
1215}