Skip to main content

vortex_array/arrow/
session.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Plugin layer for moving Arrow extension types in and out of Vortex.
5//!
6//! Vortex's canonical Arrow conversion (see [`crate::dtype::arrow`] and the executor in
7//! [`crate::arrow::executor`]) handles every non-extension Arrow type and the builtin temporal
8//! extensions. The plugins registered here cover the remaining case: **Arrow extension types**.
9//!
10//! * An [`ArrowExportVTable`] is dispatched purely by the **target Arrow extension Id** —
11//!   the plugin is selected when the caller asks for an Arrow [`Field`] carrying matching
12//!   `ARROW:extension:name` metadata. The Vortex source dtype/encoding is irrelevant to
13//!   dispatch.
14//! * An [`ArrowImportVTable`] is dispatched by the **source Arrow extension name** carried
15//!   on the incoming [`Field`]. The plugin is responsible for both preserving extension
16//!   identity and re-encoding storage if needed (e.g. Arrow `FixedSizeBinary[16]` for UUID
17//!   becomes Vortex `FixedSizeList<u8; 16>`).
18//!
19//! Multiple plugins may register against the same key. They are tried in registration order;
20//! each may return [`ArrowExport::Unsupported`] / [`ArrowImport::Unsupported`] to defer to
21//! the next.
22
23use std::any::Any;
24use std::fmt::Debug;
25use std::sync::Arc;
26
27use arrow_array::Array as _;
28use arrow_array::ArrayRef as ArrowArrayRef;
29use arrow_array::RecordBatch;
30use arrow_array::make_array;
31use arrow_schema::DataType;
32use arrow_schema::Field;
33use arrow_schema::Fields;
34use arrow_schema::Schema;
35use arrow_schema::extension::EXTENSION_TYPE_NAME_KEY;
36use arrow_schema::extension::ExtensionType;
37use tracing::debug;
38use tracing::trace;
39use vortex_error::VortexResult;
40use vortex_error::vortex_bail;
41use vortex_error::vortex_ensure;
42use vortex_session::SessionExt;
43use vortex_session::SessionGuard;
44use vortex_session::SessionVar;
45use vortex_session::registry::Id;
46
47use crate::ArrayRef;
48use crate::ExecutionCtx;
49use crate::IntoArray;
50use crate::arc_swap_map::ArcSwapMap;
51use crate::arrays::StructArray;
52use crate::arrow::FromArrowArray;
53use crate::arrow::convert::nulls;
54use crate::arrow::convert::remove_nulls;
55use crate::arrow::executor::execute_arrow_naive;
56use crate::dtype::DType;
57use crate::dtype::FieldName;
58use crate::dtype::FieldNames;
59use crate::dtype::Nullability;
60use crate::dtype::StructFields;
61use crate::dtype::arrow::TryFromArrowType;
62use crate::dtype::arrow::to_data_type_naive;
63use crate::dtype::extension::ExtId;
64use crate::extension::datetime::AnyTemporal;
65use crate::extension::uuid::Uuid;
66use crate::validity::Validity;
67
/// Outcome of a successful call to [`ArrowExportVTable::execute_arrow`].
///
/// Plugins that don't handle the supplied array return [`Unsupported`][Self::Unsupported]
/// with ownership of the input so the session can probe the next plugin or fall back to the
/// canonical path. Errors are propagated through [`VortexResult`], not through this enum.
pub enum ArrowExport {
    /// The plugin does not handle this input; the session may try another plugin.
    /// Carries the untouched Vortex array back to the caller.
    Unsupported(ArrayRef),
    /// A successful export, carrying the produced Arrow array.
    Exported(ArrowArrayRef),
}
79
/// Outcome of a successful call to [`ArrowImportVTable::from_arrow_array`].
///
/// Plugins that don't handle the supplied array return [`Unsupported`][Self::Unsupported]
/// with ownership of the input so the session can probe the next plugin or fall back to the
/// canonical path. Errors are propagated through [`VortexResult`], not through this enum.
pub enum ArrowImport {
    /// The plugin does not handle this input; the session may try another plugin.
    /// Carries the untouched Arrow array back to the caller.
    Unsupported(ArrowArrayRef),
    /// A successful import, carrying the produced Vortex array.
    Imported(ArrayRef),
}
91
/// Plugin layer for exporting a Vortex array to an Arrow extension type.
///
/// This is purely an implementation trait, its methods should not be called directly. Instead,
/// use the methods on [`ArrowSession`].
pub trait ArrowExportVTable: 'static + Send + Sync + Debug {
    /// The Arrow extension ID this plugin produces.
    ///
    /// [`ArrowSession::register_exporter`] uses it as the key of the dispatch index.
    fn arrow_ext_id(&self) -> Id;

    /// The Vortex array or extension ID this plugin maps from. Used only for inference by
    /// [`ArrowSession::to_arrow_field`] / [`ArrowSession::to_arrow_schema`]; never as a
    /// dispatch key for [`execute_arrow`][Self::execute_arrow].
    fn vortex_id(&self) -> Id;

    /// Build the Arrow [`Field`] this plugin produces for the given Vortex extension
    /// `dtype`. Used during schema inference.
    ///
    /// `name` is the name the resulting field should carry and `session` is the
    /// [`ArrowSession`] performing the inference. Returning `Ok(None)` defers to the next
    /// plugin registered for the same Vortex ID.
    fn to_arrow_field(
        &self,
        name: &str,
        dtype: &DType,
        session: &ArrowSession,
    ) -> VortexResult<Option<Field>>;

    /// Convert a Vortex array into an Arrow array shaped to `target`.
    ///
    /// Returns ownership of `array` via [`ArrowExport::Unsupported`] when the plugin cannot
    /// handle the input. On success the session checks that the exported array has the same
    /// length as the input.
    fn execute_arrow(
        &self,
        array: ArrayRef,
        target: &Field,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<ArrowExport>;
}
125
/// Plugin layer for importing an Arrow extension-typed array into a Vortex array.
///
/// Plugins are dispatched by `arrow_ext_id`.
///
/// This is purely an implementation trait, its methods should not be called directly. Instead,
/// use the methods on [`ArrowSession`].
pub trait ArrowImportVTable: 'static + Send + Sync + Debug {
    /// The Arrow extension name this plugin handles.
    ///
    /// [`ArrowSession::register_importer`] uses it as the registry key.
    fn arrow_ext_id(&self) -> Id;

    /// Build the Vortex [`DType`] that corresponds to `field` (which carries this plugin's
    /// Arrow extension metadata).
    ///
    /// Returning `Ok(None)` defers to the next plugin registered for the same extension name.
    // `from_*` methods conventionally take no `self`; this one is dispatched on the plugin
    // instance, so the lint is silenced.
    #[allow(clippy::wrong_self_convention)]
    fn from_arrow_field(&self, field: &Field) -> VortexResult<Option<DType>>;

    /// Convert an Arrow array into a Vortex array of `dtype`.
    ///
    /// `field` is the Arrow field describing `array`; `dtype` is the Vortex dtype the session
    /// derived for that field.
    ///
    /// Returns ownership of `array` via [`ArrowImport::Unsupported`] when the plugin cannot
    /// handle the input.
    // Same reason as above: the method needs `&self` for plugin dispatch.
    #[allow(clippy::wrong_self_convention)]
    fn from_arrow_array(
        &self,
        array: ArrowArrayRef,
        field: &Field,
        dtype: &DType,
    ) -> VortexResult<ArrowImport>;
}
153
/// Shared, reference-counted handle to an export plugin.
pub type ArrowExportVTableRef = Arc<dyn ArrowExportVTable>;
/// Shared, reference-counted handle to an import plugin.
pub type ArrowImportVTableRef = Arc<dyn ArrowImportVTable>;
156
/// Session-scoped registry of Arrow extension plugins.
///
/// Exporters are stored in two indices: one keyed by Arrow extension Id (used for
/// `execute_arrow` dispatch) and one keyed by Vortex extension Id (used **only** by
/// `to_arrow_field` / `to_arrow_schema` inference, when callers need to translate a Vortex
/// extension `DType` into an Arrow `Field` with no target schema in hand). Importers are
/// keyed by Arrow extension name. The default session pre-registers the builtin UUID
/// plugin; temporal extensions are handled by the canonical Arrow ↔ Vortex path and do not
/// need plugins.
#[derive(Clone, Debug)]
pub struct ArrowSession {
    /// Export plugins keyed by the Arrow extension Id they produce (dispatch index).
    exporters: ArcSwapMap<Id, Arc<[ArrowExportVTableRef]>>,
    /// The same export plugins keyed by the Vortex Id they map from (inference index).
    exporters_by_vortex: ArcSwapMap<ExtId, Arc<[ArrowExportVTableRef]>>,
    /// Import plugins keyed by the Arrow extension name they handle.
    importers: ArcSwapMap<Id, Arc<[ArrowImportVTableRef]>>,
}
172
173impl Default for ArrowSession {
174    fn default() -> Self {
175        let session = Self {
176            exporters: ArcSwapMap::default(),
177            exporters_by_vortex: ArcSwapMap::default(),
178            importers: ArcSwapMap::default(),
179        };
180
181        session.register_exporter(Arc::new(Uuid));
182        session.register_importer(Arc::new(Uuid));
183
184        session
185    }
186}
187
188impl ArrowSession {
189    /// Register an [`ArrowExportVTable`] under its target Arrow extension Id (for dispatch)
190    /// and its source Vortex extension Id (for schema inference).
191    pub fn register_exporter(&self, exporter: ArrowExportVTableRef) {
192        self.exporters.push(
193            exporter.arrow_ext_id(),
194            ArrowExportVTableRef::clone(&exporter),
195        );
196        self.exporters_by_vortex
197            .push(exporter.vortex_id(), exporter);
198    }
199
200    /// Register an [`ArrowImportVTable`] under its source Arrow extension name.
201    pub fn register_importer(&self, importer: ArrowImportVTableRef) {
202        self.importers.push(importer.arrow_ext_id(), importer);
203    }
204
205    fn exporters(&self, id: &Id) -> Arc<[ArrowExportVTableRef]> {
206        self.exporters.get(id).unwrap_or_else(|| Arc::from([]))
207    }
208
209    fn exporters_by_vortex(&self, id: &Id) -> Arc<[ArrowExportVTableRef]> {
210        self.exporters_by_vortex
211            .get(id)
212            .unwrap_or_else(|| Arc::from([]))
213    }
214
215    fn importers(&self, id: &Id) -> Arc<[ArrowImportVTableRef]> {
216        self.importers.get(id).unwrap_or_else(|| Arc::from([]))
217    }
218
219    /// Build the Arrow [`Field`] for a Vortex [`DType`].
220    ///
221    /// For [`DType::Extension`]s, plugins registered against the extension's `Id`
222    /// are tried in registration order; the first plugin to return `Some(field)` wins.
223    pub fn to_arrow_field(&self, name: &str, dtype: &DType) -> VortexResult<Field> {
224        // Handle the structural encodings, which may have recursive types
225        match dtype {
226            DType::List(elem_dtype, nullability) => {
227                let elem_field = self.to_arrow_field(Field::LIST_FIELD_DEFAULT_NAME, elem_dtype)?;
228                Ok(Field::new_list(name, elem_field, nullability.is_nullable()))
229            }
230            DType::FixedSizeList(elem_dtype, elem_size, nullability) => {
231                let elem_field = self.to_arrow_field(Field::LIST_FIELD_DEFAULT_NAME, elem_dtype)?;
232                Ok(Field::new_fixed_size_list(
233                    name,
234                    elem_field,
235                    (*elem_size).try_into()?,
236                    nullability.is_nullable(),
237                ))
238            }
239            DType::Struct(fields, nullability) => {
240                let arrow_fields = Fields::from_iter(
241                    fields
242                        .fields()
243                        .zip(fields.names().iter())
244                        .map(|(field, name)| self.to_arrow_field(name.as_ref(), &field))
245                        .collect::<VortexResult<Vec<_>>>()?,
246                );
247                Ok(Field::new_struct(
248                    name,
249                    arrow_fields,
250                    nullability.is_nullable(),
251                ))
252            }
253            DType::Extension(ext) if !ext.is::<AnyTemporal>() => {
254                for plugin in self.exporters_by_vortex(&ext.id()).iter() {
255                    if let Some(field) =
256                        plugin.to_arrow_field(name, &DType::Extension(ext.clone()), self)?
257                    {
258                        return Ok(field);
259                    }
260                }
261                vortex_bail!("extension type cannot be converted to Arrow without a plugin: {ext}");
262            }
263            DType::Variant(_) => {
264                // TODO(Adam): This currently encodes information about parquet-variant
265                // at this level. Variant's complexity with being an essentially logical type
266                // with multiple physical layout complicates handling this correctly.
267                Ok(Field::new(
268                    name,
269                    DataType::Struct(
270                        vec![
271                            Field::new("metadata", DataType::BinaryView, dtype.is_nullable()),
272                            Field::new("value", DataType::BinaryView, dtype.is_nullable()),
273                        ]
274                        .into(),
275                    ),
276                    dtype.is_nullable(),
277                )
278                .with_metadata(
279                    [(
280                        EXTENSION_TYPE_NAME_KEY.to_string(),
281                        "arrow.parquet.variant".to_string(),
282                    )]
283                    .into(),
284                ))
285            }
286            _ => Ok(Field::new(
287                name,
288                to_data_type_naive(dtype)?,
289                dtype.is_nullable(),
290            )),
291        }
292    }
293
294    /// Build the Arrow [`Schema`] for a Vortex top-level [`DType::Struct`], dispatching
295    /// extension fields through registered export plugins for inference. Nested
296    /// extensions are preserved via [`Self::to_arrow_field`].
297    pub fn to_arrow_schema(&self, dtype: &DType) -> VortexResult<Schema> {
298        let DType::Struct(struct_dtype, _) = dtype else {
299            vortex_error::vortex_bail!(
300                "to_arrow_schema requires a top-level struct dtype, got {dtype}"
301            );
302        };
303        let mut fields = Vec::with_capacity(struct_dtype.names().len());
304        for (name, field_dtype) in struct_dtype.names().iter().zip(struct_dtype.fields()) {
305            fields.push(self.to_arrow_field(name.as_ref(), &field_dtype)?);
306        }
307        Ok(Schema::new(fields))
308    }
309
310    /// Build the Vortex [`DType`] for an Arrow [`Field`].
311    ///
312    /// Plugins registered against the field's Arrow extension name are tried in
313    /// registration order; the first plugin to return `Some(dtype)` wins. If none
314    /// match (or all return `None`), recurses into container types ([`DataType::List`]
315    /// family, [`DataType::FixedSizeList`], [`DataType::Struct`]) so extension metadata
316    /// on nested element/struct fields is preserved. Leaf types use the canonical
317    /// Arrow → Vortex mapping via [`DType::try_from_arrow`].
318    #[expect(clippy::disallowed_methods, reason = "interning a dynamic id")]
319    pub fn from_arrow_field(&self, field: &Field) -> VortexResult<DType> {
320        if let Some(name) = field.metadata().get(EXTENSION_TYPE_NAME_KEY) {
321            for plugin in self.importers(&Id::new(name)).iter() {
322                if let Some(dtype) = plugin.from_arrow_field(field)? {
323                    return Ok(dtype);
324                }
325            }
326        }
327        let nullability: Nullability = field.is_nullable().into();
328        Ok(match field.data_type() {
329            DataType::List(elem)
330            | DataType::LargeList(elem)
331            | DataType::ListView(elem)
332            | DataType::LargeListView(elem) => {
333                DType::List(Arc::new(self.from_arrow_field(elem.as_ref())?), nullability)
334            }
335            DataType::FixedSizeList(elem, size) => DType::FixedSizeList(
336                Arc::new(self.from_arrow_field(elem.as_ref())?),
337                *size as u32,
338                nullability,
339            ),
340            DataType::Struct(fields) => {
341                let entries = fields
342                    .iter()
343                    .map(|f| {
344                        self.from_arrow_field(f)
345                            .map(|dt| (FieldName::from(f.name().as_str()), dt))
346                    })
347                    .collect::<VortexResult<Vec<_>>>()?;
348                DType::Struct(StructFields::from_iter(entries), nullability)
349            }
350            _ => DType::try_from_arrow(field)?,
351        })
352    }
353
354    /// Build the Vortex [`DType`] for an Arrow [`Schema`], dispatching extension fields
355    /// through registered import plugins. The result is a top-level non-nullable struct
356    /// matching the schema's fields.
357    pub fn from_arrow_schema(&self, schema: &Schema) -> VortexResult<DType> {
358        let entries = schema
359            .fields()
360            .iter()
361            .map(|f| {
362                self.from_arrow_field(f)
363                    .map(|dt| (FieldName::from(f.name().as_str()), dt))
364            })
365            .collect::<VortexResult<Vec<_>>>()?;
366        Ok(DType::Struct(
367            StructFields::from_iter(entries),
368            Nullability::NonNullable,
369        ))
370    }
371
372    /// Decode an Arrow [`RecordBatch`] into a Vortex struct array, dispatching each
373    /// extension column through its registered import plugin.
374    ///
375    /// `schema` is the authoritative Arrow schema used for dispatch — the columns are
376    /// consumed positionally. Pass an external schema (rather than relying on
377    /// `batch.schema()`) when upstream DataFusion plumbing may have stripped Field-level
378    /// extension metadata from the runtime RecordBatch.
379    pub fn from_arrow_record_batch(
380        &self,
381        batch: RecordBatch,
382        schema: &Schema,
383    ) -> VortexResult<ArrayRef> {
384        vortex_ensure!(
385            batch.num_columns() == schema.fields().len(),
386            "RecordBatch has {} columns but schema has {} fields",
387            batch.num_columns(),
388            schema.fields().len()
389        );
390        let length = batch.num_rows();
391        let names = FieldNames::from_iter(
392            schema
393                .fields()
394                .iter()
395                .map(|f| FieldName::from(f.name().as_str())),
396        );
397        let mut columns = Vec::with_capacity(schema.fields().len());
398        for (col, field) in batch.columns().iter().zip(schema.fields().iter()) {
399            columns.push(self.from_arrow_array(ArrowArrayRef::clone(col), field)?);
400        }
401        Ok(StructArray::try_new(names, columns, length, Validity::NonNullable)?.into_array())
402    }
403
404    /// Execute a Vortex array into an Arrow array.
405    ///
406    /// If `target` carries an `ARROW:extension:name`, the plugin registry is probed for one that
407    /// can support executing to the target extension type.
408    ///
409    /// With `target = None` the fallback path picks the array's preferred Arrow physical type
410    /// and executes directly into that, ignoring extension types.
411    #[expect(clippy::disallowed_methods, reason = "interning a dynamic id")]
412    pub fn execute_arrow(
413        &self,
414        array: ArrayRef,
415        target: Option<&Field>,
416        ctx: &mut ExecutionCtx,
417    ) -> VortexResult<ArrowArrayRef> {
418        // NOTE(aduffy): this looks strange, but we do this to keep target_field as &Field so
419        //  we can avoid cloning target when it is provided. It contains a HashMap internally that
420        //  can be expensive to copy.
421        let arrow_field;
422        let target_field = match target {
423            Some(field) => field,
424            None => {
425                let session = ctx.session().clone();
426                arrow_field = session.arrow().to_arrow_field("", array.dtype())?;
427                &arrow_field
428            }
429        };
430
431        if let Some(arrow_ext_name) = target_field.metadata().get(EXTENSION_TYPE_NAME_KEY) {
432            // There can be multiple plugins that report support for a particular extension type.
433            // We try them in order until one of them reports a successful conversion.
434            let len = array.len();
435            let mut current = array;
436
437            for plugin in self.exporters(&Id::new(arrow_ext_name)).iter() {
438                trace!(
439                    plugin = ?plugin,
440                    extension_name = arrow_ext_name,
441                    "probing plugin for converting Arrow array"
442                );
443
444                match plugin.execute_arrow(current, target_field, ctx)? {
445                    ArrowExport::Exported(arrow) => {
446                        vortex_ensure!(
447                            arrow.len() == len,
448                            "Arrow array length does not match Vortex array length after conversion to {:?}",
449                            arrow
450                        );
451                        return Ok(arrow);
452                    }
453                    ArrowExport::Unsupported(array) => current = array,
454                }
455            }
456
457            debug!(
458                extension_id = arrow_ext_name,
459                data_type = ?target_field.data_type(),
460                "unsupported Arrow extension type encountered, falling back to naive execution"
461            );
462
463            return execute_arrow_naive(current, Some(target_field.data_type()), ctx);
464        }
465
466        execute_arrow_naive(array, target.map(|field| field.data_type()), ctx)
467    }
468
469    /// Decode an Arrow array into a Vortex array.
470    ///
471    /// Routes through the registered import plugin if `field` carries an Arrow extension
472    /// name we recognize, probing each plugin in registration order until one handles the
473    /// input or all return [`ArrowImport::Unsupported`]. Otherwise recurses into container
474    /// arrays ([`arrow_array::StructArray`], [`arrow_array::GenericListArray`],
475    /// [`arrow_array::FixedSizeListArray`], [`arrow_array::GenericListViewArray`]) so
476    /// extension fields nested inside containers reach their importers; leaf types fall
477    /// through to the canonical Arrow → Vortex array conversion.
478    pub fn from_arrow_array(&self, array: ArrowArrayRef, field: &Field) -> VortexResult<ArrayRef> {
479        if let Some(extension_name) = field.metadata().get(EXTENSION_TYPE_NAME_KEY) {
480            #[expect(clippy::disallowed_methods, reason = "interning a dynamic id")]
481            let importers = self.importers(&Id::new(extension_name));
482            if !importers.is_empty() {
483                let dtype = self.from_arrow_field(field)?;
484                let mut current = array;
485                for plugin in importers.iter() {
486                    match plugin.from_arrow_array(current, field, &dtype)? {
487                        ArrowImport::Imported(arr) => return Ok(arr),
488                        ArrowImport::Unsupported(arr) => current = arr,
489                    }
490                }
491                return ArrayRef::from_arrow(current.as_ref(), field.is_nullable());
492            }
493        }
494        self.from_arrow_array_canonical(array, field)
495    }
496
497    /// Recurse into Arrow container arrays so nested fields with extension metadata reach
498    /// their importers, falling through to [`ArrayRef::from_arrow`] for leaf types.
499    #[allow(clippy::wrong_self_convention)]
500    fn from_arrow_array_canonical(
501        &self,
502        array: ArrowArrayRef,
503        field: &Field,
504    ) -> VortexResult<ArrayRef> {
505        use arrow_array::cast::AsArray;
506
507        match field.data_type() {
508            DataType::Struct(fields) => {
509                let arrow_struct = array.as_struct();
510                let names = FieldNames::from_iter(
511                    fields.iter().map(|f| FieldName::from(f.name().as_str())),
512                );
513                let columns = arrow_struct
514                    .columns()
515                    .iter()
516                    .zip(fields.iter())
517                    .map(|(col, child_field)| {
518                        // Arrow pushes nulls into non-nullable fields; strip before recursing
519                        // so Vortex's stricter validity invariants are upheld.
520                        let inner = if col.null_count() > 0 && !child_field.is_nullable() {
521                            make_array(remove_nulls(col.to_data())?)
522                        } else {
523                            ArrowArrayRef::clone(col)
524                        };
525                        self.from_arrow_array(inner, child_field.as_ref())
526                    })
527                    .collect::<VortexResult<Vec<_>>>()?;
528                let validity = nulls(arrow_struct.nulls(), field.is_nullable())?;
529                Ok(
530                    StructArray::try_new(names, columns, arrow_struct.len(), validity)?
531                        .into_array(),
532                )
533            }
534            DataType::List(elem_field) => {
535                let list = array.as_list::<i32>();
536                let elements = self
537                    .from_arrow_array(ArrowArrayRef::clone(list.values()), elem_field.as_ref())?;
538                let offsets = list.offsets().clone().into_array();
539                let validity = nulls(list.nulls(), field.is_nullable())?;
540                Ok(crate::arrays::ListArray::try_new(elements, offsets, validity)?.into_array())
541            }
542            DataType::LargeList(elem_field) => {
543                let list = array.as_list::<i64>();
544                let elements = self
545                    .from_arrow_array(ArrowArrayRef::clone(list.values()), elem_field.as_ref())?;
546                let offsets = list.offsets().clone().into_array();
547                let validity = nulls(list.nulls(), field.is_nullable())?;
548                Ok(crate::arrays::ListArray::try_new(elements, offsets, validity)?.into_array())
549            }
550            DataType::FixedSizeList(elem_field, list_size) => {
551                let fsl = array.as_fixed_size_list();
552                let elements =
553                    self.from_arrow_array(ArrowArrayRef::clone(fsl.values()), elem_field.as_ref())?;
554                let validity = nulls(fsl.nulls(), field.is_nullable())?;
555                Ok(crate::arrays::FixedSizeListArray::try_new(
556                    elements,
557                    *list_size as u32,
558                    validity,
559                    fsl.len(),
560                )?
561                .into_array())
562            }
563            DataType::ListView(elem_field) => {
564                let list = array.as_list_view::<i32>();
565                let elements = self
566                    .from_arrow_array(ArrowArrayRef::clone(list.values()), elem_field.as_ref())?;
567                let offsets = list.offsets().clone().into_array();
568                let sizes = list.sizes().clone().into_array();
569                let validity = nulls(list.nulls(), field.is_nullable())?;
570                Ok(
571                    crate::arrays::ListViewArray::try_new(elements, offsets, sizes, validity)?
572                        .into_array(),
573                )
574            }
575            DataType::LargeListView(elem_field) => {
576                let list = array.as_list_view::<i64>();
577                let elements = self
578                    .from_arrow_array(ArrowArrayRef::clone(list.values()), elem_field.as_ref())?;
579                let offsets = list.offsets().clone().into_array();
580                let sizes = list.sizes().clone().into_array();
581                let validity = nulls(list.nulls(), field.is_nullable())?;
582                Ok(
583                    crate::arrays::ListViewArray::try_new(elements, offsets, sizes, validity)?
584                        .into_array(),
585                )
586            }
587            _ => ArrayRef::from_arrow(array.as_ref(), field.is_nullable()),
588        }
589    }
590}
591
592// NOTE(aduffy): We should remove this once we bump Arrow to 0.59.0. This is replicating the
593//  `Field::has_valid_extension_type` method on Arrow added in 58.2.0, we polyfill it here so that
594//  this crate can build with minimal-versions declared.
595pub(crate) fn has_valid_extension_type<E: ExtensionType>(field: &Field) -> bool {
596    if field.extension_type_name() != Some(E::NAME) {
597        return false;
598    }
599
600    E::try_new_from_field_metadata(field.data_type(), field.metadata()).is_ok()
601}
602
/// Lets [`ArrowSession`] be stored as a session variable by exposing it as [`Any`].
impl SessionVar for ArrowSession {
    /// Borrow this session as a type-erased [`Any`].
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Mutably borrow this session as a type-erased [`Any`].
    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }
}
612
/// Extension trait for accessing the [`ArrowSession`] on a Vortex session.
///
/// A blanket implementation in this module covers every type implementing [`SessionExt`].
pub trait ArrowSessionExt: SessionExt {
    /// Get the Arrow session, returned behind a [`SessionGuard`].
    fn arrow(&self) -> SessionGuard<'_, ArrowSession>;
}
618
/// Blanket implementation: any [`SessionExt`] type can hand out its [`ArrowSession`].
impl<S: SessionExt> ArrowSessionExt for S {
    /// Fetch the [`ArrowSession`] variable from the underlying session via `get`.
    fn arrow(&self) -> SessionGuard<'_, ArrowSession> {
        self.get::<ArrowSession>()
    }
}
624
// Tests for schema inference and array conversion, all driven by the builtin UUID plugin
// that `ArrowSession::default()` registers.
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow_array::FixedSizeBinaryArray;
    use arrow_array::cast::AsArray;
    use arrow_schema::DataType;
    use arrow_schema::Field;
    use arrow_schema::extension::Uuid as ArrowUuid;
    use vortex_error::VortexResult;

    use super::*;
    use crate::VortexSessionExecute;
    use crate::array_session;
    use crate::dtype::DType;
    use crate::dtype::FieldName;
    use crate::dtype::Nullability;
    use crate::dtype::PType;
    use crate::dtype::StructFields;
    use crate::dtype::extension::ExtDType;
    use crate::dtype::extension::ExtVTable;
    use crate::extension::uuid::Uuid;
    use crate::extension::uuid::UuidMetadata;

    /// Build the Vortex UUID extension dtype over `FixedSizeList<u8; 16>` storage, with the
    /// storage list's nullability controlled by `nullable`.
    fn uuid_dtype(nullable: bool) -> DType {
        let storage = DType::FixedSizeList(
            Arc::new(DType::Primitive(PType::U8, Nullability::NonNullable)),
            16,
            nullable.into(),
        );
        DType::Extension(
            ExtDType::try_with_vtable(Uuid, UuidMetadata::default(), storage)
                .expect("uuid ext dtype")
                .erased(),
        )
    }

    /// A top-level UUID dtype yields a field with valid Arrow UUID extension metadata.
    #[test]
    fn to_arrow_field_top_level_uuid_carries_extension_metadata() -> VortexResult<()> {
        let session = ArrowSession::default();
        let field = session.to_arrow_field("id", &uuid_dtype(false))?;
        assert!(has_valid_extension_type::<ArrowUuid>(&field));
        Ok(())
    }

    /// A UUID nested in a struct keeps its extension metadata and `FixedSizeBinary(16)`
    /// storage on the child field.
    #[test]
    fn to_arrow_field_struct_with_nested_uuid_preserves_metadata() -> VortexResult<()> {
        let session = ArrowSession::default();
        let dtype = DType::Struct(
            StructFields::from_iter([(FieldName::from("id"), uuid_dtype(false))]),
            Nullability::NonNullable,
        );
        let field = session.to_arrow_field("row", &dtype)?;
        let DataType::Struct(inner) = field.data_type() else {
            panic!("expected Struct, got {:?}", field.data_type());
        };
        assert_eq!(inner.len(), 1);
        assert_eq!(inner[0].data_type(), &DataType::FixedSizeBinary(16));
        assert!(has_valid_extension_type::<ArrowUuid>(&inner[0]));
        Ok(())
    }

    /// A list of UUIDs keeps the extension metadata on the list's element field.
    #[test]
    fn to_arrow_field_list_of_uuid_preserves_metadata() -> VortexResult<()> {
        let session = ArrowSession::default();
        let dtype = DType::List(Arc::new(uuid_dtype(true)), Nullability::NonNullable);
        let field = session.to_arrow_field("ids", &dtype)?;
        let DataType::List(elem) = field.data_type() else {
            panic!("expected List, got {:?}", field.data_type());
        };
        assert!(has_valid_extension_type::<ArrowUuid>(elem));
        Ok(())
    }

    /// A fixed-size list of UUIDs keeps both its size and the element's extension metadata.
    #[test]
    fn to_arrow_field_fixed_size_list_of_uuid_preserves_metadata() -> VortexResult<()> {
        let session = ArrowSession::default();
        let dtype = DType::FixedSizeList(Arc::new(uuid_dtype(false)), 3, Nullability::NonNullable);
        let field = session.to_arrow_field("triple", &dtype)?;
        let DataType::FixedSizeList(elem, size) = field.data_type() else {
            panic!("expected FixedSizeList, got {:?}", field.data_type());
        };
        assert_eq!(*size, 3);
        assert!(has_valid_extension_type::<ArrowUuid>(elem));
        Ok(())
    }

    /// Schema inference reaches a UUID that sits two struct levels deep.
    #[test]
    fn to_arrow_schema_struct_of_struct_uuid() -> VortexResult<()> {
        let session = ArrowSession::default();
        let inner = DType::Struct(
            StructFields::from_iter([(FieldName::from("id"), uuid_dtype(true))]),
            Nullability::NonNullable,
        );
        let outer = DType::Struct(
            StructFields::from_iter([(FieldName::from("payload"), inner)]),
            Nullability::NonNullable,
        );
        let schema = session.to_arrow_schema(&outer)?;
        let payload = schema.field(0);
        let DataType::Struct(inner_fields) = payload.data_type() else {
            panic!("expected Struct, got {:?}", payload.data_type());
        };
        assert!(has_valid_extension_type::<ArrowUuid>(&inner_fields[0]));
        Ok(())
    }

    /// Importing a list field whose element carries UUID metadata yields a list of the
    /// Vortex UUID extension dtype.
    #[test]
    fn from_arrow_field_recurses_into_nested_uuid() -> VortexResult<()> {
        let session = ArrowSession::default();
        let mut elem = Field::new("item", DataType::FixedSizeBinary(16), false);
        elem.try_with_extension_type(ArrowUuid)?;
        let outer = Field::new("ids", DataType::List(Arc::new(elem)), false);

        let dtype = session.from_arrow_field(&outer)?;
        let DType::List(inner_dt, _) = dtype else {
            panic!("expected List dtype, got {dtype}");
        };
        assert!(
            matches!(inner_dt.as_ref(), DType::Extension(ext) if ext.id() == Uuid.id()),
            "expected Uuid extension element, got {inner_dt}",
        );
        Ok(())
    }

    /// Vortex dtype → Arrow schema → Vortex dtype is lossless for top-level and nested UUIDs.
    #[test]
    fn schema_roundtrip_preserves_nested_uuid() -> VortexResult<()> {
        let session = ArrowSession::default();
        let dtype = DType::Struct(
            StructFields::from_iter([
                (FieldName::from("id"), uuid_dtype(false)),
                (
                    FieldName::from("ids"),
                    DType::List(Arc::new(uuid_dtype(true)), Nullability::NonNullable),
                ),
            ]),
            Nullability::NonNullable,
        );
        let schema = session.to_arrow_schema(&dtype)?;
        let roundtripped = session.from_arrow_schema(&schema)?;
        assert_eq!(roundtripped, dtype);
        Ok(())
    }

    /// Imports a UUID column, then exports it with no target field. The assertions cover the
    /// exported storage type and the values; field-level metadata is not inspected here.
    #[test]
    fn execute_arrow_target_none_preserves_top_level_uuid_metadata() -> VortexResult<()> {
        let vortex_session = array_session();
        let mut ctx = vortex_session.create_execution_ctx();
        let session = vortex_session.arrow();

        let mut field = Field::new("id", DataType::FixedSizeBinary(16), false);
        field.try_with_extension_type(ArrowUuid)?;
        let arrow_array: ArrowArrayRef = Arc::new(FixedSizeBinaryArray::try_from_iter(
            [*b"0123456789abcdef", *b"fedcba9876543210"].into_iter(),
        )?);

        let vortex_array = session.from_arrow_array(arrow_array, &field)?;

        // The import must have gone through the UUID plugin.
        let vortex_ext = vortex_array.dtype().as_extension();
        assert!(vortex_ext.is::<Uuid>());

        let exported = session.execute_arrow(vortex_array, None, &mut ctx)?;
        assert_eq!(exported.data_type(), &DataType::FixedSizeBinary(16));
        let fsb = exported.as_fixed_size_binary();
        assert_eq!(fsb.len(), 2);
        assert_eq!(fsb.value(0), b"0123456789abcdef");
        assert_eq!(fsb.value(1), b"fedcba9876543210");
        Ok(())
    }
}