Skip to main content

vortex_array/extension/uuid/
arrow.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Arrow plugin impls for the UUID extension type.
5//!
6//! UUIDs are a canonical Arrow extension type backed by `FixedSizeBinary[16]`. The Vortex side
7//! stores them as `FixedSizeList<u8; 16>`, so the conversion is a zero-copy reinterpretation
8//! of the byte buffer in both directions.
9
10use std::sync::Arc;
11
12use arrow_array::Array;
13use arrow_array::ArrayRef as ArrowArrayRef;
14use arrow_array::FixedSizeBinaryArray;
15use arrow_array::cast::AsArray;
16use arrow_array::types::UInt8Type;
17use arrow_schema::DataType;
18use arrow_schema::Field;
19use arrow_schema::extension::ExtensionType;
20use arrow_schema::extension::Uuid as ArrowUuid;
21use vortex_array::arrow::ArrowSession;
22use vortex_array::arrow::has_valid_extension_type;
23use vortex_buffer::Alignment;
24use vortex_buffer::Buffer;
25use vortex_error::VortexExpect;
26use vortex_error::VortexResult;
27use vortex_session::registry::CachedId;
28use vortex_session::registry::Id;
29
30use crate::ArrayRef;
31use crate::ExecutionCtx;
32use crate::IntoArray;
33use crate::arrays::ExtensionArray;
34use crate::arrays::FixedSizeListArray;
35use crate::arrays::PrimitiveArray;
36use crate::arrays::extension::ExtensionArrayExt;
37use crate::arrow::ArrowExport;
38use crate::arrow::ArrowExportVTable;
39use crate::arrow::ArrowImport;
40use crate::arrow::ArrowImportVTable;
41use crate::arrow::ArrowSessionExt;
42use crate::arrow::nulls;
43use crate::buffer::BufferHandle;
44use crate::dtype::DType;
45use crate::dtype::Nullability;
46use crate::dtype::PType;
47use crate::dtype::extension::ExtDType;
48use crate::dtype::extension::ExtDTypeRef;
49use crate::dtype::extension::ExtId;
50use crate::dtype::extension::ExtVTable;
51use crate::extension::uuid::Uuid;
52use crate::extension::uuid::UuidMetadata;
53use crate::validity::Validity;
54
/// Number of bytes in one UUID value.
///
/// Kept as `i32` because it is passed directly as the size argument of
/// `DataType::FixedSizeBinary` and `DataType::FixedSizeList` in this file; it is cast to `u32`
/// where the Vortex `DType::FixedSizeList` is built.
55const UUID_BYTE_LEN: i32 = 16;
56
/// Cached id created from the Arrow UUID extension name (`ArrowUuid::NAME`); dereferenced by the
/// `arrow_ext_id` methods of both the export and import vtable impls.
57static ARROW_UUID: CachedId = CachedId::new(ArrowUuid::NAME);
58
59impl ArrowExportVTable for Uuid {
60    fn arrow_ext_id(&self) -> Id {
61        *ARROW_UUID
62    }
63
64    fn vortex_ext_id(&self) -> ExtId {
65        Uuid.id()
66    }
67
68    // Encode all of these.
69    fn to_arrow_field(
70        &self,
71        name: &str,
72        dtype: &ExtDTypeRef,
73        _session: &ArrowSession,
74    ) -> VortexResult<Option<Field>> {
75        let mut field = Field::new(
76            name.to_string(),
77            DataType::FixedSizeBinary(UUID_BYTE_LEN),
78            dtype.is_nullable(),
79        );
80        field
81            .try_with_extension_type(ArrowUuid)
82            .vortex_expect("FixedSizeBinary[16] is correct type for ArrowUuid");
83        Ok(Some(field))
84    }
85
86    fn execute_arrow(
87        &self,
88        array: ArrayRef,
89        _target: &Field,
90        ctx: &mut ExecutionCtx,
91    ) -> VortexResult<ArrowExport> {
92        let is_uuid = array
93            .dtype()
94            .as_extension_opt()
95            .map(|ext| ext.is::<Uuid>())
96            .unwrap_or(false);
97        if !is_uuid {
98            return Ok(ArrowExport::Unsupported(array));
99        }
100        Ok(ArrowExport::Exported(try_fsl_to_fsb(array, ctx)?))
101    }
102}
103
104impl ArrowImportVTable for Uuid {
105    fn arrow_ext_id(&self) -> Id {
106        *ARROW_UUID
107    }
108
109    fn from_arrow_field(&self, field: &Field) -> VortexResult<Option<DType>> {
110        if !has_valid_extension_type::<ArrowUuid>(field) {
111            return Ok(None);
112        }
113
114        let storage_dtype = DType::FixedSizeList(
115            Arc::new(DType::Primitive(PType::U8, Nullability::NonNullable)),
116            UUID_BYTE_LEN as u32,
117            field.is_nullable().into(),
118        );
119
120        Ok(Some(DType::Extension(
121            ExtDType::try_with_vtable(Uuid, UuidMetadata::default(), storage_dtype)?.erased(),
122        )))
123    }
124
125    fn from_arrow_array(
126        &self,
127        array: ArrowArrayRef,
128        dtype: &ExtDTypeRef,
129    ) -> VortexResult<ArrowImport> {
130        if !matches!(array.data_type(), DataType::FixedSizeBinary(UUID_BYTE_LEN))
131            || !dtype.is::<Uuid>()
132        {
133            return Ok(ArrowImport::Unsupported(array));
134        }
135
136        let fsb = array.as_fixed_size_binary();
137        let buffer = Buffer::from_arrow_buffer(fsb.values().clone(), Alignment::none());
138        let u8_array = PrimitiveArray::from_buffer_handle(
139            BufferHandle::new_host(buffer),
140            PType::U8,
141            Validity::NonNullable,
142        );
143        let validity = nulls(fsb.nulls(), dtype.is_nullable());
144
145        let storage = FixedSizeListArray::new(
146            u8_array.into_array(),
147            fsb.value_length() as u32,
148            validity,
149            fsb.len(),
150        )
151        .into_array();
152        Ok(ArrowImport::Imported(
153            ExtensionArray::new(dtype.clone(), storage).into_array(),
154        ))
155    }
156}
157
158fn try_fsl_to_fsb(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ArrowArrayRef> {
159    let executed = array.execute::<ExtensionArray>(ctx)?;
160    let storage = executed.storage_array().clone();
161    let storage_arrow_type = DataType::FixedSizeList(
162        Arc::new(Field::new("item", DataType::UInt8, false)),
163        UUID_BYTE_LEN,
164    );
165
166    let storage_field = Field::new(
167        String::new(),
168        storage_arrow_type,
169        storage.dtype().is_nullable(),
170    );
171
172    let session = ctx.session().clone();
173    let arrow_storage = session
174        .arrow()
175        .execute_arrow(storage, Some(&storage_field), ctx)?;
176
177    let fsl = arrow_storage.as_fixed_size_list();
178    let bytes = fsl
179        .values()
180        .as_primitive::<UInt8Type>()
181        .values()
182        .inner()
183        .clone();
184
185    Ok(Arc::new(FixedSizeBinaryArray::new(
186        fsl.value_length(),
187        bytes,
188        fsl.nulls().cloned(),
189    )))
190}