Skip to main content

vortex_array/extension/uuid/
arrow.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Arrow plugin impls for the UUID extension type.
5//!
6//! UUIDs are a canonical Arrow extension type backed by `FixedSizeBinary[16]`. The Vortex side
7//! stores them as `FixedSizeList<u8; 16>`, so the conversion is a zero-copy reinterpretation
8//! of the byte buffer in both directions.
9
10use std::sync::Arc;
11
12use arrow_array::Array;
13use arrow_array::ArrayRef as ArrowArrayRef;
14use arrow_array::FixedSizeBinaryArray;
15use arrow_array::cast::AsArray;
16use arrow_array::types::UInt8Type;
17use arrow_schema::DataType;
18use arrow_schema::Field;
19use arrow_schema::extension::ExtensionType;
20use arrow_schema::extension::Uuid as ArrowUuid;
21use vortex_array::arrow::ArrowSession;
22use vortex_array::arrow::has_valid_extension_type;
23use vortex_buffer::Alignment;
24use vortex_buffer::Buffer;
25use vortex_error::VortexExpect;
26use vortex_error::VortexResult;
27use vortex_session::registry::CachedId;
28use vortex_session::registry::Id;
29
30use crate::ArrayRef;
31use crate::ExecutionCtx;
32use crate::IntoArray;
33use crate::arrays::ExtensionArray;
34use crate::arrays::FixedSizeListArray;
35use crate::arrays::PrimitiveArray;
36use crate::arrays::extension::ExtensionArrayExt;
37use crate::arrow::ArrowExport;
38use crate::arrow::ArrowExportVTable;
39use crate::arrow::ArrowImport;
40use crate::arrow::ArrowImportVTable;
41use crate::arrow::ArrowSessionExt;
42use crate::arrow::nulls;
43use crate::buffer::BufferHandle;
44use crate::dtype::DType;
45use crate::dtype::Nullability;
46use crate::dtype::PType;
47use crate::dtype::extension::ExtDType;
48use crate::dtype::extension::ExtVTable;
49use crate::extension::uuid::Uuid;
50use crate::extension::uuid::UuidMetadata;
51use crate::validity::Validity;
52
/// Width of a UUID in bytes.
///
/// Used both as the Arrow `FixedSizeBinary` byte width and as the list size of the Vortex
/// `FixedSizeList` storage; kept as `i32` because that is how it is passed to the Arrow
/// `DataType` constructors in this file.
const UUID_BYTE_LEN: i32 = 16;

/// Cached registry id built from the canonical Arrow UUID extension name ([`ArrowUuid::NAME`]).
static ARROW_UUID: CachedId = CachedId::new(ArrowUuid::NAME);
56
57impl ArrowExportVTable for Uuid {
58    fn arrow_ext_id(&self) -> Id {
59        *ARROW_UUID
60    }
61
62    fn vortex_id(&self) -> Id {
63        Uuid.id()
64    }
65
66    // Encode all of these.
67    fn to_arrow_field(
68        &self,
69        name: &str,
70        dtype: &DType,
71        _session: &ArrowSession,
72    ) -> VortexResult<Option<Field>> {
73        let mut field = Field::new(
74            name.to_string(),
75            DataType::FixedSizeBinary(UUID_BYTE_LEN),
76            dtype.is_nullable(),
77        );
78        field
79            .try_with_extension_type(ArrowUuid)
80            .vortex_expect("FixedSizeBinary[16] is correct type for ArrowUuid");
81        Ok(Some(field))
82    }
83
84    fn execute_arrow(
85        &self,
86        array: ArrayRef,
87        _target: &Field,
88        ctx: &mut ExecutionCtx,
89    ) -> VortexResult<ArrowExport> {
90        let is_uuid = array
91            .dtype()
92            .as_extension_opt()
93            .map(|ext| ext.is::<Uuid>())
94            .unwrap_or(false);
95        if !is_uuid {
96            return Ok(ArrowExport::Unsupported(array));
97        }
98        Ok(ArrowExport::Exported(try_fsl_to_fsb(array, ctx)?))
99    }
100}
101
102impl ArrowImportVTable for Uuid {
103    fn arrow_ext_id(&self) -> Id {
104        *ARROW_UUID
105    }
106
107    fn from_arrow_field(&self, field: &Field) -> VortexResult<Option<DType>> {
108        if !has_valid_extension_type::<ArrowUuid>(field) {
109            return Ok(None);
110        }
111
112        let storage_dtype = DType::FixedSizeList(
113            Arc::new(DType::Primitive(PType::U8, Nullability::NonNullable)),
114            UUID_BYTE_LEN as u32,
115            field.is_nullable().into(),
116        );
117
118        Ok(Some(DType::Extension(
119            ExtDType::try_with_vtable(Uuid, UuidMetadata::default(), storage_dtype)?.erased(),
120        )))
121    }
122
123    fn from_arrow_array(
124        &self,
125        array: ArrowArrayRef,
126        _field: &Field,
127        dtype: &DType,
128    ) -> VortexResult<ArrowImport> {
129        let DType::Extension(dtype) = dtype else {
130            return Ok(ArrowImport::Unsupported(array));
131        };
132        if !matches!(array.data_type(), DataType::FixedSizeBinary(UUID_BYTE_LEN))
133            || !dtype.is::<Uuid>()
134        {
135            return Ok(ArrowImport::Unsupported(array));
136        }
137
138        let fsb = array.as_fixed_size_binary();
139        let buffer = Buffer::from_arrow_buffer(fsb.values().clone(), Alignment::none());
140        let u8_array = PrimitiveArray::from_buffer_handle(
141            BufferHandle::new_host(buffer),
142            PType::U8,
143            Validity::NonNullable,
144        );
145        let validity = nulls(fsb.nulls(), dtype.is_nullable());
146
147        let storage = FixedSizeListArray::new(
148            u8_array.into_array(),
149            fsb.value_length() as u32,
150            validity,
151            fsb.len(),
152        )
153        .into_array();
154        Ok(ArrowImport::Imported(
155            ExtensionArray::new(dtype.clone(), storage).into_array(),
156        ))
157    }
158}
159
160fn try_fsl_to_fsb(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ArrowArrayRef> {
161    let executed = array.execute::<ExtensionArray>(ctx)?;
162    let storage = executed.storage_array().clone();
163    let storage_arrow_type = DataType::FixedSizeList(
164        Arc::new(Field::new("item", DataType::UInt8, false)),
165        UUID_BYTE_LEN,
166    );
167
168    let storage_field = Field::new(
169        String::new(),
170        storage_arrow_type,
171        storage.dtype().is_nullable(),
172    );
173
174    let session = ctx.session().clone();
175    let arrow_storage = session
176        .arrow()
177        .execute_arrow(storage, Some(&storage_field), ctx)?;
178
179    let fsl = arrow_storage.as_fixed_size_list();
180    let bytes = fsl
181        .values()
182        .as_primitive::<UInt8Type>()
183        .values()
184        .inner()
185        .clone();
186
187    Ok(Arc::new(FixedSizeBinaryArray::new(
188        fsl.value_length(),
189        bytes,
190        fsl.nulls().cloned(),
191    )))
192}