Skip to main content

vortex_array/arrays/extension/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use kernel::PARENT_KERNELS;
5use vortex_error::VortexExpect;
6use vortex_error::VortexResult;
7use vortex_error::vortex_bail;
8use vortex_error::vortex_ensure_eq;
9use vortex_error::vortex_err;
10use vortex_error::vortex_panic;
11use vortex_session::VortexSession;
12use vortex_session::registry::CachedId;
13
14use crate::ArrayRef;
15use crate::EmptyArrayData;
16use crate::ExecutionCtx;
17use crate::ExecutionResult;
18use crate::array::Array;
19use crate::array::ArrayId;
20use crate::array::ArrayParts;
21use crate::array::ArrayView;
22use crate::array::VTable;
23use crate::array::ValidityVTableFromChild;
24use crate::arrays::extension::array::SLOT_NAMES;
25use crate::arrays::extension::array::STORAGE_SLOT;
26use crate::arrays::extension::compute::rules::PARENT_RULES;
27use crate::arrays::extension::compute::rules::RULES;
28use crate::buffer::BufferHandle;
29use crate::dtype::DType;
30use crate::serde::ArrayChildren;
31
32mod kernel;
33mod operations;
34mod validity;
35
/// Encoding marker for arrays that attach an extension type to a storage array.
///
/// **⚠️ Unstable API**: the extension type system is experimental and is still evolving, so
/// this encoding may change significantly in future versions.
///
/// In contrast to Apache Arrow's extension arrays, a Vortex extension array is a more flexible
/// way to layer semantic meaning on top of existing array types without touching the core type
/// system.
///
/// ## Design Philosophy
///
/// An extension array is a type-safe wrapper which:
/// - keeps the storage format and the operations of the wrapped array intact,
/// - carries semantic type information through `ExtDType`,
/// - makes custom serialization and deserialization logic possible, and
/// - lets generic data be interpreted in a domain-specific way.
///
/// ## Storage and Type Relationship
///
/// Two pieces make up an extension array, bound together by a strict contract:
/// - **Storage array**: holds the actual data in a standard Vortex encoding.
/// - **Extension type**: describes how the stored data is to be interpreted semantically.
/// - **Type safety**: the dtype of the storage array must be equal to the storage dtype
///   declared by the extension type.
///
/// ## Use Cases
///
/// Typical applications include:
/// - **Custom numeric types**: units of measurement, currencies
/// - **Temporal types**: custom date/time formats, time zones, calendars
/// - **Domain-specific types**: UUIDs, IP addresses, geographic coordinates
/// - **Encoded types**: Base64 strings, compressed data, encrypted values
///
/// ## Validity and Operations
///
/// Validity and most operations are forwarded to the storage array:
/// - validity is inherited from the underlying storage,
/// - slicing keeps the extension type, and
/// - scalar access wraps the storage scalar with the extension metadata.
#[derive(Debug, Clone)]
pub struct Extension;
76
77/// A [`Extension`]-encoded Vortex array.
78pub type ExtensionArray = Array<Extension>;
79
80impl VTable for Extension {
81    type ArrayData = EmptyArrayData;
82
83    type OperationsVTable = Self;
84    type ValidityVTable = ValidityVTableFromChild;
85
86    fn id(&self) -> ArrayId {
87        static ID: CachedId = CachedId::new("vortex.ext");
88        *ID
89    }
90
91    fn validate(
92        &self,
93        _data: &EmptyArrayData,
94        dtype: &DType,
95        len: usize,
96        slots: &[Option<ArrayRef>],
97    ) -> VortexResult<()> {
98        let storage = slots[STORAGE_SLOT]
99            .as_ref()
100            .vortex_expect("ExtensionArray storage slot");
101        vortex_ensure_eq!(
102            storage.len(),
103            len,
104            "ExtensionArray length {} does not match outer length {len}",
105            storage.len(),
106        );
107
108        let ext_dtype = dtype
109            .as_extension_opt()
110            .ok_or_else(|| vortex_err!("not an extension dtype"))?;
111
112        let actual_dtype = DType::Extension(ext_dtype.clone());
113        vortex_ensure_eq!(
114            &actual_dtype,
115            dtype,
116            "ExtensionArray dtype {actual_dtype} does not match outer dtype {dtype}",
117        );
118
119        Ok(())
120    }
121
122    fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
123        0
124    }
125
126    fn buffer(_array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
127        vortex_panic!("ExtensionArray buffer index {idx} out of bounds")
128    }
129
130    fn buffer_name(_array: ArrayView<'_, Self>, _idx: usize) -> Option<String> {
131        None
132    }
133
134    fn serialize(
135        _array: ArrayView<'_, Self>,
136        _session: &VortexSession,
137    ) -> VortexResult<Option<Vec<u8>>> {
138        Ok(Some(vec![]))
139    }
140
141    fn deserialize(
142        &self,
143        dtype: &DType,
144        len: usize,
145        metadata: &[u8],
146
147        _buffers: &[BufferHandle],
148        children: &dyn ArrayChildren,
149        _session: &VortexSession,
150    ) -> VortexResult<ArrayParts<Self>> {
151        if !metadata.is_empty() {
152            vortex_bail!(
153                "ExtensionArray expects empty metadata, got {} bytes",
154                metadata.len()
155            );
156        }
157        let DType::Extension(ext_dtype) = dtype else {
158            vortex_bail!("Not an extension DType");
159        };
160        if children.len() != 1 {
161            vortex_bail!("Expected 1 child, got {}", children.len());
162        }
163        let storage = children.get(0, ext_dtype.storage_dtype(), len)?;
164        Ok(
165            ArrayParts::new(self.clone(), dtype.clone(), len, EmptyArrayData)
166                .with_slots(vec![Some(storage)]),
167        )
168    }
169
170    fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
171        SLOT_NAMES[idx].to_string()
172    }
173
174    fn execute(array: Array<Self>, _ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
175        Ok(ExecutionResult::done(array))
176    }
177
178    fn execute_parent(
179        array: ArrayView<'_, Self>,
180        parent: &ArrayRef,
181        child_idx: usize,
182        ctx: &mut ExecutionCtx,
183    ) -> VortexResult<Option<ArrayRef>> {
184        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
185    }
186
187    fn reduce(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>> {
188        RULES.evaluate(array)
189    }
190
191    fn reduce_parent(
192        array: ArrayView<'_, Self>,
193        parent: &ArrayRef,
194        child_idx: usize,
195    ) -> VortexResult<Option<ArrayRef>> {
196        PARENT_RULES.evaluate(array, parent, child_idx)
197    }
198}