Skip to main content

vortex_array/arrays/extension/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use kernel::PARENT_KERNELS;
5use smallvec::smallvec;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8use vortex_error::vortex_bail;
9use vortex_error::vortex_ensure_eq;
10use vortex_error::vortex_err;
11use vortex_error::vortex_panic;
12use vortex_session::VortexSession;
13use vortex_session::registry::CachedId;
14
15use crate::ArrayRef;
16use crate::EmptyArrayData;
17use crate::ExecutionCtx;
18use crate::ExecutionResult;
19use crate::array::Array;
20use crate::array::ArrayId;
21use crate::array::ArrayParts;
22use crate::array::ArrayView;
23use crate::array::VTable;
24use crate::array::ValidityVTableFromChild;
25use crate::arrays::extension::array::SLOT_NAMES;
26use crate::arrays::extension::array::STORAGE_SLOT;
27use crate::arrays::extension::compute::rules::PARENT_RULES;
28use crate::arrays::extension::compute::rules::RULES;
29use crate::buffer::BufferHandle;
30use crate::dtype::DType;
31use crate::serde::ArrayChildren;
32
33mod kernel;
34mod operations;
35mod validity;
36
/// Encoding marker for an array that layers extension-type information over another array.
///
/// **⚠️ Unstable API**: This is an experimental feature and may change significantly in
/// future versions; the extension type system is still evolving.
///
/// In contrast to Apache Arrow's extension arrays, Vortex extension arrays are meant as a
/// more flexible way of attaching semantic meaning to existing array types, without any
/// change to the core type system.
///
/// ## Design Philosophy
///
/// An extension array is a type-safe wrapper which:
/// - Keeps the underlying storage format and its operations intact
/// - Carries semantic type information through `ExtDType`
/// - Makes custom serialization and deserialization logic possible
/// - Lets generic data be interpreted in a domain-specific way
///
/// ## Storage and Type Relationship
///
/// Two pieces make up an extension array, bound by a strict contract:
/// - **Storage array**: Holds the actual data in a standard Vortex encoding
/// - **Extension type**: Describes how the storage data is to be interpreted semantically
/// - **Type safety**: The dtype of the storage array must match the storage dtype declared
///   by the extension type
///
/// ## Use Cases
///
/// Good candidates for extension arrays include:
/// - **Custom numeric types**: Units of measurement, currencies
/// - **Temporal types**: Custom date/time formats, time zones, calendars
/// - **Domain-specific types**: UUIDs, IP addresses, geographic coordinates
/// - **Encoded types**: Base64 strings, compressed data, encrypted values
///
/// ## Validity and Operations
///
/// Validity and most operations are delegated to the storage array:
/// - Validity is inherited from the underlying storage
/// - Slicing preserves the extension type
/// - Scalar access wraps storage scalars with extension metadata
#[derive(Debug, Clone)]
pub struct Extension;
77
/// An [`Extension`]-encoded Vortex array.
///
/// This is an alias for [`Array`] parameterized by the [`Extension`] encoding.
pub type ExtensionArray = Array<Extension>;
80
81impl VTable for Extension {
82    type TypedArrayData = EmptyArrayData;
83
84    type OperationsVTable = Self;
85    type ValidityVTable = ValidityVTableFromChild;
86
87    fn id(&self) -> ArrayId {
88        static ID: CachedId = CachedId::new("vortex.ext");
89        *ID
90    }
91
92    fn validate(
93        &self,
94        _data: &EmptyArrayData,
95        dtype: &DType,
96        len: usize,
97        slots: &[Option<ArrayRef>],
98    ) -> VortexResult<()> {
99        let storage = slots[STORAGE_SLOT]
100            .as_ref()
101            .vortex_expect("ExtensionArray storage slot");
102        vortex_ensure_eq!(
103            storage.len(),
104            len,
105            "ExtensionArray length {} does not match outer length {len}",
106            storage.len(),
107        );
108
109        let ext_dtype = dtype
110            .as_extension_opt()
111            .ok_or_else(|| vortex_err!("not an extension dtype"))?;
112
113        let actual_dtype = DType::Extension(ext_dtype.clone());
114        vortex_ensure_eq!(
115            &actual_dtype,
116            dtype,
117            "ExtensionArray dtype {actual_dtype} does not match outer dtype {dtype}",
118        );
119
120        Ok(())
121    }
122
123    fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
124        0
125    }
126
127    fn buffer(_array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
128        vortex_panic!("ExtensionArray buffer index {idx} out of bounds")
129    }
130
131    fn buffer_name(_array: ArrayView<'_, Self>, _idx: usize) -> Option<String> {
132        None
133    }
134
135    fn serialize(
136        _array: ArrayView<'_, Self>,
137        _session: &VortexSession,
138    ) -> VortexResult<Option<Vec<u8>>> {
139        Ok(Some(vec![]))
140    }
141
142    fn deserialize(
143        &self,
144        dtype: &DType,
145        len: usize,
146        metadata: &[u8],
147
148        _buffers: &[BufferHandle],
149        children: &dyn ArrayChildren,
150        _session: &VortexSession,
151    ) -> VortexResult<ArrayParts<Self>> {
152        if !metadata.is_empty() {
153            vortex_bail!(
154                "ExtensionArray expects empty metadata, got {} bytes",
155                metadata.len()
156            );
157        }
158        let DType::Extension(ext_dtype) = dtype else {
159            vortex_bail!("Not an extension DType");
160        };
161        if children.len() != 1 {
162            vortex_bail!("Expected 1 child, got {}", children.len());
163        }
164        let storage = children.get(0, ext_dtype.storage_dtype(), len)?;
165        Ok(
166            ArrayParts::new(self.clone(), dtype.clone(), len, EmptyArrayData)
167                .with_slots(smallvec![Some(storage)]),
168        )
169    }
170
171    fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
172        SLOT_NAMES[idx].to_string()
173    }
174
175    fn execute(array: Array<Self>, _ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
176        Ok(ExecutionResult::done(array))
177    }
178
179    fn execute_parent(
180        array: ArrayView<'_, Self>,
181        parent: &ArrayRef,
182        child_idx: usize,
183        ctx: &mut ExecutionCtx,
184    ) -> VortexResult<Option<ArrayRef>> {
185        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
186    }
187
188    fn reduce(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>> {
189        RULES.evaluate(array)
190    }
191
192    fn reduce_parent(
193        array: ArrayView<'_, Self>,
194        parent: &ArrayRef,
195        child_idx: usize,
196    ) -> VortexResult<Option<ArrayRef>> {
197        PARENT_RULES.evaluate(array, parent, child_idx)
198    }
199}