Skip to main content

vortex_array/arrays/extension/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use smallvec::smallvec;
5use vortex_error::VortexExpect;
6use vortex_error::VortexResult;
7use vortex_error::vortex_bail;
8use vortex_error::vortex_ensure_eq;
9use vortex_error::vortex_err;
10use vortex_error::vortex_panic;
11use vortex_session::VortexSession;
12use vortex_session::registry::CachedId;
13
14use crate::ArrayRef;
15use crate::EmptyArrayData;
16use crate::ExecutionCtx;
17use crate::ExecutionResult;
18use crate::array::Array;
19use crate::array::ArrayId;
20use crate::array::ArrayParts;
21use crate::array::ArrayView;
22use crate::array::VTable;
23use crate::array::ValidityVTableFromChild;
24use crate::array::with_empty_buffers;
25use crate::arrays::extension::array::SLOT_NAMES;
26use crate::arrays::extension::array::STORAGE_SLOT;
27use crate::arrays::extension::compute::rules::PARENT_RULES;
28use crate::arrays::extension::compute::rules::RULES;
29use crate::buffer::BufferHandle;
30use crate::dtype::DType;
31use crate::serde::ArrayChildren;
32
33mod kernel;
34mod operations;
35mod validity;
36
/// Encoding marker for arrays that attach extension-type semantics to a storage array.
///
/// **⚠️ Unstable API**: The extension type system is still evolving, so this experimental
/// feature may change significantly in future versions.
///
/// In contrast to Apache Arrow's extension arrays, Vortex extension arrays offer a more
/// flexible way to give existing array types semantic meaning without requiring changes
/// to the core type system.
///
/// ## Design Philosophy
///
/// An extension array is a type-safe wrapper that:
/// - Keeps the underlying storage format and operations intact
/// - Attaches semantic type information through `ExtDType`
/// - Makes custom serialization and deserialization logic possible
/// - Permits domain-specific interpretations of generic data
///
/// ## Storage and Type Relationship
///
/// A strict contract ties the two halves together:
/// - **Storage array**: Holds the actual data in a standard Vortex encoding
/// - **Extension type**: Determines how the storage data is interpreted semantically
/// - **Type safety**: The dtype of the storage array must equal the storage dtype of the
///   extension type
///
/// ## Use Cases
///
/// Extension arrays are a good fit for:
/// - **Custom numeric types**: Units of measurement, currencies
/// - **Temporal types**: Custom date/time formats, time zones, calendars
/// - **Domain-specific types**: UUIDs, IP addresses, geographic coordinates
/// - **Encoded types**: Base64 strings, compressed data, encrypted values
///
/// ## Validity and Operations
///
/// Validity and most operations are forwarded to the storage array:
/// - Validity comes from the underlying storage
/// - Slicing keeps the extension type
/// - Scalar access wraps storage scalars with extension metadata
#[derive(Debug, Clone)]
pub struct Extension;
77
78/// A [`Extension`]-encoded Vortex array.
79pub type ExtensionArray = Array<Extension>;
80
81pub(crate) fn initialize(session: &VortexSession) {
82    kernel::initialize(session);
83}
84
85impl VTable for Extension {
86    type TypedArrayData = EmptyArrayData;
87
88    type OperationsVTable = Self;
89    type ValidityVTable = ValidityVTableFromChild;
90
91    fn id(&self) -> ArrayId {
92        static ID: CachedId = CachedId::new("vortex.ext");
93        *ID
94    }
95
96    fn validate(
97        &self,
98        _data: &EmptyArrayData,
99        dtype: &DType,
100        len: usize,
101        slots: &[Option<ArrayRef>],
102    ) -> VortexResult<()> {
103        let storage = slots[STORAGE_SLOT]
104            .as_ref()
105            .vortex_expect("ExtensionArray storage slot");
106        vortex_ensure_eq!(
107            storage.len(),
108            len,
109            "ExtensionArray length {} does not match outer length {len}",
110            storage.len(),
111        );
112
113        let ext_dtype = dtype
114            .as_extension_opt()
115            .ok_or_else(|| vortex_err!("not an extension dtype"))?;
116
117        let actual_dtype = DType::Extension(ext_dtype.clone());
118        vortex_ensure_eq!(
119            &actual_dtype,
120            dtype,
121            "ExtensionArray dtype {actual_dtype} does not match outer dtype {dtype}",
122        );
123
124        Ok(())
125    }
126
127    fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
128        0
129    }
130
131    fn buffer(_array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
132        vortex_panic!("ExtensionArray buffer index {idx} out of bounds")
133    }
134
135    fn buffer_name(_array: ArrayView<'_, Self>, _idx: usize) -> Option<String> {
136        None
137    }
138
139    fn with_buffers(
140        &self,
141        array: ArrayView<'_, Self>,
142        buffers: &[BufferHandle],
143    ) -> VortexResult<ArrayParts<Self>> {
144        with_empty_buffers(self, array, buffers)
145    }
146
147    fn serialize(
148        _array: ArrayView<'_, Self>,
149        _session: &VortexSession,
150    ) -> VortexResult<Option<Vec<u8>>> {
151        Ok(Some(vec![]))
152    }
153
154    fn deserialize(
155        &self,
156        dtype: &DType,
157        len: usize,
158        metadata: &[u8],
159
160        _buffers: &[BufferHandle],
161        children: &dyn ArrayChildren,
162        _session: &VortexSession,
163    ) -> VortexResult<ArrayParts<Self>> {
164        if !metadata.is_empty() {
165            vortex_bail!(
166                "ExtensionArray expects empty metadata, got {} bytes",
167                metadata.len()
168            );
169        }
170        let DType::Extension(ext_dtype) = dtype else {
171            vortex_bail!("Not an extension DType");
172        };
173        if children.len() != 1 {
174            vortex_bail!("Expected 1 child, got {}", children.len());
175        }
176        let storage = children.get(0, ext_dtype.storage_dtype(), len)?;
177        Ok(
178            ArrayParts::new(self.clone(), dtype.clone(), len, EmptyArrayData)
179                .with_slots(smallvec![Some(storage)]),
180        )
181    }
182
183    fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
184        SLOT_NAMES[idx].to_string()
185    }
186
187    fn execute(array: Array<Self>, _ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
188        Ok(ExecutionResult::done(array))
189    }
190
191    fn reduce(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>> {
192        RULES.evaluate(array)
193    }
194
195    fn reduce_parent(
196        array: ArrayView<'_, Self>,
197        parent: &ArrayRef,
198        child_idx: usize,
199    ) -> VortexResult<Option<ArrayRef>> {
200        PARENT_RULES.evaluate(array, parent, child_idx)
201    }
202}