Skip to main content

vortex_array/arrays/extension/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::fmt::Formatter;
6
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9
10use crate::ArrayRef;
11use crate::array::Array;
12use crate::array::ArrayParts;
13use crate::array::TypedArrayRef;
14use crate::arrays::Extension;
15use crate::dtype::DType;
16use crate::dtype::extension::ExtDTypeRef;
17
18/// The backing storage array for this extension array.
19pub(super) const STORAGE_SLOT: usize = 0;
20pub(super) const NUM_SLOTS: usize = 1;
21pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["storage"];
22
23/// An extension array that wraps another array with additional type information.
24///
25/// **⚠️ Unstable API**: This is an experimental feature that may change significantly
26/// in future versions. The extension type system is still evolving.
27///
28/// Unlike Apache Arrow's extension arrays, Vortex extension arrays provide a more flexible
29/// mechanism for adding semantic meaning to existing array types without requiring
30/// changes to the core type system.
31///
32/// ## Design Philosophy
33///
34/// Extension arrays serve as a type-safe wrapper that:
35/// - Preserves the underlying storage format and operations
36/// - Adds semantic type information via `ExtDType`
37/// - Enables custom serialization and deserialization logic
38/// - Allows domain-specific interpretations of generic data
39///
40/// ## Storage and Type Relationship
41///
42/// The extension array maintains a strict contract:
43/// - **Storage array**: Contains the actual data in a standard Vortex encoding
44/// - **Extension type**: Defines how to interpret the storage data semantically
45/// - **Type safety**: The storage array's dtype must match the extension type's storage dtype
46///
47/// ## Use Cases
48///
49/// Extension arrays are ideal for:
50/// - **Custom numeric types**: Units of measurement, currencies
51/// - **Temporal types**: Custom date/time formats, time zones, calendars
52/// - **Domain-specific types**: UUIDs, IP addresses, geographic coordinates
53/// - **Encoded types**: Base64 strings, compressed data, encrypted values
54///
55/// ## Validity and Operations
56///
57/// Extension arrays delegate validity and most operations to their storage array:
58/// - Validity is inherited from the underlying storage
59/// - Slicing preserves the extension type
60/// - Scalar access wraps storage scalars with extension metadata
61#[derive(Clone, Debug)]
62pub struct ExtensionData {
63    /// The storage dtype. This **must** be a [`Extension::DType`] variant.
64    pub(super) ext_dtype: ExtDTypeRef,
65}
66
67impl Display for ExtensionData {
68    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
69        write!(f, "ext_dtype: {}", self.ext_dtype)
70    }
71}
72
73impl ExtensionData {
74    /// Constructs a new `ExtensionArray`.
75    ///
76    /// # Panics
77    ///
78    /// Panics if the storage array in not compatible with the extension dtype.
79    pub fn new(ext_dtype: ExtDTypeRef, storage_dtype: &DType) -> Self {
80        Self::try_new(ext_dtype, storage_dtype).vortex_expect("Failed to create `ExtensionArray`")
81    }
82
83    /// Tries to construct a new `ExtensionArray`.
84    ///
85    /// # Errors
86    ///
87    /// Returns an error if the storage array in not compatible with the extension dtype.
88    pub fn try_new(ext_dtype: ExtDTypeRef, storage_dtype: &DType) -> VortexResult<Self> {
89        // TODO(connor): Replace these statements once we add `validate_storage_array`.
90        // ext_dtype.validate_storage_array(&storage_array)?;
91        assert_eq!(
92            ext_dtype.storage_dtype(),
93            storage_dtype,
94            "ExtensionArray: storage_dtype must match storage array DType",
95        );
96
97        // SAFETY: we validate that the inputs are valid above.
98        Ok(unsafe { Self::new_unchecked(ext_dtype, storage_dtype) })
99    }
100
101    /// Creates a new `ExtensionArray`.
102    ///
103    /// # Safety
104    ///
105    /// The caller must ensure that the storage array is compatible with the extension dtype. In
106    /// other words, they must know that `ext_dtype.validate_storage_array(&storage_array)` has been
107    /// called successfully on this storage array.
108    pub unsafe fn new_unchecked(ext_dtype: ExtDTypeRef, storage_dtype: &DType) -> Self {
109        // TODO(connor): Replace these statements once we add `validate_storage_array`.
110        // #[cfg(debug_assertions)]
111        // ext_dtype
112        //     .validate_storage_array(&storage_array)
113        //     .vortex_expect("[Debug Assertion]: Invalid storage array for `ExtensionArray`");
114        debug_assert_eq!(
115            ext_dtype.storage_dtype(),
116            storage_dtype,
117            "ExtensionArray: storage_dtype must match storage array DType",
118        );
119
120        Self { ext_dtype }
121    }
122
123    /// The extension dtype of this array.
124    pub fn ext_dtype(&self) -> &ExtDTypeRef {
125        &self.ext_dtype
126    }
127}
128
129pub trait ExtensionArrayExt: TypedArrayRef<Extension> {
130    fn storage_array(&self) -> &ArrayRef {
131        self.as_ref().slots()[STORAGE_SLOT]
132            .as_ref()
133            .vortex_expect("ExtensionArray storage slot")
134    }
135}
136impl<T: TypedArrayRef<Extension>> ExtensionArrayExt for T {}
137
138impl Array<Extension> {
139    /// Constructs a new `ExtensionArray`.
140    ///
141    /// # Panics
142    ///
143    /// Panics if the storage array is not compatible with the extension dtype.
144    pub fn new(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> Self {
145        let dtype = DType::Extension(ext_dtype.clone());
146        let len = storage_array.len();
147        let data = ExtensionData::new(ext_dtype, storage_array.dtype());
148        unsafe {
149            Array::from_parts_unchecked(
150                ArrayParts::new(Extension, dtype, len, data).with_slots(vec![Some(storage_array)]),
151            )
152        }
153    }
154
155    /// Tries to construct a new `ExtensionArray`.
156    pub fn try_new(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> VortexResult<Self> {
157        let dtype = DType::Extension(ext_dtype.clone());
158        let len = storage_array.len();
159        let data = ExtensionData::try_new(ext_dtype, storage_array.dtype())?;
160        Ok(unsafe {
161            Array::from_parts_unchecked(
162                ArrayParts::new(Extension, dtype, len, data).with_slots(vec![Some(storage_array)]),
163            )
164        })
165    }
166}