Skip to main content

vortex_array/arrays/extension/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_error::VortexExpect;
5use vortex_error::VortexResult;
6
7use crate::ArrayRef;
8use crate::dtype::DType;
9use crate::dtype::extension::ExtDTypeRef;
10use crate::stats::ArrayStats;
11
12/// An extension array that wraps another array with additional type information.
13///
14/// **⚠️ Unstable API**: This is an experimental feature that may change significantly
15/// in future versions. The extension type system is still evolving.
16///
17/// Unlike Apache Arrow's extension arrays, Vortex extension arrays provide a more flexible
18/// mechanism for adding semantic meaning to existing array types without requiring
19/// changes to the core type system.
20///
21/// ## Design Philosophy
22///
23/// Extension arrays serve as a type-safe wrapper that:
24/// - Preserves the underlying storage format and operations
25/// - Adds semantic type information via `ExtDType`
26/// - Enables custom serialization and deserialization logic
27/// - Allows domain-specific interpretations of generic data
28///
29/// ## Storage and Type Relationship
30///
31/// The extension array maintains a strict contract:
32/// - **Storage array**: Contains the actual data in a standard Vortex encoding
33/// - **Extension type**: Defines how to interpret the storage data semantically
34/// - **Type safety**: The storage array's dtype must match the extension type's storage dtype
35///
36/// ## Use Cases
37///
38/// Extension arrays are ideal for:
39/// - **Custom numeric types**: Units of measurement, currencies
40/// - **Temporal types**: Custom date/time formats, time zones, calendars
41/// - **Domain-specific types**: UUIDs, IP addresses, geographic coordinates
42/// - **Encoded types**: Base64 strings, compressed data, encrypted values
43///
44/// ## Validity and Operations
45///
46/// Extension arrays delegate validity and most operations to their storage array:
47/// - Validity is inherited from the underlying storage
48/// - Slicing preserves the extension type
49/// - Scalar access wraps storage scalars with extension metadata
#[derive(Clone, Debug)]
pub struct ExtensionArray {
    /// The dtype of this array. This **must** be a [`DType::Extension`] variant wrapping the
    /// extension dtype (it is *not* the dtype of the storage array).
    pub(super) dtype: DType,

    /// The backing storage array for this extension array.
    pub(super) storage_array: ArrayRef,

    /// The stats for this array.
    pub(super) stats_set: ArrayStats,
}
61
62impl ExtensionArray {
63    /// Constructs a new `ExtensionArray`.
64    ///
65    /// # Panics
66    ///
67    /// Panics if the storage array in not compatible with the extension dtype.
68    pub fn new(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> Self {
69        Self::try_new(ext_dtype, storage_array).vortex_expect("Failed to create `ExtensionArray`")
70    }
71
72    /// Tries to construct a new `ExtensionArray`.
73    ///
74    /// # Errors
75    ///
76    /// Returns an error if the storage array in not compatible with the extension dtype.
77    pub fn try_new(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> VortexResult<Self> {
78        // TODO(connor): Replace these statements once we add `validate_storage_array`.
79        // ext_dtype.validate_storage_array(&storage_array)?;
80        assert_eq!(
81            ext_dtype.storage_dtype(),
82            storage_array.dtype(),
83            "ExtensionArray: storage_dtype must match storage array DType",
84        );
85
86        // SAFETY: we validate that the inputs are valid above.
87        Ok(unsafe { Self::new_unchecked(ext_dtype, storage_array) })
88    }
89
90    /// Creates a new `ExtensionArray`.
91    ///
92    /// # Safety
93    ///
94    /// The caller must ensure that the storage array is compatible with the extension dtype. In
95    /// other words, they must know that `ext_dtype.validate_storage_array(&storage_array)` has been
96    /// called successfully on this storage array.
97    pub unsafe fn new_unchecked(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> Self {
98        // TODO(connor): Replace these statements once we add `validate_storage_array`.
99        // #[cfg(debug_assertions)]
100        // ext_dtype
101        //     .validate_storage_array(&storage_array)
102        //     .vortex_expect("[Debug Assertion]: Invalid storage array for `ExtensionArray`");
103        debug_assert_eq!(
104            ext_dtype.storage_dtype(),
105            storage_array.dtype(),
106            "ExtensionArray: storage_dtype must match storage array DType",
107        );
108
109        Self {
110            dtype: DType::Extension(ext_dtype),
111            storage_array,
112            stats_set: ArrayStats::default(),
113        }
114    }
115
116    /// The extension dtype of this array.
117    pub fn ext_dtype(&self) -> &ExtDTypeRef {
118        let DType::Extension(ext) = &self.dtype else {
119            unreachable!("ExtensionArray: dtype must be an ExtDType")
120        };
121
122        ext
123    }
124
125    pub fn storage_array(&self) -> &ArrayRef {
126        &self.storage_array
127    }
128}