vortex_array/arrays/extension/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use vortex_dtype::{DType, ExtDType, ExtID};
7
8use crate::ArrayRef;
9use crate::stats::ArrayStats;
10
11/// An extension array that wraps another array with additional type information.
12///
13/// **⚠️ Unstable API**: This is an experimental feature that may change significantly
14/// in future versions. The extension type system is still evolving.
15///
16/// Unlike Apache Arrow's extension arrays, Vortex extension arrays provide a more flexible
17/// mechanism for adding semantic meaning to existing array types without requiring
18/// changes to the core type system.
19///
20/// ## Design Philosophy
21///
22/// Extension arrays serve as a type-safe wrapper that:
23/// - Preserves the underlying storage format and operations
24/// - Adds semantic type information via `ExtDType`
25/// - Enables custom serialization and deserialization logic
26/// - Allows domain-specific interpretations of generic data
27///
28/// ## Storage and Type Relationship
29///
30/// The extension array maintains a strict contract:
31/// - **Storage array**: Contains the actual data in a standard Vortex encoding
32/// - **Extension type**: Defines how to interpret the storage data semantically
33/// - **Type safety**: The storage array's dtype must match the extension type's storage dtype
34///
35/// ## Use Cases
36///
37/// Extension arrays are ideal for:
38/// - **Custom numeric types**: Units of measurement, currencies
39/// - **Temporal types**: Custom date/time formats, time zones, calendars
40/// - **Domain-specific types**: UUIDs, IP addresses, geographic coordinates
41/// - **Encoded types**: Base64 strings, compressed data, encrypted values
42///
43/// ## Validity and Operations
44///
45/// Extension arrays delegate validity and most operations to their storage array:
46/// - Validity is inherited from the underlying storage
47/// - Slicing preserves the extension type
48/// - Scalar access wraps storage scalars with extension metadata
49///
50/// # Examples
51///
52/// ```
53/// use std::sync::Arc;
54/// use vortex_array::arrays::{ExtensionArray, PrimitiveArray};
55/// use vortex_dtype::{ExtDType, ExtID, DType, Nullability, PType};
56/// use vortex_array::validity::Validity;
57/// use vortex_array::IntoArray;
58/// use vortex_buffer::buffer;
59///
60/// // Define a custom extension type for representing currency values
61/// let currency_id = ExtID::from("example.currency");
62/// let currency_dtype = Arc::new(ExtDType::new(
63///     currency_id,
64///     Arc::new(DType::Primitive(PType::I64, Nullability::NonNullable)), // Storage as i64 cents
65///     None, // No additional metadata needed
66/// ));
67///
68/// // Create storage array with currency values in cents
69/// let cents_storage = PrimitiveArray::new(
70///     buffer![12345i64, 67890, 99999], // $123.45, $678.90, $999.99
71///     Validity::NonNullable
72/// );
73///
74/// // Wrap with extension type
75/// let currency_array = ExtensionArray::new(
76///     currency_dtype.clone(),
77///     cents_storage.into_array()
78/// );
79///
80/// assert_eq!(currency_array.len(), 3);
81/// assert_eq!(currency_array.id().as_ref(), "example.currency");
82///
83/// // Access maintains extension type information
84/// let first_value = currency_array.scalar_at(0);
85/// assert!(first_value.as_extension_opt().is_some());
86/// ```
87#[derive(Clone, Debug)]
88pub struct ExtensionArray {
89    pub(super) dtype: DType,
90    pub(super) storage: ArrayRef,
91    pub(super) stats_set: ArrayStats,
92}
93
94impl ExtensionArray {
95    pub fn new(ext_dtype: Arc<ExtDType>, storage: ArrayRef) -> Self {
96        assert_eq!(
97            ext_dtype.storage_dtype(),
98            storage.dtype(),
99            "ExtensionArray: storage_dtype must match storage array DType",
100        );
101        Self {
102            dtype: DType::Extension(ext_dtype),
103            storage,
104            stats_set: ArrayStats::default(),
105        }
106    }
107
108    pub fn ext_dtype(&self) -> &Arc<ExtDType> {
109        let DType::Extension(ext) = &self.dtype else {
110            unreachable!("ExtensionArray: dtype must be an ExtDType")
111        };
112        ext
113    }
114
115    pub fn storage(&self) -> &ArrayRef {
116        &self.storage
117    }
118
119    #[allow(dead_code)]
120    #[inline]
121    pub fn id(&self) -> &ExtID {
122        self.ext_dtype().id()
123    }
124}