vortex_array/arrays/extension/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use vortex_dtype::DType;
7use vortex_dtype::ExtDType;
8use vortex_dtype::ExtID;
9
10use crate::ArrayRef;
11use crate::stats::ArrayStats;
12
13/// An extension array that wraps another array with additional type information.
14///
15/// **⚠️ Unstable API**: This is an experimental feature that may change significantly
16/// in future versions. The extension type system is still evolving.
17///
18/// Unlike Apache Arrow's extension arrays, Vortex extension arrays provide a more flexible
19/// mechanism for adding semantic meaning to existing array types without requiring
20/// changes to the core type system.
21///
22/// ## Design Philosophy
23///
24/// Extension arrays serve as a type-safe wrapper that:
25/// - Preserves the underlying storage format and operations
26/// - Adds semantic type information via `ExtDType`
27/// - Enables custom serialization and deserialization logic
28/// - Allows domain-specific interpretations of generic data
29///
30/// ## Storage and Type Relationship
31///
32/// The extension array maintains a strict contract:
33/// - **Storage array**: Contains the actual data in a standard Vortex encoding
34/// - **Extension type**: Defines how to interpret the storage data semantically
35/// - **Type safety**: The storage array's dtype must match the extension type's storage dtype
36///
37/// ## Use Cases
38///
39/// Extension arrays are ideal for:
40/// - **Custom numeric types**: Units of measurement, currencies
41/// - **Temporal types**: Custom date/time formats, time zones, calendars
42/// - **Domain-specific types**: UUIDs, IP addresses, geographic coordinates
43/// - **Encoded types**: Base64 strings, compressed data, encrypted values
44///
45/// ## Validity and Operations
46///
47/// Extension arrays delegate validity and most operations to their storage array:
48/// - Validity is inherited from the underlying storage
49/// - Slicing preserves the extension type
50/// - Scalar access wraps storage scalars with extension metadata
51///
52/// # Examples
53///
54/// ```
55/// use std::sync::Arc;
56/// use vortex_array::arrays::{ExtensionArray, PrimitiveArray};
57/// use vortex_dtype::{ExtDType, ExtID, DType, Nullability, PType};
58/// use vortex_array::validity::Validity;
59/// use vortex_array::IntoArray;
60/// use vortex_buffer::buffer;
61///
62/// // Define a custom extension type for representing currency values
63/// let currency_id = ExtID::from("example.currency");
64/// let currency_dtype = Arc::new(ExtDType::new(
65///     currency_id,
66///     Arc::new(DType::Primitive(PType::I64, Nullability::NonNullable)), // Storage as i64 cents
67///     None, // No additional metadata needed
68/// ));
69///
70/// // Create storage array with currency values in cents
71/// let cents_storage = PrimitiveArray::new(
72///     buffer![12345i64, 67890, 99999], // $123.45, $678.90, $999.99
73///     Validity::NonNullable
74/// );
75///
76/// // Wrap with extension type
77/// let currency_array = ExtensionArray::new(
78///     currency_dtype.clone(),
79///     cents_storage.into_array()
80/// );
81///
82/// assert_eq!(currency_array.len(), 3);
83/// assert_eq!(currency_array.id().as_ref(), "example.currency");
84///
85/// // Access maintains extension type information
86/// let first_value = currency_array.scalar_at(0);
87/// assert!(first_value.as_extension_opt().is_some());
88/// ```
89#[derive(Clone, Debug)]
90pub struct ExtensionArray {
91    pub(super) dtype: DType,
92    pub(super) storage: ArrayRef,
93    pub(super) stats_set: ArrayStats,
94}
95
96impl ExtensionArray {
97    pub fn new(ext_dtype: Arc<ExtDType>, storage: ArrayRef) -> Self {
98        assert_eq!(
99            ext_dtype.storage_dtype(),
100            storage.dtype(),
101            "ExtensionArray: storage_dtype must match storage array DType",
102        );
103        Self {
104            dtype: DType::Extension(ext_dtype),
105            storage,
106            stats_set: ArrayStats::default(),
107        }
108    }
109
110    pub fn ext_dtype(&self) -> &Arc<ExtDType> {
111        let DType::Extension(ext) = &self.dtype else {
112            unreachable!("ExtensionArray: dtype must be an ExtDType")
113        };
114        ext
115    }
116
117    pub fn storage(&self) -> &ArrayRef {
118        &self.storage
119    }
120
121    #[allow(dead_code)]
122    #[inline]
123    pub fn id(&self) -> &ExtID {
124        self.ext_dtype().id()
125    }
126}