Skip to main content

vortex_array/arrays/extension/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_dtype::DType;
5use vortex_dtype::ExtID;
6use vortex_dtype::extension::ExtDTypeRef;
7
8use crate::ArrayRef;
9use crate::stats::ArrayStats;
10
11/// An extension array that wraps another array with additional type information.
12///
13/// **⚠️ Unstable API**: This is an experimental feature that may change significantly
14/// in future versions. The extension type system is still evolving.
15///
16/// Unlike Apache Arrow's extension arrays, Vortex extension arrays provide a more flexible
17/// mechanism for adding semantic meaning to existing array types without requiring
18/// changes to the core type system.
19///
20/// ## Design Philosophy
21///
22/// Extension arrays serve as a type-safe wrapper that:
23/// - Preserves the underlying storage format and operations
24/// - Adds semantic type information via `ExtDType`
25/// - Enables custom serialization and deserialization logic
26/// - Allows domain-specific interpretations of generic data
27///
28/// ## Storage and Type Relationship
29///
30/// The extension array maintains a strict contract:
31/// - **Storage array**: Contains the actual data in a standard Vortex encoding
32/// - **Extension type**: Defines how to interpret the storage data semantically
33/// - **Type safety**: The storage array's dtype must match the extension type's storage dtype
34///
35/// ## Use Cases
36///
37/// Extension arrays are ideal for:
38/// - **Custom numeric types**: Units of measurement, currencies
39/// - **Temporal types**: Custom date/time formats, time zones, calendars
40/// - **Domain-specific types**: UUIDs, IP addresses, geographic coordinates
41/// - **Encoded types**: Base64 strings, compressed data, encrypted values
42///
43/// ## Validity and Operations
44///
45/// Extension arrays delegate validity and most operations to their storage array:
46/// - Validity is inherited from the underlying storage
47/// - Slicing preserves the extension type
48/// - Scalar access wraps storage scalars with extension metadata
49#[derive(Clone, Debug)]
50pub struct ExtensionArray {
51    pub(super) dtype: DType,
52    pub(super) storage: ArrayRef,
53    pub(super) stats_set: ArrayStats,
54}
55
56impl ExtensionArray {
57    pub fn new(ext_dtype: ExtDTypeRef, storage: ArrayRef) -> Self {
58        assert_eq!(
59            ext_dtype.storage_dtype(),
60            storage.dtype(),
61            "ExtensionArray: storage_dtype must match storage array DType",
62        );
63        Self {
64            dtype: DType::Extension(ext_dtype),
65            storage,
66            stats_set: ArrayStats::default(),
67        }
68    }
69
70    pub fn ext_dtype(&self) -> &ExtDTypeRef {
71        let DType::Extension(ext) = &self.dtype else {
72            unreachable!("ExtensionArray: dtype must be an ExtDType")
73        };
74        ext
75    }
76
77    pub fn storage(&self) -> &ArrayRef {
78        &self.storage
79    }
80
81    #[inline]
82    pub fn id(&self) -> ExtID {
83        self.ext_dtype().id()
84    }
85}