vortex_array/arrays/extension/array.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_dtype::DType;
5use vortex_dtype::ExtID;
6use vortex_dtype::extension::ExtDTypeRef;
7
8use crate::ArrayRef;
9use crate::stats::ArrayStats;
10
11/// An extension array that wraps another array with additional type information.
12///
13/// **⚠️ Unstable API**: This is an experimental feature that may change significantly
14/// in future versions. The extension type system is still evolving.
15///
16/// Unlike Apache Arrow's extension arrays, Vortex extension arrays provide a more flexible
17/// mechanism for adding semantic meaning to existing array types without requiring
18/// changes to the core type system.
19///
20/// ## Design Philosophy
21///
22/// Extension arrays serve as a type-safe wrapper that:
23/// - Preserves the underlying storage format and operations
24/// - Adds semantic type information via `ExtDType`
25/// - Enables custom serialization and deserialization logic
26/// - Allows domain-specific interpretations of generic data
27///
28/// ## Storage and Type Relationship
29///
30/// The extension array maintains a strict contract:
31/// - **Storage array**: Contains the actual data in a standard Vortex encoding
32/// - **Extension type**: Defines how to interpret the storage data semantically
33/// - **Type safety**: The storage array's dtype must match the extension type's storage dtype
34///
35/// ## Use Cases
36///
37/// Extension arrays are ideal for:
38/// - **Custom numeric types**: Units of measurement, currencies
39/// - **Temporal types**: Custom date/time formats, time zones, calendars
40/// - **Domain-specific types**: UUIDs, IP addresses, geographic coordinates
41/// - **Encoded types**: Base64 strings, compressed data, encrypted values
42///
43/// ## Validity and Operations
44///
45/// Extension arrays delegate validity and most operations to their storage array:
46/// - Validity is inherited from the underlying storage
47/// - Slicing preserves the extension type
48/// - Scalar access wraps storage scalars with extension metadata
49#[derive(Clone, Debug)]
50pub struct ExtensionArray {
51 pub(super) dtype: DType,
52 pub(super) storage: ArrayRef,
53 pub(super) stats_set: ArrayStats,
54}
55
56impl ExtensionArray {
57 pub fn new(ext_dtype: ExtDTypeRef, storage: ArrayRef) -> Self {
58 assert_eq!(
59 ext_dtype.storage_dtype(),
60 storage.dtype(),
61 "ExtensionArray: storage_dtype must match storage array DType",
62 );
63 Self {
64 dtype: DType::Extension(ext_dtype),
65 storage,
66 stats_set: ArrayStats::default(),
67 }
68 }
69
70 pub fn ext_dtype(&self) -> &ExtDTypeRef {
71 let DType::Extension(ext) = &self.dtype else {
72 unreachable!("ExtensionArray: dtype must be an ExtDType")
73 };
74 ext
75 }
76
77 pub fn storage(&self) -> &ArrayRef {
78 &self.storage
79 }
80
81 #[inline]
82 pub fn id(&self) -> ExtID {
83 self.ext_dtype().id()
84 }
85}