Skip to main content

vortex_array/arrays/extension/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use crate::ArrayRef;
5use crate::dtype::DType;
6use crate::dtype::extension::ExtDTypeRef;
7use crate::dtype::extension::ExtId;
8use crate::stats::ArrayStats;
9
/// An extension array that wraps another array with additional type information.
///
/// **⚠️ Unstable API**: This is an experimental feature that may change significantly
/// in future versions. The extension type system is still evolving.
///
/// Unlike Apache Arrow's extension arrays, Vortex extension arrays provide a more flexible
/// mechanism for adding semantic meaning to existing array types without requiring
/// changes to the core type system.
///
/// ## Design Philosophy
///
/// Extension arrays serve as a type-safe wrapper that:
/// - Preserves the underlying storage format and operations
/// - Adds semantic type information via `ExtDType`
/// - Enables custom serialization and deserialization logic
/// - Allows domain-specific interpretations of generic data
///
/// ## Storage and Type Relationship
///
/// The extension array maintains a strict contract:
/// - **Storage array**: Contains the actual data in a standard Vortex encoding
/// - **Extension type**: Defines how to interpret the storage data semantically
/// - **Type safety**: The storage array's dtype must match the extension type's storage dtype
///
/// ## Use Cases
///
/// Extension arrays are ideal for:
/// - **Custom numeric types**: Units of measurement, currencies
/// - **Temporal types**: Custom date/time formats, time zones, calendars
/// - **Domain-specific types**: UUIDs, IP addresses, geographic coordinates
/// - **Encoded types**: Base64 strings, compressed data, encrypted values
///
/// ## Validity and Operations
///
/// Extension arrays delegate validity and most operations to their storage array:
/// - Validity is inherited from the underlying storage
/// - Slicing preserves the extension type
/// - Scalar access wraps storage scalars with extension metadata
#[derive(Clone, Debug)]
pub struct ExtensionArray {
    /// Logical dtype of this array. `ExtensionArray::new` always stores a
    /// `DType::Extension(..)` here, and `ext_dtype()` relies on that variant.
    pub(super) dtype: DType,
    /// The wrapped storage array. `ExtensionArray::new` asserts that its dtype equals
    /// the extension dtype's storage dtype.
    pub(super) storage: ArrayRef,
    /// Statistics associated with this array; initialized to `ArrayStats::default()`
    /// by `ExtensionArray::new`.
    pub(super) stats_set: ArrayStats,
}
54
55impl ExtensionArray {
56    pub fn new(ext_dtype: ExtDTypeRef, storage: ArrayRef) -> Self {
57        assert_eq!(
58            ext_dtype.storage_dtype(),
59            storage.dtype(),
60            "ExtensionArray: storage_dtype must match storage array DType",
61        );
62        Self {
63            dtype: DType::Extension(ext_dtype),
64            storage,
65            stats_set: ArrayStats::default(),
66        }
67    }
68
69    pub fn ext_dtype(&self) -> &ExtDTypeRef {
70        let DType::Extension(ext) = &self.dtype else {
71            unreachable!("ExtensionArray: dtype must be an ExtDType")
72        };
73        ext
74    }
75
76    pub fn storage(&self) -> &ArrayRef {
77        &self.storage
78    }
79
80    #[inline]
81    pub fn id(&self) -> ExtId {
82        self.ext_dtype().id()
83    }
84}