vortex_dtype/
extension.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::{Display, Formatter};
5use std::sync::Arc;
6
7use crate::{DType, Nullability};
8
/// A unique identifier for an extension type.
///
/// The identifier text lives in an `Arc<str>`, so cloning an `ExtID` shares the
/// underlying string instead of copying it.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(::serde::Serialize, ::serde::Deserialize))]
pub struct ExtID(Arc<str>);
13
14impl ExtID {
15    /// Constructs a new `ExtID` from a string
16    pub fn new(value: Arc<str>) -> Self {
17        Self(value)
18    }
19}
20
21impl Display for ExtID {
22    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
23        write!(f, "{}", self.0)
24    }
25}
26
27impl AsRef<str> for ExtID {
28    fn as_ref(&self) -> &str {
29        self.0.as_ref()
30    }
31}
32
33impl From<&str> for ExtID {
34    fn from(value: &str) -> Self {
35        Self(value.into())
36    }
37}
38
/// Opaque metadata for an extension type.
///
/// The bytes are held in an `Arc<[u8]>`, so clones share the same buffer. This type
/// attaches no interpretation to the bytes.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ExtMetadata(Arc<[u8]>);
43
44impl ExtMetadata {
45    /// Constructs a new `ExtMetadata` from a byte slice
46    pub fn new(value: Arc<[u8]>) -> Self {
47        Self(value)
48    }
49}
50
51impl AsRef<[u8]> for ExtMetadata {
52    fn as_ref(&self) -> &[u8] {
53        self.0.as_ref()
54    }
55}
56
57impl From<&[u8]> for ExtMetadata {
58    fn from(value: &[u8]) -> Self {
59        Self(value.into())
60    }
61}
62
63/// A type descriptor for an extension type
64#[derive(Debug, Clone, PartialEq, Eq, Hash)]
65#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
66pub struct ExtDType {
67    id: ExtID,
68    storage_dtype: Arc<DType>,
69    metadata: Option<ExtMetadata>,
70}
71
72impl ExtDType {
73    /// Creates a new `ExtDType`.
74    ///
75    /// Extension data types in Vortex allows library users to express additional semantic meaning
76    /// on top of a set of scalar values. Metadata can optionally be provided for the extension type
77    /// to allow for parameterized types.
78    ///
79    /// A simple example would be if one wanted to create a `vortex.temperature` extension type. The
80    /// canonical encoding for such values would be `f64`, and the metadata can contain an optional
81    /// temperature unit, allowing downstream users to be sure they properly account for Celsius
82    /// and Fahrenheit conversions.
83    ///
84    /// ```
85    /// use std::sync::Arc;
86    /// use vortex_dtype::{DType, ExtDType, ExtID, ExtMetadata, Nullability, PType};
87    ///
88    /// #[repr(u8)]
89    /// enum TemperatureUnit {
90    ///     C = 0u8,
91    ///     F = 1u8,
92    /// }
93    ///
94    /// // Make a new extension type that encodes the unit for a set of nullable `f64`.
95    /// pub fn create_temperature_type(unit: TemperatureUnit) -> ExtDType {
96    ///     ExtDType::new(
97    ///         ExtID::new("vortex.temperature".into()),
98    ///         Arc::new(DType::Primitive(PType::F64, Nullability::Nullable)),
99    ///         Some(ExtMetadata::new([unit as u8].into()))
100    ///     )
101    /// }
102    /// ```
103    pub fn new(id: ExtID, storage_dtype: Arc<DType>, metadata: Option<ExtMetadata>) -> Self {
104        assert!(
105            !matches!(storage_dtype.as_ref(), &DType::Extension(_)),
106            "ExtDType cannot have Extension storage_dtype"
107        );
108
109        Self {
110            id,
111            storage_dtype,
112            metadata,
113        }
114    }
115
116    /// Returns the `ExtID` for this extension type
117    #[inline]
118    pub fn id(&self) -> &ExtID {
119        &self.id
120    }
121
122    /// Returns the `ExtMetadata` for this extension type, if it exists
123    #[inline]
124    pub fn storage_dtype(&self) -> &DType {
125        self.storage_dtype.as_ref()
126    }
127
128    /// Returns a new `ExtDType` with the given nullability
129    pub fn with_nullability(&self, nullability: Nullability) -> Self {
130        Self::new(
131            self.id.clone(),
132            Arc::new(self.storage_dtype.with_nullability(nullability)),
133            self.metadata.clone(),
134        )
135    }
136
137    /// Returns the `ExtMetadata` for this extension type, if it exists
138    #[inline]
139    pub fn metadata(&self) -> Option<&ExtMetadata> {
140        self.metadata.as_ref()
141    }
142
143    /// Check if `self` and `other` are equal, ignoring the storage nullability
144    pub fn eq_ignore_nullability(&self, other: &Self) -> bool {
145        self.id() == other.id()
146            && self.metadata() == other.metadata()
147            && self
148                .storage_dtype()
149                .eq_ignore_nullability(other.storage_dtype())
150    }
151}
152
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use super::{ExtDType, ExtID};
    use crate::{DType, Nullability, PType};

    /// Two extension types over the same storage dtype differ when their ids differ.
    #[test]
    fn different_ids_are_not_equal() {
        let storage_dtype = Arc::from(DType::Bool(Nullability::NonNullable));
        let one = ExtDType::new(ExtID::new(Arc::from("one")), storage_dtype.clone(), None);
        let two = ExtDType::new(ExtID::new(Arc::from("two")), storage_dtype, None);

        assert_ne!(one, two);
    }

    /// The same id over different storage dtypes yields distinct extension types.
    #[test]
    fn same_id_different_storage_types_are_not_equal() {
        let one = ExtDType::new(
            ExtID::new(Arc::from("one")),
            Arc::from(DType::Bool(Nullability::NonNullable)),
            None,
        );
        let two = ExtDType::new(
            ExtID::new(Arc::from("one")),
            Arc::from(DType::Primitive(PType::U8, Nullability::NonNullable)),
            None,
        );

        assert_ne!(one, two);
    }

    /// The same id and storage type with different nullability yields distinct extension types.
    #[test]
    fn same_id_different_nullability_are_not_equal() {
        // Start from the non-nullable dtype and derive the nullable variant from it.
        let non_nullable_u8 = Arc::from(DType::Primitive(PType::U8, Nullability::NonNullable));
        let one = ExtDType::new(
            ExtID::new(Arc::from("one")),
            non_nullable_u8.clone(),
            None,
        );
        let two = ExtDType::new(
            ExtID::new(Arc::from("one")),
            Arc::from(non_nullable_u8.as_nullable()),
            None,
        );

        assert_ne!(one, two);
    }
}
197}