vortex_dtype/
extension.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use crate::DType;
9use crate::Nullability;
10
/// Identifier that names an extension type.
///
/// The identifier is held in an `Arc<str>`, so cloning an `ExtID` shares the same string
/// allocation instead of copying it.
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(::serde::Serialize, ::serde::Deserialize))]
pub struct ExtID(Arc<str>);
15
16impl ExtID {
17    /// Constructs a new `ExtID` from a string
18    pub fn new(value: Arc<str>) -> Self {
19        Self(value)
20    }
21}
22
23impl Display for ExtID {
24    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
25        write!(f, "{}", self.0)
26    }
27}
28
29impl AsRef<str> for ExtID {
30    fn as_ref(&self) -> &str {
31        self.0.as_ref()
32    }
33}
34
35impl From<&str> for ExtID {
36    fn from(value: &str) -> Self {
37        Self(value.into())
38    }
39}
40
/// Opaque metadata bytes for an extension type.
///
/// The bytes are held in an `Arc<[u8]>`, so cloning an `ExtMetadata` shares the same buffer
/// instead of copying it.
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ExtMetadata(Arc<[u8]>);
45
46impl ExtMetadata {
47    /// Constructs a new `ExtMetadata` from a byte slice
48    pub fn new(value: Arc<[u8]>) -> Self {
49        Self(value)
50    }
51}
52
53impl AsRef<[u8]> for ExtMetadata {
54    fn as_ref(&self) -> &[u8] {
55        self.0.as_ref()
56    }
57}
58
59impl From<&[u8]> for ExtMetadata {
60    fn from(value: &[u8]) -> Self {
61        Self(value.into())
62    }
63}
64
65/// A type descriptor for an extension type
66#[derive(Debug, Clone, PartialEq, Eq, Hash)]
67#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
68pub struct ExtDType {
69    id: ExtID,
70    storage_dtype: Arc<DType>,
71    metadata: Option<ExtMetadata>,
72}
73
74impl ExtDType {
75    /// Creates a new `ExtDType`.
76    ///
77    /// Extension data types in Vortex allows library users to express additional semantic meaning
78    /// on top of a set of scalar values. Metadata can optionally be provided for the extension type
79    /// to allow for parameterized types.
80    ///
81    /// A simple example would be if one wanted to create a `vortex.temperature` extension type. The
82    /// canonical encoding for such values would be `f64`, and the metadata can contain an optional
83    /// temperature unit, allowing downstream users to be sure they properly account for Celsius
84    /// and Fahrenheit conversions.
85    ///
86    /// ```
87    /// use std::sync::Arc;
88    /// use vortex_dtype::{DType, ExtDType, ExtID, ExtMetadata, Nullability, PType};
89    ///
90    /// #[repr(u8)]
91    /// enum TemperatureUnit {
92    ///     C = 0u8,
93    ///     F = 1u8,
94    /// }
95    ///
96    /// // Make a new extension type that encodes the unit for a set of nullable `f64`.
97    /// pub fn create_temperature_type(unit: TemperatureUnit) -> ExtDType {
98    ///     ExtDType::new(
99    ///         ExtID::new("vortex.temperature".into()),
100    ///         Arc::new(DType::Primitive(PType::F64, Nullability::Nullable)),
101    ///         Some(ExtMetadata::new([unit as u8].into()))
102    ///     )
103    /// }
104    /// ```
105    pub fn new(id: ExtID, storage_dtype: Arc<DType>, metadata: Option<ExtMetadata>) -> Self {
106        assert!(
107            !matches!(storage_dtype.as_ref(), &DType::Extension(_)),
108            "ExtDType cannot have Extension storage_dtype"
109        );
110
111        Self {
112            id,
113            storage_dtype,
114            metadata,
115        }
116    }
117
118    /// Returns the `ExtID` for this extension type
119    #[inline]
120    pub fn id(&self) -> &ExtID {
121        &self.id
122    }
123
124    /// Returns the `ExtMetadata` for this extension type, if it exists
125    #[inline]
126    pub fn storage_dtype(&self) -> &DType {
127        self.storage_dtype.as_ref()
128    }
129
130    /// Returns a new `ExtDType` with the given nullability
131    pub fn with_nullability(&self, nullability: Nullability) -> Self {
132        Self::new(
133            self.id.clone(),
134            Arc::new(self.storage_dtype.with_nullability(nullability)),
135            self.metadata.clone(),
136        )
137    }
138
139    /// Returns the `ExtMetadata` for this extension type, if it exists
140    #[inline]
141    pub fn metadata(&self) -> Option<&ExtMetadata> {
142        self.metadata.as_ref()
143    }
144
145    /// Check if `self` and `other` are equal, ignoring the storage nullability
146    pub fn eq_ignore_nullability(&self, other: &Self) -> bool {
147        self.id() == other.id()
148            && self.metadata() == other.metadata()
149            && self
150                .storage_dtype()
151                .eq_ignore_nullability(other.storage_dtype())
152    }
153}
154
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use super::ExtDType;
    use super::ExtID;
    use crate::DType;
    use crate::Nullability;
    use crate::PType;

    /// Two extension types over the same storage dtype differ when their ids differ.
    #[test]
    fn different_ids_are_not_equal() {
        let storage_dtype = Arc::from(DType::Bool(Nullability::NonNullable));
        let one = ExtDType::new(ExtID::new(Arc::from("one")), storage_dtype.clone(), None);
        let two = ExtDType::new(ExtID::new(Arc::from("two")), storage_dtype, None);

        assert_ne!(one, two);
    }

    /// The same id over different storage dtypes is a different extension type.
    #[test]
    fn same_id_different_storage_types_are_not_equal() {
        let one = ExtDType::new(
            ExtID::new(Arc::from("one")),
            Arc::from(DType::Bool(Nullability::NonNullable)),
            None,
        );
        let two = ExtDType::new(
            ExtID::new(Arc::from("one")),
            Arc::from(DType::Primitive(PType::U8, Nullability::NonNullable)),
            None,
        );

        assert_ne!(one, two);
    }

    /// Strict equality takes the storage nullability into account.
    #[test]
    fn same_id_different_nullability_are_not_equal() {
        let non_nullable_u8 = Arc::from(DType::Primitive(PType::U8, Nullability::NonNullable));
        let one = ExtDType::new(
            ExtID::new(Arc::from("one")),
            non_nullable_u8.clone(),
            None,
        );
        let two = ExtDType::new(
            ExtID::new(Arc::from("one")),
            Arc::from(non_nullable_u8.as_nullable()),
            None,
        );

        assert_ne!(one, two);
    }

    /// `eq_ignore_nullability` treats types that differ only in storage nullability as equal.
    #[test]
    fn eq_ignore_nullability_ignores_storage_nullability() {
        let non_nullable_u8 = Arc::new(DType::Primitive(PType::U8, Nullability::NonNullable));
        let nullable_u8 = Arc::new(non_nullable_u8.as_nullable());
        let one = ExtDType::new(ExtID::new(Arc::from("one")), non_nullable_u8, None);
        let two = ExtDType::new(ExtID::new(Arc::from("one")), nullable_u8, None);

        assert_ne!(one, two);
        assert!(one.eq_ignore_nullability(&two));
        assert!(two.eq_ignore_nullability(&one));
    }
}