vortex_dtype/
dtype.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::fmt::Display;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::sync::Arc;
9
10use DType::*;
11use itertools::Itertools;
12use static_assertions::const_assert_eq;
13use vortex_error::vortex_panic;
14
15use crate::ExtDType;
16use crate::FieldDType;
17use crate::FieldName;
18use crate::PType;
19use crate::StructFields;
20use crate::decimal::DecimalDType;
21use crate::nullability::Nullability;
22
/// The logical types of elements in Vortex arrays.
///
/// `DType` represents the different logical data types that can be represented in a Vortex array.
///
/// This is different from physical types, which represent the actual layout of data (compressed or
/// uncompressed). The set of physical types/formats (or data layouts) maps surjectively onto the
/// set of logical types: every physical type maps to a single logical type, and a single logical
/// type may have several physical encodings (see the example below).
///
/// Note that a `DType` represents the logical type of the elements in the `Array`s, **not** the
/// logical type of the `Array` itself.
///
/// For example, an array with [`DType::Primitive`]([`I32`], [`NonNullable`]) could be physically
/// encoded as any of the following:
///
/// - A flat array of `i32` values.
/// - A run-length encoded sequence.
/// - Dictionary encoded values with bitpacked codes.
///
/// All of these physical encodings preserve the same logical [`I32`] type, even if the physical
/// data is different.
///
/// [`I32`]: PType::I32
/// [`NonNullable`]: Nullability::NonNullable
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum DType {
    /// A logical null type.
    ///
    /// `Null` only has a single value, `null`. It carries no [`Nullability`] of its own and is
    /// always reported as nullable by [`DType::is_nullable`].
    Null,

    /// A logical boolean type.
    ///
    /// `Bool` can be `true` or `false` if non-nullable. It can be `true`, `false`, or `null` if
    /// nullable.
    Bool(Nullability),

    /// A logical fixed-width numeric type.
    ///
    /// This can be unsigned, signed, or floating point. See [`PType`] for more information.
    Primitive(PType, Nullability),

    /// Logical real numbers with fixed precision and scale.
    ///
    /// See [`DecimalDType`] for more information.
    Decimal(DecimalDType, Nullability),

    /// Logical UTF-8 strings.
    Utf8(Nullability),

    /// Logical binary data.
    Binary(Nullability),

    /// A logical variable-length list type.
    ///
    /// This is parameterized by a single `DType` that represents the element type of the inner
    /// lists.
    List(Arc<DType>, Nullability),

    /// A logical fixed-size list type.
    ///
    /// This is parameterized by a `DType` that represents the element type of the inner lists, as
    /// well as a `u32` size that determines the fixed length of each `FixedSizeList` scalar.
    FixedSizeList(Arc<DType>, u32, Nullability),

    /// A logical struct type.
    ///
    /// A `Struct` type is composed of an ordered list of fields, each with a corresponding name and
    /// `DType`. See [`StructFields`] for more information.
    Struct(StructFields, Nullability),

    /// A user-defined extension type.
    ///
    /// The nullability of an extension type is the nullability of its storage `DType` (see
    /// [`DType::is_nullable`]). See [`ExtDType`] for more information.
    Extension(Arc<ExtDType>),
}
99
/// This trait is implemented by native Rust types that can be converted
/// to and from Vortex scalar values, for example:
///
/// - `&str` -> `DType::Utf8`
/// - `bool` -> `DType::Bool`
///
/// The dtype is the one most closely matching the domain of the Rust type,
/// e.g. `Option<T>` -> a nullable `DType`.
pub trait NativeDType {
    /// Returns the Vortex data type for this scalar type.
    fn dtype() -> DType;
}
111
// Compile-time check that `DType` occupies exactly 16 bytes on every target other than wasm32.
// NOTE(review): presumably this guards against accidentally growing the enum (e.g. by adding a
// variant with a larger inline payload) — confirm the intent before relaxing it.
#[cfg(not(target_arch = "wasm32"))]
const_assert_eq!(size_of::<DType>(), 16);

// On wasm32 the expected size of `DType` is 12 bytes instead.
#[cfg(target_arch = "wasm32")]
const_assert_eq!(size_of::<DType>(), 12);
117
118impl DType {
119    /// The default `DType` for bytes.
120    pub const BYTES: Self = Primitive(PType::U8, Nullability::NonNullable);
121
122    /// Get the nullability of the `DType`.
123    #[inline]
124    pub fn nullability(&self) -> Nullability {
125        self.is_nullable().into()
126    }
127
128    /// Check if the `DType` is [`Nullability::Nullable`].
129    #[inline]
130    pub fn is_nullable(&self) -> bool {
131        match self {
132            Null => true,
133            Extension(ext_dtype) => ext_dtype.storage_dtype().is_nullable(),
134            Bool(null)
135            | Primitive(_, null)
136            | Decimal(_, null)
137            | Utf8(null)
138            | Binary(null)
139            | Struct(_, null)
140            | List(_, null)
141            | FixedSizeList(_, _, null) => matches!(null, Nullability::Nullable),
142        }
143    }
144
145    /// Get a new `DType` with [`Nullability::NonNullable`] (but otherwise the same as `self`)
146    pub fn as_nonnullable(&self) -> Self {
147        self.with_nullability(Nullability::NonNullable)
148    }
149
150    /// Get a new `DType` with [`Nullability::Nullable`] (but otherwise the same as `self`)
151    pub fn as_nullable(&self) -> Self {
152        self.with_nullability(Nullability::Nullable)
153    }
154
155    /// Get a new DType with the given nullability (but otherwise the same as `self`)
156    pub fn with_nullability(&self, nullability: Nullability) -> Self {
157        match self {
158            Null => Null,
159            Bool(_) => Bool(nullability),
160            Primitive(pdt, _) => Primitive(*pdt, nullability),
161            Decimal(ddt, _) => Decimal(*ddt, nullability),
162            Utf8(_) => Utf8(nullability),
163            Binary(_) => Binary(nullability),
164            Struct(sf, _) => Struct(sf.clone(), nullability),
165            List(edt, _) => List(edt.clone(), nullability),
166            FixedSizeList(edt, size, _) => FixedSizeList(edt.clone(), *size, nullability),
167            Extension(ext) => Extension(Arc::new(ext.with_nullability(nullability))),
168        }
169    }
170
171    /// Union the nullability of this `DType` with the other nullability, returning a new `DType`.
172    pub fn union_nullability(&self, other: Nullability) -> Self {
173        let nullability = self.nullability() | other;
174        self.with_nullability(nullability)
175    }
176
177    /// Check if `self` and `other` are equal, ignoring nullability.
178    pub fn eq_ignore_nullability(&self, other: &Self) -> bool {
179        match (self, other) {
180            (Null, Null) => true,
181            (Bool(_), Bool(_)) => true,
182            (Primitive(lhs_ptype, _), Primitive(rhs_ptype, _)) => lhs_ptype == rhs_ptype,
183            (Decimal(lhs, _), Decimal(rhs, _)) => lhs == rhs,
184            (Utf8(_), Utf8(_)) => true,
185            (Binary(_), Binary(_)) => true,
186            (List(lhs_dtype, _), List(rhs_dtype, _)) => lhs_dtype.eq_ignore_nullability(rhs_dtype),
187            (FixedSizeList(lhs_dtype, lhs_size, _), FixedSizeList(rhs_dtype, rhs_size, _)) => {
188                lhs_size == rhs_size && lhs_dtype.eq_ignore_nullability(rhs_dtype)
189            }
190            (Struct(lhs_dtype, _), Struct(rhs_dtype, _)) => {
191                (lhs_dtype.names() == rhs_dtype.names())
192                    && (lhs_dtype
193                        .fields()
194                        .zip_eq(rhs_dtype.fields())
195                        .all(|(l, r)| l.eq_ignore_nullability(&r)))
196            }
197            (Extension(lhs_extdtype), Extension(rhs_extdtype)) => {
198                lhs_extdtype.as_ref().eq_ignore_nullability(rhs_extdtype)
199            }
200            _ => false,
201        }
202    }
203
204    /// Returns `true` if `self` is a subset type of `other, otherwise `false`.
205    ///
206    /// If `self` is nullable, this means that the other `DType` must also be nullable (since a
207    /// nullable type represents more values than a non-nullable type) and equal.
208    ///
209    /// If `self` is non-nullable, then the other `DType` must be equal ignoring nullabillity.
210    ///
211    /// We implement this functionality as a complement to `is_superset_of`.
212    pub fn eq_with_nullability_subset(&self, other: &Self) -> bool {
213        if self.is_nullable() {
214            self == other
215        } else {
216            self.eq_ignore_nullability(other)
217        }
218    }
219
220    /// Returns `true` if `self` is a superset type of `other, otherwise `false`.
221    ///
222    /// If `self` is non-nullable, this means that the other `DType` must also be non-nullable
223    /// (since a non-nullable type represents less values than a nullable type) and equal.
224    ///
225    /// If `self` is nullable, then the other `DType` must be equal ignoring nullabillity.
226    ///
227    /// This function is useful (in the `vortex-array` crate) for determining if an `Array` can
228    /// extend a given `ArrayBuilder`: it can only extend it if the `DType` of the builder is a
229    /// superset of the `Array`.
230    pub fn eq_with_nullability_superset(&self, other: &Self) -> bool {
231        if self.is_nullable() {
232            self.eq_ignore_nullability(other)
233        } else {
234            self == other
235        }
236    }
237
238    /// Check if `self` is a boolean
239    pub fn is_boolean(&self) -> bool {
240        matches!(self, Bool(_))
241    }
242
243    /// Check if `self` is a primitive type
244    pub fn is_primitive(&self) -> bool {
245        matches!(self, Primitive(_, _))
246    }
247
248    /// Returns this [`DType`]'s [`PType`] if it is a primitive type, otherwise panics.
249    pub fn as_ptype(&self) -> PType {
250        if let Primitive(ptype, _) = self {
251            *ptype
252        } else {
253            vortex_panic!("DType is not a primitive type")
254        }
255    }
256
257    /// Check if `self` is an unsigned integer
258    pub fn is_unsigned_int(&self) -> bool {
259        if let Primitive(ptype, _) = self {
260            return ptype.is_unsigned_int();
261        }
262        false
263    }
264
265    /// Check if `self` is a signed integer
266    pub fn is_signed_int(&self) -> bool {
267        if let Primitive(ptype, _) = self {
268            return ptype.is_signed_int();
269        }
270        false
271    }
272
273    /// Check if `self` is an integer (signed or unsigned)
274    pub fn is_int(&self) -> bool {
275        if let Primitive(ptype, _) = self {
276            return ptype.is_int();
277        }
278        false
279    }
280
281    /// Check if `self` is a floating point number
282    pub fn is_float(&self) -> bool {
283        if let Primitive(ptype, _) = self {
284            return ptype.is_float();
285        }
286        false
287    }
288
289    /// Check if `self` is a [`DType::Decimal`].
290    pub fn is_decimal(&self) -> bool {
291        matches!(self, Decimal(..))
292    }
293
294    /// Check if `self` is a [`DType::Utf8`]
295    pub fn is_utf8(&self) -> bool {
296        matches!(self, Utf8(_))
297    }
298
299    /// Check if `self` is a [`DType::Binary`]
300    pub fn is_binary(&self) -> bool {
301        matches!(self, Binary(_))
302    }
303
304    /// Check if `self` is a [`DType::List`].
305    pub fn is_list(&self) -> bool {
306        matches!(self, List(_, _))
307    }
308
309    /// Check if `self` is a [`DType::FixedSizeList`],
310    pub fn is_fixed_size_list(&self) -> bool {
311        matches!(self, FixedSizeList(..))
312    }
313
314    /// Check if `self` is a [`DType::Struct`]
315    pub fn is_struct(&self) -> bool {
316        matches!(self, Struct(_, _))
317    }
318
319    /// Check if `self` is a [`DType::Extension`] type
320    pub fn is_extension(&self) -> bool {
321        matches!(self, Extension(_))
322    }
323
324    /// Check if `self` is a nested type, i.e. list, fixed size list, struct, or extension of a
325    /// recursive type.
326    pub fn is_nested(&self) -> bool {
327        match self {
328            List(..) | FixedSizeList(..) | Struct(..) => true,
329            Extension(ext) => ext.storage_dtype().is_nested(),
330            _ => false,
331        }
332    }
333
334    /// Check returns the inner decimal type if the dtype is a [`DType::Decimal`].
335    pub fn as_decimal_opt(&self) -> Option<&DecimalDType> {
336        if let Decimal(decimal, _) = self {
337            Some(decimal)
338        } else {
339            None
340        }
341    }
342
343    /// Owned version of [Self::as_decimal_opt].
344    pub fn into_decimal_opt(self) -> Option<DecimalDType> {
345        if let Decimal(decimal, _) = self {
346            Some(decimal)
347        } else {
348            None
349        }
350    }
351
352    /// Get the inner element dtype if `self` is a [`DType::List`], otherwise returns `None`.
353    ///
354    /// Note that this does _not_ return `Some` if `self` is a [`DType::FixedSizeList`].
355    pub fn as_list_element_opt(&self) -> Option<&Arc<DType>> {
356        if let List(edt, _) = self {
357            Some(edt)
358        } else {
359            None
360        }
361    }
362
363    /// Owned version of [Self::as_list_element_opt].
364    pub fn into_list_element_opt(self) -> Option<Arc<DType>> {
365        if let List(edt, _) = self {
366            Some(edt)
367        } else {
368            None
369        }
370    }
371
372    /// Get the inner element dtype if `self` is a [`DType::FixedSizeList`], otherwise returns
373    /// `None`.
374    ///
375    /// Note that this does _not_ return `Some` if `self` is a [`DType::List`].
376    pub fn as_fixed_size_list_element_opt(&self) -> Option<&Arc<DType>> {
377        if let FixedSizeList(edt, ..) = self {
378            Some(edt)
379        } else {
380            None
381        }
382    }
383
384    /// Owned version of [Self::as_fixed_size_list_element_opt].
385    pub fn into_fixed_size_list_element_opt(self) -> Option<Arc<DType>> {
386        if let FixedSizeList(edt, ..) = self {
387            Some(edt)
388        } else {
389            None
390        }
391    }
392
393    /// Get the inner element dtype if `self` is **either** a [`DType::List`] or a
394    /// [`DType::FixedSizeList`], otherwise returns `None`
395    pub fn as_any_size_list_element_opt(&self) -> Option<&Arc<DType>> {
396        if let FixedSizeList(edt, ..) = self {
397            Some(edt)
398        } else if let List(edt, ..) = self {
399            Some(edt)
400        } else {
401            None
402        }
403    }
404
405    /// Owned version of [Self::as_any_size_list_element_opt].
406    pub fn into_any_size_list_element_opt(self) -> Option<Arc<DType>> {
407        if let FixedSizeList(edt, ..) = self {
408            Some(edt)
409        } else if let List(edt, ..) = self {
410            Some(edt)
411        } else {
412            None
413        }
414    }
415
416    /// Returns the [`StructFields`] from a struct [`DType`].
417    ///
418    /// # Panics
419    ///
420    /// If the [`DType`] is not a struct.
421    pub fn as_struct_fields(&self) -> &StructFields {
422        if let Struct(f, _) = self {
423            return f;
424        }
425        vortex_panic!("DType is not a Struct")
426    }
427
428    /// Owned version of [Self::as_struct_fields].
429    pub fn into_struct_fields(self) -> StructFields {
430        if let Struct(f, _) = self {
431            return f;
432        }
433        vortex_panic!("DType is not a Struct")
434    }
435
436    /// Get the `StructDType` if `self` is a `StructDType`, otherwise `None`
437    pub fn as_struct_fields_opt(&self) -> Option<&StructFields> {
438        if let Struct(f, _) = self {
439            Some(f)
440        } else {
441            None
442        }
443    }
444
445    /// Owned version of [Self::as_struct_fields_opt].
446    pub fn into_struct_fields_opt(self) -> Option<StructFields> {
447        if let Struct(f, _) = self {
448            Some(f)
449        } else {
450            None
451        }
452    }
453
454    /// Convenience method for creating a [`DType::List`].
455    pub fn list(dtype: impl Into<DType>, nullability: Nullability) -> Self {
456        List(Arc::new(dtype.into()), nullability)
457    }
458
459    /// Convenience method for creating a [`DType::Struct`].
460    pub fn struct_<I: IntoIterator<Item = (impl Into<FieldName>, impl Into<FieldDType>)>>(
461        iter: I,
462        nullability: Nullability,
463    ) -> Self {
464        Struct(StructFields::from_iter(iter), nullability)
465    }
466}
467
468impl Display for DType {
469    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
470        match self {
471            Null => write!(f, "null"),
472            Bool(null) => write!(f, "bool{null}"),
473            Primitive(pdt, null) => write!(f, "{pdt}{null}"),
474            Decimal(ddt, null) => write!(f, "{ddt}{null}"),
475            Utf8(null) => write!(f, "utf8{null}"),
476            Binary(null) => write!(f, "binary{null}"),
477            Struct(sf, null) => write!(
478                f,
479                "{{{}}}{null}",
480                sf.names()
481                    .iter()
482                    .zip(sf.fields())
483                    .map(|(field_null, dt)| format!("{field_null}={dt}"))
484                    .join(", "),
485            ),
486            List(edt, null) => write!(f, "list({edt}){null}"),
487            FixedSizeList(edt, size, null) => write!(f, "fixed_size_list({edt})[{size}]{null}"),
488            Extension(ext) => write!(
489                f,
490                "ext({}, {}{}){}",
491                ext.id(),
492                ext.storage_dtype()
493                    .with_nullability(Nullability::NonNullable),
494                ext.metadata()
495                    .map(|m| format!(", {m:?}"))
496                    .unwrap_or_else(|| "".to_string()),
497                ext.storage_dtype().nullability(),
498            ),
499        }
500    }
501}