vortex_dtype/
dtype.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::{Debug, Display, Formatter};
5use std::hash::Hash;
6use std::sync::Arc;
7
8use DType::*;
9use itertools::Itertools;
10use static_assertions::const_assert_eq;
11use vortex_error::vortex_panic;
12
13use crate::decimal::DecimalDType;
14use crate::nullability::Nullability;
15use crate::{ExtDType, FieldDType, FieldName, PType, StructFields};
16
/// The logical types of elements in Vortex arrays.
///
/// `DType` represents the different logical data types that can be represented in a Vortex array.
///
/// This is different from physical types, which represent the actual layout of data (compressed or
/// uncompressed). The set of physical types/formats (or data layouts) is surjective into the set
/// of logical types: every physical type maps to a single logical type, while one logical type
/// may be backed by several physical types.
///
/// Note that a `DType` represents the logical type of the elements in the `Array`s, **not** the
/// logical type of the `Array` itself.
///
/// For example, an array with [`DType::Primitive`]([`I32`], [`NonNullable`]) could be physically
/// encoded as any of the following:
///
/// - A flat array of `i32` values.
/// - A run-length encoded sequence.
/// - Dictionary encoded values with bitpacked codes.
///
/// All of these physical encodings preserve the same logical [`I32`] type, even if the physical
/// data is different.
///
/// [`I32`]: PType::I32
/// [`NonNullable`]: Nullability::NonNullable
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum DType {
    /// A logical null type.
    ///
    /// `Null` only has a single value, `null`. The variant carries no [`Nullability`] of its own
    /// and is always reported as nullable by [`DType::is_nullable`].
    Null,

    /// A logical boolean type.
    ///
    /// `Bool` can be `true` or `false` if non-nullable. It can be `true`, `false`, or `null` if
    /// nullable.
    Bool(Nullability),

    /// A logical fixed-width numeric type.
    ///
    /// This can be unsigned, signed, or floating point. See [`PType`] for more information.
    Primitive(PType, Nullability),

    /// Logical real numbers with fixed precision and scale.
    ///
    /// See [`DecimalDType`] for more information.
    Decimal(DecimalDType, Nullability),

    /// Logical UTF-8 strings.
    Utf8(Nullability),

    /// Logical binary data.
    Binary(Nullability),

    /// A logical variable-length list type.
    ///
    /// This is parameterized by a single `DType` that represents the element type of the inner
    /// lists.
    List(Arc<DType>, Nullability),

    /// A logical fixed-size list type.
    ///
    /// This is parameterized by a `DType` that represents the element type of the inner lists, as
    /// well as a `u32` size that determines the fixed length of each `FixedSizeList` scalar.
    FixedSizeList(Arc<DType>, u32, Nullability),

    /// A logical struct type.
    ///
    /// A `Struct` type is composed of an ordered list of fields, each with a corresponding name and
    /// `DType`. See [`StructFields`] for more information.
    Struct(StructFields, Nullability),

    /// A user-defined extension type.
    ///
    /// The variant has no [`Nullability`] field of its own; [`DType::is_nullable`] reads it from
    /// the extension's storage dtype. See [`ExtDType`] for more information.
    Extension(Arc<ExtDType>),
}
93
/// This trait is implemented by native Rust types that can be converted
/// to and from Vortex scalar values,
/// e.g. `&str` -> `DType::Utf8`
///      `bool` -> `DType::Bool`
///
/// The dtype is the one most closely matching the domain of the Rust type,
/// e.g. `Option<T>` -> nullable `DType`.
pub trait NativeDType {
    /// Returns the Vortex data type for this scalar type.
    fn dtype() -> DType;
}
105
// Compile-time layout checks: `DType` must occupy exactly 16 bytes on non-wasm32 targets and
// exactly 12 bytes on wasm32. A change to the enum that alters its size fails the build here.
#[cfg(not(target_arch = "wasm32"))]
const_assert_eq!(size_of::<DType>(), 16);

#[cfg(target_arch = "wasm32")]
const_assert_eq!(size_of::<DType>(), 12);
111
112impl DType {
113    /// The default `DType` for bytes.
114    pub const BYTES: Self = Primitive(PType::U8, Nullability::NonNullable);
115
116    /// Get the nullability of the `DType`.
117    #[inline]
118    pub fn nullability(&self) -> Nullability {
119        self.is_nullable().into()
120    }
121
122    /// Check if the `DType` is [`Nullability::Nullable`].
123    #[inline]
124    pub fn is_nullable(&self) -> bool {
125        match self {
126            Null => true,
127            Extension(ext_dtype) => ext_dtype.storage_dtype().is_nullable(),
128            Bool(null)
129            | Primitive(_, null)
130            | Decimal(_, null)
131            | Utf8(null)
132            | Binary(null)
133            | Struct(_, null)
134            | List(_, null)
135            | FixedSizeList(_, _, null) => matches!(null, Nullability::Nullable),
136        }
137    }
138
139    /// Get a new `DType` with [`Nullability::NonNullable`] (but otherwise the same as `self`)
140    pub fn as_nonnullable(&self) -> Self {
141        self.with_nullability(Nullability::NonNullable)
142    }
143
144    /// Get a new `DType` with [`Nullability::Nullable`] (but otherwise the same as `self`)
145    pub fn as_nullable(&self) -> Self {
146        self.with_nullability(Nullability::Nullable)
147    }
148
149    /// Get a new DType with the given nullability (but otherwise the same as `self`)
150    pub fn with_nullability(&self, nullability: Nullability) -> Self {
151        match self {
152            Null => Null,
153            Bool(_) => Bool(nullability),
154            Primitive(pdt, _) => Primitive(*pdt, nullability),
155            Decimal(ddt, _) => Decimal(*ddt, nullability),
156            Utf8(_) => Utf8(nullability),
157            Binary(_) => Binary(nullability),
158            Struct(sf, _) => Struct(sf.clone(), nullability),
159            List(edt, _) => List(edt.clone(), nullability),
160            FixedSizeList(edt, size, _) => FixedSizeList(edt.clone(), *size, nullability),
161            Extension(ext) => Extension(Arc::new(ext.with_nullability(nullability))),
162        }
163    }
164
165    /// Union the nullability of this `DType` with the other nullability, returning a new `DType`.
166    pub fn union_nullability(&self, other: Nullability) -> Self {
167        let nullability = self.nullability() | other;
168        self.with_nullability(nullability)
169    }
170
171    /// Check if `self` and `other` are equal, ignoring nullability.
172    pub fn eq_ignore_nullability(&self, other: &Self) -> bool {
173        match (self, other) {
174            (Null, Null) => true,
175            (Bool(_), Bool(_)) => true,
176            (Primitive(lhs_ptype, _), Primitive(rhs_ptype, _)) => lhs_ptype == rhs_ptype,
177            (Decimal(lhs, _), Decimal(rhs, _)) => lhs == rhs,
178            (Utf8(_), Utf8(_)) => true,
179            (Binary(_), Binary(_)) => true,
180            (List(lhs_dtype, _), List(rhs_dtype, _)) => lhs_dtype.eq_ignore_nullability(rhs_dtype),
181            (FixedSizeList(lhs_dtype, lhs_size, _), FixedSizeList(rhs_dtype, rhs_size, _)) => {
182                lhs_size == rhs_size && lhs_dtype.eq_ignore_nullability(rhs_dtype)
183            }
184            (Struct(lhs_dtype, _), Struct(rhs_dtype, _)) => {
185                (lhs_dtype.names() == rhs_dtype.names())
186                    && (lhs_dtype
187                        .fields()
188                        .zip_eq(rhs_dtype.fields())
189                        .all(|(l, r)| l.eq_ignore_nullability(&r)))
190            }
191            (Extension(lhs_extdtype), Extension(rhs_extdtype)) => {
192                lhs_extdtype.as_ref().eq_ignore_nullability(rhs_extdtype)
193            }
194            _ => false,
195        }
196    }
197
198    /// Returns `true` if `self` is a subset type of `other, otherwise `false`.
199    ///
200    /// If `self` is nullable, this means that the other `DType` must also be nullable (since a
201    /// nullable type represents more values than a non-nullable type) and equal.
202    ///
203    /// If `self` is non-nullable, then the other `DType` must be equal ignoring nullabillity.
204    ///
205    /// We implement this functionality as a complement to `is_superset_of`.
206    pub fn eq_with_nullability_subset(&self, other: &Self) -> bool {
207        if self.is_nullable() {
208            self == other
209        } else {
210            self.eq_ignore_nullability(other)
211        }
212    }
213
214    /// Returns `true` if `self` is a superset type of `other, otherwise `false`.
215    ///
216    /// If `self` is non-nullable, this means that the other `DType` must also be non-nullable
217    /// (since a non-nullable type represents less values than a nullable type) and equal.
218    ///
219    /// If `self` is nullable, then the other `DType` must be equal ignoring nullabillity.
220    ///
221    /// This function is useful (in the `vortex-array` crate) for determining if an `Array` can
222    /// extend a given `ArrayBuilder`: it can only extend it if the `DType` of the builder is a
223    /// superset of the `Array`.
224    pub fn eq_with_nullability_superset(&self, other: &Self) -> bool {
225        if self.is_nullable() {
226            self.eq_ignore_nullability(other)
227        } else {
228            self == other
229        }
230    }
231
232    /// Check if `self` is a boolean
233    pub fn is_boolean(&self) -> bool {
234        matches!(self, Bool(_))
235    }
236
237    /// Check if `self` is a primitive type
238    pub fn is_primitive(&self) -> bool {
239        matches!(self, Primitive(_, _))
240    }
241
242    /// Returns this [`DType`]'s [`PType`] if it is a primitive type, otherwise panics.
243    pub fn as_ptype(&self) -> PType {
244        if let Primitive(ptype, _) = self {
245            *ptype
246        } else {
247            vortex_panic!("DType is not a primitive type")
248        }
249    }
250
251    /// Check if `self` is an unsigned integer
252    pub fn is_unsigned_int(&self) -> bool {
253        if let Primitive(ptype, _) = self {
254            return ptype.is_unsigned_int();
255        }
256        false
257    }
258
259    /// Check if `self` is a signed integer
260    pub fn is_signed_int(&self) -> bool {
261        if let Primitive(ptype, _) = self {
262            return ptype.is_signed_int();
263        }
264        false
265    }
266
267    /// Check if `self` is an integer (signed or unsigned)
268    pub fn is_int(&self) -> bool {
269        if let Primitive(ptype, _) = self {
270            return ptype.is_int();
271        }
272        false
273    }
274
275    /// Check if `self` is a floating point number
276    pub fn is_float(&self) -> bool {
277        if let Primitive(ptype, _) = self {
278            return ptype.is_float();
279        }
280        false
281    }
282
283    /// Check if `self` is a [`DType::Decimal`].
284    pub fn is_decimal(&self) -> bool {
285        matches!(self, Decimal(..))
286    }
287
288    /// Check if `self` is a [`DType::Utf8`]
289    pub fn is_utf8(&self) -> bool {
290        matches!(self, Utf8(_))
291    }
292
293    /// Check if `self` is a [`DType::Binary`]
294    pub fn is_binary(&self) -> bool {
295        matches!(self, Binary(_))
296    }
297
298    /// Check if `self` is a [`DType::List`].
299    pub fn is_list(&self) -> bool {
300        matches!(self, List(_, _))
301    }
302
303    /// Check if `self` is a [`DType::FixedSizeList`],
304    pub fn is_fixed_size_list(&self) -> bool {
305        matches!(self, FixedSizeList(..))
306    }
307
308    /// Check if `self` is a [`DType::Struct`]
309    pub fn is_struct(&self) -> bool {
310        matches!(self, Struct(_, _))
311    }
312
313    /// Check if `self` is a [`DType::Extension`] type
314    pub fn is_extension(&self) -> bool {
315        matches!(self, Extension(_))
316    }
317
318    /// Check if `self` is a nested type, i.e. list, fixed size list, struct, or extension of a
319    /// recursive type.
320    pub fn is_nested(&self) -> bool {
321        match self {
322            List(..) | FixedSizeList(..) | Struct(..) => true,
323            Extension(ext) => ext.storage_dtype().is_nested(),
324            _ => false,
325        }
326    }
327
328    /// Check returns the inner decimal type if the dtype is a [`DType::Decimal`].
329    pub fn as_decimal_opt(&self) -> Option<&DecimalDType> {
330        if let Decimal(decimal, _) = self {
331            Some(decimal)
332        } else {
333            None
334        }
335    }
336
337    /// Owned version of [Self::as_decimal_opt].
338    pub fn into_decimal_opt(self) -> Option<DecimalDType> {
339        if let Decimal(decimal, _) = self {
340            Some(decimal)
341        } else {
342            None
343        }
344    }
345
346    /// Get the inner element dtype if `self` is a [`DType::List`], otherwise returns `None`.
347    ///
348    /// Note that this does _not_ return `Some` if `self` is a [`DType::FixedSizeList`].
349    pub fn as_list_element_opt(&self) -> Option<&Arc<DType>> {
350        if let List(edt, _) = self {
351            Some(edt)
352        } else {
353            None
354        }
355    }
356
357    /// Owned version of [Self::as_list_element_opt].
358    pub fn into_list_element_opt(self) -> Option<Arc<DType>> {
359        if let List(edt, _) = self {
360            Some(edt)
361        } else {
362            None
363        }
364    }
365
366    /// Get the inner element dtype if `self` is a [`DType::FixedSizeList`], otherwise returns
367    /// `None`.
368    ///
369    /// Note that this does _not_ return `Some` if `self` is a [`DType::List`].
370    pub fn as_fixed_size_list_element_opt(&self) -> Option<&Arc<DType>> {
371        if let FixedSizeList(edt, ..) = self {
372            Some(edt)
373        } else {
374            None
375        }
376    }
377
378    /// Owned version of [Self::as_fixed_size_list_element_opt].
379    pub fn into_fixed_size_list_element_opt(self) -> Option<Arc<DType>> {
380        if let FixedSizeList(edt, ..) = self {
381            Some(edt)
382        } else {
383            None
384        }
385    }
386
387    /// Get the inner element dtype if `self` is **either** a [`DType::List`] or a
388    /// [`DType::FixedSizeList`], otherwise returns `None`
389    pub fn as_any_size_list_element_opt(&self) -> Option<&Arc<DType>> {
390        if let FixedSizeList(edt, ..) = self {
391            Some(edt)
392        } else if let List(edt, ..) = self {
393            Some(edt)
394        } else {
395            None
396        }
397    }
398
399    /// Owned version of [Self::as_any_size_list_element_opt].
400    pub fn into_any_size_list_element_opt(self) -> Option<Arc<DType>> {
401        if let FixedSizeList(edt, ..) = self {
402            Some(edt)
403        } else if let List(edt, ..) = self {
404            Some(edt)
405        } else {
406            None
407        }
408    }
409
410    /// Returns the [`StructFields`] from a struct [`DType`].
411    ///
412    /// # Panics
413    ///
414    /// If the [`DType`] is not a struct.
415    pub fn as_struct_fields(&self) -> &StructFields {
416        if let Struct(f, _) = self {
417            return f;
418        }
419        vortex_panic!("DType is not a Struct")
420    }
421
422    /// Owned version of [Self::as_struct_fields].
423    pub fn into_struct_fields(self) -> StructFields {
424        if let Struct(f, _) = self {
425            return f;
426        }
427        vortex_panic!("DType is not a Struct")
428    }
429
430    /// Get the `StructDType` if `self` is a `StructDType`, otherwise `None`
431    pub fn as_struct_fields_opt(&self) -> Option<&StructFields> {
432        if let Struct(f, _) = self {
433            Some(f)
434        } else {
435            None
436        }
437    }
438
439    /// Owned version of [Self::as_struct_fields_opt].
440    pub fn into_struct_fields_opt(self) -> Option<StructFields> {
441        if let Struct(f, _) = self {
442            Some(f)
443        } else {
444            None
445        }
446    }
447
448    /// Convenience method for creating a [`DType::List`].
449    pub fn list(dtype: impl Into<DType>, nullability: Nullability) -> Self {
450        List(Arc::new(dtype.into()), nullability)
451    }
452
453    /// Convenience method for creating a [`DType::Struct`].
454    pub fn struct_<I: IntoIterator<Item = (impl Into<FieldName>, impl Into<FieldDType>)>>(
455        iter: I,
456        nullability: Nullability,
457    ) -> Self {
458        Struct(StructFields::from_iter(iter), nullability)
459    }
460}
461
462impl Display for DType {
463    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
464        match self {
465            Null => write!(f, "null"),
466            Bool(null) => write!(f, "bool{null}"),
467            Primitive(pdt, null) => write!(f, "{pdt}{null}"),
468            Decimal(ddt, null) => write!(f, "{ddt}{null}"),
469            Utf8(null) => write!(f, "utf8{null}"),
470            Binary(null) => write!(f, "binary{null}"),
471            Struct(sf, null) => write!(
472                f,
473                "{{{}}}{null}",
474                sf.names()
475                    .iter()
476                    .zip(sf.fields())
477                    .map(|(field_null, dt)| format!("{field_null}={dt}"))
478                    .join(", "),
479            ),
480            List(edt, null) => write!(f, "list({edt}){null}"),
481            FixedSizeList(edt, size, null) => write!(f, "fixed_size_list({edt})[{size}]{null}"),
482            Extension(ext) => write!(
483                f,
484                "ext({}, {}{}){}",
485                ext.id(),
486                ext.storage_dtype()
487                    .with_nullability(Nullability::NonNullable),
488                ext.metadata()
489                    .map(|m| format!(", {m:?}"))
490                    .unwrap_or_else(|| "".to_string()),
491                ext.storage_dtype().nullability(),
492            ),
493        }
494    }
495}