vortex_dtype/dtype.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::{Debug, Display, Formatter};
5use std::hash::Hash;
6use std::sync::Arc;
7
8use DType::*;
9use itertools::Itertools;
10use static_assertions::const_assert_eq;
11use vortex_error::vortex_panic;
12
13use crate::decimal::DecimalDType;
14use crate::nullability::Nullability;
15use crate::{ExtDType, FieldDType, FieldName, PType, StructFields};
16
17/// The logical types of elements in Vortex arrays.
18///
19/// `DType` represents the different logical data types that can be represented in a Vortex array.
20///
21/// This is different from physical types, which represent the actual layout of data (compressed or
22/// uncompressed). The set of physical types/formats (or data layout) is surjective into the set of
23/// logical types (or in other words, all physical types map to a single logical type).
24///
25/// Note that a `DType` represents the logical type of the elements in the `Array`s, **not** the
26/// logical type of the `Array` itself.
27///
28/// For example, an array with [`DType::Primitive`]([`I32`], [`NonNullable`]) could be physically
29/// encoded as any of the following:
30///
31/// - A flat array of `i32` values.
32/// - A run-length encoded sequence.
33/// - Dictionary encoded values with bitpacked codes.
34///
35/// All of these physical encodings preserve the same logical [`I32`] type, even if the physical
36/// data is different.
37///
38/// [`I32`]: PType::I32
39/// [`NonNullable`]: Nullability::NonNullable
40#[derive(Debug, Clone, PartialEq, Eq, Hash)]
41#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
42pub enum DType {
43 /// A logical null type.
44 ///
45 /// `Null` only has a single value, `null`.
46 Null,
47
48 /// A logical boolean type.
49 ///
50 /// `Bool` can be `true` or `false` if non-nullable. It can be `true`, `false`, or `null` if
51 /// nullable.
52 Bool(Nullability),
53
54 /// A logical fixed-width numeric type.
55 ///
56 /// This can be unsigned, signed, or floating point. See [`PType`] for more information.
57 Primitive(PType, Nullability),
58
59 /// Logical real numbers with fixed precision and scale.
60 ///
61 /// See [`DecimalDType`] for more information.
62 Decimal(DecimalDType, Nullability),
63
64 /// Logical UTF-8 strings.
65 Utf8(Nullability),
66
67 /// Logical binary data.
68 Binary(Nullability),
69
70 /// A logical variable-length list type.
71 ///
72 /// This is parameterized by a single `DType` that represents the element type of the inner
73 /// lists.
74 List(Arc<DType>, Nullability),
75
76 /// A logical fixed-size list type.
77 ///
78 /// This is parameterized by a `DType` that represents the element type of the inner lists, as
79 /// well as a `u32` size that determines the fixed length of each `FixedSizeList` scalar.
80 ///
81 /// This variant has not yet been implemented. Please add a comment with
82 /// `TODO(connor)[FixedSizeList]` if you need to match against `DType`.
83 FixedSizeList(Arc<DType>, u32, Nullability),
84
85 /// A logical struct type.
86 ///
87 /// A `Struct` type is composed of an ordered list of fields, each with a corresponding name and
88 /// `DType`. See [`StructFields`] for more information.
89 Struct(StructFields, Nullability),
90
91 /// A user-defined extension type.
92 ///
93 /// See [`ExtDType`] for more information.
94 Extension(Arc<ExtDType>),
95}
96
97#[cfg(not(target_arch = "wasm32"))]
98const_assert_eq!(size_of::<DType>(), 16);
99
100#[cfg(target_arch = "wasm32")]
101const_assert_eq!(size_of::<DType>(), 12);
102
103impl DType {
104 /// The default `DType` for bytes.
105 pub const BYTES: Self = Primitive(PType::U8, Nullability::NonNullable);
106
107 /// Get the nullability of the `DType`.
108 #[inline]
109 pub fn nullability(&self) -> Nullability {
110 self.is_nullable().into()
111 }
112
113 /// Check if the `DType` is [`Nullability::Nullable`].
114 #[inline]
115 pub fn is_nullable(&self) -> bool {
116 match self {
117 Null => true,
118 Extension(ext_dtype) => ext_dtype.storage_dtype().is_nullable(),
119 Bool(null)
120 | Primitive(_, null)
121 | Decimal(_, null)
122 | Utf8(null)
123 | Binary(null)
124 | Struct(_, null)
125 | List(_, null)
126 | FixedSizeList(_, _, null) => matches!(null, Nullability::Nullable),
127 }
128 }
129
130 /// Get a new `DType` with [`Nullability::NonNullable`] (but otherwise the same as `self`)
131 pub fn as_nonnullable(&self) -> Self {
132 self.with_nullability(Nullability::NonNullable)
133 }
134
135 /// Get a new `DType` with [`Nullability::Nullable`] (but otherwise the same as `self`)
136 pub fn as_nullable(&self) -> Self {
137 self.with_nullability(Nullability::Nullable)
138 }
139
140 /// Get a new DType with the given nullability (but otherwise the same as `self`)
141 pub fn with_nullability(&self, nullability: Nullability) -> Self {
142 match self {
143 Null => Null,
144 Bool(_) => Bool(nullability),
145 Primitive(pdt, _) => Primitive(*pdt, nullability),
146 Decimal(ddt, _) => Decimal(*ddt, nullability),
147 Utf8(_) => Utf8(nullability),
148 Binary(_) => Binary(nullability),
149 Struct(sf, _) => Struct(sf.clone(), nullability),
150 List(edt, _) => List(edt.clone(), nullability),
151 FixedSizeList(edt, size, _) => FixedSizeList(edt.clone(), *size, nullability),
152 Extension(ext) => Extension(Arc::new(ext.with_nullability(nullability))),
153 }
154 }
155
156 /// Union the nullability of this `DType` with the other nullability, returning a new `DType`.
157 pub fn union_nullability(&self, other: Nullability) -> Self {
158 let nullability = self.nullability() | other;
159 self.with_nullability(nullability)
160 }
161
162 /// Check if `self` and `other` are equal, ignoring nullability.
163 pub fn eq_ignore_nullability(&self, other: &Self) -> bool {
164 match (self, other) {
165 (Null, Null) => true,
166 (Bool(_), Bool(_)) => true,
167 (Primitive(lhs_ptype, _), Primitive(rhs_ptype, _)) => lhs_ptype == rhs_ptype,
168 (Decimal(lhs, _), Decimal(rhs, _)) => lhs == rhs,
169 (Utf8(_), Utf8(_)) => true,
170 (Binary(_), Binary(_)) => true,
171 (List(lhs_dtype, _), List(rhs_dtype, _)) => lhs_dtype.eq_ignore_nullability(rhs_dtype),
172 (FixedSizeList(lhs_dtype, lhs_size, _), FixedSizeList(rhs_dtype, rhs_size, _)) => {
173 lhs_size == rhs_size && lhs_dtype.eq_ignore_nullability(rhs_dtype)
174 }
175 (Struct(lhs_dtype, _), Struct(rhs_dtype, _)) => {
176 (lhs_dtype.names() == rhs_dtype.names())
177 && (lhs_dtype
178 .fields()
179 .zip_eq(rhs_dtype.fields())
180 .all(|(l, r)| l.eq_ignore_nullability(&r)))
181 }
182 (Extension(lhs_extdtype), Extension(rhs_extdtype)) => {
183 lhs_extdtype.as_ref().eq_ignore_nullability(rhs_extdtype)
184 }
185 _ => false,
186 }
187 }
188
189 /// Returns `true` if `self` is a subset type of `other, otherwise `false`.
190 ///
191 /// If `self` is nullable, this means that the other `DType` must also be nullable (since a
192 /// nullable type represents more values than a non-nullable type) and equal.
193 ///
194 /// If `self` is non-nullable, then the other `DType` must be equal ignoring nullabillity.
195 ///
196 /// We implement this functionality as a complement to `is_superset_of`.
197 pub fn eq_with_nullability_subset(&self, other: &Self) -> bool {
198 if self.is_nullable() {
199 self == other
200 } else {
201 self.eq_ignore_nullability(other)
202 }
203 }
204
205 /// Returns `true` if `self` is a superset type of `other, otherwise `false`.
206 ///
207 /// If `self` is non-nullable, this means that the other `DType` must also be non-nullable
208 /// (since a non-nullable type represents less values than a nullable type) and equal.
209 ///
210 /// If `self` is nullable, then the other `DType` must be equal ignoring nullabillity.
211 ///
212 /// This function is useful (in the `vortex-array` crate) for determining if an `Array` can
213 /// extend a given `ArrayBuilder`: it can only extend it if the `DType` of the builder is a
214 /// superset of the `Array`.
215 pub fn eq_with_nullability_superset(&self, other: &Self) -> bool {
216 if self.is_nullable() {
217 self.eq_ignore_nullability(other)
218 } else {
219 self == other
220 }
221 }
222
223 /// Check if `self` is a boolean
224 pub fn is_boolean(&self) -> bool {
225 matches!(self, Bool(_))
226 }
227
228 /// Check if `self` is a primitive type
229 pub fn is_primitive(&self) -> bool {
230 matches!(self, Primitive(_, _))
231 }
232
233 /// Returns this [`DType`]'s [`PType`] if it is a primitive type, otherwise panics.
234 pub fn as_ptype(&self) -> PType {
235 if let Primitive(ptype, _) = self {
236 *ptype
237 } else {
238 vortex_panic!("DType is not a primitive type")
239 }
240 }
241
242 /// Check if `self` is an unsigned integer
243 pub fn is_unsigned_int(&self) -> bool {
244 if let Primitive(ptype, _) = self {
245 return ptype.is_unsigned_int();
246 }
247 false
248 }
249
250 /// Check if `self` is a signed integer
251 pub fn is_signed_int(&self) -> bool {
252 if let Primitive(ptype, _) = self {
253 return ptype.is_signed_int();
254 }
255 false
256 }
257
258 /// Check if `self` is an integer (signed or unsigned)
259 pub fn is_int(&self) -> bool {
260 if let Primitive(ptype, _) = self {
261 return ptype.is_int();
262 }
263 false
264 }
265
266 /// Check if `self` is a floating point number
267 pub fn is_float(&self) -> bool {
268 if let Primitive(ptype, _) = self {
269 return ptype.is_float();
270 }
271 false
272 }
273
274 /// Check if `self` is a [`DType::Decimal`].
275 pub fn is_decimal(&self) -> bool {
276 matches!(self, Decimal(..))
277 }
278
279 /// Check if `self` is a [`DType::Utf8`]
280 pub fn is_utf8(&self) -> bool {
281 matches!(self, Utf8(_))
282 }
283
284 /// Check if `self` is a [`DType::Binary`]
285 pub fn is_binary(&self) -> bool {
286 matches!(self, Binary(_))
287 }
288
289 /// Check if `self` is a [`DType::List`].
290 pub fn is_list(&self) -> bool {
291 matches!(self, List(_, _))
292 }
293
294 /// Check if `self` is a [`DType::FixedSizeList`],
295 pub fn is_fixed_size_list(&self) -> bool {
296 matches!(self, FixedSizeList(..))
297 }
298
299 /// Check if `self` is a [`DType::Struct`]
300 pub fn is_struct(&self) -> bool {
301 matches!(self, Struct(_, _))
302 }
303
304 /// Check if `self` is a [`DType::Extension`] type
305 pub fn is_extension(&self) -> bool {
306 matches!(self, Extension(_))
307 }
308
309 /// Check returns the inner decimal type if the dtype is a [`DType::Decimal`].
310 pub fn as_decimal_opt(&self) -> Option<&DecimalDType> {
311 if let Decimal(decimal, _) = self {
312 Some(decimal)
313 } else {
314 None
315 }
316 }
317
318 /// Get the inner element dtype if `self` is a [`DType::List`] or [`DType::FixedSizeList`],
319 /// otherwise returns `None`
320 pub fn as_list_element_opt(&self) -> Option<&Arc<DType>> {
321 if let List(edt, _) = self {
322 Some(edt)
323 } else if let FixedSizeList(edt, ..) = self {
324 Some(edt)
325 } else {
326 None
327 }
328 }
329
330 /// Get the `StructDType` if `self` is a `StructDType`, otherwise `None`
331 pub fn as_struct_fields_opt(&self) -> Option<&StructFields> {
332 if let Struct(f, _) = self {
333 Some(f)
334 } else {
335 None
336 }
337 }
338
339 /// Convenience method for creating a [`DType::List`].
340 pub fn list(dtype: impl Into<DType>, nullability: Nullability) -> Self {
341 List(Arc::new(dtype.into()), nullability)
342 }
343
344 /// Convenience method for creating a [`DType::Struct`].
345 pub fn struct_<I: IntoIterator<Item = (impl Into<FieldName>, impl Into<FieldDType>)>>(
346 iter: I,
347 nullability: Nullability,
348 ) -> Self {
349 Struct(StructFields::from_iter(iter), nullability)
350 }
351}
352
353impl Display for DType {
354 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
355 match self {
356 Null => write!(f, "null"),
357 Bool(null) => write!(f, "bool{null}"),
358 Primitive(pdt, null) => write!(f, "{pdt}{null}"),
359 Decimal(ddt, null) => write!(f, "{ddt}{null}"),
360 Utf8(null) => write!(f, "utf8{null}"),
361 Binary(null) => write!(f, "binary{null}"),
362 Struct(sf, null) => write!(
363 f,
364 "{{{}}}{null}",
365 sf.names()
366 .iter()
367 .zip(sf.fields())
368 .map(|(field_null, dt)| format!("{field_null}={dt}"))
369 .join(", "),
370 ),
371 List(edt, null) => write!(f, "list({edt}){null}"),
372 FixedSizeList(edt, size, null) => write!(f, "fixed_size_list({edt})[{size}]{null}"),
373 Extension(ext) => write!(
374 f,
375 "ext({}, {}{}){}",
376 ext.id(),
377 ext.storage_dtype()
378 .with_nullability(Nullability::NonNullable),
379 ext.metadata()
380 .map(|m| format!(", {m:?}"))
381 .unwrap_or_else(|| "".to_string()),
382 ext.storage_dtype().nullability(),
383 ),
384 }
385 }
386}