polars_core/datatypes/
dtype.rs

1use std::collections::BTreeMap;
2
3use arrow::datatypes::{
4    DTYPE_CATEGORICAL_NEW, DTYPE_ENUM_VALUES_LEGACY, DTYPE_ENUM_VALUES_NEW, MAINTAIN_PL_TYPE,
5    Metadata, PL_KEY,
6};
7#[cfg(feature = "dtype-array")]
8use polars_utils::format_tuple;
9use polars_utils::itertools::Itertools;
10#[cfg(any(feature = "serde-lazy", feature = "serde"))]
11use serde::{Deserialize, Serialize};
12pub use temporal::time_zone::TimeZone;
13
14use super::*;
15#[cfg(feature = "object")]
16use crate::chunked_array::object::registry::get_object_physical_type;
17use crate::utils::materialize_dyn_int;
18
19pub trait MetaDataExt: IntoMetadata {
20    fn pl_enum_metadata(&self) -> Option<&str> {
21        let md = self.into_metadata_ref();
22        let values = md
23            .get(DTYPE_ENUM_VALUES_NEW)
24            .or_else(|| md.get(DTYPE_ENUM_VALUES_LEGACY));
25        Some(values?.as_str())
26    }
27
28    fn pl_categorical_metadata(&self) -> Option<&str> {
29        // We ignore DTYPE_CATEGORICAL_LEGACY here, as we already map all
30        // string-typed arrow dictionaries to the global Categories, and the
31        // legacy metadata format only specifies the now-removed physical
32        // ordering parameter.
33        Some(
34            self.into_metadata_ref()
35                .get(DTYPE_CATEGORICAL_NEW)?
36                .as_str(),
37        )
38    }
39
40    fn maintain_type(&self) -> bool {
41        let metadata = self.into_metadata_ref();
42        metadata.get(PL_KEY).map(|s| s.as_str()) == Some(MAINTAIN_PL_TYPE)
43    }
44}
45
// `Metadata` uses the default method implementations of `MetaDataExt` unchanged.
impl MetaDataExt for Metadata {}
/// Types that can hand out a borrowed [`Metadata`].
pub trait IntoMetadata {
    /// Borrows the metadata of `self`.
    // The `into_` prefix on a `&self` method is what trips the silenced lint.
    #[allow(clippy::wrong_self_convention)]
    fn into_metadata_ref(&self) -> &Metadata;
}
51
impl IntoMetadata for Metadata {
    /// A `Metadata` is its own metadata, so this simply returns `self`.
    fn into_metadata_ref(&self) -> &Metadata {
        self
    }
}
57
/// The flavour of a [`DataType::Unknown`], i.e. what little is known about a
/// dtype that has not been resolved yet.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
#[cfg_attr(
    any(feature = "serde", feature = "serde-lazy"),
    derive(Serialize, Deserialize)
)]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum UnknownKind {
    // Has no default dtype: `materialize` returns `None` for it.
    Ufunc,
    // Hold the value to determine the concrete size.
    Int(i128),
    // Materializes to `Float64`.
    Float,
    // Can be Categorical or String
    Str,
    // Nothing is known; this is the default kind and has no default dtype.
    #[default]
    Any,
}
74
75impl UnknownKind {
76    pub fn materialize(&self) -> Option<DataType> {
77        let dtype = match self {
78            UnknownKind::Int(v) => materialize_dyn_int(*v).dtype(),
79            UnknownKind::Float => DataType::Float64,
80            UnknownKind::Str => DataType::String,
81            UnknownKind::Any | UnknownKind::Ufunc => return None,
82        };
83        Some(dtype)
84    }
85}
86
/// The logical data types a column can have.
///
/// Several variants only exist when the matching cargo feature is enabled.
#[derive(Clone)]
pub enum DataType {
    Boolean,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    Int8,
    Int16,
    Int32,
    Int64,
    Int128,
    Float32,
    Float64,
    /// Fixed point decimal type with optional precision and non-negative scale.
    /// This is backed by a signed 128-bit integer which allows for up to 38 significant digits,
    /// meaning the maximum precision is 38.
    #[cfg(feature = "dtype-decimal")]
    Decimal(Option<usize>, Option<usize>), // precision/scale; scale being None means "infer"
    /// String data
    String,
    Binary,
    BinaryOffset,
    /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in days (32 bits).
    Date,
    /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in the given timeunit (64 bits).
    Datetime(TimeUnit, Option<TimeZone>),
    /// 64-bit integer representing difference between times in milliseconds or nanoseconds
    Duration(TimeUnit),
    /// A 64-bit time representing the elapsed time since midnight in nanoseconds
    Time,
    /// A nested list with a fixed size in each row
    #[cfg(feature = "dtype-array")]
    Array(Box<DataType>, usize),
    /// A nested list with a variable size in each row
    List(Box<DataType>),
    /// A generic type that can be used in a `Series`
    /// &'static str can be used to determine/set inner type
    #[cfg(feature = "object")]
    Object(&'static str),
    Null,
    #[cfg(feature = "dtype-categorical")]
    Categorical(Arc<Categories>, Arc<CategoricalMapping>),
    // Has the same second field type (`Arc<CategoricalMapping>`) as `Categorical`, so both
    // variants can be matched with the same patterns.
    #[cfg(feature = "dtype-categorical")]
    Enum(Arc<FrozenCategories>, Arc<CategoricalMapping>),
    #[cfg(feature = "dtype-struct")]
    Struct(Vec<Field>),
    // some logical types we cannot know statically, e.g. Datetime
    Unknown(UnknownKind),
}
140
141impl Default for DataType {
142    fn default() -> Self {
143        DataType::Unknown(UnknownKind::Any)
144    }
145}
146
/// Types that can expose a borrowed [`DataType`].
pub trait AsRefDataType {
    /// Borrows the dtype of `self`.
    fn as_ref_dtype(&self) -> &DataType;
}
150
151impl Hash for DataType {
152    fn hash<H: Hasher>(&self, state: &mut H) {
153        std::mem::discriminant(self).hash(state)
154    }
155}
156
157impl PartialEq for DataType {
158    fn eq(&self, other: &Self) -> bool {
159        use DataType::*;
160        {
161            match (self, other) {
162                #[cfg(feature = "dtype-categorical")]
163                (Categorical(cats_l, _), Categorical(cats_r, _)) => Arc::ptr_eq(cats_l, cats_r),
164                #[cfg(feature = "dtype-categorical")]
165                (Enum(fcats_l, _), Enum(fcats_r, _)) => Arc::ptr_eq(fcats_l, fcats_r),
166                (Datetime(tu_l, tz_l), Datetime(tu_r, tz_r)) => tu_l == tu_r && tz_l == tz_r,
167                (List(left_inner), List(right_inner)) => left_inner == right_inner,
168                #[cfg(feature = "dtype-duration")]
169                (Duration(tu_l), Duration(tu_r)) => tu_l == tu_r,
170                #[cfg(feature = "dtype-decimal")]
171                (Decimal(l_prec, l_scale), Decimal(r_prec, r_scale)) => {
172                    let is_prec_eq = l_prec.is_none() || r_prec.is_none() || l_prec == r_prec;
173                    let is_scale_eq = l_scale.is_none() || r_scale.is_none() || l_scale == r_scale;
174
175                    is_prec_eq && is_scale_eq
176                },
177                #[cfg(feature = "object")]
178                (Object(lhs), Object(rhs)) => lhs == rhs,
179                #[cfg(feature = "dtype-struct")]
180                (Struct(lhs), Struct(rhs)) => {
181                    std::ptr::eq(Vec::as_ptr(lhs), Vec::as_ptr(rhs)) || lhs == rhs
182                },
183                #[cfg(feature = "dtype-array")]
184                (Array(left_inner, left_width), Array(right_inner, right_width)) => {
185                    left_width == right_width && left_inner == right_inner
186                },
187                (Unknown(l), Unknown(r)) => match (l, r) {
188                    (UnknownKind::Int(_), UnknownKind::Int(_)) => true,
189                    _ => l == r,
190                },
191                _ => std::mem::discriminant(self) == std::mem::discriminant(other),
192            }
193        }
194    }
195}
196
// Marker impl on top of the hand-written `PartialEq` for `DataType`.
impl Eq for DataType {}
198
199impl DataType {
200    pub const IDX_DTYPE: Self = {
201        #[cfg(not(feature = "bigidx"))]
202        {
203            DataType::UInt32
204        }
205        #[cfg(feature = "bigidx")]
206        {
207            DataType::UInt64
208        }
209    };
210
211    pub fn value_within_range(&self, other: AnyValue) -> bool {
212        use DataType::*;
213        match self {
214            UInt8 => other.extract::<u8>().is_some(),
215            #[cfg(feature = "dtype-u16")]
216            UInt16 => other.extract::<u16>().is_some(),
217            UInt32 => other.extract::<u32>().is_some(),
218            UInt64 => other.extract::<u64>().is_some(),
219            #[cfg(feature = "dtype-i8")]
220            Int8 => other.extract::<i8>().is_some(),
221            #[cfg(feature = "dtype-i16")]
222            Int16 => other.extract::<i16>().is_some(),
223            Int32 => other.extract::<i32>().is_some(),
224            Int64 => other.extract::<i64>().is_some(),
225            _ => false,
226        }
227    }
228
229    /// Struct representation of the arrow `month_day_nano_interval` type.
230    #[cfg(feature = "dtype-struct")]
231    pub fn _month_days_ns_struct_type() -> Self {
232        DataType::Struct(vec![
233            Field::new(PlSmallStr::from_static("months"), DataType::Int32),
234            Field::new(PlSmallStr::from_static("days"), DataType::Int32),
235            Field::new(
236                PlSmallStr::from_static("nanoseconds"),
237                DataType::Duration(TimeUnit::Nanoseconds),
238            ),
239        ])
240    }
241
242    /// Check if the whole dtype is known.
243    pub fn is_known(&self) -> bool {
244        match self {
245            DataType::List(inner) => inner.is_known(),
246            #[cfg(feature = "dtype-array")]
247            DataType::Array(inner, _) => inner.is_known(),
248            #[cfg(feature = "dtype-struct")]
249            DataType::Struct(fields) => fields.iter().all(|fld| fld.dtype.is_known()),
250            DataType::Unknown(_) => false,
251            _ => true,
252        }
253    }
254
255    /// Materialize this datatype if it is unknown. All other datatypes
256    /// are left unchanged.
257    pub fn materialize_unknown(self, allow_unknown: bool) -> PolarsResult<DataType> {
258        match self {
259            DataType::Unknown(u) => match u.materialize() {
260                Some(known) => Ok(known),
261                None => {
262                    if allow_unknown {
263                        Ok(DataType::Unknown(u))
264                    } else {
265                        polars_bail!(SchemaMismatch: "failed to materialize unknown type")
266                    }
267                },
268            },
269            DataType::List(inner) => Ok(DataType::List(Box::new(
270                inner.materialize_unknown(allow_unknown)?,
271            ))),
272            #[cfg(feature = "dtype-array")]
273            DataType::Array(inner, size) => Ok(DataType::Array(
274                Box::new(inner.materialize_unknown(allow_unknown)?),
275                size,
276            )),
277            #[cfg(feature = "dtype-struct")]
278            DataType::Struct(fields) => Ok(DataType::Struct(
279                fields
280                    .into_iter()
281                    .map(|f| {
282                        PolarsResult::Ok(Field::new(
283                            f.name,
284                            f.dtype.materialize_unknown(allow_unknown)?,
285                        ))
286                    })
287                    .try_collect_vec()?,
288            )),
289            _ => Ok(self),
290        }
291    }
292
293    #[cfg(feature = "dtype-array")]
294    /// Get the full shape of a multidimensional array.
295    pub fn get_shape(&self) -> Option<Vec<usize>> {
296        fn get_shape_impl(dt: &DataType, shape: &mut Vec<usize>) {
297            if let DataType::Array(inner, size) = dt {
298                shape.push(*size);
299                get_shape_impl(inner, shape);
300            }
301        }
302
303        if let DataType::Array(inner, size) = self {
304            let mut shape = vec![*size];
305            get_shape_impl(inner, &mut shape);
306            Some(shape)
307        } else {
308            None
309        }
310    }
311
312    /// Get the inner data type of a nested type.
313    pub fn inner_dtype(&self) -> Option<&DataType> {
314        match self {
315            DataType::List(inner) => Some(inner),
316            #[cfg(feature = "dtype-array")]
317            DataType::Array(inner, _) => Some(inner),
318            _ => None,
319        }
320    }
321
322    /// Get the inner data type of a nested type.
323    pub fn into_inner_dtype(self) -> Option<DataType> {
324        match self {
325            DataType::List(inner) => Some(*inner),
326            #[cfg(feature = "dtype-array")]
327            DataType::Array(inner, _) => Some(*inner),
328            _ => None,
329        }
330    }
331
332    /// Get the inner data type of a nested type.
333    pub fn try_into_inner_dtype(self) -> PolarsResult<DataType> {
334        match self {
335            DataType::List(inner) => Ok(*inner),
336            #[cfg(feature = "dtype-array")]
337            DataType::Array(inner, _) => Ok(*inner),
338            dt => polars_bail!(InvalidOperation: "cannot get inner datatype of `{dt}`"),
339        }
340    }
341
342    /// Get the absolute inner data type of a nested type.
343    pub fn leaf_dtype(&self) -> &DataType {
344        let mut prev = self;
345        while let Some(dtype) = prev.inner_dtype() {
346            prev = dtype
347        }
348        prev
349    }
350
351    #[cfg(feature = "dtype-array")]
352    /// Get the inner data type of a multidimensional array.
353    pub fn array_leaf_dtype(&self) -> Option<&DataType> {
354        let mut prev = self;
355        match prev {
356            DataType::Array(_, _) => {
357                while let DataType::Array(inner, _) = &prev {
358                    prev = inner;
359                }
360                Some(prev)
361            },
362            _ => None,
363        }
364    }
365
366    /// Cast the leaf types of Lists/Arrays and keep the nesting.
367    pub fn cast_leaf(&self, to: DataType) -> DataType {
368        use DataType::*;
369        match self {
370            List(inner) => List(Box::new(inner.cast_leaf(to))),
371            #[cfg(feature = "dtype-array")]
372            Array(inner, size) => Array(Box::new(inner.cast_leaf(to)), *size),
373            _ => to,
374        }
375    }
376
377    /// Return whether the cast to `to` makes sense.
378    ///
379    /// If it `None`, we are not sure.
380    pub fn can_cast_to(&self, to: &DataType) -> Option<bool> {
381        if self == to {
382            return Some(true);
383        }
384        if self.is_primitive_numeric() && to.is_primitive_numeric() {
385            return Some(true);
386        }
387
388        if self.is_null() {
389            return Some(true);
390        }
391
392        use DataType as D;
393        Some(match (self, to) {
394            #[cfg(feature = "dtype-categorical")]
395            (D::Categorical(_, _) | D::Enum(_, _), D::Binary)
396            | (D::Binary, D::Categorical(_, _) | D::Enum(_, _)) => false, // TODO @ cat-rework: why can we not cast to Binary?
397
398            #[cfg(feature = "object")]
399            (D::Object(_), D::Object(_)) => true,
400            #[cfg(feature = "object")]
401            (D::Object(_), _) | (_, D::Object(_)) => false,
402
403            (D::Boolean, dt) | (dt, D::Boolean) => match dt {
404                dt if dt.is_primitive_numeric() => true,
405                #[cfg(feature = "dtype-decimal")]
406                D::Decimal(_, _) => true,
407                D::String | D::Binary => true,
408                _ => false,
409            },
410
411            (D::List(from), D::List(to)) => from.can_cast_to(to)?,
412            #[cfg(feature = "dtype-array")]
413            (D::Array(from, l_width), D::Array(to, r_width)) => {
414                l_width == r_width && from.can_cast_to(to)?
415            },
416            #[cfg(feature = "dtype-struct")]
417            (D::Struct(l_fields), D::Struct(r_fields)) => {
418                if l_fields.is_empty() {
419                    return Some(true);
420                }
421
422                if l_fields.len() != r_fields.len() {
423                    return Some(false);
424                }
425
426                for (l, r) in l_fields.iter().zip(r_fields) {
427                    if !l.dtype().can_cast_to(r.dtype())? {
428                        return Some(false);
429                    }
430                }
431
432                true
433            },
434
435            // @NOTE: we are being conversative
436            _ => return None,
437        })
438    }
439
440    pub fn implode(self) -> DataType {
441        DataType::List(Box::new(self))
442    }
443
444    /// Convert to the physical data type
445    #[must_use]
446    pub fn to_physical(&self) -> DataType {
447        use DataType::*;
448        match self {
449            Date => Int32,
450            Datetime(_, _) => Int64,
451            Duration(_) => Int64,
452            Time => Int64,
453            #[cfg(feature = "dtype-decimal")]
454            Decimal(_, _) => Int128,
455            #[cfg(feature = "dtype-categorical")]
456            Categorical(cats, _) => cats.physical().dtype(),
457            #[cfg(feature = "dtype-categorical")]
458            Enum(fcats, _) => fcats.physical().dtype(),
459            #[cfg(feature = "dtype-array")]
460            Array(dt, width) => Array(Box::new(dt.to_physical()), *width),
461            List(dt) => List(Box::new(dt.to_physical())),
462            #[cfg(feature = "dtype-struct")]
463            Struct(fields) => {
464                let new_fields = fields
465                    .iter()
466                    .map(|s| Field::new(s.name().clone(), s.dtype().to_physical()))
467                    .collect();
468                Struct(new_fields)
469            },
470            _ => self.clone(),
471        }
472    }
473
474    pub fn is_supported_list_arithmetic_input(&self) -> bool {
475        self.is_primitive_numeric() || self.is_bool() || self.is_null()
476    }
477
478    /// Check if this [`DataType`] is a logical type
479    pub fn is_logical(&self) -> bool {
480        self != &self.to_physical()
481    }
482
483    /// Check if this [`DataType`] is a temporal type
484    pub fn is_temporal(&self) -> bool {
485        use DataType::*;
486        matches!(self, Date | Datetime(_, _) | Duration(_) | Time)
487    }
488
489    /// Check if datatype is a primitive type. By that we mean that
490    /// it is not a nested or logical type.
491    pub fn is_primitive(&self) -> bool {
492        self.is_primitive_numeric()
493            | matches!(
494                self,
495                DataType::Boolean | DataType::String | DataType::Binary
496            )
497    }
498
499    /// Check if this [`DataType`] is a primitive numeric type (excludes Decimal).
500    pub fn is_primitive_numeric(&self) -> bool {
501        self.is_float() || self.is_integer()
502    }
503
504    /// Check if this [`DataType`] is a boolean.
505    pub fn is_bool(&self) -> bool {
506        matches!(self, DataType::Boolean)
507    }
508
509    /// Check if this [`DataType`] is a list.
510    pub fn is_list(&self) -> bool {
511        matches!(self, DataType::List(_))
512    }
513
514    /// Check if this [`DataType`] is an array.
515    pub fn is_array(&self) -> bool {
516        #[cfg(feature = "dtype-array")]
517        {
518            matches!(self, DataType::Array(_, _))
519        }
520        #[cfg(not(feature = "dtype-array"))]
521        {
522            false
523        }
524    }
525
526    pub fn is_nested(&self) -> bool {
527        self.is_list() || self.is_struct() || self.is_array()
528    }
529
530    /// Check if this [`DataType`] is a struct
531    pub fn is_struct(&self) -> bool {
532        #[cfg(feature = "dtype-struct")]
533        {
534            matches!(self, DataType::Struct(_))
535        }
536        #[cfg(not(feature = "dtype-struct"))]
537        {
538            false
539        }
540    }
541
542    pub fn is_binary(&self) -> bool {
543        matches!(self, DataType::Binary)
544    }
545
546    pub fn is_date(&self) -> bool {
547        matches!(self, DataType::Date)
548    }
549    pub fn is_datetime(&self) -> bool {
550        matches!(self, DataType::Datetime(..))
551    }
552
553    pub fn is_duration(&self) -> bool {
554        matches!(self, DataType::Duration(..))
555    }
556
557    pub fn is_object(&self) -> bool {
558        #[cfg(feature = "object")]
559        {
560            matches!(self, DataType::Object(_))
561        }
562        #[cfg(not(feature = "object"))]
563        {
564            false
565        }
566    }
567
568    pub fn is_null(&self) -> bool {
569        matches!(self, DataType::Null)
570    }
571
572    pub fn contains_views(&self) -> bool {
573        use DataType::*;
574        match self {
575            Binary | String => true,
576            List(inner) => inner.contains_views(),
577            #[cfg(feature = "dtype-array")]
578            Array(inner, _) => inner.contains_views(),
579            #[cfg(feature = "dtype-struct")]
580            Struct(fields) => fields.iter().any(|field| field.dtype.contains_views()),
581            _ => false,
582        }
583    }
584
585    pub fn contains_categoricals(&self) -> bool {
586        use DataType::*;
587        match self {
588            #[cfg(feature = "dtype-categorical")]
589            Categorical(_, _) | Enum(_, _) => true,
590            List(inner) => inner.contains_categoricals(),
591            #[cfg(feature = "dtype-array")]
592            Array(inner, _) => inner.contains_categoricals(),
593            #[cfg(feature = "dtype-struct")]
594            Struct(fields) => fields
595                .iter()
596                .any(|field| field.dtype.contains_categoricals()),
597            _ => false,
598        }
599    }
600
601    pub fn contains_objects(&self) -> bool {
602        use DataType::*;
603        match self {
604            #[cfg(feature = "object")]
605            Object(_) => true,
606            List(inner) => inner.contains_objects(),
607            #[cfg(feature = "dtype-array")]
608            Array(inner, _) => inner.contains_objects(),
609            #[cfg(feature = "dtype-struct")]
610            Struct(fields) => fields.iter().any(|field| field.dtype.contains_objects()),
611            _ => false,
612        }
613    }
614
615    pub fn contains_list_recursive(&self) -> bool {
616        use DataType as D;
617        match self {
618            D::List(_) => true,
619            #[cfg(feature = "dtype-array")]
620            D::Array(inner, _) => inner.contains_list_recursive(),
621            #[cfg(feature = "dtype-struct")]
622            D::Struct(fields) => fields
623                .iter()
624                .any(|field| field.dtype.contains_list_recursive()),
625            _ => false,
626        }
627    }
628
629    pub fn contains_unknown(&self) -> bool {
630        use DataType as D;
631        match self {
632            D::Unknown(_) => true,
633            D::List(inner) => inner.contains_unknown(),
634            #[cfg(feature = "dtype-array")]
635            D::Array(inner, _) => inner.contains_unknown(),
636            #[cfg(feature = "dtype-struct")]
637            D::Struct(fields) => fields.iter().any(|field| field.dtype.contains_unknown()),
638            _ => false,
639        }
640    }
641
642    /// Check if type is sortable
643    pub fn is_ord(&self) -> bool {
644        let phys = self.to_physical();
645        phys.is_primitive_numeric()
646            || self.is_decimal()
647            || matches!(
648                phys,
649                DataType::Binary | DataType::String | DataType::Boolean
650            )
651    }
652
653    /// Check if this [`DataType`] is a Decimal type (of any scale/precision).
654    pub fn is_decimal(&self) -> bool {
655        match self {
656            #[cfg(feature = "dtype-decimal")]
657            DataType::Decimal(_, _) => true,
658            _ => false,
659        }
660    }
661
662    /// Check if this [`DataType`] is a basic floating point type (excludes Decimal).
663    /// Note, this also includes `Unknown(UnknownKind::Float)`.
664    pub fn is_float(&self) -> bool {
665        matches!(
666            self,
667            DataType::Float32 | DataType::Float64 | DataType::Unknown(UnknownKind::Float)
668        )
669    }
670
671    /// Check if this [`DataType`] is an integer. Note, this also includes `Unknown(UnknownKind::Int)`.
672    pub fn is_integer(&self) -> bool {
673        matches!(
674            self,
675            DataType::Int8
676                | DataType::Int16
677                | DataType::Int32
678                | DataType::Int64
679                | DataType::Int128
680                | DataType::UInt8
681                | DataType::UInt16
682                | DataType::UInt32
683                | DataType::UInt64
684                | DataType::Unknown(UnknownKind::Int(_))
685        )
686    }
687
688    pub fn is_signed_integer(&self) -> bool {
689        // allow because it cannot be replaced when object feature is activated
690        matches!(
691            self,
692            DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 | DataType::Int128
693        )
694    }
695
696    pub fn is_unsigned_integer(&self) -> bool {
697        matches!(
698            self,
699            DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64,
700        )
701    }
702
703    pub fn is_string(&self) -> bool {
704        matches!(self, DataType::String | DataType::Unknown(UnknownKind::Str))
705    }
706
707    pub fn is_categorical(&self) -> bool {
708        #[cfg(feature = "dtype-categorical")]
709        {
710            matches!(self, DataType::Categorical(_, _))
711        }
712        #[cfg(not(feature = "dtype-categorical"))]
713        {
714            false
715        }
716    }
717
718    pub fn is_enum(&self) -> bool {
719        #[cfg(feature = "dtype-categorical")]
720        {
721            matches!(self, DataType::Enum(_, _))
722        }
723        #[cfg(not(feature = "dtype-categorical"))]
724        {
725            false
726        }
727    }
728
729    /// Convert to an Arrow Field.
730    pub fn to_arrow_field(&self, name: PlSmallStr, compat_level: CompatLevel) -> ArrowField {
731        let metadata = match self {
732            #[cfg(feature = "dtype-categorical")]
733            DataType::Enum(fcats, _map) => {
734                let cats = fcats.categories();
735                let strings_size: usize = cats
736                    .values_iter()
737                    .map(|s| (s.len() + 1).ilog10() as usize + 1 + s.len())
738                    .sum();
739                let mut encoded = String::with_capacity(strings_size);
740                for cat in cats.values_iter() {
741                    encoded.push_str(itoa::Buffer::new().format(cat.len()));
742                    encoded.push(';');
743                    encoded.push_str(cat);
744                }
745                Some(BTreeMap::from([(
746                    PlSmallStr::from_static(DTYPE_ENUM_VALUES_NEW),
747                    PlSmallStr::from_string(encoded),
748                )]))
749            },
750            #[cfg(feature = "dtype-categorical")]
751            DataType::Categorical(cats, _) => {
752                let mut encoded = String::new();
753                encoded.push_str(itoa::Buffer::new().format(cats.name().len()));
754                encoded.push(';');
755                encoded.push_str(cats.name());
756                encoded.push_str(itoa::Buffer::new().format(cats.namespace().len()));
757                encoded.push(';');
758                encoded.push_str(cats.namespace());
759                encoded.push_str(cats.physical().as_str());
760                encoded.push(';');
761
762                Some(BTreeMap::from([(
763                    PlSmallStr::from_static(DTYPE_CATEGORICAL_NEW),
764                    PlSmallStr::from_string(encoded),
765                )]))
766            },
767            DataType::BinaryOffset => Some(BTreeMap::from([(
768                PlSmallStr::from_static(PL_KEY),
769                PlSmallStr::from_static(MAINTAIN_PL_TYPE),
770            )])),
771            _ => None,
772        };
773
774        let field = ArrowField::new(name, self.to_arrow(compat_level), true);
775
776        if let Some(metadata) = metadata {
777            field.with_metadata(metadata)
778        } else {
779            field
780        }
781    }
782
783    /// Try to get the maximum value for this datatype.
784    pub fn max(&self) -> PolarsResult<Scalar> {
785        use DataType::*;
786        let v = match self {
787            Int8 => Scalar::from(i8::MAX),
788            Int16 => Scalar::from(i16::MAX),
789            Int32 => Scalar::from(i32::MAX),
790            Int64 => Scalar::from(i64::MAX),
791            Int128 => Scalar::from(i128::MAX),
792            UInt8 => Scalar::from(u8::MAX),
793            UInt16 => Scalar::from(u16::MAX),
794            UInt32 => Scalar::from(u32::MAX),
795            UInt64 => Scalar::from(u64::MAX),
796            Float32 => Scalar::from(f32::INFINITY),
797            Float64 => Scalar::from(f64::INFINITY),
798            #[cfg(feature = "dtype-time")]
799            Time => Scalar::new(Time, AnyValue::Time(NS_IN_DAY - 1)),
800            dt => polars_bail!(ComputeError: "cannot determine upper bound for dtype `{}`", dt),
801        };
802        Ok(v)
803    }
804
805    /// Try to get the minimum value for this datatype.
806    pub fn min(&self) -> PolarsResult<Scalar> {
807        use DataType::*;
808        let v = match self {
809            Int8 => Scalar::from(i8::MIN),
810            Int16 => Scalar::from(i16::MIN),
811            Int32 => Scalar::from(i32::MIN),
812            Int64 => Scalar::from(i64::MIN),
813            Int128 => Scalar::from(i128::MIN),
814            UInt8 => Scalar::from(u8::MIN),
815            UInt16 => Scalar::from(u16::MIN),
816            UInt32 => Scalar::from(u32::MIN),
817            UInt64 => Scalar::from(u64::MIN),
818            Float32 => Scalar::from(f32::NEG_INFINITY),
819            Float64 => Scalar::from(f64::NEG_INFINITY),
820            #[cfg(feature = "dtype-time")]
821            Time => Scalar::new(Time, AnyValue::Time(0)),
822            dt => polars_bail!(ComputeError: "cannot determine lower bound for dtype `{}`", dt),
823        };
824        Ok(v)
825    }
826
827    /// Convert to an Arrow data type.
828    #[inline]
829    pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowDataType {
830        self.try_to_arrow(compat_level).unwrap()
831    }
832
833    #[inline]
834    pub fn try_to_arrow(&self, compat_level: CompatLevel) -> PolarsResult<ArrowDataType> {
835        use DataType::*;
836        match self {
837            Boolean => Ok(ArrowDataType::Boolean),
838            UInt8 => Ok(ArrowDataType::UInt8),
839            UInt16 => Ok(ArrowDataType::UInt16),
840            UInt32 => Ok(ArrowDataType::UInt32),
841            UInt64 => Ok(ArrowDataType::UInt64),
842            Int8 => Ok(ArrowDataType::Int8),
843            Int16 => Ok(ArrowDataType::Int16),
844            Int32 => Ok(ArrowDataType::Int32),
845            Int64 => Ok(ArrowDataType::Int64),
846            Int128 => Ok(ArrowDataType::Int128),
847            Float32 => Ok(ArrowDataType::Float32),
848            Float64 => Ok(ArrowDataType::Float64),
849            #[cfg(feature = "dtype-decimal")]
850            Decimal(precision, scale) => {
851                let precision = (*precision).unwrap_or(38);
852                polars_ensure!(precision <= 38 && precision > 0, InvalidOperation: "decimal precision should be <= 38 & >= 1");
853
854                Ok(ArrowDataType::Decimal(
855                    precision,
856                    scale.unwrap_or(0), // and what else can we do here?
857                ))
858            },
859            String => {
860                let dt = if compat_level.0 >= 1 {
861                    ArrowDataType::Utf8View
862                } else {
863                    ArrowDataType::LargeUtf8
864                };
865                Ok(dt)
866            },
867            Binary => {
868                let dt = if compat_level.0 >= 1 {
869                    ArrowDataType::BinaryView
870                } else {
871                    ArrowDataType::LargeBinary
872                };
873                Ok(dt)
874            },
875            Date => Ok(ArrowDataType::Date32),
876            Datetime(unit, tz) => Ok(ArrowDataType::Timestamp(
877                unit.to_arrow(),
878                tz.as_deref().cloned(),
879            )),
880            Duration(unit) => Ok(ArrowDataType::Duration(unit.to_arrow())),
881            Time => Ok(ArrowDataType::Time64(ArrowTimeUnit::Nanosecond)),
882            #[cfg(feature = "dtype-array")]
883            Array(dt, size) => Ok(dt
884                .try_to_arrow(compat_level)?
885                .to_fixed_size_list(*size, true)),
886            List(dt) => Ok(ArrowDataType::LargeList(Box::new(
887                dt.to_arrow_field(LIST_VALUES_NAME, compat_level),
888            ))),
889            Null => Ok(ArrowDataType::Null),
890            #[cfg(feature = "object")]
891            Object(_) => Ok(get_object_physical_type()),
892            #[cfg(feature = "dtype-categorical")]
893            Categorical(_, _) | Enum(_, _) => {
894                let arrow_phys = match self.cat_physical().unwrap() {
895                    CategoricalPhysical::U8 => IntegerType::UInt8,
896                    CategoricalPhysical::U16 => IntegerType::UInt16,
897                    CategoricalPhysical::U32 => IntegerType::UInt32,
898                };
899
900                let values = if compat_level.0 >= 1 {
901                    ArrowDataType::Utf8View
902                } else {
903                    ArrowDataType::LargeUtf8
904                };
905
906                Ok(ArrowDataType::Dictionary(
907                    arrow_phys,
908                    Box::new(values),
909                    false,
910                ))
911            },
912            #[cfg(feature = "dtype-struct")]
913            Struct(fields) => {
914                let fields = fields
915                    .iter()
916                    .map(|fld| fld.to_arrow(compat_level))
917                    .collect();
918                Ok(ArrowDataType::Struct(fields))
919            },
920            BinaryOffset => Ok(ArrowDataType::LargeBinary),
921            Unknown(kind) => {
922                let dt = match kind {
923                    UnknownKind::Any | UnknownKind::Ufunc => ArrowDataType::Unknown,
924                    UnknownKind::Float => ArrowDataType::Float64,
925                    UnknownKind::Str => ArrowDataType::Utf8View,
926                    UnknownKind::Int(v) => {
927                        return materialize_dyn_int(*v).dtype().try_to_arrow(compat_level);
928                    },
929                };
930                Ok(dt)
931            },
932        }
933    }
934
935    pub fn is_nested_null(&self) -> bool {
936        use DataType::*;
937        match self {
938            Null => true,
939            List(field) => field.is_nested_null(),
940            #[cfg(feature = "dtype-array")]
941            Array(field, _) => field.is_nested_null(),
942            #[cfg(feature = "dtype-struct")]
943            Struct(fields) => fields.iter().all(|fld| fld.dtype.is_nested_null()),
944            _ => false,
945        }
946    }
947
948    /// Answers if this type matches the given type of a schema.
949    ///
950    /// Allows (nested) Null types in this type to match any type in the schema,
951    /// but not vice versa. In such a case Ok(true) is returned, because a cast
952    /// is necessary. If no cast is necessary Ok(false) is returned, and an
953    /// error is returned if the types are incompatible.
954    pub fn matches_schema_type(&self, schema_type: &DataType) -> PolarsResult<bool> {
955        match (self, schema_type) {
956            (DataType::List(l), DataType::List(r)) => l.matches_schema_type(r),
957            #[cfg(feature = "dtype-array")]
958            (DataType::Array(l, sl), DataType::Array(r, sr)) => {
959                Ok(l.matches_schema_type(r)? && sl == sr)
960            },
961            #[cfg(feature = "dtype-struct")]
962            (DataType::Struct(l), DataType::Struct(r)) => {
963                if l.len() != r.len() {
964                    polars_bail!(SchemaMismatch: "structs have different number of fields: {} vs {}", l.len(), r.len());
965                }
966                let mut must_cast = false;
967                for (l, r) in l.iter().zip(r.iter()) {
968                    must_cast |= l.dtype.matches_schema_type(&r.dtype)?;
969                }
970                Ok(must_cast)
971            },
972            (DataType::Null, DataType::Null) => Ok(false),
973            #[cfg(feature = "dtype-decimal")]
974            (DataType::Decimal(_, s1), DataType::Decimal(_, s2)) => Ok(s1 != s2),
975            // We don't allow the other way around, only if our current type is
976            // null and the schema isn't we allow it.
977            (DataType::Null, _) => Ok(true),
978            #[cfg(feature = "dtype-categorical")]
979            (DataType::Categorical(l, _), DataType::Categorical(r, _)) => {
980                ensure_same_categories(l, r)?;
981                Ok(false)
982            },
983            #[cfg(feature = "dtype-categorical")]
984            (DataType::Enum(l, _), DataType::Enum(r, _)) => {
985                ensure_same_frozen_categories(l, r)?;
986                Ok(false)
987            },
988
989            (l, r) if l == r => Ok(false),
990            (l, r) => {
991                polars_bail!(SchemaMismatch: "type {:?} is incompatible with expected type {:?}", l, r)
992            },
993        }
994    }
995
996    #[inline]
997    pub fn is_unknown(&self) -> bool {
998        matches!(self, DataType::Unknown(_))
999    }
1000
1001    pub fn nesting_level(&self) -> usize {
1002        let mut level = 0;
1003        let mut slf = self;
1004        while let Some(inner_dtype) = slf.inner_dtype() {
1005            level += 1;
1006            slf = inner_dtype;
1007        }
1008        level
1009    }
1010
1011    /// If this dtype is a Categorical or Enum, returns the physical backing type.
1012    #[cfg(feature = "dtype-categorical")]
1013    pub fn cat_physical(&self) -> PolarsResult<CategoricalPhysical> {
1014        match self {
1015            DataType::Categorical(cats, _) => Ok(cats.physical()),
1016            DataType::Enum(fcats, _) => Ok(fcats.physical()),
1017            _ => {
1018                polars_bail!(SchemaMismatch: "invalid dtype: expected an Enum or Categorical type, received '{:?}'", self)
1019            },
1020        }
1021    }
1022
1023    /// If this dtype is a Categorical or Enum, returns the underlying mapping.
1024    #[cfg(feature = "dtype-categorical")]
1025    pub fn cat_mapping(&self) -> PolarsResult<&Arc<CategoricalMapping>> {
1026        match self {
1027            DataType::Categorical(_, mapping) | DataType::Enum(_, mapping) => Ok(mapping),
1028            _ => {
1029                polars_bail!(SchemaMismatch: "invalid dtype: expected an Enum or Categorical type, received '{:?}'", self)
1030            },
1031        }
1032    }
1033
1034    #[cfg(feature = "dtype-categorical")]
1035    pub fn from_categories(cats: Arc<Categories>) -> Self {
1036        let mapping = cats.mapping();
1037        Self::Categorical(cats, mapping)
1038    }
1039
1040    #[cfg(feature = "dtype-categorical")]
1041    pub fn from_frozen_categories(fcats: Arc<FrozenCategories>) -> Self {
1042        let mapping = fcats.mapping().clone();
1043        Self::Enum(fcats, mapping)
1044    }
1045
1046    pub fn is_numeric(&self) -> bool {
1047        self.is_integer() || self.is_float() || self.is_decimal()
1048    }
1049}
1050
1051impl Display for DataType {
1052    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1053        let s = match self {
1054            DataType::Null => "null",
1055            DataType::Boolean => "bool",
1056            DataType::UInt8 => "u8",
1057            DataType::UInt16 => "u16",
1058            DataType::UInt32 => "u32",
1059            DataType::UInt64 => "u64",
1060            DataType::Int8 => "i8",
1061            DataType::Int16 => "i16",
1062            DataType::Int32 => "i32",
1063            DataType::Int64 => "i64",
1064            DataType::Int128 => "i128",
1065            DataType::Float32 => "f32",
1066            DataType::Float64 => "f64",
1067            #[cfg(feature = "dtype-decimal")]
1068            DataType::Decimal(precision, scale) => {
1069                return match (precision, scale) {
1070                    (Some(precision), Some(scale)) => {
1071                        f.write_str(&format!("decimal[{precision},{scale}]"))
1072                    },
1073                    (None, Some(scale)) => f.write_str(&format!("decimal[*,{scale}]")),
1074                    _ => f.write_str("decimal[?]"), // shouldn't happen
1075                };
1076            },
1077            DataType::String => "str",
1078            DataType::Binary => "binary",
1079            DataType::Date => "date",
1080            DataType::Datetime(tu, tz) => {
1081                let s = match tz {
1082                    None => format!("datetime[{tu}]"),
1083                    Some(tz) => format!("datetime[{tu}, {tz}]"),
1084                };
1085                return f.write_str(&s);
1086            },
1087            DataType::Duration(tu) => return write!(f, "duration[{tu}]"),
1088            DataType::Time => "time",
1089            #[cfg(feature = "dtype-array")]
1090            DataType::Array(_, _) => {
1091                let tp = self.array_leaf_dtype().unwrap();
1092
1093                let dims = self.get_shape().unwrap();
1094                let shape = if dims.len() == 1 {
1095                    format!("{}", dims[0])
1096                } else {
1097                    format_tuple!(dims)
1098                };
1099                return write!(f, "array[{tp}, {shape}]");
1100            },
1101            DataType::List(tp) => return write!(f, "list[{tp}]"),
1102            #[cfg(feature = "object")]
1103            DataType::Object(s) => s,
1104            #[cfg(feature = "dtype-categorical")]
1105            DataType::Categorical(_, _) => "cat",
1106            #[cfg(feature = "dtype-categorical")]
1107            DataType::Enum(_, _) => "enum",
1108            #[cfg(feature = "dtype-struct")]
1109            DataType::Struct(fields) => return write!(f, "struct[{}]", fields.len()),
1110            DataType::Unknown(kind) => match kind {
1111                UnknownKind::Ufunc => "unknown ufunc",
1112                UnknownKind::Any => "unknown",
1113                UnknownKind::Int(_) => "dyn int",
1114                UnknownKind::Float => "dyn float",
1115                UnknownKind::Str => "dyn str",
1116            },
1117            DataType::BinaryOffset => "binary[offset]",
1118        };
1119        f.write_str(s)
1120    }
1121}
1122
1123impl std::fmt::Debug for DataType {
1124    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1125        use DataType::*;
1126        match self {
1127            Boolean => write!(f, "Boolean"),
1128            UInt8 => write!(f, "UInt8"),
1129            UInt16 => write!(f, "UInt16"),
1130            UInt32 => write!(f, "UInt32"),
1131            UInt64 => write!(f, "UInt64"),
1132            Int8 => write!(f, "Int8"),
1133            Int16 => write!(f, "Int16"),
1134            Int32 => write!(f, "Int32"),
1135            Int64 => write!(f, "Int64"),
1136            Int128 => write!(f, "Int128"),
1137            Float32 => write!(f, "Float32"),
1138            Float64 => write!(f, "Float64"),
1139            String => write!(f, "String"),
1140            Binary => write!(f, "Binary"),
1141            BinaryOffset => write!(f, "BinaryOffset"),
1142            Date => write!(f, "Date"),
1143            Time => write!(f, "Time"),
1144            Duration(unit) => write!(f, "Duration('{unit}')"),
1145            Datetime(unit, opt_tz) => {
1146                if let Some(tz) = opt_tz {
1147                    write!(f, "Datetime('{unit}', '{tz}')")
1148                } else {
1149                    write!(f, "Datetime('{unit}')")
1150                }
1151            },
1152            #[cfg(feature = "dtype-decimal")]
1153            Decimal(opt_p, opt_s) => match (opt_p, opt_s) {
1154                (None, None) => write!(f, "Decimal(None, None)"),
1155                (None, Some(s)) => write!(f, "Decimal(None, {s})"),
1156                (Some(p), None) => write!(f, "Decimal({p}, None)"),
1157                (Some(p), Some(s)) => write!(f, "Decimal({p}, {s})"),
1158            },
1159            #[cfg(feature = "dtype-array")]
1160            Array(inner, size) => write!(f, "Array({inner:?}, {size})"),
1161            List(inner) => write!(f, "List({inner:?})"),
1162            #[cfg(feature = "dtype-struct")]
1163            Struct(fields) => {
1164                let mut first = true;
1165                write!(f, "Struct({{")?;
1166                for field in fields {
1167                    if !first {
1168                        write!(f, ", ")?;
1169                    }
1170                    write!(f, "'{}': {:?}", field.name(), field.dtype())?;
1171                    first = false;
1172                }
1173                write!(f, "}})")
1174            },
1175            #[cfg(feature = "dtype-categorical")]
1176            Categorical(cats, _) => {
1177                if cats.is_global() {
1178                    write!(f, "Categorical")
1179                } else if cats.namespace().is_empty() && cats.physical() == CategoricalPhysical::U32
1180                {
1181                    write!(f, "Categorical('{}')", cats.name())
1182                } else {
1183                    write!(
1184                        f,
1185                        "Categorical('{}', '{}', {:?})",
1186                        cats.name(),
1187                        cats.namespace(),
1188                        cats.physical()
1189                    )
1190                }
1191            },
1192            #[cfg(feature = "dtype-categorical")]
1193            Enum(_, _) => write!(f, "Enum([...])"),
1194            #[cfg(feature = "object")]
1195            Object(_) => write!(f, "Object"),
1196            Null => write!(f, "Null"),
1197            Unknown(kind) => write!(f, "Unknown({kind:?})"),
1198        }
1199    }
1200}
1201
1202pub fn merge_dtypes(left: &DataType, right: &DataType) -> PolarsResult<DataType> {
1203    use DataType::*;
1204    Ok(match (left, right) {
1205        #[cfg(feature = "dtype-categorical")]
1206        (Categorical(cats_l, map), Categorical(cats_r, _)) => {
1207            ensure_same_categories(cats_l, cats_r)?;
1208            Categorical(cats_l.clone(), map.clone())
1209        },
1210        #[cfg(feature = "dtype-categorical")]
1211        (Enum(fcats_l, map), Enum(fcats_r, _)) => {
1212            ensure_same_frozen_categories(fcats_l, fcats_r)?;
1213            Enum(fcats_l.clone(), map.clone())
1214        },
1215        (List(inner_l), List(inner_r)) => {
1216            let merged = merge_dtypes(inner_l, inner_r)?;
1217            List(Box::new(merged))
1218        },
1219        #[cfg(feature = "dtype-struct")]
1220        (Struct(inner_l), Struct(inner_r)) => {
1221            polars_ensure!(inner_l.len() == inner_r.len(), ComputeError: "cannot combine structs with differing amounts of fields ({} != {})", inner_l.len(), inner_r.len());
1222            let fields = inner_l.iter().zip(inner_r.iter()).map(|(l, r)| {
1223                polars_ensure!(l.name() == r.name(), ComputeError: "cannot combine structs with different fields ({} != {})", l.name(), r.name());
1224                let merged = merge_dtypes(l.dtype(), r.dtype())?;
1225                Ok(Field::new(l.name().clone(), merged))
1226            }).collect::<PolarsResult<Vec<_>>>()?;
1227            Struct(fields)
1228        },
1229        #[cfg(feature = "dtype-array")]
1230        (Array(inner_l, width_l), Array(inner_r, width_r)) => {
1231            polars_ensure!(width_l == width_r, ComputeError: "widths of FixedSizeWidth Series are not equal");
1232            let merged = merge_dtypes(inner_l, inner_r)?;
1233            Array(Box::new(merged), *width_l)
1234        },
1235        (left, right) if left == right => left.clone(),
1236        _ => polars_bail!(ComputeError: "unable to merge datatypes"),
1237    })
1238}
1239
1240fn collect_nested_types(
1241    dtype: &DataType,
1242    result: &mut PlHashSet<DataType>,
1243    include_compound_types: bool,
1244) {
1245    match dtype {
1246        DataType::List(inner) => {
1247            if include_compound_types {
1248                result.insert(dtype.clone());
1249            }
1250            collect_nested_types(inner, result, include_compound_types);
1251        },
1252        #[cfg(feature = "dtype-array")]
1253        DataType::Array(inner, _) => {
1254            if include_compound_types {
1255                result.insert(dtype.clone());
1256            }
1257            collect_nested_types(inner, result, include_compound_types);
1258        },
1259        #[cfg(feature = "dtype-struct")]
1260        DataType::Struct(fields) => {
1261            if include_compound_types {
1262                result.insert(dtype.clone());
1263            }
1264            for field in fields {
1265                collect_nested_types(field.dtype(), result, include_compound_types);
1266            }
1267        },
1268        _ => {
1269            result.insert(dtype.clone());
1270        },
1271    }
1272}
1273
1274pub fn unpack_dtypes(dtype: &DataType, include_compound_types: bool) -> PlHashSet<DataType> {
1275    let mut result = PlHashSet::new();
1276    collect_nested_types(dtype, &mut result, include_compound_types);
1277    result
1278}
1279
1280#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
1281#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1282#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
1283pub struct CompatLevel(pub(crate) u16);
1284
1285impl CompatLevel {
1286    pub const fn newest() -> CompatLevel {
1287        CompatLevel(1)
1288    }
1289
1290    pub const fn oldest() -> CompatLevel {
1291        CompatLevel(0)
1292    }
1293
1294    // The following methods are only used internally
1295
1296    #[doc(hidden)]
1297    pub fn with_level(level: u16) -> PolarsResult<CompatLevel> {
1298        if level > CompatLevel::newest().0 {
1299            polars_bail!(InvalidOperation: "invalid compat level");
1300        }
1301        Ok(CompatLevel(level))
1302    }
1303
1304    #[doc(hidden)]
1305    pub fn get_level(&self) -> u16 {
1306        self.0
1307    }
1308}
1309
#[cfg(test)]
mod tests {
    use super::*;

    /// Without compound types only the innermost dtype is reported.
    #[cfg(feature = "dtype-array")]
    #[test]
    fn test_unpack_primitive_dtypes() {
        let nested = DataType::List(Box::new(DataType::Array(
            Box::new(DataType::Float64),
            10,
        )));

        let mut expected = PlHashSet::new();
        expected.extend([DataType::Float64]);

        assert_eq!(unpack_dtypes(&nested, false), expected)
    }

    /// With compound types every container level is reported next to the leaf.
    #[cfg(feature = "dtype-array")]
    #[test]
    fn test_unpack_compound_dtypes() {
        let array_type = DataType::Array(Box::new(DataType::Float64), 10);
        let list_type = DataType::List(Box::new(array_type.clone()));

        let result = unpack_dtypes(&list_type, true);

        let mut expected = PlHashSet::new();
        expected.extend([list_type, array_type, DataType::Float64]);

        assert_eq!(result, expected)
    }
}
1345}