polars_core/datatypes/
field.rs

1use arrow::datatypes::{Metadata, DTYPE_ENUM_VALUES};
2use polars_utils::pl_str::PlSmallStr;
3
4use super::*;
/// Name identifying the Polars-specific Arrow extension type.
///
/// [`DataType::from_arrow`] compares the name of an `ArrowDataType::Extension` against this
/// value to recognise it.
pub static EXTENSION_NAME: &str = "POLARS_EXTENSION_TYPE";
6
7/// Characterizes the name and the [`DataType`] of a column.
8#[derive(Clone, Debug, PartialEq, Eq, Hash)]
9#[cfg_attr(
10    any(feature = "serde", feature = "serde-lazy"),
11    derive(Serialize, Deserialize)
12)]
13pub struct Field {
14    pub name: PlSmallStr,
15    pub dtype: DataType,
16}
17
18impl From<Field> for (PlSmallStr, DataType) {
19    fn from(value: Field) -> Self {
20        (value.name, value.dtype)
21    }
22}
23
24pub type FieldRef = Arc<Field>;
25
26impl Field {
27    /// Creates a new `Field`.
28    ///
29    /// # Example
30    ///
31    /// ```rust
32    /// # use polars_core::prelude::*;
33    /// let f1 = Field::new("Fruit name".into(), DataType::String);
34    /// let f2 = Field::new("Lawful".into(), DataType::Boolean);
35    /// let f2 = Field::new("Departure".into(), DataType::Time);
36    /// ```
37    #[inline]
38    pub fn new(name: PlSmallStr, dtype: DataType) -> Self {
39        Field { name, dtype }
40    }
41
42    /// Returns a reference to the `Field` name.
43    ///
44    /// # Example
45    ///
46    /// ```rust
47    /// # use polars_core::prelude::*;
48    /// let f = Field::new("Year".into(), DataType::Int32);
49    ///
50    /// assert_eq!(f.name(), "Year");
51    /// ```
52    #[inline]
53    pub fn name(&self) -> &PlSmallStr {
54        &self.name
55    }
56
57    /// Returns a reference to the `Field` datatype.
58    ///
59    /// # Example
60    ///
61    /// ```rust
62    /// # use polars_core::prelude::*;
63    /// let f = Field::new("Birthday".into(), DataType::Date);
64    ///
65    /// assert_eq!(f.dtype(), &DataType::Date);
66    /// ```
67    #[inline]
68    pub fn dtype(&self) -> &DataType {
69        &self.dtype
70    }
71
72    /// Sets the `Field` datatype.
73    ///
74    /// # Example
75    ///
76    /// ```rust
77    /// # use polars_core::prelude::*;
78    /// let mut f = Field::new("Temperature".into(), DataType::Int32);
79    /// f.coerce(DataType::Float32);
80    ///
81    /// assert_eq!(f, Field::new("Temperature".into(), DataType::Float32));
82    /// ```
83    pub fn coerce(&mut self, dtype: DataType) {
84        self.dtype = dtype;
85    }
86
87    /// Sets the `Field` name.
88    ///
89    /// # Example
90    ///
91    /// ```rust
92    /// # use polars_core::prelude::*;
93    /// let mut f = Field::new("Atomic number".into(), DataType::UInt32);
94    /// f.set_name("Proton".into());
95    ///
96    /// assert_eq!(f, Field::new("Proton".into(), DataType::UInt32));
97    /// ```
98    pub fn set_name(&mut self, name: PlSmallStr) {
99        self.name = name;
100    }
101
102    /// Returns this `Field`, renamed.
103    pub fn with_name(mut self, name: PlSmallStr) -> Self {
104        self.name = name;
105        self
106    }
107
108    /// Converts the `Field` to an `arrow::datatypes::Field`.
109    ///
110    /// # Example
111    ///
112    /// ```rust
113    /// # use polars_core::prelude::*;
114    /// let f = Field::new("Value".into(), DataType::Int64);
115    /// let af = arrow::datatypes::Field::new("Value".into(), arrow::datatypes::ArrowDataType::Int64, true);
116    ///
117    /// assert_eq!(f.to_arrow(CompatLevel::newest()), af);
118    /// ```
119    pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowField {
120        self.dtype.to_arrow_field(self.name.clone(), compat_level)
121    }
122}
123
124impl AsRef<DataType> for Field {
125    fn as_ref(&self) -> &DataType {
126        &self.dtype
127    }
128}
129
130impl AsRef<DataType> for DataType {
131    fn as_ref(&self) -> &DataType {
132        self
133    }
134}
135
136impl DataType {
137    pub fn boxed(self) -> Box<DataType> {
138        Box::new(self)
139    }
140
141    pub fn from_arrow_field(field: &ArrowField) -> DataType {
142        Self::from_arrow(&field.dtype, true, field.metadata.as_deref())
143    }
144
145    pub fn from_arrow_dtype(dt: &ArrowDataType) -> DataType {
146        Self::from_arrow(dt, true, None)
147    }
148
149    pub fn from_arrow(dt: &ArrowDataType, bin_to_view: bool, md: Option<&Metadata>) -> DataType {
150        match dt {
151            ArrowDataType::Null => DataType::Null,
152            ArrowDataType::UInt8 => DataType::UInt8,
153            ArrowDataType::UInt16 => DataType::UInt16,
154            ArrowDataType::UInt32 => DataType::UInt32,
155            ArrowDataType::UInt64 => DataType::UInt64,
156            ArrowDataType::Int8 => DataType::Int8,
157            ArrowDataType::Int16 => DataType::Int16,
158            ArrowDataType::Int32 => DataType::Int32,
159            ArrowDataType::Int64 => DataType::Int64,
160            #[cfg(feature = "dtype-i128")]
161            ArrowDataType::Int128 => DataType::Int128,
162            ArrowDataType::Boolean => DataType::Boolean,
163            ArrowDataType::Float32 => DataType::Float32,
164            ArrowDataType::Float64 => DataType::Float64,
165            #[cfg(feature = "dtype-array")]
166            ArrowDataType::FixedSizeList(f, size) => DataType::Array(DataType::from_arrow_field(f).boxed(), *size),
167            ArrowDataType::LargeList(f) | ArrowDataType::List(f) => DataType::List(DataType::from_arrow_field(f).boxed()),
168            ArrowDataType::Date32 => DataType::Date,
169            ArrowDataType::Timestamp(tu, tz) => DataType::Datetime(tu.into(), DataType::canonical_timezone(tz)),
170            ArrowDataType::Duration(tu) => DataType::Duration(tu.into()),
171            ArrowDataType::Date64 => DataType::Datetime(TimeUnit::Milliseconds, None),
172            ArrowDataType::Time64(_) | ArrowDataType::Time32(_) => DataType::Time,
173            #[cfg(feature = "dtype-categorical")]
174            ArrowDataType::Dictionary(_, value_type, _) => {
175                if md.map(|md| md.is_enum()).unwrap_or(false) {
176                    let md = md.unwrap();
177                    let encoded = md.get(DTYPE_ENUM_VALUES).unwrap();
178                    let mut encoded = encoded.as_str();
179                    let mut cats = MutableBinaryViewArray::<str>::new();
180
181                    // Data is encoded as <len in ascii><sep ';'><payload>
182                    // We know thus that len is only [0-9] and the first ';' doesn't belong to the
183                    // payload.
184                    while let Some(pos) = encoded.find(';') {
185                            let (len, remainder) =  encoded.split_at(pos);
186                            // Split off ';'
187                            encoded = &remainder[1..];
188                            let len = len.parse::<usize>().unwrap();
189
190                            let (value, remainder) = encoded.split_at(len);
191                            cats.push_value(value);
192                            encoded = remainder;
193                    }
194                    DataType::Enum(Some(Arc::new(RevMapping::build_local(cats.into()))), Default::default())
195                } else if let Some(ordering) = md.and_then(|md| md.categorical()) {
196                    DataType::Categorical(None, ordering)
197                } else if matches!(value_type.as_ref(), ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View) {
198                    DataType::Categorical(None, Default::default())
199                } else {
200                    Self::from_arrow(value_type, bin_to_view, None)
201                }
202            },
203            #[cfg(feature = "dtype-struct")]
204            ArrowDataType::Struct(fields) => {
205                DataType::Struct(fields.iter().map(|fld| fld.into()).collect())
206            }
207            #[cfg(not(feature = "dtype-struct"))]
208            ArrowDataType::Struct(_) => {
209                panic!("activate the 'dtype-struct' feature to handle struct data types")
210            }
211            ArrowDataType::Extension(ext) if ext.name.as_str() == EXTENSION_NAME => {
212                #[cfg(feature = "object")]
213                {
214                    DataType::Object("object", None)
215                }
216                #[cfg(not(feature = "object"))]
217                {
218                    panic!("activate the 'object' feature to be able to load POLARS_EXTENSION_TYPE")
219                }
220            }
221            #[cfg(feature = "dtype-decimal")]
222            ArrowDataType::Decimal(precision, scale) => DataType::Decimal(Some(*precision), Some(*scale)),
223            ArrowDataType::Utf8View |ArrowDataType::LargeUtf8 | ArrowDataType::Utf8 => DataType::String,
224            ArrowDataType::BinaryView => DataType::Binary,
225            ArrowDataType::LargeBinary | ArrowDataType::Binary => {
226                if bin_to_view {
227                    DataType::Binary
228                } else {
229
230                    DataType::BinaryOffset
231                }
232            },
233            ArrowDataType::FixedSizeBinary(_) => DataType::Binary,
234            dt => panic!("Arrow datatype {dt:?} not supported by Polars. You probably need to activate that data-type feature."),
235        }
236    }
237}
238
239impl From<&ArrowField> for Field {
240    fn from(f: &ArrowField) -> Self {
241        Field::new(f.name.clone(), DataType::from_arrow_field(f))
242    }
243}