polars_core/datatypes/
field.rs1use arrow::datatypes::{Metadata, DTYPE_ENUM_VALUES};
2use polars_utils::pl_str::PlSmallStr;
3
4use super::*;
/// Name identifying the Polars-specific Arrow extension type.
///
/// `DataType::from_arrow` compares the name of an `ArrowDataType::Extension` against this
/// value to decide whether the extension is handled.
pub static EXTENSION_NAME: &str = "POLARS_EXTENSION_TYPE";
6
7#[derive(Clone, Debug, PartialEq, Eq, Hash)]
9#[cfg_attr(
10 any(feature = "serde", feature = "serde-lazy"),
11 derive(Serialize, Deserialize)
12)]
13pub struct Field {
14 pub name: PlSmallStr,
15 pub dtype: DataType,
16}
17
18impl From<Field> for (PlSmallStr, DataType) {
19 fn from(value: Field) -> Self {
20 (value.name, value.dtype)
21 }
22}
23
24pub type FieldRef = Arc<Field>;
25
26impl Field {
27 #[inline]
38 pub fn new(name: PlSmallStr, dtype: DataType) -> Self {
39 Field { name, dtype }
40 }
41
42 #[inline]
53 pub fn name(&self) -> &PlSmallStr {
54 &self.name
55 }
56
57 #[inline]
68 pub fn dtype(&self) -> &DataType {
69 &self.dtype
70 }
71
72 pub fn coerce(&mut self, dtype: DataType) {
84 self.dtype = dtype;
85 }
86
87 pub fn set_name(&mut self, name: PlSmallStr) {
99 self.name = name;
100 }
101
102 pub fn with_name(mut self, name: PlSmallStr) -> Self {
104 self.name = name;
105 self
106 }
107
108 pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowField {
120 self.dtype.to_arrow_field(self.name.clone(), compat_level)
121 }
122}
123
124impl AsRef<DataType> for Field {
125 fn as_ref(&self) -> &DataType {
126 &self.dtype
127 }
128}
129
130impl AsRef<DataType> for DataType {
131 fn as_ref(&self) -> &DataType {
132 self
133 }
134}
135
136impl DataType {
137 pub fn boxed(self) -> Box<DataType> {
138 Box::new(self)
139 }
140
141 pub fn from_arrow_field(field: &ArrowField) -> DataType {
142 Self::from_arrow(&field.dtype, true, field.metadata.as_deref())
143 }
144
145 pub fn from_arrow_dtype(dt: &ArrowDataType) -> DataType {
146 Self::from_arrow(dt, true, None)
147 }
148
149 pub fn from_arrow(dt: &ArrowDataType, bin_to_view: bool, md: Option<&Metadata>) -> DataType {
150 match dt {
151 ArrowDataType::Null => DataType::Null,
152 ArrowDataType::UInt8 => DataType::UInt8,
153 ArrowDataType::UInt16 => DataType::UInt16,
154 ArrowDataType::UInt32 => DataType::UInt32,
155 ArrowDataType::UInt64 => DataType::UInt64,
156 ArrowDataType::Int8 => DataType::Int8,
157 ArrowDataType::Int16 => DataType::Int16,
158 ArrowDataType::Int32 => DataType::Int32,
159 ArrowDataType::Int64 => DataType::Int64,
160 #[cfg(feature = "dtype-i128")]
161 ArrowDataType::Int128 => DataType::Int128,
162 ArrowDataType::Boolean => DataType::Boolean,
163 ArrowDataType::Float32 => DataType::Float32,
164 ArrowDataType::Float64 => DataType::Float64,
165 #[cfg(feature = "dtype-array")]
166 ArrowDataType::FixedSizeList(f, size) => DataType::Array(DataType::from_arrow_field(f).boxed(), *size),
167 ArrowDataType::LargeList(f) | ArrowDataType::List(f) => DataType::List(DataType::from_arrow_field(f).boxed()),
168 ArrowDataType::Date32 => DataType::Date,
169 ArrowDataType::Timestamp(tu, tz) => DataType::Datetime(tu.into(), DataType::canonical_timezone(tz)),
170 ArrowDataType::Duration(tu) => DataType::Duration(tu.into()),
171 ArrowDataType::Date64 => DataType::Datetime(TimeUnit::Milliseconds, None),
172 ArrowDataType::Time64(_) | ArrowDataType::Time32(_) => DataType::Time,
173 #[cfg(feature = "dtype-categorical")]
174 ArrowDataType::Dictionary(_, value_type, _) => {
175 if md.map(|md| md.is_enum()).unwrap_or(false) {
176 let md = md.unwrap();
177 let encoded = md.get(DTYPE_ENUM_VALUES).unwrap();
178 let mut encoded = encoded.as_str();
179 let mut cats = MutableBinaryViewArray::<str>::new();
180
181 while let Some(pos) = encoded.find(';') {
185 let (len, remainder) = encoded.split_at(pos);
186 encoded = &remainder[1..];
188 let len = len.parse::<usize>().unwrap();
189
190 let (value, remainder) = encoded.split_at(len);
191 cats.push_value(value);
192 encoded = remainder;
193 }
194 DataType::Enum(Some(Arc::new(RevMapping::build_local(cats.into()))), Default::default())
195 } else if let Some(ordering) = md.and_then(|md| md.categorical()) {
196 DataType::Categorical(None, ordering)
197 } else if matches!(value_type.as_ref(), ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View) {
198 DataType::Categorical(None, Default::default())
199 } else {
200 Self::from_arrow(value_type, bin_to_view, None)
201 }
202 },
203 #[cfg(feature = "dtype-struct")]
204 ArrowDataType::Struct(fields) => {
205 DataType::Struct(fields.iter().map(|fld| fld.into()).collect())
206 }
207 #[cfg(not(feature = "dtype-struct"))]
208 ArrowDataType::Struct(_) => {
209 panic!("activate the 'dtype-struct' feature to handle struct data types")
210 }
211 ArrowDataType::Extension(ext) if ext.name.as_str() == EXTENSION_NAME => {
212 #[cfg(feature = "object")]
213 {
214 DataType::Object("object", None)
215 }
216 #[cfg(not(feature = "object"))]
217 {
218 panic!("activate the 'object' feature to be able to load POLARS_EXTENSION_TYPE")
219 }
220 }
221 #[cfg(feature = "dtype-decimal")]
222 ArrowDataType::Decimal(precision, scale) => DataType::Decimal(Some(*precision), Some(*scale)),
223 ArrowDataType::Utf8View |ArrowDataType::LargeUtf8 | ArrowDataType::Utf8 => DataType::String,
224 ArrowDataType::BinaryView => DataType::Binary,
225 ArrowDataType::LargeBinary | ArrowDataType::Binary => {
226 if bin_to_view {
227 DataType::Binary
228 } else {
229
230 DataType::BinaryOffset
231 }
232 },
233 ArrowDataType::FixedSizeBinary(_) => DataType::Binary,
234 dt => panic!("Arrow datatype {dt:?} not supported by Polars. You probably need to activate that data-type feature."),
235 }
236 }
237}
238
239impl From<&ArrowField> for Field {
240 fn from(f: &ArrowField) -> Self {
241 Field::new(f.name.clone(), DataType::from_arrow_field(f))
242 }
243}