Skip to main content

polars_core/series/
into.rs

1#[cfg(any(
2    feature = "dtype-datetime",
3    feature = "dtype-date",
4    feature = "dtype-duration",
5    feature = "dtype-time"
6))]
7use polars_compute::cast::cast_default;
8use polars_compute::cast::cast_unchecked;
9
10use crate::prelude::*;
11
12impl Series {
13    /// Returns a reference to the Arrow ArrayRef
14    #[inline]
15    pub fn array_ref(&self, chunk_idx: usize) -> &ArrayRef {
16        &self.chunks()[chunk_idx] as &ArrayRef
17    }
18
19    /// Convert a chunk in the Series to the correct Arrow type.
20    /// This conversion is needed because polars doesn't use a
21    /// 1 on 1 mapping for logical/categoricals, etc.
22    pub fn to_arrow(&self, chunk_idx: usize, compat_level: CompatLevel) -> ArrayRef {
23        self.to_arrow_with_field(chunk_idx, compat_level, None)
24            .unwrap()
25    }
26
27    pub fn to_arrow_with_field(
28        &self,
29        chunk_idx: usize,
30        compat_level: CompatLevel,
31        output_arrow_field: Option<&ArrowField>,
32    ) -> PolarsResult<ArrayRef> {
33        ToArrowConverter {
34            compat_level,
35            #[cfg(feature = "dtype-categorical")]
36            categorical_converter: {
37                let mut categorical_converter =
38                    crate::series::categorical_to_arrow::CategoricalToArrowConverter {
39                        converters: Default::default(),
40                        persist_remap: false,
41                        output_keys_only: false,
42                    };
43
44                categorical_converter.initialize(self.dtype());
45
46                categorical_converter
47            },
48        }
49        .array_to_arrow(
50            self.chunks().get(chunk_idx).unwrap().as_ref(),
51            self.dtype(),
52            output_arrow_field,
53        )
54    }
55}
56
57pub struct ToArrowConverter {
58    pub compat_level: CompatLevel,
59    #[cfg(feature = "dtype-categorical")]
60    pub categorical_converter: crate::series::categorical_to_arrow::CategoricalToArrowConverter,
61}
62
63impl ToArrowConverter {
64    /// Returns an error if `output_arrow_field` was provided and does not match the output data type.
65    pub fn array_to_arrow(
66        &mut self,
67        array: &dyn Array,
68        dtype: &DataType,
69        output_arrow_field: Option<&ArrowField>,
70    ) -> PolarsResult<Box<dyn Array>> {
71        let out = self.array_to_arrow_impl(array, dtype, output_arrow_field)?;
72
73        if let Some(field) = output_arrow_field {
74            polars_ensure!(
75                field.is_nullable || !out.has_nulls(),
76                SchemaMismatch:
77                "to_arrow(): nullable is false but array contained {} NULLs (arrow field: {:?})",
78                out.null_count(), field,
79            );
80
81            // Don't eq nested types (they will recurse here with the inner types).
82            if (!field.dtype().is_nested()
83                || matches!(field.dtype(), ArrowDataType::Dictionary(..)))
84                && out.dtype() != field.dtype()
85            {
86                polars_bail!(
87                    SchemaMismatch:
88                    "to_arrow(): provided dtype ({:?}) does not match output dtype ({:?})",
89                    field.dtype(), out.dtype()
90                )
91            }
92        }
93
94        Ok(out)
95    }
96
97    fn array_to_arrow_impl(
98        &mut self,
99        array: &dyn Array,
100        dtype: &DataType,
101        output_arrow_field: Option<&ArrowField>,
102    ) -> PolarsResult<Box<dyn Array>> {
103        Ok(match dtype {
104            // make sure that we recursively apply all logical types.
105            #[cfg(feature = "dtype-struct")]
106            DataType::Struct(fields) => {
107                use arrow::array::StructArray;
108                let arr: &StructArray = array.as_any().downcast_ref().unwrap();
109
110                let expected_output_fields: &[ArrowField] = match output_arrow_field {
111                    Some(
112                        field @ ArrowField {
113                            name: _,
114                            dtype: ArrowDataType::Struct(fields),
115                            is_nullable: _,
116                            metadata: _,
117                        },
118                    ) if fields.len() == arr.fields().len()
119                        && fields
120                            .iter()
121                            .zip(arr.fields())
122                            .all(|(l, r)| l.name() == r.name()) =>
123                    {
124                        fields.as_slice()
125                    },
126                    Some(ArrowField { dtype, .. }) => polars_bail!(
127                        SchemaMismatch:
128                        "to_arrow(): struct dtype mismatch: {:?} != expected: {:?}",
129                        dtype, arr.dtype(),
130                    ),
131                    None => &[],
132                };
133
134                let values: Vec<ArrayRef> = arr
135                    .values()
136                    .iter()
137                    .zip(fields.iter())
138                    .enumerate()
139                    .map(|(i, (values, field))| {
140                        self.array_to_arrow(
141                            values.as_ref(),
142                            field.dtype(),
143                            expected_output_fields.get(i),
144                        )
145                    })
146                    .collect::<PolarsResult<_>>()?;
147
148                let converted_arrow_fields: Vec<ArrowField> = fields
149                    .iter()
150                    .map(|x| (x.name().clone(), x.dtype()))
151                    .zip(values.iter().map(|x| x.dtype()))
152                    .enumerate()
153                    .map(|(i, ((name, dtype), converted_arrow_dtype))| {
154                        create_arrow_field(
155                            name,
156                            dtype,
157                            converted_arrow_dtype,
158                            self.compat_level,
159                            opt_field_is_nullable(expected_output_fields.get(i)),
160                        )
161                    })
162                    .collect();
163
164                StructArray::new(
165                    ArrowDataType::Struct(converted_arrow_fields),
166                    arr.len(),
167                    values,
168                    arr.validity().cloned(),
169                )
170                .boxed()
171            },
172            DataType::List(inner) => {
173                let arr: &ListArray<i64> = array.as_any().downcast_ref().unwrap();
174
175                let expected_inner_output_field: Option<&ArrowField> = match output_arrow_field {
176                    Some(ArrowField {
177                        name: _,
178                        dtype: ArrowDataType::LargeList(inner_field),
179                        is_nullable: _,
180                        metadata: _,
181                    }) if inner_field.name() == &LIST_VALUES_NAME => Some(inner_field),
182                    Some(ArrowField { dtype, .. }) => polars_bail!(
183                        SchemaMismatch:
184                        "to_arrow(): list dtype mismatch: {:?} != expected: {:?}",
185                        dtype, arr.dtype(),
186                    ),
187                    None => None,
188                };
189
190                let new_values =
191                    self.array_to_arrow(arr.values().as_ref(), inner, expected_inner_output_field)?;
192
193                let arr = ListArray::<i64>::new(
194                    ArrowDataType::LargeList(Box::new(create_arrow_field(
195                        LIST_VALUES_NAME,
196                        inner.as_ref(),
197                        new_values.dtype(),
198                        self.compat_level,
199                        opt_field_is_nullable(expected_inner_output_field),
200                    ))),
201                    arr.offsets().clone(),
202                    new_values,
203                    arr.validity().cloned(),
204                );
205                Box::new(arr)
206            },
207            #[cfg(feature = "dtype-array")]
208            DataType::Array(inner, width) => {
209                use arrow::array::FixedSizeListArray;
210
211                let arr: &FixedSizeListArray = array.as_any().downcast_ref().unwrap();
212
213                let expected_inner_output_field: Option<&ArrowField> = match output_arrow_field {
214                    Some(
215                        field @ ArrowField {
216                            name: _,
217                            dtype: ArrowDataType::FixedSizeList(inner_field, width),
218                            is_nullable: _,
219                            metadata: _,
220                        },
221                    ) if *width == arr.size() && inner_field.name() == &LIST_VALUES_NAME => {
222                        Some(inner_field)
223                    },
224                    Some(ArrowField { dtype, .. }) => polars_bail!(
225                        SchemaMismatch:
226                        "to_arrow(): fixed-size list dtype mismatch: {:?} != expected: {:?}",
227                        dtype, arr.dtype(),
228                    ),
229                    None => None,
230                };
231
232                let new_values =
233                    self.array_to_arrow(arr.values().as_ref(), inner, expected_inner_output_field)?;
234
235                let arr = FixedSizeListArray::new(
236                    ArrowDataType::FixedSizeList(
237                        Box::new(create_arrow_field(
238                            LIST_VALUES_NAME,
239                            inner.as_ref(),
240                            new_values.dtype(),
241                            self.compat_level,
242                            opt_field_is_nullable(expected_inner_output_field),
243                        )),
244                        *width,
245                    ),
246                    arr.len(),
247                    new_values,
248                    arr.validity().cloned(),
249                );
250                Box::new(arr)
251            },
252            #[cfg(feature = "dtype-categorical")]
253            DataType::Categorical(_, _) | DataType::Enum(_, _) => self
254                .categorical_converter
255                .array_to_arrow(array, dtype, self.compat_level),
256            #[cfg(feature = "dtype-date")]
257            DataType::Date => {
258                cast_default(array, &DataType::Date.to_arrow(self.compat_level)).unwrap()
259            },
260            #[cfg(feature = "dtype-datetime")]
261            DataType::Datetime(_, _) => {
262                cast_default(array, &dtype.to_arrow(self.compat_level)).unwrap()
263            },
264            #[cfg(feature = "dtype-duration")]
265            DataType::Duration(_) => {
266                cast_default(array, &dtype.to_arrow(self.compat_level)).unwrap()
267            },
268            #[cfg(feature = "dtype-time")]
269            DataType::Time => {
270                cast_default(array, &DataType::Time.to_arrow(self.compat_level)).unwrap()
271            },
272            #[cfg(feature = "dtype-decimal")]
273            DataType::Decimal(_, _) => array
274                .as_any()
275                .downcast_ref::<arrow::array::PrimitiveArray<i128>>()
276                .unwrap()
277                .clone()
278                .to(dtype.to_arrow(CompatLevel::newest()))
279                .to_boxed(),
280            #[cfg(feature = "object")]
281            DataType::Object(_) => {
282                use crate::chunked_array::object::builder::object_series_to_arrow_array;
283                object_series_to_arrow_array(&unsafe {
284                    Series::from_chunks_and_dtype_unchecked(
285                        PlSmallStr::EMPTY,
286                        vec![array.to_boxed()],
287                        dtype,
288                    )
289                })
290            },
291            DataType::String => {
292                if self.compat_level.0 >= 1 {
293                    array.to_boxed()
294                } else {
295                    cast_unchecked(array, &ArrowDataType::LargeUtf8).unwrap()
296                }
297            },
298            DataType::Binary => {
299                if self.compat_level.0 >= 1 {
300                    array.to_boxed()
301                } else {
302                    cast_unchecked(array, &ArrowDataType::LargeBinary).unwrap()
303                }
304            },
305            #[cfg(feature = "dtype-extension")]
306            DataType::Extension(typ, storage_dtype) => {
307                use arrow::datatypes::ExtensionType;
308
309                let output_ext_name: PlSmallStr = typ.name().into();
310                let output_ext_md: Option<PlSmallStr> =
311                    typ.serialize_metadata().map(|md| md.into());
312
313                let expected_inner_output_field: Option<ArrowField> = match output_arrow_field {
314                    Some(
315                        field @ ArrowField {
316                            name: _,
317                            dtype: ArrowDataType::Extension(ext_type),
318                            is_nullable: _,
319                            metadata: _,
320                        },
321                    ) if {
322                        let ExtensionType {
323                            name,
324                            inner: _,
325                            metadata,
326                        } = ext_type.as_ref();
327
328                        name == &output_ext_name
329                            && metadata.as_ref().filter(|x| !x.is_empty())
330                                == output_ext_md.as_ref().filter(|x| !x.is_empty())
331                    } =>
332                    {
333                        let ExtensionType {
334                            name,
335                            inner,
336                            metadata: _,
337                        } = ext_type.as_ref();
338
339                        Some(create_arrow_field(
340                            name.clone(),
341                            storage_dtype.as_ref(),
342                            inner,
343                            self.compat_level,
344                            true,
345                        ))
346                    },
347                    Some(ArrowField { dtype, .. }) => {
348                        let expected_inner = self
349                            .array_to_arrow(array.sliced(0, 0).as_ref(), storage_dtype, None)
350                            .unwrap()
351                            .dtype()
352                            .clone();
353
354                        let expected = ArrowDataType::Extension(Box::new(ExtensionType {
355                            name: output_ext_name,
356                            inner: expected_inner,
357                            metadata: output_ext_md,
358                        }));
359
360                        polars_bail!(
361                            SchemaMismatch:
362                            "to_arrow(): extension dtype mismatch: {:?} != expected: {:?}",
363                            dtype, expected,
364                        )
365                    },
366                    None => None,
367                };
368
369                let mut arr = self.array_to_arrow(
370                    array,
371                    storage_dtype,
372                    expected_inner_output_field.as_ref(),
373                )?;
374
375                *arr.dtype_mut() = ArrowDataType::Extension(Box::new(ExtensionType {
376                    name: output_ext_name,
377                    inner: arr.dtype().clone(),
378                    metadata: output_ext_md,
379                }));
380                arr
381            },
382            _ => {
383                assert!(!dtype.is_logical());
384                array.to_boxed()
385            },
386        })
387    }
388}
389
390fn create_arrow_field(
391    name: PlSmallStr,
392    dtype: &DataType,
393    arrow_dtype: &ArrowDataType,
394    compat_level: CompatLevel,
395    is_nullable: bool,
396) -> ArrowField {
397    match (dtype, arrow_dtype) {
398        #[cfg(feature = "dtype-categorical")]
399        (DataType::Categorical(..) | DataType::Enum(..), ArrowDataType::Dictionary(_, _, _)) => {
400            // Sets _PL_ metadata
401            let mut out = dtype.to_arrow_field(name, compat_level);
402            out.is_nullable = is_nullable;
403            out
404        },
405        _ => ArrowField::new(name, arrow_dtype.clone(), is_nullable),
406    }
407}
408
409fn opt_field_is_nullable(opt_field: Option<&ArrowField>) -> bool {
410    opt_field.is_none_or(|x| x.is_nullable)
411}