polars_core/series/
from.rs

1use arrow::datatypes::{IntervalUnit, Metadata};
2use arrow::offset::OffsetsBuffer;
3#[cfg(any(
4    feature = "dtype-date",
5    feature = "dtype-datetime",
6    feature = "dtype-time",
7    feature = "dtype-duration"
8))]
9use arrow::temporal_conversions::*;
10use arrow::types::months_days_ns;
11use polars_compute::cast::cast_unchecked as cast;
12use polars_error::feature_gated;
13use polars_utils::check_allow_importing_interval_as_struct;
14use polars_utils::itertools::Itertools;
15
16use crate::chunked_array::cast::{CastOptions, cast_chunks};
17#[cfg(feature = "object")]
18use crate::chunked_array::object::extension::polars_extension::PolarsExtension;
19#[cfg(feature = "object")]
20use crate::chunked_array::object::registry::get_object_builder;
21use crate::prelude::*;
22
23impl Series {
24    pub fn from_array<A: ParameterFreeDtypeStaticArray>(name: PlSmallStr, array: A) -> Self {
25        unsafe {
26            Self::from_chunks_and_dtype_unchecked(
27                name,
28                vec![Box::new(array)],
29                &DataType::from_arrow_dtype(&A::get_dtype()),
30            )
31        }
32    }
33
34    pub fn from_chunk_and_dtype(
35        name: PlSmallStr,
36        chunk: ArrayRef,
37        dtype: &DataType,
38    ) -> PolarsResult<Self> {
39        if &dtype.to_physical().to_arrow(CompatLevel::newest()) != chunk.dtype() {
40            polars_bail!(
41                InvalidOperation: "cannot create a series of type '{dtype}' of arrow chunk with type '{:?}'",
42                chunk.dtype()
43            );
44        }
45
46        // SAFETY: We check that the datatype matches.
47        let series = unsafe { Self::from_chunks_and_dtype_unchecked(name, vec![chunk], dtype) };
48        Ok(series)
49    }
50
51    /// Takes chunks and a polars datatype and constructs the Series
52    /// This is faster than creating from chunks and an arrow datatype because there is no
53    /// casting involved
54    ///
55    /// # Safety
56    ///
57    /// The caller must ensure that the given `dtype`'s physical type matches all the `ArrayRef` dtypes.
58    pub unsafe fn from_chunks_and_dtype_unchecked(
59        name: PlSmallStr,
60        chunks: Vec<ArrayRef>,
61        dtype: &DataType,
62    ) -> Self {
63        use DataType::*;
64        match dtype {
65            Int8 => Int8Chunked::from_chunks(name, chunks).into_series(),
66            Int16 => Int16Chunked::from_chunks(name, chunks).into_series(),
67            Int32 => Int32Chunked::from_chunks(name, chunks).into_series(),
68            Int64 => Int64Chunked::from_chunks(name, chunks).into_series(),
69            UInt8 => UInt8Chunked::from_chunks(name, chunks).into_series(),
70            UInt16 => UInt16Chunked::from_chunks(name, chunks).into_series(),
71            UInt32 => UInt32Chunked::from_chunks(name, chunks).into_series(),
72            UInt64 => UInt64Chunked::from_chunks(name, chunks).into_series(),
73            #[cfg(feature = "dtype-i128")]
74            Int128 => Int128Chunked::from_chunks(name, chunks).into_series(),
75            #[cfg(feature = "dtype-date")]
76            Date => Int32Chunked::from_chunks(name, chunks)
77                .into_date()
78                .into_series(),
79            #[cfg(feature = "dtype-time")]
80            Time => Int64Chunked::from_chunks(name, chunks)
81                .into_time()
82                .into_series(),
83            #[cfg(feature = "dtype-duration")]
84            Duration(tu) => Int64Chunked::from_chunks(name, chunks)
85                .into_duration(*tu)
86                .into_series(),
87            #[cfg(feature = "dtype-datetime")]
88            Datetime(tu, tz) => Int64Chunked::from_chunks(name, chunks)
89                .into_datetime(*tu, tz.clone())
90                .into_series(),
91            #[cfg(feature = "dtype-decimal")]
92            Decimal(precision, scale) => Int128Chunked::from_chunks(name, chunks)
93                .into_decimal_unchecked(
94                    *precision,
95                    scale.unwrap_or_else(|| unreachable!("scale should be set")),
96                )
97                .into_series(),
98            #[cfg(feature = "dtype-array")]
99            Array(_, _) => {
100                ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone())
101                    .into_series()
102            },
103            List(_) => ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone())
104                .into_series(),
105            String => StringChunked::from_chunks(name, chunks).into_series(),
106            Binary => BinaryChunked::from_chunks(name, chunks).into_series(),
107            #[cfg(feature = "dtype-categorical")]
108            dt @ (Categorical(_, _) | Enum(_, _)) => {
109                with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
110                    let phys = ChunkedArray::from_chunks(name, chunks);
111                    CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(phys, dt.clone()).into_series()
112                })
113            },
114            Boolean => BooleanChunked::from_chunks(name, chunks).into_series(),
115            Float32 => Float32Chunked::from_chunks(name, chunks).into_series(),
116            Float64 => Float64Chunked::from_chunks(name, chunks).into_series(),
117            BinaryOffset => BinaryOffsetChunked::from_chunks(name, chunks).into_series(),
118            #[cfg(feature = "dtype-struct")]
119            Struct(_) => {
120                let mut ca =
121                    StructChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone());
122                StructChunked::propagate_nulls_mut(&mut ca);
123                ca.into_series()
124            },
125            #[cfg(feature = "object")]
126            Object(_) => {
127                if let Some(arr) = chunks[0].as_any().downcast_ref::<FixedSizeBinaryArray>() {
128                    assert_eq!(chunks.len(), 1);
129                    // SAFETY:
130                    // this is highly unsafe. it will dereference a raw ptr on the heap
131                    // make sure the ptr is allocated and from this pid
132                    // (the pid is checked before dereference)
133                    {
134                        let pe = PolarsExtension::new(arr.clone());
135                        let s = pe.get_series(&name);
136                        pe.take_and_forget();
137                        s
138                    }
139                } else {
140                    unsafe { get_object_builder(name, 0).from_chunks(chunks) }
141                }
142            },
143            Null => new_null(name, &chunks),
144            Unknown(_) => {
145                panic!("dtype is unknown; consider supplying data-types for all operations")
146            },
147            #[allow(unreachable_patterns)]
148            _ => unreachable!(),
149        }
150    }
151
152    /// # Safety
153    /// The caller must ensure that the given `dtype` matches all the `ArrayRef` dtypes.
154    pub unsafe fn _try_from_arrow_unchecked(
155        name: PlSmallStr,
156        chunks: Vec<ArrayRef>,
157        dtype: &ArrowDataType,
158    ) -> PolarsResult<Self> {
159        Self::_try_from_arrow_unchecked_with_md(name, chunks, dtype, None)
160    }
161
162    /// Create a new Series without checking if the inner dtype of the chunks is correct
163    ///
164    /// # Safety
165    /// The caller must ensure that the given `dtype` matches all the `ArrayRef` dtypes.
166    pub unsafe fn _try_from_arrow_unchecked_with_md(
167        name: PlSmallStr,
168        chunks: Vec<ArrayRef>,
169        dtype: &ArrowDataType,
170        md: Option<&Metadata>,
171    ) -> PolarsResult<Self> {
172        match dtype {
173            ArrowDataType::Utf8View => Ok(StringChunked::from_chunks(name, chunks).into_series()),
174            ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
175                let chunks =
176                    cast_chunks(&chunks, &DataType::String, CastOptions::NonStrict).unwrap();
177                Ok(StringChunked::from_chunks(name, chunks).into_series())
178            },
179            ArrowDataType::BinaryView => Ok(BinaryChunked::from_chunks(name, chunks).into_series()),
180            ArrowDataType::LargeBinary => {
181                if let Some(md) = md {
182                    if md.maintain_type() {
183                        return Ok(BinaryOffsetChunked::from_chunks(name, chunks).into_series());
184                    }
185                }
186                let chunks =
187                    cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict).unwrap();
188                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
189            },
190            ArrowDataType::Binary => {
191                let chunks =
192                    cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict).unwrap();
193                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
194            },
195            ArrowDataType::List(_) | ArrowDataType::LargeList(_) => {
196                let (chunks, dtype) = to_physical_and_dtype(chunks, md);
197                unsafe {
198                    Ok(
199                        ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype)
200                            .into_series(),
201                    )
202                }
203            },
204            #[cfg(feature = "dtype-array")]
205            ArrowDataType::FixedSizeList(_, _) => {
206                let (chunks, dtype) = to_physical_and_dtype(chunks, md);
207                unsafe {
208                    Ok(
209                        ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype)
210                            .into_series(),
211                    )
212                }
213            },
214            ArrowDataType::Boolean => Ok(BooleanChunked::from_chunks(name, chunks).into_series()),
215            #[cfg(feature = "dtype-u8")]
216            ArrowDataType::UInt8 => Ok(UInt8Chunked::from_chunks(name, chunks).into_series()),
217            #[cfg(feature = "dtype-u16")]
218            ArrowDataType::UInt16 => Ok(UInt16Chunked::from_chunks(name, chunks).into_series()),
219            ArrowDataType::UInt32 => Ok(UInt32Chunked::from_chunks(name, chunks).into_series()),
220            ArrowDataType::UInt64 => Ok(UInt64Chunked::from_chunks(name, chunks).into_series()),
221            #[cfg(feature = "dtype-i8")]
222            ArrowDataType::Int8 => Ok(Int8Chunked::from_chunks(name, chunks).into_series()),
223            #[cfg(feature = "dtype-i16")]
224            ArrowDataType::Int16 => Ok(Int16Chunked::from_chunks(name, chunks).into_series()),
225            ArrowDataType::Int32 => Ok(Int32Chunked::from_chunks(name, chunks).into_series()),
226            ArrowDataType::Int64 => Ok(Int64Chunked::from_chunks(name, chunks).into_series()),
227            ArrowDataType::Int128 => feature_gated!(
228                "dtype-i128",
229                Ok(Int128Chunked::from_chunks(name, chunks).into_series())
230            ),
231            ArrowDataType::Float16 => {
232                let chunks =
233                    cast_chunks(&chunks, &DataType::Float32, CastOptions::NonStrict).unwrap();
234                Ok(Float32Chunked::from_chunks(name, chunks).into_series())
235            },
236            ArrowDataType::Float32 => Ok(Float32Chunked::from_chunks(name, chunks).into_series()),
237            ArrowDataType::Float64 => Ok(Float64Chunked::from_chunks(name, chunks).into_series()),
238            #[cfg(feature = "dtype-date")]
239            ArrowDataType::Date32 => {
240                let chunks =
241                    cast_chunks(&chunks, &DataType::Int32, CastOptions::Overflowing).unwrap();
242                Ok(Int32Chunked::from_chunks(name, chunks)
243                    .into_date()
244                    .into_series())
245            },
246            #[cfg(feature = "dtype-datetime")]
247            ArrowDataType::Date64 => {
248                let chunks =
249                    cast_chunks(&chunks, &DataType::Int64, CastOptions::Overflowing).unwrap();
250                let ca = Int64Chunked::from_chunks(name, chunks);
251                Ok(ca.into_datetime(TimeUnit::Milliseconds, None).into_series())
252            },
253            #[cfg(feature = "dtype-datetime")]
254            ArrowDataType::Timestamp(tu, tz) => {
255                let tz = TimeZone::opt_try_new(tz.clone())?;
256                let chunks =
257                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
258                let s = Int64Chunked::from_chunks(name, chunks)
259                    .into_datetime(tu.into(), tz)
260                    .into_series();
261                Ok(match tu {
262                    ArrowTimeUnit::Second => &s * MILLISECONDS,
263                    ArrowTimeUnit::Millisecond => s,
264                    ArrowTimeUnit::Microsecond => s,
265                    ArrowTimeUnit::Nanosecond => s,
266                })
267            },
268            #[cfg(feature = "dtype-duration")]
269            ArrowDataType::Duration(tu) => {
270                let chunks =
271                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
272                let s = Int64Chunked::from_chunks(name, chunks)
273                    .into_duration(tu.into())
274                    .into_series();
275                Ok(match tu {
276                    ArrowTimeUnit::Second => &s * MILLISECONDS,
277                    ArrowTimeUnit::Millisecond => s,
278                    ArrowTimeUnit::Microsecond => s,
279                    ArrowTimeUnit::Nanosecond => s,
280                })
281            },
282            #[cfg(feature = "dtype-time")]
283            ArrowDataType::Time64(tu) | ArrowDataType::Time32(tu) => {
284                let mut chunks = chunks;
285                if matches!(dtype, ArrowDataType::Time32(_)) {
286                    chunks =
287                        cast_chunks(&chunks, &DataType::Int32, CastOptions::NonStrict).unwrap();
288                }
289                let chunks =
290                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
291                let s = Int64Chunked::from_chunks(name, chunks)
292                    .into_time()
293                    .into_series();
294                Ok(match tu {
295                    ArrowTimeUnit::Second => &s * NANOSECONDS,
296                    ArrowTimeUnit::Millisecond => &s * 1_000_000,
297                    ArrowTimeUnit::Microsecond => &s * 1_000,
298                    ArrowTimeUnit::Nanosecond => s,
299                })
300            },
301            ArrowDataType::Decimal32(precision, scale) => {
302                feature_gated!("dtype-decimal", {
303                    polars_ensure!(*scale <= *precision, InvalidOperation: "invalid decimal precision and scale (prec={precision}, scale={scale})");
304                    polars_ensure!(*precision <= 38, InvalidOperation: "polars does not support decimals above 38 precision");
305
306                    let mut chunks = chunks;
307                    for chunk in chunks.iter_mut() {
308                        let old_chunk = chunk
309                            .as_any_mut()
310                            .downcast_mut::<PrimitiveArray<i32>>()
311                            .unwrap();
312
313                        // For now, we just cast the whole data to i128.
314                        let (_, values, validity) = std::mem::take(old_chunk).into_inner();
315                        *chunk = PrimitiveArray::new(
316                            ArrowDataType::Int128,
317                            values.iter().map(|&v| v as i128).collect(),
318                            validity,
319                        )
320                        .to_boxed();
321                    }
322
323                    // @NOTE: We cannot cast here as that will lower the scale.
324                    let s = Int128Chunked::from_chunks(name, chunks)
325                        .into_decimal_unchecked(Some(*precision), *scale)
326                        .into_series();
327                    Ok(s)
328                })
329            },
330            ArrowDataType::Decimal64(precision, scale) => {
331                feature_gated!("dtype-decimal", {
332                    polars_ensure!(*scale <= *precision, InvalidOperation: "invalid decimal precision and scale (prec={precision}, scale={scale})");
333                    polars_ensure!(*precision <= 38, InvalidOperation: "polars does not support decimals above 38 precision");
334
335                    let mut chunks = chunks;
336                    for chunk in chunks.iter_mut() {
337                        let old_chunk = chunk
338                            .as_any_mut()
339                            .downcast_mut::<PrimitiveArray<i64>>()
340                            .unwrap();
341
342                        // For now, we just cast the whole data to i128.
343                        let (_, values, validity) = std::mem::take(old_chunk).into_inner();
344                        *chunk = PrimitiveArray::new(
345                            ArrowDataType::Int128,
346                            values.iter().map(|&v| v as i128).collect(),
347                            validity,
348                        )
349                        .to_boxed();
350                    }
351
352                    // @NOTE: We cannot cast here as that will lower the scale.
353                    let s = Int128Chunked::from_chunks(name, chunks)
354                        .into_decimal_unchecked(Some(*precision), *scale)
355                        .into_series();
356                    Ok(s)
357                })
358            },
359            ArrowDataType::Decimal(precision, scale)
360            | ArrowDataType::Decimal256(precision, scale) => {
361                feature_gated!("dtype-decimal", {
362                    polars_ensure!(*scale <= *precision, InvalidOperation: "invalid decimal precision and scale (prec={precision}, scale={scale})");
363                    polars_ensure!(*precision <= 38, InvalidOperation: "polars does not support decimals above 38 precision");
364
365                    // Q? I don't think this is correct for Decimal256?
366                    let mut chunks = chunks;
367                    for chunk in chunks.iter_mut() {
368                        *chunk = std::mem::take(
369                            chunk
370                                .as_any_mut()
371                                .downcast_mut::<PrimitiveArray<i128>>()
372                                .unwrap(),
373                        )
374                        .to(ArrowDataType::Int128)
375                        .to_boxed();
376                    }
377
378                    // @NOTE: We cannot cast here as that will lower the scale.
379                    let s = Int128Chunked::from_chunks(name, chunks)
380                        .into_decimal_unchecked(Some(*precision), *scale)
381                        .into_series();
382                    Ok(s)
383                })
384            },
385            ArrowDataType::Null => Ok(new_null(name, &chunks)),
386            #[cfg(not(feature = "dtype-categorical"))]
387            ArrowDataType::Dictionary(_, _, _) => {
388                panic!("activate dtype-categorical to convert dictionary arrays")
389            },
390            #[cfg(feature = "dtype-categorical")]
391            ArrowDataType::Dictionary(key_type, _, _) => {
392                let polars_dtype = DataType::from_arrow(chunks[0].dtype(), md);
393
394                let mut series_iter = chunks.into_iter().map(|arr| {
395                    import_arrow_dictionary_array(name.clone(), arr, key_type, &polars_dtype)
396                });
397
398                let mut first = series_iter.next().unwrap()?;
399
400                for s in series_iter {
401                    first.append_owned(s?)?;
402                }
403
404                Ok(first)
405            },
406            #[cfg(feature = "object")]
407            ArrowDataType::Extension(ext)
408                if ext.name == EXTENSION_NAME && ext.metadata.is_some() =>
409            {
410                assert_eq!(chunks.len(), 1);
411                let arr = chunks[0]
412                    .as_any()
413                    .downcast_ref::<FixedSizeBinaryArray>()
414                    .unwrap();
415                // SAFETY:
416                // this is highly unsafe. it will dereference a raw ptr on the heap
417                // make sure the ptr is allocated and from this pid
418                // (the pid is checked before dereference)
419                let s = {
420                    let pe = PolarsExtension::new(arr.clone());
421                    let s = pe.get_series(&name);
422                    pe.take_and_forget();
423                    s
424                };
425                Ok(s)
426            },
427            #[cfg(feature = "dtype-struct")]
428            ArrowDataType::Struct(_) => {
429                let (chunks, dtype) = to_physical_and_dtype(chunks, md);
430
431                unsafe {
432                    let mut ca =
433                        StructChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype);
434                    StructChunked::propagate_nulls_mut(&mut ca);
435                    Ok(ca.into_series())
436                }
437            },
438            ArrowDataType::FixedSizeBinary(_) => {
439                let chunks = cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict)?;
440                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
441            },
442            ArrowDataType::Map(field, _is_ordered) => {
443                let struct_arrays = chunks
444                    .iter()
445                    .map(|arr| {
446                        let arr = arr.as_any().downcast_ref::<MapArray>().unwrap();
447                        arr.field().clone()
448                    })
449                    .collect::<Vec<_>>();
450
451                let (phys_struct_arrays, dtype) =
452                    to_physical_and_dtype(struct_arrays, field.metadata.as_deref());
453
454                let chunks = chunks
455                    .iter()
456                    .zip(phys_struct_arrays)
457                    .map(|(arr, values)| {
458                        let arr = arr.as_any().downcast_ref::<MapArray>().unwrap();
459                        let offsets: &OffsetsBuffer<i32> = arr.offsets();
460
461                        let validity = values.validity().cloned();
462
463                        Box::from(ListArray::<i64>::new(
464                            ListArray::<i64>::default_datatype(values.dtype().clone()),
465                            OffsetsBuffer::<i64>::from(offsets),
466                            values,
467                            validity,
468                        )) as ArrayRef
469                    })
470                    .collect();
471
472                unsafe {
473                    let out = ListChunked::from_chunks_and_dtype_unchecked(
474                        name,
475                        chunks,
476                        DataType::List(Box::new(dtype)),
477                    );
478
479                    Ok(out.into_series())
480                }
481            },
482            ArrowDataType::Interval(IntervalUnit::MonthDayNano) => {
483                check_allow_importing_interval_as_struct("month_day_nano_interval")?;
484
485                feature_gated!("dtype-struct", {
486                    let chunks = chunks
487                        .into_iter()
488                        .map(convert_month_day_nano_to_struct)
489                        .collect::<PolarsResult<Vec<_>>>()?;
490
491                    Ok(StructChunked::from_chunks_and_dtype_unchecked(
492                        name,
493                        chunks,
494                        DataType::_month_days_ns_struct_type(),
495                    )
496                    .into_series())
497                })
498            },
499            dt => polars_bail!(ComputeError: "cannot create series from {:?}", dt),
500        }
501    }
502}
503
504fn convert<F: Fn(&dyn Array) -> ArrayRef>(arr: &[ArrayRef], f: F) -> Vec<ArrayRef> {
505    arr.iter().map(|arr| f(&**arr)).collect()
506}
507
508/// Converts to physical types and bubbles up the correct [`DataType`].
509#[allow(clippy::only_used_in_recursion)]
510unsafe fn to_physical_and_dtype(
511    arrays: Vec<ArrayRef>,
512    md: Option<&Metadata>,
513) -> (Vec<ArrayRef>, DataType) {
514    match arrays[0].dtype() {
515        ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
516            let chunks = cast_chunks(&arrays, &DataType::String, CastOptions::NonStrict).unwrap();
517            (chunks, DataType::String)
518        },
519        ArrowDataType::Binary | ArrowDataType::LargeBinary | ArrowDataType::FixedSizeBinary(_) => {
520            let chunks = cast_chunks(&arrays, &DataType::Binary, CastOptions::NonStrict).unwrap();
521            (chunks, DataType::Binary)
522        },
523        #[allow(unused_variables)]
524        dt @ ArrowDataType::Dictionary(_, _, _) => {
525            feature_gated!("dtype-categorical", {
526                let s = unsafe {
527                    let dt = dt.clone();
528                    Series::_try_from_arrow_unchecked_with_md(PlSmallStr::EMPTY, arrays, &dt, md)
529                }
530                .unwrap();
531                (s.chunks().clone(), s.dtype().clone())
532            })
533        },
534        ArrowDataType::List(field) => {
535            let out = convert(&arrays, |arr| {
536                cast(arr, &ArrowDataType::LargeList(field.clone())).unwrap()
537            });
538            to_physical_and_dtype(out, md)
539        },
540        #[cfg(feature = "dtype-array")]
541        ArrowDataType::FixedSizeList(field, size) => {
542            let values = arrays
543                .iter()
544                .map(|arr| {
545                    let arr = arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
546                    arr.values().clone()
547                })
548                .collect::<Vec<_>>();
549
550            let (converted_values, dtype) =
551                to_physical_and_dtype(values, field.metadata.as_deref());
552
553            let arrays = arrays
554                .iter()
555                .zip(converted_values)
556                .map(|(arr, values)| {
557                    let arr = arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
558
559                    let dtype = FixedSizeListArray::default_datatype(values.dtype().clone(), *size);
560                    Box::from(FixedSizeListArray::new(
561                        dtype,
562                        arr.len(),
563                        values,
564                        arr.validity().cloned(),
565                    )) as ArrayRef
566                })
567                .collect();
568            (arrays, DataType::Array(Box::new(dtype), *size))
569        },
570        ArrowDataType::LargeList(field) => {
571            let values = arrays
572                .iter()
573                .map(|arr| {
574                    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
575                    arr.values().clone()
576                })
577                .collect::<Vec<_>>();
578
579            let (converted_values, dtype) =
580                to_physical_and_dtype(values, field.metadata.as_deref());
581
582            let arrays = arrays
583                .iter()
584                .zip(converted_values)
585                .map(|(arr, values)| {
586                    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
587
588                    let dtype = ListArray::<i64>::default_datatype(values.dtype().clone());
589                    Box::from(ListArray::<i64>::new(
590                        dtype,
591                        arr.offsets().clone(),
592                        values,
593                        arr.validity().cloned(),
594                    )) as ArrayRef
595                })
596                .collect();
597            (arrays, DataType::List(Box::new(dtype)))
598        },
599        ArrowDataType::Struct(_fields) => {
600            feature_gated!("dtype-struct", {
601                let mut pl_fields = None;
602                let arrays = arrays
603                    .iter()
604                    .map(|arr| {
605                        let arr = arr.as_any().downcast_ref::<StructArray>().unwrap();
606                        let (values, dtypes): (Vec<_>, Vec<_>) = arr
607                            .values()
608                            .iter()
609                            .zip(_fields.iter())
610                            .map(|(value, field)| {
611                                let mut out = to_physical_and_dtype(
612                                    vec![value.clone()],
613                                    field.metadata.as_deref(),
614                                );
615                                (out.0.pop().unwrap(), out.1)
616                            })
617                            .unzip();
618
619                        let arrow_fields = values
620                            .iter()
621                            .zip(_fields.iter())
622                            .map(|(arr, field)| {
623                                ArrowField::new(field.name.clone(), arr.dtype().clone(), true)
624                            })
625                            .collect();
626                        let arrow_array = Box::new(StructArray::new(
627                            ArrowDataType::Struct(arrow_fields),
628                            arr.len(),
629                            values,
630                            arr.validity().cloned(),
631                        )) as ArrayRef;
632
633                        if pl_fields.is_none() {
634                            pl_fields = Some(
635                                _fields
636                                    .iter()
637                                    .zip(dtypes)
638                                    .map(|(field, dtype)| Field::new(field.name.clone(), dtype))
639                                    .collect_vec(),
640                            )
641                        }
642
643                        arrow_array
644                    })
645                    .collect_vec();
646
647                (arrays, DataType::Struct(pl_fields.unwrap()))
648            })
649        },
650        // Use Series architecture to convert nested logical types to physical.
651        dt @ (ArrowDataType::Duration(_)
652        | ArrowDataType::Time32(_)
653        | ArrowDataType::Time64(_)
654        | ArrowDataType::Timestamp(_, _)
655        | ArrowDataType::Date32
656        | ArrowDataType::Decimal(_, _)
657        | ArrowDataType::Date64
658        | ArrowDataType::Map(_, _)) => {
659            let dt = dt.clone();
660            let mut s = Series::_try_from_arrow_unchecked(PlSmallStr::EMPTY, arrays, &dt).unwrap();
661            let dtype = s.dtype().clone();
662            (std::mem::take(s.chunks_mut()), dtype)
663        },
664        dt => {
665            let dtype = DataType::from_arrow(dt, md);
666            (arrays, dtype)
667        },
668    }
669}
670
/// Imports an Arrow dictionary array as a [`Series`] described by `polars_dtype`.
///
/// * If `polars_dtype` is `Categorical` or `Enum`, the dictionary values are cast to
///   `Utf8View`, every key is resolved to its string and a [`CategoricalChunked`] is
///   built from those strings.
/// * Otherwise the dictionary is materialised: the values are imported as a [`Series`]
///   and gathered with the keys, which are first cast to the index type.
///
/// # Errors
///
/// Returns a `ComputeError` when `key_type` is not one of the 8/16/32/64-bit signed or
/// unsigned integer types, and propagates any error raised by the cast, the categorical
/// construction, the import of the values, or the final `take`.
///
/// # Panics
///
/// Panics if `arr` is not a `DictionaryArray` keyed by `key_type`.
///
/// # Safety
///
/// NOTE(review): this function forwards to unchecked constructors
/// (`_try_from_arrow_unchecked_with_md`, `from_chunks_and_dtype`); the caller presumably
/// has to uphold their requirements — confirm against their `# Safety` sections.
#[cfg(feature = "dtype-categorical")]
unsafe fn import_arrow_dictionary_array(
    name: PlSmallStr,
    arr: Box<dyn Array>,
    key_type: &arrow::datatypes::IntegerType,
    polars_dtype: &DataType,
) -> PolarsResult<Series> {
    use arrow::datatypes::IntegerType as I;

    let wants_categorical = matches!(
        polars_dtype,
        DataType::Categorical(_, _) | DataType::Enum(_, _)
    );

    if wants_categorical {
        // Builds the categorical by looking up the string behind every key.
        macro_rules! build_categorical {
            ($key:ty) => {{
                let dict = arr.as_any().downcast_ref::<DictionaryArray<$key>>().unwrap();
                let dict_keys = dict.keys();
                let utf8_values = cast(&**dict.values(), &ArrowDataType::Utf8View)?;
                let strings = utf8_values.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
                with_match_categorical_physical_type!(polars_dtype.cat_physical().unwrap(), |$C| {
                    let cat = CategoricalChunked::<$C>::from_str_iter(
                        name,
                        polars_dtype.clone(),
                        dict_keys.iter().map(|key| {
                            // Null keys, and keys that do not fit in `usize`, become nulls.
                            let idx: usize = (*key?).try_into().ok()?;
                            strings.get(idx)
                        }),
                    )?;
                    Ok(cat.into_series())
                })
            }};
        }

        return match key_type {
            I::Int8 => build_categorical!(i8),
            I::UInt8 => build_categorical!(u8),
            I::Int16 => build_categorical!(i16),
            I::UInt16 => build_categorical!(u16),
            I::Int32 => build_categorical!(i32),
            I::UInt32 => build_categorical!(u32),
            I::Int64 => build_categorical!(i64),
            I::UInt64 => build_categorical!(u64),
            _ => polars_bail!(
                ComputeError: "unsupported arrow key type: {key_type:?}"
            ),
        };
    }

    // Splits the dictionary into its keys (cast to the index type) and its values.
    macro_rules! split_dictionary {
        ($key:ty) => {{
            let dict = arr.as_any().downcast_ref::<DictionaryArray<$key>>().unwrap();
            let idx_keys = polars_compute::cast::primitive_to_primitive::<
                $key,
                <IdxType as PolarsNumericType>::Native,
            >(dict.keys(), &IDX_DTYPE.to_arrow(CompatLevel::newest()));
            (idx_keys, dict.values())
        }};
    }

    let (idx_keys, dict_values) = match key_type {
        I::Int8 => split_dictionary!(i8),
        I::UInt8 => split_dictionary!(u8),
        I::Int16 => split_dictionary!(i16),
        I::UInt16 => split_dictionary!(u16),
        I::Int32 => split_dictionary!(i32),
        I::UInt32 => split_dictionary!(u32),
        I::Int64 => split_dictionary!(i64),
        I::UInt64 => split_dictionary!(u64),
        _ => polars_bail!(
            ComputeError: "unsupported arrow key type: {key_type:?}"
        ),
    };

    let value_series = Series::_try_from_arrow_unchecked_with_md(
        name,
        vec![dict_values.clone()],
        dict_values.dtype(),
        None,
    )?;

    let indices = IdxCa::from_chunks_and_dtype(
        PlSmallStr::EMPTY,
        vec![idx_keys.to_boxed()],
        IDX_DTYPE,
    );
    value_series.take(&indices)
}
759
/// Converts a `months_days_ns` interval chunk into a struct array with one child per
/// component: months (`i32`), days (`i32`) and nanoseconds (`i64`).
///
/// The validity of the input is carried over to the struct array; the children
/// themselves are built without a validity mask.
///
/// # Panics
///
/// Panics if `chunk` is not a `PrimitiveArray<months_days_ns>`.
#[cfg(feature = "dtype-struct")]
fn convert_month_day_nano_to_struct(chunk: Box<dyn Array>) -> PolarsResult<Box<dyn Array>> {
    let intervals: &PrimitiveArray<months_days_ns> = chunk.as_any().downcast_ref().unwrap();
    let len = intervals.len();

    // Split every interval into its three components, one output column each.
    let mut months: Vec<i32> = Vec::with_capacity(len);
    let mut days: Vec<i32> = Vec::with_capacity(len);
    let mut nanoseconds: Vec<i64> = Vec::with_capacity(len);
    for interval in intervals.values().iter() {
        months.push(interval.months());
        days.push(interval.days());
        nanoseconds.push(interval.ns());
    }

    let struct_dtype = DataType::_month_days_ns_struct_type()
        .to_physical()
        .to_arrow(CompatLevel::newest());
    let children = vec![
        PrimitiveArray::<i32>::from_vec(months).boxed(),
        PrimitiveArray::<i32>::from_vec(days).boxed(),
        PrimitiveArray::<i64>::from_vec(nanoseconds).boxed(),
    ];

    let converted = StructArray::new(struct_dtype, len, children, intervals.validity().cloned());
    Ok(converted.boxed())
}
786
787fn check_types(chunks: &[ArrayRef]) -> PolarsResult<ArrowDataType> {
788    let mut chunks_iter = chunks.iter();
789    let dtype: ArrowDataType = chunks_iter
790        .next()
791        .ok_or_else(|| polars_err!(NoData: "expected at least one array-ref"))?
792        .dtype()
793        .clone();
794
795    for chunk in chunks_iter {
796        if chunk.dtype() != &dtype {
797            polars_bail!(
798                ComputeError: "cannot create series from multiple arrays with different types"
799            );
800        }
801    }
802    Ok(dtype)
803}
804
805impl Series {
806    pub fn try_new<T>(
807        name: PlSmallStr,
808        data: T,
809    ) -> Result<Self, <(PlSmallStr, T) as TryInto<Self>>::Error>
810    where
811        (PlSmallStr, T): TryInto<Self>,
812    {
813        // # TODO
814        // * Remove the TryFrom<tuple> impls in favor of this
815        <(PlSmallStr, T) as TryInto<Self>>::try_into((name, data))
816    }
817}
818
819impl TryFrom<(PlSmallStr, Vec<ArrayRef>)> for Series {
820    type Error = PolarsError;
821
822    fn try_from(name_arr: (PlSmallStr, Vec<ArrayRef>)) -> PolarsResult<Self> {
823        let (name, chunks) = name_arr;
824
825        let dtype = check_types(&chunks)?;
826        // SAFETY:
827        // dtype is checked
828        unsafe { Series::_try_from_arrow_unchecked(name, chunks, &dtype) }
829    }
830}
831
832impl TryFrom<(PlSmallStr, ArrayRef)> for Series {
833    type Error = PolarsError;
834
835    fn try_from(name_arr: (PlSmallStr, ArrayRef)) -> PolarsResult<Self> {
836        let (name, arr) = name_arr;
837        Series::try_from((name, vec![arr]))
838    }
839}
840
841impl TryFrom<(&ArrowField, Vec<ArrayRef>)> for Series {
842    type Error = PolarsError;
843
844    fn try_from(field_arr: (&ArrowField, Vec<ArrayRef>)) -> PolarsResult<Self> {
845        let (field, chunks) = field_arr;
846
847        let dtype = check_types(&chunks)?;
848
849        // SAFETY:
850        // dtype is checked
851        unsafe {
852            Series::_try_from_arrow_unchecked_with_md(
853                field.name.clone(),
854                chunks,
855                &dtype,
856                field.metadata.as_deref(),
857            )
858        }
859    }
860}
861
862impl TryFrom<(&ArrowField, ArrayRef)> for Series {
863    type Error = PolarsError;
864
865    fn try_from(field_arr: (&ArrowField, ArrayRef)) -> PolarsResult<Self> {
866        let (field, arr) = field_arr;
867        Series::try_from((field, vec![arr]))
868    }
869}
870
871/// Used to convert a [`ChunkedArray`], `&dyn SeriesTrait` and [`Series`]
872/// into a [`Series`].
873/// # Safety
874///
875/// This trait is marked `unsafe` as the `is_series` return is used
876/// to transmute to `Series`. This must always return `false` except
877/// for `Series` structs.
878pub unsafe trait IntoSeries {
879    fn is_series() -> bool {
880        false
881    }
882
883    fn into_series(self) -> Series
884    where
885        Self: Sized;
886}
887
888impl<T> From<ChunkedArray<T>> for Series
889where
890    T: PolarsDataType,
891    ChunkedArray<T>: IntoSeries,
892{
893    fn from(ca: ChunkedArray<T>) -> Self {
894        ca.into_series()
895    }
896}
897
/// Wraps a [`DateChunked`] in a [`Series`] via `into_series`.
#[cfg(feature = "dtype-date")]
impl From<DateChunked> for Series {
    fn from(dates: DateChunked) -> Self {
        dates.into_series()
    }
}
904
/// Wraps a [`DatetimeChunked`] in a [`Series`] via `into_series`.
#[cfg(feature = "dtype-datetime")]
impl From<DatetimeChunked> for Series {
    fn from(datetimes: DatetimeChunked) -> Self {
        datetimes.into_series()
    }
}
911
/// Wraps a [`DurationChunked`] in a [`Series`] via `into_series`.
#[cfg(feature = "dtype-duration")]
impl From<DurationChunked> for Series {
    fn from(durations: DurationChunked) -> Self {
        durations.into_series()
    }
}
918
/// Wraps a [`TimeChunked`] in a [`Series`] via `into_series`.
#[cfg(feature = "dtype-time")]
impl From<TimeChunked> for Series {
    fn from(times: TimeChunked) -> Self {
        times.into_series()
    }
}
925
926unsafe impl IntoSeries for Arc<dyn SeriesTrait> {
927    fn into_series(self) -> Series {
928        Series(self)
929    }
930}
931
932unsafe impl IntoSeries for Series {
933    fn is_series() -> bool {
934        true
935    }
936
937    fn into_series(self) -> Series {
938        self
939    }
940}
941
942fn new_null(name: PlSmallStr, chunks: &[ArrayRef]) -> Series {
943    let len = chunks.iter().map(|arr| arr.len()).sum();
944    Series::new_null(name, len)
945}