polars_core/series/
from.rs

1#[cfg(feature = "dtype-categorical")]
2use arrow::compute::concatenate::concatenate_unchecked;
3use arrow::datatypes::Metadata;
4use arrow::offset::OffsetsBuffer;
5#[cfg(any(
6    feature = "dtype-date",
7    feature = "dtype-datetime",
8    feature = "dtype-time",
9    feature = "dtype-duration"
10))]
11use arrow::temporal_conversions::*;
12use polars_compute::cast::cast_unchecked as cast;
13use polars_error::feature_gated;
14use polars_utils::itertools::Itertools;
15
16use crate::chunked_array::cast::{CastOptions, cast_chunks};
17#[cfg(feature = "object")]
18use crate::chunked_array::object::extension::polars_extension::PolarsExtension;
19#[cfg(feature = "object")]
20use crate::chunked_array::object::registry::get_object_builder;
21use crate::prelude::*;
22
23impl Series {
24    pub fn from_array<A: ParameterFreeDtypeStaticArray>(name: PlSmallStr, array: A) -> Self {
25        unsafe {
26            Self::from_chunks_and_dtype_unchecked(
27                name,
28                vec![Box::new(array)],
29                &DataType::from_arrow_dtype(&A::get_dtype()),
30            )
31        }
32    }
33
34    pub fn from_chunk_and_dtype(
35        name: PlSmallStr,
36        chunk: ArrayRef,
37        dtype: &DataType,
38    ) -> PolarsResult<Self> {
39        if &dtype.to_physical().to_arrow(CompatLevel::newest()) != chunk.dtype() {
40            polars_bail!(
41                InvalidOperation: "cannot create a series of type '{dtype}' of arrow chunk with type '{:?}'",
42                chunk.dtype()
43            );
44        }
45
46        // SAFETY: We check that the datatype matches.
47        let series = unsafe { Self::from_chunks_and_dtype_unchecked(name, vec![chunk], dtype) };
48        Ok(series)
49    }
50
51    /// Takes chunks and a polars datatype and constructs the Series
52    /// This is faster than creating from chunks and an arrow datatype because there is no
53    /// casting involved
54    ///
55    /// # Safety
56    ///
57    /// The caller must ensure that the given `dtype`'s physical type matches all the `ArrayRef` dtypes.
58    pub unsafe fn from_chunks_and_dtype_unchecked(
59        name: PlSmallStr,
60        chunks: Vec<ArrayRef>,
61        dtype: &DataType,
62    ) -> Self {
63        use DataType::*;
64        match dtype {
65            #[cfg(feature = "dtype-i8")]
66            Int8 => Int8Chunked::from_chunks(name, chunks).into_series(),
67            #[cfg(feature = "dtype-i16")]
68            Int16 => Int16Chunked::from_chunks(name, chunks).into_series(),
69            Int32 => Int32Chunked::from_chunks(name, chunks).into_series(),
70            Int64 => Int64Chunked::from_chunks(name, chunks).into_series(),
71            #[cfg(feature = "dtype-u8")]
72            UInt8 => UInt8Chunked::from_chunks(name, chunks).into_series(),
73            #[cfg(feature = "dtype-u16")]
74            UInt16 => UInt16Chunked::from_chunks(name, chunks).into_series(),
75            UInt32 => UInt32Chunked::from_chunks(name, chunks).into_series(),
76            UInt64 => UInt64Chunked::from_chunks(name, chunks).into_series(),
77            #[cfg(feature = "dtype-i128")]
78            Int128 => Int128Chunked::from_chunks(name, chunks).into_series(),
79            #[cfg(feature = "dtype-date")]
80            Date => Int32Chunked::from_chunks(name, chunks)
81                .into_date()
82                .into_series(),
83            #[cfg(feature = "dtype-time")]
84            Time => Int64Chunked::from_chunks(name, chunks)
85                .into_time()
86                .into_series(),
87            #[cfg(feature = "dtype-duration")]
88            Duration(tu) => Int64Chunked::from_chunks(name, chunks)
89                .into_duration(*tu)
90                .into_series(),
91            #[cfg(feature = "dtype-datetime")]
92            Datetime(tu, tz) => Int64Chunked::from_chunks(name, chunks)
93                .into_datetime(*tu, tz.clone())
94                .into_series(),
95            #[cfg(feature = "dtype-decimal")]
96            Decimal(precision, scale) => Int128Chunked::from_chunks(name, chunks)
97                .into_decimal_unchecked(
98                    *precision,
99                    scale.unwrap_or_else(|| unreachable!("scale should be set")),
100                )
101                .into_series(),
102            #[cfg(feature = "dtype-array")]
103            Array(_, _) => {
104                ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone())
105                    .into_series()
106            },
107            List(_) => ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone())
108                .into_series(),
109            String => StringChunked::from_chunks(name, chunks).into_series(),
110            Binary => BinaryChunked::from_chunks(name, chunks).into_series(),
111            #[cfg(feature = "dtype-categorical")]
112            dt @ (Categorical(_, _) | Enum(_, _)) => {
113                with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
114                    let phys = ChunkedArray::from_chunks(name, chunks);
115                    CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(phys, dt.clone()).into_series()
116                })
117            },
118            Boolean => BooleanChunked::from_chunks(name, chunks).into_series(),
119            Float32 => Float32Chunked::from_chunks(name, chunks).into_series(),
120            Float64 => Float64Chunked::from_chunks(name, chunks).into_series(),
121            BinaryOffset => BinaryOffsetChunked::from_chunks(name, chunks).into_series(),
122            #[cfg(feature = "dtype-struct")]
123            Struct(_) => {
124                let mut ca =
125                    StructChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone());
126                StructChunked::propagate_nulls_mut(&mut ca);
127                ca.into_series()
128            },
129            #[cfg(feature = "object")]
130            Object(_) => {
131                if let Some(arr) = chunks[0].as_any().downcast_ref::<FixedSizeBinaryArray>() {
132                    assert_eq!(chunks.len(), 1);
133                    // SAFETY:
134                    // this is highly unsafe. it will dereference a raw ptr on the heap
135                    // make sure the ptr is allocated and from this pid
136                    // (the pid is checked before dereference)
137                    {
138                        let pe = PolarsExtension::new(arr.clone());
139                        let s = pe.get_series(&name);
140                        pe.take_and_forget();
141                        s
142                    }
143                } else {
144                    unsafe { get_object_builder(name, 0).from_chunks(chunks) }
145                }
146            },
147            Null => new_null(name, &chunks),
148            Unknown(_) => {
149                panic!("dtype is unknown; consider supplying data-types for all operations")
150            },
151            #[allow(unreachable_patterns)]
152            _ => unreachable!(),
153        }
154    }
155
156    /// # Safety
157    /// The caller must ensure that the given `dtype` matches all the `ArrayRef` dtypes.
158    pub unsafe fn _try_from_arrow_unchecked(
159        name: PlSmallStr,
160        chunks: Vec<ArrayRef>,
161        dtype: &ArrowDataType,
162    ) -> PolarsResult<Self> {
163        Self::_try_from_arrow_unchecked_with_md(name, chunks, dtype, None)
164    }
165
166    /// Create a new Series without checking if the inner dtype of the chunks is correct
167    ///
168    /// # Safety
169    /// The caller must ensure that the given `dtype` matches all the `ArrayRef` dtypes.
170    pub unsafe fn _try_from_arrow_unchecked_with_md(
171        name: PlSmallStr,
172        chunks: Vec<ArrayRef>,
173        dtype: &ArrowDataType,
174        md: Option<&Metadata>,
175    ) -> PolarsResult<Self> {
176        match dtype {
177            ArrowDataType::Utf8View => Ok(StringChunked::from_chunks(name, chunks).into_series()),
178            ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
179                let chunks =
180                    cast_chunks(&chunks, &DataType::String, CastOptions::NonStrict).unwrap();
181                Ok(StringChunked::from_chunks(name, chunks).into_series())
182            },
183            ArrowDataType::BinaryView => Ok(BinaryChunked::from_chunks(name, chunks).into_series()),
184            ArrowDataType::LargeBinary => {
185                if let Some(md) = md {
186                    if md.maintain_type() {
187                        return Ok(BinaryOffsetChunked::from_chunks(name, chunks).into_series());
188                    }
189                }
190                let chunks =
191                    cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict).unwrap();
192                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
193            },
194            ArrowDataType::Binary => {
195                let chunks =
196                    cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict).unwrap();
197                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
198            },
199            ArrowDataType::List(_) | ArrowDataType::LargeList(_) => {
200                let (chunks, dtype) = to_physical_and_dtype(chunks, md);
201                unsafe {
202                    Ok(
203                        ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype)
204                            .into_series(),
205                    )
206                }
207            },
208            #[cfg(feature = "dtype-array")]
209            ArrowDataType::FixedSizeList(_, _) => {
210                let (chunks, dtype) = to_physical_and_dtype(chunks, md);
211                unsafe {
212                    Ok(
213                        ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype)
214                            .into_series(),
215                    )
216                }
217            },
218            ArrowDataType::Boolean => Ok(BooleanChunked::from_chunks(name, chunks).into_series()),
219            #[cfg(feature = "dtype-u8")]
220            ArrowDataType::UInt8 => Ok(UInt8Chunked::from_chunks(name, chunks).into_series()),
221            #[cfg(feature = "dtype-u16")]
222            ArrowDataType::UInt16 => Ok(UInt16Chunked::from_chunks(name, chunks).into_series()),
223            ArrowDataType::UInt32 => Ok(UInt32Chunked::from_chunks(name, chunks).into_series()),
224            ArrowDataType::UInt64 => Ok(UInt64Chunked::from_chunks(name, chunks).into_series()),
225            #[cfg(feature = "dtype-i8")]
226            ArrowDataType::Int8 => Ok(Int8Chunked::from_chunks(name, chunks).into_series()),
227            #[cfg(feature = "dtype-i16")]
228            ArrowDataType::Int16 => Ok(Int16Chunked::from_chunks(name, chunks).into_series()),
229            ArrowDataType::Int32 => Ok(Int32Chunked::from_chunks(name, chunks).into_series()),
230            ArrowDataType::Int64 => Ok(Int64Chunked::from_chunks(name, chunks).into_series()),
231            ArrowDataType::Int128 => feature_gated!(
232                "dtype-i128",
233                Ok(Int128Chunked::from_chunks(name, chunks).into_series())
234            ),
235            ArrowDataType::Float16 => {
236                let chunks =
237                    cast_chunks(&chunks, &DataType::Float32, CastOptions::NonStrict).unwrap();
238                Ok(Float32Chunked::from_chunks(name, chunks).into_series())
239            },
240            ArrowDataType::Float32 => Ok(Float32Chunked::from_chunks(name, chunks).into_series()),
241            ArrowDataType::Float64 => Ok(Float64Chunked::from_chunks(name, chunks).into_series()),
242            #[cfg(feature = "dtype-date")]
243            ArrowDataType::Date32 => {
244                let chunks =
245                    cast_chunks(&chunks, &DataType::Int32, CastOptions::Overflowing).unwrap();
246                Ok(Int32Chunked::from_chunks(name, chunks)
247                    .into_date()
248                    .into_series())
249            },
250            #[cfg(feature = "dtype-datetime")]
251            ArrowDataType::Date64 => {
252                let chunks =
253                    cast_chunks(&chunks, &DataType::Int64, CastOptions::Overflowing).unwrap();
254                let ca = Int64Chunked::from_chunks(name, chunks);
255                Ok(ca.into_datetime(TimeUnit::Milliseconds, None).into_series())
256            },
257            #[cfg(feature = "dtype-datetime")]
258            ArrowDataType::Timestamp(tu, tz) => {
259                let tz = TimeZone::opt_try_new(tz.clone())?;
260                let chunks =
261                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
262                let s = Int64Chunked::from_chunks(name, chunks)
263                    .into_datetime(tu.into(), tz)
264                    .into_series();
265                Ok(match tu {
266                    ArrowTimeUnit::Second => &s * MILLISECONDS,
267                    ArrowTimeUnit::Millisecond => s,
268                    ArrowTimeUnit::Microsecond => s,
269                    ArrowTimeUnit::Nanosecond => s,
270                })
271            },
272            #[cfg(feature = "dtype-duration")]
273            ArrowDataType::Duration(tu) => {
274                let chunks =
275                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
276                let s = Int64Chunked::from_chunks(name, chunks)
277                    .into_duration(tu.into())
278                    .into_series();
279                Ok(match tu {
280                    ArrowTimeUnit::Second => &s * MILLISECONDS,
281                    ArrowTimeUnit::Millisecond => s,
282                    ArrowTimeUnit::Microsecond => s,
283                    ArrowTimeUnit::Nanosecond => s,
284                })
285            },
286            #[cfg(feature = "dtype-time")]
287            ArrowDataType::Time64(tu) | ArrowDataType::Time32(tu) => {
288                let mut chunks = chunks;
289                if matches!(dtype, ArrowDataType::Time32(_)) {
290                    chunks =
291                        cast_chunks(&chunks, &DataType::Int32, CastOptions::NonStrict).unwrap();
292                }
293                let chunks =
294                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
295                let s = Int64Chunked::from_chunks(name, chunks)
296                    .into_time()
297                    .into_series();
298                Ok(match tu {
299                    ArrowTimeUnit::Second => &s * NANOSECONDS,
300                    ArrowTimeUnit::Millisecond => &s * 1_000_000,
301                    ArrowTimeUnit::Microsecond => &s * 1_000,
302                    ArrowTimeUnit::Nanosecond => s,
303                })
304            },
305            ArrowDataType::Decimal32(precision, scale) => {
306                feature_gated!("dtype-decimal", {
307                    polars_ensure!(*scale <= *precision, InvalidOperation: "invalid decimal precision and scale (prec={precision}, scale={scale})");
308                    polars_ensure!(*precision <= 38, InvalidOperation: "polars does not support decimals above 38 precision");
309
310                    let mut chunks = chunks;
311                    for chunk in chunks.iter_mut() {
312                        let old_chunk = chunk
313                            .as_any_mut()
314                            .downcast_mut::<PrimitiveArray<i32>>()
315                            .unwrap();
316
317                        // For now, we just cast the whole data to i128.
318                        let (_, values, validity) = std::mem::take(old_chunk).into_inner();
319                        *chunk = PrimitiveArray::new(
320                            ArrowDataType::Int128,
321                            values.iter().map(|&v| v as i128).collect(),
322                            validity,
323                        )
324                        .to_boxed();
325                    }
326
327                    // @NOTE: We cannot cast here as that will lower the scale.
328                    let s = Int128Chunked::from_chunks(name, chunks)
329                        .into_decimal_unchecked(Some(*precision), *scale)
330                        .into_series();
331                    Ok(s)
332                })
333            },
334            ArrowDataType::Decimal64(precision, scale) => {
335                feature_gated!("dtype-decimal", {
336                    polars_ensure!(*scale <= *precision, InvalidOperation: "invalid decimal precision and scale (prec={precision}, scale={scale})");
337                    polars_ensure!(*precision <= 38, InvalidOperation: "polars does not support decimals above 38 precision");
338
339                    let mut chunks = chunks;
340                    for chunk in chunks.iter_mut() {
341                        let old_chunk = chunk
342                            .as_any_mut()
343                            .downcast_mut::<PrimitiveArray<i64>>()
344                            .unwrap();
345
346                        // For now, we just cast the whole data to i128.
347                        let (_, values, validity) = std::mem::take(old_chunk).into_inner();
348                        *chunk = PrimitiveArray::new(
349                            ArrowDataType::Int128,
350                            values.iter().map(|&v| v as i128).collect(),
351                            validity,
352                        )
353                        .to_boxed();
354                    }
355
356                    // @NOTE: We cannot cast here as that will lower the scale.
357                    let s = Int128Chunked::from_chunks(name, chunks)
358                        .into_decimal_unchecked(Some(*precision), *scale)
359                        .into_series();
360                    Ok(s)
361                })
362            },
363            ArrowDataType::Decimal(precision, scale)
364            | ArrowDataType::Decimal256(precision, scale) => {
365                feature_gated!("dtype-decimal", {
366                    polars_ensure!(*scale <= *precision, InvalidOperation: "invalid decimal precision and scale (prec={precision}, scale={scale})");
367                    polars_ensure!(*precision <= 38, InvalidOperation: "polars does not support decimals above 38 precision");
368
369                    // Q? I don't think this is correct for Decimal256?
370                    let mut chunks = chunks;
371                    for chunk in chunks.iter_mut() {
372                        *chunk = std::mem::take(
373                            chunk
374                                .as_any_mut()
375                                .downcast_mut::<PrimitiveArray<i128>>()
376                                .unwrap(),
377                        )
378                        .to(ArrowDataType::Int128)
379                        .to_boxed();
380                    }
381
382                    // @NOTE: We cannot cast here as that will lower the scale.
383                    let s = Int128Chunked::from_chunks(name, chunks)
384                        .into_decimal_unchecked(Some(*precision), *scale)
385                        .into_series();
386                    Ok(s)
387                })
388            },
389            ArrowDataType::Null => Ok(new_null(name, &chunks)),
390            #[cfg(not(feature = "dtype-categorical"))]
391            ArrowDataType::Dictionary(_, _, _) => {
392                panic!("activate dtype-categorical to convert dictionary arrays")
393            },
394            #[cfg(feature = "dtype-categorical")]
395            ArrowDataType::Dictionary(key_type, _, _) => {
396                use arrow::datatypes::IntegerType as I;
397
398                // Don't spuriously call this; triggers a read on mmapped data.
399                let arr = if chunks.len() > 1 {
400                    concatenate_unchecked(&chunks)?
401                } else {
402                    chunks[0].clone()
403                };
404
405                let polars_dtype = DataType::from_arrow(dtype, md);
406                if matches!(
407                    polars_dtype,
408                    DataType::Categorical(_, _) | DataType::Enum(_, _)
409                ) {
410                    macro_rules! unpack_categorical_chunked {
411                        ($dt:ty) => {{
412                            let arr = arr.as_any().downcast_ref::<DictionaryArray<$dt>>().unwrap();
413                            let keys = arr.keys();
414                            let values = arr.values();
415                            let values = cast(&**values, &ArrowDataType::Utf8View)?;
416                            let values = values.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
417                            with_match_categorical_physical_type!(
418                                polars_dtype.cat_physical().unwrap(),
419                                |$C| {
420                                    let ca = CategoricalChunked::<$C>::from_str_iter(
421                                        name,
422                                        polars_dtype,
423                                        keys.iter().map(|k| {
424                                            let k: usize = (*k?).try_into().ok()?;
425                                            values.get(k)
426                                        }),
427                                    )?;
428                                    Ok(ca.into_series())
429                                }
430                            )
431                        }};
432                    }
433
434                    match key_type {
435                        I::Int8 => unpack_categorical_chunked!(i8),
436                        I::UInt8 => unpack_categorical_chunked!(u8),
437                        I::Int16 => unpack_categorical_chunked!(i16),
438                        I::UInt16 => unpack_categorical_chunked!(u16),
439                        I::Int32 => unpack_categorical_chunked!(i32),
440                        I::UInt32 => unpack_categorical_chunked!(u32),
441                        I::Int64 => unpack_categorical_chunked!(i64),
442                        I::UInt64 => unpack_categorical_chunked!(u64),
443                        _ => polars_bail!(
444                            ComputeError: "unsupported arrow key type: {key_type:?}"
445                        ),
446                    }
447                } else {
448                    macro_rules! unpack_keys_values {
449                        ($dt:ty) => {{
450                            let arr = arr.as_any().downcast_ref::<DictionaryArray<$dt>>().unwrap();
451                            let keys = arr.keys();
452                            let keys = polars_compute::cast::primitive_to_primitive::<
453                                $dt,
454                                <IdxType as PolarsNumericType>::Native,
455                            >(keys, &IDX_DTYPE.to_arrow(CompatLevel::newest()));
456                            (keys, arr.values())
457                        }};
458                    }
459
460                    let (keys, values) = match key_type {
461                        I::Int8 => unpack_keys_values!(i8),
462                        I::UInt8 => unpack_keys_values!(u8),
463                        I::Int16 => unpack_keys_values!(i16),
464                        I::UInt16 => unpack_keys_values!(u16),
465                        I::Int32 => unpack_keys_values!(i32),
466                        I::UInt32 => unpack_keys_values!(u32),
467                        I::Int64 => unpack_keys_values!(i64),
468                        I::UInt64 => unpack_keys_values!(u64),
469                        _ => polars_bail!(
470                            ComputeError: "unsupported arrow key type: {key_type:?}"
471                        ),
472                    };
473
474                    let values = Series::_try_from_arrow_unchecked_with_md(
475                        name,
476                        vec![values.clone()],
477                        values.dtype(),
478                        None,
479                    )?;
480
481                    values.take(&IdxCa::from_chunks_and_dtype(
482                        PlSmallStr::EMPTY,
483                        vec![keys.to_boxed()],
484                        IDX_DTYPE,
485                    ))
486                }
487            },
488            #[cfg(feature = "object")]
489            ArrowDataType::Extension(ext)
490                if ext.name == EXTENSION_NAME && ext.metadata.is_some() =>
491            {
492                assert_eq!(chunks.len(), 1);
493                let arr = chunks[0]
494                    .as_any()
495                    .downcast_ref::<FixedSizeBinaryArray>()
496                    .unwrap();
497                // SAFETY:
498                // this is highly unsafe. it will dereference a raw ptr on the heap
499                // make sure the ptr is allocated and from this pid
500                // (the pid is checked before dereference)
501                let s = {
502                    let pe = PolarsExtension::new(arr.clone());
503                    let s = pe.get_series(&name);
504                    pe.take_and_forget();
505                    s
506                };
507                Ok(s)
508            },
509            #[cfg(feature = "dtype-struct")]
510            ArrowDataType::Struct(_) => {
511                let (chunks, dtype) = to_physical_and_dtype(chunks, md);
512
513                unsafe {
514                    let mut ca =
515                        StructChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype);
516                    StructChunked::propagate_nulls_mut(&mut ca);
517                    Ok(ca.into_series())
518                }
519            },
520            ArrowDataType::FixedSizeBinary(_) => {
521                let chunks = cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict)?;
522                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
523            },
524            ArrowDataType::Map(field, _is_ordered) => {
525                let struct_arrays = chunks
526                    .iter()
527                    .map(|arr| {
528                        let arr = arr.as_any().downcast_ref::<MapArray>().unwrap();
529                        arr.field().clone()
530                    })
531                    .collect::<Vec<_>>();
532
533                let (phys_struct_arrays, dtype) =
534                    to_physical_and_dtype(struct_arrays, field.metadata.as_deref());
535
536                let chunks = chunks
537                    .iter()
538                    .zip(phys_struct_arrays)
539                    .map(|(arr, values)| {
540                        let arr = arr.as_any().downcast_ref::<MapArray>().unwrap();
541                        let offsets: &OffsetsBuffer<i32> = arr.offsets();
542
543                        let validity = values.validity().cloned();
544
545                        Box::from(ListArray::<i64>::new(
546                            ListArray::<i64>::default_datatype(values.dtype().clone()),
547                            OffsetsBuffer::<i64>::from(offsets),
548                            values,
549                            validity,
550                        )) as ArrayRef
551                    })
552                    .collect();
553
554                unsafe {
555                    let out = ListChunked::from_chunks_and_dtype_unchecked(
556                        name,
557                        chunks,
558                        DataType::List(Box::new(dtype)),
559                    );
560
561                    Ok(out.into_series())
562                }
563            },
564            dt => polars_bail!(ComputeError: "cannot create series from {:?}", dt),
565        }
566    }
567}
568
569fn convert<F: Fn(&dyn Array) -> ArrayRef>(arr: &[ArrayRef], f: F) -> Vec<ArrayRef> {
570    arr.iter().map(|arr| f(&**arr)).collect()
571}
572
573/// Converts to physical types and bubbles up the correct [`DataType`].
574#[allow(clippy::only_used_in_recursion)]
575unsafe fn to_physical_and_dtype(
576    arrays: Vec<ArrayRef>,
577    md: Option<&Metadata>,
578) -> (Vec<ArrayRef>, DataType) {
579    match arrays[0].dtype() {
580        ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
581            let chunks = cast_chunks(&arrays, &DataType::String, CastOptions::NonStrict).unwrap();
582            (chunks, DataType::String)
583        },
584        ArrowDataType::Binary | ArrowDataType::LargeBinary | ArrowDataType::FixedSizeBinary(_) => {
585            let chunks = cast_chunks(&arrays, &DataType::Binary, CastOptions::NonStrict).unwrap();
586            (chunks, DataType::Binary)
587        },
588        #[allow(unused_variables)]
589        dt @ ArrowDataType::Dictionary(_, _, _) => {
590            feature_gated!("dtype-categorical", {
591                let s = unsafe {
592                    let dt = dt.clone();
593                    Series::_try_from_arrow_unchecked_with_md(PlSmallStr::EMPTY, arrays, &dt, md)
594                }
595                .unwrap();
596                (s.chunks().clone(), s.dtype().clone())
597            })
598        },
599        ArrowDataType::List(field) => {
600            let out = convert(&arrays, |arr| {
601                cast(arr, &ArrowDataType::LargeList(field.clone())).unwrap()
602            });
603            to_physical_and_dtype(out, md)
604        },
605        #[cfg(feature = "dtype-array")]
606        ArrowDataType::FixedSizeList(field, size) => {
607            let values = arrays
608                .iter()
609                .map(|arr| {
610                    let arr = arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
611                    arr.values().clone()
612                })
613                .collect::<Vec<_>>();
614
615            let (converted_values, dtype) =
616                to_physical_and_dtype(values, field.metadata.as_deref());
617
618            let arrays = arrays
619                .iter()
620                .zip(converted_values)
621                .map(|(arr, values)| {
622                    let arr = arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
623
624                    let dtype = FixedSizeListArray::default_datatype(values.dtype().clone(), *size);
625                    Box::from(FixedSizeListArray::new(
626                        dtype,
627                        arr.len(),
628                        values,
629                        arr.validity().cloned(),
630                    )) as ArrayRef
631                })
632                .collect();
633            (arrays, DataType::Array(Box::new(dtype), *size))
634        },
635        ArrowDataType::LargeList(field) => {
636            let values = arrays
637                .iter()
638                .map(|arr| {
639                    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
640                    arr.values().clone()
641                })
642                .collect::<Vec<_>>();
643
644            let (converted_values, dtype) =
645                to_physical_and_dtype(values, field.metadata.as_deref());
646
647            let arrays = arrays
648                .iter()
649                .zip(converted_values)
650                .map(|(arr, values)| {
651                    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
652
653                    let dtype = ListArray::<i64>::default_datatype(values.dtype().clone());
654                    Box::from(ListArray::<i64>::new(
655                        dtype,
656                        arr.offsets().clone(),
657                        values,
658                        arr.validity().cloned(),
659                    )) as ArrayRef
660                })
661                .collect();
662            (arrays, DataType::List(Box::new(dtype)))
663        },
664        ArrowDataType::Struct(_fields) => {
665            feature_gated!("dtype-struct", {
666                let mut pl_fields = None;
667                let arrays = arrays
668                    .iter()
669                    .map(|arr| {
670                        let arr = arr.as_any().downcast_ref::<StructArray>().unwrap();
671                        let (values, dtypes): (Vec<_>, Vec<_>) = arr
672                            .values()
673                            .iter()
674                            .zip(_fields.iter())
675                            .map(|(value, field)| {
676                                let mut out = to_physical_and_dtype(
677                                    vec![value.clone()],
678                                    field.metadata.as_deref(),
679                                );
680                                (out.0.pop().unwrap(), out.1)
681                            })
682                            .unzip();
683
684                        let arrow_fields = values
685                            .iter()
686                            .zip(_fields.iter())
687                            .map(|(arr, field)| {
688                                ArrowField::new(field.name.clone(), arr.dtype().clone(), true)
689                            })
690                            .collect();
691                        let arrow_array = Box::new(StructArray::new(
692                            ArrowDataType::Struct(arrow_fields),
693                            arr.len(),
694                            values,
695                            arr.validity().cloned(),
696                        )) as ArrayRef;
697
698                        if pl_fields.is_none() {
699                            pl_fields = Some(
700                                _fields
701                                    .iter()
702                                    .zip(dtypes)
703                                    .map(|(field, dtype)| Field::new(field.name.clone(), dtype))
704                                    .collect_vec(),
705                            )
706                        }
707
708                        arrow_array
709                    })
710                    .collect_vec();
711
712                (arrays, DataType::Struct(pl_fields.unwrap()))
713            })
714        },
715        // Use Series architecture to convert nested logical types to physical.
716        dt @ (ArrowDataType::Duration(_)
717        | ArrowDataType::Time32(_)
718        | ArrowDataType::Time64(_)
719        | ArrowDataType::Timestamp(_, _)
720        | ArrowDataType::Date32
721        | ArrowDataType::Decimal(_, _)
722        | ArrowDataType::Date64
723        | ArrowDataType::Map(_, _)) => {
724            let dt = dt.clone();
725            let mut s = Series::_try_from_arrow_unchecked(PlSmallStr::EMPTY, arrays, &dt).unwrap();
726            let dtype = s.dtype().clone();
727            (std::mem::take(s.chunks_mut()), dtype)
728        },
729        dt => {
730            let dtype = DataType::from_arrow(dt, md);
731            (arrays, dtype)
732        },
733    }
734}
735
736fn check_types(chunks: &[ArrayRef]) -> PolarsResult<ArrowDataType> {
737    let mut chunks_iter = chunks.iter();
738    let dtype: ArrowDataType = chunks_iter
739        .next()
740        .ok_or_else(|| polars_err!(NoData: "expected at least one array-ref"))?
741        .dtype()
742        .clone();
743
744    for chunk in chunks_iter {
745        if chunk.dtype() != &dtype {
746            polars_bail!(
747                ComputeError: "cannot create series from multiple arrays with different types"
748            );
749        }
750    }
751    Ok(dtype)
752}
753
754impl Series {
755    pub fn try_new<T>(
756        name: PlSmallStr,
757        data: T,
758    ) -> Result<Self, <(PlSmallStr, T) as TryInto<Self>>::Error>
759    where
760        (PlSmallStr, T): TryInto<Self>,
761    {
762        // # TODO
763        // * Remove the TryFrom<tuple> impls in favor of this
764        <(PlSmallStr, T) as TryInto<Self>>::try_into((name, data))
765    }
766}
767
768impl TryFrom<(PlSmallStr, Vec<ArrayRef>)> for Series {
769    type Error = PolarsError;
770
771    fn try_from(name_arr: (PlSmallStr, Vec<ArrayRef>)) -> PolarsResult<Self> {
772        let (name, chunks) = name_arr;
773
774        let dtype = check_types(&chunks)?;
775        // SAFETY:
776        // dtype is checked
777        unsafe { Series::_try_from_arrow_unchecked(name, chunks, &dtype) }
778    }
779}
780
781impl TryFrom<(PlSmallStr, ArrayRef)> for Series {
782    type Error = PolarsError;
783
784    fn try_from(name_arr: (PlSmallStr, ArrayRef)) -> PolarsResult<Self> {
785        let (name, arr) = name_arr;
786        Series::try_from((name, vec![arr]))
787    }
788}
789
790impl TryFrom<(&ArrowField, Vec<ArrayRef>)> for Series {
791    type Error = PolarsError;
792
793    fn try_from(field_arr: (&ArrowField, Vec<ArrayRef>)) -> PolarsResult<Self> {
794        let (field, chunks) = field_arr;
795
796        let dtype = check_types(&chunks)?;
797
798        // SAFETY:
799        // dtype is checked
800        unsafe {
801            Series::_try_from_arrow_unchecked_with_md(
802                field.name.clone(),
803                chunks,
804                &dtype,
805                field.metadata.as_deref(),
806            )
807        }
808    }
809}
810
811impl TryFrom<(&ArrowField, ArrayRef)> for Series {
812    type Error = PolarsError;
813
814    fn try_from(field_arr: (&ArrowField, ArrayRef)) -> PolarsResult<Self> {
815        let (field, arr) = field_arr;
816        Series::try_from((field, vec![arr]))
817    }
818}
819
820/// Used to convert a [`ChunkedArray`], `&dyn SeriesTrait` and [`Series`]
821/// into a [`Series`].
822/// # Safety
823///
824/// This trait is marked `unsafe` as the `is_series` return is used
825/// to transmute to `Series`. This must always return `false` except
826/// for `Series` structs.
827pub unsafe trait IntoSeries {
828    fn is_series() -> bool {
829        false
830    }
831
832    fn into_series(self) -> Series
833    where
834        Self: Sized;
835}
836
837impl<T> From<ChunkedArray<T>> for Series
838where
839    T: PolarsDataType,
840    ChunkedArray<T>: IntoSeries,
841{
842    fn from(ca: ChunkedArray<T>) -> Self {
843        ca.into_series()
844    }
845}
846
#[cfg(feature = "dtype-date")]
impl From<DateChunked> for Series {
    /// Converts the date array through its [`IntoSeries`] implementation.
    fn from(dates: DateChunked) -> Self {
        <DateChunked as IntoSeries>::into_series(dates)
    }
}
853
#[cfg(feature = "dtype-datetime")]
impl From<DatetimeChunked> for Series {
    /// Converts the datetime array through its [`IntoSeries`] implementation.
    fn from(datetimes: DatetimeChunked) -> Self {
        <DatetimeChunked as IntoSeries>::into_series(datetimes)
    }
}
860
#[cfg(feature = "dtype-duration")]
impl From<DurationChunked> for Series {
    /// Converts the duration array through its [`IntoSeries`] implementation.
    fn from(durations: DurationChunked) -> Self {
        <DurationChunked as IntoSeries>::into_series(durations)
    }
}
867
#[cfg(feature = "dtype-time")]
impl From<TimeChunked> for Series {
    /// Converts the time array through its [`IntoSeries`] implementation.
    fn from(times: TimeChunked) -> Self {
        <TimeChunked as IntoSeries>::into_series(times)
    }
}
874
875unsafe impl IntoSeries for Arc<dyn SeriesTrait> {
876    fn into_series(self) -> Series {
877        Series(self)
878    }
879}
880
881unsafe impl IntoSeries for Series {
882    fn is_series() -> bool {
883        true
884    }
885
886    fn into_series(self) -> Series {
887        self
888    }
889}
890
891fn new_null(name: PlSmallStr, chunks: &[ArrayRef]) -> Series {
892    let len = chunks.iter().map(|arr| arr.len()).sum();
893    Series::new_null(name, len)
894}