polars_core/series/
any_value.rs

1use std::borrow::Cow;
2use std::fmt::Write;
3
4use arrow::bitmap::MutableBitmap;
5
6use crate::chunked_array::builder::{AnonymousOwnedListBuilder, get_list_builder};
7use crate::prelude::*;
8use crate::utils::any_values_to_supertype;
9
10impl<'a, T: AsRef<[AnyValue<'a>]>> NamedFrom<T, [AnyValue<'a>]> for Series {
11    /// Construct a new [`Series`] from a collection of [`AnyValue`].
12    ///
13    /// # Panics
14    ///
15    /// Panics if the values do not all share the same data type (with the exception
16    /// of [`DataType::Null`], which is always allowed).
17    ///
18    /// [`AnyValue`]: crate::datatypes::AnyValue
19    fn new(name: PlSmallStr, values: T) -> Self {
20        let values = values.as_ref();
21        Series::from_any_values(name, values, true).expect("data types of values should match")
22    }
23}
24
25fn initialize_empty_categorical_revmap_rec(dtype: &DataType) -> Cow<DataType> {
26    use DataType as T;
27    match dtype {
28        #[cfg(feature = "dtype-categorical")]
29        T::Categorical(None, o) => {
30            Cow::Owned(T::Categorical(Some(Arc::new(RevMapping::default())), *o))
31        },
32        T::List(inner_dtype) => match initialize_empty_categorical_revmap_rec(inner_dtype) {
33            Cow::Owned(inner_dtype) => Cow::Owned(T::List(Box::new(inner_dtype))),
34            _ => Cow::Borrowed(dtype),
35        },
36        #[cfg(feature = "dtype-array")]
37        T::Array(inner_dtype, width) => {
38            match initialize_empty_categorical_revmap_rec(inner_dtype) {
39                Cow::Owned(inner_dtype) => Cow::Owned(T::Array(Box::new(inner_dtype), *width)),
40                _ => Cow::Borrowed(dtype),
41            }
42        },
43        #[cfg(feature = "dtype-struct")]
44        T::Struct(fields) => {
45            for (i, field) in fields.iter().enumerate() {
46                if let Cow::Owned(field_dtype) =
47                    initialize_empty_categorical_revmap_rec(field.dtype())
48                {
49                    let mut new_fields = Vec::with_capacity(fields.len());
50                    new_fields.extend(fields[..i].iter().cloned());
51                    new_fields.push(Field::new(field.name().clone(), field_dtype));
52                    new_fields.extend(fields[i + 1..].iter().map(|field| {
53                        let field_dtype =
54                            initialize_empty_categorical_revmap_rec(field.dtype()).into_owned();
55                        Field::new(field.name().clone(), field_dtype)
56                    }));
57                    return Cow::Owned(T::Struct(new_fields));
58                }
59            }
60
61            Cow::Borrowed(dtype)
62        },
63        _ => Cow::Borrowed(dtype),
64    }
65}
66
67impl Series {
68    /// Construct a new [`Series`] from a slice of AnyValues.
69    ///
70    /// The data type of the resulting Series is determined by the `values`
71    /// and the `strict` parameter:
72    /// - If `strict` is `true`, the data type is equal to the data type of the
73    ///   first non-null value. If any other non-null values do not match this
74    ///   data type, an error is raised.
75    /// - If `strict` is `false`, the data type is the supertype of the `values`.
76    ///   An error is returned if no supertype can be determined.
77    ///   **WARNING**: A full pass over the values is required to determine the supertype.
78    /// - If no values were passed, the resulting data type is `Null`.
79    pub fn from_any_values(
80        name: PlSmallStr,
81        values: &[AnyValue],
82        strict: bool,
83    ) -> PolarsResult<Self> {
84        fn get_first_non_null_dtype(values: &[AnyValue]) -> DataType {
85            let mut all_flat_null = true;
86            let first_non_null = values.iter().find(|av| {
87                if !av.is_null() {
88                    all_flat_null = false
89                };
90                !av.is_nested_null()
91            });
92            match first_non_null {
93                Some(av) => av.dtype(),
94                None => {
95                    if all_flat_null {
96                        DataType::Null
97                    } else {
98                        // Second pass to check for the nested null value that
99                        // toggled `all_flat_null` to false, e.g. a List(Null).
100                        let first_nested_null = values.iter().find(|av| !av.is_null()).unwrap();
101                        first_nested_null.dtype()
102                    }
103                },
104            }
105        }
106        let dtype = if strict {
107            get_first_non_null_dtype(values)
108        } else {
109            // Currently does not work correctly for Decimal because equality is not implemented.
110            any_values_to_supertype(values)?
111        };
112
113        // TODO: Remove this when Decimal data type equality is implemented.
114        #[cfg(feature = "dtype-decimal")]
115        if dtype.is_decimal() {
116            let dtype = DataType::Decimal(None, None);
117            return Self::from_any_values_and_dtype(name, values, &dtype, strict);
118        }
119
120        Self::from_any_values_and_dtype(name, values, &dtype, strict)
121    }
122
123    /// Construct a new [`Series`] with the given `dtype` from a slice of AnyValues.
124    ///
125    /// If `strict` is `true`, an error is returned if the values do not match the given
126    /// data type. If `strict` is `false`, values that do not match the given data type
127    /// are cast. If casting is not possible, the values are set to null instead.
128    pub fn from_any_values_and_dtype(
129        name: PlSmallStr,
130        values: &[AnyValue],
131        dtype: &DataType,
132        strict: bool,
133    ) -> PolarsResult<Self> {
134        if values.is_empty() {
135            return Ok(Self::new_empty(
136                name,
137                // This is given categoricals with empty revmaps, but we need to always return
138                // categoricals with non-empty revmaps.
139                initialize_empty_categorical_revmap_rec(dtype).as_ref(),
140            ));
141        }
142
143        let mut s = match dtype {
144            #[cfg(feature = "dtype-i8")]
145            DataType::Int8 => any_values_to_integer::<Int8Type>(values, strict)?.into_series(),
146            #[cfg(feature = "dtype-i16")]
147            DataType::Int16 => any_values_to_integer::<Int16Type>(values, strict)?.into_series(),
148            DataType::Int32 => any_values_to_integer::<Int32Type>(values, strict)?.into_series(),
149            DataType::Int64 => any_values_to_integer::<Int64Type>(values, strict)?.into_series(),
150            #[cfg(feature = "dtype-i128")]
151            DataType::Int128 => any_values_to_integer::<Int128Type>(values, strict)?.into_series(),
152            #[cfg(feature = "dtype-u8")]
153            DataType::UInt8 => any_values_to_integer::<UInt8Type>(values, strict)?.into_series(),
154            #[cfg(feature = "dtype-u16")]
155            DataType::UInt16 => any_values_to_integer::<UInt16Type>(values, strict)?.into_series(),
156            DataType::UInt32 => any_values_to_integer::<UInt32Type>(values, strict)?.into_series(),
157            DataType::UInt64 => any_values_to_integer::<UInt64Type>(values, strict)?.into_series(),
158            DataType::Float32 => any_values_to_f32(values, strict)?.into_series(),
159            DataType::Float64 => any_values_to_f64(values, strict)?.into_series(),
160            DataType::Boolean => any_values_to_bool(values, strict)?.into_series(),
161            DataType::String => any_values_to_string(values, strict)?.into_series(),
162            DataType::Binary => any_values_to_binary(values, strict)?.into_series(),
163            #[cfg(feature = "dtype-date")]
164            DataType::Date => any_values_to_date(values, strict)?.into_series(),
165            #[cfg(feature = "dtype-time")]
166            DataType::Time => any_values_to_time(values, strict)?.into_series(),
167            #[cfg(feature = "dtype-datetime")]
168            DataType::Datetime(tu, tz) => {
169                any_values_to_datetime(values, *tu, (*tz).clone(), strict)?.into_series()
170            },
171            #[cfg(feature = "dtype-duration")]
172            DataType::Duration(tu) => any_values_to_duration(values, *tu, strict)?.into_series(),
173            #[cfg(feature = "dtype-categorical")]
174            dt @ DataType::Categorical(_, _) => any_values_to_categorical(values, dt, strict)?,
175            #[cfg(feature = "dtype-categorical")]
176            dt @ DataType::Enum(_, _) => any_values_to_enum(values, dt, strict)?,
177            #[cfg(feature = "dtype-decimal")]
178            DataType::Decimal(precision, scale) => {
179                any_values_to_decimal(values, *precision, *scale, strict)?.into_series()
180            },
181            DataType::List(inner) => any_values_to_list(values, inner, strict)?.into_series(),
182            #[cfg(feature = "dtype-array")]
183            DataType::Array(inner, size) => any_values_to_array(values, inner, strict, *size)?
184                .into_series()
185                .cast(&DataType::Array(inner.clone(), *size))?,
186            #[cfg(feature = "dtype-struct")]
187            DataType::Struct(fields) => any_values_to_struct(values, fields, strict)?,
188            #[cfg(feature = "object")]
189            DataType::Object(_) => any_values_to_object(values)?,
190            DataType::Null => Series::new_null(PlSmallStr::EMPTY, values.len()),
191            dt => {
192                polars_bail!(
193                    InvalidOperation:
194                    "constructing a Series with data type {dt:?} from AnyValues is not supported"
195                )
196            },
197        };
198        s.rename(name);
199        Ok(s)
200    }
201}
202
203fn any_values_to_primitive_nonstrict<T: PolarsNumericType>(values: &[AnyValue]) -> ChunkedArray<T> {
204    values
205        .iter()
206        .map(|av| av.extract::<T::Native>())
207        .collect_trusted()
208}
209
210fn any_values_to_integer<T: PolarsIntegerType>(
211    values: &[AnyValue],
212    strict: bool,
213) -> PolarsResult<ChunkedArray<T>> {
214    fn any_values_to_integer_strict<T: PolarsIntegerType>(
215        values: &[AnyValue],
216    ) -> PolarsResult<ChunkedArray<T>> {
217        let mut builder = PrimitiveChunkedBuilder::<T>::new(PlSmallStr::EMPTY, values.len());
218        for av in values {
219            match &av {
220                av if av.is_integer() => {
221                    let opt_val = av.extract::<T::Native>();
222                    let val = match opt_val {
223                        Some(v) => v,
224                        None => return Err(invalid_value_error(&T::get_dtype(), av)),
225                    };
226                    builder.append_value(val)
227                },
228                AnyValue::Null => builder.append_null(),
229                av => return Err(invalid_value_error(&T::get_dtype(), av)),
230            }
231        }
232        Ok(builder.finish())
233    }
234
235    if strict {
236        any_values_to_integer_strict::<T>(values)
237    } else {
238        Ok(any_values_to_primitive_nonstrict::<T>(values))
239    }
240}
241
242fn any_values_to_f32(values: &[AnyValue], strict: bool) -> PolarsResult<Float32Chunked> {
243    fn any_values_to_f32_strict(values: &[AnyValue]) -> PolarsResult<Float32Chunked> {
244        let mut builder =
245            PrimitiveChunkedBuilder::<Float32Type>::new(PlSmallStr::EMPTY, values.len());
246        for av in values {
247            match av {
248                AnyValue::Float32(i) => builder.append_value(*i),
249                AnyValue::Null => builder.append_null(),
250                av => return Err(invalid_value_error(&DataType::Float32, av)),
251            }
252        }
253        Ok(builder.finish())
254    }
255    if strict {
256        any_values_to_f32_strict(values)
257    } else {
258        Ok(any_values_to_primitive_nonstrict::<Float32Type>(values))
259    }
260}
261fn any_values_to_f64(values: &[AnyValue], strict: bool) -> PolarsResult<Float64Chunked> {
262    fn any_values_to_f64_strict(values: &[AnyValue]) -> PolarsResult<Float64Chunked> {
263        let mut builder =
264            PrimitiveChunkedBuilder::<Float64Type>::new(PlSmallStr::EMPTY, values.len());
265        for av in values {
266            match av {
267                AnyValue::Float64(i) => builder.append_value(*i),
268                AnyValue::Float32(i) => builder.append_value(*i as f64),
269                AnyValue::Null => builder.append_null(),
270                av => return Err(invalid_value_error(&DataType::Float64, av)),
271            }
272        }
273        Ok(builder.finish())
274    }
275    if strict {
276        any_values_to_f64_strict(values)
277    } else {
278        Ok(any_values_to_primitive_nonstrict::<Float64Type>(values))
279    }
280}
281
282fn any_values_to_bool(values: &[AnyValue], strict: bool) -> PolarsResult<BooleanChunked> {
283    let mut builder = BooleanChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
284    for av in values {
285        match av {
286            AnyValue::Boolean(b) => builder.append_value(*b),
287            AnyValue::Null => builder.append_null(),
288            av => {
289                if strict {
290                    return Err(invalid_value_error(&DataType::Boolean, av));
291                }
292                match av.cast(&DataType::Boolean) {
293                    AnyValue::Boolean(b) => builder.append_value(b),
294                    _ => builder.append_null(),
295                }
296            },
297        }
298    }
299    Ok(builder.finish())
300}
301
302fn any_values_to_string(values: &[AnyValue], strict: bool) -> PolarsResult<StringChunked> {
303    fn any_values_to_string_strict(values: &[AnyValue]) -> PolarsResult<StringChunked> {
304        let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
305        for av in values {
306            match av {
307                AnyValue::String(s) => builder.append_value(s),
308                AnyValue::StringOwned(s) => builder.append_value(s),
309                AnyValue::Null => builder.append_null(),
310                av => return Err(invalid_value_error(&DataType::String, av)),
311            }
312        }
313        Ok(builder.finish())
314    }
315    fn any_values_to_string_nonstrict(values: &[AnyValue]) -> StringChunked {
316        let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
317        let mut owned = String::new(); // Amortize allocations.
318        for av in values {
319            match av {
320                AnyValue::String(s) => builder.append_value(s),
321                AnyValue::StringOwned(s) => builder.append_value(s),
322                AnyValue::Null => builder.append_null(),
323                AnyValue::Binary(_) | AnyValue::BinaryOwned(_) => builder.append_null(),
324                av => {
325                    owned.clear();
326                    write!(owned, "{av}").unwrap();
327                    builder.append_value(&owned);
328                },
329            }
330        }
331        builder.finish()
332    }
333    if strict {
334        any_values_to_string_strict(values)
335    } else {
336        Ok(any_values_to_string_nonstrict(values))
337    }
338}
339
340fn any_values_to_binary(values: &[AnyValue], strict: bool) -> PolarsResult<BinaryChunked> {
341    fn any_values_to_binary_strict(values: &[AnyValue]) -> PolarsResult<BinaryChunked> {
342        let mut builder = BinaryChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
343        for av in values {
344            match av {
345                AnyValue::Binary(s) => builder.append_value(*s),
346                AnyValue::BinaryOwned(s) => builder.append_value(&**s),
347                AnyValue::Null => builder.append_null(),
348                av => return Err(invalid_value_error(&DataType::Binary, av)),
349            }
350        }
351        Ok(builder.finish())
352    }
353    fn any_values_to_binary_nonstrict(values: &[AnyValue]) -> BinaryChunked {
354        values
355            .iter()
356            .map(|av| match av {
357                AnyValue::Binary(b) => Some(*b),
358                AnyValue::BinaryOwned(b) => Some(&**b),
359                AnyValue::String(s) => Some(s.as_bytes()),
360                AnyValue::StringOwned(s) => Some(s.as_str().as_bytes()),
361                _ => None,
362            })
363            .collect_trusted()
364    }
365    if strict {
366        any_values_to_binary_strict(values)
367    } else {
368        Ok(any_values_to_binary_nonstrict(values))
369    }
370}
371
/// Convert `values` into a [`DateChunked`], built on top of an `Int32` builder.
///
/// `Date` values and nulls are appended directly. Any other value is an error in strict
/// mode; in non-strict mode it is cast to `Date` and becomes null when the cast does not
/// yield a date.
#[cfg(feature = "dtype-date")]
fn any_values_to_date(values: &[AnyValue], strict: bool) -> PolarsResult<DateChunked> {
    let mut builder = PrimitiveChunkedBuilder::<Int32Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Date(raw) => builder.append_value(*raw),
            other if strict => return Err(invalid_value_error(&DataType::Date, other)),
            other => match other.cast(&DataType::Date) {
                AnyValue::Date(raw) => builder.append_value(raw),
                _ => builder.append_null(),
            },
        }
    }
    let physical = builder.finish();
    Ok(physical.into())
}
392
/// Convert `values` into a [`TimeChunked`], built on top of an `Int64` builder.
///
/// `Time` values and nulls are appended directly. Any other value is an error in strict
/// mode; in non-strict mode it is cast to `Time` and becomes null when the cast does not
/// yield a time.
#[cfg(feature = "dtype-time")]
fn any_values_to_time(values: &[AnyValue], strict: bool) -> PolarsResult<TimeChunked> {
    let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Time(raw) => builder.append_value(*raw),
            other if strict => return Err(invalid_value_error(&DataType::Time, other)),
            other => match other.cast(&DataType::Time) {
                AnyValue::Time(raw) => builder.append_value(raw),
                _ => builder.append_null(),
            },
        }
    }
    let physical = builder.finish();
    Ok(physical.into())
}
413
/// Convert `values` into a [`DatetimeChunked`] with the given time unit and time zone.
///
/// Datetime values whose time unit equals `time_unit` are appended as-is (the value's own
/// time zone is not inspected), and nulls are appended as nulls. Any other value is an
/// error in strict mode; in non-strict mode it is cast to the target data type and
/// becomes null when the cast does not yield a datetime.
#[cfg(feature = "dtype-datetime")]
fn any_values_to_datetime(
    values: &[AnyValue],
    time_unit: TimeUnit,
    time_zone: Option<TimeZone>,
    strict: bool,
) -> PolarsResult<DatetimeChunked> {
    let target_dtype = DataType::Datetime(time_unit, time_zone.clone());
    let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Datetime(raw, unit, _) | AnyValue::DatetimeOwned(raw, unit, _)
                if *unit == time_unit =>
            {
                builder.append_value(*raw)
            },
            other if strict => return Err(invalid_value_error(&target_dtype, other)),
            other => match other.cast(&target_dtype) {
                AnyValue::Datetime(raw, _, _) | AnyValue::DatetimeOwned(raw, _, _) => {
                    builder.append_value(raw)
                },
                _ => builder.append_null(),
            },
        }
    }
    let physical = builder.finish();
    Ok(physical.into_datetime(time_unit, time_zone))
}
442
/// Convert `values` into a [`DurationChunked`] with the given time unit.
///
/// Duration values whose time unit equals `time_unit` are appended as-is, and nulls are
/// appended as nulls. Any other value is an error in strict mode; in non-strict mode it
/// is cast to the target data type and becomes null when the cast does not yield a
/// duration.
#[cfg(feature = "dtype-duration")]
fn any_values_to_duration(
    values: &[AnyValue],
    time_unit: TimeUnit,
    strict: bool,
) -> PolarsResult<DurationChunked> {
    let target_dtype = DataType::Duration(time_unit);
    let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => builder.append_null(),
            AnyValue::Duration(raw, unit) if *unit == time_unit => builder.append_value(*raw),
            other if strict => return Err(invalid_value_error(&target_dtype, other)),
            other => match other.cast(&target_dtype) {
                AnyValue::Duration(raw, _) => builder.append_value(raw),
                _ => builder.append_null(),
            },
        }
    }
    let physical = builder.finish();
    Ok(physical.into_duration(time_unit))
}
468
/// Convert `values` into a categorical [`Series`] using the ordering stored in `dtype`.
///
/// Strings are appended directly, and enum/categorical values are appended via the
/// string looked up in their rev-map. Nulls are appended as nulls. In non-strict mode
/// binary values become null and any other value is appended in its `Display` form; in
/// strict mode both cases are an error.
///
/// # Panics
///
/// Panics if `dtype` is not `DataType::Categorical`.
#[cfg(feature = "dtype-categorical")]
fn any_values_to_categorical(
    values: &[AnyValue],
    dtype: &DataType,
    strict: bool,
) -> PolarsResult<Series> {
    let DataType::Categorical(_, ordering) = dtype else {
        panic!("any_values_to_categorical with dtype={dtype:?}")
    };

    let mut builder = CategoricalChunkedBuilder::new(PlSmallStr::EMPTY, values.len(), *ordering);

    // One scratch buffer shared by all formatted values, to amortize allocations.
    let mut scratch = String::new();
    for value in values {
        match value {
            AnyValue::String(text) => builder.append_value(text),
            AnyValue::StringOwned(text) => builder.append_value(text),

            // Enum and categorical values are resolved to their string through the rev-map.
            AnyValue::Enum(idx, rev, _) | AnyValue::Categorical(idx, rev, _) => {
                builder.append_value(rev.get(*idx))
            },
            AnyValue::EnumOwned(idx, rev, _) | AnyValue::CategoricalOwned(idx, rev, _) => {
                builder.append_value(rev.get(*idx))
            },

            AnyValue::Binary(_) | AnyValue::BinaryOwned(_) if !strict => builder.append_null(),
            AnyValue::Null => builder.append_null(),

            av if strict => return Err(invalid_value_error(&DataType::String, av)),
            av => {
                scratch.clear();
                write!(scratch, "{av}").unwrap();
                builder.append_value(&scratch);
            },
        }
    }

    Ok(builder.finish().into_series())
}
513
/// Convert `values` into an enum [`Series`] using the rev-map and ordering stored in
/// `dtype`.
///
/// Strings are appended with `append_str`, enum values with `append_enum`, and
/// categorical values via the string looked up in their own rev-map. Nulls are appended
/// as nulls. In non-strict mode binary values become null and any other value is
/// appended in its `Display` form; in strict mode both cases are an error.
///
/// # Errors
///
/// Returns an error if `dtype` carries no rev-map, if the builder rejects a value, or
/// (in strict mode) if a value is not a string, enum, categorical or null.
///
/// # Panics
///
/// Panics if `dtype` is not `DataType::Enum`.
#[cfg(feature = "dtype-categorical")]
fn any_values_to_enum(values: &[AnyValue], dtype: &DataType, strict: bool) -> PolarsResult<Series> {
    use self::enum_::EnumChunkedBuilder;

    let (rev, ordering) = match dtype {
        DataType::Enum(rev, ordering) => (rev.clone(), ordering),
        // The message names this function so a panic points at the right call site.
        _ => panic!("any_values_to_enum with dtype={dtype:?}"),
    };

    let Some(rev) = rev else {
        polars_bail!(nyi = "Not yet possible to create enum series without a rev-map");
    };

    let mut builder =
        EnumChunkedBuilder::new(PlSmallStr::EMPTY, values.len(), rev, *ordering, strict);

    let mut owned = String::new(); // Amortize allocations.
    for av in values {
        match av {
            AnyValue::String(s) => builder.append_str(s)?,
            AnyValue::StringOwned(s) => builder.append_str(s)?,

            AnyValue::Enum(s, rev, _) => builder.append_enum(*s, rev)?,
            AnyValue::EnumOwned(s, rev, _) => builder.append_enum(*s, rev)?,

            // Categoricals are resolved to their string through their own rev-map.
            AnyValue::Categorical(s, rev, _) => builder.append_str(rev.get(*s))?,
            AnyValue::CategoricalOwned(s, rev, _) => builder.append_str(rev.get(*s))?,

            AnyValue::Binary(_) | AnyValue::BinaryOwned(_) if !strict => builder.append_null(),
            AnyValue::Null => builder.append_null(),

            av => {
                if strict {
                    return Err(invalid_value_error(&DataType::String, av));
                }

                // Non-strict fallback: append the value's `Display` form.
                owned.clear();
                write!(owned, "{av}").unwrap();
                builder.append_str(&owned)?
            },
        };
    }

    let ca = builder.finish();

    Ok(ca.into_series())
}
561
/// Convert `values` into a [`DecimalChunked`] with the given precision and scale.
///
/// When `scale` is `None` it is inferred as the largest scale among the decimal values.
/// Decimals with the target scale are appended as-is and decimals with a smaller scale
/// are cast to the target data type, in strict mode too. Nulls are appended as nulls.
/// Any other value is an error in strict mode; in non-strict mode it is cast and becomes
/// null when the cast does not yield a decimal.
#[cfg(feature = "dtype-decimal")]
fn any_values_to_decimal(
    values: &[AnyValue],
    precision: Option<usize>,
    scale: Option<usize>, // If None, we're inferring the scale.
    strict: bool,
) -> PolarsResult<DecimalChunked> {
    /// Get the maximum scale among AnyValues
    fn infer_scale(
        values: &[AnyValue],
        precision: Option<usize>,
        strict: bool,
    ) -> PolarsResult<usize> {
        values.iter().try_fold(0, |max_scale: usize, value| match value {
            AnyValue::Decimal(_, scale) => Ok(max_scale.max(*scale)),
            AnyValue::Null => Ok(max_scale),
            other if strict => {
                let target_dtype = DataType::Decimal(precision, None);
                Err(invalid_value_error(&target_dtype, other))
            },
            // Non-decimal values do not take part in scale inference.
            _ => Ok(max_scale),
        })
    }

    let scale = match scale {
        Some(given) => given,
        None => infer_scale(values, precision, strict)?,
    };
    let target_dtype = DataType::Decimal(precision, Some(scale));

    let mut builder = PrimitiveChunkedBuilder::<Int128Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        // Values that can be appended directly are handled in the match; everything that
        // still has to be cast to the target data type falls through.
        let needs_cast = match value {
            AnyValue::Decimal(raw, s) if *s == scale => {
                builder.append_value(*raw);
                continue;
            },
            // Allow less scale. We do want to support different scales even in 'strict' mode.
            AnyValue::Decimal(_, s) if *s < scale => value,
            AnyValue::Null => {
                builder.append_null();
                continue;
            },
            other => {
                if strict {
                    return Err(invalid_value_error(&target_dtype, other));
                }
                // TODO: Precision check, else set to null
                other
            },
        };
        match needs_cast.strict_cast(&target_dtype) {
            Some(AnyValue::Decimal(raw, _)) => builder.append_value(raw),
            _ => builder.append_null(),
        }
    }

    // Build the array and do a precision check if needed.
    builder.finish().into_decimal(precision, scale)
}
629
630fn any_values_to_list(
631    avs: &[AnyValue],
632    inner_type: &DataType,
633    strict: bool,
634) -> PolarsResult<ListChunked> {
635    // GB:
636    // Lord forgive for the sins I have committed in this function. The amount of strange
637    // exceptions that need to happen for this to work are insane and I feel like I am going crazy.
638    //
639    // This function is essentially a copy of the `<ListChunked as FromIterator>` where it does not
640    // sample the datatype from the first element and instead we give it explicitly. This allows
641    // this function to properly assign a datatype if `avs` starts with a `null` value. Previously,
642    // this was solved by assigning the `dtype` again afterwards, but why? We should not link the
643    // implementation of these functions. We still need to assign the dtype of the ListArray and
644    // such, anyways.
645    //
646    // Then, `collect_ca_with_dtype` does not possess the necessary exceptions shown in this
647    // function to use that. I have tried adding the exceptions there and it broke other things. I
648    // really do feel like this is the simplest solution.
649
650    let mut valid = true;
651    let capacity = avs.len();
652
653    let ca = match inner_type {
654        // AnyValues with empty lists in python can create
655        // Series of an unknown dtype.
656        // We use the anonymousbuilder without a dtype
657        // the empty arrays is then not added (we add an extra offset instead)
658        // the next non-empty series then must have the correct dtype.
659        DataType::Null => {
660            let mut builder = AnonymousOwnedListBuilder::new(PlSmallStr::EMPTY, capacity, None);
661            for av in avs {
662                match av {
663                    AnyValue::List(b) => builder.append_series(b)?,
664                    AnyValue::Null => builder.append_null(),
665                    _ => {
666                        valid = false;
667                        builder.append_null();
668                    },
669                }
670            }
671            builder.finish()
672        },
673
674        #[cfg(feature = "object")]
675        DataType::Object(_) => polars_bail!(nyi = "Nested object types"),
676
677        _ => {
678            let list_inner_type = match inner_type {
679                // Categoricals may not have a revmap yet. We just give them an empty one here and
680                // the list builder takes care of the rest.
681                #[cfg(feature = "dtype-categorical")]
682                DataType::Categorical(None, ordering) => {
683                    DataType::Categorical(Some(Arc::new(RevMapping::default())), *ordering)
684                },
685
686                _ => inner_type.clone(),
687            };
688
689            let mut builder =
690                get_list_builder(&list_inner_type, capacity * 5, capacity, PlSmallStr::EMPTY);
691
692            for av in avs {
693                match av {
694                    AnyValue::List(b) => match b.cast(inner_type) {
695                        Ok(casted) => {
696                            if casted.null_count() != b.null_count() {
697                                valid = !strict;
698                            }
699                            builder.append_series(&casted)?;
700                        },
701                        Err(_) => {
702                            valid = false;
703                            for _ in 0..b.len() {
704                                builder.append_null();
705                            }
706                        },
707                    },
708                    AnyValue::Null => builder.append_null(),
709                    _ => {
710                        valid = false;
711                        builder.append_null()
712                    },
713                }
714            }
715
716            builder.finish()
717        },
718    };
719
720    if strict && !valid {
721        polars_bail!(SchemaMismatch: "unexpected value while building Series of type {:?}", DataType::List(Box::new(inner_type.clone())));
722    }
723
724    Ok(ca)
725}
726
/// Convert `avs` into an [`ArrayChunked`] of the given `width` and inner data type.
///
/// List and array values contribute their (single, rechunked) chunk. Unless the inner
/// type is `Null`, values with a different data type are first cast to `inner_type`, and
/// replaced by an all-null series of the same length when the cast fails. Nulls stay
/// null; any other value becomes null and marks the input as invalid.
///
/// # Errors
///
/// Returns a `SchemaMismatch` error in strict mode when an invalid value was seen, and in
/// either mode when the resulting width differs from `width`.
#[cfg(feature = "dtype-array")]
fn any_values_to_array(
    avs: &[AnyValue],
    inner_type: &DataType,
    strict: bool,
    width: usize,
) -> PolarsResult<ArrayChunked> {
    /// First chunk of `s`, rechunking first when `s` has more than one chunk.
    fn to_arr(s: &Series) -> Option<ArrayRef> {
        let first_chunk = if s.chunks().len() > 1 {
            s.rechunk().chunks()[0].clone()
        } else {
            s.chunks()[0].clone()
        };
        Some(first_chunk)
    }

    let target_dtype = DataType::Array(Box::new(inner_type.clone()), width);

    // A `Null` inner type is handled downstream: the builder will choose the first non
    // null type, so values are passed through without casting in that case.
    let inner_is_null = inner_type == &DataType::Null;

    let mut valid = true;
    #[allow(unused_mut)]
    let mut out: ArrayChunked = avs
        .iter()
        .map(|av| match av {
            AnyValue::List(b) | AnyValue::Array(b, _) => {
                if inner_is_null || b.dtype() == inner_type {
                    to_arr(b)
                } else {
                    // Make sure that wrongly inferred AnyValues don't deviate from the
                    // datatype.
                    let casted = b.cast(inner_type).unwrap_or_else(|_| {
                        Series::full_null(b.name().clone(), b.len(), inner_type)
                    });
                    to_arr(&casted)
                }
            },
            AnyValue::Null => None,
            _ => {
                valid = false;
                None
            },
        })
        .collect_ca_with_dtype(PlSmallStr::EMPTY, target_dtype.clone());

    if strict && !valid {
        polars_bail!(SchemaMismatch: "unexpected value while building Series of type {:?}", target_dtype);
    }
    polars_ensure!(
        out.width() == width,
        SchemaMismatch: "got mixed size array widths where width {} was expected", width
    );

    // Ensure the logical type is correct for nested types.
    #[cfg(feature = "dtype-struct")]
    if !inner_is_null && out.inner_dtype().is_nested() {
        // NOTE(review): `set_dtype` is unsafe; this presumably relies on `out` having been
        // collected with `target_dtype` above — confirm the exact safety contract.
        unsafe {
            out.set_dtype(target_dtype.clone());
        };
    }

    Ok(out)
}
802
/// Push onto `field_avs` the value that a single struct row holds for `field`.
///
/// `av_fields` and `av_values` are the row's own fields and values. `fields` is the
/// target schema and `field_index` is the position of `field` within it.
///
/// If the row's fields are equal to the target `fields`, the value is read by
/// position. Otherwise the row is searched for a field with the same name as
/// `field`. [`AnyValue::Null`] is pushed whenever the row has no value for the
/// field, including when `av_values` is shorter than `av_fields`.
#[cfg(feature = "dtype-struct")]
fn _any_values_to_struct<'a>(
    av_fields: &[Field],
    av_values: &[AnyValue<'a>],
    field_index: usize,
    field: &Field,
    fields: &[Field],
    field_avs: &mut Vec<AnyValue<'a>>,
) {
    // TODO: Optimize.

    // Slice equality checks the lengths first and then every field pairwise, so the
    // positional lookup is only used when the row matches the target schema exactly.
    let value_index = if fields == av_fields {
        Some(field_index)
    } else {
        // Not all fields are available (or they differ), search for the name.
        av_fields
            .iter()
            .position(|av_fld| av_fld.name == field.name)
    };

    // Use a checked lookup on both paths so that a row with fewer values than
    // fields produces a null instead of an out-of-bounds panic.
    let value = value_index
        .and_then(|i| av_values.get(i))
        .cloned()
        .unwrap_or(AnyValue::Null);
    field_avs.push(value)
}
845
/// Build a struct [`Series`] with the given `fields` from a slice of [`AnyValue`]s.
///
/// One child series is built per field by collecting, for every row, the value that
/// row holds for the field. Rows that are not struct values contribute a null to
/// every field. When at least one such row exists, an outer validity mask is set in
/// which only `AnyValue::Null` rows are marked as invalid.
///
/// Fields whose dtype is [`DataType::Null`] are built with dtype inference; all other
/// fields are built with their declared dtype. `strict` is forwarded to the child
/// series constructors.
#[cfg(feature = "dtype-struct")]
fn any_values_to_struct(
    values: &[AnyValue],
    fields: &[Field],
    strict: bool,
) -> PolarsResult<Series> {
    let n_rows = values.len();

    // Fast path for structs with no fields.
    if fields.is_empty() {
        let empty = StructChunked::from_series(PlSmallStr::EMPTY, n_rows, [].iter())?;
        return Ok(empty.into_series());
    }

    // Set once any row turns out not to be a struct value.
    let mut saw_non_struct_row = false;
    // The physical series fields of the struct.
    let mut columns = Vec::with_capacity(fields.len());
    // Scratch buffer holding one field's values; reused across fields.
    let mut column_avs = Vec::with_capacity(n_rows);

    for (field_idx, field) in fields.iter().enumerate() {
        column_avs.clear();

        for row in values {
            match row {
                AnyValue::StructOwned(payload) => {
                    let row_values = &payload.0;
                    let row_fields = &payload.1;
                    _any_values_to_struct(
                        row_fields,
                        row_values,
                        field_idx,
                        field,
                        fields,
                        &mut column_avs,
                    );
                },
                AnyValue::Struct(_, _, row_fields) => {
                    let row_values: Vec<_> = row._iter_struct_av().collect();
                    _any_values_to_struct(
                        row_fields,
                        &row_values,
                        field_idx,
                        field,
                        fields,
                        &mut column_avs,
                    );
                },
                _ => {
                    saw_non_struct_row = true;
                    column_avs.push(AnyValue::Null)
                },
            }
        }

        let column = match &field.dtype {
            // If the inferred dtype is null, we let auto inference work.
            DataType::Null => Series::from_any_values(field.name().clone(), &column_avs, strict)?,
            dtype => Series::from_any_values_and_dtype(
                field.name().clone(),
                &column_avs,
                dtype,
                strict,
            )?,
        };
        columns.push(column)
    }

    let mut out = StructChunked::from_series(PlSmallStr::EMPTY, n_rows, columns.iter())?;
    if saw_non_struct_row {
        // Every row is valid except the ones that are explicitly null.
        let validity: MutableBitmap = values
            .iter()
            .map(|row| !matches!(row, AnyValue::Null))
            .collect();
        out.set_outer_validity(Some(validity.freeze()))
    }
    Ok(out.into_series())
}
911
/// Build an object [`Series`] from a slice of [`AnyValue`]s.
///
/// The builder and the converter are both obtained from the object registry.
/// `AnyValue::Object` values are appended as they are, `AnyValue::Null` appends a
/// null, and every other value is first passed through the registered converter.
#[cfg(feature = "object")]
fn any_values_to_object(values: &[AnyValue]) -> PolarsResult<Series> {
    use crate::chunked_array::object::registry::{get_object_builder, get_object_converter};

    let to_object = get_object_converter();
    let mut sink = get_object_builder(PlSmallStr::EMPTY, values.len());

    values.iter().for_each(|value| match value {
        AnyValue::Object(obj) => sink.append_value(obj.as_any()),
        AnyValue::Null => sink.append_null(),
        other => {
            // This is needed because in Python users can send mixed types.
            // This only works if you set a global converter.
            let converted = to_object(other.as_borrowed());
            sink.append_value(&*converted)
        },
    });

    Ok(sink.to_series())
}
932
933fn invalid_value_error(dtype: &DataType, value: &AnyValue) -> PolarsError {
934    polars_err!(
935        SchemaMismatch:
936        "unexpected value while building Series of type {:?}; found value of type {:?}: {}",
937        dtype,
938        value.dtype(),
939        value
940    )
941}