// polars_core/frame/column/mod.rs

1use std::borrow::Cow;
2
3use arrow::bitmap::BitmapBuilder;
4use arrow::trusted_len::TrustMyLength;
5use num_traits::{Num, NumCast};
6use polars_compute::rolling::QuantileMethod;
7use polars_error::PolarsResult;
8use polars_utils::aliases::PlSeedableRandomStateQuality;
9use polars_utils::index::check_bounds;
10use polars_utils::pl_str::PlSmallStr;
11pub use scalar::ScalarColumn;
12
13use self::compare_inner::{TotalEqInner, TotalOrdInner};
14use self::gather::check_bounds_ca;
15use self::partitioned::PartitionedColumn;
16use self::series::SeriesColumn;
17use crate::chunked_array::cast::CastOptions;
18use crate::chunked_array::flags::StatisticsFlags;
19use crate::datatypes::ReshapeDimension;
20use crate::prelude::*;
21use crate::series::{BitRepr, IsSorted, SeriesPhysIter};
22use crate::utils::{Container, slice_offsets};
23use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};
24
25mod arithmetic;
26mod compare;
27mod partitioned;
28mod scalar;
29mod series;
30
/// A column within a [`DataFrame`].
///
/// This is lazily initialized to a [`Series`] with methods like
/// [`as_materialized_series`][Column::as_materialized_series] and
/// [`take_materialized_series`][Column::take_materialized_series].
///
/// Currently, there are three ways to represent a [`Column`].
/// 1. A [`Series`] of values
/// 2. A [`PartitionedColumn`] backed by a values series plus partition ends
///    (NOTE(review): representation inferred from the `PartitionedColumn::new_unchecked`
///    usage below — confirm against the `partitioned` module)
/// 3. A [`ScalarColumn`] that repeats a single [`Scalar`]
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub enum Column {
    Series(SeriesColumn),
    Partitioned(PartitionedColumn),
    Scalar(ScalarColumn),
}
47
/// Convert `Self` into a [`Column`]
pub trait IntoColumn: Sized {
    /// Consume `self` and wrap it in a [`Column`].
    fn into_column(self) -> Column;
}
52
53impl Column {
    /// Construct a [`Column::Series`] from `values` via the [`NamedFrom`] conversion.
    #[inline]
    #[track_caller]
    pub fn new<T, Phantom>(name: PlSmallStr, values: T) -> Self
    where
        Phantom: ?Sized,
        Series: NamedFrom<T, Phantom>,
    {
        Self::Series(SeriesColumn::new(NamedFrom::new(name, values)))
    }

    /// Construct an empty (length 0) column of the given `dtype`.
    #[inline]
    pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Self {
        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), 0)
    }

    /// Construct a [`Column::Scalar`] that repeats `scalar` `length` times.
    #[inline]
    pub fn new_scalar(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
        Self::Scalar(ScalarColumn::new(name, scalar, length))
    }

    /// Construct a column from a repeated scalar.
    ///
    /// NOTE(review): despite the name, this currently returns a `Column::Scalar` and is
    /// byte-identical to [`Column::new_scalar`] — presumably a placeholder until partitioned
    /// construction lands (cf. the `@partition-opt` markers elsewhere in this file). Confirm
    /// this is intentional.
    #[inline]
    pub fn new_partitioned(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
        Self::Scalar(ScalarColumn::new(name, scalar, length))
    }
78
79    pub fn new_row_index(name: PlSmallStr, offset: IdxSize, length: usize) -> PolarsResult<Column> {
80        let Ok(length) = IdxSize::try_from(length) else {
81            polars_bail!(
82                ComputeError:
83                "row index length {} overflows IdxSize::MAX ({})",
84                length,
85                IdxSize::MAX,
86            )
87        };
88
89        if offset.checked_add(length).is_none() {
90            polars_bail!(
91                ComputeError:
92                "row index with offset {} overflows on dataframe with height {}",
93                offset, length
94            )
95        }
96
97        let range = offset..offset + length;
98
99        let mut ca = IdxCa::from_vec(name, range.collect());
100        ca.set_sorted_flag(IsSorted::Ascending);
101        let col = ca.into_series().into();
102
103        Ok(col)
104    }
105
    // # Materialize
    /// Get a reference to a [`Series`] for this [`Column`]
    ///
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
    #[inline]
    pub fn as_materialized_series(&self) -> &Series {
        match self {
            Column::Series(s) => s,
            Column::Partitioned(s) => s.as_materialized_series(),
            Column::Scalar(s) => s.as_materialized_series(),
        }
    }

    /// If the memory repr of this Column is a scalar, a unit-length Series will
    /// be returned.
    #[inline]
    pub fn as_materialized_series_maintain_scalar(&self) -> Series {
        match self {
            // Keep the scalar as a single value instead of expanding it to the full length.
            Column::Scalar(s) => s.as_single_value_series(),
            v => v.as_materialized_series().clone(),
        }
    }
128
    /// Returns the backing `Series` for the values of this column.
    ///
    /// * For `Column::Series` columns, simply returns the inner `Series`.
    /// * For `Column::Partitioned` columns, returns the series representing the values.
    /// * For `Column::Scalar` columns, returns an empty or unit length series.
    ///
    /// # Note
    /// This method is safe to use. However, care must be taken when operating on the returned
    /// `Series` to ensure result correctness. E.g. It is suitable to perform elementwise operations
    /// on it, however e.g. aggregations will return unspecified results.
    pub fn _get_backing_series(&self) -> Series {
        match self {
            // `**s` derefs the `SeriesColumn` wrapper to the inner `Series`.
            Column::Series(s) => (**s).clone(),
            Column::Partitioned(s) => s.partitions().clone(),
            Column::Scalar(s) => s.as_single_value_series(),
        }
    }
146
    /// Constructs a new `Column` of the same variant as `self` from a backing `Series` representing
    /// the values.
    ///
    /// # Panics
    /// Panics if:
    /// * `self` is `Column::Series` and the length of `new_s` does not match that of `self`.
    /// * `self` is `Column::Partitioned` and the length of `new_s` does not match that of the existing partitions.
    /// * `self` is `Column::Scalar` and if either:
    ///   * `self` is not empty and `new_s` is not of unit length.
    ///   * `self` is empty and `new_s` is not empty.
    pub fn _to_new_from_backing(&self, new_s: Series) -> Self {
        match self {
            Column::Series(s) => {
                assert_eq!(new_s.len(), s.len());
                Column::Series(SeriesColumn::new(new_s))
            },
            Column::Partitioned(s) => {
                assert_eq!(new_s.len(), s.partitions().len());
                // SAFETY: the existing partition ends are reused unchanged, and the values
                // length was asserted to match the existing partitions above.
                unsafe {
                    Column::Partitioned(PartitionedColumn::new_unchecked(
                        new_s.name().clone(),
                        new_s,
                        s.partition_ends_ref().clone(),
                    ))
                }
            },
            Column::Scalar(s) => {
                // `as_single_value_series()` has length 0 or 1, which enforces the length
                // contract documented above.
                assert_eq!(new_s.len(), s.as_single_value_series().len());
                Column::Scalar(ScalarColumn::from_single_value_series(new_s, self.len()))
            },
        }
    }
179
    /// Turn [`Column`] into a [`Column::Series`].
    ///
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
    #[inline]
    pub fn into_materialized_series(&mut self) -> &mut Series {
        match self {
            Column::Series(s) => s,
            Column::Partitioned(s) => {
                // Swap in a cheap empty placeholder so we can take the partitioned column
                // by value and turn it into a series.
                let series = std::mem::replace(
                    s,
                    PartitionedColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
                )
                .take_materialized_series();
                *self = Column::Series(series.into());
                let Column::Series(s) = self else {
                    // We just assigned the `Series` variant.
                    unreachable!();
                };
                s
            },
            Column::Scalar(s) => {
                // Same placeholder trick as above, for the scalar representation.
                let series = std::mem::replace(
                    s,
                    ScalarColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
                )
                .take_materialized_series();
                *self = Column::Series(series.into());
                let Column::Series(s) = self else {
                    // We just assigned the `Series` variant.
                    unreachable!();
                };
                s
            },
        }
    }
    /// Take [`Series`] from a [`Column`]
    ///
    /// Consumes `self`.
    ///
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
    #[inline]
    pub fn take_materialized_series(self) -> Series {
        match self {
            Column::Series(s) => s.take(),
            Column::Partitioned(s) => s.take_materialized_series(),
            Column::Scalar(s) => s.take_materialized_series(),
        }
    }
224
    /// The data type of this column.
    #[inline]
    pub fn dtype(&self) -> &DataType {
        match self {
            Column::Series(s) => s.dtype(),
            Column::Partitioned(s) => s.dtype(),
            Column::Scalar(s) => s.dtype(),
        }
    }

    /// The `(name, dtype)` field of this column.
    #[inline]
    pub fn field(&self) -> Cow<'_, Field> {
        match self {
            Column::Series(s) => s.field(),
            Column::Partitioned(s) => s.field(),
            Column::Scalar(s) => match s.lazy_as_materialized_series() {
                // Not yet materialized: build the field directly without forcing
                // materialization of the scalar.
                None => Cow::Owned(Field::new(s.name().clone(), s.dtype().clone())),
                Some(s) => s.field(),
            },
        }
    }

    /// The name of this column.
    #[inline]
    pub fn name(&self) -> &PlSmallStr {
        match self {
            Column::Series(s) => s.name(),
            Column::Partitioned(s) => s.name(),
            Column::Scalar(s) => s.name(),
        }
    }

    /// The number of rows in this column.
    #[inline]
    pub fn len(&self) -> usize {
        match self {
            Column::Series(s) => s.len(),
            Column::Partitioned(s) => s.len(),
            Column::Scalar(s) => s.len(),
        }
    }

    /// Consume `self` and return it renamed to `name`.
    #[inline]
    pub fn with_name(mut self, name: PlSmallStr) -> Column {
        self.rename(name);
        self
    }

    /// Rename this column in place.
    #[inline]
    pub fn rename(&mut self, name: PlSmallStr) {
        match self {
            // `_ =` discards the inner return values so every arm is `()`.
            Column::Series(s) => _ = s.rename(name),
            Column::Partitioned(s) => _ = s.rename(name),
            Column::Scalar(s) => _ = s.rename(name),
        }
    }
278
    // # Downcasting
    /// Returns the inner [`Series`] if this is a `Column::Series`, otherwise `None`.
    #[inline]
    pub fn as_series(&self) -> Option<&Series> {
        match self {
            Column::Series(s) => Some(s),
            _ => None,
        }
    }
    /// Returns the inner [`PartitionedColumn`] if this is a `Column::Partitioned`, otherwise `None`.
    #[inline]
    pub fn as_partitioned_column(&self) -> Option<&PartitionedColumn> {
        match self {
            Column::Partitioned(s) => Some(s),
            _ => None,
        }
    }
    /// Returns the inner [`ScalarColumn`] if this is a `Column::Scalar`, otherwise `None`.
    #[inline]
    pub fn as_scalar_column(&self) -> Option<&ScalarColumn> {
        match self {
            Column::Scalar(s) => Some(s),
            _ => None,
        }
    }
    /// Mutable variant of [`Column::as_scalar_column`].
    #[inline]
    pub fn as_scalar_column_mut(&mut self) -> Option<&mut ScalarColumn> {
        match self {
            Column::Scalar(s) => Some(s),
            _ => None,
        }
    }
308
    // # Try to Chunked Arrays
    // Optional downcasts to the concrete `ChunkedArray` types; `None` means the dtype did
    // not match. Each delegates through `as_materialized_series`, so scalar/partitioned
    // columns may be materialized on first use.
    pub fn try_bool(&self) -> Option<&BooleanChunked> {
        self.as_materialized_series().try_bool()
    }
    pub fn try_i8(&self) -> Option<&Int8Chunked> {
        self.as_materialized_series().try_i8()
    }
    pub fn try_i16(&self) -> Option<&Int16Chunked> {
        self.as_materialized_series().try_i16()
    }
    pub fn try_i32(&self) -> Option<&Int32Chunked> {
        self.as_materialized_series().try_i32()
    }
    pub fn try_i64(&self) -> Option<&Int64Chunked> {
        self.as_materialized_series().try_i64()
    }
    pub fn try_u8(&self) -> Option<&UInt8Chunked> {
        self.as_materialized_series().try_u8()
    }
    pub fn try_u16(&self) -> Option<&UInt16Chunked> {
        self.as_materialized_series().try_u16()
    }
    pub fn try_u32(&self) -> Option<&UInt32Chunked> {
        self.as_materialized_series().try_u32()
    }
    pub fn try_u64(&self) -> Option<&UInt64Chunked> {
        self.as_materialized_series().try_u64()
    }
    pub fn try_f32(&self) -> Option<&Float32Chunked> {
        self.as_materialized_series().try_f32()
    }
    pub fn try_f64(&self) -> Option<&Float64Chunked> {
        self.as_materialized_series().try_f64()
    }
    pub fn try_str(&self) -> Option<&StringChunked> {
        self.as_materialized_series().try_str()
    }
    pub fn try_list(&self) -> Option<&ListChunked> {
        self.as_materialized_series().try_list()
    }
    pub fn try_binary(&self) -> Option<&BinaryChunked> {
        self.as_materialized_series().try_binary()
    }
    pub fn try_idx(&self) -> Option<&IdxCa> {
        self.as_materialized_series().try_idx()
    }
    pub fn try_binary_offset(&self) -> Option<&BinaryOffsetChunked> {
        self.as_materialized_series().try_binary_offset()
    }
    #[cfg(feature = "dtype-datetime")]
    pub fn try_datetime(&self) -> Option<&DatetimeChunked> {
        self.as_materialized_series().try_datetime()
    }
    #[cfg(feature = "dtype-struct")]
    pub fn try_struct(&self) -> Option<&StructChunked> {
        self.as_materialized_series().try_struct()
    }
    #[cfg(feature = "dtype-decimal")]
    pub fn try_decimal(&self) -> Option<&DecimalChunked> {
        self.as_materialized_series().try_decimal()
    }
    #[cfg(feature = "dtype-array")]
    pub fn try_array(&self) -> Option<&ArrayChunked> {
        self.as_materialized_series().try_array()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat<T: PolarsCategoricalType>(&self) -> Option<&CategoricalChunked<T>> {
        self.as_materialized_series().try_cat::<T>()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat8(&self) -> Option<&Categorical8Chunked> {
        self.as_materialized_series().try_cat8()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat16(&self) -> Option<&Categorical16Chunked> {
        self.as_materialized_series().try_cat16()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat32(&self) -> Option<&Categorical32Chunked> {
        self.as_materialized_series().try_cat32()
    }
    #[cfg(feature = "dtype-date")]
    pub fn try_date(&self) -> Option<&DateChunked> {
        self.as_materialized_series().try_date()
    }
    #[cfg(feature = "dtype-duration")]
    pub fn try_duration(&self) -> Option<&DurationChunked> {
        self.as_materialized_series().try_duration()
    }
398
    // # To Chunked Arrays
    // Downcasts to the concrete `ChunkedArray` types, returning a `PolarsResult` — the
    // error-returning counterparts of the `try_*` methods above. These also go through
    // `as_materialized_series`, so scalar/partitioned columns may be materialized.
    pub fn bool(&self) -> PolarsResult<&BooleanChunked> {
        self.as_materialized_series().bool()
    }
    pub fn i8(&self) -> PolarsResult<&Int8Chunked> {
        self.as_materialized_series().i8()
    }
    pub fn i16(&self) -> PolarsResult<&Int16Chunked> {
        self.as_materialized_series().i16()
    }
    pub fn i32(&self) -> PolarsResult<&Int32Chunked> {
        self.as_materialized_series().i32()
    }
    pub fn i64(&self) -> PolarsResult<&Int64Chunked> {
        self.as_materialized_series().i64()
    }
    #[cfg(feature = "dtype-i128")]
    pub fn i128(&self) -> PolarsResult<&Int128Chunked> {
        self.as_materialized_series().i128()
    }
    pub fn u8(&self) -> PolarsResult<&UInt8Chunked> {
        self.as_materialized_series().u8()
    }
    pub fn u16(&self) -> PolarsResult<&UInt16Chunked> {
        self.as_materialized_series().u16()
    }
    pub fn u32(&self) -> PolarsResult<&UInt32Chunked> {
        self.as_materialized_series().u32()
    }
    pub fn u64(&self) -> PolarsResult<&UInt64Chunked> {
        self.as_materialized_series().u64()
    }
    pub fn f32(&self) -> PolarsResult<&Float32Chunked> {
        self.as_materialized_series().f32()
    }
    pub fn f64(&self) -> PolarsResult<&Float64Chunked> {
        self.as_materialized_series().f64()
    }
    pub fn str(&self) -> PolarsResult<&StringChunked> {
        self.as_materialized_series().str()
    }
    pub fn list(&self) -> PolarsResult<&ListChunked> {
        self.as_materialized_series().list()
    }
    pub fn binary(&self) -> PolarsResult<&BinaryChunked> {
        self.as_materialized_series().binary()
    }
    pub fn idx(&self) -> PolarsResult<&IdxCa> {
        self.as_materialized_series().idx()
    }
    pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> {
        self.as_materialized_series().binary_offset()
    }
    #[cfg(feature = "dtype-datetime")]
    pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> {
        self.as_materialized_series().datetime()
    }
    #[cfg(feature = "dtype-struct")]
    pub fn struct_(&self) -> PolarsResult<&StructChunked> {
        self.as_materialized_series().struct_()
    }
    #[cfg(feature = "dtype-decimal")]
    pub fn decimal(&self) -> PolarsResult<&DecimalChunked> {
        self.as_materialized_series().decimal()
    }
    #[cfg(feature = "dtype-array")]
    pub fn array(&self) -> PolarsResult<&ArrayChunked> {
        self.as_materialized_series().array()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn cat<T: PolarsCategoricalType>(&self) -> PolarsResult<&CategoricalChunked<T>> {
        self.as_materialized_series().cat::<T>()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn cat8(&self) -> PolarsResult<&Categorical8Chunked> {
        self.as_materialized_series().cat8()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn cat16(&self) -> PolarsResult<&Categorical16Chunked> {
        self.as_materialized_series().cat16()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn cat32(&self) -> PolarsResult<&Categorical32Chunked> {
        self.as_materialized_series().cat32()
    }
    #[cfg(feature = "dtype-date")]
    pub fn date(&self) -> PolarsResult<&DateChunked> {
        self.as_materialized_series().date()
    }
    #[cfg(feature = "dtype-duration")]
    pub fn duration(&self) -> PolarsResult<&DurationChunked> {
        self.as_materialized_series().duration()
    }
492
    // # Casting
    /// Cast this column to `dtype` with the given [`CastOptions`], preserving the variant.
    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.cast_with_options(dtype, options).map(Column::from),
            Column::Partitioned(s) => s.cast_with_options(dtype, options).map(Column::from),
            Column::Scalar(s) => s.cast_with_options(dtype, options).map(Column::from),
        }
    }
    /// Strict variant of [`Column::cast`]; semantics follow the inner `strict_cast`.
    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.strict_cast(dtype).map(Column::from),
            Column::Partitioned(s) => s.strict_cast(dtype).map(Column::from),
            Column::Scalar(s) => s.strict_cast(dtype).map(Column::from),
        }
    }
    /// Cast this column to `dtype`, preserving the variant.
    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => s.cast(dtype).map(Column::from),
            Column::Partitioned(s) => s.cast(dtype).map(Column::from),
            Column::Scalar(s) => s.cast(dtype).map(Column::from),
        }
    }
    /// Cast without checks; semantics follow the inner `cast_unchecked`.
    ///
    /// # Safety
    ///
    /// This can lead to invalid memory access in downstream code.
    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
            Column::Partitioned(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
            Column::Scalar(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
        }
    }
525
    /// Return an empty (length 0) column with the same name and dtype, keeping the variant.
    pub fn clear(&self) -> Self {
        match self {
            Column::Series(s) => s.clear().into(),
            Column::Partitioned(s) => s.clear().into(),
            // Resizing a scalar column to 0 keeps name/dtype without materializing.
            Column::Scalar(s) => s.resize(0).into(),
        }
    }

    /// Shrink backing memory to fit; a no-op for non-series variants.
    #[inline]
    pub fn shrink_to_fit(&mut self) {
        match self {
            Column::Series(s) => s.shrink_to_fit(),
            // @partition-opt
            Column::Partitioned(_) => {},
            // Scalar columns store a single value; nothing to shrink.
            Column::Scalar(_) => {},
        }
    }
543
    /// Create a new column that repeats the value at `index` `length` times.
    ///
    /// If `index` is out of bounds, a full-null column of `length` rows is returned.
    #[inline]
    pub fn new_from_index(&self, index: usize, length: usize) -> Self {
        if index >= self.len() {
            return Self::full_null(self.name().clone(), length, self.dtype());
        }

        match self {
            Column::Series(s) => {
                // SAFETY: Bounds check done before.
                let av = unsafe { s.get_unchecked(index) };
                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
                Self::new_scalar(self.name().clone(), scalar, length)
            },
            Column::Partitioned(s) => {
                // SAFETY: Bounds check done before.
                let av = unsafe { s.get_unchecked(index) };
                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
                Self::new_scalar(self.name().clone(), scalar, length)
            },
            // All values are equal, so the index does not matter: just resize.
            Column::Scalar(s) => s.resize(length).into(),
        }
    }
566
    /// Whether this column contains any null values.
    #[inline]
    pub fn has_nulls(&self) -> bool {
        match self {
            Self::Series(s) => s.has_nulls(),
            // @partition-opt
            Self::Partitioned(s) => s.as_materialized_series().has_nulls(),
            Self::Scalar(s) => s.has_nulls(),
        }
    }

    /// Elementwise null mask for this column.
    #[inline]
    pub fn is_null(&self) -> BooleanChunked {
        match self {
            Self::Series(s) => s.is_null(),
            // @partition-opt
            Self::Partitioned(s) => s.as_materialized_series().is_null(),
            // A scalar column is either all-null or all-valid, so the mask is constant.
            Self::Scalar(s) => {
                BooleanChunked::full(s.name().clone(), s.scalar().is_null(), s.len())
            },
        }
    }
    /// Elementwise not-null mask; the negation of [`Column::is_null`].
    #[inline]
    pub fn is_not_null(&self) -> BooleanChunked {
        match self {
            Self::Series(s) => s.is_not_null(),
            // @partition-opt
            Self::Partitioned(s) => s.as_materialized_series().is_not_null(),
            Self::Scalar(s) => {
                BooleanChunked::full(s.name().clone(), !s.scalar().is_null(), s.len())
            },
        }
    }
599
    /// Convert this column to its physical representation.
    pub fn to_physical_repr(&self) -> Column {
        // @scalar-opt
        self.as_materialized_series()
            .to_physical_repr()
            .into_owned()
            .into()
    }
    /// Reinterpret a physical column as logical `dtype`; semantics follow the inner
    /// `Series::from_physical_unchecked`.
    ///
    /// # Safety
    ///
    /// This can lead to invalid memory access in downstream code.
    pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
        // @scalar-opt
        self.as_materialized_series()
            .from_physical_unchecked(dtype)
            .map(Column::from)
    }
616
617    pub fn head(&self, length: Option<usize>) -> Column {
618        let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);
619        let len = usize::min(len, self.len());
620        self.slice(0, len)
621    }
622    pub fn tail(&self, length: Option<usize>) -> Column {
623        let len = length.unwrap_or(TAIL_DEFAULT_LENGTH);
624        let len = usize::min(len, self.len());
625        debug_assert!(len <= i64::MAX as usize);
626        self.slice(-(len as i64), len)
627    }
628    pub fn slice(&self, offset: i64, length: usize) -> Column {
629        match self {
630            Column::Series(s) => s.slice(offset, length).into(),
631            // @partition-opt
632            Column::Partitioned(s) => s.as_materialized_series().slice(offset, length).into(),
633            Column::Scalar(s) => {
634                let (_, length) = slice_offsets(offset, length, s.len());
635                s.resize(length).into()
636            },
637        }
638    }
639
    /// Split this column into two at `offset`; semantics follow `Series::split_at`.
    pub fn split_at(&self, offset: i64) -> (Column, Column) {
        // @scalar-opt
        let (l, r) = self.as_materialized_series().split_at(offset);
        (l.into(), r.into())
    }
645
646    #[inline]
647    pub fn null_count(&self) -> usize {
648        match self {
649            Self::Series(s) => s.null_count(),
650            Self::Partitioned(s) => s.null_count(),
651            Self::Scalar(s) if s.scalar().is_null() => s.len(),
652            Self::Scalar(_) => 0,
653        }
654    }
655
    /// Gather the values at `indices`, checking bounds first.
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Column> {
        check_bounds_ca(indices, self.len() as IdxSize)?;
        // SAFETY: bounds were just checked.
        Ok(unsafe { self.take_unchecked(indices) })
    }
    /// Gather the values at the slice `indices`, checking bounds first.
    pub fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Column> {
        check_bounds(indices, self.len() as IdxSize)?;
        // SAFETY: bounds were just checked.
        Ok(unsafe { self.take_slice_unchecked(indices) })
    }
    /// Gather the values at `indices` without bounds checking.
    ///
    /// # Safety
    ///
    /// No bounds on the indexes are performed.
    pub unsafe fn take_unchecked(&self, indices: &IdxCa) -> Column {
        debug_assert!(check_bounds_ca(indices, self.len() as IdxSize).is_ok());

        match self {
            Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(),
            Self::Partitioned(s) => {
                // @partition-opt: falls back to the materialized series.
                let s = s.as_materialized_series();
                unsafe { s.take_unchecked(indices) }.into()
            },
            Self::Scalar(s) => {
                let idxs_length = indices.len();
                let idxs_null_count = indices.null_count();

                // Gather on the unit-length (or empty) series: the result stays a scalar
                // column repeated `idxs_length` times.
                let scalar = ScalarColumn::from_single_value_series(
                    s.as_single_value_series().take_unchecked(&IdxCa::new(
                        indices.name().clone(),
                        &[0][..s.len().min(1)],
                    )),
                    idxs_length,
                );

                // We need to make sure that null values in `idx` become null values in the result
                if idxs_null_count == 0 || scalar.has_nulls() {
                    // No index nulls to propagate (or the scalar is already null).
                    scalar.into_column()
                } else if idxs_null_count == idxs_length {
                    // Every index is null: the entire result is null.
                    scalar.into_nulls().into_column()
                } else {
                    // Mixed case: overwrite the validity of the materialized result with
                    // the indices' validity mask.
                    let validity = indices.rechunk_validity();
                    let series = scalar.take_materialized_series();
                    let name = series.name().clone();
                    let dtype = series.dtype().clone();
                    let mut chunks = series.into_chunks();
                    // Materializing a scalar column yields a single chunk.
                    assert_eq!(chunks.len(), 1);
                    chunks[0] = chunks[0].with_validity(validity);
                    unsafe { Series::from_chunks_and_dtype_unchecked(name, chunks, &dtype) }
                        .into_column()
                }
            },
        }
    }
    /// Gather the values at the slice `indices` without bounds checking.
    ///
    /// # Safety
    ///
    /// No bounds on the indexes are performed.
    pub unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Column {
        debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok());

        match self {
            Self::Series(s) => unsafe { s.take_slice_unchecked(indices) }.into(),
            Self::Partitioned(s) => {
                // @partition-opt: falls back to the materialized series.
                let s = s.as_materialized_series();
                unsafe { s.take_slice_unchecked(indices) }.into()
            },
            // A slice of indices cannot contain nulls, so no validity handling is needed.
            Self::Scalar(s) => ScalarColumn::from_single_value_series(
                s.as_single_value_series()
                    .take_slice_unchecked(&[0][..s.len().min(1)]),
                indices.len(),
            )
            .into(),
        }
    }
727
    /// General implementation for aggregation where a non-missing scalar would map to itself.
    ///
    /// For `Column::Scalar`, the full aggregation is avoided: `series_agg` is probed once on
    /// a unit-length series to learn the output dtype and whether the aggregation is defined
    /// at all; the scalar column is then resized/cast and only empty groups are patched to
    /// null via a validity mask.
    #[inline(always)]
    #[cfg(any(feature = "algorithm_group_by", feature = "bitwise"))]
    fn agg_with_unit_scalar(
        &self,
        groups: &GroupsType,
        series_agg: impl Fn(&Series, &GroupsType) -> Series,
    ) -> Column {
        match self {
            Column::Series(s) => series_agg(s, groups).into_column(),
            // @partition-opt
            Column::Partitioned(s) => series_agg(s.as_materialized_series(), groups).into_column(),
            Column::Scalar(s) => {
                // Empty scalar column: no shortcut possible, delegate directly.
                if s.is_empty() {
                    return series_agg(s.as_materialized_series(), groups).into_column();
                }

                // We utilize the aggregation on Series to see:
                // 1. the output datatype of the aggregation
                // 2. whether this aggregation is even defined
                let series_aggregation = series_agg(
                    &s.as_single_value_series(),
                    &GroupsType::Slice {
                        // @NOTE: this group is always valid since s is non-empty.
                        groups: vec![[0, 1]],
                        rolling: false,
                    },
                );

                // If the aggregation is not defined, just return all nulls.
                if series_aggregation.has_nulls() {
                    return Self::new_scalar(
                        series_aggregation.name().clone(),
                        Scalar::new(series_aggregation.dtype().clone(), AnyValue::Null),
                        groups.len(),
                    );
                }

                // One output row per group.
                let mut scalar_col = s.resize(groups.len());
                // The aggregation might change the type (e.g. mean changes int -> float), so we do
                // a cast here to the output type.
                if series_aggregation.dtype() != s.dtype() {
                    scalar_col = scalar_col.cast(series_aggregation.dtype()).unwrap();
                }

                let Some(first_empty_idx) = groups.iter().position(|g| g.is_empty()) else {
                    // Fast path: no empty groups. keep the scalar intact.
                    return scalar_col.into_column();
                };

                // All empty groups produce a *missing* or `null` value.
                let mut validity = BitmapBuilder::with_capacity(groups.len());
                // Every group before the first empty one is non-empty, hence valid.
                validity.extend_constant(first_empty_idx, true);
                // SAFETY: We trust the length of this iterator.
                let iter = unsafe {
                    TrustMyLength::new(
                        groups.iter().skip(first_empty_idx).map(|g| !g.is_empty()),
                        groups.len() - first_empty_idx,
                    )
                };
                validity.extend_trusted_len_iter(iter);

                let mut s = scalar_col.take_materialized_series().rechunk();
                // SAFETY: We perform a compute_len afterwards.
                let chunks = unsafe { s.chunks_mut() };
                let arr = &mut chunks[0];
                // Attach the computed validity so empty groups become null.
                *arr = arr.with_validity(validity.into_opt_validity());
                s.compute_len();

                s.into_column()
            },
        }
    }
801
    /// Aggregate the group minimum.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_min(&self, groups: &GroupsType) -> Self {
        // A non-missing scalar maps to itself under `min`.
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_min(g) })
    }

    /// Aggregate the group maximum.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_max(&self, groups: &GroupsType) -> Self {
        // A non-missing scalar maps to itself under `max`.
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_max(g) })
    }

    /// Aggregate the group mean.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_mean(&self, groups: &GroupsType) -> Self {
        // The mean of a repeated scalar is the scalar itself (possibly cast, see
        // `agg_with_unit_scalar`).
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_mean(g) })
    }

    /// Aggregate the group sum.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_sum(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        // A sum does not map a scalar to itself (it depends on the group size), so the
        // unit-scalar shortcut does not apply; materialize and delegate.
        unsafe { self.as_materialized_series().agg_sum(groups) }.into()
    }

    /// Aggregate the first value per group.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_first(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_first(g) })
    }

    /// Aggregate the last value per group.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_last(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_last(g) })
    }

    /// Aggregate the number of unique values per group.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_n_unique(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_n_unique(groups) }.into()
    }
859
    /// Aggregate the given quantile of each group using `method` for interpolation.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_quantile(
        &self,
        groups: &GroupsType,
        quantile: f64,
        method: QuantileMethod,
    ) -> Self {
        // @scalar-opt

        unsafe {
            self.as_materialized_series()
                .agg_quantile(groups, quantile, method)
        }
        .into()
    }

    /// Aggregate the median of each group.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_median(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_median(g) })
    }

    /// Aggregate the variance of each group with `ddof` delta degrees of freedom.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_var(groups, ddof) }.into()
    }

    /// Aggregate the standard deviation of each group with `ddof` delta degrees of freedom.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_std(groups, ddof) }.into()
    }

    /// Collect each group's values into a list value.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_list(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_list(groups) }.into()
    }
913
    /// Count the non-null values of each group.
    ///
    /// NOTE(review): this is a *safe* `pub fn` that internally performs an unchecked
    /// group-indexed aggregation, while its sibling `agg_*` methods with the same
    /// contract are `unsafe fn`. The `# Safety` section below documents a caller
    /// obligation the type system does not enforce here — consider marking this
    /// (and `agg_and`/`agg_or`/`agg_xor`) `unsafe` in a breaking release. TODO confirm.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub fn agg_valid_count(&self, groups: &GroupsType) -> Self {
        // @partition-opt
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_valid_count(groups) }.into()
    }

    /// Bitwise/logical AND-reduce each group.
    ///
    /// NOTE(review): safe fn wrapping an unchecked aggregation — see `agg_valid_count`.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "bitwise")]
    pub fn agg_and(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_and(g) })
    }
    /// Bitwise/logical OR-reduce each group.
    ///
    /// NOTE(review): safe fn wrapping an unchecked aggregation — see `agg_valid_count`.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "bitwise")]
    pub fn agg_or(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_or(g) })
    }
    /// Bitwise/logical XOR-reduce each group.
    ///
    /// NOTE(review): safe fn wrapping an unchecked aggregation — see `agg_valid_count`.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "bitwise")]
    pub fn agg_xor(&self, groups: &GroupsType) -> Self {
        // @partition-opt
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_xor(groups) }.into()
    }
947
948    pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self {
949        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size)
950    }
951
952    pub fn is_empty(&self) -> bool {
953        self.len() == 0
954    }
955
956    pub fn reverse(&self) -> Column {
957        match self {
958            Column::Series(s) => s.reverse().into(),
959            Column::Partitioned(s) => s.reverse().into(),
960            Column::Scalar(_) => self.clone(),
961        }
962    }
963
964    pub fn equals(&self, other: &Column) -> bool {
965        // @scalar-opt
966        self.as_materialized_series()
967            .equals(other.as_materialized_series())
968    }
969
970    pub fn equals_missing(&self, other: &Column) -> bool {
971        // @scalar-opt
972        self.as_materialized_series()
973            .equals_missing(other.as_materialized_series())
974    }
975
976    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
977        // @scalar-opt
978        match self {
979            Column::Series(s) => s.set_sorted_flag(sorted),
980            Column::Partitioned(s) => s.set_sorted_flag(sorted),
981            Column::Scalar(_) => {},
982        }
983    }
984
985    pub fn get_flags(&self) -> StatisticsFlags {
986        match self {
987            Column::Series(s) => s.get_flags(),
988            // @partition-opt
989            Column::Partitioned(_) => StatisticsFlags::empty(),
990            Column::Scalar(_) => {
991                StatisticsFlags::IS_SORTED_ASC | StatisticsFlags::CAN_FAST_EXPLODE_LIST
992            },
993        }
994    }
995
996    /// Returns whether the flags were set
997    pub fn set_flags(&mut self, flags: StatisticsFlags) -> bool {
998        match self {
999            Column::Series(s) => {
1000                s.set_flags(flags);
1001                true
1002            },
1003            // @partition-opt
1004            Column::Partitioned(_) => false,
1005            Column::Scalar(_) => false,
1006        }
1007    }
1008
1009    pub fn vec_hash(
1010        &self,
1011        build_hasher: PlSeedableRandomStateQuality,
1012        buf: &mut Vec<u64>,
1013    ) -> PolarsResult<()> {
1014        // @scalar-opt?
1015        self.as_materialized_series().vec_hash(build_hasher, buf)
1016    }
1017
1018    pub fn vec_hash_combine(
1019        &self,
1020        build_hasher: PlSeedableRandomStateQuality,
1021        hashes: &mut [u64],
1022    ) -> PolarsResult<()> {
1023        // @scalar-opt?
1024        self.as_materialized_series()
1025            .vec_hash_combine(build_hasher, hashes)
1026    }
1027
1028    pub fn append(&mut self, other: &Column) -> PolarsResult<&mut Self> {
1029        // @scalar-opt
1030        self.into_materialized_series()
1031            .append(other.as_materialized_series())?;
1032        Ok(self)
1033    }
1034    pub fn append_owned(&mut self, other: Column) -> PolarsResult<&mut Self> {
1035        self.into_materialized_series()
1036            .append_owned(other.take_materialized_series())?;
1037        Ok(self)
1038    }
1039
    /// Indices that would sort this column according to `options`.
    ///
    /// Takes fast paths for empty/all-null columns and for columns whose
    /// sortedness flag is already known; otherwise falls back to the
    /// materialized series' `arg_sort`.
    pub fn arg_sort(&self, options: SortOptions) -> IdxCa {
        if self.is_empty() {
            return IdxCa::from_vec(self.name().clone(), Vec::new());
        }

        if self.null_count() == self.len() {
            // We might need to maintain order so just respect the descending parameter.
            let values = if options.descending {
                (0..self.len() as IdxSize).rev().collect()
            } else {
                (0..self.len() as IdxSize).collect()
            };

            return IdxCa::from_vec(self.name().clone(), values);
        }

        let is_sorted = Some(self.is_sorted_flag());
        let Some(is_sorted) = is_sorted.filter(|v| !matches!(v, IsSorted::Not)) else {
            // Sortedness unknown: do the real sort.
            return self.as_materialized_series().arg_sort(options);
        };

        // Fast path: the data is sorted.
        let is_sorted_dsc = matches!(is_sorted, IsSorted::Descending);
        let invert = options.descending != is_sorted_dsc;

        let mut values = Vec::with_capacity(self.len());

        // Push the index range [start, end) into `values`, reversed and/or
        // inverted per duplicate-group as needed.
        #[inline(never)]
        fn extend(
            start: IdxSize,
            end: IdxSize,
            slf: &Column,
            values: &mut Vec<IdxSize>,
            is_only_nulls: bool,
            invert: bool,
            maintain_order: bool,
        ) {
            debug_assert!(start <= end);
            debug_assert!(start as usize <= slf.len());
            debug_assert!(end as usize <= slf.len());

            if !invert || is_only_nulls {
                values.extend(start..end);
                return;
            }

            // If we don't have to maintain order but we have to invert. Just flip it around.
            if !maintain_order {
                values.extend((start..end).rev());
                return;
            }

            // If we want to maintain order but we also needs to invert, we need to invert
            // per group of items.
            //
            // @NOTE: Since the column is sorted, arg_unique can also take a fast path and
            // just do a single traversal.
            let arg_unique = slf
                .slice(start as i64, (end - start) as usize)
                .arg_unique()
                .unwrap();

            assert!(!arg_unique.has_nulls());

            let num_unique = arg_unique.len();

            // Fast path: all items are unique.
            if num_unique == (end - start) as usize {
                values.extend((start..end).rev());
                return;
            }

            if num_unique == 1 {
                values.extend(start..end);
                return;
            }

            // Walk the unique-start offsets back-to-front, emitting each
            // duplicate run in its original (stable) order.
            let mut prev_idx = end - start;
            for chunk in arg_unique.downcast_iter() {
                for &idx in chunk.values().as_slice().iter().rev() {
                    values.extend(start + idx..start + prev_idx);
                    prev_idx = idx;
                }
            }
        }
        macro_rules! extend {
            ($start:expr, $end:expr) => {
                extend!($start, $end, is_only_nulls = false);
            };
            ($start:expr, $end:expr, is_only_nulls = $is_only_nulls:expr) => {
                extend(
                    $start,
                    $end,
                    self,
                    &mut values,
                    $is_only_nulls,
                    invert,
                    options.maintain_order,
                );
            };
        }

        let length = self.len() as IdxSize;
        let null_count = self.null_count() as IdxSize;

        if null_count == 0 {
            extend!(0, length);
        } else {
            // Sorted data keeps its nulls contiguous at one end; probe the last
            // element to learn which end that is.
            let has_nulls_last = self.get(self.len() - 1).unwrap().is_null();
            match (options.nulls_last, has_nulls_last) {
                (true, true) => {
                    // Current: Nulls last, Wanted: Nulls last
                    extend!(0, length - null_count);
                    extend!(length - null_count, length, is_only_nulls = true);
                },
                (true, false) => {
                    // Current: Nulls first, Wanted: Nulls last
                    extend!(null_count, length);
                    extend!(0, null_count, is_only_nulls = true);
                },
                (false, true) => {
                    // Current: Nulls last, Wanted: Nulls first
                    extend!(length - null_count, length, is_only_nulls = true);
                    extend!(0, length - null_count);
                },
                (false, false) => {
                    // Current: Nulls first, Wanted: Nulls first
                    extend!(0, null_count, is_only_nulls = true);
                    extend!(null_count, length);
                },
            }
        }

        // @NOTE: This can theoretically be pushed into the previous operation but it is really
        // worth it... probably not...
        if let Some(limit) = options.limit {
            let limit = limit.min(length);
            values.truncate(limit as usize);
        }

        IdxCa::from_vec(self.name().clone(), values)
    }
1182
1183    pub fn arg_sort_multiple(
1184        &self,
1185        by: &[Column],
1186        options: &SortMultipleOptions,
1187    ) -> PolarsResult<IdxCa> {
1188        // @scalar-opt
1189        self.as_materialized_series().arg_sort_multiple(by, options)
1190    }
1191
1192    pub fn arg_unique(&self) -> PolarsResult<IdxCa> {
1193        match self {
1194            Column::Scalar(s) => Ok(IdxCa::new_vec(s.name().clone(), vec![0])),
1195            _ => self.as_materialized_series().arg_unique(),
1196        }
1197    }
1198
1199    pub fn bit_repr(&self) -> Option<BitRepr> {
1200        // @scalar-opt
1201        self.as_materialized_series().bit_repr()
1202    }
1203
1204    pub fn into_frame(self) -> DataFrame {
1205        // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
1206        unsafe { DataFrame::new_no_checks(self.len(), vec![self]) }
1207    }
1208
1209    pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> {
1210        // @scalar-opt
1211        self.into_materialized_series()
1212            .extend(other.as_materialized_series())?;
1213        Ok(self)
1214    }
1215
1216    pub fn rechunk(&self) -> Column {
1217        match self {
1218            Column::Series(s) => s.rechunk().into(),
1219            Column::Partitioned(s) => {
1220                if let Some(s) = s.lazy_as_materialized_series() {
1221                    // This should always hold for partitioned.
1222                    debug_assert_eq!(s.n_chunks(), 1)
1223                }
1224                self.clone()
1225            },
1226            Column::Scalar(s) => {
1227                if s.lazy_as_materialized_series()
1228                    .filter(|x| x.n_chunks() > 1)
1229                    .is_some()
1230                {
1231                    Column::Scalar(ScalarColumn::new(
1232                        s.name().clone(),
1233                        s.scalar().clone(),
1234                        s.len(),
1235                    ))
1236                } else {
1237                    self.clone()
1238                }
1239            },
1240        }
1241    }
1242
1243    pub fn explode(&self, skip_empty: bool) -> PolarsResult<Column> {
1244        self.as_materialized_series()
1245            .explode(skip_empty)
1246            .map(Column::from)
1247    }
1248    pub fn implode(&self) -> PolarsResult<ListChunked> {
1249        self.as_materialized_series().implode()
1250    }
1251
1252    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
1253        // @scalar-opt
1254        self.as_materialized_series()
1255            .fill_null(strategy)
1256            .map(Column::from)
1257    }
1258
1259    pub fn divide(&self, rhs: &Column) -> PolarsResult<Self> {
1260        // @scalar-opt
1261        self.as_materialized_series()
1262            .divide(rhs.as_materialized_series())
1263            .map(Column::from)
1264    }
1265
1266    pub fn shift(&self, periods: i64) -> Column {
1267        // @scalar-opt
1268        self.as_materialized_series().shift(periods).into()
1269    }
1270
1271    #[cfg(feature = "zip_with")]
1272    pub fn zip_with(&self, mask: &BooleanChunked, other: &Self) -> PolarsResult<Self> {
1273        // @scalar-opt
1274        self.as_materialized_series()
1275            .zip_with(mask, other.as_materialized_series())
1276            .map(Self::from)
1277    }
1278
1279    #[cfg(feature = "zip_with")]
1280    pub fn zip_with_same_type(
1281        &self,
1282        mask: &ChunkedArray<BooleanType>,
1283        other: &Column,
1284    ) -> PolarsResult<Column> {
1285        // @scalar-opt
1286        self.as_materialized_series()
1287            .zip_with_same_type(mask, other.as_materialized_series())
1288            .map(Column::from)
1289    }
1290
1291    pub fn drop_nulls(&self) -> Column {
1292        match self {
1293            Column::Series(s) => s.drop_nulls().into_column(),
1294            // @partition-opt
1295            Column::Partitioned(s) => s.as_materialized_series().drop_nulls().into_column(),
1296            Column::Scalar(s) => s.drop_nulls().into_column(),
1297        }
1298    }
1299
1300    /// Packs every element into a list.
1301    pub fn as_list(&self) -> ListChunked {
1302        // @scalar-opt
1303        // @partition-opt
1304        self.as_materialized_series().as_list()
1305    }
1306
1307    pub fn is_sorted_flag(&self) -> IsSorted {
1308        match self {
1309            Column::Series(s) => s.is_sorted_flag(),
1310            Column::Partitioned(s) => s.partitions().is_sorted_flag(),
1311            Column::Scalar(_) => IsSorted::Ascending,
1312        }
1313    }
1314
1315    pub fn unique(&self) -> PolarsResult<Column> {
1316        match self {
1317            Column::Series(s) => s.unique().map(Column::from),
1318            // @partition-opt
1319            Column::Partitioned(s) => s.as_materialized_series().unique().map(Column::from),
1320            Column::Scalar(s) => {
1321                _ = s.as_single_value_series().unique()?;
1322                if s.is_empty() {
1323                    return Ok(s.clone().into_column());
1324                }
1325
1326                Ok(s.resize(1).into_column())
1327            },
1328        }
1329    }
1330    pub fn unique_stable(&self) -> PolarsResult<Column> {
1331        match self {
1332            Column::Series(s) => s.unique_stable().map(Column::from),
1333            // @partition-opt
1334            Column::Partitioned(s) => s.as_materialized_series().unique_stable().map(Column::from),
1335            Column::Scalar(s) => {
1336                _ = s.as_single_value_series().unique_stable()?;
1337                if s.is_empty() {
1338                    return Ok(s.clone().into_column());
1339                }
1340
1341                Ok(s.resize(1).into_column())
1342            },
1343        }
1344    }
1345
1346    pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1347        // @scalar-opt
1348        self.as_materialized_series()
1349            .reshape_list(dimensions)
1350            .map(Self::from)
1351    }
1352
1353    #[cfg(feature = "dtype-array")]
1354    pub fn reshape_array(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1355        // @scalar-opt
1356        self.as_materialized_series()
1357            .reshape_array(dimensions)
1358            .map(Self::from)
1359    }
1360
1361    pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
1362        // @scalar-opt
1363        self.as_materialized_series()
1364            .sort(sort_options)
1365            .map(Self::from)
1366    }
1367
1368    pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Self> {
1369        match self {
1370            Column::Series(s) => s.filter(filter).map(Column::from),
1371            Column::Partitioned(s) => s.as_materialized_series().filter(filter).map(Column::from),
1372            Column::Scalar(s) => {
1373                if s.is_empty() {
1374                    return Ok(s.clone().into_column());
1375                }
1376
1377                // Broadcasting
1378                if filter.len() == 1 {
1379                    return match filter.get(0) {
1380                        Some(true) => Ok(s.clone().into_column()),
1381                        _ => Ok(s.resize(0).into_column()),
1382                    };
1383                }
1384
1385                Ok(s.resize(filter.sum().unwrap() as usize).into_column())
1386            },
1387        }
1388    }
1389
1390    #[cfg(feature = "random")]
1391    pub fn shuffle(&self, seed: Option<u64>) -> Self {
1392        // @scalar-opt
1393        self.as_materialized_series().shuffle(seed).into()
1394    }
1395
1396    #[cfg(feature = "random")]
1397    pub fn sample_frac(
1398        &self,
1399        frac: f64,
1400        with_replacement: bool,
1401        shuffle: bool,
1402        seed: Option<u64>,
1403    ) -> PolarsResult<Self> {
1404        self.as_materialized_series()
1405            .sample_frac(frac, with_replacement, shuffle, seed)
1406            .map(Self::from)
1407    }
1408
1409    #[cfg(feature = "random")]
1410    pub fn sample_n(
1411        &self,
1412        n: usize,
1413        with_replacement: bool,
1414        shuffle: bool,
1415        seed: Option<u64>,
1416    ) -> PolarsResult<Self> {
1417        self.as_materialized_series()
1418            .sample_n(n, with_replacement, shuffle, seed)
1419            .map(Self::from)
1420    }
1421
1422    pub fn gather_every(&self, n: usize, offset: usize) -> PolarsResult<Column> {
1423        polars_ensure!(n > 0, InvalidOperation: "gather_every(n): n should be positive");
1424        if self.len().saturating_sub(offset) == 0 {
1425            return Ok(self.clear());
1426        }
1427
1428        match self {
1429            Column::Series(s) => Ok(s.gather_every(n, offset)?.into()),
1430            Column::Partitioned(s) => {
1431                Ok(s.as_materialized_series().gather_every(n, offset)?.into())
1432            },
1433            Column::Scalar(s) => {
1434                let total = s.len() - offset;
1435                Ok(s.resize(1 + (total - 1) / n).into())
1436            },
1437        }
1438    }
1439
1440    pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult<Self> {
1441        if self.is_empty() {
1442            return Ok(Self::new_scalar(
1443                self.name().clone(),
1444                Scalar::new(self.dtype().clone(), value.into_static()),
1445                n,
1446            ));
1447        }
1448
1449        match self {
1450            Column::Series(s) => s.extend_constant(value, n).map(Column::from),
1451            Column::Partitioned(s) => s.extend_constant(value, n).map(Column::from),
1452            Column::Scalar(s) => {
1453                if s.scalar().as_any_value() == value {
1454                    Ok(s.resize(s.len() + n).into())
1455                } else {
1456                    s.as_materialized_series()
1457                        .extend_constant(value, n)
1458                        .map(Column::from)
1459                }
1460            },
1461        }
1462    }
1463
1464    pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
1465        self.try_map_unary_elementwise_to_bool(|s| s.is_finite())
1466    }
1467    pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
1468        self.try_map_unary_elementwise_to_bool(|s| s.is_infinite())
1469    }
1470    pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
1471        self.try_map_unary_elementwise_to_bool(|s| s.is_nan())
1472    }
1473    pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
1474        self.try_map_unary_elementwise_to_bool(|s| s.is_not_nan())
1475    }
1476
1477    pub fn wrapping_trunc_div_scalar<T>(&self, rhs: T) -> Self
1478    where
1479        T: Num + NumCast,
1480    {
1481        // @scalar-opt
1482        self.as_materialized_series()
1483            .wrapping_trunc_div_scalar(rhs)
1484            .into()
1485    }
1486
1487    pub fn product(&self) -> PolarsResult<Scalar> {
1488        // @scalar-opt
1489        self.as_materialized_series().product()
1490    }
1491
1492    pub fn phys_iter(&self) -> SeriesPhysIter<'_> {
1493        // @scalar-opt
1494        self.as_materialized_series().phys_iter()
1495    }
1496
1497    #[inline]
1498    pub fn get(&self, index: usize) -> PolarsResult<AnyValue<'_>> {
1499        polars_ensure!(index < self.len(), oob = index, self.len());
1500
1501        // SAFETY: Bounds check done just before.
1502        Ok(unsafe { self.get_unchecked(index) })
1503    }
1504    /// # Safety
1505    ///
1506    /// Does not perform bounds check on `index`
1507    #[inline(always)]
1508    pub unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {
1509        debug_assert!(index < self.len());
1510
1511        match self {
1512            Column::Series(s) => unsafe { s.get_unchecked(index) },
1513            Column::Partitioned(s) => unsafe { s.get_unchecked(index) },
1514            Column::Scalar(s) => s.scalar().as_any_value(),
1515        }
1516    }
1517
1518    #[cfg(feature = "object")]
1519    pub fn get_object(
1520        &self,
1521        index: usize,
1522    ) -> Option<&dyn crate::chunked_array::object::PolarsObjectSafe> {
1523        self.as_materialized_series().get_object(index)
1524    }
1525
1526    pub fn bitand(&self, rhs: &Self) -> PolarsResult<Self> {
1527        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l & r)
1528    }
1529    pub fn bitor(&self, rhs: &Self) -> PolarsResult<Self> {
1530        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l | r)
1531    }
1532    pub fn bitxor(&self, rhs: &Self) -> PolarsResult<Self> {
1533        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l ^ r)
1534    }
1535
1536    pub fn try_add_owned(self, other: Self) -> PolarsResult<Self> {
1537        match (self, other) {
1538            (Column::Series(lhs), Column::Series(rhs)) => {
1539                lhs.take().try_add_owned(rhs.take()).map(Column::from)
1540            },
1541            (lhs, rhs) => lhs + rhs,
1542        }
1543    }
1544    pub fn try_sub_owned(self, other: Self) -> PolarsResult<Self> {
1545        match (self, other) {
1546            (Column::Series(lhs), Column::Series(rhs)) => {
1547                lhs.take().try_sub_owned(rhs.take()).map(Column::from)
1548            },
1549            (lhs, rhs) => lhs - rhs,
1550        }
1551    }
1552    pub fn try_mul_owned(self, other: Self) -> PolarsResult<Self> {
1553        match (self, other) {
1554            (Column::Series(lhs), Column::Series(rhs)) => {
1555                lhs.take().try_mul_owned(rhs.take()).map(Column::from)
1556            },
1557            (lhs, rhs) => lhs * rhs,
1558        }
1559    }
1560
1561    pub(crate) fn str_value(&self, index: usize) -> PolarsResult<Cow<'_, str>> {
1562        Ok(self.get(index)?.str_value())
1563    }
1564
1565    pub fn min_reduce(&self) -> PolarsResult<Scalar> {
1566        match self {
1567            Column::Series(s) => s.min_reduce(),
1568            Column::Partitioned(s) => s.min_reduce(),
1569            Column::Scalar(s) => {
1570                // We don't really want to deal with handling the full semantics here so we just
1571                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1572                s.as_single_value_series().min_reduce()
1573            },
1574        }
1575    }
1576    pub fn max_reduce(&self) -> PolarsResult<Scalar> {
1577        match self {
1578            Column::Series(s) => s.max_reduce(),
1579            Column::Partitioned(s) => s.max_reduce(),
1580            Column::Scalar(s) => {
1581                // We don't really want to deal with handling the full semantics here so we just
1582                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1583                s.as_single_value_series().max_reduce()
1584            },
1585        }
1586    }
1587    pub fn median_reduce(&self) -> PolarsResult<Scalar> {
1588        match self {
1589            Column::Series(s) => s.median_reduce(),
1590            Column::Partitioned(s) => s.as_materialized_series().median_reduce(),
1591            Column::Scalar(s) => {
1592                // We don't really want to deal with handling the full semantics here so we just
1593                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1594                s.as_single_value_series().median_reduce()
1595            },
1596        }
1597    }
1598    pub fn mean_reduce(&self) -> Scalar {
1599        match self {
1600            Column::Series(s) => s.mean_reduce(),
1601            Column::Partitioned(s) => s.as_materialized_series().mean_reduce(),
1602            Column::Scalar(s) => {
1603                // We don't really want to deal with handling the full semantics here so we just
1604                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1605                s.as_single_value_series().mean_reduce()
1606            },
1607        }
1608    }
1609    pub fn std_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1610        match self {
1611            Column::Series(s) => s.std_reduce(ddof),
1612            Column::Partitioned(s) => s.as_materialized_series().std_reduce(ddof),
1613            Column::Scalar(s) => {
1614                // We don't really want to deal with handling the full semantics here so we just
1615                // cast to a small series. This is a tiny bit wasteful, but probably fine.
1616                let n = s.len().min(ddof as usize + 1);
1617                s.as_n_values_series(n).std_reduce(ddof)
1618            },
1619        }
1620    }
1621    pub fn var_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1622        match self {
1623            Column::Series(s) => s.var_reduce(ddof),
1624            Column::Partitioned(s) => s.as_materialized_series().var_reduce(ddof),
1625            Column::Scalar(s) => {
1626                // We don't really want to deal with handling the full semantics here so we just
1627                // cast to a small series. This is a tiny bit wasteful, but probably fine.
1628                let n = s.len().min(ddof as usize + 1);
1629                s.as_n_values_series(n).var_reduce(ddof)
1630            },
1631        }
1632    }
1633    pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
1634        // @partition-opt
1635        // @scalar-opt
1636        self.as_materialized_series().sum_reduce()
1637    }
1638    pub fn and_reduce(&self) -> PolarsResult<Scalar> {
1639        match self {
1640            Column::Series(s) => s.and_reduce(),
1641            Column::Partitioned(s) => s.and_reduce(),
1642            Column::Scalar(s) => {
1643                // We don't really want to deal with handling the full semantics here so we just
1644                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1645                s.as_single_value_series().and_reduce()
1646            },
1647        }
1648    }
1649    pub fn or_reduce(&self) -> PolarsResult<Scalar> {
1650        match self {
1651            Column::Series(s) => s.or_reduce(),
1652            Column::Partitioned(s) => s.or_reduce(),
1653            Column::Scalar(s) => {
1654                // We don't really want to deal with handling the full semantics here so we just
1655                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1656                s.as_single_value_series().or_reduce()
1657            },
1658        }
1659    }
1660    pub fn xor_reduce(&self) -> PolarsResult<Scalar> {
1661        match self {
1662            Column::Series(s) => s.xor_reduce(),
1663            // @partition-opt
1664            Column::Partitioned(s) => s.as_materialized_series().xor_reduce(),
1665            Column::Scalar(s) => {
1666                // We don't really want to deal with handling the full semantics here so we just
1667                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1668                //
1669                // We have to deal with the fact that xor is 0 if there is an even number of
1670                // elements and the value if there is an odd number of elements. If there are zero
1671                // elements the result should be `null`.
1672                s.as_n_values_series(2 - s.len() % 2).xor_reduce()
1673            },
1674        }
1675    }
    /// Count the unique values in this [`Column`].
    pub fn n_unique(&self) -> PolarsResult<usize> {
        match self {
            Column::Series(s) => s.n_unique(),
            // Presumably the partition values cover every distinct value, so counting
            // uniques over them suffices — see `PartitionedColumn::partitions`.
            Column::Partitioned(s) => s.partitions().n_unique(),
            // A scalar column repeats a single value, so counting uniques on a
            // single-value series is enough. NOTE(review): this presumably reports 1
            // even for a zero-length scalar column — confirm that is intended.
            Column::Scalar(s) => s.as_single_value_series().n_unique(),
        }
    }
1683    pub fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar> {
1684        self.as_materialized_series()
1685            .quantile_reduce(quantile, method)
1686    }
1687
1688    pub(crate) fn estimated_size(&self) -> usize {
1689        // @scalar-opt
1690        self.as_materialized_series().estimated_size()
1691    }
1692
1693    pub fn sort_with(&self, options: SortOptions) -> PolarsResult<Self> {
1694        match self {
1695            Column::Series(s) => s.sort_with(options).map(Self::from),
1696            // @partition-opt
1697            Column::Partitioned(s) => s
1698                .as_materialized_series()
1699                .sort_with(options)
1700                .map(Self::from),
1701            Column::Scalar(s) => {
1702                // This makes this function throw the same errors as Series::sort_with
1703                _ = s.as_single_value_series().sort_with(options)?;
1704
1705                Ok(self.clone())
1706            },
1707        }
1708    }
1709
1710    pub fn map_unary_elementwise_to_bool(
1711        &self,
1712        f: impl Fn(&Series) -> BooleanChunked,
1713    ) -> BooleanChunked {
1714        self.try_map_unary_elementwise_to_bool(|s| Ok(f(s)))
1715            .unwrap()
1716    }
1717    pub fn try_map_unary_elementwise_to_bool(
1718        &self,
1719        f: impl Fn(&Series) -> PolarsResult<BooleanChunked>,
1720    ) -> PolarsResult<BooleanChunked> {
1721        match self {
1722            Column::Series(s) => f(s),
1723            Column::Partitioned(s) => f(s.as_materialized_series()),
1724            Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())),
1725        }
1726    }
1727
1728    pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column {
1729        self.try_apply_unary_elementwise(|s| Ok(f(s))).unwrap()
1730    }
1731    pub fn try_apply_unary_elementwise(
1732        &self,
1733        f: impl Fn(&Series) -> PolarsResult<Series>,
1734    ) -> PolarsResult<Column> {
1735        match self {
1736            Column::Series(s) => f(s).map(Column::from),
1737            Column::Partitioned(s) => s.try_apply_unary_elementwise(f).map(Self::from),
1738            Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series(
1739                f(&s.as_single_value_series())?,
1740                s.len(),
1741            )
1742            .into()),
1743        }
1744    }
1745
1746    pub fn apply_broadcasting_binary_elementwise(
1747        &self,
1748        other: &Self,
1749        op: impl Fn(&Series, &Series) -> Series,
1750    ) -> PolarsResult<Column> {
1751        self.try_apply_broadcasting_binary_elementwise(other, |lhs, rhs| Ok(op(lhs, rhs)))
1752    }
1753    pub fn try_apply_broadcasting_binary_elementwise(
1754        &self,
1755        other: &Self,
1756        op: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1757    ) -> PolarsResult<Column> {
1758        fn output_length(a: &Column, b: &Column) -> PolarsResult<usize> {
1759            match (a.len(), b.len()) {
1760                // broadcasting
1761                (1, o) | (o, 1) => Ok(o),
1762                // equal
1763                (a, b) if a == b => Ok(a),
1764                // unequal
1765                (a, b) => {
1766                    polars_bail!(InvalidOperation: "cannot do a binary operation on columns of different lengths: got {} and {}", a, b)
1767                },
1768            }
1769        }
1770
1771        // Here we rely on the underlying broadcast operations.
1772        let length = output_length(self, other)?;
1773        match (self, other) {
1774            (Column::Series(lhs), Column::Series(rhs)) => op(lhs, rhs).map(Column::from),
1775            (Column::Series(lhs), Column::Scalar(rhs)) => {
1776                op(lhs, &rhs.as_single_value_series()).map(Column::from)
1777            },
1778            (Column::Scalar(lhs), Column::Series(rhs)) => {
1779                op(&lhs.as_single_value_series(), rhs).map(Column::from)
1780            },
1781            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1782                let lhs = lhs.as_single_value_series();
1783                let rhs = rhs.as_single_value_series();
1784
1785                Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column())
1786            },
1787            // @partition-opt
1788            (lhs, rhs) => {
1789                op(lhs.as_materialized_series(), rhs.as_materialized_series()).map(Column::from)
1790            },
1791        }
1792    }
1793
1794    pub fn apply_binary_elementwise(
1795        &self,
1796        other: &Self,
1797        f: impl Fn(&Series, &Series) -> Series,
1798        f_lb: impl Fn(&Scalar, &Series) -> Series,
1799        f_rb: impl Fn(&Series, &Scalar) -> Series,
1800    ) -> Column {
1801        self.try_apply_binary_elementwise(
1802            other,
1803            |lhs, rhs| Ok(f(lhs, rhs)),
1804            |lhs, rhs| Ok(f_lb(lhs, rhs)),
1805            |lhs, rhs| Ok(f_rb(lhs, rhs)),
1806        )
1807        .unwrap()
1808    }
1809    pub fn try_apply_binary_elementwise(
1810        &self,
1811        other: &Self,
1812        f: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1813        f_lb: impl Fn(&Scalar, &Series) -> PolarsResult<Series>,
1814        f_rb: impl Fn(&Series, &Scalar) -> PolarsResult<Series>,
1815    ) -> PolarsResult<Column> {
1816        debug_assert_eq!(self.len(), other.len());
1817
1818        match (self, other) {
1819            (Column::Series(lhs), Column::Series(rhs)) => f(lhs, rhs).map(Column::from),
1820            (Column::Series(lhs), Column::Scalar(rhs)) => f_rb(lhs, rhs.scalar()).map(Column::from),
1821            (Column::Scalar(lhs), Column::Series(rhs)) => f_lb(lhs.scalar(), rhs).map(Column::from),
1822            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1823                let lhs = lhs.as_single_value_series();
1824                let rhs = rhs.as_single_value_series();
1825
1826                Ok(
1827                    ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len())
1828                        .into_column(),
1829                )
1830            },
1831            // @partition-opt
1832            (lhs, rhs) => {
1833                f(lhs.as_materialized_series(), rhs.as_materialized_series()).map(Column::from)
1834            },
1835        }
1836    }
1837
1838    #[cfg(feature = "approx_unique")]
1839    pub fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
1840        match self {
1841            Column::Series(s) => s.approx_n_unique(),
1842            // @partition-opt
1843            Column::Partitioned(s) => s.as_materialized_series().approx_n_unique(),
1844            Column::Scalar(s) => {
1845                // @NOTE: We do this for the error handling.
1846                s.as_single_value_series().approx_n_unique()?;
1847                Ok(1)
1848            },
1849        }
1850    }
1851
1852    pub fn n_chunks(&self) -> usize {
1853        match self {
1854            Column::Series(s) => s.n_chunks(),
1855            Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()),
1856            Column::Partitioned(s) => {
1857                if let Some(s) = s.lazy_as_materialized_series() {
1858                    // This should always hold for partitioned.
1859                    debug_assert_eq!(s.n_chunks(), 1)
1860                }
1861                1
1862            },
1863        }
1864    }
1865
1866    #[expect(clippy::wrong_self_convention)]
1867    pub(crate) fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
1868        // @scalar-opt
1869        self.as_materialized_series().into_total_ord_inner()
1870    }
1871    #[expect(unused, clippy::wrong_self_convention)]
1872    pub(crate) fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
1873        // @scalar-opt
1874        self.as_materialized_series().into_total_eq_inner()
1875    }
1876
1877    pub fn rechunk_to_arrow(self, compat_level: CompatLevel) -> Box<dyn Array> {
1878        // Rechunk to one chunk if necessary
1879        let mut series = self.take_materialized_series();
1880        if series.n_chunks() > 1 {
1881            series = series.rechunk();
1882        }
1883        series.to_arrow(0, compat_level)
1884    }
1885
1886    pub fn trim_lists_to_normalized_offsets(&self) -> Option<Column> {
1887        self.as_materialized_series()
1888            .trim_lists_to_normalized_offsets()
1889            .map(Column::from)
1890    }
1891
1892    pub fn propagate_nulls(&self) -> Option<Column> {
1893        self.as_materialized_series()
1894            .propagate_nulls()
1895            .map(Column::from)
1896    }
1897}
1898
1899impl Default for Column {
1900    fn default() -> Self {
1901        Self::new_scalar(
1902            PlSmallStr::EMPTY,
1903            Scalar::new(DataType::Int64, AnyValue::Null),
1904            0,
1905        )
1906    }
1907}
1908
1909impl PartialEq for Column {
1910    fn eq(&self, other: &Self) -> bool {
1911        // @scalar-opt
1912        self.as_materialized_series()
1913            .eq(other.as_materialized_series())
1914    }
1915}
1916
1917impl From<Series> for Column {
1918    #[inline]
1919    fn from(series: Series) -> Self {
1920        // We instantiate a Scalar Column if the Series is length is 1. This makes it possible for
1921        // future operations to be faster.
1922        if series.len() == 1 {
1923            return Self::Scalar(ScalarColumn::unit_scalar_from_series(series));
1924        }
1925
1926        Self::Series(SeriesColumn::new(series))
1927    }
1928}
1929
1930impl<T: IntoSeries> IntoColumn for T {
1931    #[inline]
1932    fn into_column(self) -> Column {
1933        self.into_series().into()
1934    }
1935}
1936
impl IntoColumn for Column {
    /// Identity conversion: a [`Column`] is already a column.
    #[inline(always)]
    fn into_column(self) -> Column {
        self
    }
}
1943
/// We don't want to serialize the scalar columns. So this helps pretend that columns are always
/// initialized without implementing From<Column> for Series.
///
/// Those casts should be explicit.
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
// Serialized as a plain `Series` through the `From<_SerdeSeries> for Series` impl.
#[cfg_attr(feature = "serde", serde(into = "Series"))]
struct _SerdeSeries(Series);
1952
1953impl From<Column> for _SerdeSeries {
1954    #[inline]
1955    fn from(value: Column) -> Self {
1956        Self(value.take_materialized_series())
1957    }
1958}
1959
1960impl From<_SerdeSeries> for Series {
1961    #[inline]
1962    fn from(value: _SerdeSeries) -> Self {
1963        value.0
1964    }
1965}