Skip to main content

polars_core/frame/column/
mod.rs

1use std::borrow::Cow;
2
3use arrow::bitmap::{Bitmap, BitmapBuilder};
4use arrow::trusted_len::TrustMyLength;
5use num_traits::{Num, NumCast};
6use polars_compute::rolling::QuantileMethod;
7use polars_error::PolarsResult;
8use polars_utils::aliases::PlSeedableRandomStateQuality;
9use polars_utils::index::check_bounds;
10use polars_utils::pl_str::PlSmallStr;
11pub use scalar::ScalarColumn;
12
13use self::compare_inner::{TotalEqInner, TotalOrdInner};
14use self::gather::check_bounds_ca;
15use self::series::SeriesColumn;
16use crate::chunked_array::cast::CastOptions;
17use crate::chunked_array::flags::StatisticsFlags;
18use crate::datatypes::ReshapeDimension;
19use crate::prelude::*;
20use crate::series::{BitRepr, IsSorted, SeriesPhysIter};
21use crate::utils::{Container, slice_offsets};
22use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};
23
24mod arithmetic;
25mod compare;
26mod scalar;
27mod series;
28
29/// A column within a [`DataFrame`].
30///
31/// This is lazily initialized to a [`Series`] with methods like
32/// [`as_materialized_series`][Column::as_materialized_series] and
33/// [`take_materialized_series`][Column::take_materialized_series].
34///
35/// Currently, there are two ways to represent a [`Column`].
36/// 1. A [`Series`] of values
37/// 2. A [`ScalarColumn`] that repeats a single [`Scalar`]
38#[derive(Debug, Clone)]
39#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
40#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
41pub enum Column {
42    Series(SeriesColumn),
43    Scalar(ScalarColumn),
44}
45
46/// Convert `Self` into a [`Column`]
47pub trait IntoColumn: Sized {
48    fn into_column(self) -> Column;
49}
50
51impl Column {
52    #[inline]
53    #[track_caller]
54    pub fn new<T, Phantom>(name: PlSmallStr, values: T) -> Self
55    where
56        Phantom: ?Sized,
57        Series: NamedFrom<T, Phantom>,
58    {
59        Self::Series(SeriesColumn::new(NamedFrom::new(name, values)))
60    }
61
62    #[inline]
63    pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Self {
64        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), 0)
65    }
66
67    #[inline]
68    pub fn new_scalar(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
69        Self::Scalar(ScalarColumn::new(name, scalar, length))
70    }
71
72    pub fn new_row_index(name: PlSmallStr, offset: IdxSize, length: usize) -> PolarsResult<Column> {
73        let Ok(length) = IdxSize::try_from(length) else {
74            polars_bail!(
75                ComputeError:
76                "row index length {} overflows IdxSize::MAX ({})",
77                length,
78                IdxSize::MAX,
79            )
80        };
81
82        if offset.checked_add(length).is_none() {
83            polars_bail!(
84                ComputeError:
85                "row index with offset {} overflows on dataframe with height {}",
86                offset, length
87            )
88        }
89
90        let range = offset..offset + length;
91
92        let mut ca = IdxCa::from_vec(name, range.collect());
93        ca.set_sorted_flag(IsSorted::Ascending);
94        let col = ca.into_series().into();
95
96        Ok(col)
97    }
98
99    // # Materialize
100    /// Get a reference to a [`Series`] for this [`Column`]
101    ///
102    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
103    #[inline]
104    pub fn as_materialized_series(&self) -> &Series {
105        match self {
106            Column::Series(s) => s,
107            Column::Scalar(s) => s.as_materialized_series(),
108        }
109    }
110
111    /// If the memory repr of this Column is a scalar, a unit-length Series will
112    /// be returned.
113    #[inline]
114    pub fn as_materialized_series_maintain_scalar(&self) -> Series {
115        match self {
116            Column::Scalar(s) => s.as_single_value_series(),
117            v => v.as_materialized_series().clone(),
118        }
119    }
120
121    /// Returns the backing `Series` for the values of this column.
122    ///
123    /// * For `Column::Series` columns, simply returns the inner `Series`.
124    /// * For `Column::Scalar` columns, returns an empty or unit length series.
125    ///
126    /// # Note
127    /// This method is safe to use. However, care must be taken when operating on the returned
128    /// `Series` to ensure result correctness. E.g. It is suitable to perform elementwise operations
129    /// on it, however e.g. aggregations will return unspecified results.
130    pub fn _get_backing_series(&self) -> Series {
131        match self {
132            Column::Series(s) => (**s).clone(),
133            Column::Scalar(s) => s.as_single_value_series(),
134        }
135    }
136
137    /// Constructs a new `Column` of the same variant as `self` from a backing `Series` representing
138    /// the values.
139    ///
140    /// # Panics
141    /// Panics if:
142    /// * `self` is `Column::Series` and the length of `new_s` does not match that of `self`.
143    /// * `self` is `Column::Scalar` and if either:
144    ///   * `self` is not empty and `new_s` is not of unit length.
145    ///   * `self` is empty and `new_s` is not empty.
146    pub fn _to_new_from_backing(&self, new_s: Series) -> Self {
147        match self {
148            Column::Series(s) => {
149                assert_eq!(new_s.len(), s.len());
150                Column::Series(SeriesColumn::new(new_s))
151            },
152            Column::Scalar(s) => {
153                assert_eq!(new_s.len(), s.as_single_value_series().len());
154                Column::Scalar(ScalarColumn::from_single_value_series(new_s, self.len()))
155            },
156        }
157    }
158
159    /// Turn [`Column`] into a [`Column::Series`].
160    ///
161    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
162    #[inline]
163    pub fn into_materialized_series(&mut self) -> &mut Series {
164        match self {
165            Column::Series(s) => s,
166            Column::Scalar(s) => {
167                let series = std::mem::replace(
168                    s,
169                    ScalarColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
170                )
171                .take_materialized_series();
172                *self = Column::Series(series.into());
173                let Column::Series(s) = self else {
174                    unreachable!();
175                };
176                s
177            },
178        }
179    }
180    /// Take [`Series`] from a [`Column`]
181    ///
182    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
183    #[inline]
184    pub fn take_materialized_series(self) -> Series {
185        match self {
186            Column::Series(s) => s.take(),
187            Column::Scalar(s) => s.take_materialized_series(),
188        }
189    }
190
191    #[inline]
192    pub fn dtype(&self) -> &DataType {
193        match self {
194            Column::Series(s) => s.dtype(),
195            Column::Scalar(s) => s.dtype(),
196        }
197    }
198
199    #[inline]
200    pub fn field(&self) -> Cow<'_, Field> {
201        match self {
202            Column::Series(s) => s.field(),
203            Column::Scalar(s) => match s.lazy_as_materialized_series() {
204                None => Cow::Owned(Field::new(s.name().clone(), s.dtype().clone())),
205                Some(s) => s.field(),
206            },
207        }
208    }
209
210    #[inline]
211    pub fn name(&self) -> &PlSmallStr {
212        match self {
213            Column::Series(s) => s.name(),
214            Column::Scalar(s) => s.name(),
215        }
216    }
217
218    #[inline]
219    pub fn len(&self) -> usize {
220        match self {
221            Column::Series(s) => s.len(),
222            Column::Scalar(s) => s.len(),
223        }
224    }
225
226    #[inline]
227    pub fn with_name(mut self, name: PlSmallStr) -> Column {
228        self.rename(name);
229        self
230    }
231
232    #[inline]
233    pub fn rename(&mut self, name: PlSmallStr) {
234        match self {
235            Column::Series(s) => _ = s.rename(name),
236            Column::Scalar(s) => _ = s.rename(name),
237        }
238    }
239
240    // # Downcasting
241    #[inline]
242    pub fn as_series(&self) -> Option<&Series> {
243        match self {
244            Column::Series(s) => Some(s),
245            _ => None,
246        }
247    }
248    #[inline]
249    pub fn as_scalar_column(&self) -> Option<&ScalarColumn> {
250        match self {
251            Column::Scalar(s) => Some(s),
252            _ => None,
253        }
254    }
255    #[inline]
256    pub fn as_scalar_column_mut(&mut self) -> Option<&mut ScalarColumn> {
257        match self {
258            Column::Scalar(s) => Some(s),
259            _ => None,
260        }
261    }
262
263    // # Try to Chunked Arrays
264    pub fn try_bool(&self) -> Option<&BooleanChunked> {
265        self.as_materialized_series().try_bool()
266    }
267    pub fn try_i8(&self) -> Option<&Int8Chunked> {
268        self.as_materialized_series().try_i8()
269    }
270    pub fn try_i16(&self) -> Option<&Int16Chunked> {
271        self.as_materialized_series().try_i16()
272    }
273    pub fn try_i32(&self) -> Option<&Int32Chunked> {
274        self.as_materialized_series().try_i32()
275    }
276    pub fn try_i64(&self) -> Option<&Int64Chunked> {
277        self.as_materialized_series().try_i64()
278    }
279    pub fn try_u8(&self) -> Option<&UInt8Chunked> {
280        self.as_materialized_series().try_u8()
281    }
282    pub fn try_u16(&self) -> Option<&UInt16Chunked> {
283        self.as_materialized_series().try_u16()
284    }
285    pub fn try_u32(&self) -> Option<&UInt32Chunked> {
286        self.as_materialized_series().try_u32()
287    }
288    pub fn try_u64(&self) -> Option<&UInt64Chunked> {
289        self.as_materialized_series().try_u64()
290    }
291    #[cfg(feature = "dtype-u128")]
292    pub fn try_u128(&self) -> Option<&UInt128Chunked> {
293        self.as_materialized_series().try_u128()
294    }
295    #[cfg(feature = "dtype-f16")]
296    pub fn try_f16(&self) -> Option<&Float16Chunked> {
297        self.as_materialized_series().try_f16()
298    }
299    pub fn try_f32(&self) -> Option<&Float32Chunked> {
300        self.as_materialized_series().try_f32()
301    }
302    pub fn try_f64(&self) -> Option<&Float64Chunked> {
303        self.as_materialized_series().try_f64()
304    }
305    pub fn try_str(&self) -> Option<&StringChunked> {
306        self.as_materialized_series().try_str()
307    }
308    pub fn try_list(&self) -> Option<&ListChunked> {
309        self.as_materialized_series().try_list()
310    }
311    pub fn try_binary(&self) -> Option<&BinaryChunked> {
312        self.as_materialized_series().try_binary()
313    }
314    pub fn try_idx(&self) -> Option<&IdxCa> {
315        self.as_materialized_series().try_idx()
316    }
317    pub fn try_binary_offset(&self) -> Option<&BinaryOffsetChunked> {
318        self.as_materialized_series().try_binary_offset()
319    }
320    #[cfg(feature = "dtype-datetime")]
321    pub fn try_datetime(&self) -> Option<&DatetimeChunked> {
322        self.as_materialized_series().try_datetime()
323    }
324    #[cfg(feature = "dtype-struct")]
325    pub fn try_struct(&self) -> Option<&StructChunked> {
326        self.as_materialized_series().try_struct()
327    }
328    #[cfg(feature = "dtype-decimal")]
329    pub fn try_decimal(&self) -> Option<&DecimalChunked> {
330        self.as_materialized_series().try_decimal()
331    }
332    #[cfg(feature = "dtype-array")]
333    pub fn try_array(&self) -> Option<&ArrayChunked> {
334        self.as_materialized_series().try_array()
335    }
336    #[cfg(feature = "dtype-categorical")]
337    pub fn try_cat<T: PolarsCategoricalType>(&self) -> Option<&CategoricalChunked<T>> {
338        self.as_materialized_series().try_cat::<T>()
339    }
340    #[cfg(feature = "dtype-categorical")]
341    pub fn try_cat8(&self) -> Option<&Categorical8Chunked> {
342        self.as_materialized_series().try_cat8()
343    }
344    #[cfg(feature = "dtype-categorical")]
345    pub fn try_cat16(&self) -> Option<&Categorical16Chunked> {
346        self.as_materialized_series().try_cat16()
347    }
348    #[cfg(feature = "dtype-categorical")]
349    pub fn try_cat32(&self) -> Option<&Categorical32Chunked> {
350        self.as_materialized_series().try_cat32()
351    }
352    #[cfg(feature = "dtype-date")]
353    pub fn try_date(&self) -> Option<&DateChunked> {
354        self.as_materialized_series().try_date()
355    }
356    #[cfg(feature = "dtype-duration")]
357    pub fn try_duration(&self) -> Option<&DurationChunked> {
358        self.as_materialized_series().try_duration()
359    }
360
361    // # To Chunked Arrays
362    pub fn bool(&self) -> PolarsResult<&BooleanChunked> {
363        self.as_materialized_series().bool()
364    }
365    pub fn i8(&self) -> PolarsResult<&Int8Chunked> {
366        self.as_materialized_series().i8()
367    }
368    pub fn i16(&self) -> PolarsResult<&Int16Chunked> {
369        self.as_materialized_series().i16()
370    }
371    pub fn i32(&self) -> PolarsResult<&Int32Chunked> {
372        self.as_materialized_series().i32()
373    }
374    pub fn i64(&self) -> PolarsResult<&Int64Chunked> {
375        self.as_materialized_series().i64()
376    }
377    #[cfg(feature = "dtype-i128")]
378    pub fn i128(&self) -> PolarsResult<&Int128Chunked> {
379        self.as_materialized_series().i128()
380    }
381    pub fn u8(&self) -> PolarsResult<&UInt8Chunked> {
382        self.as_materialized_series().u8()
383    }
384    pub fn u16(&self) -> PolarsResult<&UInt16Chunked> {
385        self.as_materialized_series().u16()
386    }
387    pub fn u32(&self) -> PolarsResult<&UInt32Chunked> {
388        self.as_materialized_series().u32()
389    }
390    pub fn u64(&self) -> PolarsResult<&UInt64Chunked> {
391        self.as_materialized_series().u64()
392    }
393    #[cfg(feature = "dtype-u128")]
394    pub fn u128(&self) -> PolarsResult<&UInt128Chunked> {
395        self.as_materialized_series().u128()
396    }
397    #[cfg(feature = "dtype-f16")]
398    pub fn f16(&self) -> PolarsResult<&Float16Chunked> {
399        self.as_materialized_series().f16()
400    }
401    pub fn f32(&self) -> PolarsResult<&Float32Chunked> {
402        self.as_materialized_series().f32()
403    }
404    pub fn f64(&self) -> PolarsResult<&Float64Chunked> {
405        self.as_materialized_series().f64()
406    }
407    pub fn str(&self) -> PolarsResult<&StringChunked> {
408        self.as_materialized_series().str()
409    }
410    pub fn list(&self) -> PolarsResult<&ListChunked> {
411        self.as_materialized_series().list()
412    }
413    pub fn binary(&self) -> PolarsResult<&BinaryChunked> {
414        self.as_materialized_series().binary()
415    }
416    pub fn idx(&self) -> PolarsResult<&IdxCa> {
417        self.as_materialized_series().idx()
418    }
419    pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> {
420        self.as_materialized_series().binary_offset()
421    }
422    #[cfg(feature = "dtype-datetime")]
423    pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> {
424        self.as_materialized_series().datetime()
425    }
426    #[cfg(feature = "dtype-struct")]
427    pub fn struct_(&self) -> PolarsResult<&StructChunked> {
428        self.as_materialized_series().struct_()
429    }
430    #[cfg(feature = "dtype-decimal")]
431    pub fn decimal(&self) -> PolarsResult<&DecimalChunked> {
432        self.as_materialized_series().decimal()
433    }
434    #[cfg(feature = "dtype-array")]
435    pub fn array(&self) -> PolarsResult<&ArrayChunked> {
436        self.as_materialized_series().array()
437    }
438    #[cfg(feature = "dtype-categorical")]
439    pub fn cat<T: PolarsCategoricalType>(&self) -> PolarsResult<&CategoricalChunked<T>> {
440        self.as_materialized_series().cat::<T>()
441    }
442    #[cfg(feature = "dtype-categorical")]
443    pub fn cat8(&self) -> PolarsResult<&Categorical8Chunked> {
444        self.as_materialized_series().cat8()
445    }
446    #[cfg(feature = "dtype-categorical")]
447    pub fn cat16(&self) -> PolarsResult<&Categorical16Chunked> {
448        self.as_materialized_series().cat16()
449    }
450    #[cfg(feature = "dtype-categorical")]
451    pub fn cat32(&self) -> PolarsResult<&Categorical32Chunked> {
452        self.as_materialized_series().cat32()
453    }
454    #[cfg(feature = "dtype-date")]
455    pub fn date(&self) -> PolarsResult<&DateChunked> {
456        self.as_materialized_series().date()
457    }
458    #[cfg(feature = "dtype-duration")]
459    pub fn duration(&self) -> PolarsResult<&DurationChunked> {
460        self.as_materialized_series().duration()
461    }
462
463    // # Casting
464    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
465        match self {
466            Column::Series(s) => s.cast_with_options(dtype, options).map(Column::from),
467            Column::Scalar(s) => s.cast_with_options(dtype, options).map(Column::from),
468        }
469    }
470    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
471        match self {
472            Column::Series(s) => s.strict_cast(dtype).map(Column::from),
473            Column::Scalar(s) => s.strict_cast(dtype).map(Column::from),
474        }
475    }
476    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Column> {
477        match self {
478            Column::Series(s) => s.cast(dtype).map(Column::from),
479            Column::Scalar(s) => s.cast(dtype).map(Column::from),
480        }
481    }
482    /// # Safety
483    ///
484    /// This can lead to invalid memory access in downstream code.
485    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
486        match self {
487            Column::Series(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
488            Column::Scalar(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
489        }
490    }
491
492    #[must_use]
493    pub fn clear(&self) -> Self {
494        match self {
495            Column::Series(s) => s.clear().into(),
496            Column::Scalar(s) => s.resize(0).into(),
497        }
498    }
499
500    #[inline]
501    pub fn shrink_to_fit(&mut self) {
502        match self {
503            Column::Series(s) => s.shrink_to_fit(),
504            Column::Scalar(_) => {},
505        }
506    }
507
508    #[inline]
509    pub fn new_from_index(&self, index: usize, length: usize) -> Self {
510        if index >= self.len() {
511            return Self::full_null(self.name().clone(), length, self.dtype());
512        }
513
514        match self {
515            Column::Series(s) => {
516                // SAFETY: Bounds check done before.
517                let av = unsafe { s.get_unchecked(index) };
518                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
519                Self::new_scalar(self.name().clone(), scalar, length)
520            },
521            Column::Scalar(s) => s.resize(length).into(),
522        }
523    }
524
525    #[inline]
526    pub fn has_nulls(&self) -> bool {
527        match self {
528            Self::Series(s) => s.has_nulls(),
529            Self::Scalar(s) => s.has_nulls(),
530        }
531    }
532
533    #[inline]
534    pub fn is_null(&self) -> BooleanChunked {
535        match self {
536            Self::Series(s) => s.is_null(),
537            Self::Scalar(s) => {
538                BooleanChunked::full(s.name().clone(), s.scalar().is_null(), s.len())
539            },
540        }
541    }
542    #[inline]
543    pub fn is_not_null(&self) -> BooleanChunked {
544        match self {
545            Self::Series(s) => s.is_not_null(),
546            Self::Scalar(s) => {
547                BooleanChunked::full(s.name().clone(), !s.scalar().is_null(), s.len())
548            },
549        }
550    }
551
552    pub fn to_physical_repr(&self) -> Column {
553        // @scalar-opt
554        self.as_materialized_series()
555            .to_physical_repr()
556            .into_owned()
557            .into()
558    }
559    /// # Safety
560    ///
561    /// This can lead to invalid memory access in downstream code.
562    pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
563        // @scalar-opt
564        self.as_materialized_series()
565            .from_physical_unchecked(dtype)
566            .map(Column::from)
567    }
568
569    pub fn head(&self, length: Option<usize>) -> Column {
570        let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);
571        let len = usize::min(len, self.len());
572        self.slice(0, len)
573    }
574    pub fn tail(&self, length: Option<usize>) -> Column {
575        let len = length.unwrap_or(TAIL_DEFAULT_LENGTH);
576        let len = usize::min(len, self.len());
577        debug_assert!(len <= i64::MAX as usize);
578        self.slice(-(len as i64), len)
579    }
580    pub fn slice(&self, offset: i64, length: usize) -> Column {
581        match self {
582            Column::Series(s) => s.slice(offset, length).into(),
583            Column::Scalar(s) => {
584                let (_, length) = slice_offsets(offset, length, s.len());
585                s.resize(length).into()
586            },
587        }
588    }
589
590    pub fn split_at(&self, offset: i64) -> (Column, Column) {
591        match self {
592            Column::Scalar(c) => {
593                let len = c.len();
594                let offset = if offset < 0 {
595                    let offset_abs = usize::try_from(offset.strict_abs())
596                        .expect("offset exceeds usize limits")
597                        .min(len);
598                    len - offset_abs
599                } else {
600                    usize::try_from(offset)
601                        .expect("offset exceeds usize limits")
602                        .min(len)
603                };
604                (
605                    Column::Scalar(c.resize(offset)),
606                    Column::Scalar(c.resize(len - offset)),
607                )
608            },
609            Column::Series(_) => {
610                let (l, r) = self.as_materialized_series().split_at(offset);
611                (l.into(), r.into())
612            },
613        }
614    }
615
616    #[inline]
617    pub fn null_count(&self) -> usize {
618        match self {
619            Self::Series(s) => s.null_count(),
620            Self::Scalar(s) if s.scalar().is_null() => s.len(),
621            Self::Scalar(_) => 0,
622        }
623    }
624
625    pub fn first_non_null(&self) -> Option<usize> {
626        match self {
627            Self::Series(s) => crate::utils::first_non_null(s.chunks().iter().map(|a| a.as_ref())),
628            Self::Scalar(s) => (!s.scalar().is_null() && !s.is_empty()).then_some(0),
629        }
630    }
631
632    pub fn last_non_null(&self) -> Option<usize> {
633        match self {
634            Self::Series(s) => {
635                crate::utils::last_non_null(s.chunks().iter().map(|a| a.as_ref()), s.len())
636            },
637            Self::Scalar(s) => (!s.scalar().is_null() && !s.is_empty()).then(|| s.len() - 1),
638        }
639    }
640
641    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Column> {
642        check_bounds_ca(indices, self.len() as IdxSize)?;
643        Ok(unsafe { self.take_unchecked(indices) })
644    }
645    pub fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Column> {
646        check_bounds(indices, self.len() as IdxSize)?;
647        Ok(unsafe { self.take_slice_unchecked(indices) })
648    }
649    /// # Safety
650    ///
651    /// No bounds on the indexes are performed.
652    pub unsafe fn take_unchecked(&self, indices: &IdxCa) -> Column {
653        debug_assert!(check_bounds_ca(indices, self.len() as IdxSize).is_ok());
654
655        match self {
656            Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(),
657            Self::Scalar(s) => {
658                let idxs_length = indices.len();
659                let idxs_null_count = indices.null_count();
660
661                let scalar = ScalarColumn::from_single_value_series(
662                    s.as_single_value_series().take_unchecked(&IdxCa::new(
663                        indices.name().clone(),
664                        &[0][..s.len().min(1)],
665                    )),
666                    idxs_length,
667                );
668
669                // We need to make sure that null values in `idx` become null values in the result
670                if idxs_null_count == 0 || scalar.has_nulls() {
671                    scalar.into_column()
672                } else if idxs_null_count == idxs_length {
673                    scalar.into_nulls().into_column()
674                } else {
675                    let validity = indices.rechunk_validity();
676                    let series = scalar.take_materialized_series();
677                    let name = series.name().clone();
678                    let dtype = series.dtype().clone();
679                    let mut chunks = series.into_chunks();
680                    assert_eq!(chunks.len(), 1);
681                    chunks[0] = chunks[0].with_validity(validity);
682                    unsafe { Series::from_chunks_and_dtype_unchecked(name, chunks, &dtype) }
683                        .into_column()
684                }
685            },
686        }
687    }
688    /// # Safety
689    ///
690    /// No bounds on the indexes are performed.
691    pub unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Column {
692        debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok());
693
694        match self {
695            Self::Series(s) => unsafe { s.take_slice_unchecked(indices) }.into(),
696            Self::Scalar(s) => ScalarColumn::from_single_value_series(
697                s.as_single_value_series()
698                    .take_slice_unchecked(&[0][..s.len().min(1)]),
699                indices.len(),
700            )
701            .into(),
702        }
703    }
704
705    /// General implementation for aggregation where a non-missing scalar would map to itself.
706    #[inline(always)]
707    #[cfg(any(feature = "algorithm_group_by", feature = "bitwise"))]
708    fn agg_with_scalar_identity(
709        &self,
710        groups: &GroupsType,
711        series_agg: impl Fn(&Series, &GroupsType) -> Series,
712    ) -> Column {
713        match self {
714            Column::Series(s) => series_agg(s, groups).into_column(),
715            Column::Scalar(s) => {
716                if s.is_empty() {
717                    return series_agg(s.as_materialized_series(), groups).into_column();
718                }
719
720                // We utilize the aggregation on Series to see:
721                // 1. the output datatype of the aggregation
722                // 2. whether this aggregation is even defined
723                let series_aggregation = series_agg(
724                    &s.as_single_value_series(),
725                    // @NOTE: this group is always valid since s is non-empty.
726                    &GroupsType::new_slice(vec![[0, 1]], false, true),
727                );
728
729                // If the aggregation is not defined, just return all nulls.
730                if series_aggregation.has_nulls() {
731                    return Self::new_scalar(
732                        series_aggregation.name().clone(),
733                        Scalar::new(series_aggregation.dtype().clone(), AnyValue::Null),
734                        groups.len(),
735                    );
736                }
737
738                let mut scalar_col = s.resize(groups.len());
739                // The aggregation might change the type (e.g. mean changes int -> float), so we do
740                // a cast here to the output type.
741                if series_aggregation.dtype() != s.dtype() {
742                    scalar_col = scalar_col.cast(series_aggregation.dtype()).unwrap();
743                }
744
745                let Some(first_empty_idx) = groups.iter().position(|g| g.is_empty()) else {
746                    // Fast path: no empty groups. keep the scalar intact.
747                    return scalar_col.into_column();
748                };
749
750                // All empty groups produce a *missing* or `null` value.
751                let mut validity = BitmapBuilder::with_capacity(groups.len());
752                validity.extend_constant(first_empty_idx, true);
753                // SAFETY: We trust the length of this iterator.
754                let iter = unsafe {
755                    TrustMyLength::new(
756                        groups.iter().skip(first_empty_idx).map(|g| !g.is_empty()),
757                        groups.len() - first_empty_idx,
758                    )
759                };
760                validity.extend_trusted_len_iter(iter);
761
762                let mut s = scalar_col.take_materialized_series().rechunk();
763                // SAFETY: We perform a compute_len afterwards.
764                let chunks = unsafe { s.chunks_mut() };
765                let arr = &mut chunks[0];
766                *arr = arr.with_validity(validity.into_opt_validity());
767                s.compute_len();
768
769                s.into_column()
770            },
771        }
772    }
773
774    /// # Safety
775    ///
776    /// Does no bounds checks, groups must be correct.
777    #[cfg(feature = "algorithm_group_by")]
778    pub unsafe fn agg_min(&self, groups: &GroupsType) -> Self {
779        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_min(g) })
780    }
781
782    /// # Safety
783    ///
784    /// Does no bounds checks, groups must be correct.
785    #[cfg(feature = "algorithm_group_by")]
786    pub unsafe fn agg_max(&self, groups: &GroupsType) -> Self {
787        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_max(g) })
788    }
789
790    /// # Safety
791    ///
792    /// Does no bounds checks, groups must be correct.
793    #[cfg(feature = "algorithm_group_by")]
794    pub unsafe fn agg_mean(&self, groups: &GroupsType) -> Self {
795        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_mean(g) })
796    }
797
798    /// # Safety
799    ///
800    /// Does no bounds checks, groups must be correct.
801    #[cfg(feature = "algorithm_group_by")]
802    pub unsafe fn agg_arg_min(&self, groups: &GroupsType) -> Self {
803        match self {
804            Column::Series(s) => unsafe { Column::from(s.agg_arg_min(groups)) },
805            Column::Scalar(sc) => {
806                let scalar = if sc.is_empty() || sc.has_nulls() {
807                    Scalar::null(IDX_DTYPE)
808                } else {
809                    Scalar::new_idxsize(0)
810                };
811                Column::new_scalar(self.name().clone(), scalar, 1)
812            },
813        }
814    }
815
816    /// # Safety
817    ///
818    /// Does no bounds checks, groups must be correct.
819    #[cfg(feature = "algorithm_group_by")]
820    pub unsafe fn agg_arg_max(&self, groups: &GroupsType) -> Self {
821        match self {
822            Column::Series(s) => unsafe { Column::from(s.agg_arg_max(groups)) },
823            Column::Scalar(sc) => {
824                let scalar = if sc.is_empty() || sc.has_nulls() {
825                    Scalar::null(IDX_DTYPE)
826                } else {
827                    Scalar::new_idxsize(0)
828                };
829                Column::new_scalar(self.name().clone(), scalar, 1)
830            },
831        }
832    }
833
834    /// # Safety
835    ///
836    /// Does no bounds checks, groups must be correct.
837    #[cfg(feature = "algorithm_group_by")]
838    pub unsafe fn agg_sum(&self, groups: &GroupsType) -> Self {
839        // @scalar-opt
840        unsafe { self.as_materialized_series().agg_sum(groups) }.into()
841    }
842
843    /// # Safety
844    ///
845    /// Does no bounds checks, groups must be correct.
846    #[cfg(feature = "algorithm_group_by")]
847    pub unsafe fn agg_first(&self, groups: &GroupsType) -> Self {
848        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_first(g) })
849    }
850
851    /// # Safety
852    ///
853    /// Does no bounds checks, groups must be correct.
854    #[cfg(feature = "algorithm_group_by")]
855    pub unsafe fn agg_first_non_null(&self, groups: &GroupsType) -> Self {
856        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_first_non_null(g) })
857    }
858
859    /// # Safety
860    ///
861    /// Does no bounds checks, groups must be correct.
862    #[cfg(feature = "algorithm_group_by")]
863    pub unsafe fn agg_last(&self, groups: &GroupsType) -> Self {
864        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_last(g) })
865    }
866
867    /// # Safety
868    ///
869    /// Does no bounds checks, groups must be correct.
870    #[cfg(feature = "algorithm_group_by")]
871    pub unsafe fn agg_last_non_null(&self, groups: &GroupsType) -> Self {
872        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_last_non_null(g) })
873    }
874
875    /// # Safety
876    ///
877    /// Does no bounds checks, groups must be correct.
878    #[cfg(feature = "algorithm_group_by")]
879    pub unsafe fn agg_n_unique(&self, groups: &GroupsType) -> Self {
880        // @scalar-opt
881        unsafe { self.as_materialized_series().agg_n_unique(groups) }.into()
882    }
883
884    /// # Safety
885    ///
886    /// Does no bounds checks, groups must be correct.
887    #[cfg(feature = "algorithm_group_by")]
888    pub unsafe fn agg_quantile(
889        &self,
890        groups: &GroupsType,
891        quantile: f64,
892        method: QuantileMethod,
893    ) -> Self {
894        // @scalar-opt
895
896        unsafe {
897            self.as_materialized_series()
898                .agg_quantile(groups, quantile, method)
899        }
900        .into()
901    }
902
903    /// # Safety
904    ///
905    /// Does no bounds checks, groups must be correct.
906    #[cfg(feature = "algorithm_group_by")]
907    pub unsafe fn agg_median(&self, groups: &GroupsType) -> Self {
908        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_median(g) })
909    }
910
911    /// # Safety
912    ///
913    /// Does no bounds checks, groups must be correct.
914    #[cfg(feature = "algorithm_group_by")]
915    pub unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Self {
916        // @scalar-opt
917        unsafe { self.as_materialized_series().agg_var(groups, ddof) }.into()
918    }
919
920    /// # Safety
921    ///
922    /// Does no bounds checks, groups must be correct.
923    #[cfg(feature = "algorithm_group_by")]
924    pub unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Self {
925        // @scalar-opt
926        unsafe { self.as_materialized_series().agg_std(groups, ddof) }.into()
927    }
928
929    /// # Safety
930    ///
931    /// Does no bounds checks, groups must be correct.
932    #[cfg(feature = "algorithm_group_by")]
933    pub unsafe fn agg_list(&self, groups: &GroupsType) -> Self {
934        // @scalar-opt
935        unsafe { self.as_materialized_series().agg_list(groups) }.into()
936    }
937
938    /// # Safety
939    ///
940    /// Does no bounds checks, groups must be correct.
941    #[cfg(feature = "algorithm_group_by")]
942    pub fn agg_valid_count(&self, groups: &GroupsType) -> Self {
943        // @scalar-opt
944        unsafe { self.as_materialized_series().agg_valid_count(groups) }.into()
945    }
946
947    /// # Safety
948    ///
949    /// Does no bounds checks, groups must be correct.
950    #[cfg(feature = "bitwise")]
951    pub unsafe fn agg_and(&self, groups: &GroupsType) -> Self {
952        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_and(g) })
953    }
954    /// # Safety
955    ///
956    /// Does no bounds checks, groups must be correct.
957    #[cfg(feature = "bitwise")]
958    pub unsafe fn agg_or(&self, groups: &GroupsType) -> Self {
959        self.agg_with_scalar_identity(groups, |s, g| unsafe { s.agg_or(g) })
960    }
961    /// # Safety
962    ///
963    /// Does no bounds checks, groups must be correct.
964    #[cfg(feature = "bitwise")]
965    pub unsafe fn agg_xor(&self, groups: &GroupsType) -> Self {
966        // @scalar-opt
967        unsafe { self.as_materialized_series().agg_xor(groups) }.into()
968    }
969
970    pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self {
971        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size)
972    }
973
974    pub fn is_empty(&self) -> bool {
975        self.len() == 0
976    }
977
978    pub fn is_full_null(&self) -> bool {
979        match self {
980            Column::Series(s) => s.is_full_null(),
981            Column::Scalar(s) => s.is_full_null(),
982        }
983    }
984
985    pub fn reverse(&self) -> Column {
986        match self {
987            Column::Series(s) => s.reverse().into(),
988            Column::Scalar(_) => self.clone(),
989        }
990    }
991
992    pub fn equals(&self, other: &Column) -> bool {
993        // @scalar-opt
994        self.as_materialized_series()
995            .equals(other.as_materialized_series())
996    }
997
998    pub fn equals_missing(&self, other: &Column) -> bool {
999        // @scalar-opt
1000        self.as_materialized_series()
1001            .equals_missing(other.as_materialized_series())
1002    }
1003
1004    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
1005        // @scalar-opt
1006        match self {
1007            Column::Series(s) => s.set_sorted_flag(sorted),
1008            Column::Scalar(_) => {},
1009        }
1010    }
1011
1012    pub fn get_flags(&self) -> StatisticsFlags {
1013        match self {
1014            Column::Series(s) => s.get_flags(),
1015            Column::Scalar(_) => {
1016                StatisticsFlags::IS_SORTED_ASC | StatisticsFlags::CAN_FAST_EXPLODE_LIST
1017            },
1018        }
1019    }
1020
1021    /// Returns whether the flags were set
1022    pub fn set_flags(&mut self, flags: StatisticsFlags) -> bool {
1023        match self {
1024            Column::Series(s) => {
1025                s.set_flags(flags);
1026                true
1027            },
1028            Column::Scalar(_) => false,
1029        }
1030    }
1031
1032    pub fn vec_hash(
1033        &self,
1034        build_hasher: PlSeedableRandomStateQuality,
1035        buf: &mut Vec<u64>,
1036    ) -> PolarsResult<()> {
1037        // @scalar-opt?
1038        self.as_materialized_series().vec_hash(build_hasher, buf)
1039    }
1040
1041    pub fn vec_hash_combine(
1042        &self,
1043        build_hasher: PlSeedableRandomStateQuality,
1044        hashes: &mut [u64],
1045    ) -> PolarsResult<()> {
1046        // @scalar-opt?
1047        self.as_materialized_series()
1048            .vec_hash_combine(build_hasher, hashes)
1049    }
1050
1051    pub fn append(&mut self, other: &Column) -> PolarsResult<&mut Self> {
1052        // @scalar-opt
1053        self.into_materialized_series()
1054            .append(other.as_materialized_series())?;
1055        Ok(self)
1056    }
1057    pub fn append_owned(&mut self, other: Column) -> PolarsResult<&mut Self> {
1058        self.into_materialized_series()
1059            .append_owned(other.take_materialized_series())?;
1060        Ok(self)
1061    }
1062
1063    pub fn arg_sort(&self, options: SortOptions) -> IdxCa {
1064        if self.is_empty() {
1065            return IdxCa::from_vec(self.name().clone(), Vec::new());
1066        }
1067
1068        if self.null_count() == self.len() {
1069            // If all key values are null, then they are all equal,
1070            // so we can just return the original dataframe.
1071            return IdxCa::from_iter_values(self.name().clone(), 0..self.len() as IdxSize);
1072        }
1073
1074        let is_sorted = Some(self.is_sorted_flag());
1075        let Some(is_sorted) = is_sorted.filter(|v| !matches!(v, IsSorted::Not)) else {
1076            return self.as_materialized_series().arg_sort(options);
1077        };
1078
1079        // Fast path: the data is sorted.
1080        let is_sorted_dsc = matches!(is_sorted, IsSorted::Descending);
1081        let invert = options.descending != is_sorted_dsc;
1082
1083        let mut values = Vec::with_capacity(self.len());
1084
1085        #[inline(never)]
1086        fn extend(
1087            start: IdxSize,
1088            end: IdxSize,
1089            slf: &Column,
1090            values: &mut Vec<IdxSize>,
1091            is_only_nulls: bool,
1092            invert: bool,
1093            maintain_order: bool,
1094        ) {
1095            debug_assert!(start <= end);
1096            debug_assert!(start as usize <= slf.len());
1097            debug_assert!(end as usize <= slf.len());
1098
1099            if !invert || is_only_nulls {
1100                values.extend(start..end);
1101                return;
1102            }
1103
1104            // If we don't have to maintain order but we have to invert. Just flip it around.
1105            if !maintain_order {
1106                values.extend((start..end).rev());
1107                return;
1108            }
1109
1110            // If we want to maintain order but we also needs to invert, we need to invert
1111            // per group of items.
1112            //
1113            // @NOTE: Since the column is sorted, arg_unique can also take a fast path and
1114            // just do a single traversal.
1115            let arg_unique = slf
1116                .slice(start as i64, (end - start) as usize)
1117                .arg_unique()
1118                .unwrap();
1119
1120            assert!(!arg_unique.has_nulls());
1121
1122            let num_unique = arg_unique.len();
1123
1124            // Fast path: all items are unique.
1125            if num_unique == (end - start) as usize {
1126                values.extend((start..end).rev());
1127                return;
1128            }
1129
1130            if num_unique == 1 {
1131                values.extend(start..end);
1132                return;
1133            }
1134
1135            let mut prev_idx = end - start;
1136            for chunk in arg_unique.downcast_iter() {
1137                for &idx in chunk.values().as_slice().iter().rev() {
1138                    values.extend(start + idx..start + prev_idx);
1139                    prev_idx = idx;
1140                }
1141            }
1142        }
1143        macro_rules! extend {
1144            ($start:expr, $end:expr) => {
1145                extend!($start, $end, is_only_nulls = false);
1146            };
1147            ($start:expr, $end:expr, is_only_nulls = $is_only_nulls:expr) => {
1148                extend(
1149                    $start,
1150                    $end,
1151                    self,
1152                    &mut values,
1153                    $is_only_nulls,
1154                    invert,
1155                    options.maintain_order,
1156                );
1157            };
1158        }
1159
1160        let length = self.len() as IdxSize;
1161        let null_count = self.null_count() as IdxSize;
1162
1163        if null_count == 0 {
1164            extend!(0, length);
1165        } else {
1166            let has_nulls_last = self.get(self.len() - 1).unwrap().is_null();
1167            match (options.nulls_last, has_nulls_last) {
1168                (true, true) => {
1169                    // Current: Nulls last, Wanted: Nulls last
1170                    extend!(0, length - null_count);
1171                    extend!(length - null_count, length, is_only_nulls = true);
1172                },
1173                (true, false) => {
1174                    // Current: Nulls first, Wanted: Nulls last
1175                    extend!(null_count, length);
1176                    extend!(0, null_count, is_only_nulls = true);
1177                },
1178                (false, true) => {
1179                    // Current: Nulls last, Wanted: Nulls first
1180                    extend!(length - null_count, length, is_only_nulls = true);
1181                    extend!(0, length - null_count);
1182                },
1183                (false, false) => {
1184                    // Current: Nulls first, Wanted: Nulls first
1185                    extend!(0, null_count, is_only_nulls = true);
1186                    extend!(null_count, length);
1187                },
1188            }
1189        }
1190
1191        // @NOTE: This can theoretically be pushed into the previous operation but it is really
1192        // worth it... probably not...
1193        if let Some(limit) = options.limit {
1194            let limit = limit.min(length);
1195            values.truncate(limit as usize);
1196        }
1197
1198        IdxCa::from_vec(self.name().clone(), values)
1199    }
1200
1201    pub fn arg_sort_multiple(
1202        &self,
1203        by: &[Column],
1204        options: &SortMultipleOptions,
1205    ) -> PolarsResult<IdxCa> {
1206        // @scalar-opt
1207        self.as_materialized_series().arg_sort_multiple(by, options)
1208    }
1209
1210    pub fn arg_unique(&self) -> PolarsResult<IdxCa> {
1211        match self {
1212            Column::Scalar(s) => Ok(IdxCa::new_vec(s.name().clone(), vec![0])),
1213            _ => self.as_materialized_series().arg_unique(),
1214        }
1215    }
1216
1217    pub fn bit_repr(&self) -> Option<BitRepr> {
1218        // @scalar-opt
1219        self.as_materialized_series().bit_repr()
1220    }
1221
1222    pub fn into_frame(self) -> DataFrame {
1223        // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
1224        unsafe { DataFrame::new_unchecked(self.len(), vec![self]) }
1225    }
1226
1227    pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> {
1228        // @scalar-opt
1229        self.into_materialized_series()
1230            .extend(other.as_materialized_series())?;
1231        Ok(self)
1232    }
1233
1234    pub fn rechunk(&self) -> Column {
1235        match self {
1236            Column::Series(s) => s.rechunk().into(),
1237            Column::Scalar(s) => {
1238                if s.lazy_as_materialized_series()
1239                    .filter(|x| x.n_chunks() > 1)
1240                    .is_some()
1241                {
1242                    Column::Scalar(ScalarColumn::new(
1243                        s.name().clone(),
1244                        s.scalar().clone(),
1245                        s.len(),
1246                    ))
1247                } else {
1248                    self.clone()
1249                }
1250            },
1251        }
1252    }
1253
1254    pub fn explode(&self, options: ExplodeOptions) -> PolarsResult<Column> {
1255        self.as_materialized_series()
1256            .explode(options)
1257            .map(Column::from)
1258    }
1259    pub fn implode(&self) -> PolarsResult<ListChunked> {
1260        self.as_materialized_series().implode()
1261    }
1262
1263    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
1264        // @scalar-opt
1265        self.as_materialized_series()
1266            .fill_null(strategy)
1267            .map(Column::from)
1268    }
1269
1270    pub fn divide(&self, rhs: &Column) -> PolarsResult<Self> {
1271        // @scalar-opt
1272        self.as_materialized_series()
1273            .divide(rhs.as_materialized_series())
1274            .map(Column::from)
1275    }
1276
1277    pub fn shift(&self, periods: i64) -> Column {
1278        // @scalar-opt
1279        self.as_materialized_series().shift(periods).into()
1280    }
1281
1282    #[cfg(feature = "zip_with")]
1283    pub fn zip_with(&self, mask: &BooleanChunked, other: &Self) -> PolarsResult<Self> {
1284        // @scalar-opt
1285        self.as_materialized_series()
1286            .zip_with(mask, other.as_materialized_series())
1287            .map(Self::from)
1288    }
1289
1290    #[cfg(feature = "zip_with")]
1291    pub fn zip_with_same_type(
1292        &self,
1293        mask: &ChunkedArray<BooleanType>,
1294        other: &Column,
1295    ) -> PolarsResult<Column> {
1296        // @scalar-opt
1297        self.as_materialized_series()
1298            .zip_with_same_type(mask, other.as_materialized_series())
1299            .map(Column::from)
1300    }
1301
1302    pub fn drop_nulls(&self) -> Column {
1303        match self {
1304            Column::Series(s) => s.drop_nulls().into_column(),
1305            Column::Scalar(s) => s.drop_nulls().into_column(),
1306        }
1307    }
1308
1309    /// Packs every element into a list.
1310    pub fn as_list(&self) -> ListChunked {
1311        // @scalar-opt
1312        self.as_materialized_series().as_list()
1313    }
1314
1315    pub fn is_sorted_flag(&self) -> IsSorted {
1316        match self {
1317            Column::Series(s) => s.is_sorted_flag(),
1318            Column::Scalar(_) => IsSorted::Ascending,
1319        }
1320    }
1321
1322    pub fn unique(&self) -> PolarsResult<Column> {
1323        match self {
1324            Column::Series(s) => s.unique().map(Column::from),
1325            Column::Scalar(s) => {
1326                _ = s.as_single_value_series().unique()?;
1327                if s.is_empty() {
1328                    return Ok(s.clone().into_column());
1329                }
1330
1331                Ok(s.resize(1).into_column())
1332            },
1333        }
1334    }
1335    pub fn unique_stable(&self) -> PolarsResult<Column> {
1336        match self {
1337            Column::Series(s) => s.unique_stable().map(Column::from),
1338            Column::Scalar(s) => {
1339                _ = s.as_single_value_series().unique_stable()?;
1340                if s.is_empty() {
1341                    return Ok(s.clone().into_column());
1342                }
1343
1344                Ok(s.resize(1).into_column())
1345            },
1346        }
1347    }
1348
1349    pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1350        // @scalar-opt
1351        self.as_materialized_series()
1352            .reshape_list(dimensions)
1353            .map(Self::from)
1354    }
1355
1356    #[cfg(feature = "dtype-array")]
1357    pub fn reshape_array(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1358        // @scalar-opt
1359        self.as_materialized_series()
1360            .reshape_array(dimensions)
1361            .map(Self::from)
1362    }
1363
1364    pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
1365        // @scalar-opt
1366        self.as_materialized_series()
1367            .sort(sort_options)
1368            .map(Self::from)
1369    }
1370
1371    pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Self> {
1372        match self {
1373            Column::Series(s) => s.filter(filter).map(Column::from),
1374            Column::Scalar(s) => {
1375                if s.is_empty() {
1376                    return Ok(s.clone().into_column());
1377                }
1378
1379                // Broadcasting
1380                if filter.len() == 1 {
1381                    return match filter.get(0) {
1382                        Some(true) => Ok(s.clone().into_column()),
1383                        _ => Ok(s.resize(0).into_column()),
1384                    };
1385                }
1386
1387                Ok(s.resize(filter.sum().unwrap() as usize).into_column())
1388            },
1389        }
1390    }
1391
1392    #[cfg(feature = "random")]
1393    pub fn shuffle(&self, seed: Option<u64>) -> Self {
1394        // @scalar-opt
1395        self.as_materialized_series().shuffle(seed).into()
1396    }
1397
1398    #[cfg(feature = "random")]
1399    pub fn sample_frac(
1400        &self,
1401        frac: f64,
1402        with_replacement: bool,
1403        shuffle: bool,
1404        seed: Option<u64>,
1405    ) -> PolarsResult<Self> {
1406        self.as_materialized_series()
1407            .sample_frac(frac, with_replacement, shuffle, seed)
1408            .map(Self::from)
1409    }
1410
1411    #[cfg(feature = "random")]
1412    pub fn sample_n(
1413        &self,
1414        n: usize,
1415        with_replacement: bool,
1416        shuffle: bool,
1417        seed: Option<u64>,
1418    ) -> PolarsResult<Self> {
1419        self.as_materialized_series()
1420            .sample_n(n, with_replacement, shuffle, seed)
1421            .map(Self::from)
1422    }
1423
1424    pub fn gather_every(&self, n: usize, offset: usize) -> PolarsResult<Column> {
1425        polars_ensure!(n > 0, InvalidOperation: "gather_every(n): n should be positive");
1426        if self.len().saturating_sub(offset) == 0 {
1427            return Ok(self.clear());
1428        }
1429
1430        match self {
1431            Column::Series(s) => Ok(s.gather_every(n, offset)?.into()),
1432            Column::Scalar(s) => {
1433                let total = s.len() - offset;
1434                Ok(s.resize(1 + (total - 1) / n).into())
1435            },
1436        }
1437    }
1438
1439    pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult<Self> {
1440        if self.is_empty() {
1441            return Ok(Self::new_scalar(
1442                self.name().clone(),
1443                Scalar::new(self.dtype().clone(), value.into_static()),
1444                n,
1445            ));
1446        }
1447
1448        match self {
1449            Column::Series(s) => s.extend_constant(value, n).map(Column::from),
1450            Column::Scalar(s) => {
1451                if s.scalar().as_any_value() == value {
1452                    Ok(s.resize(s.len() + n).into())
1453                } else {
1454                    s.as_materialized_series()
1455                        .extend_constant(value, n)
1456                        .map(Column::from)
1457                }
1458            },
1459        }
1460    }
1461
1462    pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
1463        self.try_map_unary_elementwise_to_bool(|s| s.is_finite())
1464    }
1465    pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
1466        self.try_map_unary_elementwise_to_bool(|s| s.is_infinite())
1467    }
1468    pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
1469        self.try_map_unary_elementwise_to_bool(|s| s.is_nan())
1470    }
1471    pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
1472        self.try_map_unary_elementwise_to_bool(|s| s.is_not_nan())
1473    }
1474
1475    pub fn wrapping_trunc_div_scalar<T>(&self, rhs: T) -> Self
1476    where
1477        T: Num + NumCast,
1478    {
1479        // @scalar-opt
1480        self.as_materialized_series()
1481            .wrapping_trunc_div_scalar(rhs)
1482            .into()
1483    }
1484
1485    pub fn product(&self) -> PolarsResult<Scalar> {
1486        // @scalar-opt
1487        self.as_materialized_series().product()
1488    }
1489
1490    pub fn phys_iter(&self) -> SeriesPhysIter<'_> {
1491        // @scalar-opt
1492        self.as_materialized_series().phys_iter()
1493    }
1494
1495    #[inline]
1496    pub fn get(&self, index: usize) -> PolarsResult<AnyValue<'_>> {
1497        polars_ensure!(index < self.len(), oob = index, self.len());
1498
1499        // SAFETY: Bounds check done just before.
1500        Ok(unsafe { self.get_unchecked(index) })
1501    }
1502    /// # Safety
1503    ///
1504    /// Does not perform bounds check on `index`
1505    #[inline(always)]
1506    pub unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {
1507        debug_assert!(index < self.len());
1508
1509        match self {
1510            Column::Series(s) => unsafe { s.get_unchecked(index) },
1511            Column::Scalar(s) => s.scalar().as_any_value(),
1512        }
1513    }
1514
1515    #[cfg(feature = "object")]
1516    pub fn get_object(
1517        &self,
1518        index: usize,
1519    ) -> Option<&dyn crate::chunked_array::object::PolarsObjectSafe> {
1520        self.as_materialized_series().get_object(index)
1521    }
1522
1523    pub fn bitand(&self, rhs: &Self) -> PolarsResult<Self> {
1524        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l & r)
1525    }
1526    pub fn bitor(&self, rhs: &Self) -> PolarsResult<Self> {
1527        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l | r)
1528    }
1529    pub fn bitxor(&self, rhs: &Self) -> PolarsResult<Self> {
1530        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l ^ r)
1531    }
1532
1533    pub fn try_add_owned(self, other: Self) -> PolarsResult<Self> {
1534        match (self, other) {
1535            (Column::Series(lhs), Column::Series(rhs)) => {
1536                lhs.take().try_add_owned(rhs.take()).map(Column::from)
1537            },
1538            (lhs, rhs) => lhs + rhs,
1539        }
1540    }
1541    pub fn try_sub_owned(self, other: Self) -> PolarsResult<Self> {
1542        match (self, other) {
1543            (Column::Series(lhs), Column::Series(rhs)) => {
1544                lhs.take().try_sub_owned(rhs.take()).map(Column::from)
1545            },
1546            (lhs, rhs) => lhs - rhs,
1547        }
1548    }
1549    pub fn try_mul_owned(self, other: Self) -> PolarsResult<Self> {
1550        match (self, other) {
1551            (Column::Series(lhs), Column::Series(rhs)) => {
1552                lhs.take().try_mul_owned(rhs.take()).map(Column::from)
1553            },
1554            (lhs, rhs) => lhs * rhs,
1555        }
1556    }
1557
1558    pub(crate) fn str_value(&self, index: usize) -> PolarsResult<Cow<'_, str>> {
1559        Ok(self.get(index)?.str_value())
1560    }
1561
1562    pub fn min_reduce(&self) -> PolarsResult<Scalar> {
1563        match self {
1564            Column::Series(s) => s.min_reduce(),
1565            Column::Scalar(s) => {
1566                // We don't really want to deal with handling the full semantics here so we just
1567                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1568                s.as_single_value_series().min_reduce()
1569            },
1570        }
1571    }
1572    pub fn max_reduce(&self) -> PolarsResult<Scalar> {
1573        match self {
1574            Column::Series(s) => s.max_reduce(),
1575            Column::Scalar(s) => {
1576                // We don't really want to deal with handling the full semantics here so we just
1577                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1578                s.as_single_value_series().max_reduce()
1579            },
1580        }
1581    }
1582    pub fn median_reduce(&self) -> PolarsResult<Scalar> {
1583        match self {
1584            Column::Series(s) => s.median_reduce(),
1585            Column::Scalar(s) => {
1586                // We don't really want to deal with handling the full semantics here so we just
1587                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1588                s.as_single_value_series().median_reduce()
1589            },
1590        }
1591    }
1592    pub fn mean_reduce(&self) -> PolarsResult<Scalar> {
1593        match self {
1594            Column::Series(s) => s.mean_reduce(),
1595            Column::Scalar(s) => {
1596                // We don't really want to deal with handling the full semantics here so we just
1597                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1598                s.as_single_value_series().mean_reduce()
1599            },
1600        }
1601    }
1602    pub fn std_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1603        match self {
1604            Column::Series(s) => s.std_reduce(ddof),
1605            Column::Scalar(s) => {
1606                // We don't really want to deal with handling the full semantics here so we just
1607                // cast to a small series. This is a tiny bit wasteful, but probably fine.
1608                let n = s.len().min(ddof as usize + 1);
1609                s.as_n_values_series(n).std_reduce(ddof)
1610            },
1611        }
1612    }
1613    pub fn var_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1614        match self {
1615            Column::Series(s) => s.var_reduce(ddof),
1616            Column::Scalar(s) => {
1617                // We don't really want to deal with handling the full semantics here so we just
1618                // cast to a small series. This is a tiny bit wasteful, but probably fine.
1619                let n = s.len().min(ddof as usize + 1);
1620                s.as_n_values_series(n).var_reduce(ddof)
1621            },
1622        }
1623    }
1624    pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
1625        // @scalar-opt
1626        self.as_materialized_series().sum_reduce()
1627    }
1628    pub fn and_reduce(&self) -> PolarsResult<Scalar> {
1629        match self {
1630            Column::Series(s) => s.and_reduce(),
1631            Column::Scalar(s) => {
1632                // We don't really want to deal with handling the full semantics here so we just
1633                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1634                s.as_single_value_series().and_reduce()
1635            },
1636        }
1637    }
1638    pub fn or_reduce(&self) -> PolarsResult<Scalar> {
1639        match self {
1640            Column::Series(s) => s.or_reduce(),
1641            Column::Scalar(s) => {
1642                // We don't really want to deal with handling the full semantics here so we just
1643                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1644                s.as_single_value_series().or_reduce()
1645            },
1646        }
1647    }
1648    pub fn xor_reduce(&self) -> PolarsResult<Scalar> {
1649        match self {
1650            Column::Series(s) => s.xor_reduce(),
1651            Column::Scalar(s) => {
1652                // We don't really want to deal with handling the full semantics here so we just
1653                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1654                //
1655                // We have to deal with the fact that xor is 0 if there is an even number of
1656                // elements and the value if there is an odd number of elements. If there are zero
1657                // elements the result should be `null`.
1658                s.as_n_values_series(2 - s.len() % 2).xor_reduce()
1659            },
1660        }
1661    }
1662    pub fn n_unique(&self) -> PolarsResult<usize> {
1663        match self {
1664            Column::Series(s) => s.n_unique(),
1665            Column::Scalar(s) => s.as_single_value_series().n_unique(),
1666        }
1667    }
1668
1669    pub fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar> {
1670        self.as_materialized_series()
1671            .quantile_reduce(quantile, method)
1672    }
1673
1674    pub fn quantiles_reduce(
1675        &self,
1676        quantiles: &[f64],
1677        method: QuantileMethod,
1678    ) -> PolarsResult<Scalar> {
1679        self.as_materialized_series()
1680            .quantiles_reduce(quantiles, method)
1681    }
1682
1683    pub(crate) fn estimated_size(&self) -> usize {
1684        // @scalar-opt
1685        self.as_materialized_series().estimated_size()
1686    }
1687
1688    pub fn sort_with(&self, options: SortOptions) -> PolarsResult<Self> {
1689        match self {
1690            Column::Series(s) => s.sort_with(options).map(Self::from),
1691            Column::Scalar(s) => {
1692                // This makes this function throw the same errors as Series::sort_with
1693                _ = s.as_single_value_series().sort_with(options)?;
1694
1695                Ok(self.clone())
1696            },
1697        }
1698    }
1699
1700    pub fn map_unary_elementwise_to_bool(
1701        &self,
1702        f: impl Fn(&Series) -> BooleanChunked,
1703    ) -> BooleanChunked {
1704        self.try_map_unary_elementwise_to_bool(|s| Ok(f(s)))
1705            .unwrap()
1706    }
1707    pub fn try_map_unary_elementwise_to_bool(
1708        &self,
1709        f: impl Fn(&Series) -> PolarsResult<BooleanChunked>,
1710    ) -> PolarsResult<BooleanChunked> {
1711        match self {
1712            Column::Series(s) => f(s),
1713            Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())),
1714        }
1715    }
1716
1717    pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column {
1718        self.try_apply_unary_elementwise(|s| Ok(f(s))).unwrap()
1719    }
1720    pub fn try_apply_unary_elementwise(
1721        &self,
1722        f: impl Fn(&Series) -> PolarsResult<Series>,
1723    ) -> PolarsResult<Column> {
1724        match self {
1725            Column::Series(s) => f(s).map(Column::from),
1726            Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series(
1727                f(&s.as_single_value_series())?,
1728                s.len(),
1729            )
1730            .into()),
1731        }
1732    }
1733
1734    pub fn apply_broadcasting_binary_elementwise(
1735        &self,
1736        other: &Self,
1737        op: impl Fn(&Series, &Series) -> Series,
1738    ) -> PolarsResult<Column> {
1739        self.try_apply_broadcasting_binary_elementwise(other, |lhs, rhs| Ok(op(lhs, rhs)))
1740    }
1741    pub fn try_apply_broadcasting_binary_elementwise(
1742        &self,
1743        other: &Self,
1744        op: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1745    ) -> PolarsResult<Column> {
1746        fn output_length(a: &Column, b: &Column) -> PolarsResult<usize> {
1747            match (a.len(), b.len()) {
1748                // broadcasting
1749                (1, o) | (o, 1) => Ok(o),
1750                // equal
1751                (a, b) if a == b => Ok(a),
1752                // unequal
1753                (a, b) => {
1754                    polars_bail!(InvalidOperation: "cannot do a binary operation on columns of different lengths: got {} and {}", a, b)
1755                },
1756            }
1757        }
1758
1759        // Here we rely on the underlying broadcast operations.
1760        let length = output_length(self, other)?;
1761        match (self, other) {
1762            (Column::Series(lhs), Column::Series(rhs)) => op(lhs, rhs).map(Column::from),
1763            (Column::Series(lhs), Column::Scalar(rhs)) => {
1764                op(lhs, &rhs.as_single_value_series()).map(Column::from)
1765            },
1766            (Column::Scalar(lhs), Column::Series(rhs)) => {
1767                op(&lhs.as_single_value_series(), rhs).map(Column::from)
1768            },
1769            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1770                let lhs = lhs.as_single_value_series();
1771                let rhs = rhs.as_single_value_series();
1772
1773                Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column())
1774            },
1775        }
1776    }
1777
1778    pub fn apply_binary_elementwise(
1779        &self,
1780        other: &Self,
1781        f: impl Fn(&Series, &Series) -> Series,
1782        f_lb: impl Fn(&Scalar, &Series) -> Series,
1783        f_rb: impl Fn(&Series, &Scalar) -> Series,
1784    ) -> Column {
1785        self.try_apply_binary_elementwise(
1786            other,
1787            |lhs, rhs| Ok(f(lhs, rhs)),
1788            |lhs, rhs| Ok(f_lb(lhs, rhs)),
1789            |lhs, rhs| Ok(f_rb(lhs, rhs)),
1790        )
1791        .unwrap()
1792    }
1793    pub fn try_apply_binary_elementwise(
1794        &self,
1795        other: &Self,
1796        f: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1797        f_lb: impl Fn(&Scalar, &Series) -> PolarsResult<Series>,
1798        f_rb: impl Fn(&Series, &Scalar) -> PolarsResult<Series>,
1799    ) -> PolarsResult<Column> {
1800        debug_assert_eq!(self.len(), other.len());
1801
1802        match (self, other) {
1803            (Column::Series(lhs), Column::Series(rhs)) => f(lhs, rhs).map(Column::from),
1804            (Column::Series(lhs), Column::Scalar(rhs)) => f_rb(lhs, rhs.scalar()).map(Column::from),
1805            (Column::Scalar(lhs), Column::Series(rhs)) => f_lb(lhs.scalar(), rhs).map(Column::from),
1806            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1807                let lhs = lhs.as_single_value_series();
1808                let rhs = rhs.as_single_value_series();
1809
1810                Ok(
1811                    ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len())
1812                        .into_column(),
1813                )
1814            },
1815        }
1816    }
1817
1818    #[cfg(feature = "approx_unique")]
1819    pub fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
1820        match self {
1821            Column::Series(s) => s.approx_n_unique(),
1822            Column::Scalar(s) => {
1823                // @NOTE: We do this for the error handling.
1824                s.as_single_value_series().approx_n_unique()?;
1825                Ok(1)
1826            },
1827        }
1828    }
1829
1830    pub fn n_chunks(&self) -> usize {
1831        match self {
1832            Column::Series(s) => s.n_chunks(),
1833            Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()),
1834        }
1835    }
1836
1837    #[expect(clippy::wrong_self_convention)]
1838    pub(crate) fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
1839        // @scalar-opt
1840        self.as_materialized_series().into_total_ord_inner()
1841    }
1842    #[expect(unused, clippy::wrong_self_convention)]
1843    pub(crate) fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
1844        // @scalar-opt
1845        self.as_materialized_series().into_total_eq_inner()
1846    }
1847
1848    pub fn rechunk_to_arrow(self, compat_level: CompatLevel) -> Box<dyn Array> {
1849        // Rechunk to one chunk if necessary
1850        let mut series = self.take_materialized_series();
1851        if series.n_chunks() > 1 {
1852            series = series.rechunk();
1853        }
1854        series.to_arrow(0, compat_level)
1855    }
1856
1857    pub fn trim_lists_to_normalized_offsets(&self) -> Option<Column> {
1858        self.as_materialized_series()
1859            .trim_lists_to_normalized_offsets()
1860            .map(Column::from)
1861    }
1862
1863    pub fn propagate_nulls(&self) -> Option<Column> {
1864        self.as_materialized_series()
1865            .propagate_nulls()
1866            .map(Column::from)
1867    }
1868
1869    pub fn deposit(&self, validity: &Bitmap) -> Column {
1870        self.as_materialized_series()
1871            .deposit(validity)
1872            .into_column()
1873    }
1874
1875    pub fn rechunk_validity(&self) -> Option<Bitmap> {
1876        // @scalar-opt
1877        self.as_materialized_series().rechunk_validity()
1878    }
1879
1880    pub fn unique_id(&self) -> PolarsResult<(IdxSize, Vec<IdxSize>)> {
1881        self.as_materialized_series().unique_id()
1882    }
1883}
1884
1885impl Default for Column {
1886    fn default() -> Self {
1887        Self::new_scalar(
1888            PlSmallStr::EMPTY,
1889            Scalar::new(DataType::Int64, AnyValue::Null),
1890            0,
1891        )
1892    }
1893}
1894
1895impl PartialEq for Column {
1896    fn eq(&self, other: &Self) -> bool {
1897        // @scalar-opt
1898        self.as_materialized_series()
1899            .eq(other.as_materialized_series())
1900    }
1901}
1902
1903impl From<Series> for Column {
1904    #[inline]
1905    fn from(series: Series) -> Self {
1906        // We instantiate a Scalar Column if the Series is length is 1. This makes it possible for
1907        // future operations to be faster.
1908        if series.len() == 1 {
1909            return Self::Scalar(ScalarColumn::unit_scalar_from_series(series));
1910        }
1911
1912        Self::Series(SeriesColumn::new(series))
1913    }
1914}
1915
1916impl<T: IntoSeries> IntoColumn for T {
1917    #[inline]
1918    fn into_column(self) -> Column {
1919        self.into_series().into()
1920    }
1921}
1922
1923impl IntoColumn for Column {
1924    #[inline(always)]
1925    fn into_column(self) -> Column {
1926        self
1927    }
1928}
1929
1930/// We don't want to serialize the scalar columns. So this helps pretend that columns are always
1931/// initialized without implementing From<Column> for Series.
1932///
1933/// Those casts should be explicit.
1934#[derive(Clone)]
1935#[cfg_attr(feature = "serde", derive(serde::Serialize))]
1936#[cfg_attr(feature = "serde", serde(into = "Series"))]
1937struct _SerdeSeries(Series);
1938
1939impl From<Column> for _SerdeSeries {
1940    #[inline]
1941    fn from(value: Column) -> Self {
1942        Self(value.take_materialized_series())
1943    }
1944}
1945
1946impl From<_SerdeSeries> for Series {
1947    #[inline]
1948    fn from(value: _SerdeSeries) -> Self {
1949        value.0
1950    }
1951}