Trait polars_core::prelude::SeriesTrait

source ·

pub trait SeriesTrait: Send + Sync + PrivateSeries + PrivateSeriesNumeric {
Show 69 methods
    fn rename(&mut self, name: &str);
    fn chunks(&self) -> &Vec<ArrayRef> ⓘ;
    fn take_iter(&self, _iter: &mut dyn TakeIterator) -> PolarsResult<Series>;
    unsafe fn take_iter_unchecked(&self, _iter: &mut dyn TakeIterator) -> Series;
    unsafe fn take_unchecked(&self, _idx: &IdxCa) -> PolarsResult<Series>;
    unsafe fn take_opt_iter_unchecked(
        &self,
        _iter: &mut dyn TakeIteratorNulls
    ) -> Series;
    fn take(&self, _indices: &IdxCa) -> PolarsResult<Series>;
    fn len(&self) -> usize;
    fn take_every(&self, n: usize) -> Series;
    fn has_validity(&self) -> bool;

    fn is_sorted(&self) -> IsSorted { ... }
    fn bitand(&self, _other: &Series) -> PolarsResult<Series> { ... }
    fn bitor(&self, _other: &Series) -> PolarsResult<Series> { ... }
    fn bitxor(&self, _other: &Series) -> PolarsResult<Series> { ... }
    fn chunk_lengths(&self) -> ChunkIdIter<'_> { ... }
    fn name(&self) -> &str { ... }
    fn field(&self) -> Cow<'_, Field> { ... }
    fn dtype(&self) -> &DataType { ... }
    fn n_chunks(&self) -> usize { ... }
    fn shrink_to_fit(&mut self) { ... }
    fn limit(&self, num_elements: usize) -> Series { ... }
    fn slice(&self, _offset: i64, _length: usize) -> Series { ... }
    fn filter(&self, _filter: &BooleanChunked) -> PolarsResult<Series> { ... }
    fn is_empty(&self) -> bool { ... }
    fn rechunk(&self) -> Series { ... }
    fn drop_nulls(&self) -> Series { ... }
    fn mean(&self) -> Option<f64> { ... }
    fn median(&self) -> Option<f64> { ... }
    fn new_from_index(&self, _index: usize, _length: usize) -> Series { ... }
    fn cast(&self, _data_type: &DataType) -> PolarsResult<Series> { ... }
    fn get(&self, _index: usize) -> PolarsResult<AnyValue<'_>> { ... }
    unsafe fn get_unchecked(&self, _index: usize) -> AnyValue<'_> { ... }
    fn sort_with(&self, _options: SortOptions) -> Series { ... }
    fn argsort(&self, options: SortOptions) -> IdxCa { ... }
    fn null_count(&self) -> usize { ... }
    fn unique(&self) -> PolarsResult<Series> { ... }
    fn n_unique(&self) -> PolarsResult<usize> { ... }
    fn arg_unique(&self) -> PolarsResult<IdxCa> { ... }
    fn arg_min(&self) -> Option<usize> { ... }
    fn arg_max(&self) -> Option<usize> { ... }
    fn is_null(&self) -> BooleanChunked { ... }
    fn is_not_null(&self) -> BooleanChunked { ... }
    fn is_unique(&self) -> PolarsResult<BooleanChunked> { ... }
    fn is_duplicated(&self) -> PolarsResult<BooleanChunked> { ... }
    fn reverse(&self) -> Series { ... }
    fn as_single_ptr(&mut self) -> PolarsResult<usize> { ... }
    fn shift(&self, _periods: i64) -> Series { ... }
    fn fill_null(&self, _strategy: FillNullStrategy) -> PolarsResult<Series> { ... }
    fn _sum_as_series(&self) -> Series { ... }
    fn max_as_series(&self) -> Series { ... }
    fn min_as_series(&self) -> Series { ... }
    fn median_as_series(&self) -> Series { ... }
    fn var_as_series(&self, _ddof: u8) -> Series { ... }
    fn std_as_series(&self, _ddof: u8) -> Series { ... }
    fn quantile_as_series(
        &self,
        _quantile: f64,
        _interpol: QuantileInterpolOptions
    ) -> PolarsResult<Series> { ... }
    fn fmt_list(&self) -> String { ... }
    fn clone_inner(&self) -> Arc<dyn SeriesTrait> { ... }
    fn get_object(&self, _index: usize) -> Option<&dyn PolarsObjectSafe> { ... }
    fn as_any(&self) -> &dyn Any { ... }
    fn as_any_mut(&mut self) -> &mut dyn Any { ... }
    fn peak_max(&self) -> BooleanChunked { ... }
    fn peak_min(&self) -> BooleanChunked { ... }
    fn is_in(&self, _other: &Series) -> PolarsResult<BooleanChunked> { ... }
    fn repeat_by(&self, _by: &IdxCa) -> ListChunked { ... }
    fn checked_div(&self, _rhs: &Series) -> PolarsResult<Series> { ... }
    fn is_first(&self) -> PolarsResult<BooleanChunked> { ... }
    fn mode(&self) -> PolarsResult<Series> { ... }
    fn rolling_apply(
        &self,
        _f: &dyn Fn(&Series) -> Series,
        _options: RollingOptionsFixedWindow
    ) -> PolarsResult<Series> { ... }
    fn str_concat(&self, _delimiter: &str) -> Utf8Chunked { ... }
}

Required Methods§

source

fn rename(&mut self, name: &str)

Rename the Series.

source

fn chunks(&self) -> &Vec<ArrayRef> ⓘ

Underlying chunks.

source

fn take_iter(&self, _iter: &mut dyn TakeIterator) -> PolarsResult<Series>

Take by index from an iterator. This operation clones the data.

source

unsafe fn take_iter_unchecked(&self, _iter: &mut dyn TakeIterator) -> Series

Take by index from an iterator. This operation clones the data.

Safety

This doesn’t check any bounds.
Iterator must be TrustedLen

source

unsafe fn take_unchecked(&self, _idx: &IdxCa) -> PolarsResult<Series>

Take by index if ChunkedArray contains a single chunk.

Safety

This doesn’t check any bounds.

source

unsafe fn take_opt_iter_unchecked(
&self,
_iter: &mut dyn TakeIteratorNulls
) -> Series

Take by index from an iterator. This operation clones the data.

Safety

This doesn’t check any bounds.
Iterator must be TrustedLen

source

fn take(&self, _indices: &IdxCa) -> PolarsResult<Series>

Take by index. This operation clones the data.

source

fn len(&self) -> usize

Get length of series.

source

fn take_every(&self, n: usize) -> Series

Take every nth value as a new Series

source

fn has_validity(&self) -> bool

Return whether any of the chunks in this [ChunkedArray] have a validity bitmap. No bitmap means no null values.

Provided Methods§

source

fn is_sorted(&self) -> IsSorted

Check if Series is sorted.

Examples found in repository ?

src/utils/mod.rs (line 154)

fn flatten_df(df: &DataFrame) -> impl Iterator<Item = DataFrame> + '_ {
    df.iter_chunks_physical().flat_map(|chunk| {
        let df = DataFrame::new_no_checks(
            df.iter()
                .zip(chunk.into_arrays())
                .map(|(s, arr)| {
                    // Safety:
                    // datatypes are correct
                    let mut out = unsafe {
                        Series::from_chunks_and_dtype_unchecked(s.name(), vec![arr], s.dtype())
                    };
                    out.set_sorted(s.is_sorted());
                    out
                })
                .collect(),
        );
        if df.height() == 0 {
            None
        } else {
            Some(df)
        }
    })
}

More examples

Hide additional examples

src/frame/groupby/mod.rs (line 348)

    pub fn keys_sliced(&self, slice: Option<(i64, usize)>) -> Vec<Series> {
        // Returns one Series per selected key column, holding the key value found at the
        // first row of every group. When `slice` is given, only that `(offset, len)`
        // window of the groups is considered.
        #[allow(unused_assignments)]
        // needed to keep the lifetimes valid for this scope
        let mut groups_owned = None;

        // Borrow either the freshly sliced groups (kept alive by `groups_owned`) or the
        // groups stored on `self`.
        let groups = if let Some((offset, len)) = slice {
            groups_owned = Some(self.groups.slice(offset, len));
            groups_owned.as_deref().unwrap()
        } else {
            &self.groups
        };

        // Every key column is processed in parallel inside `POOL`.
        POOL.install(|| {
            self.selected_keys
                .par_iter()
                .map(|s| {
                    match groups {
                        GroupsProxy::Idx(groups) => {
                            // Only the first index of each group is needed for the key.
                            let mut iter = groups.iter().map(|(first, _idx)| first as usize);
                            // Safety:
                            // groups are always in bounds
                            let mut out = unsafe { s.take_iter_unchecked(&mut iter) };
                            // The sorted flag is only propagated when the groups themselves
                            // are flagged as sorted.
                            if groups.sorted {
                                out.set_sorted(s.is_sorted());
                            };
                            out
                        }
                        GroupsProxy::Slice { groups, rolling } => {
                            if *rolling && !groups.is_empty() {
                                // groups can be sliced
                                // The result spans from the start of the first group to the
                                // end (offset + len) of the last group.
                                let offset = groups[0][0];
                                let [upper_offset, upper_len] = groups[groups.len() - 1];
                                return s.slice(
                                    offset as i64,
                                    ((upper_offset + upper_len) - offset) as usize,
                                );
                            }

                            // Each group is `[first, len]`; only `first` is used here.
                            let mut iter = groups.iter().map(|&[first, _len]| first as usize);
                            // Safety:
                            // groups are always in bounds
                            let mut out = unsafe { s.take_iter_unchecked(&mut iter) };
                            // sliced groups are always in order of discovery
                            out.set_sorted(s.is_sorted());
                            out
                        }
                    }
                })
                .collect()
        })
    }

source

fn chunk_lengths(&self) -> ChunkIdIter<'_>

Get the lengths of the underlying chunks

Examples found in repository ?

src/utils/mod.rs (line 184)

pub fn split_df_as_ref(df: &DataFrame, n: usize) -> PolarsResult<Vec<DataFrame>> {
    let total_len = df.height();
    let chunk_size = total_len / n;

    if df.n_chunks() == n
        && df.get_columns()[0]
            .chunk_lengths()
            .all(|len| len.abs_diff(chunk_size) < 100)
    {
        return Ok(flatten_df(df).collect());
    }

    let mut out = Vec::with_capacity(n);

    for i in 0..n {
        let offset = i * chunk_size;
        let len = if i == (n - 1) {
            total_len - offset
        } else {
            chunk_size
        };
        let df = df.slice((i * chunk_size) as i64, len);
        if df.n_chunks() > 1 {
            // we add every chunk as separate dataframe. This make sure that every partition
            // deals with it.
            out.extend(flatten_df(&df))
        } else {
            out.push(df)
        }
    }

    Ok(out)
}

More examples

Hide additional examples

src/frame/mod.rs (line 451)

    pub fn should_rechunk(&self) -> bool {
        // Returns `true` when the columns do not all share the same chunk layout.
        // Two differently seeded hash builders are used so that a collision in one of
        // them is unlikely to coincide with a collision in the other.
        let hb = RandomState::default();
        let hb2 = RandomState::with_seeds(392498, 98132457, 0, 412059);
        !self
            .columns
            .iter()
            // Every column is reduced to a fingerprint of its chunk layout: two chained
            // hashes over the chunk lengths (one per hash builder) plus the number of
            // chunks. Equal layouts give equal fingerprints; the probability of a hash
            // collision between different layouts is assumed to be negligible.
            // the old solution to this was clone all lengths to a vec and compare the vecs
            .map(|s| {
                s.chunk_lengths().map(|i| i as u64).fold(
                    (0u64, 0u64, s.n_chunks()),
                    |(lhash, lh2, n), rval| {
                        // Hash the current chunk length with the first builder...
                        let mut h = hb.build_hasher();
                        rval.hash(&mut h);
                        let rhash = h.finish();
                        // ...and again with the second one.
                        let mut h = hb2.build_hasher();
                        rval.hash(&mut h);
                        let rh2 = h.finish();
                        // Chain both hashes into the running fingerprint; `n` is carried
                        // along unchanged.
                        (
                            _boost_hash_combine(lhash, rhash),
                            _boost_hash_combine(lh2, rh2),
                            n,
                        )
                    },
                )
            })
            .all_equal()
    }

source

fn name(&self) -> &str

Name of series.

Examples found in repository ?

src/series/series_trait.rs (line 575)

    fn median_as_series(&self) -> Series {
        // Fallback body: no median is computed, an all-null Series of length 1 with
        // this Series' name and dtype is returned instead.
        Series::full_null(self.name(), 1, self.dtype())
    }
    /// Get the variance of the Series as a new Series of length 1.
    ///
    /// This provided body ignores `_ddof` and returns an all-null Series of length 1
    /// with the name and dtype of `self`.
    fn var_as_series(&self, _ddof: u8) -> Series {
        Series::full_null(self.name(), 1, self.dtype())
    }
    /// Get the standard deviation of the Series as a new Series of length 1.
    ///
    /// This provided body ignores `_ddof` and returns an all-null Series of length 1
    /// with the name and dtype of `self`.
    fn std_as_series(&self, _ddof: u8) -> Series {
        Series::full_null(self.name(), 1, self.dtype())
    }
    /// Get the quantile of the ChunkedArray as a new Series of length 1.
    ///
    /// This provided body ignores both `_quantile` and `_interpol` and always succeeds
    /// with an all-null Series of length 1 carrying the name and dtype of `self`.
    fn quantile_as_series(
        &self,
        _quantile: f64,
        _interpol: QuantileInterpolOptions,
    ) -> PolarsResult<Series> {
        Ok(Series::full_null(self.name(), 1, self.dtype()))
    }

More examples

Hide additional examples

src/series/implementations/object.rs (line 245)

    fn _sum_as_series(&self) -> Series {
        // No sum is computed here: the result is always an all-null object Series of
        // length 1 that keeps this Series' name.
        ObjectChunked::<T>::full_null(self.name(), 1).into_series()
    }
    fn max_as_series(&self) -> Series {
        // No maximum is computed here: the result is always an all-null object Series
        // of length 1 that keeps this Series' name.
        ObjectChunked::<T>::full_null(self.name(), 1).into_series()
    }
    fn min_as_series(&self) -> Series {
        // No minimum is computed here: the result is always an all-null object Series
        // of length 1 that keeps this Series' name.
        ObjectChunked::<T>::full_null(self.name(), 1).into_series()
    }
    fn median_as_series(&self) -> Series {
        // No median is computed here: the result is always an all-null object Series
        // of length 1 that keeps this Series' name.
        ObjectChunked::<T>::full_null(self.name(), 1).into_series()
    }
    fn var_as_series(&self, _ddof: u8) -> Series {
        // `_ddof` is unused: the result is always an all-null object Series of length 1
        // that keeps this Series' name.
        ObjectChunked::<T>::full_null(self.name(), 1).into_series()
    }
    fn std_as_series(&self, _ddof: u8) -> Series {
        // `_ddof` is unused: the result is always an all-null object Series of length 1
        // that keeps this Series' name.
        ObjectChunked::<T>::full_null(self.name(), 1).into_series()
    }

src/frame/mod.rs (line 195)

    fn check_already_present(&self, name: &str) -> PolarsResult<()> {
        // Linear scan over the existing columns; fails with `Duplicate` as soon as one
        // of them already carries `name`.
        let taken = self.columns.iter().any(|col| col.name() == name);
        if !taken {
            return Ok(());
        }
        Err(PolarsError::Duplicate(
            format!("column with name: '{name}' already present in DataFrame").into(),
        ))
    }

    /// Reserve capacity for `additional` more entries in the chunk vector of every column.
    pub(crate) fn reserve_chunks(&mut self, additional: usize) {
        self.columns.iter_mut().for_each(|col| {
            // Safety
            // do not modify the data, simply resize.
            unsafe { col.chunks_mut().reserve(additional) }
        });
    }

    /// Create a DataFrame from a Vector of Series.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("days", [0, 1, 2].as_ref());
    /// let s1 = Series::new("temp", [22.1, 19.9, 7.].as_ref());
    ///
    /// let df = DataFrame::new(vec![s0, s1])?;
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn new<S: IntoSeries>(columns: Vec<S>) -> PolarsResult<Self> {
        // Validates in a single pass that all columns have the same length and that no
        // column name occurs twice, then wraps the columns in a `DataFrame`.
        // Length of the first column seen; every later column must match it.
        let mut first_len = None;

        // Builds the `ShapeMisMatch` error, listing the series passed in.
        let shape_err = |s: &[Series]| {
            let msg = format!(
                "Could not create a new DataFrame from Series. \
            The Series have different lengths. \
            Got {s:?}",
            );
            Err(PolarsError::ShapeMisMatch(msg.into()))
        };

        let series_cols = if S::is_series() {
            // Fast path: `S` already is `Series`, so the vector is reinterpreted instead
            // of being converted element by element.
            // Safety:
            // we are guarded by the type system here.
            #[allow(clippy::transmute_undefined_repr)]
            let series_cols = unsafe { std::mem::transmute::<Vec<S>, Vec<Series>>(columns) };
            let mut names = PlHashSet::with_capacity(series_cols.len());

            for s in &series_cols {
                match first_len {
                    Some(len) => {
                        if s.len() != len {
                            return shape_err(&series_cols);
                        }
                    }
                    None => first_len = Some(s.len()),
                }
                let name = s.name();

                if names.contains(name) {
                    _duplicate_err(name)?
                }

                names.insert(name);
            }
            // we drop early as the borrow checker thinks the &str borrows are used when
            // calling the drop of both `series_cols` and `names`
            drop(names);
            series_cols
        } else {
            let mut series_cols = Vec::with_capacity(columns.len());
            let mut names = PlHashSet::with_capacity(columns.len());

            // check for series length equality and convert into series in one pass
            for s in columns {
                let series = s.into_series();
                match first_len {
                    Some(len) => {
                        if series.len() != len {
                            // NOTE(review): `series` has not been pushed yet, so the
                            // error message lists only the columns accepted so far and
                            // not the one with the deviating length.
                            return shape_err(&series_cols);
                        }
                    }
                    None => first_len = Some(series.len()),
                }
                // we have aliasing borrows so we must allocate a string
                let name = series.name().to_string();

                if names.contains(&name) {
                    _duplicate_err(&name)?
                }

                series_cols.push(series);
                names.insert(name);
            }
            drop(names);
            series_cols
        };

        Ok(DataFrame {
            columns: series_cols,
        })
    }

    /// Creates an empty `DataFrame` usable in a compile time context (such as static initializers).
    ///
    /// # Example
    ///
    /// ```rust
    /// use polars_core::prelude::DataFrame;
    /// static EMPTY: DataFrame = DataFrame::empty();
    /// ```
    pub const fn empty() -> Self {
        // `Vec::new()` does not allocate and is `const`, which keeps this constructor
        // usable in static initializers.
        DataFrame::new_no_checks(Vec::new())
    }

    /// Removes the last `Series` from the `DataFrame` and returns it, or [`None`] if it is empty.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1 = Series::new("Ocean", &["Atlantic", "Indian"]);
    /// let s2 = Series::new("Area (km²)", &[106_460_000, 70_560_000]);
    /// let mut df = DataFrame::new(vec![s1.clone(), s2.clone()])?;
    ///
    /// assert_eq!(df.pop(), Some(s2));
    /// assert_eq!(df.pop(), Some(s1));
    /// assert_eq!(df.pop(), None);
    /// assert!(df.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn pop(&mut self) -> Option<Series> {
        // Delegates to `Vec::pop`: removes the last column, `None` when there is none.
        self.columns.pop()
    }

    /// Add a new column at index 0 that counts the rows.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Name" => &["James", "Mary", "John", "Patricia"])?;
    /// assert_eq!(df1.shape(), (4, 1));
    ///
    /// let df2: DataFrame = df1.with_row_count("Id", None)?;
    /// assert_eq!(df2.shape(), (4, 2));
    /// println!("{}", df2);
    ///
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    ///  shape: (4, 2)
    ///  +-----+----------+
    ///  | Id  | Name     |
    ///  | --- | ---      |
    ///  | u32 | str      |
    ///  +=====+==========+
    ///  | 0   | James    |
    ///  +-----+----------+
    ///  | 1   | Mary     |
    ///  +-----+----------+
    ///  | 2   | John     |
    ///  +-----+----------+
    ///  | 3   | Patricia |
    ///  +-----+----------+
    /// ```
    pub fn with_row_count(&self, name: &str, offset: Option<IdxSize>) -> PolarsResult<Self> {
        // Counting starts at `offset` (0 when absent) and covers one value per row.
        let start = offset.unwrap_or(0);
        let end = (self.height() as IdxSize) + start;

        let mut row_nr = IdxCa::from_vec(name, (start..end).collect());
        // NOTE(review): the flag presumably marks the counter as sorted in
        // non-reversed (ascending) order — confirm against `set_sorted`.
        row_nr.set_sorted(false);

        // The counter goes first, followed by clones of the existing columns.
        // `DataFrame::new` re-validates lengths and rejects a clashing `name`.
        let mut columns = Vec::with_capacity(self.columns.len() + 1);
        columns.push(row_nr.into_series());
        columns.extend_from_slice(&self.columns);
        DataFrame::new(columns)
    }

    /// Insert a row-count column named `name` at index 0 of this `DataFrame`.
    ///
    /// Counting starts at `offset`, or at 0 when `offset` is `None`. No check is made
    /// here for an already existing column called `name`.
    pub fn with_row_count_mut(&mut self, name: &str, offset: Option<IdxSize>) -> &mut Self {
        let start = offset.unwrap_or(0);
        let end = (self.height() as IdxSize) + start;

        let mut row_nr = IdxCa::from_vec(name, (start..end).collect());
        // NOTE(review): the flag presumably marks the counter as sorted in
        // non-reversed (ascending) order — confirm against `set_sorted`.
        row_nr.set_sorted(false);

        self.columns.insert(0, row_nr.into_series());
        self
    }

    /// Create a new `DataFrame` without checking the lengths or duplicate names of the `Series`.
    ///
    /// It is advised to use [DataFrame::new](DataFrame::new) in favor of this method.
    ///
    /// # Panics
    /// It is the caller's responsibility to uphold the contract of all `Series`
    /// having an equal length; if not, this may panic down the line.
    pub const fn new_no_checks(columns: Vec<Series>) -> DataFrame {
        DataFrame { columns }
    }

    /// Return a new `DataFrame` in which every column has been rechunked.
    #[must_use]
    pub fn agg_chunks(&self) -> Self {
        // Don't parallelize this. Memory overhead
        let mut cols = Vec::with_capacity(self.columns.len());
        for s in &self.columns {
            cols.push(s.rechunk());
        }
        DataFrame::new_no_checks(cols)
    }

    /// Call `shrink_to_fit` on every column of this DataFrame, one after another.
    pub fn shrink_to_fit(&mut self) {
        // Don't parallelize this. Memory overhead
        self.columns.iter_mut().for_each(|col| {
            col.shrink_to_fit();
        });
    }

    /// Rechunk every column in place, one column at a time, and return `self`.
    pub fn as_single_chunk(&mut self) -> &mut Self {
        // Don't parallelize this. Memory overhead
        self.columns.iter_mut().for_each(|col| {
            let merged = col.rechunk();
            *col = merged;
        });
        self
    }

    /// Rechunk all columns in parallel, but only if at least one column has more than
    /// one chunk. This may lead to more peak memory consumption.
    pub fn as_single_chunk_par(&mut self) -> &mut Self {
        let fragmented = self.columns.iter().any(|s| s.n_chunks() > 1);
        if !fragmented {
            // Every column already consists of at most one chunk: nothing to do.
            return self;
        }
        self.columns = self.apply_columns_par(&|s| s.rechunk());
        self
    }

    /// Estimates of the DataFrames columns consist of the same chunk sizes
    pub fn should_rechunk(&self) -> bool {
        let hb = RandomState::default();
        let hb2 = RandomState::with_seeds(392498, 98132457, 0, 412059);
        !self
            .columns
            .iter()
            // The idea is that we create a hash of the chunk lengths.
            // Consisting of the combined hash + the sum (assuming collision probability is nihil)
            // if not, we can add more hashes or at worst case we do an extra rechunk.
            // the old solution to this was clone all lengths to a vec and compare the vecs
            .map(|s| {
                s.chunk_lengths().map(|i| i as u64).fold(
                    (0u64, 0u64, s.n_chunks()),
                    |(lhash, lh2, n), rval| {
                        let mut h = hb.build_hasher();
                        rval.hash(&mut h);
                        let rhash = h.finish();
                        let mut h = hb2.build_hasher();
                        rval.hash(&mut h);
                        let rh2 = h.finish();
                        (
                            _boost_hash_combine(lhash, rhash),
                            _boost_hash_combine(lh2, rh2),
                            n,
                        )
                    },
                )
            })
            .all_equal()
    }

    /// Make sure the chunks of all columns are aligned.
    ///
    /// Columns are only merged into single chunks (in parallel) when
    /// `should_rechunk` reports differing chunk layouts.
    pub fn rechunk(&mut self) -> &mut Self {
        if !self.should_rechunk() {
            return self;
        }
        self.as_single_chunk_par()
    }

    /// Get the `DataFrame` schema.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Thing" => &["Observable universe", "Human stupidity"],
    ///                         "Diameter (m)" => &[8.8e26, f64::INFINITY])?;
    ///
    /// let f1: Field = Field::new("Thing", DataType::Utf8);
    /// let f2: Field = Field::new("Diameter (m)", DataType::Float64);
    /// let sc: Schema = Schema::from(vec![f1, f2].into_iter());
    ///
    /// assert_eq!(df.schema(), sc);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn schema(&self) -> Schema {
        // One owned `Field` per column, in column order.
        let fields = self.iter().map(|s| s.field().into_owned());
        Schema::from(fields)
    }

    /// Get a reference to the `DataFrame` columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Adenine", "Cytosine", "Guanine", "Thymine"],
    ///                         "Symbol" => &["A", "C", "G", "T"])?;
    /// let columns: &Vec<Series> = df.get_columns();
    ///
    /// assert_eq!(columns[0].name(), "Name");
    /// assert_eq!(columns[1].name(), "Symbol");
    /// # Ok::<(), PolarsError>(())
    /// ```
    #[inline]
    pub fn get_columns(&self) -> &Vec<Series> {
        // Hands out the backing vector itself; no column is cloned.
        &self.columns
    }

    /// Get a mutable reference to the `DataFrame` columns.
    ///
    /// Only compiled when the `private` feature is enabled.
    /// NOTE(review): changes made through this reference are not validated here —
    /// callers presumably have to keep column lengths and names consistent themselves.
    #[cfg(feature = "private")]
    #[inline]
    pub fn get_columns_mut(&mut self) -> &mut Vec<Series> {
        &mut self.columns
    }

    /// Iterator over the columns as `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Name", &["Pythagoras' theorem", "Shannon entropy"]);
    /// let s2: Series = Series::new("Formula", &["a²+b²=c²", "H=-Σ[P(x)log|P(x)|]"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2.clone()])?;
    ///
    /// let mut iterator = df.iter();
    ///
    /// assert_eq!(iterator.next(), Some(&s1));
    /// assert_eq!(iterator.next(), Some(&s2));
    /// assert_eq!(iterator.next(), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn iter(&self) -> std::slice::Iter<'_, Series> {
        // Plain slice iterator over the stored columns, in column order.
        self.columns.iter()
    }

    /// Get the column names as borrowed string slices, in column order.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Language" => &["Rust", "Python"],
    ///                         "Designer" => &["Graydon Hoare", "Guido van Rossum"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Language", "Designer"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn get_column_names(&self) -> Vec<&str> {
        self.columns.iter().map(|s| s.name()).collect()
    }

    /// Get the column names as owned `String`s, in column order.
    pub fn get_column_names_owned(&self) -> Vec<String> {
        self.columns
            .iter()
            .map(|col| String::from(col.name()))
            .collect()
    }

    /// Set the column names.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Mathematical set" => &["ℕ", "ℤ", "𝔻", "ℚ", "ℝ", "ℂ"])?;
    /// df.set_column_names(&["Set"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Set"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn set_column_names<S: AsRef<str>>(&mut self, names: &[S]) -> PolarsResult<()> {
        let width = self.columns.len();

        // Exactly one name per column is required.
        if names.len() != width {
            return Err(PolarsError::ShapeMisMatch("the provided slice with column names has not the same size as the DataFrame's width".into()));
        }

        // Collapsing the names into a set reveals duplicates: the set is then smaller
        // than the number of columns.
        let distinct: AHashSet<&str, ahash::RandomState> =
            names.iter().map(|name| name.as_ref()).collect();
        if distinct.len() != width {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }

        // All checks passed; rename every column in place, pairing by position.
        for (col, name) in self.columns.iter_mut().zip(names) {
            col.rename(name.as_ref());
        }
        Ok(())
    }

    /// Get the data types of the columns in the DataFrame.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let venus_air: DataFrame = df!("Element" => &["Carbon dioxide", "Nitrogen"],
    ///                                "Fraction" => &[0.965, 0.035])?;
    ///
    /// assert_eq!(venus_air.dtypes(), &[DataType::Utf8, DataType::Float64]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn dtypes(&self) -> Vec<DataType> {
        // Borrow each column's dtype first, then clone them into the result.
        self.columns.iter().map(|col| col.dtype()).cloned().collect()
    }

    /// The number of chunks of the first column, or 0 when there are no columns.
    pub fn n_chunks(&self) -> usize {
        self.columns.first().map_or(0, |col| col.n_chunks())
    }

    /// Get the schema fields of the `DataFrame` as an owned `Vec`, in column order.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let earth: DataFrame = df!("Surface type" => &["Water", "Land"],
    ///                            "Fraction" => &[0.708, 0.292])?;
    ///
    /// let f1: Field = Field::new("Surface type", DataType::Utf8);
    /// let f2: Field = Field::new("Fraction", DataType::Float64);
    ///
    /// assert_eq!(earth.fields(), &[f1, f2]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn fields(&self) -> Vec<Field> {
        let mut fields = Vec::with_capacity(self.columns.len());
        for col in &self.columns {
            fields.push(col.field().into_owned());
        }
        fields
    }

    /// Get (height, width) of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("1" => &[1, 2, 3, 4, 5])?;
    /// let df2: DataFrame = df!("1" => &[1, 2, 3, 4, 5],
    ///                          "2" => &[1, 2, 3, 4, 5])?;
    ///
    /// assert_eq!(df0.shape(), (0 ,0));
    /// assert_eq!(df1.shape(), (5, 1));
    /// assert_eq!(df2.shape(), (5, 2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn shape(&self) -> (usize, usize) {
        // The height is read from the first column; without columns the shape is (0, 0).
        self.columns
            .first()
            .map_or((0, 0), |first| (first.len(), self.columns.len()))
    }

    /// Get the width of the `DataFrame` which is the number of columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("Series 1" => &[0; 0])?;
    /// let df2: DataFrame = df!("Series 1" => &[0; 0],
    ///                          "Series 2" => &[0; 0])?;
    ///
    /// assert_eq!(df0.width(), 0);
    /// assert_eq!(df1.width(), 1);
    /// assert_eq!(df2.width(), 2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn width(&self) -> usize {
        // One entry per column, so the vector length is the width.
        self.columns.len()
    }

    /// Get the height of the `DataFrame` which is the number of rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("Currency" => &["€", "$"])?;
    /// let df2: DataFrame = df!("Currency" => &["€", "$", "¥", "£", "₿"])?;
    ///
    /// assert_eq!(df0.height(), 0);
    /// assert_eq!(df1.height(), 2);
    /// assert_eq!(df2.height(), 5);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn height(&self) -> usize {
        // `shape` yields (height, width); only the first component is needed.
        let (n_rows, _n_cols) = self.shape();
        n_rows
    }

    /// Check if the `DataFrame` is empty.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = DataFrame::default();
    /// assert!(df1.is_empty());
    ///
    /// let df2: DataFrame = df!("First name" => &["Forever"],
    ///                          "Last name" => &["Alone"])?;
    /// assert!(!df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_empty(&self) -> bool {
        // Emptiness is judged by the column count only: a frame that has columns but
        // zero rows is reported as non-empty.
        self.columns.is_empty()
    }

    pub(crate) fn hstack_mut_no_checks(&mut self, columns: &[Series]) -> &mut Self {
        // Appends clones of `columns` as-is; neither lengths nor names are verified.
        self.columns.extend_from_slice(columns);
        self
    }

    /// Add multiple `Series` to a `DataFrame`.
    /// The added `Series` are required to have the same length.
    ///
    /// All checks run before anything is appended, so `self` is left untouched when an
    /// error is returned.
    ///
    /// # Errors
    ///
    /// * `ShapeMisMatch` when a `Series` length differs from the height of the
    ///   `DataFrame`, or — for a `DataFrame` of height 0 — from the length of the first
    ///   added `Series`.
    /// * `Duplicate` when a `Series` name already exists in the `DataFrame` or occurs
    ///   twice in `columns`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn stack(df: &mut DataFrame, columns: &[Series]) {
    ///     df.hstack_mut(columns);
    /// }
    /// ```
    pub fn hstack_mut(&mut self, columns: &[Series]) -> PolarsResult<&mut Self> {
        let mut names = PlHashSet::with_capacity(self.columns.len() + columns.len());
        for s in &self.columns {
            names.insert(s.name());
        }

        // A frame of height 0 accepts columns of any length (as before), but the first
        // added column then fixes the length for the remaining ones. Without this the
        // new columns were never compared with each other and could end up ragged.
        let height = self.height();
        let mut expected_len = if height == 0 { None } else { Some(height) };

        // first loop check validity. We don't do this in a single pass otherwise
        // this DataFrame is already modified when an error occurs.
        for col in columns {
            let expected = *expected_len.get_or_insert(col.len());
            if col.len() != expected {
                return Err(PolarsError::ShapeMisMatch(
                    format!("Could not horizontally stack Series. The Series length {} differs from the DataFrame height: {expected}", col.len()).into()));
            }

            let name = col.name();
            if names.contains(name) {
                return Err(PolarsError::Duplicate(
                    format!("Cannot do hstack operation. Column with name: {name} already exists",)
                        .into(),
                ));
            }
            names.insert(name);
        }
        // Release the `&str` borrows before `self` is mutated below.
        drop(names);
        Ok(self.hstack_mut_no_checks(columns))
    }

    /// Add multiple `Series` to a `DataFrame`.
    /// The added `Series` are required to have the same length.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"])?;
    /// let s1: Series = Series::new("Proton", &[29, 47, 79]);
    /// let s2: Series = Series::new("Electron", &[29, 47, 79]);
    ///
    /// let df2: DataFrame = df1.hstack(&[s1, s2])?;
    /// assert_eq!(df2.shape(), (3, 3));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 3)
    /// +---------+--------+----------+
    /// | Element | Proton | Electron |
    /// | ---     | ---    | ---      |
    /// | str     | i32    | i32      |
    /// +=========+========+==========+
    /// | Copper  | 29     | 29       |
    /// +---------+--------+----------+
    /// | Silver  | 47     | 47       |
    /// +---------+--------+----------+
    /// | Gold    | 79     | 79       |
    /// +---------+--------+----------+
    /// ```
    pub fn hstack(&self, columns: &[Series]) -> PolarsResult<Self> {
        // Existing columns first, followed by the new ones; the combined list is
        // handed to `DataFrame::new`.
        let combined: Vec<Series> = self
            .columns
            .iter()
            .chain(columns.iter())
            .cloned()
            .collect();
        DataFrame::new(combined)
    }

    /// Concatenate a `DataFrame` to this `DataFrame` and return as newly allocated `DataFrame`.
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// let df3: DataFrame = df1.vstack(&df2)?;
    ///
    /// assert_eq!(df3.shape(), (5, 2));
    /// println!("{}", df3);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    pub fn vstack(&self, other: &DataFrame) -> PolarsResult<Self> {
        // Stack onto a copy so that `self` stays untouched.
        let mut stacked = self.clone();
        let outcome = stacked.vstack_mut(other).map(|_| ());
        outcome.map(|()| stacked)
    }

    /// Concatenate a DataFrame to this DataFrame
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// df1.vstack_mut(&df2)?;
    ///
    /// assert_eq!(df1.shape(), (5, 2));
    /// println!("{}", df1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    ///
    /// # Errors
    ///
    /// Returns `PolarsError::ShapeMisMatch` if the widths differ (unless this
    /// `DataFrame` has no columns, in which case it takes over the columns of
    /// `other`), and any error reported by `can_extend` for a pair of columns.
    /// All column pairs are checked before any data is appended.
    pub fn vstack_mut(&mut self, other: &DataFrame) -> PolarsResult<&mut Self> {
        if self.width() != other.width() {
            // A frame without columns simply adopts the columns of `other`.
            if self.width() == 0 {
                self.columns = other.columns.clone();
                return Ok(self);
            }

            // `other` is the appended frame, `self` is the parent.
            return Err(PolarsError::ShapeMisMatch(
                format!("Could not vertically stack DataFrame. The DataFrames appended width {} differs from the parent DataFrames width {}", other.width(), self.width()).into()
            ));
        }

        // Validate every column pair before touching any of them, so a failing
        // check cannot leave this frame with columns of different lengths.
        for (left, right) in self.columns.iter().zip(other.columns.iter()) {
            can_extend(left, right)?;
        }

        for (left, right) in self.columns.iter_mut().zip(other.columns.iter()) {
            left.append(right)?;
        }
        Ok(self)
    }

    /// Does not check if schema is correct
    pub(crate) fn vstack_mut_unchecked(&mut self, other: &DataFrame) {
        // Columns are paired by position; names and dtypes are not compared here.
        let pairs = self.columns.iter_mut().zip(other.columns.iter());
        for (ours, theirs) in pairs {
            ours.append(theirs).expect("should not fail");
        }
    }

    /// Extend the memory backed by this [`DataFrame`] with the values from `other`.
    ///
    /// Different from [`vstack`](Self::vstack) which adds the chunks from `other` to the chunks of this [`DataFrame`]
    /// `extend` appends the data from `other` to the underlying memory locations and thus may cause a reallocation.
    ///
    /// If this does not cause a reallocation, the resulting data structure will not have any extra chunks
    /// and thus will yield faster queries.
    ///
    /// Prefer `extend` over `vstack` when you want to do a query after a single append. For instance during
    /// online operations where you add `n` rows and rerun a query.
    ///
    /// Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance
    /// when you read in multiple files and want to store them in a single `DataFrame`. In the latter case, finish the sequence
    /// of `vstack` operations with a [`rechunk`](Self::rechunk).
    ///
    /// # Errors
    ///
    /// Returns `PolarsError::ShapeMisMatch` if the widths differ, any error reported by
    /// `can_extend` for a pair of columns, and any error from extending a column.
    /// All column pairs are checked before any data is appended.
    pub fn extend(&mut self, other: &DataFrame) -> PolarsResult<()> {
        if self.width() != other.width() {
            // `other` supplies the extending data, `self` is the parent.
            return Err(PolarsError::ShapeMisMatch(
                format!("Could not extend DataFrame. The DataFrames extended width {} differs from the parent DataFrames width {}", other.width(), self.width()).into()
            ));
        }

        // Validate every column pair before touching any of them, so a failing
        // check cannot leave this frame with columns of different lengths.
        for (left, right) in self.columns.iter().zip(other.columns.iter()) {
            can_extend(left, right)?;
        }

        for (left, right) in self.columns.iter_mut().zip(other.columns.iter()) {
            left.extend(right)?;
        }
        Ok(())
    }

    /// Remove a column by name and return the column removed.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Animal" => &["Tiger", "Lion", "Great auk"],
    ///                             "IUCN" => &["Endangered", "Vulnerable", "Extinct"])?;
    ///
    /// let s1: PolarsResult<Series> = df.drop_in_place("Average weight");
    /// assert!(s1.is_err());
    ///
    /// let s2: Series = df.drop_in_place("Animal")?;
    /// assert_eq!(s2, Series::new("Animal", &["Tiger", "Lion", "Great auk"]));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop_in_place(&mut self, name: &str) -> PolarsResult<Series> {
        // Resolve the name first; an unknown name surfaces as the lookup's error.
        let position = self.check_name_to_idx(name)?;
        let removed = self.columns.remove(position);
        Ok(removed)
    }

    /// Return a new `DataFrame` where all null values are dropped.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Country" => ["Malta", "Liechtenstein", "North Korea"],
    ///                         "Tax revenue (% GDP)" => [Some(32.7), None, None])?;
    /// assert_eq!(df1.shape(), (3, 2));
    ///
    /// let df2: DataFrame = df1.drop_nulls(None)?;
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------------------+
    /// | Country | Tax revenue (% GDP) |
    /// | ---     | ---                 |
    /// | str     | f64                 |
    /// +=========+=====================+
    /// | Malta   | 32.7                |
    /// +---------+---------------------+
    /// ```
    pub fn drop_nulls(&self, subset: Option<&[String]>) -> PolarsResult<Self> {
        // Owns the selected columns (when a subset is given) so that the iterator
        // below can borrow from them.
        let subset_columns;

        let mut candidates = if let Some(names) = subset {
            subset_columns = self.select_series(names)?;
            subset_columns.iter()
        } else {
            self.columns.iter()
        };

        // fast path: none of the inspected columns reports validity information
        let any_validity = candidates.clone().any(|s| s.has_validity());
        if !any_validity {
            return Ok(self.clone());
        }

        let first = candidates
            .next()
            .ok_or_else(|| PolarsError::NoData("No data to drop nulls from".into()))?;

        // A row is kept only if it is non-null in every inspected column.
        let keep = candidates.fold(first.is_not_null(), |acc, s| acc & s.is_not_null());
        self.filter(&keep)
    }

    /// Drop a column by name.
    /// This is a pure method and will return a new `DataFrame` instead of modifying
    /// the current one in place.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Ray type" => &["α", "β", "X", "γ"])?;
    /// let df2: DataFrame = df1.drop("Ray type")?;
    ///
    /// assert!(df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop(&self, name: &str) -> PolarsResult<Self> {
        let skip = self.check_name_to_idx(name)?;

        // Exactly one column is left out, hence `len - 1`.
        let mut kept = Vec::with_capacity(self.columns.len() - 1);
        for (position, series) in self.columns.iter().enumerate() {
            if position == skip {
                continue;
            }
            kept.push(series.clone());
        }

        Ok(DataFrame::new_no_checks(kept))
    }

    /// Drop every column whose name occurs in `names` and return the remaining
    /// columns as a new `DataFrame`.
    ///
    /// Names that do not match any column have no effect.
    pub fn drop_many<S: AsRef<str>>(&self, names: &[S]) -> Self {
        let names = names.iter().map(|s| s.as_ref()).collect();
        // The actual work lives in a non-generic inner function.
        fn inner(df: &DataFrame, names: Vec<&str>) -> DataFrame {
            // `names` may hold duplicates or unknown names and can therefore be
            // longer than the column list; saturate instead of underflowing.
            let capacity = df.columns.len().saturating_sub(names.len());
            let mut new_cols = Vec::with_capacity(capacity);
            new_cols.extend(
                df.columns
                    .iter()
                    .filter(|s| !names.contains(&s.name()))
                    .cloned(),
            );

            DataFrame::new_no_checks(new_cols)
        }
        inner(self, names)
    }

    /// Insert `series` at position `index` without checking whether its name is
    /// already in use.
    ///
    /// Returns `PolarsError::ShapeMisMatch` when the length of `series` differs
    /// from the height of this `DataFrame`.
    fn insert_at_idx_no_name_check(
        &mut self,
        index: usize,
        series: Series,
    ) -> PolarsResult<&mut Self> {
        let (len, height) = (series.len(), self.height());
        if len != height {
            return Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    len, height
                )
                .into(),
            ));
        }

        self.columns.insert(index, series);
        Ok(self)
    }

    /// Insert a new column at a given index.
    pub fn insert_at_idx<S: IntoSeries>(
        &mut self,
        index: usize,
        column: S,
    ) -> PolarsResult<&mut Self> {
        let new_column = column.into_series();
        // The name check runs first; the insert happens only if it passes.
        self.check_already_present(new_column.name())?;
        self.insert_at_idx_no_name_check(index, new_column)
    }

    /// Replace the column that has the same name as `series`, or append `series`
    /// when no column with that name exists.
    fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> {
        match self.find_idx_by_name(series.name()) {
            Some(existing) => {
                self.replace_at_idx(existing, series)?;
            }
            None => self.columns.push(series),
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    pub fn with_column<S: IntoSeries>(&mut self, column: S) -> PolarsResult<&mut Self> {
        // Non-generic worker; the generic shell only converts `column`.
        fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();

            // A unit-length series is repeated to the height of the frame.
            if height > 1 && series.len() == 1 {
                series = series.new_from_index(0, height);
            }
            let len = series.len();

            if len == height || df.is_empty() {
                df.add_column_by_search(series)?;
                return Ok(df);
            }

            // special case for literals
            if height == 0 && len == 1 {
                df.add_column_by_search(series.slice(0, 0))?;
                return Ok(df);
            }

            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    len, height
                )
                .into(),
            ))
        }

        inner(self, column.into_series())
    }

    /// Add or replace the column `s`, using `schema` to find its position.
    ///
    /// When the column at the position given by `schema` does not carry the name
    /// of `s`, the lookup falls back to `add_column_by_search`.
    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let position = schema.get_full(s.name()).map(|(idx, _, _)| idx);

        match position {
            // Name unknown to the schema: append.
            None => self.columns.push(s),
            Some(idx) => {
                let schema_matches =
                    self.columns.get(idx).map(|col| col.name()) == Some(s.name());
                if schema_matches {
                    self.replace_at_idx(idx, s)?;
                } else {
                    // schema is incorrect fallback to search
                    self.add_column_by_search(s)?;
                }
            }
        }
        Ok(())
    }

    /// Add every `Series` in `columns` to this `DataFrame`, replacing existing
    /// columns that share a name.
    ///
    /// The first `Series` and every `Series` whose name is present in `schema` go
    /// through `with_column_and_schema`; all others go through `with_column`.
    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                // `s` is owned and not used afterwards, so it is moved, not cloned.
                self.with_column(s)?;
            }
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    /// Uses an existing schema to amortize lookups.
    /// If the schema is incorrect, we will fallback to linear search.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let mut series = column.into_series();
        let height = self.height();

        // A unit-length series is repeated to the height of the frame.
        if height > 1 && series.len() == 1 {
            series = series.new_from_index(0, height);
        }
        let len = series.len();

        if len == height || self.is_empty() {
            self.add_column_by_schema(series, schema)?;
            return Ok(self);
        }

        // special case for literals
        if height == 0 && len == 1 {
            self.add_column_by_schema(series.slice(0, 0), schema)?;
            return Ok(self);
        }

        Err(PolarsError::ShapeMisMatch(
            format!(
                "Could not add column. The Series length {} differs from the DataFrame height: {}",
                len, height
            )
            .into(),
        ))
    }

    /// Get a row in the `DataFrame`. Beware this is slow.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    ///     df.get(idx)
    /// }
    /// ```
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        // No columns, or an index past the first column's length: no row.
        let first = self.columns.first()?;
        if idx >= first.len() {
            return None;
        }

        // SAFETY: `idx` was checked against the length of the first column above.
        // NOTE(review): this relies on all columns having that same length.
        let row = self
            .columns
            .iter()
            .map(|s| unsafe { s.get_unchecked(idx) })
            .collect();
        Some(row)
    }

    /// Select a `Series` by index.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
    ///                         "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
    ///
    /// let s1: Option<&Series> = df.select_at_idx(0);
    /// let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
    ///
    /// assert_eq!(s1, Some(&s2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_at_idx(&self, idx: usize) -> Option<&Series> {
        // `None` when `idx` is past the last column.
        let cols: &[Series] = self.columns.as_slice();
        cols.get(idx)
    }

    /// Select a mutable series by index.
    ///
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> {
        // `None` when `idx` is past the last column.
        let cols: &mut [Series] = self.columns.as_mut_slice();
        cols.get_mut(idx)
    }

    /// Select column(s) from this `DataFrame` by range and return a new DataFrame
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///     "0" => &[0, 0, 0],
    ///     "1" => &[1, 1, 1],
    ///     "2" => &[2, 2, 2]
    /// }?;
    ///
    /// assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
    /// assert!(df.frame_equal(&df.select_by_range(..)?));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // This function is copied from std::slice::range (https://doc.rust-lang.org/std/slice/fn.range.html)
        // because it is the nightly feature. We should change here if this function were stable.
        //
        // It resolves arbitrary range bounds to a concrete `start..end` and panics
        // on inverted or out-of-bounds ranges.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let ops::RangeTo { end: len } = bounds;

            let start = match range.start_bound() {
                ops::Bound::Unbounded => 0,
                ops::Bound::Included(&first) => first,
                ops::Bound::Excluded(before) => match before.checked_add(1) {
                    Some(first) => first,
                    None => panic!("attempted to index slice from after maximum usize"),
                },
            };

            let end = match range.end_bound() {
                ops::Bound::Unbounded => len,
                ops::Bound::Excluded(&past) => past,
                ops::Bound::Included(last) => match last.checked_add(1) {
                    Some(past) => past,
                    None => panic!("attempted to index slice up to maximum usize"),
                },
            };

            if start > end {
                panic!("slice index starts at {start} but ends at {end}");
            }
            if end > len {
                panic!("range end index {end} out of range for slice of length {len}",);
            }

            start..end
        }

        // Resolve the range against the column names, then select by name.
        let colnames = self.get_column_names_owned();
        let resolved = get_range(range, ..colnames.len());

        self.select_impl(&colnames[resolved])
    }

    /// Get column index of a `Series` by name.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
    ///                         "Health" => &[100, 200, 500],
    ///                         "Mana" => &[250, 100, 0],
    ///                         "Strength" => &[30, 150, 300])?;
    ///
    /// assert_eq!(df.find_idx_by_name("Name"), Some(0));
    /// assert_eq!(df.find_idx_by_name("Health"), Some(1));
    /// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
    /// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
    /// assert_eq!(df.find_idx_by_name("Haste"), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
        // Linear scan; the first column with a matching name wins.
        for (position, series) in self.columns.iter().enumerate() {
            if series.name() == name {
                return Some(position);
            }
        }
        None
    }

    /// Select a single column by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
    /// let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
    ///
    /// assert_eq!(df.column("Password")?, &s1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        match self.find_idx_by_name(name) {
            // The index was just found, so the lookup by index cannot miss.
            Some(idx) => Ok(self.select_at_idx(idx).unwrap()),
            None => Err(PolarsError::NotFound(name.to_string().into())),
        }
    }

    /// Select multiple columns by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        // Stops at the first name that cannot be resolved.
        let mut found = Vec::new();
        for name in names {
            found.push(self.column(name.as_ref())?);
        }
        Ok(found)
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        // Materialize owned names once, then defer to the non-generic implementation.
        let cols: Vec<String> = selection
            .into_iter()
            .map(|name| name.as_ref().to_owned())
            .collect();
        self.select_impl(&cols)
    }

    /// Non-generic backend of `select`: rejects duplicate names, then gathers the
    /// requested columns into a new `DataFrame`.
    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        self.select_series_impl(cols)
            .map(|selected| DataFrame::new_no_checks(selected))
    }

    /// Select column(s) by name and return them, via `select_physical_impl`, as a
    /// new `DataFrame`.
    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        // Materialize owned names once, then defer to the non-generic implementation.
        let cols: Vec<String> = selection
            .into_iter()
            .map(|name| name.as_ref().to_owned())
            .collect();
        self.select_physical_impl(&cols)
    }

    /// Non-generic backend of `select_physical`: rejects duplicate names, then
    /// gathers the requested columns through `select_series_physical_impl`.
    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        self.select_series_physical_impl(cols)
            .map(|selected| DataFrame::new_no_checks(selected))
    }

    /// Check that no name occurs more than once in `cols`; the first repeated
    /// name is passed to `_duplicate_err`.
    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut seen = PlHashSet::with_capacity(cols.len());
        cols.iter().try_for_each(|name| -> PolarsResult<()> {
            // `insert` reports `false` for a name that was seen before.
            let first_occurrence = seen.insert(name.as_str());
            if !first_occurrence {
                _duplicate_err(name)?;
            }
            Ok(())
        })
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        // Convert the selection to owned names and defer to the non-generic implementation.
        self.select_series_impl(&selection.into_vec())
    }

    /// Build a map from column name to the position of that column.
    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        // Pair every name with a running index starting at 0.
        self.columns.iter().map(|s| s.name()).zip(0..).collect()
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let use_hash_lookup = cols.len() > 1 && self.columns.len() > 10;
        if !use_hash_lookup {
            // A single name or at most ten columns: resolve each name by search.
            return cols
                .iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect();
        }

        let name_to_idx = self._names_to_idx_map();

        let mut selected = Vec::with_capacity(cols.len());
        for name in cols {
            let idx = *name_to_idx
                .get(name.as_str())
                .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
            // `idx` comes from the map built over `self.columns`, so it is in range.
            let physical = self
                .select_at_idx(idx)
                .unwrap()
                .to_physical_repr()
                .into_owned();
            selected.push(physical);
        }

        Ok(selected)
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let use_hash_lookup = cols.len() > 1 && self.columns.len() > 10;
        if !use_hash_lookup {
            // A single name or at most ten columns: resolve each name by search.
            return cols.iter().map(|c| self.column(c).cloned()).collect();
        }

        // We hash because there are users who have millions of columns.
        // # https://github.com/pola-rs/polars/issues/1023
        let name_to_idx = self._names_to_idx_map();

        let mut selected = Vec::with_capacity(cols.len());
        for name in cols {
            let idx = *name_to_idx
                .get(name.as_str())
                .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
            // `idx` comes from the map built over `self.columns`, so it is in range.
            selected.push(self.select_at_idx(idx).unwrap().clone());
        }

        Ok(selected)
    }

    /// Select a mutable series by name.
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        // An unknown name yields `None` right away.
        let idx = self.find_idx_by_name(name)?;
        self.select_at_idx_mut(idx)
    }

    /// Does a filter but splits thread chunks vertically instead of horizontally
    /// This yields a DataFrame with `n_chunks == n_threads`.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        // Split both the mask and the frame into `n_threads` parts.
        let mask_parts = split_ca(mask, n_threads).unwrap();
        let frame_parts = split_df(self, n_threads).unwrap();

        // Filter each frame part with its mask part inside the thread pool.
        let filtered: PolarsResult<Vec<_>> = POOL.install(|| {
            mask_parts
                .par_iter()
                .zip(frame_parts)
                .map(|(mask_part, frame_part)| {
                    let cols = frame_part
                        .columns
                        .iter()
                        .map(|s| s.filter(mask_part))
                        .collect::<PolarsResult<_>>()?;
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        // Stack the filtered parts back together, in order.
        let mut parts = filtered?.into_iter();
        let mut out = parts.next().unwrap();
        for part in parts {
            out.vstack_mut(&part).unwrap();
        }
        Ok(out)
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        // Setting the `POLARS_VERT_PAR` environment variable selects the
        // vertically parallelized implementation.
        let vertical = std::env::var("POLARS_VERT_PAR").is_ok();
        if vertical {
            return self.clone().filter_vertical(mask);
        }

        // Utf8 columns use the threaded filter, all other dtypes the plain one.
        let filtered = self.try_apply_columns_par(&|s| {
            if matches!(s.dtype(), DataType::Utf8) {
                s.filter_threaded(mask, true)
            } else {
                s.filter(mask)
            }
        })?;
        Ok(DataFrame::new_no_checks(filtered))
    }

    /// Same as `filter` but does not parallelize.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        // Uses `try_apply_columns` rather than the `_par` variant that `filter` uses.
        let filtered = self.try_apply_columns(&|column| column.filter(mask))?;
        let out = DataFrame::new_no_checks(filtered);
        Ok(out)
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        // Every column consumes its own clone of the index iterator.
        let taken = self.try_apply_columns_par(&|column| {
            let mut indices = iter.clone();
            column.take_iter(&mut indices)
        })?;

        let out = DataFrame::new_no_checks(taken);
        Ok(out)
    }

    /// Take `DataFrame` values by indexes from an iterator.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        // Setting the `POLARS_VERT_PAR` environment variable selects the
        // vertical implementation, which works on a materialized index array.
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let indices = iter
                .map(|idx| idx as IdxSize)
                .collect::<NoNull<IdxCa>>()
                .into_inner();
            return self.take_unchecked_vectical(&indices);
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        // Single-chunk frames with several columns, and frames holding a Utf8
        // column, also go through a materialized index array.
        let single_chunk_multi_col = n_chunks == 1 && self.width() > 1;
        if single_chunk_multi_col || has_utf8 {
            let indices = iter
                .map(|idx| idx as IdxSize)
                .collect::<NoNull<IdxCa>>()
                .into_inner();
            return self.take_unchecked(&indices);
        }

        let new_col = match self.width() {
            // One column: the iterator can be consumed directly.
            1 => self
                .columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>(),
            // Otherwise every column works on its own clone of the iterator.
            _ => self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_iter_unchecked(&mut i)
            }),
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` values by indexes from an iterator that may contain None values.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        // Setting the `POLARS_VERT_PAR` environment variable selects the
        // vertical implementation, which works on a materialized index array.
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let indices = iter
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect::<IdxCa>();
            return self.take_unchecked_vectical(&indices);
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        // Single-chunk frames with several columns, and frames holding a Utf8
        // column, also go through a materialized index array.
        let single_chunk_multi_col = n_chunks == 1 && self.width() > 1;
        if single_chunk_multi_col || has_utf8 {
            let indices = iter
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect::<IdxCa>();
            return self.take_unchecked(&indices);
        }

        let new_col = match self.width() {
            // One column: the iterator can be consumed directly.
            1 => self
                .columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>(),
            // Otherwise every column works on its own clone of the iterator.
            _ => self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_opt_iter_unchecked(&mut i)
            }),
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Build a new `DataFrame` from the rows at the given index values.
    ///
    /// The index array is rechunked first when it consists of more than one chunk.
    /// An error from taking any individual column is propagated to the caller.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        let indices = match indices.chunks.len() {
            0 | 1 => Cow::Borrowed(indices),
            _ => Cow::Owned(indices.rechunk()),
        };

        let taken = POOL.install(|| {
            self.try_apply_columns_par(&|s| {
                // Utf8 columns go through the threaded take, everything else through
                // the regular one.
                if matches!(s.dtype(), DataType::Utf8) {
                    s.take_threaded(&indices, true)
                } else {
                    s.take(&indices)
                }
            })
        })?;

        Ok(DataFrame::new_no_checks(taken))
    }

    /// Take rows by index without bounds checking, with multithreading allowed.
    ///
    /// # Safety
    ///
    /// All values in `idx` must be in bounds for this `DataFrame`.
    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        let allow_threads = true;
        self.take_unchecked_impl(idx, allow_threads)
    }

    /// Unchecked take of the rows at `idx`, either on the thread pool
    /// (`allow_threads == true`) or sequentially on the calling thread.
    ///
    /// # Safety
    ///
    /// All values in `idx` must be in bounds for this `DataFrame`.
    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        if !allow_threads {
            // sequential path: every column uses the plain unchecked take
            let cols = self
                .columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect::<Vec<_>>();
            return DataFrame::new_no_checks(cols);
        }

        let cols = POOL.install(|| {
            self.apply_columns_par(&|s| {
                // Utf8 columns additionally use the threaded take
                if matches!(s.dtype(), DataType::Utf8) {
                    s.take_unchecked_threaded(idx, true).unwrap()
                } else {
                    s.take_unchecked(idx).unwrap()
                }
            })
        });
        DataFrame::new_no_checks(cols)
    }

    /// Unchecked take that parallelizes over the rows instead of over the columns:
    /// the index array is split into one part per pool thread, every part is taken
    /// from all columns, and the partial frames are stacked back together in order.
    ///
    /// # Safety
    ///
    /// All values in `indices` must be in bounds for this `DataFrame`.
    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let index_parts = split_ca(indices, n_threads).unwrap();

        let partial_frames: Vec<_> = POOL.install(|| {
            index_parts
                .par_iter()
                .map(|part| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(part).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        // Stack the remaining parts onto the first one.
        let mut remaining = partial_frames.into_iter();
        let mut out = remaining.next().unwrap();
        for part in remaining {
            out.vstack_mut(&part).unwrap();
        }
        out
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// # Errors
    ///
    /// * `PolarsError::NotFound` if there is no column called `column`.
    /// * `PolarsError::SchemaMisMatch` if a different column is already called `name`.
    ///
    /// The `DataFrame` is left unmodified when an error is returned.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        let idx = self
            .find_idx_by_name(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))?;

        // Validate *before* mutating: renaming first and checking afterwards would
        // leave the frame with duplicate column names on the error path.
        let name_taken = self
            .columns
            .iter()
            .enumerate()
            .any(|(i, s)| i != idx && s.name() == name);
        if name_taken {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }

        self.columns[idx].rename(name);
        Ok(self)
    }

    /// Sort this `DataFrame` in place by the given column(s).
    ///
    /// `reverse` holds the descending flags passed on to the sort. The frame is
    /// rechunked into a single chunk before sorting.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // a lot of indirection in both sorting and take
        self.as_single_chunk_par();
        let sort_keys = self.select_series(by_column)?;
        let sorted = self.sort_impl(sort_keys, reverse.into_vec(), false, None)?;
        self.columns = sorted.columns;
        Ok(self)
    }

    /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
    ///
    /// * `by_column` - the Series to sort by; must not be empty.
    /// * `reverse` - descending flags for the sort keys; must not be empty.
    /// * `nulls_last` - forwarded to the sort options of the single-key sort.
    /// * `slice` - optional `(offset, length)` applied to the sorted result.
    ///
    /// # Errors
    ///
    /// Returns a `ComputeError` if `by_column` or `reverse` is empty, and propagates
    /// errors from the multi-column argsort.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // note that the by_column argument also contains evaluated expression from polars-lazy
        // that may not even be present in this dataframe.

        // therefore when we try to set the first columns as sorted, we ignore the error
        // as expressions are not present (they are renamed to _POLARS_SORT_COLUMN_i).

        // Indexing `[0]` on empty input would panic; report it as an error instead.
        let (first_reverse, first_by_column) = match (reverse.first(), by_column.first()) {
            (Some(&descending), Some(s)) => (descending, s.name().to_string()),
            _ => {
                return Err(PolarsError::ComputeError(
                    "cannot sort: `by_column` and `reverse` must each contain at least one element"
                        .into(),
                ));
            }
        };
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: first_reverse,
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted
        // if the column did not exists it is ok, because we sorted by an expression
        // not present in the dataframe
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted clone of this `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        // Sort a clone in place so that `self` stays untouched.
        let mut sorted = self.clone();
        sorted.sort_in_place(by_column, reverse)?;
        Ok(sorted)
    }

    /// Return a clone of this `DataFrame` sorted by one column, using the `descending`
    /// and `nulls_last` settings from `options`.
    ///
    /// Fails if `by_column` is not a column of this `DataFrame`.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut out = self.clone();
        // a lot of indirection in both sorting and take
        out.as_single_chunk_par();
        let sort_key = out.column(by_column)?.clone();
        let sorted = out.sort_impl(
            vec![sort_key],
            vec![options.descending],
            options.nulls_last,
            None,
        )?;
        out.columns = sorted.columns;
        Ok(out)
    }

    /// Replace a column with a `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Country" => &["United States", "China"],
    ///                         "Area (km²)" => &[9_833_520, 9_596_961])?;
    /// let s: Series = Series::new("Country", &["USA", "PRC"]);
    ///
    /// assert!(df.replace("Nation", s.clone()).is_err());
    /// assert!(df.replace("Country", s).is_ok());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace<S: IntoSeries>(&mut self, column: &str, new_col: S) -> PolarsResult<&mut Self> {
        self.apply(column, |_| new_col.into_series())
    }

    /// Replace or update a column.
    ///
    /// Unlike [DataFrame::with_column], the name of the resulting column is taken from
    /// `column` and not from the `Series` that is passed in: the `Series` is renamed to
    /// `column` before it is handed to `with_column`.
    pub fn replace_or_add<S: IntoSeries>(
        &mut self,
        column: &str,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut series = new_col.into_series();
        series.rename(column);
        self.with_column(series)
    }

    /// Replace column at index `idx` with a `Series`.
    ///
    /// # Example
    ///
    /// ```ignore
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace_at_idx<S: IntoSeries>(
        &mut self,
        idx: usize,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let new_column = new_col.into_series();

        // The length is validated first, then the index.
        let height = self.height();
        if new_column.len() != height {
            return Err(PolarsError::ShapeMisMatch(
                format!("Cannot replace Series at index {}. The shape of Series {} does not match that of the DataFrame {}",
                idx, new_column.len(), height
                ).into()));
        }

        let width = self.width();
        if idx >= width {
            return Err(PolarsError::ComputeError(
                format!(
                    "Column index: {} outside of DataFrame with {} columns",
                    idx, width
                )
                .into(),
            ));
        }

        // `idx` is in bounds here; the previous column is dropped by the assignment.
        self.columns[idx] = new_column;
        Ok(self)
    }

    /// Apply a closure to a column. This is the recommended way to do in place modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("names", &["Jean", "Claude", "van"]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// fn str_to_len(str_val: &Series) -> Series {
    ///     str_val.utf8()
    ///         .unwrap()
    ///         .into_iter()
    ///         .map(|opt_name: Option<&str>| {
    ///             opt_name.map(|name: &str| name.len() as u32)
    ///          })
    ///         .collect::<UInt32Chunked>()
    ///         .into_series()
    /// }
    ///
    /// // Replace the names column by the length of the names.
    /// df.apply("names", str_to_len);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | names |
    /// | ---    | ---   |
    /// | str    | u32   |
    /// +========+=======+
    /// | "ham"  | 4     |
    /// +--------+-------+
    /// | "spam" | 6     |
    /// +--------+-------+
    /// | "egg"  | 3     |
    /// +--------+-------+
    /// ```
    pub fn apply<F, S>(&mut self, name: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        // Resolve the name to a position, then defer to the index-based variant.
        match self.check_name_to_idx(name) {
            Ok(idx) => self.apply_at_idx(idx, f),
            Err(err) => Err(err),
        }
    }

    /// Apply a closure to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.apply_at_idx(1, |s| s + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | ascii |
    /// | ---    | ---   |
    /// | str    | i32   |
    /// +========+=======+
    /// | "ham"  | 102   |
    /// +--------+-------+
    /// | "spam" | 111   |
    /// +--------+-------+
    /// | "egg"  | 111   |
    /// +--------+-------+
    /// ```
    pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let df_height = self.height();
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();
        let produced = f(col).into_series();

        let replacement = match produced.len() {
            // a single value is broadcast to the height of the frame
            1 => produced.new_from_index(0, df_height),
            len if len == df_height => produced,
            len => {
                return Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Result Series has shape {} where the DataFrame has height {}",
                        len, df_height
                    )
                    .into(),
                ));
            }
        };
        *col = replacement;

        // make sure the name remains the same after applying the closure
        col.rename(&name);
        Ok(self)
    }

    /// Apply a closure that may fail to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values in a column of a `DataFrame`, given a set of indexes.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// let idx = vec![0, 1, 4];
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set_at_idx_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "ham-is-modified"   | 1      |
    /// +---------------------+--------+
    /// | "spam-is-modified"  | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "quack-is-modified" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();

        // An error from the closure is returned before the column is touched.
        let replacement = f(col)?.into_series();
        *col = replacement;

        // make sure the name remains the same after applying the closure
        col.rename(&name);
        Ok(self)
    }

    /// Apply a closure that may fail to a column. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values in a column of a `DataFrame`, given a boolean mask.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // create a mask
    /// let values = df.column("values")?;
    /// let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?;
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set(&mask, Some("not_within_bounds"))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "not_within_bounds" | 1      |
    /// +---------------------+--------+
    /// | "spam"              | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "not_within_bounds" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply<F, S>(&mut self, column: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        // Resolve the name to a position, then defer to the index-based variant.
        match self.find_idx_by_name(column) {
            Some(idx) => self.try_apply_at_idx(idx, f),
            None => Err(PolarsError::NotFound(column.to_string().into())),
        }
    }

    /// Slice the `DataFrame` along the rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Fruit" => &["Apple", "Grape", "Grape", "Fig", "Fig"],
    ///                         "Color" => &["Green", "Red", "White", "White", "Red"])?;
    /// let sl: DataFrame = df.slice(2, 3);
    ///
    /// assert_eq!(sl.shape(), (3, 2));
    /// println!("{}", sl);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Output:
    /// ```text
    /// shape: (3, 2)
    /// +-------+-------+
    /// | Fruit | Color |
    /// | ---   | ---   |
    /// | str   | str   |
    /// +=======+=======+
    /// | Grape | White |
    /// +-------+-------+
    /// | Fig   | White |
    /// +-------+-------+
    /// | Fig   | Red   |
    /// +-------+-------+
    /// ```
    #[must_use]
    pub fn slice(&self, offset: i64, length: usize) -> Self {
        // A slice that covers the whole frame is just a clone.
        let covers_whole_frame = offset == 0 && length == self.height();
        if covers_whole_frame {
            return self.clone();
        }

        let mut sliced = Vec::with_capacity(self.columns.len());
        for s in &self.columns {
            sliced.push(s.slice(offset, length));
        }
        DataFrame::new_no_checks(sliced)
    }

    /// Same as [DataFrame::slice], but the columns are sliced through `apply_columns_par`.
    #[must_use]
    pub fn slice_par(&self, offset: i64, length: usize) -> Self {
        // A slice that covers the whole frame is just a clone.
        let covers_whole_frame = offset == 0 && length == self.height();
        if covers_whole_frame {
            return self.clone();
        }

        let sliced = self.apply_columns_par(&|s| s.slice(offset, length));
        DataFrame::new_no_checks(sliced)
    }

    /// Slice the `DataFrame` along the rows and call `shrink_to_fit` on every
    /// sliced column.
    #[must_use]
    pub fn _slice_and_realloc(&self, offset: i64, length: usize) -> Self {
        // A slice that covers the whole frame is just a clone.
        let covers_whole_frame = offset == 0 && length == self.height();
        if covers_whole_frame {
            return self.clone();
        }

        let columns = self.apply_columns(&|s| {
            let mut sliced = s.slice(offset, length);
            sliced.shrink_to_fit();
            sliced
        });
        DataFrame::new_no_checks(columns)
    }

    /// Get the head of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank by GDP (2021)" => &[1, 2, 3, 4, 5],
    ///         "Continent" => &["North America", "Asia", "Asia", "Europe", "Europe"],
    ///         "Country" => &["United States", "China", "Japan", "Germany", "United Kingdom"],
    ///         "Capital" => &["Washington", "Beijing", "Tokyo", "Berlin", "London"])?;
    /// assert_eq!(countries.shape(), (5, 4));
    ///
    /// println!("{}", countries.head(Some(3)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 4)
    /// +--------------------+---------------+---------------+------------+
    /// | Rank by GDP (2021) | Continent     | Country       | Capital    |
    /// | ---                | ---           | ---           | ---        |
    /// | i32                | str           | str           | str        |
    /// +====================+===============+===============+============+
    /// | 1                  | North America | United States | Washington |
    /// +--------------------+---------------+---------------+------------+
    /// | 2                  | Asia          | China         | Beijing    |
    /// +--------------------+---------------+---------------+------------+
    /// | 3                  | Asia          | Japan         | Tokyo      |
    /// +--------------------+---------------+---------------+------------+
    /// ```
    #[must_use]
    pub fn head(&self, length: Option<usize>) -> Self {
        // Take the head of every column with the same `length`.
        let mut heads = Vec::with_capacity(self.columns.len());
        for s in &self.columns {
            heads.push(s.head(length));
        }
        DataFrame::new_no_checks(heads)
    }

    /// Get the tail of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank (2021)" => &[105, 106, 107, 108, 109],
    ///         "Apple Price (€/kg)" => &[0.75, 0.70, 0.70, 0.65, 0.52],
    ///         "Country" => &["Kosovo", "Moldova", "North Macedonia", "Syria", "Turkey"])?;
    /// assert_eq!(countries.shape(), (5, 3));
    ///
    /// println!("{}", countries.tail(Some(2)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (2, 3)
    /// +-------------+--------------------+---------+
    /// | Rank (2021) | Apple Price (€/kg) | Country |
    /// | ---         | ---                | ---     |
    /// | i32         | f64                | str     |
    /// +=============+====================+=========+
    /// | 108         | 0.65               | Syria   |
    /// +-------------+--------------------+---------+
    /// | 109         | 0.52               | Turkey  |
    /// +-------------+--------------------+---------+
    /// ```
    #[must_use]
    pub fn tail(&self, length: Option<usize>) -> Self {
        // Take the tail of every column with the same `length`.
        let mut tails = Vec::with_capacity(self.columns.len());
        for s in &self.columns {
            tails.push(s.tail(length));
        }
        DataFrame::new_no_checks(tails)
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks(&self) -> RecordBatchIter {
        // The iterator starts at chunk 0 and is told how many chunks this frame reports.
        let n_chunks = self.n_chunks();
        RecordBatchIter {
            columns: &self.columns,
            idx: 0,
            n_chunks,
        }
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches as physical values.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks_physical(&self) -> PhysRecordBatchIter<'_> {
        // one chunk iterator per column
        let iters = self.columns.iter().map(|s| s.chunks().iter()).collect();
        PhysRecordBatchIter { iters }
    }

    /// Get a `DataFrame` in which every column is replaced by its reversed version.
    /// The order of the columns themselves is unchanged.
    #[must_use]
    pub fn reverse(&self) -> Self {
        let mut reversed = Vec::with_capacity(self.columns.len());
        for s in &self.columns {
            reversed.push(s.reverse());
        }
        DataFrame::new_no_checks(reversed)
    }

    /// Shift the values of every column by `periods`; the slots that become empty due
    /// to this operation are filled with `Nones`.
    ///
    /// See the method on [Series](../series/trait.SeriesTrait.html#method.shift) for more info on the `shift` operation.
    #[must_use]
    pub fn shift(&self, periods: i64) -> Self {
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.shift(periods)))
    }

    /// Replace the None values of every column using one of the following strategies:
    /// * Forward fill (replace None with the previous value)
    /// * Backward fill (replace None with the next value)
    /// * Mean fill (replace None with the mean of the whole array)
    /// * Min fill (replace None with the minimum of the whole array)
    /// * Max fill (replace None with the maximum of the whole array)
    ///
    /// An error from filling any individual column is propagated to the caller.
    ///
    /// See the method on [Series](../series/trait.SeriesTrait.html#method.fill_null) for more info on the `fill_null` operation.
    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
        self.try_apply_columns_par(&|s| s.fill_null(strategy))
            .map(DataFrame::new_no_checks)
    }

    /// Summary statistics for a DataFrame. Only summarizes numeric datatypes at the moment and returns nulls for non numeric datatypes.
    /// Tries to keep the output similar to pandas.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("categorical" => &["d","e","f"],
    ///                          "numeric" => &[1, 2, 3],
    ///                          "object" => &["a", "b", "c"])?;
    /// assert_eq!(df1.shape(), (3, 3));
    ///
    /// let df2: DataFrame = df1.describe(None);
    /// assert_eq!(df2.shape(), (8, 4));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (8, 4)
    /// ┌──────────┬─────────────┬─────────┬────────┐
    /// │ describe ┆ categorical ┆ numeric ┆ object │
    /// │ ---      ┆ ---         ┆ ---     ┆ ---    │
    /// │ str      ┆ f64         ┆ f64     ┆ f64    │
    /// ╞══════════╪═════════════╪═════════╪════════╡
    /// │ count    ┆ 3.0         ┆ 3.0     ┆ 3.0    │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ mean     ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ std      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ min      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 25%      ┆ null        ┆ 1.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 50%      ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 75%      ┆ null        ┆ 2.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ max      ┆ null        ┆ 3.0     ┆ null   │
    /// └──────────┴─────────────┴─────────┴────────┘
    /// ```
    #[must_use]
    #[cfg(feature = "describe")]
    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        // Cast every column of `df` to `Float64`.
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let columns = df
                .columns
                .iter()
                .map(|s| s.cast(&DataType::Float64).expect("cast to float failed"))
                .collect::<Vec<Series>>();

            DataFrame::new(columns).unwrap()
        }

        // One-row frame holding the length of every column.
        fn count(df: &DataFrame) -> DataFrame {
            DataFrame::new_no_checks(
                df.apply_columns_par(&|s| Series::new(s.name(), [s.len() as IdxSize])),
            )
        }

        // Quartiles are reported when the caller does not pass percentiles.
        let percentiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        let mut headers: Vec<String> = ["count", "mean", "std", "min"]
            .iter()
            .map(|label| label.to_string())
            .collect();

        let mut rows: Vec<DataFrame> = vec![
            describe_cast(&count(self)),
            describe_cast(&self.mean()),
            describe_cast(&self.std(1)),
            describe_cast(&self.min()),
        ];

        for &p in percentiles {
            let quantile = self
                .quantile(p, QuantileInterpolOptions::Linear)
                .expect("quantile failed");
            rows.push(describe_cast(&quantile));
            headers.push(format!("{}%", p * 100.0));
        }

        // Keep order same as pandas
        rows.push(describe_cast(&self.max()));
        headers.push("max".to_string());

        // Stack the per-statistic rows and prepend the label column.
        let mut summary = concat_df_unchecked(&rows);
        summary
            .insert_at_idx(0, Series::new("describe", headers))
            .expect("insert of header failed");

        summary
    }

    /// Aggregate the columns to their maximum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.max();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 6       | 5       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn max(&self) -> Self {
        // every column is replaced by its `max_as_series` aggregate
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.max_as_series()))
    }

    /// Aggregate the columns to their standard deviation values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.std(1);
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +-------------------+--------------------+
    /// | Die n°1           | Die n°2            |
    /// | ---               | ---                |
    /// | f64               | f64                |
    /// +===================+====================+
    /// | 2.280350850198276 | 1.0954451150103321 |
    /// +-------------------+--------------------+
    /// ```
    #[must_use]
    pub fn std(&self, ddof: u8) -> Self {
        // `ddof` is forwarded unchanged to every column's `std_as_series`.
        DataFrame::new_no_checks(self.apply_columns_par(&|column| column.std_as_series(ddof)))
    }
    /// Aggregate the columns to their variance values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.var(1);
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 5.2     | 1.2     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn var(&self, ddof: u8) -> Self {
        // `ddof` is forwarded unchanged to every column's `var_as_series`.
        let variances = self.apply_columns_par(&|column| column.var_as_series(ddof));

        DataFrame::new_no_checks(variances)
    }

    /// Aggregate the columns to their minimum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.min();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 1       | 2       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn min(&self) -> Self {
        // `min_as_series` is run on every column through `apply_columns_par`.
        DataFrame::new_no_checks(self.apply_columns_par(&|column| column.min_as_series()))
    }

    /// Aggregate the columns to their sum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.sum();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 16      | 16      |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn sum(&self) -> Self {
        // `sum_as_series` is run on every column through `apply_columns_par`.
        let totals = self.apply_columns_par(&|column| column.sum_as_series());

        DataFrame::new_no_checks(totals)
    }

    /// Aggregate the columns to their mean values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.mean();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 3.2     | 3.2     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn mean(&self) -> Self {
        // `mean_as_series` is run on every column through `apply_columns_par`.
        DataFrame::new_no_checks(self.apply_columns_par(&|column| column.mean_as_series()))
    }

    /// Aggregate the columns to their median values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.median();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output (the median is a floating point value, also for integer columns):
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 3.0     | 3.0     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn median(&self) -> Self {
        // `median_as_series` is run on every column through `apply_columns_par`.
        let medians = self.apply_columns_par(&|column| column.median_as_series());
        DataFrame::new_no_checks(medians)
    }

    /// Aggregate the columns to their quantile values.
    ///
    /// # Errors
    ///
    /// Returns an error if `quantile_as_series` fails for any column.
    pub fn quantile(&self, quantile: f64, interpol: QuantileInterpolOptions) -> PolarsResult<Self> {
        self.try_apply_columns_par(&|column| column.quantile_as_series(quantile, interpol))
            .map(DataFrame::new_no_checks)
    }

    /// Aggregate the columns horizontally to their minimum values.
    ///
    /// Returns `Ok(None)` when the `DataFrame` has no columns and a clone of the
    /// only column when there is exactly one.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmin(&self) -> PolarsResult<Option<Series>> {
        // The mask is true where `lhs < rhs` and `lhs` is not null, or where `rhs`
        // is null; `zip_with` then picks between the two columns based on it.
        let pairwise_min = |lhs: &Series, rhs: &Series| {
            let mask = (lhs.lt(rhs)? & lhs.is_not_null()) | rhs.is_null();
            lhs.zip_with(&mask, rhs)
        };

        match self.columns.as_slice() {
            [] => Ok(None),
            [only] => Ok(Some(only.clone())),
            [first, second] => pairwise_min(first, second).map(Some),
            _ => {
                // `try_reduce_with` is a bit slower in parallelism, but that should not
                // matter here: the parallelism is over columns, not over elements.
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|column| Ok(Cow::Borrowed(column)))
                        .try_reduce_with(|lhs, rhs| pairwise_min(&lhs, &rhs).map(Cow::Owned))
                        // the reduction cannot be empty: this arm is only reached
                        // with three or more columns
                        .unwrap()
                        .map(|reduced| Some(reduced.into_owned()))
                })
            }
        }
    }

    /// Aggregate the columns horizontally to their maximum values.
    ///
    /// Returns `Ok(None)` when the `DataFrame` has no columns and a clone of the
    /// only column when there is exactly one.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmax(&self) -> PolarsResult<Option<Series>> {
        // The mask is true where `lhs > rhs` and `lhs` is not null, or where `rhs`
        // is null; `zip_with` then picks between the two columns based on it.
        let pairwise_max = |lhs: &Series, rhs: &Series| {
            let mask = (lhs.gt(rhs)? & lhs.is_not_null()) | rhs.is_null();
            lhs.zip_with(&mask, rhs)
        };

        match self.columns.as_slice() {
            [] => Ok(None),
            [only] => Ok(Some(only.clone())),
            [first, second] => pairwise_max(first, second).map(Some),
            _ => {
                // `try_reduce_with` is a bit slower in parallelism, but that should not
                // matter here: the parallelism is over columns, not over elements.
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|column| Ok(Cow::Borrowed(column)))
                        .try_reduce_with(|lhs, rhs| pairwise_max(&lhs, &rhs).map(Cow::Owned))
                        // the reduction cannot be empty: this arm is only reached
                        // with three or more columns
                        .unwrap()
                        .map(|reduced| Some(reduced.into_owned()))
                })
            }
        }
    }

    /// Aggregate the columns horizontally to their sum values.
    ///
    /// With [`NullStrategy::Ignore`], columns that report `has_validity()` get their
    /// nulls filled with zero before being added; otherwise the columns are added as
    /// they are.
    ///
    /// Returns `Ok(None)` when the `DataFrame` has no columns and a clone of the
    /// only column when there is exactly one.
    pub fn hsum(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>> {
        let add_pair =
            |lhs: &Series, rhs: &Series, none_strategy: NullStrategy| -> PolarsResult<Series> {
                let zero_fill = matches!(none_strategy, NullStrategy::Ignore);
                let lhs = if zero_fill && lhs.has_validity() {
                    lhs.fill_null(FillNullStrategy::Zero)?
                } else {
                    lhs.clone()
                };
                let rhs = if zero_fill && rhs.has_validity() {
                    rhs.fill_null(FillNullStrategy::Zero)?
                } else {
                    rhs.clone()
                };
                Ok(&lhs + &rhs)
            };

        match self.columns.as_slice() {
            [] => Ok(None),
            [only] => Ok(Some(only.clone())),
            [first, second] => add_pair(first, second, none_strategy).map(Some),
            _ => {
                // `try_reduce_with` is a bit slower in parallelism, but that should not
                // matter here: the parallelism is over columns, not over elements.
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|column| Ok(Cow::Borrowed(column)))
                        .try_reduce_with(|lhs, rhs| {
                            add_pair(&lhs, &rhs, none_strategy).map(Cow::Owned)
                        })
                        // the reduction cannot be empty: this arm is only reached
                        // with three or more columns
                        .unwrap()
                        .map(|reduced| Some(reduced.into_owned()))
                })
            }
        }
    }

    /// Aggregate the columns horizontally to their mean values.
    ///
    /// When the `DataFrame` has two or more columns, only the numeric and boolean
    /// columns take part in the mean. Rows without any non-null value yield null
    /// instead of dividing by zero. A single-column `DataFrame` returns a clone of
    /// that column.
    ///
    /// Returns `Ok(None)` when there are no columns, or when there are two or more
    /// columns but none of them is numeric or boolean.
    pub fn hmean(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>> {
        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            _ => {
                // filter first so that only the retained columns are cloned
                let columns: Vec<Series> = self
                    .columns
                    .iter()
                    .filter(|s| {
                        let dtype = s.dtype();
                        dtype.is_numeric() || matches!(dtype, DataType::Boolean)
                    })
                    .cloned()
                    .collect();
                if columns.is_empty() {
                    // nothing to average; without this guard the `unwrap` on the
                    // null-count reduction below would panic
                    return Ok(None);
                }
                let numeric_df = DataFrame::new_no_checks(columns);

                let sum = || numeric_df.hsum(none_strategy);

                let null_count = || {
                    numeric_df
                        .columns
                        .par_iter()
                        .map(|s| s.is_null().cast(&DataType::UInt32).unwrap())
                        .reduce_with(|l, r| &l + &r)
                        // we can unwrap the option, because the guard above ensures
                        // that at least one column is left after filtering
                        .unwrap()
                };

                let (sum, null_count) = POOL.install(|| rayon::join(sum, null_count));
                let sum = sum?;

                // value lengths: len - null_count
                let value_length: UInt32Chunked =
                    (numeric_df.width().sub(&null_count)).u32().unwrap().clone();

                // make sure that we do not divide by zero
                // by replacing with None
                let value_length = value_length
                    .set(&value_length.equal(0), None)?
                    .into_series()
                    .cast(&DataType::Float64)?;

                Ok(sum.map(|sum| &sum / &value_length))
            }
        }
    }

    /// Apply `f` to this `DataFrame` and return its result.
    ///
    /// The `DataFrame` is moved into `f`, which makes it possible to chain several
    /// functions or closures that work on a `DataFrame`. `f` is called exactly once,
    /// so any `FnOnce` is accepted.
    pub fn pipe<F, B>(self, f: F) -> PolarsResult<B>
    where
        F: FnOnce(DataFrame) -> PolarsResult<B>,
    {
        f(self)
    }

    /// Apply `f` to a mutable reference of this `DataFrame` and return its result.
    ///
    /// This is the in-place counterpart of [`DataFrame::pipe`]. `f` is called exactly
    /// once, so any `FnOnce` is accepted.
    pub fn pipe_mut<F, B>(&mut self, f: F) -> PolarsResult<B>
    where
        F: FnOnce(&mut DataFrame) -> PolarsResult<B>,
    {
        f(self)
    }

    /// Apply `f` to this `DataFrame` together with `args` and return its result.
    ///
    /// Both the `DataFrame` and `args` are moved into `f`. `f` is called exactly
    /// once, so any `FnOnce` is accepted.
    pub fn pipe_with_args<F, B, Args>(self, f: F, args: Args) -> PolarsResult<B>
    where
        F: FnOnce(DataFrame, Args) -> PolarsResult<B>,
    {
        f(self, args)
    }

    /// Drop duplicate rows from a `DataFrame`.
    /// *This fails when there is a column of type List in DataFrame*
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///               "flt" => [1., 1., 2., 2., 3., 3.],
    ///               "int" => [1, 1, 2, 2, 3, 3, ],
    ///               "str" => ["a", "a", "b", "b", "c", "c"]
    ///           }?;
    ///
    /// println!("{}", df.drop_duplicates(true, None)?);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Returns
    ///
    /// ```text
    /// +-----+-----+-----+
    /// | flt | int | str |
    /// | --- | --- | --- |
    /// | f64 | i32 | str |
    /// +=====+=====+=====+
    /// | 1   | 1   | "a" |
    /// +-----+-----+-----+
    /// | 2   | 2   | "b" |
    /// +-----+-----+-----+
    /// | 3   | 3   | "c" |
    /// +-----+-----+-----+
    /// ```
    #[deprecated(note = "use DataFrame::unique")]
    pub fn drop_duplicates(
        &self,
        maintain_order: bool,
        subset: Option<&[String]>,
    ) -> PolarsResult<Self> {
        // Both variants are called with `UniqueKeepStrategy::First`; only the
        // ordering guarantee differs.
        let keep = UniqueKeepStrategy::First;
        if maintain_order {
            self.unique_stable(subset, keep)
        } else {
            self.unique(subset, keep)
        }
    }

    /// Drop duplicate rows from a `DataFrame`.
    /// *This fails when there is a column of type List in DataFrame*
    ///
    /// Stable means that the order is maintained. This has a higher cost than an unstable distinct.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///               "flt" => [1., 1., 2., 2., 3., 3.],
    ///               "int" => [1, 1, 2, 2, 3, 3, ],
    ///               "str" => ["a", "a", "b", "b", "c", "c"]
    ///           }?;
    ///
    /// println!("{}", df.unique_stable(None, UniqueKeepStrategy::First)?);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Returns
    ///
    /// ```text
    /// +-----+-----+-----+
    /// | flt | int | str |
    /// | --- | --- | --- |
    /// | f64 | i32 | str |
    /// +=====+=====+=====+
    /// | 1   | 1   | "a" |
    /// +-----+-----+-----+
    /// | 2   | 2   | "b" |
    /// +-----+-----+-----+
    /// | 3   | 3   | "c" |
    /// +-----+-----+-----+
    /// ```
    pub fn unique_stable(
        &self,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<DataFrame> {
        // request the order-maintaining path of `unique_impl`
        let maintain_order = true;
        self.unique_impl(maintain_order, subset, keep)
    }

    /// Unstable distinct: like [`DataFrame::unique_stable`], but without asking
    /// for the original order to be maintained.
    pub fn unique(
        &self,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<DataFrame> {
        let maintain_order = false;
        self.unique_impl(maintain_order, subset, keep)
    }

    /// Shared implementation of [`DataFrame::unique`] and [`DataFrame::unique_stable`].
    ///
    /// The rows are grouped on `subset` (or on all columns when `subset` is `None`)
    /// and one row per group is kept, as selected by `keep`. `maintain_order`
    /// selects the order-maintaining variants.
    ///
    /// # Errors
    ///
    /// Returns an error when the group-by on the selected columns fails.
    fn unique_impl(
        &self,
        maintain_order: bool,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<Self> {
        use UniqueKeepStrategy::*;
        let names = match &subset {
            Some(s) => s.iter().map(|s| &**s).collect(),
            None => self.get_column_names(),
        };

        let columns = match (keep, maintain_order) {
            (First, true) => {
                let gb = self.groupby_stable(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
            }
            (Last, true) => {
                // maintain order by last values, so the sorted groups are not correct as they
                // are sorted by the first value
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                let last_idx: NoNull<IdxCa> = groups
                    .iter()
                    .map(|g| match g {
                        GroupsIndicator::Idx((_first, idx)) => idx[idx.len() - 1],
                        // a slice group covers the rows `first..first + len`, so its
                        // last row is at `first + len - 1`; `first + len` would point
                        // one past the group (and past the frame for the final group)
                        GroupsIndicator::Slice([first, len]) => first + len - 1,
                    })
                    .collect();

                let last_idx = last_idx.sort(false);
                return Ok(unsafe { self.take_unchecked(&last_idx) });
            }
            (First, false) => {
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
            }
            (Last, false) => {
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_last(groups) })
            }
        };
        Ok(DataFrame::new_no_checks(columns))
    }

    /// Get a mask of all the unique rows in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Company" => &["Apple", "Microsoft"],
    ///                         "ISIN" => &["US0378331005", "US5949181045"])?;
    /// let ca: ChunkedArray<BooleanType> = df.is_unique()?;
    ///
    /// assert!(ca.all());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_unique(&self) -> PolarsResult<BooleanChunked> {
        // group on all columns, so that identical rows end up in the same group
        let groups = self.groupby(self.get_column_names())?.take_groups();
        let n_rows = self.height() as IdxSize;
        // NOTE(review): the meaning of the two trailing flags is defined by
        // `is_unique_helper`; `is_duplicated` passes them the other way around.
        Ok(is_unique_helper(groups, n_rows, true, false))
    }

    /// Get a mask of all the duplicated rows in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Company" => &["Alphabet", "Alphabet"],
    ///                         "ISIN" => &["US02079K3059", "US02079K1079"])?;
    /// let ca: ChunkedArray<BooleanType> = df.is_duplicated()?;
    ///
    /// assert!(!ca.all());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_duplicated(&self) -> PolarsResult<BooleanChunked> {
        // group on all columns, so that identical rows end up in the same group
        let groups = self.groupby(self.get_column_names())?.take_groups();
        let n_rows = self.height() as IdxSize;
        // NOTE(review): the meaning of the two trailing flags is defined by
        // `is_unique_helper`; `is_unique` passes them the other way around.
        Ok(is_unique_helper(groups, n_rows, false, true))
    }

    /// Create a new `DataFrame` holding, for every column, a single-value series
    /// with that column's name and its null count.
    #[must_use]
    pub fn null_count(&self) -> Self {
        let mut counts = Vec::with_capacity(self.columns.len());
        for column in &self.columns {
            let n_nulls = column.null_count() as IdxSize;
            counts.push(Series::new(column.name(), &[n_nulls]));
        }
        Self::new_no_checks(counts)
    }

    /// Hash and combine the row values
    ///
    /// The frame is split into as many parts as the pool has threads, the parts are
    /// hashed by `df_rows_to_hashes_threaded`, and the results are appended in order
    /// into one rechunked array.
    #[cfg(feature = "row_hash")]
    pub fn hash_rows(
        &mut self,
        hasher_builder: Option<RandomState>,
    ) -> PolarsResult<UInt64Chunked> {
        let n_parts = POOL.current_num_threads();
        let parts = split_df(self, n_parts)?;
        let (part_hashes, _) = df_rows_to_hashes_threaded(&parts, hasher_builder)?;

        let mut part_hashes = part_hashes.into_iter();
        let mut combined = part_hashes.next().unwrap();
        for part in part_hashes {
            combined.append(&part);
        }
        Ok(combined.rechunk())
    }

    /// Get the supertype of the columns in this DataFrame
    ///
    /// Returns `None` when there are no columns. Otherwise the dtypes are combined
    /// from left to right with `try_get_supertype`, and the first failure is returned
    /// as `Some(Err(_))`.
    pub fn get_supertype(&self) -> Option<PolarsResult<DataType>> {
        let mut dtypes = self.columns.iter().map(|column| column.dtype());
        let first = dtypes.next()?.clone();
        Some(dtypes.try_fold(first, |acc, dtype| try_get_supertype(&acc, dtype)))
    }

    #[cfg(feature = "chunked_ids")]
    /// Take elements by a slice of [`ChunkId`]s.
    ///
    /// The columns are processed through `apply_columns`, in contrast to
    /// `take_chunked_unchecked`, which uses `apply_columns_par`.
    ///
    /// # Safety
    /// Does not do any bound checks.
    /// `sorted` indicates if the chunks are sorted.
    #[doc(hidden)]
    pub unsafe fn _take_chunked_unchecked_seq(&self, idx: &[ChunkId], sorted: IsSorted) -> Self {
        let cols = self.apply_columns(&|s| s._take_chunked_unchecked(idx, sorted));

        DataFrame::new_no_checks(cols)
    }
    #[cfg(feature = "chunked_ids")]
    /// Take elements by a slice of optional [`ChunkId`]s.
    ///
    /// The columns are processed through `apply_columns`; `Utf8` columns use the
    /// `_threaded` take variant, all other dtypes the plain one.
    ///
    /// # Safety
    /// Does not do any bound checks.
    #[doc(hidden)]
    pub unsafe fn _take_opt_chunked_unchecked_seq(&self, idx: &[Option<ChunkId>]) -> Self {
        let cols = self.apply_columns(&|s| match s.dtype() {
            DataType::Utf8 => s._take_opt_chunked_unchecked_threaded(idx, true),
            _ => s._take_opt_chunked_unchecked(idx),
        });

        DataFrame::new_no_checks(cols)
    }

    #[cfg(feature = "chunked_ids")]
    pub(crate) unsafe fn take_chunked_unchecked(&self, idx: &[ChunkId], sorted: IsSorted) -> Self {
        // `Utf8` columns use the `_threaded` take variant (called with `true`);
        // every other dtype uses the plain chunked take.
        let taken = self.apply_columns_par(&|s| {
            if matches!(s.dtype(), DataType::Utf8) {
                s._take_chunked_unchecked_threaded(idx, sorted, true)
            } else {
                s._take_chunked_unchecked(idx, sorted)
            }
        });

        DataFrame::new_no_checks(taken)
    }

    #[cfg(feature = "chunked_ids")]
    pub(crate) unsafe fn take_opt_chunked_unchecked(&self, idx: &[Option<ChunkId>]) -> Self {
        // `Utf8` columns use the `_threaded` take variant (called with `true`);
        // every other dtype uses the plain optional chunked take.
        let taken = self.apply_columns_par(&|s| {
            if matches!(s.dtype(), DataType::Utf8) {
                s._take_opt_chunked_unchecked_threaded(idx, true)
            } else {
                s._take_opt_chunked_unchecked(idx)
            }
        });

        DataFrame::new_no_checks(taken)
    }

    /// Take rows by `idx`, delegating to `_take_unchecked_slice2` with
    /// `IsSorted::Not`.
    ///
    /// Be careful with allowing threads when calling this in a large hot loop:
    /// every thread split may be on the rayon stack and lead to a stack overflow.
    #[doc(hidden)]
    pub unsafe fn _take_unchecked_slice(&self, idx: &[IdxSize], allow_threads: bool) -> Self {
        let sorted = IsSorted::Not;
        self._take_unchecked_slice2(idx, allow_threads, sorted)
    }

    #[doc(hidden)]
    /// Take rows by the indices in `idx`, passing `sorted` on as the sortedness of
    /// the indices.
    ///
    /// The slice is wrapped, without copying, in a temporary `IdxCa` that is handed
    /// to `take_unchecked_impl`. Afterwards the temporary is taken apart again so
    /// that the memory borrowed from `idx` is never freed by this function.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably every index in `idx` must be in bounds for this
    /// `DataFrame`, and `sorted` must describe `idx` correctly — confirm against
    /// `take_unchecked_impl`.
    pub unsafe fn _take_unchecked_slice2(
        &self,
        idx: &[IdxSize],
        allow_threads: bool,
        sorted: IsSorted,
    ) -> Self {
        // Debug-only sanity check: compares just the first and the last index
        // against the claimed order (for slices longer than two elements).
        #[cfg(debug_assertions)]
        {
            if idx.len() > 2 {
                match sorted {
                    IsSorted::Ascending => {
                        assert!(idx[0] <= idx[idx.len() - 1]);
                    }
                    IsSorted::Descending => {
                        assert!(idx[0] >= idx[idx.len() - 1]);
                    }
                    _ => {}
                }
            }
        }
        let ptr = idx.as_ptr() as *mut IdxSize;
        let len = idx.len();

        // create a temporary vec. we will not drop it.
        // (it aliases the memory of `idx`, which this function does not own)
        let mut ca = IdxCa::from_vec("", Vec::from_raw_parts(ptr, len, len));
        ca.set_sorted2(sorted);
        let out = self.take_unchecked_impl(&ca, allow_threads);

        // ref count of buffers should be one because we dropped all allocations
        let arr = {
            // pull the single chunk out of `ca`, leaving `ca` without chunks
            let arr_ref = std::mem::take(&mut ca.chunks).pop().unwrap();
            arr_ref
                .as_any()
                .downcast_ref::<PrimitiveArray<IdxSize>>()
                .unwrap()
                .clone()
        };
        // the only owned heap allocation is the `Vec` we created and must not be dropped
        // (`ManuallyDrop` keeps the value's destructor from running)
        let _ = std::mem::ManuallyDrop::new(arr.into_mut().right().unwrap());
        out
    }

    #[cfg(feature = "partition_by")]
    #[doc(hidden)]
    pub fn _partition_by_impl(
        &self,
        cols: &[String],
        stable: bool,
    ) -> PolarsResult<Vec<DataFrame>> {
        let groups = match stable {
            true => self.groupby_stable(cols)?.take_groups(),
            false => self.groupby(cols)?.take_groups(),
        };

        // The groups are processed in parallel, but every take itself runs with
        // `allow_threads = false`: there is a lot of parallelization in take and
        // nesting it here may easily overflow the stack.
        let partitions = POOL.install(|| match groups {
            GroupsProxy::Idx(idx) => idx
                .into_par_iter()
                .map(|(_, group)| {
                    // groups are in bounds
                    unsafe { self._take_unchecked_slice(&group, false) }
                })
                .collect::<Vec<_>>(),
            GroupsProxy::Slice { groups, .. } => groups
                .into_par_iter()
                .map(|[first, len]| self.slice(first as i64, len as usize))
                .collect::<Vec<_>>(),
        });
        Ok(partitions)
    }

    /// Split into multiple DataFrames, one per group of the given columns.
    ///
    /// Uses the non-stable grouping; see [`DataFrame::partition_by_stable`] for the
    /// order-maintaining variant.
    #[cfg(feature = "partition_by")]
    #[cfg_attr(docsrs, doc(cfg(feature = "partition_by")))]
    pub fn partition_by(&self, cols: impl IntoVec<String>) -> PolarsResult<Vec<DataFrame>> {
        let stable = false;
        self._partition_by_impl(&cols.into_vec(), stable)
    }

    /// Split into multiple DataFrames, one per group of the given columns.
    /// The order of the groups is maintained.
    #[cfg(feature = "partition_by")]
    #[cfg_attr(docsrs, doc(cfg(feature = "partition_by")))]
    pub fn partition_by_stable(&self, cols: impl IntoVec<String>) -> PolarsResult<Vec<DataFrame>> {
        let stable = true;
        self._partition_by_impl(&cols.into_vec(), stable)
    }

    /// Unnest the given `Struct` columns: the fields of each `Struct` column are
    /// inserted as columns in its place.
    ///
    /// # Errors
    ///
    /// Returns an error when a named column is not a `Struct` or does not exist.
    #[cfg(feature = "dtype-struct")]
    #[cfg_attr(docsrs, doc(cfg(feature = "dtype-struct")))]
    pub fn unnest<I: IntoVec<String>>(&self, cols: I) -> PolarsResult<DataFrame> {
        // collect into a set, as expected by `unnest_impl`
        let wanted: PlHashSet<String> = cols.into_vec().into_iter().collect();
        self.unnest_impl(wanted)
    }

    #[cfg(feature = "dtype-struct")]
    fn unnest_impl(&self, cols: PlHashSet<String>) -> PolarsResult<DataFrame> {
        let width = self.width();
        let mut expanded = Vec::with_capacity(std::cmp::min(width * 2, width + 128));
        let mut n_unnested = 0;
        for column in &self.columns {
            if !cols.contains(column.name()) {
                // not requested: keep the column as it is
                expanded.push(column.clone());
                continue;
            }
            // requested: replace the column by the fields of the struct
            let struct_ca = column.struct_()?;
            expanded.extend_from_slice(struct_ca.fields());
            n_unnested += 1;
        }
        if n_unnested != cols.len() {
            // one or more requested names matched no column; report the first one
            // (in the iteration order of the set) that is absent from the schema
            let schema = self.schema();
            if let Some(missing) = cols.into_iter().find(|name| schema.get(name).is_none()) {
                return Err(PolarsError::NotFound(missing.into()));
            }
        }
        DataFrame::new(expanded)
    }
}

/// Iterator that yields one `ArrowChunk` per chunk index, built from the chunk
/// with that index of every column.
pub struct RecordBatchIter<'a> {
    // the columns whose chunks are converted with `to_arrow`
    columns: &'a Vec<Series>,
    // index of the next chunk to yield
    idx: usize,
    // iteration stops once `idx` reaches this value
    n_chunks: usize,
}

impl<'a> Iterator for RecordBatchIter<'a> {
    type Item = ArrowChunk;

    /// Yields the batch built from chunk number `idx` of every column, or `None`
    /// once `idx` has reached `n_chunks`.
    fn next(&mut self) -> Option<Self::Item> {
        if self.idx >= self.n_chunks {
            return None;
        }
        // gather the chunk with the same number from every column
        let chunk_no = self.idx;
        let arrays = self
            .columns
            .iter()
            .map(|column| column.to_arrow(chunk_no))
            .collect();
        self.idx += 1;

        Some(ArrowChunk::new(arrays))
    }

    /// Exact hint: the number of chunk indices that have not been visited yet.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.n_chunks - self.idx;
        (remaining, Some(remaining))
    }
}

/// Iterator that yields an `ArrowChunk` made of the next array of every inner
/// iterator.
pub struct PhysRecordBatchIter<'a> {
    // one slice iterator over `ArrayRef`s per column
    // (presumably over each column's chunks — confirm at the construction site)
    iters: Vec<std::slice::Iter<'a, ArrayRef>>,
}

impl Iterator for PhysRecordBatchIter<'_> {
    type Item = ArrowChunk;

    /// Advances the inner iterators in order and bundles their arrays into a chunk;
    /// returns `None` as soon as one of them is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let mut arrays = Vec::with_capacity(self.iters.len());
        for phys_iter in self.iters.iter_mut() {
            arrays.push(phys_iter.next()?.clone());
        }
        Some(ArrowChunk::new(arrays))
    }

    /// Reports the hint of the first inner iterator, or `(0, None)` when there is
    /// no inner iterator at all.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iters
            .first()
            .map_or((0, None), |iter| iter.size_hint())
    }
}

impl Default for DataFrame {
    fn default() -> Self {
        DataFrame::new_no_checks(vec![])
    }
}

impl From<DataFrame> for Vec<Series> {
    /// Consumes the `DataFrame` and returns its columns.
    fn from(df: DataFrame) -> Self {
        df.columns
    }
}

/// Utility to test whether `right` can be vstacked onto / used to extend `left`.
///
/// Both series must agree on dtype and on name. When both differ, the dtype
/// mismatch is the one that is reported.
fn can_extend(left: &Series, right: &Series) -> PolarsResult<()> {
    if left.dtype() != right.dtype() {
        let msg = format!(
            "cannot vstack: because column datatypes (dtypes) in the two DataFrames do not match for \
                        left.name='{}' with left.dtype={} != right.dtype={} with right.name='{}'",
            left.name(),
            left.dtype(),
            right.dtype(),
            right.name()
        );
        return Err(PolarsError::SchemaMisMatch(msg.into()));
    }
    if left.name() != right.name() {
        let msg = format!(
            "cannot vstack: because column names in the two DataFrames do not match for \
                        left.name='{}' != right.name='{}'",
            left.name(),
            right.name()
        );
        return Err(PolarsError::SchemaMisMatch(msg.into()));
    }
    Ok(())
}

src/series/unstable.rs (line 62)

    /// Build a new `Series` from a clone of the first chunk of the series behind
    /// `self.container`, keeping its name and dtype.
    ///
    /// NOTE(review): only chunk 0 is copied. More than one chunk is caught by a
    /// `debug_assert` only, so builds without debug assertions would ignore the rest.
    pub fn deep_clone(&self) -> Series {
        // SAFETY (to be confirmed): dereferencing `self.container` presumably relies on
        // the pointee still being alive, and `get_unchecked(0)` needs at least one chunk.
        unsafe {
            let s = &(*self.container);
            debug_assert_eq!(s.chunks().len(), 1);
            let array_ref = s.chunks().get_unchecked(0).clone();
            let name = s.name();
            Series::from_chunks_and_dtype_unchecked(name, vec![array_ref], s.dtype())
        }
    }

src/frame/cross_join.rs (line 105)

    /// Cross join `self` with `other` and rename the resulting columns
    /// positionally to `names`.
    ///
    /// Columns and names are paired with `zip`, so surplus entries on either
    /// side are left as they are.
    pub fn _cross_join_with_names(
        &self,
        other: &DataFrame,
        names: &[String],
    ) -> PolarsResult<DataFrame> {
        let (mut l_df, r_df) = self.cross_join_dfs(other, None, false)?;

        let columns = l_df.get_columns_mut();
        columns.extend_from_slice(&r_df.columns);

        for (column, new_name) in columns.iter_mut().zip(names) {
            // Only touch columns whose name actually changes.
            if column.name() != new_name {
                column.rename(new_name);
            }
        }
        Ok(l_df)
    }

src/frame/groupby/mod.rs (line 385)

    /// Return `(keys, columns_to_aggregate)`.
    ///
    /// Without an explicit selection, every column of the frame that is not
    /// one of the group keys is aggregated.
    fn prepare_agg(&self) -> PolarsResult<(Vec<Series>, Vec<Series>)> {
        let selection = if let Some(selected) = &self.selected_agg {
            selected.clone()
        } else {
            let key_names: Vec<&str> = self.selected_keys.iter().map(|key| key.name()).collect();
            self.df
                .get_column_names()
                .into_iter()
                .filter(|name| !key_names.contains(name))
                .map(String::from)
                .collect()
        };

        let keys = self.keys();
        let agg_col = self.df.select_series(selection)?;
        Ok((keys, agg_col))
    }

    /// Aggregate grouped series and compute the mean per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(&["temp", "rain"]).mean()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+-----------+-----------+
    /// | date       | temp_mean | rain_mean |
    /// | ---        | ---       | ---       |
    /// | Date       | f64       | f64       |
    /// +============+===========+===========+
    /// | 2020-08-23 | 9         | 0.1       |
    /// +------------+-----------+-----------+
    /// | 2020-08-22 | 4         | 0.155     |
    /// +------------+-----------+-----------+
    /// | 2020-08-21 | 15        | 0.15      |
    /// +------------+-----------+-----------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn mean(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        // Keys stay in front; one renamed aggregate is appended per selected column.
        cols.extend(agg_cols.into_iter().map(|agg_col| {
            let out_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Mean);
            // NOTE(review): assumes `self.groups` indexes into `agg_col` — confirm
            // against the safety contract of `agg_mean`.
            let mut aggregated = unsafe { agg_col.agg_mean(&self.groups) };
            aggregated.rename(&out_name);
            aggregated
        }));
        DataFrame::new(cols)
    }

    /// Aggregate grouped series and compute the sum per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).sum()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+----------+
    /// | date       | temp_sum |
    /// | ---        | ---      |
    /// | Date       | i32      |
    /// +============+==========+
    /// | 2020-08-23 | 9        |
    /// +------------+----------+
    /// | 2020-08-22 | 8        |
    /// +------------+----------+
    /// | 2020-08-21 | 30       |
    /// +------------+----------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn sum(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        // Keys stay in front; one renamed aggregate is appended per selected column.
        cols.extend(agg_cols.into_iter().map(|agg_col| {
            let out_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Sum);
            // NOTE(review): assumes `self.groups` indexes into `agg_col` — confirm
            // against the safety contract of `agg_sum`.
            let mut aggregated = unsafe { agg_col.agg_sum(&self.groups) };
            aggregated.rename(&out_name);
            aggregated
        }));
        DataFrame::new(cols)
    }

    /// Aggregate grouped series and compute the minimal value per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).min()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+----------+
    /// | date       | temp_min |
    /// | ---        | ---      |
    /// | Date       | i32      |
    /// +============+==========+
    /// | 2020-08-23 | 9        |
    /// +------------+----------+
    /// | 2020-08-22 | 1        |
    /// +------------+----------+
    /// | 2020-08-21 | 10       |
    /// +------------+----------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn min(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        // Keys stay in front; one renamed aggregate is appended per selected column.
        cols.extend(agg_cols.into_iter().map(|agg_col| {
            let out_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Min);
            // NOTE(review): assumes `self.groups` indexes into `agg_col` — confirm
            // against the safety contract of `agg_min`.
            let mut aggregated = unsafe { agg_col.agg_min(&self.groups) };
            aggregated.rename(&out_name);
            aggregated
        }));
        DataFrame::new(cols)
    }

    /// Aggregate grouped series and compute the maximum value per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).max()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+----------+
    /// | date       | temp_max |
    /// | ---        | ---      |
    /// | Date       | i32      |
    /// +============+==========+
    /// | 2020-08-23 | 9        |
    /// +------------+----------+
    /// | 2020-08-22 | 7        |
    /// +------------+----------+
    /// | 2020-08-21 | 20       |
    /// +------------+----------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn max(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        // Keys stay in front; one renamed aggregate is appended per selected column.
        cols.extend(agg_cols.into_iter().map(|agg_col| {
            let out_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Max);
            // NOTE(review): assumes `self.groups` indexes into `agg_col` — confirm
            // against the safety contract of `agg_max`.
            let mut aggregated = unsafe { agg_col.agg_max(&self.groups) };
            aggregated.rename(&out_name);
            aggregated
        }));
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and find the first value per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).first()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+------------+
    /// | date       | temp_first |
    /// | ---        | ---        |
    /// | Date       | i32        |
    /// +============+============+
    /// | 2020-08-23 | 9          |
    /// +------------+------------+
    /// | 2020-08-22 | 7          |
    /// +------------+------------+
    /// | 2020-08-21 | 20         |
    /// +------------+------------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn first(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        // Keys stay in front; one renamed aggregate is appended per selected column.
        cols.extend(agg_cols.into_iter().map(|agg_col| {
            let out_name = fmt_groupby_column(agg_col.name(), GroupByMethod::First);
            // NOTE(review): assumes `self.groups` indexes into `agg_col` — confirm
            // against the safety contract of `agg_first`.
            let mut aggregated = unsafe { agg_col.agg_first(&self.groups) };
            aggregated.rename(&out_name);
            aggregated
        }));
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and return the last value per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).last()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+------------+
    /// | date       | temp_last  |
    /// | ---        | ---        |
    /// | Date       | i32        |
    /// +============+============+
    /// | 2020-08-23 | 9          |
    /// +------------+------------+
    /// | 2020-08-22 | 1          |
    /// +------------+------------+
    /// | 2020-08-21 | 10         |
    /// +------------+------------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn last(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        // Keys stay in front; one renamed aggregate is appended per selected column.
        cols.extend(agg_cols.into_iter().map(|agg_col| {
            let out_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Last);
            // NOTE(review): assumes `self.groups` indexes into `agg_col` — confirm
            // against the safety contract of `agg_last`.
            let mut aggregated = unsafe { agg_col.agg_last(&self.groups) };
            aggregated.rename(&out_name);
            aggregated
        }));
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` by counting the number of unique values.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).n_unique()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+---------------+
    /// | date       | temp_n_unique |
    /// | ---        | ---           |
    /// | Date       | u32           |
    /// +============+===============+
    /// | 2020-08-23 | 1             |
    /// +------------+---------------+
    /// | 2020-08-22 | 2             |
    /// +------------+---------------+
    /// | 2020-08-21 | 2             |
    /// +------------+---------------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn n_unique(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        // Keys stay in front; one renamed aggregate is appended per selected column.
        cols.extend(agg_cols.into_iter().map(|agg_col| {
            let out_name = fmt_groupby_column(agg_col.name(), GroupByMethod::NUnique);
            // NOTE(review): assumes `self.groups` indexes into `agg_col` — confirm
            // against the safety contract of `agg_n_unique`.
            let mut aggregated = unsafe { agg_col.agg_n_unique(&self.groups) };
            aggregated.rename(&out_name);
            aggregated.into_series()
        }));
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and determine the quantile per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// # use polars_arrow::prelude::QuantileInterpolOptions;
    ///
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).quantile(0.2, QuantileInterpolOptions::default())
    /// }
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn quantile(
        &self,
        quantile: f64,
        interpol: QuantileInterpolOptions,
    ) -> PolarsResult<DataFrame> {
        // Anything outside the closed interval [0.0, 1.0] is rejected up front.
        let in_range = (0.0..=1.0).contains(&quantile);
        if !in_range {
            return Err(PolarsError::ComputeError(
                "quantile should be within 0.0 and 1.0".into(),
            ));
        }

        let (mut cols, agg_cols) = self.prepare_agg()?;
        // Keys stay in front; one renamed aggregate is appended per selected column.
        cols.extend(agg_cols.into_iter().map(|agg_col| {
            let method = GroupByMethod::Quantile(quantile, interpol);
            let out_name = fmt_groupby_column(agg_col.name(), method);
            // NOTE(review): assumes `self.groups` indexes into `agg_col` — confirm
            // against the safety contract of `agg_quantile`.
            let mut aggregated = unsafe { agg_col.agg_quantile(&self.groups, quantile, interpol) };
            aggregated.rename(&out_name);
            aggregated.into_series()
        }));
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and determine the median per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).median()
    /// }
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn median(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        // Keys stay in front; one renamed aggregate is appended per selected column.
        cols.extend(agg_cols.into_iter().map(|agg_col| {
            let out_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Median);
            // NOTE(review): assumes `self.groups` indexes into `agg_col` — confirm
            // against the safety contract of `agg_median`.
            let mut aggregated = unsafe { agg_col.agg_median(&self.groups) };
            aggregated.rename(&out_name);
            aggregated.into_series()
        }));
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and determine the variance per group.
    ///
    /// `ddof` is forwarded unchanged to the per-column `agg_var` call.
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn var(&self, ddof: u8) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        // Keys stay in front; one renamed aggregate is appended per selected column.
        cols.extend(agg_cols.into_iter().map(|agg_col| {
            let out_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Var(ddof));
            // NOTE(review): assumes `self.groups` indexes into `agg_col` — confirm
            // against the safety contract of `agg_var`.
            let mut aggregated = unsafe { agg_col.agg_var(&self.groups, ddof) };
            aggregated.rename(&out_name);
            aggregated.into_series()
        }));
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and determine the standard deviation per group.
    ///
    /// `ddof` is forwarded unchanged to the per-column `agg_std` call.
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn std(&self, ddof: u8) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        // Keys stay in front; one renamed aggregate is appended per selected column.
        cols.extend(agg_cols.into_iter().map(|agg_col| {
            let out_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Std(ddof));
            // NOTE(review): assumes `self.groups` indexes into `agg_col` — confirm
            // against the safety contract of `agg_std`.
            let mut aggregated = unsafe { agg_col.agg_std(&self.groups, ddof) };
            aggregated.rename(&out_name);
            aggregated.into_series()
        }));
        DataFrame::new(cols)
    }

    /// Aggregate grouped series and compute the number of values per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).count()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+------------+
    /// | date       | temp_count |
    /// | ---        | ---        |
    /// | Date       | u32        |
    /// +============+============+
    /// | 2020-08-23 | 1          |
    /// +------------+------------+
    /// | 2020-08-22 | 2          |
    /// +------------+------------+
    /// | 2020-08-21 | 2          |
    /// +------------+------------+
    /// ```
    pub fn count(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        // The group sizes come from `self.groups` alone and are the same for every
        // aggregated column, so compute them once instead of once per column.
        let counts = self.groups.group_count();

        cols.extend(agg_cols.iter().map(|agg_col| {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Count);
            // Each output column gets its own copy so it can carry its own name.
            let mut ca = counts.clone();
            ca.rename(&new_name);
            ca.into_series()
        }));
        DataFrame::new(cols)
    }

    /// Get the groupby group indexes.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.groups()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +--------------+------------+
    /// | date         | groups     |
    /// | ---          | ---        |
    /// | Date(days)   | list [u32] |
    /// +==============+============+
    /// | 2020-08-23   | "[3]"      |
    /// +--------------+------------+
    /// | 2020-08-22   | "[2, 4]"   |
    /// +--------------+------------+
    /// | 2020-08-21   | "[0, 1]"   |
    /// +--------------+------------+
    /// ```
    pub fn groups(&self) -> PolarsResult<DataFrame> {
        let mut cols = self.keys();

        // The group indexes become one list column appended after the keys.
        let mut groups_col = self.groups.as_list_chunked();
        groups_col.rename(&fmt_groupby_column("", GroupByMethod::Groups));

        cols.push(groups_col.into_series());
        DataFrame::new(cols)
    }

    /// Aggregate the groups of the groupby operation into lists.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     // GroupBy and aggregate to Lists
    ///     df.groupby(["date"])?.select(["temp"]).agg_list()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+------------------------+
    /// | date       | temp_agg_list          |
    /// | ---        | ---                    |
    /// | Date       | list [i32]             |
    /// +============+========================+
    /// | 2020-08-23 | "[Some(9)]"            |
    /// +------------+------------------------+
    /// | 2020-08-22 | "[Some(7), Some(1)]"   |
    /// +------------+------------------------+
    /// | 2020-08-21 | "[Some(20), Some(10)]" |
    /// +------------+------------------------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn agg_list(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        // Keys stay in front; one renamed aggregate is appended per selected column.
        cols.extend(agg_cols.into_iter().map(|agg_col| {
            let out_name = fmt_groupby_column(agg_col.name(), GroupByMethod::List);
            // NOTE(review): assumes `self.groups` indexes into `agg_col` — confirm
            // against the safety contract of `agg_list`.
            let mut aggregated = unsafe { agg_col.agg_list(&self.groups) };
            aggregated.rename(&out_name);
            aggregated
        }));
        DataFrame::new(cols)
    }

Additional examples can be found in:

source

fn field(&self) -> Cow<'_, Field>

Get field (used in schema)

Examples found in repository ?

src/frame/mod.rs (line 497)

    /// Build a `Schema` from the (owned) field of every column.
    pub fn schema(&self) -> Schema {
        let fields = self.iter().map(|column| column.field().into_owned());
        Schema::from(fields)
    }

    /// Get a reference to the `DataFrame` columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Adenine", "Cytosine", "Guanine", "Thymine"],
    ///                         "Symbol" => &["A", "C", "G", "T"])?;
    /// let columns: &Vec<Series> = df.get_columns();
    ///
    /// assert_eq!(columns[0].name(), "Name");
    /// assert_eq!(columns[1].name(), "Symbol");
    /// # Ok::<(), PolarsError>(())
    /// ```
    #[inline]
    pub fn get_columns(&self) -> &Vec<Series> {
        // Plain borrow of the column vector; nothing is copied.
        &self.columns
    }

    /// Get mutable access to the column vector of the `DataFrame`.
    ///
    /// Only compiled when the `private` feature is enabled.
    ///
    /// NOTE(review): this accessor performs no checks itself, so keeping column
    /// names and lengths consistent is presumably left to the caller.
    #[cfg(feature = "private")]
    #[inline]
    pub fn get_columns_mut(&mut self) -> &mut Vec<Series> {
        &mut self.columns
    }

    /// Iterator over the columns as `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Name", &["Pythagoras' theorem", "Shannon entropy"]);
    /// let s2: Series = Series::new("Formula", &["a²+b²=c²", "H=-Σ[P(x)log|P(x)|]"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2.clone()])?;
    ///
    /// let mut iterator = df.iter();
    ///
    /// assert_eq!(iterator.next(), Some(&s1));
    /// assert_eq!(iterator.next(), Some(&s2));
    /// assert_eq!(iterator.next(), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn iter(&self) -> std::slice::Iter<'_, Series> {
        // Borrowing slice iterator over the columns, in stored order.
        self.columns.iter()
    }

    /// Get the column names of the `DataFrame`, in column order.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Language" => &["Rust", "Python"],
    ///                         "Designer" => &["Graydon Hoare", "Guido van Rossum"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Language", "Designer"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn get_column_names(&self) -> Vec<&str> {
        let mut names = Vec::with_capacity(self.columns.len());
        for column in &self.columns {
            names.push(column.name());
        }
        names
    }

    /// Get the column names as owned `String`s, in column order.
    pub fn get_column_names_owned(&self) -> Vec<String> {
        let mut names = Vec::with_capacity(self.columns.len());
        for column in &self.columns {
            names.push(column.name().to_owned());
        }
        names
    }

    /// Set the column names.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Mathematical set" => &["ℕ", "ℤ", "𝔻", "ℚ", "ℝ", "ℂ"])?;
    /// df.set_column_names(&["Set"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Set"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn set_column_names<S: AsRef<str>>(&mut self, names: &[S]) -> PolarsResult<()> {
        let width = self.columns.len();

        // One name per column is required.
        if names.len() != width {
            return Err(PolarsError::ShapeMisMatch("the provided slice with column names has not the same size as the DataFrame's width".into()));
        }

        // Collapsing the names into a set exposes duplicates.
        let distinct: AHashSet<&str, ahash::RandomState> =
            names.iter().map(|name| name.as_ref()).collect();
        if distinct.len() != width {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }

        // All checks passed: rename positionally.
        for (column, name) in self.columns.iter_mut().zip(names) {
            column.rename(name.as_ref());
        }
        Ok(())
    }

    /// Get the data types of the columns in the DataFrame.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let venus_air: DataFrame = df!("Element" => &["Carbon dioxide", "Nitrogen"],
    ///                                "Fraction" => &[0.965, 0.035])?;
    ///
    /// assert_eq!(venus_air.dtypes(), &[DataType::Utf8, DataType::Float64]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn dtypes(&self) -> Vec<DataType> {
        let mut dtypes = Vec::with_capacity(self.columns.len());
        for column in &self.columns {
            dtypes.push(column.dtype().clone());
        }
        dtypes
    }

    /// The number of chunks per column.
    ///
    /// Read from the first column; `0` when the `DataFrame` has no columns.
    pub fn n_chunks(&self) -> usize {
        self.columns.first().map_or(0, |first| first.n_chunks())
    }

    /// Get the schema fields of the `DataFrame` as an owned `Vec<Field>`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let earth: DataFrame = df!("Surface type" => &["Water", "Land"],
    ///                            "Fraction" => &[0.708, 0.292])?;
    ///
    /// let f1: Field = Field::new("Surface type", DataType::Utf8);
    /// let f2: Field = Field::new("Fraction", DataType::Float64);
    ///
    /// assert_eq!(earth.fields(), &[f1, f2]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn fields(&self) -> Vec<Field> {
        let mut fields = Vec::with_capacity(self.columns.len());
        for column in &self.columns {
            // `field()` hands out a `Cow`; take ownership of it.
            fields.push(column.field().into_owned());
        }
        fields
    }

More examples

Hide additional examples

src/testing.rs (line 68)

    /// Two series are equal when length, field and null count agree and the
    /// element-wise comparison is true at every position.
    fn eq(&self, other: &Self) -> bool {
        // Cheap metadata checks first; values are only compared when all of them agree.
        if self.len() != other.len()
            || self.field() != other.field()
            || self.null_count() != other.null_count()
        {
            return false;
        }

        let n_equal = self
            .equal(other)
            .unwrap()
            .sum()
            .map(|s| s as usize)
            .unwrap_or(0);
        n_equal == self.len()
    }

source

fn dtype(&self) -> &DataType

Get datatype of series.

Examples found in repository ?

src/frame/mod.rs (line 614)

    pub fn dtypes(&self) -> Vec<DataType> {
        let mut dtypes = Vec::with_capacity(self.columns.len());
        for column in &self.columns {
            dtypes.push(column.dtype().clone());
        }
        dtypes
    }

    /// The number of chunks per column.
    ///
    /// Read from the first column; `0` when the `DataFrame` has no columns.
    pub fn n_chunks(&self) -> usize {
        self.columns.first().map_or(0, |first| first.n_chunks())
    }

    /// Get the schema fields of the `DataFrame` as an owned `Vec<Field>`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let earth: DataFrame = df!("Surface type" => &["Water", "Land"],
    ///                            "Fraction" => &[0.708, 0.292])?;
    ///
    /// let f1: Field = Field::new("Surface type", DataType::Utf8);
    /// let f2: Field = Field::new("Fraction", DataType::Float64);
    ///
    /// assert_eq!(earth.fields(), &[f1, f2]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn fields(&self) -> Vec<Field> {
        let mut fields = Vec::with_capacity(self.columns.len());
        for column in &self.columns {
            // `field()` hands out a `Cow`; take ownership of it.
            fields.push(column.field().into_owned());
        }
        fields
    }

    /// Get (height, width) of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("1" => &[1, 2, 3, 4, 5])?;
    /// let df2: DataFrame = df!("1" => &[1, 2, 3, 4, 5],
    ///                          "2" => &[1, 2, 3, 4, 5])?;
    ///
    /// assert_eq!(df0.shape(), (0 ,0));
    /// assert_eq!(df1.shape(), (5, 1));
    /// assert_eq!(df2.shape(), (5, 2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn shape(&self) -> (usize, usize) {
        match self.columns.first() {
            None => (0, 0),
            // The height is read from the first column.
            Some(first) => (first.len(), self.columns.len()),
        }
    }

    /// Get the width of the `DataFrame` which is the number of columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("Series 1" => &[0; 0])?;
    /// let df2: DataFrame = df!("Series 1" => &[0; 0],
    ///                          "Series 2" => &[0; 0])?;
    ///
    /// assert_eq!(df0.width(), 0);
    /// assert_eq!(df1.width(), 1);
    /// assert_eq!(df2.width(), 2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn width(&self) -> usize {
        // One `Series` is stored per column, so the vector length is the column count.
        self.columns.len()
    }

    /// Get the height of the `DataFrame` which is the number of rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("Currency" => &["€", "$"])?;
    /// let df2: DataFrame = df!("Currency" => &["€", "$", "¥", "£", "₿"])?;
    ///
    /// assert_eq!(df0.height(), 0);
    /// assert_eq!(df1.height(), 2);
    /// assert_eq!(df2.height(), 5);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn height(&self) -> usize {
        // The row count is the first component of `shape()`.
        self.shape().0
    }

    /// Check if the `DataFrame` is empty.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = DataFrame::default();
    /// assert!(df1.is_empty());
    ///
    /// let df2: DataFrame = df!("First name" => &["Forever"],
    ///                          "Last name" => &["Alone"])?;
    /// assert!(!df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_empty(&self) -> bool {
        // Decided by the number of columns only: a frame that has columns with
        // zero rows is not reported as empty by this check.
        self.columns.is_empty()
    }

    /// Append clones of `columns` without validating names or lengths.
    pub(crate) fn hstack_mut_no_checks(&mut self, columns: &[Series]) -> &mut Self {
        self.columns.extend_from_slice(columns);
        self
    }

    /// Add multiple `Series` to a `DataFrame`.
    /// The added `Series` are required to have the same length.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn stack(df: &mut DataFrame, columns: &[Series]) {
    ///     df.hstack_mut(columns);
    /// }
    /// ```
    pub fn hstack_mut(&mut self, columns: &[Series]) -> PolarsResult<&mut Self> {
        let mut names = PlHashSet::with_capacity(self.columns.len());
        for s in &self.columns {
            names.insert(s.name());
        }

        let height = self.height();
        // first loop check validity. We don't do this in a single pass otherwise
        // this DataFrame is already modified when an error occurs.
        for col in columns {
            if col.len() != height && height != 0 {
                return Err(PolarsError::ShapeMisMatch(
                    format!("Could not horizontally stack Series. The Series length {} differs from the DataFrame height: {height}", col.len()).into()));
            }

            let name = col.name();
            if names.contains(name) {
                return Err(PolarsError::Duplicate(
                    format!("Cannot do hstack operation. Column with name: {name} already exists",)
                        .into(),
                ));
            }
            names.insert(name);
        }
        drop(names);
        Ok(self.hstack_mut_no_checks(columns))
    }

    /// Add multiple `Series` to a `DataFrame`.
    /// The added `Series` are required to have the same length.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"])?;
    /// let s1: Series = Series::new("Proton", &[29, 47, 79]);
    /// let s2: Series = Series::new("Electron", &[29, 47, 79]);
    ///
    /// let df2: DataFrame = df1.hstack(&[s1, s2])?;
    /// assert_eq!(df2.shape(), (3, 3));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 3)
    /// +---------+--------+----------+
    /// | Element | Proton | Electron |
    /// | ---     | ---    | ---      |
    /// | str     | i32    | i32      |
    /// +=========+========+==========+
    /// | Copper  | 29     | 29       |
    /// +---------+--------+----------+
    /// | Silver  | 47     | 47       |
    /// +---------+--------+----------+
    /// | Gold    | 79     | 79       |
    /// +---------+--------+----------+
    /// ```
    pub fn hstack(&self, columns: &[Series]) -> PolarsResult<Self> {
        // Existing columns first, then the new ones; `DataFrame::new` validates the result.
        let stacked: Vec<Series> = self.columns.iter().chain(columns).cloned().collect();
        DataFrame::new(stacked)
    }

    /// Concatenate a `DataFrame` to this `DataFrame` and return as newly allocated `DataFrame`.
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// let df3: DataFrame = df1.vstack(&df2)?;
    ///
    /// assert_eq!(df3.shape(), (5, 2));
    /// println!("{}", df3);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    pub fn vstack(&self, other: &DataFrame) -> PolarsResult<Self> {
        // Work on a clone so `self` stays untouched; the in-place variant does the stacking.
        let mut df = self.clone();
        df.vstack_mut(other)?;
        Ok(df)
    }

    /// Concatenate a DataFrame to this DataFrame
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// df1.vstack_mut(&df2)?;
    ///
    /// assert_eq!(df1.shape(), (5, 2));
    /// println!("{}", df1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    pub fn vstack_mut(&mut self, other: &DataFrame) -> PolarsResult<&mut Self> {
        if self.width() != other.width() {
            // A frame without any columns simply adopts the columns of `other`.
            if self.width() == 0 {
                self.columns = other.columns.clone();
                return Ok(self);
            }

            // `other` is the appended frame, `self` is the parent.
            return Err(PolarsError::ShapeMisMatch(
                format!("Could not vertically stack DataFrame. The DataFrames appended width {} differs from the parent DataFrames width {}", other.width(), self.width()).into()
            ));
        }

        // Validate every column pair before mutating anything: failing halfway
        // through would leave some columns longer than others.
        for (left, right) in self.columns.iter().zip(other.columns.iter()) {
            can_extend(left, right)?;
        }

        for (left, right) in self.columns.iter_mut().zip(other.columns.iter()) {
            // All pairs passed `can_extend` above, so appending is expected to succeed.
            left.append(right).expect("should not fail");
        }
        Ok(self)
    }

    /// Append each column of `other` to the column at the same position in `self`.
    ///
    /// Neither the width nor the schema is checked; columns are paired purely by
    /// position and a failing append panics.
    pub(crate) fn vstack_mut_unchecked(&mut self, other: &DataFrame) {
        for (target, source) in self.columns.iter_mut().zip(&other.columns) {
            target.append(source).expect("should not fail");
        }
    }

    /// Extend the memory backed by this [`DataFrame`] with the values from `other`.
    ///
    /// Different from [`vstack`](Self::vstack) which adds the chunks from `other` to the chunks of this [`DataFrame`]
    /// `extend` appends the data from `other` to the underlying memory locations and thus may cause a reallocation.
    ///
    /// If this does not cause a reallocation, the resulting data structure will not have any extra chunks
    /// and thus will yield faster queries.
    ///
    /// Prefer `extend` over `vstack` when you want to do a query after a single append. For instance during
    /// online operations where you add `n` rows and rerun a query.
    ///
    /// Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance
    /// when you read in multiple files and want to store them in a single `DataFrame`. In the latter case, finish the sequence
    /// of `append` operations with a [`rechunk`](Self::rechunk).
    pub fn extend(&mut self, other: &DataFrame) -> PolarsResult<()> {
        if self.width() != other.width() {
            // `other` is the frame we extend with, `self` is the parent.
            return Err(PolarsError::ShapeMisMatch(
                format!("Could not extend DataFrame. The DataFrames extended width {} differs from the parent DataFrames width {}", other.width(), self.width()).into()
            ));
        }

        // Validate every column pair before mutating anything: failing halfway
        // through would leave some columns longer than others.
        for (left, right) in self.columns.iter().zip(other.columns.iter()) {
            can_extend(left, right)?;
        }

        for (left, right) in self.columns.iter_mut().zip(other.columns.iter()) {
            left.extend(right)
                .expect("column pair was validated by `can_extend`");
        }
        Ok(())
    }

    /// Remove a column by name and return the column removed.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Animal" => &["Tiger", "Lion", "Great auk"],
    ///                             "IUCN" => &["Endangered", "Vulnerable", "Extinct"])?;
    ///
    /// let s1: PolarsResult<Series> = df.drop_in_place("Average weight");
    /// assert!(s1.is_err());
    ///
    /// let s2: Series = df.drop_in_place("Animal")?;
    /// assert_eq!(s2, Series::new("Animal", &["Tiger", "Lion", "Great auk"]));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop_in_place(&mut self, name: &str) -> PolarsResult<Series> {
        // Resolve the position first; a failed lookup is returned to the caller as-is.
        let position = self.check_name_to_idx(name)?;
        let removed = self.columns.remove(position);
        Ok(removed)
    }

    /// Return a new `DataFrame` where all null values are dropped.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Country" => ["Malta", "Liechtenstein", "North Korea"],
    ///                         "Tax revenue (% GDP)" => [Some(32.7), None, None])?;
    /// assert_eq!(df1.shape(), (3, 2));
    ///
    /// let df2: DataFrame = df1.drop_nulls(None)?;
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------------------+
    /// | Country | Tax revenue (% GDP) |
    /// | ---     | ---                 |
    /// | str     | f64                 |
    /// +=========+=====================+
    /// | Malta   | 32.7                |
    /// +---------+---------------------+
    /// ```
    pub fn drop_nulls(&self, subset: Option<&[String]>) -> PolarsResult<Self> {
        let selected_series;

        let mut iter = match subset {
            Some(cols) => {
                selected_series = self.select_series(cols)?;
                selected_series.iter()
            }
            None => self.columns.iter(),
        };

        // fast path for no nulls in df
        if iter.clone().all(|s| !s.has_validity()) {
            return Ok(self.clone());
        }

        let mask = iter
            .next()
            .ok_or_else(|| PolarsError::NoData("No data to drop nulls from".into()))?;
        let mut mask = mask.is_not_null();

        for s in iter {
            mask = mask & s.is_not_null();
        }
        self.filter(&mask)
    }

    /// Drop a column by name.
    /// This is a pure method and will return a new `DataFrame` instead of modifying
    /// the current one in place.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Ray type" => &["α", "β", "X", "γ"])?;
    /// let df2: DataFrame = df1.drop("Ray type")?;
    ///
    /// assert!(df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop(&self, name: &str) -> PolarsResult<Self> {
        let idx = self.check_name_to_idx(name)?;
        let mut new_cols = Vec::with_capacity(self.columns.len() - 1);

        self.columns.iter().enumerate().for_each(|(i, s)| {
            if i != idx {
                new_cols.push(s.clone())
            }
        });

        Ok(DataFrame::new_no_checks(new_cols))
    }

    /// Return a new `DataFrame` without the columns whose names appear in `names`.
    ///
    /// Names that match no column are ignored.
    pub fn drop_many<S: AsRef<str>>(&self, names: &[S]) -> Self {
        let names = names.iter().map(|s| s.as_ref()).collect();
        // Non-generic worker, so only this thin wrapper is instantiated per `S`.
        fn inner(df: &DataFrame, names: Vec<&str>) -> DataFrame {
            // `names` may contain unknown or repeated entries and can therefore be
            // longer than the column list; saturate instead of underflowing.
            let mut new_cols = Vec::with_capacity(df.columns.len().saturating_sub(names.len()));
            new_cols.extend(
                df.columns
                    .iter()
                    .filter(|s| !names.contains(&s.name()))
                    .cloned(),
            );

            DataFrame::new_no_checks(new_cols)
        }
        inner(self, names)
    }

    /// Insert `series` at position `index` without checking for a name clash.
    ///
    /// Returns a `ShapeMisMatch` error when the length of `series` differs from the
    /// height of this `DataFrame`.
    fn insert_at_idx_no_name_check(
        &mut self,
        index: usize,
        series: Series,
    ) -> PolarsResult<&mut Self> {
        if series.len() != self.height() {
            return Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                .into(),
            ));
        }

        self.columns.insert(index, series);
        Ok(self)
    }

    /// Insert `column` as a new column at position `index`.
    ///
    /// The name is first vetted by `check_already_present`; the insertion itself
    /// is rejected when the column length differs from the frame height.
    pub fn insert_at_idx<S: IntoSeries>(
        &mut self,
        index: usize,
        column: S,
    ) -> PolarsResult<&mut Self> {
        let new_column = column.into_series();
        self.check_already_present(new_column.name())?;
        self.insert_at_idx_no_name_check(index, new_column)
    }

    /// Replace the column that carries the same name as `series`, or append
    /// `series` when no such column exists. The name is found by linear search.
    fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> {
        match self.find_idx_by_name(series.name()) {
            Some(existing) => {
                self.replace_at_idx(existing, series)?;
            }
            None => self.columns.push(series),
        }
        Ok(())
    }

    /// Add `column` to this `DataFrame`, replacing an existing column of the same name.
    ///
    /// A column of length 1 is broadcast to the frame height when the height is
    /// larger than 1. Any other length mismatch yields a `ShapeMisMatch` error,
    /// unless the frame is empty.
    pub fn with_column<S: IntoSeries>(&mut self, column: S) -> PolarsResult<&mut Self> {
        // Non-generic worker, so only this thin wrapper is instantiated per `S`.
        fn inner(df: &mut DataFrame, series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();

            // Broadcast a unit-length series over all rows.
            let series = if series.len() == 1 && height > 1 {
                series.new_from_index(0, height)
            } else {
                series
            };

            if series.len() != height && !df.is_empty() {
                // special case for literals: a unit-length series becomes an
                // empty slice when the frame has no rows
                if height == 0 && series.len() == 1 {
                    df.add_column_by_search(series.slice(0, 0))?;
                    return Ok(df);
                }

                return Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Could not add column. The Series length {} differs from the DataFrame height: {}",
                        series.len(),
                        df.height()
                    )
                    .into(),
                ));
            }

            df.add_column_by_search(series)?;
            Ok(df)
        }

        inner(self, column.into_series())
    }

    /// Add or replace column `s`, using `schema` to locate an existing column of
    /// the same name without scanning.
    ///
    /// When the position reported by `schema` does not hold a column with that
    /// name, the schema is considered stale and a linear search is used instead.
    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let name = s.name();
        match schema.get_full(name) {
            None => self.columns.push(s),
            Some((idx, _, _)) => {
                let schema_is_accurate =
                    self.columns.get(idx).map(|col| col.name()) == Some(name);
                if schema_is_accurate {
                    self.replace_at_idx(idx, s)?;
                } else {
                    // schema is incorrect fallback to search
                    self.add_column_by_search(s)?;
                }
            }
        }
        Ok(())
    }

    /// Add every series in `columns` to this `DataFrame`, replacing columns that
    /// already exist under the same name.
    ///
    /// `schema` is used to speed up the lookup of existing columns where it applies.
    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                // `s` is already owned here, so it is moved rather than cloned.
                self.with_column(s)?;
            }
        }
        Ok(())
    }

    /// Add `column` to this `DataFrame`, replacing an existing column of the same name.
    ///
    /// `schema` is consulted to find an existing column without a scan; when it
    /// turns out to be inaccurate a linear search is used instead.
    /// A column of length 1 is broadcast to the frame height when the height is
    /// larger than 1.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let series = column.into_series();
        let height = self.height();

        // Broadcast a unit-length series over all rows.
        let series = if series.len() == 1 && height > 1 {
            series.new_from_index(0, height)
        } else {
            series
        };

        if series.len() != height && !self.is_empty() {
            // special case for literals: a unit-length series becomes an empty
            // slice when the frame has no rows
            if height == 0 && series.len() == 1 {
                self.add_column_by_schema(series.slice(0, 0), schema)?;
                return Ok(self);
            }

            return Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                .into(),
            ));
        }

        self.add_column_by_schema(series, schema)?;
        Ok(self)
    }

    /// Get a row in the `DataFrame`. Beware this is slow.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    ///     df.get(idx)
    /// }
    /// ```
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        // The first column stands in for the frame height; no columns means no rows.
        let first = self.columns.get(0)?;
        if idx >= first.len() {
            return None;
        }

        let row = self
            .columns
            .iter()
            // SAFETY: `idx` is within the first column's length, checked above.
            // NOTE(review): this relies on every column having that same length.
            .map(|s| unsafe { s.get_unchecked(idx) })
            .collect();
        Some(row)
    }

    /// Select a `Series` by index.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
    ///                         "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
    ///
    /// let s1: Option<&Series> = df.select_at_idx(0);
    /// let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
    ///
    /// assert_eq!(s1, Some(&s2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_at_idx(&self, idx: usize) -> Option<&Series> {
        // `None` when `idx` lies past the last column.
        let columns: &[Series] = &self.columns;
        columns.get(idx)
    }

    /// Mutable access to the column at position `idx`, or `None` when `idx` is out of range.
    ///
    /// *Note: callers must keep the length of the `Series` unchanged, otherwise the
    /// `DataFrame` becomes invalid.* That is why this method is private.
    fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> {
        let columns: &mut [Series] = &mut self.columns;
        columns.get_mut(idx)
    }

    /// Select column(s) from this `DataFrame` by range and return a new DataFrame
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///     "0" => &[0, 0, 0],
    ///     "1" => &[1, 1, 1],
    ///     "2" => &[2, 2, 2]
    /// }?;
    ///
    /// assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
    /// assert!(df.frame_equal(&df.select_by_range(..)?));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // Local copy of `std::slice::range` (https://doc.rust-lang.org/std/slice/fn.range.html),
        // which is a nightly-only feature. Switch to the std function once it is stable.
        //
        // Converts arbitrary range bounds into a concrete half-open range and panics
        // when the bounds are inverted, overflow, or reach past `bounds.end`.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let len = bounds.end;

            let start = match range.start_bound() {
                ops::Bound::Unbounded => 0,
                ops::Bound::Included(&first) => first,
                ops::Bound::Excluded(before) => match before.checked_add(1) {
                    Some(first) => first,
                    None => panic!("attempted to index slice from after maximum usize"),
                },
            };

            let end = match range.end_bound() {
                ops::Bound::Unbounded => len,
                ops::Bound::Excluded(&past) => past,
                ops::Bound::Included(last) => match last.checked_add(1) {
                    Some(past) => past,
                    None => panic!("attempted to index slice up to maximum usize"),
                },
            };

            if start > end {
                panic!("slice index starts at {start} but ends at {end}");
            }
            if end > len {
                panic!("range end index {end} out of range for slice of length {len}");
            }

            start..end
        }

        // Translate the positional range into the names found at those positions.
        let colnames = self.get_column_names_owned();
        let positions = get_range(range, ..colnames.len());

        self.select_impl(&colnames[positions])
    }

    /// Get column index of a `Series` by name.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
    ///                         "Health" => &[100, 200, 500],
    ///                         "Mana" => &[250, 100, 0],
    ///                         "Strength" => &[30, 150, 300])?;
    ///
    /// assert_eq!(df.find_idx_by_name("Name"), Some(0));
    /// assert_eq!(df.find_idx_by_name("Health"), Some(1));
    /// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
    /// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
    /// assert_eq!(df.find_idx_by_name("Haste"), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
        self.columns.iter().position(|s| s.name() == name)
    }

    /// Select a single column by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
    /// let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
    ///
    /// assert_eq!(df.column("Password")?, &s1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        match self.find_idx_by_name(name) {
            // The index was just produced by the lookup, so it is in bounds.
            Some(idx) => Ok(self.select_at_idx(idx).unwrap()),
            None => Err(PolarsError::NotFound(name.to_string().into())),
        }
    }

    /// Select multiple columns by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let names = names.into_iter();
        let mut found = Vec::with_capacity(names.size_hint().0);
        for name in names {
            // Stop at the first name that cannot be resolved.
            found.push(self.column(name.as_ref())?);
        }
        Ok(found)
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        // Materialize the names as owned strings for the non-generic implementation.
        let names: Vec<String> = selection
            .into_iter()
            .map(|name| String::from(name.as_ref()))
            .collect();
        self.select_impl(&names)
    }

    /// Non-generic body of [`select`](Self::select): rejects repeated names, then
    /// builds a new frame from clones of the requested columns.
    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        self.select_series_impl(cols)
            .map(DataFrame::new_no_checks)
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame` whose
    /// columns are in their physical representation (`to_physical_repr`).
    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        // Materialize the names as owned strings for the non-generic implementation.
        let names: Vec<String> = selection
            .into_iter()
            .map(|name| String::from(name.as_ref()))
            .collect();
        self.select_physical_impl(&names)
    }

    /// Non-generic body of [`select_physical`](Self::select_physical): rejects
    /// repeated names, then builds a new frame from the physical columns.
    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        self.select_series_physical_impl(cols)
            .map(DataFrame::new_no_checks)
    }

    /// Check that no name occurs more than once in `cols`.
    ///
    /// The first repeated name is handed to `_duplicate_err`, whose result is
    /// propagated to the caller.
    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut seen = PlHashSet::with_capacity(cols.len());
        cols.iter().try_for_each(|name| -> PolarsResult<()> {
            // `insert` returns `false` when the name was already recorded.
            let first_occurrence = seen.insert(name.as_str());
            if !first_occurrence {
                _duplicate_err(name)?;
            }
            Ok(())
        })
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        // Convert once to owned names and defer to the non-generic implementation.
        self.select_series_impl(&selection.into_vec())
    }

    /// Build a lookup table from column name to column position.
    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        // Pair every name with its running position, starting at zero.
        self.columns.iter().map(|s| s.name()).zip(0..).collect()
    }

    /// Non-generic implementation (reduces compiler bloat) that returns the
    /// requested columns in their physical representation.
    ///
    /// Returns `NotFound` for the first name that matches no column.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        if cols.len() > 1 && self.columns.len() > 10 {
            // Several names on a wide frame: resolve them through a hash map.
            let lookup = self._names_to_idx_map();
            let mut selected = Vec::with_capacity(cols.len());
            for name in cols {
                match lookup.get(name.as_str()) {
                    Some(&idx) => {
                        let physical = self
                            .select_at_idx(idx)
                            .unwrap()
                            .to_physical_repr()
                            .into_owned();
                        selected.push(physical);
                    }
                    None => return Err(PolarsError::NotFound(name.to_string().into())),
                }
            }
            Ok(selected)
        } else {
            // Few names or few columns: a linear search per name is enough.
            cols.iter()
                .map(|name| {
                    let s = self.column(name)?;
                    Ok(s.to_physical_repr().into_owned())
                })
                .collect::<PolarsResult<Vec<_>>>()
        }
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            // we hash, because there are user that having millions of columns.
            // # https://github.com/pola-rs/polars/issues/1023
            let name_to_idx = self._names_to_idx_map();

            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self.select_at_idx(idx).unwrap().clone())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.clone()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// Mutable access to the column called `name`, or `None` when no column has that name.
    ///
    /// *Note: callers must keep the length of the `Series` unchanged, otherwise the
    /// `DataFrame` becomes invalid.* That is why this method is private.
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        let idx = self.find_idx_by_name(name)?;
        self.select_at_idx_mut(idx)
    }

    /// Filter by splitting the rows, rather than the columns, over the thread pool.
    ///
    /// The mask and the frame are each split into `POOL.current_num_threads()`
    /// parts; the parts are filtered in parallel and stacked back together, so the
    /// result has `n_chunks == n_threads`.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let mask_parts = split_ca(mask, n_threads).unwrap();
        let frame_parts = split_df(self, n_threads).unwrap();
        let filtered: PolarsResult<Vec<_>> = POOL.install(|| {
            mask_parts
                .par_iter()
                .zip(frame_parts)
                .map(|(part_mask, part)| {
                    // Apply the matching slice of the mask to every column of this part.
                    let cols = part
                        .columns
                        .iter()
                        .map(|s| s.filter(part_mask))
                        .collect::<PolarsResult<_>>()?;
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        // Stack the filtered parts back together in their original order.
        let mut parts = filtered?.into_iter();
        let mut stacked = parts.next().unwrap();
        for part in parts {
            stacked.vstack_mut(&part).unwrap();
        }
        Ok(stacked)
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            return self.clone().filter_vertical(mask);
        }
        let new_col = self.try_apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s.filter_threaded(mask, true),
            _ => s.filter(mask),
        })?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Sequential counterpart of [`filter`](Self::filter): applies `mask` to the
    /// columns through `try_apply_columns` instead of its parallel variant.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        self.try_apply_columns(&|s| s.filter(mask))
            .map(DataFrame::new_no_checks)
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        let new_col = self.try_apply_columns_par(&|s| {
            let mut i = iter.clone();
            s.take_iter(&mut i)
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` values by indexes from an iterator.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        // Setting `POLARS_VERT_PAR` switches to the row-wise parallel strategy,
        // which works on materialized indices.
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let indices = iter
                .map(|idx| idx as IdxSize)
                .collect::<NoNull<IdxCa>>()
                .into_inner();
            return self.take_unchecked_vectical(&indices);
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .map(|s| s.dtype())
            .any(|dtype| matches!(dtype, DataType::Utf8));

        // Single-chunk frames with several columns, and frames holding Utf8
        // data, go through the index-array based `take_unchecked`.
        let single_chunk_multi_column = n_chunks == 1 && self.width() > 1;
        if single_chunk_multi_column || has_utf8 {
            let indices = iter
                .map(|idx| idx as IdxSize)
                .collect::<NoNull<IdxCa>>()
                .into_inner();
            return self.take_unchecked(&indices);
        }

        let new_col = match self.width() {
            // A single column can consume the iterator directly.
            1 => self
                .columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>(),
            // Otherwise every column works on its own clone, in parallel.
            _ => self.apply_columns_par(&|s| {
                let mut indices = iter.clone();
                s.take_iter_unchecked(&mut indices)
            }),
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` values by indexes from an iterator that may contain None values.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        // Setting `POLARS_VERT_PAR` switches to the row-wise parallel strategy,
        // which works on materialized indices.
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let indices = iter
                .map(|opt| opt.map(|idx| idx as IdxSize))
                .collect::<IdxCa>();
            return self.take_unchecked_vectical(&indices);
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .map(|s| s.dtype())
            .any(|dtype| matches!(dtype, DataType::Utf8));

        // Single-chunk frames with several columns, and frames holding Utf8
        // data, go through the index-array based `take_unchecked`.
        let single_chunk_multi_column = n_chunks == 1 && self.width() > 1;
        if single_chunk_multi_column || has_utf8 {
            let indices = iter
                .map(|opt| opt.map(|idx| idx as IdxSize))
                .collect::<IdxCa>();
            return self.take_unchecked(&indices);
        }

        let new_col = match self.width() {
            // A single column can consume the iterator directly.
            1 => self
                .columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>(),
            // Otherwise every column works on its own clone, in parallel.
            _ => self.apply_columns_par(&|s| {
                let mut indices = iter.clone();
                s.take_opt_iter_unchecked(&mut indices)
            }),
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` rows by index values.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        let indices = if indices.chunks.len() > 1 {
            Cow::Owned(indices.rechunk())
        } else {
            Cow::Borrowed(indices)
        };
        let new_col = POOL.install(|| {
            self.try_apply_columns_par(&|s| match s.dtype() {
                DataType::Utf8 => s.take_threaded(&indices, true),
                _ => s.take(&indices),
            })
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take rows by `idx` through `take_unchecked_impl` with threading enabled.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably `idx` must be in bounds for every column; the
    /// requirement is inherited from `take_unchecked_impl` — confirm there.
    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        let allow_threads = true;
        self.take_unchecked_impl(idx, allow_threads)
    }

    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        let cols = if allow_threads {
            POOL.install(|| {
                self.apply_columns_par(&|s| match s.dtype() {
                    DataType::Utf8 => s.take_unchecked_threaded(idx, true).unwrap(),
                    _ => s.take_unchecked(idx).unwrap(),
                })
            })
        } else {
            self.columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect()
        };
        DataFrame::new_no_checks(cols)
    }

    /// Take rows by `indices`, parallelizing over slices of the indices instead
    /// of over the columns.
    ///
    /// The indices are split into `POOL.current_num_threads()` parts; each part is
    /// taken from all columns on the pool and the partial frames are stacked in order.
    ///
    /// # Safety
    ///
    /// NOTE(review): the indices are handed to `Series::take_unchecked` as-is;
    /// presumably they must be in bounds for every column — confirm against that method.
    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let index_parts = split_ca(indices, n_threads).unwrap();

        let partial_frames: Vec<_> = POOL.install(|| {
            index_parts
                .par_iter()
                .map(|part| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(part).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        // Stack the partial frames back together in their original order.
        let mut parts = partial_frames.into_iter();
        let mut stacked = parts.next().unwrap();
        for part in parts {
            stacked.vstack_mut(&part).unwrap();
        }
        stacked
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// The new name is validated before anything is changed, so a rejected
    /// rename leaves the `DataFrame` untouched.
    ///
    /// # Errors
    ///
    /// * `PolarsError::NotFound` if there is no column called `column`.
    /// * `PolarsError::SchemaMisMatch` if the column names would not be
    ///   unique after the rename.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        let idx = self
            .find_idx_by_name(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))?;

        // Check uniqueness against the names the frame *would* have after the
        // rename. Doing this before mutating means a failed rename cannot
        // leave a duplicate column name behind.
        let unique_names: AHashSet<&str, ahash::RandomState> = AHashSet::from_iter(
            self.columns
                .iter()
                .enumerate()
                .map(|(i, s)| if i == idx { name } else { s.name() }),
        );
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }

        self.columns[idx].rename(name);
        Ok(self)
    }

    /// Sort the `DataFrame` in place by the given column(s).
    ///
    /// The key columns are looked up by name and the frame's columns are
    /// replaced by the sorted result. `nulls_last` is passed as `false` and
    /// no slice is applied.
    ///
    /// # Errors
    ///
    /// Returns an error if selecting the key columns or the sort itself fails.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // Sorting and taking over many chunks adds a lot of indirection, so
        // bring the frame into a single chunk first.
        self.as_single_chunk_par();
        let sort_keys = self.select_series(by_column)?;
        let sorted = self.sort_impl(sort_keys, reverse.into_vec(), false, None)?;
        self.columns = sorted.columns;
        Ok(self)
    }

    /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
    ///
    /// Computes the sort indices for the key column(s), optionally slices
    /// them with `slice`, and gathers the rows with an unchecked take. The
    /// first key column, when it exists in the result, is flagged as sorted.
    ///
    /// A frame consisting of exactly one column that is also the single sort
    /// key takes a fast path: the series is sorted (and sliced) directly and
    /// returned as a frame.
    ///
    /// # Panics
    ///
    /// * If `by_column` or `reverse` is empty (their first element is indexed
    ///   unconditionally).
    /// * If more than one key is given while the `sort_multiple` feature is
    ///   disabled.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // Note that the `by_column` argument may also contain evaluated expressions from
        // polars-lazy that are not even present in this dataframe.

        // Therefore, when we try to flag the first column as sorted below, we ignore the error:
        // such expressions are not present (they are renamed to `_POLARS_SORT_COLUMN_i`).
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: reverse[0],
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        // Slice the sort indices rather than the result, so only the requested rows are gathered.
        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        // Setting the `POLARS_VERT_PAR` environment variable (to any value) selects the take
        // that splits the indices across threads instead of the default take.
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted.
        // If the column does not exist that is fine: we sorted by an expression
        // that is not present in the dataframe, so the error is discarded.
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted clone of this `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        // Sort a clone so the receiver itself is left unmodified.
        let mut sorted = self.clone();
        sorted.sort_in_place(by_column, reverse)?;
        Ok(sorted)
    }

    /// Return a clone of this `DataFrame` sorted by one column.
    ///
    /// Both `options.descending` and `options.nulls_last` are forwarded to
    /// the sort; no slice is applied.
    ///
    /// # Errors
    ///
    /// Returns an error if `by_column` cannot be resolved or the sort fails.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut frame = self.clone();
        // Sorting and taking over many chunks adds a lot of indirection, so
        // bring the clone into a single chunk first.
        frame.as_single_chunk_par();
        let key = frame.column(by_column)?.clone();
        let sorted = frame.sort_impl(
            vec![key],
            vec![options.descending],
            options.nulls_last,
            None,
        )?;
        frame.columns = sorted.columns;
        Ok(frame)
    }

    /// Replace a column with a `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Country" => &["United States", "China"],
    ///                         "Area (km²)" => &[9_833_520, 9_596_961])?;
    /// let s: Series = Series::new("Country", &["USA", "PRC"]);
    ///
    /// assert!(df.replace("Nation", s.clone()).is_err());
    /// assert!(df.replace("Country", s).is_ok());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace<S: IntoSeries>(&mut self, column: &str, new_col: S) -> PolarsResult<&mut Self> {
        self.apply(column, |_| new_col.into_series())
    }

    /// Replace or add a column under the name `column`.
    ///
    /// Unlike [DataFrame::with_column], the resulting column name is taken
    /// from the `column` argument: the passed `Series` is renamed to `column`
    /// before it is handed to `with_column`.
    pub fn replace_or_add<S: IntoSeries>(
        &mut self,
        column: &str,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut renamed = new_col.into_series();
        // The target name wins over whatever name the series arrived with.
        renamed.rename(column);
        self.with_column(renamed)
    }

    /// Replace column at index `idx` with a `Series`.
    ///
    /// # Example
    ///
    /// ```ignore
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace_at_idx<S: IntoSeries>(
        &mut self,
        idx: usize,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let replacement = new_col.into_series();

        // The height check runs before the index check, so a series of the
        // wrong length is reported as a shape mismatch even when `idx` is
        // also out of range.
        let (series_len, frame_height) = (replacement.len(), self.height());
        if series_len != frame_height {
            return Err(PolarsError::ShapeMisMatch(
                format!("Cannot replace Series at index {}. The shape of Series {} does not match that of the DataFrame {}",
                idx, series_len, frame_height
                ).into()));
        }

        let frame_width = self.width();
        if idx >= frame_width {
            return Err(PolarsError::ComputeError(
                format!(
                    "Column index: {} outside of DataFrame with {} columns",
                    idx, frame_width
                )
                .into(),
            ));
        }

        // Both checks passed: overwrite the slot, dropping the old column.
        self.columns[idx] = replacement;
        Ok(self)
    }

    /// Apply a closure to a column. This is the recommended way to do in place modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("names", &["Jean", "Claude", "van"]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// fn str_to_len(str_val: &Series) -> Series {
    ///     str_val.utf8()
    ///         .unwrap()
    ///         .into_iter()
    ///         .map(|opt_name: Option<&str>| {
    ///             opt_name.map(|name: &str| name.len() as u32)
    ///          })
    ///         .collect::<UInt32Chunked>()
    ///         .into_series()
    /// }
    ///
    /// // Replace the names column by the length of the names.
    /// df.apply("names", str_to_len);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | names |
    /// | ---    | ---   |
    /// | str    | u32   |
    /// +========+=======+
    /// | "ham"  | 4     |
    /// +--------+-------+
    /// | "spam" | 6     |
    /// +--------+-------+
    /// | "egg"  | 3     |
    /// +--------+-------+
    /// ```
    pub fn apply<F, S>(&mut self, name: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        // Resolve the name to a position, then reuse the index-based variant.
        let position = self.check_name_to_idx(name)?;
        self.apply_at_idx(position, f)
    }

    /// Apply a closure to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.apply_at_idx(1, |s| s + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | ascii |
    /// | ---    | ---   |
    /// | str    | i32   |
    /// +========+=======+
    /// | "ham"  | 102   |
    /// +--------+-------+
    /// | "spam" | 111   |
    /// +--------+-------+
    /// | "egg"  | 111   |
    /// +--------+-------+
    /// ```
    pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let frame_height = self.height();
        let width = self.width();
        let slot = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        // Remember the name so it can be restored after the closure ran.
        let original_name = slot.name().to_string();

        let produced = f(slot).into_series();
        let produced_len = produced.len();
        let replacement = if produced_len == 1 {
            // A single value is repeated to the height of the frame.
            produced.new_from_index(0, frame_height)
        } else if produced_len == frame_height {
            produced
        } else {
            return Err(PolarsError::ShapeMisMatch(
                format!(
                    "Result Series has shape {} where the DataFrame has height {}",
                    produced_len, frame_height
                )
                .into(),
            ));
        };

        *slot = replacement;
        // make sure the name remains the same after applying the closure
        slot.rename(&original_name);
        Ok(self)
    }

    /// Apply a closure that may fail to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values in a column of a `DataFrame` given a range of indexes.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// let idx = vec![0, 1, 4];
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set_at_idx_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "ham-is-modified"   | 1      |
    /// +---------------------+--------+
    /// | "spam-is-modified"  | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "quack-is-modified" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let width = self.width();
        let slot = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let original_name = slot.name().to_string();

        // Run the closure first: on `Err` we return before the column is
        // touched. Note that the length of the result is not validated here.
        let replacement = f(slot)?.into_series();
        *slot = replacement;

        // make sure the name remains the same after applying the closure
        slot.rename(&original_name);
        Ok(self)
    }

    /// Apply a closure that may fail to a column. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values in a column of a `DataFrame` given a boolean mask.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // create a mask
    /// let values = df.column("values")?;
    /// let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?;
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set(&mask, Some("not_within_bounds"))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "not_within_bounds" | 1      |
    /// +---------------------+--------+
    /// | "spam"              | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "not_within_bounds" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply<F, S>(&mut self, column: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        // Resolve the name to a position and reuse the index-based variant;
        // an unknown name is reported as `NotFound`.
        match self.find_idx_by_name(column) {
            Some(position) => self.try_apply_at_idx(position, f),
            None => Err(PolarsError::NotFound(column.to_string().into())),
        }
    }

    /// Slice the `DataFrame` along the rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Fruit" => &["Apple", "Grape", "Grape", "Fig", "Fig"],
    ///                         "Color" => &["Green", "Red", "White", "White", "Red"])?;
    /// let sl: DataFrame = df.slice(2, 3);
    ///
    /// assert_eq!(sl.shape(), (3, 2));
    /// println!("{}", sl);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Output:
    /// ```text
    /// shape: (3, 2)
    /// +-------+-------+
    /// | Fruit | Color |
    /// | ---   | ---   |
    /// | str   | str   |
    /// +=======+=======+
    /// | Grape | White |
    /// +-------+-------+
    /// | Fig   | White |
    /// +-------+-------+
    /// | Fig   | Red   |
    /// +-------+-------+
    /// ```
    #[must_use]
    pub fn slice(&self, offset: i64, length: usize) -> Self {
        // Asking for the complete frame needs no per-column work.
        let covers_everything = offset == 0 && length == self.height();
        if covers_everything {
            return self.clone();
        }

        let mut sliced = Vec::with_capacity(self.columns.len());
        for column in &self.columns {
            sliced.push(column.slice(offset, length));
        }
        DataFrame::new_no_checks(sliced)
    }

    /// Same as `slice`, but the columns are sliced through
    /// `apply_columns_par`.
    #[must_use]
    pub fn slice_par(&self, offset: i64, length: usize) -> Self {
        let covers_everything = offset == 0 && length == self.height();
        if covers_everything {
            // Asking for the complete frame needs no per-column work.
            self.clone()
        } else {
            let sliced = self.apply_columns_par(&|column| column.slice(offset, length));
            DataFrame::new_no_checks(sliced)
        }
    }

    /// Slice every column and call `shrink_to_fit` on each sliced column.
    ///
    /// A request for the complete frame returns a plain clone, without
    /// shrinking.
    #[must_use]
    pub fn _slice_and_realloc(&self, offset: i64, length: usize) -> Self {
        let covers_everything = offset == 0 && length == self.height();
        if covers_everything {
            return self.clone();
        }

        let shrunk = self.apply_columns(&|column| {
            let mut piece = column.slice(offset, length);
            piece.shrink_to_fit();
            piece
        });
        DataFrame::new_no_checks(shrunk)
    }

    /// Get the head of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank by GDP (2021)" => &[1, 2, 3, 4, 5],
    ///         "Continent" => &["North America", "Asia", "Asia", "Europe", "Europe"],
    ///         "Country" => &["United States", "China", "Japan", "Germany", "United Kingdom"],
    ///         "Capital" => &["Washington", "Beijing", "Tokyo", "Berlin", "London"])?;
    /// assert_eq!(countries.shape(), (5, 4));
    ///
    /// println!("{}", countries.head(Some(3)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 4)
    /// +--------------------+---------------+---------------+------------+
    /// | Rank by GDP (2021) | Continent     | Country       | Capital    |
    /// | ---                | ---           | ---           | ---        |
    /// | i32                | str           | str           | str        |
    /// +====================+===============+===============+============+
    /// | 1                  | North America | United States | Washington |
    /// +--------------------+---------------+---------------+------------+
    /// | 2                  | Asia          | China         | Beijing    |
    /// +--------------------+---------------+---------------+------------+
    /// | 3                  | Asia          | Japan         | Tokyo      |
    /// +--------------------+---------------+---------------+------------+
    /// ```
    #[must_use]
    pub fn head(&self, length: Option<usize>) -> Self {
        // `length` is forwarded untouched; each column handles `None` itself.
        let mut heads = Vec::with_capacity(self.columns.len());
        for column in &self.columns {
            heads.push(column.head(length));
        }
        DataFrame::new_no_checks(heads)
    }

    /// Get the tail of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank (2021)" => &[105, 106, 107, 108, 109],
    ///         "Apple Price (€/kg)" => &[0.75, 0.70, 0.70, 0.65, 0.52],
    ///         "Country" => &["Kosovo", "Moldova", "North Macedonia", "Syria", "Turkey"])?;
    /// assert_eq!(countries.shape(), (5, 3));
    ///
    /// println!("{}", countries.tail(Some(2)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (2, 3)
    /// +-------------+--------------------+---------+
    /// | Rank (2021) | Apple Price (€/kg) | Country |
    /// | ---         | ---                | ---     |
    /// | i32         | f64                | str     |
    /// +=============+====================+=========+
    /// | 108         | 0.65               | Syria   |
    /// +-------------+--------------------+---------+
    /// | 109         | 0.52               | Turkey  |
    /// +-------------+--------------------+---------+
    /// ```
    #[must_use]
    pub fn tail(&self, length: Option<usize>) -> Self {
        // `length` is forwarded untouched; each column handles `None` itself.
        let mut tails = Vec::with_capacity(self.columns.len());
        for column in &self.columns {
            tails.push(column.tail(length));
        }
        DataFrame::new_no_checks(tails)
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks(&self) -> RecordBatchIter {
        // The iterator borrows the columns, starts at the first chunk and is
        // told the frame's chunk count up front.
        let n_chunks = self.n_chunks();
        let columns = &self.columns;
        RecordBatchIter {
            columns,
            idx: 0,
            n_chunks,
        }
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches as physical values.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks_physical(&self) -> PhysRecordBatchIter<'_> {
        // One chunk iterator per column; the returned iterator owns them all.
        let iters = self
            .columns
            .iter()
            .map(|column| column.chunks().iter())
            .collect();
        PhysRecordBatchIter { iters }
    }

    /// Get a `DataFrame` in which every column is replaced by its `reverse()`.
    ///
    /// The order of the columns themselves is unchanged.
    #[must_use]
    pub fn reverse(&self) -> Self {
        let mut reversed = Vec::with_capacity(self.columns.len());
        for column in &self.columns {
            reversed.push(column.reverse());
        }
        DataFrame::new_no_checks(reversed)
    }

    /// Shift the values by a given period and fill the parts that will be empty due to this operation
    /// with `Nones`.
    ///
    /// See the method on [Series](../series/trait.SeriesTrait.html#method.shift) for more info on the `shift` operation.
    #[must_use]
    pub fn shift(&self, periods: i64) -> Self {
        // Every column is shifted by the same number of periods.
        DataFrame::new_no_checks(self.apply_columns_par(&|column| column.shift(periods)))
    }

    /// Replace None values with one of the following strategies:
    /// * Forward fill (replace None with the previous value)
    /// * Backward fill (replace None with the next value)
    /// * Mean fill (replace None with the mean of the whole array)
    /// * Min fill (replace None with the minimum of the whole array)
    /// * Max fill (replace None with the maximum of the whole array)
    ///
    /// See the method on [Series](../series/trait.SeriesTrait.html#method.fill_null) for more info on the `fill_null` operation.
    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
        // The same strategy is applied to every column; an error from the
        // column pass is propagated to the caller.
        self.try_apply_columns_par(&|column| column.fill_null(strategy))
            .map(DataFrame::new_no_checks)
    }

    /// Summary statistics for a `DataFrame`. Only summarizes numeric datatypes at the moment and returns nulls for non-numeric datatypes.
    /// Tries to keep the output similar to pandas.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("categorical" => &["d","e","f"],
    ///                          "numeric" => &[1, 2, 3],
    ///                          "object" => &["a", "b", "c"])?;
    /// assert_eq!(df1.shape(), (3, 3));
    ///
    /// let df2: DataFrame = df1.describe(None);
    /// assert_eq!(df2.shape(), (8, 4));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (8, 4)
    /// ┌──────────┬─────────────┬─────────┬────────┐
    /// │ describe ┆ categorical ┆ numeric ┆ object │
    /// │ ---      ┆ ---         ┆ ---     ┆ ---    │
    /// │ str      ┆ f64         ┆ f64     ┆ f64    │
    /// ╞══════════╪═════════════╪═════════╪════════╡
    /// │ count    ┆ 3.0         ┆ 3.0     ┆ 3.0    │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ mean     ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ std      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ min      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 25%      ┆ null        ┆ 1.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 50%      ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 75%      ┆ null        ┆ 2.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ max      ┆ null        ┆ 3.0     ┆ null   │
    /// └──────────┴─────────────┴─────────┴────────┘
    /// ```
    #[must_use]
    #[cfg(feature = "describe")]
    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        // Cast every column to `Float64` so all summary rows can be stacked.
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let as_floats: Vec<Series> = df
                .columns
                .iter()
                .map(|s| s.cast(&DataType::Float64).expect("cast to float failed"))
                .collect();
            DataFrame::new(as_floats).unwrap()
        }

        // One-row frame holding the length of every column.
        fn count(df: &DataFrame) -> DataFrame {
            DataFrame::new_no_checks(
                df.apply_columns_par(&|s| Series::new(s.name(), [s.len() as IdxSize])),
            )
        }

        let percentiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        // Four leading rows, one row per percentile, and a trailing `max`.
        let n_rows = percentiles.len() + 5;
        let mut headers: Vec<String> = Vec::with_capacity(n_rows);
        let mut rows: Vec<DataFrame> = Vec::with_capacity(n_rows);

        for label in ["count", "mean", "std", "min"] {
            headers.push(label.to_string());
        }
        rows.push(describe_cast(&count(self)));
        rows.push(describe_cast(&self.mean()));
        rows.push(describe_cast(&self.std(1)));
        rows.push(describe_cast(&self.min()));

        for &p in percentiles {
            let quantile_row = self
                .quantile(p, QuantileInterpolOptions::Linear)
                .expect("quantile failed");
            rows.push(describe_cast(&quantile_row));
            headers.push(format!("{}%", p * 100.0));
        }

        // Keep order same as pandas
        rows.push(describe_cast(&self.max()));
        headers.push("max".to_string());

        let mut summary = concat_df_unchecked(&rows);

        // The row labels become the first column of the summary.
        summary
            .insert_at_idx(0, Series::new("describe", headers))
            .expect("insert of header failed");

        summary
    }

    /// Aggregate the columns to their maximum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.max();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 6       | 5       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn max(&self) -> Self {
        // Each column is reduced independently via `max_as_series`.
        DataFrame::new_no_checks(self.apply_columns_par(&|column| column.max_as_series()))
    }

    /// Aggregate the columns to their standard deviation values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.std(1);
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +-------------------+--------------------+
    /// | Die n°1           | Die n°2            |
    /// | ---               | ---                |
    /// | f64               | f64                |
    /// +===================+====================+
    /// | 2.280350850198276 | 1.0954451150103321 |
    /// +-------------------+--------------------+
    /// ```
    #[must_use]
    pub fn std(&self, ddof: u8) -> Self {
        // Each column is reduced independently; `ddof` is forwarded as-is.
        DataFrame::new_no_checks(self.apply_columns_par(&|column| column.std_as_series(ddof)))
    }
    /// Aggregate the columns to their variance values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.var(1);
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 5.2     | 1.2     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn var(&self, ddof: u8) -> Self {
        // Each column is reduced independently; `ddof` is forwarded as-is.
        DataFrame::new_no_checks(self.apply_columns_par(&|column| column.var_as_series(ddof)))
    }

    /// Aggregate the columns to their minimum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.min();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 1       | 2       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn min(&self) -> Self {
        // Each column is reduced independently via `min_as_series`.
        DataFrame::new_no_checks(self.apply_columns_par(&|column| column.min_as_series()))
    }

    /// Aggregate the columns to their sum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.sum();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 16      | 16      |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn sum(&self) -> Self {
        // Every column is reduced on its own via `sum_as_series`; the results form the new frame.
        DataFrame::new_no_checks(self.apply_columns_par(&|column| column.sum_as_series()))
    }

    /// Aggregate the columns to their mean values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.mean();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 3.2     | 3.2     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn mean(&self) -> Self {
        // Every column is reduced on its own via `mean_as_series`; the results form the new frame.
        DataFrame::new_no_checks(self.apply_columns_par(&|column| column.mean_as_series()))
    }

    /// Aggregate the columns to their median values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.median();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 3       | 3       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn median(&self) -> Self {
        // Every column is reduced on its own via `median_as_series`; the results form the new frame.
        DataFrame::new_no_checks(self.apply_columns_par(&|column| column.median_as_series()))
    }

    /// Aggregate every column to its `quantile` value, using the `interpol` strategy.
    ///
    /// # Errors
    ///
    /// Returns an error when `quantile_as_series` fails for a column.
    pub fn quantile(&self, quantile: f64, interpol: QuantileInterpolOptions) -> PolarsResult<Self> {
        self.try_apply_columns_par(&|column| column.quantile_as_series(quantile, interpol))
            .map(DataFrame::new_no_checks)
    }

    /// Aggregate the columns horizontally (row-wise) to their minimum values.
    ///
    /// Returns `Ok(None)` for a frame without columns and a clone of the column for a frame
    /// with exactly one column.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmin(&self) -> PolarsResult<Option<Series>> {
        // The mask is true where `acc` is strictly smaller and non-null, or where `s` is null.
        let min_fn = |acc: &Series, s: &Series| {
            let acc_is_smaller = acc.lt(s)? & acc.is_not_null();
            let take_acc = acc_is_smaller | s.is_null();
            acc.zip_with(&take_acc, s)
        };

        match self.columns.as_slice() {
            [] => Ok(None),
            [only] => Ok(Some(only.clone())),
            [left, right] => min_fn(left, right).map(Some),
            _ => {
                // `try_reduce_with` is a bit slower in parallelism, but that should not matter
                // here because the work is parallelized over columns, not over elements.
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| min_fn(&l, &r).map(Cow::Owned))
                        // This arm is only reached with three or more columns, so the
                        // reduction always yields a value.
                        .unwrap()
                        .map(|cow| Some(cow.into_owned()))
                })
            }
        }
    }

    /// Aggregate the columns horizontally (row-wise) to their maximum values.
    ///
    /// Returns `Ok(None)` for a frame without columns and a clone of the column for a frame
    /// with exactly one column.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmax(&self) -> PolarsResult<Option<Series>> {
        // The mask is true where `acc` is strictly larger and non-null, or where `s` is null.
        let max_fn = |acc: &Series, s: &Series| {
            let acc_is_larger = acc.gt(s)? & acc.is_not_null();
            let take_acc = acc_is_larger | s.is_null();
            acc.zip_with(&take_acc, s)
        };

        match self.columns.as_slice() {
            [] => Ok(None),
            [only] => Ok(Some(only.clone())),
            [left, right] => max_fn(left, right).map(Some),
            _ => {
                // `try_reduce_with` is a bit slower in parallelism, but that should not matter
                // here because the work is parallelized over columns, not over elements.
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| max_fn(&l, &r).map(Cow::Owned))
                        // This arm is only reached with three or more columns, so the
                        // reduction always yields a value.
                        .unwrap()
                        .map(|cow| Some(cow.into_owned()))
                })
            }
        }
    }

    /// Aggregate the columns horizontally (row-wise) to their sum.
    ///
    /// With `NullStrategy::Ignore`, columns that report a validity buffer have their nulls
    /// filled with zero before being added; with any other strategy the columns are added as-is.
    /// Returns `Ok(None)` for a frame without columns.
    pub fn hsum(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>> {
        let sum_fn =
            |acc: &Series, s: &Series, none_strategy: NullStrategy| -> PolarsResult<Series> {
                let ignore_nulls = matches!(none_strategy, NullStrategy::Ignore);
                // Zero-fill only when requested and only when the series can contain nulls.
                let prepare = |series: &Series| -> PolarsResult<Series> {
                    if ignore_nulls && series.has_validity() {
                        series.fill_null(FillNullStrategy::Zero)
                    } else {
                        Ok(series.clone())
                    }
                };
                let lhs = prepare(acc)?;
                let rhs = prepare(s)?;
                Ok(&lhs + &rhs)
            };

        match self.columns.as_slice() {
            [] => Ok(None),
            [only] => Ok(Some(only.clone())),
            [left, right] => sum_fn(left, right, none_strategy).map(Some),
            _ => {
                // `try_reduce_with` is a bit slower in parallelism, but that should not matter
                // here because the work is parallelized over columns, not over elements.
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| sum_fn(&l, &r, none_strategy).map(Cow::Owned))
                        // This arm is only reached with three or more columns, so the
                        // reduction always yields a value.
                        .unwrap()
                        .map(|cow| Some(cow.into_owned()))
                })
            }
        }
    }

    /// Aggregate the columns horizontally (row-wise) to their mean values.
    ///
    /// Only numeric and boolean columns take part in the computation when the frame has two or
    /// more columns. The mean is the horizontal sum (see [`DataFrame::hsum`]) divided by the
    /// number of non-null values in each row; rows without any non-null value yield null
    /// instead of dividing by zero.
    ///
    /// Returns `Ok(None)` when the frame has no columns, or when none of its (two or more)
    /// columns is numeric or boolean. A frame with exactly one column returns a clone of it.
    pub fn hmean(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>> {
        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            _ => {
                // Filter first, clone afterwards, so only the surviving columns are cloned.
                let columns: Vec<Series> = self
                    .columns
                    .iter()
                    .filter(|s| {
                        let dtype = s.dtype();
                        dtype.is_numeric() || matches!(dtype, DataType::Boolean)
                    })
                    .cloned()
                    .collect();
                // Without this guard the `reduce_with(..).unwrap()` below would panic, because
                // reducing an empty iterator yields `None`.
                if columns.is_empty() {
                    return Ok(None);
                }
                let numeric_df = DataFrame::new_no_checks(columns);

                let sum = || numeric_df.hsum(none_strategy);

                let null_count = || {
                    numeric_df
                        .columns
                        .par_iter()
                        .map(|s| s.is_null().cast(&DataType::UInt32).unwrap())
                        .reduce_with(|l, r| &l + &r)
                        // `numeric_df` holds at least one column (checked above), so the
                        // reduction always yields a value.
                        .unwrap()
                };

                let (sum, null_count) = POOL.install(|| rayon::join(sum, null_count));
                let sum = sum?;

                // value lengths: len - null_count
                let value_length: UInt32Chunked =
                    (numeric_df.width().sub(&null_count)).u32().unwrap().clone();

                // make sure that we do not divide by zero
                // by replacing with None
                let value_length = value_length
                    .set(&value_length.equal(0), None)?
                    .into_series()
                    .cast(&DataType::Float64)?;

                Ok(sum.map(|sum| &sum / &value_length))
            }
        }
    }

    /// Pipe the `DataFrame` by value into `f` and return whatever `f` returns.
    ///
    /// This allows chaining functions or closures that consume a `DataFrame`. `f` is invoked
    /// exactly once, so any `FnOnce` is accepted (every `Fn` closure or function qualifies).
    pub fn pipe<F, B>(self, f: F) -> PolarsResult<B>
    where
        F: FnOnce(DataFrame) -> PolarsResult<B>,
    {
        f(self)
    }

    /// Pipe a mutable reference to the `DataFrame` into `f` and return whatever `f` returns.
    ///
    /// This allows chaining functions or closures that mutate a `DataFrame` in place. `f` is
    /// invoked exactly once, so any `FnOnce` is accepted (every `Fn` closure or function
    /// qualifies).
    pub fn pipe_mut<F, B>(&mut self, f: F) -> PolarsResult<B>
    where
        F: FnOnce(&mut DataFrame) -> PolarsResult<B>,
    {
        f(self)
    }

    /// Pipe the `DataFrame` by value, together with `args`, into `f` and return its result.
    ///
    /// `args` is handed to `f` unchanged; use a tuple to pass several values. `f` is invoked
    /// exactly once, so any `FnOnce` is accepted (every `Fn` closure or function qualifies).
    pub fn pipe_with_args<F, B, Args>(self, f: F, args: Args) -> PolarsResult<B>
    where
        F: FnOnce(DataFrame, Args) -> PolarsResult<B>,
    {
        f(self, args)
    }

    /// Drop duplicate rows from a `DataFrame`.
    /// *This fails when there is a column of type List in DataFrame*
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///               "flt" => [1., 1., 2., 2., 3., 3.],
    ///               "int" => [1, 1, 2, 2, 3, 3, ],
    ///               "str" => ["a", "a", "b", "b", "c", "c"]
    ///           }?;
    ///
    /// println!("{}", df.drop_duplicates(true, None)?);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Returns
    ///
    /// ```text
    /// +-----+-----+-----+
    /// | flt | int | str |
    /// | --- | --- | --- |
    /// | f64 | i32 | str |
    /// +=====+=====+=====+
    /// | 1   | 1   | "a" |
    /// +-----+-----+-----+
    /// | 2   | 2   | "b" |
    /// +-----+-----+-----+
    /// | 3   | 3   | "c" |
    /// +-----+-----+-----+
    /// ```
    #[deprecated(note = "use DataFrame::unique")]
    pub fn drop_duplicates(
        &self,
        maintain_order: bool,
        subset: Option<&[String]>,
    ) -> PolarsResult<Self> {
        // Both variants keep the first row of every group of duplicates.
        let keep = UniqueKeepStrategy::First;
        if maintain_order {
            self.unique_stable(subset, keep)
        } else {
            self.unique(subset, keep)
        }
    }

    /// Drop duplicate rows from a `DataFrame`.
    /// *This fails when there is a column of type List in DataFrame*
    ///
    /// Stable means that the order is maintained. This has a higher cost than an unstable distinct.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///               "flt" => [1., 1., 2., 2., 3., 3.],
    ///               "int" => [1, 1, 2, 2, 3, 3, ],
    ///               "str" => ["a", "a", "b", "b", "c", "c"]
    ///           }?;
    ///
    /// println!("{}", df.unique_stable(None, UniqueKeepStrategy::First)?);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Returns
    ///
    /// ```text
    /// +-----+-----+-----+
    /// | flt | int | str |
    /// | --- | --- | --- |
    /// | f64 | i32 | str |
    /// +=====+=====+=====+
    /// | 1   | 1   | "a" |
    /// +-----+-----+-----+
    /// | 2   | 2   | "b" |
    /// +-----+-----+-----+
    /// | 3   | 3   | "c" |
    /// +-----+-----+-----+
    /// ```
    pub fn unique_stable(
        &self,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<DataFrame> {
        // `true` => `maintain_order`: delegate to the order-preserving code path.
        self.unique_impl(true, subset, keep)
    }

    /// Unstable distinct. See [`DataFrame::unique_stable`].
    ///
    /// Drops duplicate rows, considering only the columns in `subset` (all columns when
    /// `subset` is `None`); `keep` selects which row of each group of duplicates is retained.
    /// Unlike the stable variant, this one does not request that row order be maintained.
    pub fn unique(
        &self,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<DataFrame> {
        // `false` => `maintain_order` is off.
        self.unique_impl(false, subset, keep)
    }

    /// Shared implementation behind [`DataFrame::unique`] and [`DataFrame::unique_stable`].
    ///
    /// Rows are grouped on the columns named in `subset` (all columns when `subset` is `None`)
    /// and one row per group is kept, as selected by `keep`. `maintain_order` selects the
    /// order-preserving variants.
    ///
    /// # Errors
    ///
    /// Returns an error when the groupby on the selected columns fails.
    fn unique_impl(
        &self,
        maintain_order: bool,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<Self> {
        use UniqueKeepStrategy::*;
        let names = match &subset {
            Some(s) => s.iter().map(|s| &**s).collect(),
            None => self.get_column_names(),
        };

        let columns = match (keep, maintain_order) {
            (First, true) => {
                let gb = self.groupby_stable(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
            }
            (Last, true) => {
                // maintain order by last values, so the sorted groups are not correct as they
                // are sorted by the first value
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                let last_idx: NoNull<IdxCa> = groups
                    .iter()
                    .map(|g| match g {
                        GroupsIndicator::Idx((_first, idx)) => idx[idx.len() - 1],
                        // A slice group covers rows `first..first + len`, so its last row is
                        // `first + len - 1`; `first + len` would be one past the group.
                        GroupsIndicator::Slice([first, len]) => first + len - 1,
                    })
                    .collect();

                let last_idx = last_idx.sort(false);
                return Ok(unsafe { self.take_unchecked(&last_idx) });
            }
            (First, false) => {
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
            }
            (Last, false) => {
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_last(groups) })
            }
        };
        Ok(DataFrame::new_no_checks(columns))
    }

    /// Get a mask of all the unique rows in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Company" => &["Apple", "Microsoft"],
    ///                         "ISIN" => &["US0378331005", "US5949181045"])?;
    /// let ca: ChunkedArray<BooleanType> = df.is_unique()?;
    ///
    /// assert!(ca.all());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_unique(&self) -> PolarsResult<BooleanChunked> {
        // Group on every column so that identical rows end up in the same group.
        let groups = self.groupby(self.get_column_names())?.take_groups();
        let n_rows = self.height() as IdxSize;
        Ok(is_unique_helper(groups, n_rows, true, false))
    }

    /// Get a mask of all the duplicated rows in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Company" => &["Alphabet", "Alphabet"],
    ///                         "ISIN" => &["US02079K3059", "US02079K1079"])?;
    /// let ca: ChunkedArray<BooleanType> = df.is_duplicated()?;
    ///
    /// assert!(!ca.all());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_duplicated(&self) -> PolarsResult<BooleanChunked> {
        // Group on every column so that identical rows end up in the same group.
        let groups = self.groupby(self.get_column_names())?.take_groups();
        let n_rows = self.height() as IdxSize;
        Ok(is_unique_helper(groups, n_rows, false, true))
    }

    /// Build a `DataFrame` with one single-value column per input column, holding that
    /// column's null count under the same name.
    #[must_use]
    pub fn null_count(&self) -> Self {
        let mut counts = Vec::with_capacity(self.columns.len());
        for column in &self.columns {
            let n_nulls = column.null_count() as IdxSize;
            counts.push(Series::new(column.name(), &[n_nulls]));
        }
        Self::new_no_checks(counts)
    }

    /// Hash every row of the `DataFrame` into a single `u64` value.
    ///
    /// The frame is split into one part per thread of the pool, each part is hashed, and the
    /// partial results are appended in order and rechunked.
    #[cfg(feature = "row_hash")]
    pub fn hash_rows(
        &mut self,
        hasher_builder: Option<RandomState>,
    ) -> PolarsResult<UInt64Chunked> {
        let n_parts = POOL.current_num_threads();
        let dfs = split_df(self, n_parts)?;
        let (hashes, _) = df_rows_to_hashes_threaded(&dfs, hasher_builder)?;

        let mut parts = hashes.into_iter();
        let mut combined = parts.next().unwrap();
        parts.for_each(|part| {
            combined.append(&part);
        });
        Ok(combined.rechunk())
    }

    /// Get the common supertype of all columns in this `DataFrame`.
    ///
    /// Returns `None` when the frame has no columns, and `Some(Err(_))` when
    /// `try_get_supertype` fails for a pair of dtypes.
    pub fn get_supertype(&self) -> Option<PolarsResult<DataType>> {
        let mut dtypes = self.columns.iter().map(|s| s.dtype());
        let first = dtypes.next()?.clone();
        Some(dtypes.try_fold(first, |acc, dtype| try_get_supertype(&acc, dtype)))
    }

    #[cfg(feature = "chunked_ids")]
    /// Take elements by a slice of [`ChunkId`]s.
    ///
    /// Uses `apply_columns` (not `apply_columns_par`) to run the take on every column.
    ///
    /// # Safety
    /// Does not do any bound checks.
    /// `sorted` indicates if the chunks are sorted.
    #[doc(hidden)]
    pub unsafe fn _take_chunked_unchecked_seq(&self, idx: &[ChunkId], sorted: IsSorted) -> Self {
        let cols = self.apply_columns(&|s| s._take_chunked_unchecked(idx, sorted));

        DataFrame::new_no_checks(cols)
    }
    #[cfg(feature = "chunked_ids")]
    /// Take elements by a slice of optional [`ChunkId`]s.
    ///
    /// `Utf8` columns go through `_take_opt_chunked_unchecked_threaded`; all other columns use
    /// `_take_opt_chunked_unchecked`.
    ///
    /// # Safety
    /// Does not do any bound checks.
    #[doc(hidden)]
    pub unsafe fn _take_opt_chunked_unchecked_seq(&self, idx: &[Option<ChunkId>]) -> Self {
        let taken = self.apply_columns(&|column| {
            if matches!(column.dtype(), DataType::Utf8) {
                column._take_opt_chunked_unchecked_threaded(idx, true)
            } else {
                column._take_opt_chunked_unchecked(idx)
            }
        });

        DataFrame::new_no_checks(taken)
    }

    // Takes rows by `ChunkId` on every column via `apply_columns_par`; `Utf8` columns use the
    // `_threaded` take variant. No bounds checks are performed (see the `unchecked` callees).
    #[cfg(feature = "chunked_ids")]
    pub(crate) unsafe fn take_chunked_unchecked(&self, idx: &[ChunkId], sorted: IsSorted) -> Self {
        let taken = self.apply_columns_par(&|column| {
            if matches!(column.dtype(), DataType::Utf8) {
                column._take_chunked_unchecked_threaded(idx, sorted, true)
            } else {
                column._take_chunked_unchecked(idx, sorted)
            }
        });

        DataFrame::new_no_checks(taken)
    }

    // Takes rows by optional `ChunkId` on every column via `apply_columns_par`; `Utf8` columns
    // use the `_threaded` take variant. No bounds checks are performed (see the `unchecked`
    // callees).
    #[cfg(feature = "chunked_ids")]
    pub(crate) unsafe fn take_opt_chunked_unchecked(&self, idx: &[Option<ChunkId>]) -> Self {
        let taken = self.apply_columns_par(&|column| {
            if matches!(column.dtype(), DataType::Utf8) {
                column._take_opt_chunked_unchecked_threaded(idx, true)
            } else {
                column._take_opt_chunked_unchecked(idx)
            }
        });

        DataFrame::new_no_checks(taken)
    }

    /// Take the rows at the positions in `idx`, treating `idx` as unsorted.
    ///
    /// Be careful with allowing threads when calling this in a large hot loop:
    /// every thread split may end up on the rayon stack and lead to a stack overflow.
    ///
    /// # Safety
    /// Forwards to [`DataFrame::_take_unchecked_slice2`], which is `unsafe`.
    /// NOTE(review): presumably every value in `idx` must be in bounds — confirm against
    /// `take_unchecked_impl`.
    #[doc(hidden)]
    pub unsafe fn _take_unchecked_slice(&self, idx: &[IdxSize], allow_threads: bool) -> Self {
        self._take_unchecked_slice2(idx, allow_threads, IsSorted::Not)
    }

    /// Take the rows at the positions in `idx`, with a hint about whether `idx` is sorted.
    ///
    /// The slice is temporarily wrapped in an `IdxCa` without copying, by building a `Vec`
    /// over the slice's own memory. That `Vec` is recovered afterwards and wrapped in
    /// `ManuallyDrop`, so the caller's memory is never freed here.
    ///
    /// # Safety
    /// NOTE(review): presumably every value in `idx` must be in bounds — confirm against
    /// `take_unchecked_impl`. `sorted` is passed to `set_sorted2` unverified; debug builds only
    /// compare the first and last index.
    #[doc(hidden)]
    pub unsafe fn _take_unchecked_slice2(
        &self,
        idx: &[IdxSize],
        allow_threads: bool,
        sorted: IsSorted,
    ) -> Self {
        // Cheap sanity check of the `sorted` hint (debug builds only): compares just the first
        // and last index, and only when there are more than two indices.
        #[cfg(debug_assertions)]
        {
            if idx.len() > 2 {
                match sorted {
                    IsSorted::Ascending => {
                        assert!(idx[0] <= idx[idx.len() - 1]);
                    }
                    IsSorted::Descending => {
                        assert!(idx[0] >= idx[idx.len() - 1]);
                    }
                    _ => {}
                }
            }
        }
        let ptr = idx.as_ptr() as *mut IdxSize;
        let len = idx.len();

        // Create a temporary `Vec` that aliases the memory of `idx`. It must never be dropped,
        // because that memory belongs to the caller.
        let mut ca = IdxCa::from_vec("", Vec::from_raw_parts(ptr, len, len));
        ca.set_sorted2(sorted);
        let out = self.take_unchecked_impl(&ca, allow_threads);

        // ref count of buffers should be one because we dropped all allocations
        let arr = {
            // Move the single chunk out of `ca`, leaving `ca` without chunks.
            let arr_ref = std::mem::take(&mut ca.chunks).pop().unwrap();
            arr_ref
                .as_any()
                .downcast_ref::<PrimitiveArray<IdxSize>>()
                .unwrap()
                .clone()
        };
        // the only owned heap allocation is the `Vec` we created and must not be dropped
        let _ = std::mem::ManuallyDrop::new(arr.into_mut().right().unwrap());
        out
    }

    // Groups the frame on `cols` (order-preserving groupby when `stable`) and materializes one
    // `DataFrame` per group.
    #[cfg(feature = "partition_by")]
    #[doc(hidden)]
    pub fn _partition_by_impl(
        &self,
        cols: &[String],
        stable: bool,
    ) -> PolarsResult<Vec<DataFrame>> {
        let groups = if stable {
            self.groupby_stable(cols)?.take_groups()
        } else {
            self.groupby(cols)?.take_groups()
        };

        // The groups are processed in parallel, but each individual take runs with
        // `allow_threads = false`: there is a lot of parallelization in take and nesting it
        // here may easily overflow the stack.
        let frames: Vec<DataFrame> = POOL.install(|| match groups {
            GroupsProxy::Idx(idx) => idx
                .into_par_iter()
                // groups are in bounds
                .map(|(_, group)| unsafe { self._take_unchecked_slice(&group, false) })
                .collect(),
            GroupsProxy::Slice { groups, .. } => groups
                .into_par_iter()
                .map(|[first, len]| self.slice(first as i64, len as usize))
                .collect(),
        });
        Ok(frames)
    }

    /// Split into multiple `DataFrame`s, one per group of the columns `cols`.
    ///
    /// The groups come from the non-stable groupby; use [`DataFrame::partition_by_stable`]
    /// to maintain the order of the groups.
    #[cfg(feature = "partition_by")]
    #[cfg_attr(docsrs, doc(cfg(feature = "partition_by")))]
    pub fn partition_by(&self, cols: impl IntoVec<String>) -> PolarsResult<Vec<DataFrame>> {
        self._partition_by_impl(&cols.into_vec(), false)
    }

    /// Split into multiple `DataFrame`s, one per group of the columns `cols`.
    /// The order of the groups is maintained.
    #[cfg(feature = "partition_by")]
    #[cfg_attr(docsrs, doc(cfg(feature = "partition_by")))]
    pub fn partition_by_stable(&self, cols: impl IntoVec<String>) -> PolarsResult<Vec<DataFrame>> {
        self._partition_by_impl(&cols.into_vec(), true)
    }

    /// Unnest the given `Struct` columns: the fields of each named `Struct` column are
    /// inserted as columns in place of the struct column itself.
    #[cfg(feature = "dtype-struct")]
    #[cfg_attr(docsrs, doc(cfg(feature = "dtype-struct")))]
    pub fn unnest<I: IntoVec<String>>(&self, cols: I) -> PolarsResult<DataFrame> {
        // Deduplicate the requested names by collecting them into a set.
        let names: PlHashSet<String> = cols.into_vec().into_iter().collect();
        self.unnest_impl(names)
    }

    // Replaces every column whose name is in `cols` by the fields of its struct array and
    // keeps all other columns as they are. Fails when a named column is not a struct, when a
    // name does not exist in the frame, or when `DataFrame::new` rejects the result.
    #[cfg(feature = "dtype-struct")]
    fn unnest_impl(&self, cols: PlHashSet<String>) -> PolarsResult<DataFrame> {
        let capacity = (self.width() * 2).min(self.width() + 128);
        let mut new_cols = Vec::with_capacity(capacity);
        let mut n_unnested = 0;
        for s in &self.columns {
            if !cols.contains(s.name()) {
                new_cols.push(s.clone());
                continue;
            }
            new_cols.extend_from_slice(s.struct_()?.fields());
            n_unnested += 1;
        }
        if n_unnested != cols.len() {
            // One or more requested columns were not found: report the first missing name.
            let schema = self.schema();
            if let Some(missing) = cols.into_iter().find(|name| schema.get(name).is_none()) {
                return Err(PolarsError::NotFound(missing.into()));
            }
        }
        DataFrame::new(new_cols)
    }
}

/// Iterator that yields one [`ArrowChunk`] per chunk index, built from the chunk with that
/// index in every column.
pub struct RecordBatchIter<'a> {
    columns: &'a Vec<Series>,
    idx: usize,
    n_chunks: usize,
}

impl<'a> Iterator for RecordBatchIter<'a> {
    type Item = ArrowChunk;

    fn next(&mut self) -> Option<Self::Item> {
        if self.idx >= self.n_chunks {
            return None;
        }
        // Assemble a batch from the chunk at position `idx` of every column.
        let chunk_idx = self.idx;
        let arrays = self
            .columns
            .iter()
            .map(|column| column.to_arrow(chunk_idx))
            .collect();
        self.idx += 1;

        Some(ArrowChunk::new(arrays))
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.n_chunks - self.idx;
        (remaining, Some(remaining))
    }
}

/// Iterator that yields one [`ArrowChunk`] per step by advancing the chunk iterator of every
/// column in lockstep. Iteration ends as soon as any column runs out of chunks.
pub struct PhysRecordBatchIter<'a> {
    iters: Vec<std::slice::Iter<'a, ArrayRef>>,
}

impl Iterator for PhysRecordBatchIter<'_> {
    type Item = ArrowChunk;

    fn next(&mut self) -> Option<Self::Item> {
        // Collecting an empty iterator into `Option<Vec<_>>` yields `Some(vec![])`, so without
        // this guard an iterator over zero columns would produce empty chunks forever.
        if self.iters.is_empty() {
            return None;
        }
        self.iters
            .iter_mut()
            .map(|phys_iter| phys_iter.next().cloned())
            .collect::<Option<Vec<_>>>()
            .map(ArrowChunk::new)
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        // `next` stops at the shortest column iterator, so the exact number of remaining
        // items is the minimum remaining length (zero when there are no columns).
        let remaining = self.iters.iter().map(|iter| iter.len()).min().unwrap_or(0);
        (remaining, Some(remaining))
    }
}

impl Default for DataFrame {
    fn default() -> Self {
        DataFrame::new_no_checks(vec![])
    }
}

impl From<DataFrame> for Vec<Series> {
    fn from(df: DataFrame) -> Self {
        df.columns
    }
}

// Checks whether `right` can be vstacked onto / used to extend `left`: both series must agree
// on dtype and on name. A dtype mismatch is reported in preference to a name mismatch.
fn can_extend(left: &Series, right: &Series) -> PolarsResult<()> {
    if left.dtype() != right.dtype() {
        let msg = format!(
            "cannot vstack: because column datatypes (dtypes) in the two DataFrames do not match for \
             left.name='{}' with left.dtype={} != right.dtype={} with right.name='{}'",
            left.name(),
            left.dtype(),
            right.dtype(),
            right.name()
        );
        return Err(PolarsError::SchemaMisMatch(msg.into()));
    }
    if left.name() != right.name() {
        let msg = format!(
            "cannot vstack: because column names in the two DataFrames do not match for \
             left.name='{}' != right.name='{}'",
            left.name(),
            right.name()
        );
        return Err(PolarsError::SchemaMisMatch(msg.into()));
    }
    Ok(())
}

More examples

Hide additional examples

src/frame/groupby/aggregations/dispatch.rs (line 10)

    // Casts `out` back to this series' dtype when that dtype is logical; otherwise `out` is
    // returned untouched.
    fn restore_logical(&self, out: Series) -> Series {
        let dtype = self.dtype();
        if !dtype.is_logical() {
            return out;
        }
        out.cast(dtype).unwrap()
    }

    /// Per group, count the non-null values; empty groups yield `None`.
    #[doc(hidden)]
    pub fn agg_valid_count(&self, groups: &GroupsProxy) -> Series {
        match groups {
            GroupsProxy::Idx(groups) => agg_helper_idx_on_all::<IdxType, _>(groups, |idx| {
                debug_assert!(idx.len() <= self.len());
                if idx.is_empty() {
                    return None;
                }
                if !self.has_validity() {
                    // No validity buffer: every value in the group counts.
                    return Some(idx.len() as IdxSize);
                }
                let taken =
                    unsafe { self.take_iter_unchecked(&mut idx.iter().map(|i| *i as usize)) };
                Some((taken.len() - taken.null_count()) as IdxSize)
            }),
            GroupsProxy::Slice { groups, .. } => {
                _agg_helper_slice::<IdxType, _>(groups, |[first, len]| {
                    debug_assert!(len <= self.len() as IdxSize);
                    if len == 0 {
                        return None;
                    }
                    if !self.has_validity() {
                        // No validity buffer: every value in the group counts.
                        return Some(len);
                    }
                    let window = self.slice_from_offsets(first, len);
                    Some((window.len() - window.null_count()) as IdxSize)
                })
            }
        }
    }

    /// Per group, take the value at the group's first index; empty groups yield null.
    /// The result is cast back to the logical dtype when this series has one.
    #[doc(hidden)]
    pub unsafe fn agg_first(&self, groups: &GroupsProxy) -> Series {
        let out = match groups {
            GroupsProxy::Idx(groups) => {
                let mut first_indices = groups
                    .iter()
                    .map(|(first, idx)| (!idx.is_empty()).then(|| first as usize));
                // Safety:
                // groups are always in bounds
                self.take_opt_iter_unchecked(&mut first_indices)
            }
            GroupsProxy::Slice { groups, .. } => {
                let mut first_indices = groups
                    .iter()
                    .map(|&[first, len]| (len != 0).then(|| first as usize));
                // Safety:
                // groups are always in bounds
                self.take_opt_iter_unchecked(&mut first_indices)
            }
        };
        self.restore_logical(out)
    }

    /// Per group, count the unique values; empty groups, and groups for which `n_unique`
    /// fails, yield `None`.
    #[doc(hidden)]
    pub unsafe fn agg_n_unique(&self, groups: &GroupsProxy) -> Series {
        match groups {
            GroupsProxy::Idx(groups) => agg_helper_idx_on_all::<IdxType, _>(groups, |idx| {
                debug_assert!(idx.len() <= self.len());
                if idx.is_empty() {
                    return None;
                }
                let taken = self.take_iter_unchecked(&mut idx.iter().map(|i| *i as usize));
                taken.n_unique().ok().map(|n| n as IdxSize)
            }),
            GroupsProxy::Slice { groups, .. } => {
                _agg_helper_slice::<IdxType, _>(groups, |[first, len]| {
                    debug_assert!(len <= self.len() as IdxSize);
                    if len == 0 {
                        return None;
                    }
                    let window = self.slice_from_offsets(first, len);
                    window.n_unique().ok().map(|n| n as IdxSize)
                })
            }
        }
    }

    /// Per group, compute the median. Dtypes that are neither numeric nor temporal produce a
    /// series of nulls with one entry per group.
    #[doc(hidden)]
    pub unsafe fn agg_median(&self, groups: &GroupsProxy) -> Series {
        use DataType::*;

        match self.dtype() {
            Float32 => SeriesWrap(self.f32().unwrap().clone()).agg_median(groups),
            Float64 => SeriesWrap(self.f64().unwrap().clone()).agg_median(groups),
            dt if dt.is_numeric() || dt.is_temporal() => {
                let physical = self.to_physical_repr();
                let aggregated = apply_method_physical_integer!(physical, agg_median, groups);
                if !dt.is_logical() {
                    return aggregated;
                }
                // Go back to the physical dtype first, then restore the logical dtype.
                aggregated.cast(physical.dtype()).unwrap().cast(dt).unwrap()
            }
            _ => Series::full_null("", groups.len(), self.dtype()),
        }
    }

    /// Per group, compute the given `quantile` with the `interpol` strategy. Dtypes that are
    /// neither numeric nor temporal produce a series of nulls with one entry per group.
    #[doc(hidden)]
    pub unsafe fn agg_quantile(
        &self,
        groups: &GroupsProxy,
        quantile: f64,
        interpol: QuantileInterpolOptions,
    ) -> Series {
        use DataType::*;

        match self.dtype() {
            Float32 => {
                let wrapped = SeriesWrap(self.f32().unwrap().clone());
                wrapped.agg_quantile(groups, quantile, interpol)
            }
            Float64 => {
                let wrapped = SeriesWrap(self.f64().unwrap().clone());
                wrapped.agg_quantile(groups, quantile, interpol)
            }
            dt if dt.is_numeric() || dt.is_temporal() => {
                let physical = self.to_physical_repr();
                let aggregated = apply_method_physical_integer!(
                    physical,
                    agg_quantile,
                    groups,
                    quantile,
                    interpol
                );
                if !dt.is_logical() {
                    return aggregated;
                }
                // Go back to the physical dtype first, then restore the logical dtype.
                aggregated.cast(physical.dtype()).unwrap().cast(dt).unwrap()
            }
            _ => Series::full_null("", groups.len(), self.dtype()),
        }
    }

    /// Per group, compute the mean. Booleans are cast to `Float64` first; durations are
    /// aggregated on their physical representation and cast back. Unsupported dtypes produce
    /// a series of nulls with one entry per group.
    #[doc(hidden)]
    pub unsafe fn agg_mean(&self, groups: &GroupsProxy) -> Series {
        use DataType::*;

        match self.dtype() {
            Boolean => {
                let as_float = self.cast(&Float64).unwrap();
                as_float.agg_mean(groups)
            }
            Float32 => SeriesWrap(self.f32().unwrap().clone()).agg_mean(groups),
            Float64 => SeriesWrap(self.f64().unwrap().clone()).agg_mean(groups),
            dt if dt.is_numeric() => {
                apply_method_physical_integer!(self, agg_mean, groups)
            }
            dt @ Duration(_) => {
                let physical = self.to_physical_repr();
                // `agg_mean` returns Float64, so go through Int64 to get back to the logical
                // duration type.
                let mean = physical.agg_mean(groups);
                mean.cast(&Int64).unwrap().cast(dt).unwrap()
            }
            _ => Series::full_null("", groups.len(), self.dtype()),
        }
    }

src/testing.rs (line 9)

    /// Check whether two series are equal. Series that contain any null, or that differ in
    /// dtype, are never equal; otherwise the result of `series_equal_missing` is returned.
    pub fn series_equal(&self, other: &Series) -> bool {
        let no_nulls = self.null_count() == 0 && other.null_count() == 0;
        no_nulls && self.dtype() == other.dtype() && self.series_equal_missing(other)
    }

src/frame/arithmetic.rs (line 13)

/// Find the supertype shared by `rhs` and every column of `df`.
///
/// Starts from the dtype of `rhs` and widens it column by column with
/// `try_get_supertype`; the first failure is returned as the error.
fn get_supertype_all(df: &DataFrame, rhs: &Series) -> PolarsResult<DataType> {
    let start = rhs.dtype().clone();
    df.columns
        .iter()
        .try_fold(start, |acc, col| try_get_supertype(col.dtype(), &acc))
}

// Shared body for the `&DataFrame <op> &Series` operator impls.
//
// Arguments: the DataFrame expression, the Series expression and the operator
// token. The expansion uses `?`, so it must be placed in a function returning
// `PolarsResult<DataFrame>`.
macro_rules! impl_arithmetic {
    ($self:expr, $rhs:expr, $operand: tt) => {{
        // Resolve one supertype for the rhs and all columns, then cast the rhs once.
        let st = get_supertype_all($self, $rhs)?;
        let rhs = $rhs.cast(&st)?;
        // Cast each column to the supertype and apply the operator, using `par_iter`.
        let cols = $self.columns.par_iter().map(|s| {
            Ok(&s.cast(&st)? $operand &rhs)
        }).collect::<PolarsResult<_>>()?;
        Ok(DataFrame::new_no_checks(cols))
    }}
}

/// Column-wise `column + rhs` for every column of a borrowed DataFrame.
impl Add<&Series> for &DataFrame {
    type Output = PolarsResult<DataFrame>;

    /// Expands `impl_arithmetic!` with the `+` operator.
    fn add(self, rhs: &Series) -> Self::Output {
        impl_arithmetic!(self, rhs, +)
    }
}

/// Owned-DataFrame variant of `+`: borrows `self` and reuses the `&DataFrame` impl.
impl Add<&Series> for DataFrame {
    type Output = PolarsResult<DataFrame>;

    fn add(self, rhs: &Series) -> Self::Output {
        &self + rhs
    }
}

/// Column-wise `column - rhs` for every column of a borrowed DataFrame.
impl Sub<&Series> for &DataFrame {
    type Output = PolarsResult<DataFrame>;

    /// Expands `impl_arithmetic!` with the `-` operator.
    fn sub(self, rhs: &Series) -> Self::Output {
        impl_arithmetic!(self, rhs, -)
    }
}

/// Owned-DataFrame variant of `-`: borrows `self` and reuses the `&DataFrame` impl.
impl Sub<&Series> for DataFrame {
    type Output = PolarsResult<DataFrame>;

    fn sub(self, rhs: &Series) -> Self::Output {
        &self - rhs
    }
}

/// Column-wise `column * rhs` for every column of a borrowed DataFrame.
impl Mul<&Series> for &DataFrame {
    type Output = PolarsResult<DataFrame>;

    /// Expands `impl_arithmetic!` with the `*` operator.
    fn mul(self, rhs: &Series) -> Self::Output {
        impl_arithmetic!(self, rhs, *)
    }
}

/// Owned-DataFrame variant of `*`: borrows `self` and reuses the `&DataFrame` impl.
impl Mul<&Series> for DataFrame {
    type Output = PolarsResult<DataFrame>;

    fn mul(self, rhs: &Series) -> Self::Output {
        &self * rhs
    }
}

/// Column-wise `column / rhs` for every column of a borrowed DataFrame.
impl Div<&Series> for &DataFrame {
    type Output = PolarsResult<DataFrame>;

    /// Expands `impl_arithmetic!` with the `/` operator.
    fn div(self, rhs: &Series) -> Self::Output {
        impl_arithmetic!(self, rhs, /)
    }
}

/// Owned-DataFrame variant of `/`: borrows `self` and reuses the `&DataFrame` impl.
impl Div<&Series> for DataFrame {
    type Output = PolarsResult<DataFrame>;

    fn div(self, rhs: &Series) -> Self::Output {
        &self / rhs
    }
}

/// Column-wise `column % rhs` for every column of a borrowed DataFrame.
impl Rem<&Series> for &DataFrame {
    type Output = PolarsResult<DataFrame>;

    /// Expands `impl_arithmetic!` with the `%` operator.
    fn rem(self, rhs: &Series) -> Self::Output {
        impl_arithmetic!(self, rhs, %)
    }
}

/// Owned-DataFrame variant of `%`: borrows `self` and reuses the `&DataFrame` impl.
impl Rem<&Series> for DataFrame {
    type Output = PolarsResult<DataFrame>;

    fn rem(self, rhs: &Series) -> Self::Output {
        &self % rhs
    }
}

impl DataFrame {
    /// Apply the binary function `f` column-by-column to two DataFrames that may
    /// differ in height and/or width.
    ///
    /// Columns are paired positionally. Each pair is cast to its common supertype
    /// and the shorter column is padded with nulls up to the larger height before
    /// `f` is called. Columns that exist only in the wider frame are emitted as
    /// all-null columns that keep their name and dtype.
    ///
    /// # Errors
    /// Propagates failures from supertype resolution, casting, null-extension,
    /// `f` itself, and the final `DataFrame::new`.
    fn binary_aligned(
        &self,
        other: &DataFrame,
        f: &(dyn Fn(&Series, &Series) -> PolarsResult<Series> + Sync + Send),
    ) -> PolarsResult<DataFrame> {
        let max_len = std::cmp::max(self.height(), other.height());
        let max_width = std::cmp::max(self.width(), other.width());
        let mut cols = self
            .get_columns()
            .par_iter()
            .zip(other.get_columns().par_iter())
            .map(|(l, r)| {
                let diff_l = max_len - l.len();
                let diff_r = max_len - r.len();

                let st = try_get_supertype(l.dtype(), r.dtype())?;
                let mut l = l.cast(&st)?;
                let mut r = r.cast(&st)?;

                if diff_l > 0 {
                    l = l.extend_constant(AnyValue::Null, diff_l)?;
                };
                if diff_r > 0 {
                    r = r.extend_constant(AnyValue::Null, diff_r)?;
                };

                f(&l, &r)
            })
            .collect::<PolarsResult<Vec<_>>>()?;

        let col_len = cols.len();
        if col_len < max_width {
            // `zip` stopped at the narrower frame, so the wider one owns the
            // remaining columns.
            let df = if col_len < self.width() { self } else { other };

            // Walk the remaining *columns*: the bound is the width, not the
            // height. Using the height here indexed past the column vector for
            // tall frames and silently dropped columns for short, wide ones.
            for i in col_len..max_width {
                let s = &df.get_columns()[i];
                let name = s.name();
                let dtype = s.dtype();

                // trick to fill a series with nulls
                let vals: &[Option<i32>] = &[None];
                let s = Series::new(name, vals).cast(dtype)?;
                cols.push(s.new_from_index(0, max_len))
            }
        }
        DataFrame::new(cols)
    }

src/series/series_trait.rs (line 208)

    /// Default bitwise AND: always fails with `InvalidOperation`, naming this dtype.
    fn bitand(&self, _other: &Series) -> PolarsResult<Series> {
        let msg = format!(
            "bitwise 'AND' operation not supported for dtype {:?}",
            self.dtype()
        );
        Err(PolarsError::InvalidOperation(msg.into()))
    }

    /// Default bitwise OR: always fails with `InvalidOperation`, naming this dtype.
    fn bitor(&self, _other: &Series) -> PolarsResult<Series> {
        let msg = format!(
            "bitwise 'OR' operation not supported for dtype {:?}",
            self.dtype()
        );
        Err(PolarsError::InvalidOperation(msg.into()))
    }

    /// Default bitwise XOR: always fails with `InvalidOperation`, naming this dtype.
    fn bitxor(&self, _other: &Series) -> PolarsResult<Series> {
        let msg = format!(
            "bitwise 'XOR' operation not supported for dtype {:?}",
            self.dtype()
        );
        Err(PolarsError::InvalidOperation(msg.into()))
    }

    /// Get the lengths of the underlying chunks.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn chunk_lengths(&self) -> ChunkIdIter {
        invalid_operation_panic!(self)
    }
    /// Name of the series.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn name(&self) -> &str {
        invalid_operation_panic!(self)
    }

    /// Get the field (used in schema).
    ///
    /// Forwards to `_field()`.
    fn field(&self) -> Cow<Field> {
        self._field()
    }

    /// Get the data type of the series.
    ///
    /// Forwards to `_dtype()`.
    fn dtype(&self) -> &DataType {
        self._dtype()
    }

    /// Underlying chunks.
    ///
    /// Required method: there is no default implementation.
    fn chunks(&self) -> &Vec<ArrayRef>;

    /// Number of chunks in this Series, i.e. the length of `chunks()`.
    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    /// Shrink the capacity of this array to fit its length.
    ///
    /// # Panics
    /// The default implementation always panics, reporting the dtype.
    fn shrink_to_fit(&mut self) {
        panic!("shrink to fit not supported for dtype {:?}", self.dtype())
    }

    /// Take `num_elements` from the top as a zero copy view.
    ///
    /// Equivalent to `self.slice(0, num_elements)`.
    fn limit(&self, num_elements: usize) -> Series {
        self.slice(0, num_elements)
    }

    /// Get a zero copy view of the data.
    ///
    /// When `_offset` is negative, the offset is counted from the
    /// end of the array.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn slice(&self, _offset: i64, _length: usize) -> Series {
        invalid_operation_panic!(self)
    }

    // Hidden from the public docs. The default body only invokes
    // `invalid_operation_panic!`.
    #[doc(hidden)]
    fn append(&mut self, _other: &Series) -> PolarsResult<()> {
        invalid_operation_panic!(self)
    }

    // Hidden from the public docs. The default body only invokes
    // `invalid_operation_panic!`.
    #[doc(hidden)]
    fn extend(&mut self, _other: &Series) -> PolarsResult<()> {
        invalid_operation_panic!(self)
    }

    /// Filter by boolean mask. This operation clones data.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn filter(&self, _filter: &BooleanChunked) -> PolarsResult<Series> {
        invalid_operation_panic!(self)
    }

    // Required method, only compiled with the `chunked_ids` feature.
    // NOTE(review): the safety contract is not documented here; presumably every
    // `ChunkId` in `by` must be in bounds — confirm with the implementors.
    #[doc(hidden)]
    #[cfg(feature = "chunked_ids")]
    unsafe fn _take_chunked_unchecked(&self, by: &[ChunkId], sorted: IsSorted) -> Series;

    // Required method, only compiled with the `chunked_ids` feature.
    // NOTE(review): the safety contract is not documented here; presumably every
    // `Some(ChunkId)` in `by` must be in bounds — confirm with the implementors.
    #[doc(hidden)]
    #[cfg(feature = "chunked_ids")]
    unsafe fn _take_opt_chunked_unchecked(&self, by: &[Option<ChunkId>]) -> Series;

    /// Take by index from an iterator. This operation clones the data.
    ///
    /// Required method: there is no default implementation.
    fn take_iter(&self, _iter: &mut dyn TakeIterator) -> PolarsResult<Series>;

    /// Take by index from an iterator. This operation clones the data.
    ///
    /// Required method: there is no default implementation.
    ///
    /// # Safety
    ///
    /// - This doesn't check any bounds.
    /// - The iterator must be `TrustedLen`.
    unsafe fn take_iter_unchecked(&self, _iter: &mut dyn TakeIterator) -> Series;

    /// Take by index if the `ChunkedArray` contains a single chunk.
    ///
    /// Required method: there is no default implementation.
    ///
    /// # Safety
    /// This doesn't check any bounds.
    unsafe fn take_unchecked(&self, _idx: &IdxCa) -> PolarsResult<Series>;

    /// Take by index from an iterator over optional indices. This operation
    /// clones the data.
    ///
    /// Required method: there is no default implementation.
    ///
    /// # Safety
    ///
    /// - This doesn't check any bounds.
    /// - The iterator must be `TrustedLen`.
    unsafe fn take_opt_iter_unchecked(&self, _iter: &mut dyn TakeIteratorNulls) -> Series;

    /// Take by index from an iterator. This operation clones the data.
    ///
    /// Only compiled with the `take_opt_iter` feature. The default body only
    /// invokes `invalid_operation_panic!`.
    ///
    /// TODO: consider removing this method.
    #[cfg(feature = "take_opt_iter")]
    #[cfg_attr(docsrs, doc(cfg(feature = "take_opt_iter")))]
    fn take_opt_iter(&self, _iter: &mut dyn TakeIteratorNulls) -> PolarsResult<Series> {
        invalid_operation_panic!(self)
    }

    /// Take by index. This operation clones the data.
    ///
    /// Required method: there is no default implementation.
    fn take(&self, _indices: &IdxCa) -> PolarsResult<Series>;

    /// Get the length of the series.
    ///
    /// Required method: there is no default implementation.
    fn len(&self) -> usize;

    /// Check if the Series is empty, i.e. `len()` is zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Aggregate all chunks to a contiguous array of memory.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn rechunk(&self) -> Series {
        invalid_operation_panic!(self)
    }

    /// Take every `n`th value as a new Series.
    ///
    /// Required method: there is no default implementation.
    fn take_every(&self, n: usize) -> Series;

    /// Return a new Series without null values.
    ///
    /// When there are no nulls the inner array is cloned as-is; otherwise the
    /// Series is filtered by its `is_not_null()` mask.
    fn drop_nulls(&self) -> Series {
        if self.null_count() != 0 {
            let keep = self.is_not_null();
            return self.filter(&keep).unwrap();
        }
        Series(self.clone_inner())
    }

    /// Returns the mean value in the array.
    /// Returns an option because the array is nullable.
    ///
    /// The default implementation always returns `None`.
    fn mean(&self) -> Option<f64> {
        None
    }

    /// Returns the median value in the array.
    /// Returns an option because the array is nullable.
    ///
    /// The default implementation always returns `None`.
    fn median(&self) -> Option<f64> {
        None
    }

    /// Create a new Series of `_length` elements, all equal to the value found at
    /// `_index`.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use polars_core::prelude::*;
    /// let s = Series::new("a", [0i32, 1, 8]);
    /// let s2 = s.new_from_index(2, 4);
    /// assert_eq!(Vec::from(s2.i32().unwrap()), &[Some(8), Some(8), Some(8), Some(8)])
    /// ```
    fn new_from_index(&self, _index: usize, _length: usize) -> Series {
        invalid_operation_panic!(self)
    }

    /// Cast the Series to `_data_type`.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn cast(&self, _data_type: &DataType) -> PolarsResult<Series> {
        invalid_operation_panic!(self)
    }

    /// Get a single value by index. Don't use this operation in loops, as a runtime
    /// cast is needed for every iteration.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn get(&self, _index: usize) -> PolarsResult<AnyValue> {
        invalid_operation_panic!(self)
    }

    /// Get a single value by index. Don't use this operation in loops, as a runtime
    /// cast is needed for every iteration.
    ///
    /// This may refer to physical types.
    ///
    /// Only compiled with the `private` feature. The default body only invokes
    /// `invalid_operation_panic!`.
    ///
    /// # Safety
    /// Does not do any bounds checking.
    #[cfg(feature = "private")]
    unsafe fn get_unchecked(&self, _index: usize) -> AnyValue {
        invalid_operation_panic!(self)
    }

    /// Sort the Series according to `_options`.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn sort_with(&self, _options: SortOptions) -> Series {
        invalid_operation_panic!(self)
    }

    /// Retrieve the indexes needed for a sort.
    ///
    /// The default body only invokes `invalid_operation_panic!`; `options` is
    /// unused there, hence the `allow(unused)`.
    #[allow(unused)]
    fn argsort(&self, options: SortOptions) -> IdxCa {
        invalid_operation_panic!(self)
    }

    /// Count the null values.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn null_count(&self) -> usize {
        invalid_operation_panic!(self)
    }

    /// Return whether any of the chunks in this [`ChunkedArray`] have a validity
    /// bitmap. No bitmap means no null values.
    ///
    /// Required method: there is no default implementation.
    fn has_validity(&self) -> bool;

    /// Get unique values in the Series.
    ///
    /// The default body invokes `invalid_operation!` (note: not the `_panic`
    /// variant used by most other defaults in this trait).
    fn unique(&self) -> PolarsResult<Series> {
        invalid_operation!(self)
    }

    /// Get the number of unique values in the Series.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn n_unique(&self) -> PolarsResult<usize> {
        invalid_operation_panic!(self)
    }

    /// Get the first indexes of the unique values.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn arg_unique(&self) -> PolarsResult<IdxCa> {
        invalid_operation_panic!(self)
    }

    /// Get the index of the minimum value.
    ///
    /// The default implementation always returns `None`.
    fn arg_min(&self) -> Option<usize> {
        None
    }

    /// Get the index of the maximum value.
    ///
    /// The default implementation always returns `None`.
    fn arg_max(&self) -> Option<usize> {
        None
    }

    /// Get a mask of the null values.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn is_null(&self) -> BooleanChunked {
        invalid_operation_panic!(self)
    }

    /// Get a mask of the non-null values.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn is_not_null(&self) -> BooleanChunked {
        invalid_operation_panic!(self)
    }

    /// Get a mask of all the unique values.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn is_unique(&self) -> PolarsResult<BooleanChunked> {
        invalid_operation_panic!(self)
    }

    /// Get a mask of all the duplicated values.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn is_duplicated(&self) -> PolarsResult<BooleanChunked> {
        invalid_operation_panic!(self)
    }

    /// Return a Series in reversed order.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn reverse(&self) -> Series {
        invalid_operation_panic!(self)
    }

    /// Rechunk and return a pointer to the start of the Series.
    /// Only implemented for numeric types.
    ///
    /// The default implementation always returns an `InvalidOperation` error.
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        let reason = "operation 'as_single_ptr' not supported";
        Err(PolarsError::InvalidOperation(reason.into()))
    }

    /// Shift the values by a given period and fill the slots that become empty due
    /// to this operation with `None`.
    ///
    /// A positive `_periods` shifts towards the end, a negative one towards the
    /// start (see the example below).
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    ///
    /// *NOTE: If you want to fill the Nones with a value use the
    /// [`shift` operation on `ChunkedArray<T>`](../chunked_array/ops/trait.ChunkShift.html).*
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example() -> PolarsResult<()> {
    ///     let s = Series::new("series", &[1, 2, 3]);
    ///
    ///     let shifted = s.shift(1);
    ///     assert_eq!(Vec::from(shifted.i32()?), &[None, Some(1), Some(2)]);
    ///
    ///     let shifted = s.shift(-1);
    ///     assert_eq!(Vec::from(shifted.i32()?), &[Some(2), Some(3), None]);
    ///
    ///     let shifted = s.shift(2);
    ///     assert_eq!(Vec::from(shifted.i32()?), &[None, None, Some(1)]);
    ///
    ///     Ok(())
    /// }
    /// example();
    /// ```
    fn shift(&self, _periods: i64) -> Series {
        invalid_operation_panic!(self)
    }

    /// Replace None values using one of the following strategies:
    /// * Forward fill (replace None with the previous value)
    /// * Backward fill (replace None with the next value)
    /// * Mean fill (replace None with the mean of the whole array)
    /// * Min fill (replace None with the minimum of the whole array)
    /// * Max fill (replace None with the maximum of the whole array)
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    ///
    /// *NOTE: If you want to fill the Nones with a value use the
    /// [`fill_null` operation on `ChunkedArray<T>`](../chunked_array/ops/trait.ChunkFillNull.html)*.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example() -> PolarsResult<()> {
    ///     let s = Series::new("some_missing", &[Some(1), None, Some(2)]);
    ///
    ///     let filled = s.fill_null(FillNullStrategy::Forward(None))?;
    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
    ///
    ///     let filled = s.fill_null(FillNullStrategy::Backward(None))?;
    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(2), Some(2)]);
    ///
    ///     let filled = s.fill_null(FillNullStrategy::Min)?;
    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
    ///
    ///     let filled = s.fill_null(FillNullStrategy::Max)?;
    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(2), Some(2)]);
    ///
    ///     let filled = s.fill_null(FillNullStrategy::Mean)?;
    ///     assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);
    ///
    ///     Ok(())
    /// }
    /// example();
    /// ```
    fn fill_null(&self, _strategy: FillNullStrategy) -> PolarsResult<Series> {
        invalid_operation_panic!(self)
    }

    /// Get the sum of the Series as a new Series of length 1.
    ///
    /// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
    /// first cast to `Int64` to prevent overflow issues.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn _sum_as_series(&self) -> Series {
        invalid_operation_panic!(self)
    }
    /// Get the max of the Series as a new Series of length 1.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn max_as_series(&self) -> Series {
        invalid_operation_panic!(self)
    }
    /// Get the min of the Series as a new Series of length 1.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn min_as_series(&self) -> Series {
        invalid_operation_panic!(self)
    }
    /// Get the median of the Series as a new Series of length 1.
    ///
    /// The default implementation returns a single null with this Series' name
    /// and dtype.
    fn median_as_series(&self) -> Series {
        Series::full_null(self.name(), 1, self.dtype())
    }
    /// Get the variance of the Series as a new Series of length 1.
    ///
    /// The default implementation ignores `_ddof` and returns a single null with
    /// this Series' name and dtype.
    fn var_as_series(&self, _ddof: u8) -> Series {
        Series::full_null(self.name(), 1, self.dtype())
    }
    /// Get the standard deviation of the Series as a new Series of length 1.
    ///
    /// The default implementation ignores `_ddof` and returns a single null with
    /// this Series' name and dtype.
    fn std_as_series(&self, _ddof: u8) -> Series {
        Series::full_null(self.name(), 1, self.dtype())
    }
    /// Get the quantile of the ChunkedArray as a new Series of length 1.
    ///
    /// The default implementation ignores both arguments and returns `Ok` with a
    /// single null carrying this Series' name and dtype.
    fn quantile_as_series(
        &self,
        _quantile: f64,
        _interpol: QuantileInterpolOptions,
    ) -> PolarsResult<Series> {
        Ok(Series::full_null(self.name(), 1, self.dtype()))
    }

    /// Format the Series as a list string.
    ///
    /// The default implementation returns the placeholder `"fmt implemented"`.
    /// NOTE(review): the placeholder may have been meant to read "not implemented" —
    /// confirm before relying on the text.
    fn fmt_list(&self) -> String {
        "fmt implemented".into()
    }

    /// Clone the inner ChunkedArray and wrap it in a new `Arc`.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
        invalid_operation_panic!(self)
    }

    #[cfg(feature = "object")]
    #[cfg_attr(docsrs, doc(cfg(feature = "object")))]
    /// Get the value at this index as a downcastable `Any` trait reference.
    ///
    /// Only compiled with the `object` feature. The default body only invokes
    /// `invalid_operation_panic!`.
    fn get_object(&self, _index: usize) -> Option<&dyn PolarsObjectSafe> {
        invalid_operation_panic!(self)
    }

    /// Get a reference to `self` as an `Any` trait object.
    /// Only implemented for ObjectType.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn as_any(&self) -> &dyn Any {
        invalid_operation_panic!(self)
    }

    /// Get a mutable reference to `self` as an `Any` trait object.
    /// Only implemented for ObjectType.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn as_any_mut(&mut self) -> &mut dyn Any {
        invalid_operation_panic!(self)
    }

    /// Get a boolean mask of the local maximum peaks.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn peak_max(&self) -> BooleanChunked {
        invalid_operation_panic!(self)
    }

    /// Get a boolean mask of the local minimum peaks.
    ///
    /// The default body only invokes `invalid_operation_panic!`.
    fn peak_min(&self) -> BooleanChunked {
        invalid_operation_panic!(self)
    }

    /// Check if elements of this Series are in the right Series, or in the List
    /// values of the right Series.
    ///
    /// Only compiled with the `is_in` feature. The default body only invokes
    /// `invalid_operation_panic!`.
    #[cfg(feature = "is_in")]
    #[cfg_attr(docsrs, doc(cfg(feature = "is_in")))]
    fn is_in(&self, _other: &Series) -> PolarsResult<BooleanChunked> {
        invalid_operation_panic!(self)
    }
    #[cfg(feature = "repeat_by")]
    #[cfg_attr(docsrs, doc(cfg(feature = "repeat_by")))]
    /// Repeat values according to `_by`, producing a list column.
    /// NOTE(review): presumably each value is repeated by the count at the same
    /// position in `_by` — confirm against an implementor.
    ///
    /// Only compiled with the `repeat_by` feature. The default body only invokes
    /// `invalid_operation_panic!`.
    fn repeat_by(&self, _by: &IdxCa) -> ListChunked {
        invalid_operation_panic!(self)
    }
    #[cfg(feature = "checked_arithmetic")]
    #[cfg_attr(docsrs, doc(cfg(feature = "checked_arithmetic")))]
    /// Checked division of this Series by `_rhs`.
    /// NOTE(review): the exact result for invalid divisions is not visible here —
    /// confirm against an implementor.
    ///
    /// Only compiled with the `checked_arithmetic` feature. The default body only
    /// invokes `invalid_operation_panic!`.
    fn checked_div(&self, _rhs: &Series) -> PolarsResult<Series> {
        invalid_operation_panic!(self)
    }

    #[cfg(feature = "is_first")]
    #[cfg_attr(docsrs, doc(cfg(feature = "is_first")))]
    /// Get a mask of the first unique values.
    ///
    /// Only compiled with the `is_first` feature. The default body only invokes
    /// `invalid_operation_panic!`.
    fn is_first(&self) -> PolarsResult<BooleanChunked> {
        invalid_operation_panic!(self)
    }

    #[cfg(feature = "mode")]
    #[cfg_attr(docsrs, doc(cfg(feature = "mode")))]
    /// Compute the most occurring element in the array.
    ///
    /// Only compiled with the `mode` feature. The default body only invokes
    /// `invalid_operation_panic!`.
    fn mode(&self) -> PolarsResult<Series> {
        invalid_operation_panic!(self)
    }

    #[cfg(feature = "rolling_window")]
    #[cfg_attr(docsrs, doc(cfg(feature = "rolling_window")))]
    /// Apply a custom function over a rolling/moving window of the array.
    /// This involves quite some dynamic dispatch, so prefer rolling_min, max, mean,
    /// sum over this.
    ///
    /// Only compiled with the `rolling_window` feature.
    ///
    /// # Panics
    /// The default implementation always panics.
    fn rolling_apply(
        &self,
        _f: &dyn Fn(&Series) -> Series,
        _options: RollingOptionsFixedWindow,
    ) -> PolarsResult<Series> {
        panic!("rolling apply not implemented for this dtype. Only implemented for numeric data.")
    }
    #[cfg(feature = "concat_str")]
    #[cfg_attr(docsrs, doc(cfg(feature = "concat_str")))]
    /// Concat the values into a string array.
    ///
    /// Only compiled with the `concat_str` feature. The default body only invokes
    /// `invalid_operation_panic!`.
    ///
    /// # Arguments
    ///
    /// * `delimiter` - A string that will act as delimiter between values.
    fn str_concat(&self, _delimiter: &str) -> Utf8Chunked {
        invalid_operation_panic!(self)
    }
}

impl<'a> (dyn SeriesTrait + 'a) {
    /// Borrow the underlying `ChunkedArray<N>` when `N`'s dtype matches this
    /// Series' dtype; otherwise return a `SchemaMisMatch` error.
    pub fn unpack<N: 'static>(&self) -> PolarsResult<&ChunkedArray<N>>
    where
        N: PolarsDataType,
    {
        if self.dtype() != &N::get_dtype() {
            return Err(PolarsError::SchemaMisMatch(
                "cannot unpack Series; data types don't match".into(),
            ));
        }
        Ok(self.as_ref())
    }

src/chunked_array/ops/full.rs (line 92)

    /// Build a `ListChunked` named `name` that holds `length` copies of `value`.
    fn full(name: &str, value: &Series, length: usize) -> ListChunked {
        // Room for `length` lists holding `value.len()` values each.
        let values_capacity = value.len() * length;
        let mut list_builder =
            get_list_builder(value.dtype(), values_capacity, length, name).unwrap();
        (0..length).for_each(|_| list_builder.append_series(value));
        list_builder.finish()
    }

Additional examples can be found in:

source

fn n_chunks(&self) -> usize

Number of chunks in this Series

Examples found in repository ?

src/frame/mod.rs (line 433)

    /// Rechunk every column through `apply_columns_par`, but only when at least
    /// one column currently consists of more than one chunk.
    pub fn as_single_chunk_par(&mut self) -> &mut Self {
        let fragmented = self.columns.iter().any(|col| col.n_chunks() > 1);
        if fragmented {
            self.columns = self.apply_columns_par(&|col| col.rechunk());
        }
        self
    }

    /// Estimate whether the columns of this DataFrame have differing chunk layouts.
    ///
    /// Returns `true` when the per-column fingerprints (two combined hashes of the
    /// chunk lengths plus the chunk count) are not all equal.
    pub fn should_rechunk(&self) -> bool {
        // Two independently seeded hashers, used to build two fingerprints per column.
        let hb = RandomState::default();
        let hb2 = RandomState::with_seeds(392498, 98132457, 0, 412059);
        !self
            .columns
            .iter()
            // The idea is that we create a hash of the chunk lengths,
            // consisting of the combined hashes + the chunk count (assuming the
            // collision probability is nil). If not, we can add more hashes, or in
            // the worst case we do an extra rechunk.
            // The old solution to this was to clone all lengths to a vec and compare the vecs.
            .map(|s| {
                s.chunk_lengths().map(|i| i as u64).fold(
                    (0u64, 0u64, s.n_chunks()),
                    |(lhash, lh2, n), rval| {
                        let mut h = hb.build_hasher();
                        rval.hash(&mut h);
                        let rhash = h.finish();
                        let mut h = hb2.build_hasher();
                        rval.hash(&mut h);
                        let rh2 = h.finish();
                        (
                            _boost_hash_combine(lhash, rhash),
                            _boost_hash_combine(lh2, rh2),
                            n,
                        )
                    },
                )
            })
            .all_equal()
    }

    /// Ensure all the chunks in the DataFrame are aligned.
    ///
    /// Does nothing unless `should_rechunk()` reports a mismatch, in which case
    /// every column is rechunked via `as_single_chunk_par()`.
    pub fn rechunk(&mut self) -> &mut Self {
        if !self.should_rechunk() {
            return self;
        }
        self.as_single_chunk_par()
    }

    /// Build the `Schema` of this `DataFrame` from the fields of its columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Thing" => &["Observable universe", "Human stupidity"],
    ///                         "Diameter (m)" => &[8.8e26, f64::INFINITY])?;
    ///
    /// let f1: Field = Field::new("Thing", DataType::Utf8);
    /// let f2: Field = Field::new("Diameter (m)", DataType::Float64);
    /// let sc: Schema = Schema::from(vec![f1, f2].into_iter());
    ///
    /// assert_eq!(df.schema(), sc);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn schema(&self) -> Schema {
        let fields = self.iter().map(|col| col.field().into_owned());
        Schema::from(fields)
    }

    /// Get a reference to the `DataFrame` columns.
    ///
    /// Returns the internal column vector as-is; nothing is copied.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Adenine", "Cytosine", "Guanine", "Thymine"],
    ///                         "Symbol" => &["A", "C", "G", "T"])?;
    /// let columns: &Vec<Series> = df.get_columns();
    ///
    /// assert_eq!(columns[0].name(), "Name");
    /// assert_eq!(columns[1].name(), "Symbol");
    /// # Ok::<(), PolarsError>(())
    /// ```
    #[inline]
    pub fn get_columns(&self) -> &Vec<Series> {
        &self.columns
    }

    /// Get a mutable reference to the `DataFrame` columns.
    ///
    /// Only compiled with the `private` feature. No checks are performed here on
    /// what callers do with the vector.
    #[cfg(feature = "private")]
    #[inline]
    pub fn get_columns_mut(&mut self) -> &mut Vec<Series> {
        &mut self.columns
    }

    /// Iterator over the columns as `Series`, in column order.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Name", &["Pythagoras' theorem", "Shannon entropy"]);
    /// let s2: Series = Series::new("Formula", &["a²+b²=c²", "H=-Σ[P(x)log|P(x)|]"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2.clone()])?;
    ///
    /// let mut iterator = df.iter();
    ///
    /// assert_eq!(iterator.next(), Some(&s1));
    /// assert_eq!(iterator.next(), Some(&s2));
    /// assert_eq!(iterator.next(), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn iter(&self) -> std::slice::Iter<'_, Series> {
        self.columns.iter()
    }

    /// Get the column names as borrowed string slices, in column order.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Language" => &["Rust", "Python"],
    ///                         "Designer" => &["Graydon Hoare", "Guido van Rossum"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Language", "Designer"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn get_column_names(&self) -> Vec<&str> {
        self.columns.iter().map(|s| s.name()).collect()
    }

    /// Get the column names as owned `String`s, in column order.
    pub fn get_column_names_owned(&self) -> Vec<String> {
        let mut names = Vec::with_capacity(self.columns.len());
        for col in &self.columns {
            names.push(col.name().to_owned());
        }
        names
    }

    /// Rename all columns, pairing `names` with the columns positionally.
    ///
    /// # Errors
    /// * `ShapeMisMatch` when `names.len()` differs from the number of columns.
    /// * `SchemaMisMatch` when `names` contains duplicates.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Mathematical set" => &["ℕ", "ℤ", "𝔻", "ℚ", "ℝ", "ℂ"])?;
    /// df.set_column_names(&["Set"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Set"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn set_column_names<S: AsRef<str>>(&mut self, names: &[S]) -> PolarsResult<()> {
        let width = self.columns.len();
        if names.len() != width {
            return Err(PolarsError::ShapeMisMatch("the provided slice with column names has not the same size as the DataFrame's width".into()));
        }

        // Duplicates collapse in the set, so a smaller set means a repeated name.
        let distinct: AHashSet<&str, ahash::RandomState> =
            names.iter().map(|name| name.as_ref()).collect();
        if distinct.len() != width {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }

        for (col, new_name) in self.columns.iter_mut().zip(names) {
            col.rename(new_name.as_ref());
        }
        Ok(())
    }

    /// Get the data types of the columns in the DataFrame.
    ///
    /// Each dtype is cloned, in column order.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let venus_air: DataFrame = df!("Element" => &["Carbon dioxide", "Nitrogen"],
    ///                                "Fraction" => &[0.965, 0.035])?;
    ///
    /// assert_eq!(venus_air.dtypes(), &[DataType::Utf8, DataType::Float64]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn dtypes(&self) -> Vec<DataType> {
        self.columns.iter().map(|s| s.dtype().clone()).collect()
    }

    /// The number of chunks per column, read from the first column.
    ///
    /// Returns `0` when the DataFrame has no columns.
    pub fn n_chunks(&self) -> usize {
        self.columns.first().map_or(0, |col| col.n_chunks())
    }

source

fn shrink_to_fit(&mut self)

Shrink the capacity of this array to fit its length.

Examples found in repository ?

src/series/mod.rs (line 198)

197
198
199

    /// Shrink the capacity of this Series to fit its length.
    ///
    /// Forwards to `shrink_to_fit` on the inner value obtained from
    /// `_get_inner_mut()`.
    pub fn shrink_to_fit(&mut self) {
        self._get_inner_mut().shrink_to_fit()
    }

source

fn limit(&self, num_elements: usize) -> Series

Take num_elements from the top as a zero copy view.

source

fn slice(&self, _offset: i64, _length: usize) -> Series

Get a zero copy view of the data.

When offset is negative, the offset is counted from the end of the array.

Examples found in repository ?

src/series/series_trait.rs (line 268)

267
268
269

    fn limit(&self, num_elements: usize) -> Series {
        // A limit is a slice that starts at the very first element.
        let start: i64 = 0;
        self.slice(start, num_elements)
    }

More examples

Hide additional examples

src/frame/groupby/aggregations/dispatch.rs (line 6)

5
6
7

    fn slice_from_offsets(&self, first: IdxSize, len: IdxSize) -> Self {
        // Convert the index-typed pair into the (i64, usize) form `slice` expects.
        let offset = first as i64;
        let length = len as usize;
        self.slice(offset, length)
    }

src/series/mod.rs (line 839)

    pub fn head(&self, length: Option<usize>) -> Series {
        // Default to 10 elements and never ask for more than the Series holds.
        let n = length.unwrap_or(10).min(self.len());
        self.slice(0, n)
    }

    /// Return the last `length` elements of the Series (10 when `length` is `None`),
    /// capped at the Series length.
    pub fn tail(&self, length: Option<usize>) -> Series {
        let n = length.unwrap_or(10).min(self.len());
        // A negative offset is counted from the end.
        self.slice(-(n as i64), n)
    }

src/series/ops/diff.rs (line 11)

    pub fn diff(&self, n: usize, null_behavior: NullBehavior) -> Series {
        match null_behavior {
            // Subtract the Series shifted by `n` positions from itself.
            NullBehavior::Ignore => self - &self.shift(n as i64),
            NullBehavior::Drop => {
                // `saturating_sub` guards against `n > self.len()`: a plain subtraction
                // would panic in debug builds and wrap to a huge length in release
                // builds. With saturation both slices are empty in that case.
                let len = self.len().saturating_sub(n);
                &self.slice(n as i64, len) - &self.slice(0, len)
            }
        }
    }

src/utils/mod.rs (line 647)

pub fn parallel_op_series<F>(f: F, s: Series, n_threads: Option<usize>) -> PolarsResult<Series>
where
    F: Fn(Series) -> PolarsResult<Series> + Send + Sync,
{
    // Fall back to the pool's thread count when the caller did not specify one.
    let n_threads = match n_threads {
        Some(n) => n,
        None => POOL.current_num_threads(),
    };
    let offsets = _split_offsets(s.len(), n_threads);

    // Apply `f` to every slice of the input on the thread pool.
    let partials = POOL.install(|| {
        offsets
            .into_par_iter()
            .map(|(offset, len)| f(s.slice(offset as i64, len)))
            .collect::<PolarsResult<Vec<_>>>()
    })?;

    // Stitch the partial results back together in order.
    let mut parts = partials.into_iter();
    let mut combined = parts.next().unwrap();
    for part in parts {
        combined.append(&part).unwrap();
    }

    // Run `f` once more over the combined partial results.
    f(combined)
}

src/frame/mod.rs (line 1135)

        fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();

            // A unit-length Series is broadcast to the height of the frame.
            if series.len() == 1 && height > 1 {
                series = series.new_from_index(0, height);
            }

            let fits = series.len() == height || df.is_empty();
            if fits {
                df.add_column_by_search(series)?;
                return Ok(df);
            }

            // special case for literals
            if height == 0 && series.len() == 1 {
                let empty = series.slice(0, 0);
                df.add_column_by_search(empty)?;
                return Ok(df);
            }

            let msg = format!(
                "Could not add column. The Series length {} differs from the DataFrame height: {}",
                series.len(),
                df.height()
            );
            Err(PolarsError::ShapeMisMatch(msg.into()))
        }
        let series = column.into_series();
        inner(self, series)
    }

    /// Add `s` to the frame, using `schema` to find the position of an existing
    /// column with the same name. A name missing from the schema is appended.
    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let name = s.name();
        match schema.get_full(name) {
            Some((idx, _, _)) => {
                // The schema is only trusted when the column at `idx` really carries this name.
                let schema_matches = self.columns.get(idx).map(|col| col.name()) == Some(name);
                if schema_matches {
                    self.replace_at_idx(idx, s)?;
                } else {
                    // schema is incorrect fallback to search
                    self.add_column_by_search(s)?;
                }
            }
            None => self.columns.push(s),
        }
        Ok(())
    }

    /// Add several columns to the frame, using `schema` to speed up the lookup of
    /// existing columns where possible.
    ///
    /// The first column, and every column whose name is present in `schema`, goes
    /// through [`DataFrame::with_column_and_schema`]; all others go through
    /// `with_column`.
    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                // `s` is owned and not used afterwards, so it is moved rather than cloned.
                self.with_column(s)?;
            }
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    ///
    /// The supplied `schema` is used to amortize name lookups; when it turns out
    /// to be incorrect a linear search is used instead.
    ///
    /// A Series of length 1 is broadcast to the height of the frame. Any other
    /// length mismatch yields a `ShapeMisMatch` error.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let height = self.height();
        let mut series = column.into_series();

        if series.len() == 1 && height > 1 {
            series = series.new_from_index(0, height);
        }

        let fits = series.len() == height || self.is_empty();
        if fits {
            self.add_column_by_schema(series, schema)?;
            return Ok(self);
        }

        // special case for literals
        if height == 0 && series.len() == 1 {
            let empty = series.slice(0, 0);
            self.add_column_by_schema(empty, schema)?;
            return Ok(self);
        }

        let msg = format!(
            "Could not add column. The Series length {} differs from the DataFrame height: {}",
            series.len(),
            self.height()
        );
        Err(PolarsError::ShapeMisMatch(msg.into()))
    }

    /// Fetch one row of the `DataFrame` as a vector of values. Beware this is slow.
    ///
    /// Returns `None` when the frame has no columns or when `idx` is not smaller
    /// than the length of the first column.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    ///     df.get(idx)
    /// }
    /// ```
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        let first = self.columns.first()?;
        if idx >= first.len() {
            return None;
        }
        // safety: `idx` was bounds-checked against the first column above
        // (this presumes every column of the frame has that same length).
        let row = self
            .columns
            .iter()
            .map(|s| unsafe { s.get_unchecked(idx) })
            .collect();
        Some(row)
    }

    /// Borrow the column at position `idx`, or `None` when `idx` is out of range.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
    ///                         "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
    ///
    /// let s1: Option<&Series> = df.select_at_idx(0);
    /// let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
    ///
    /// assert_eq!(s1, Some(&s2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_at_idx(&self, idx: usize) -> Option<&Series> {
        let all_columns = self.columns.as_slice();
        all_columns.get(idx)
    }

    /// Mutably borrow the column at position `idx`, or `None` when `idx` is out of range.
    ///
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> {
        let all_columns = self.columns.as_mut_slice();
        all_columns.get_mut(idx)
    }

    /// Select the columns whose positions fall inside `range` and return them as a new `DataFrame`.
    ///
    /// # Panics
    ///
    /// Panics when the range starts after it ends, or when it ends beyond the
    /// number of columns.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///     "0" => &[0, 0, 0],
    ///     "1" => &[1, 1, 1],
    ///     "2" => &[2, 2, 2]
    /// }?;
    ///
    /// assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
    /// assert!(df.frame_equal(&df.select_by_range(..)?));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // This function is copied from std::slice::range (https://doc.rust-lang.org/std/slice/fn.range.html)
        // because it is the nightly feature. We should change here if this function were stable.
        //
        // It normalizes arbitrary range bounds into a half-open `start..end`
        // range that is valid for a sequence of `bounds.end` elements.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let len = bounds.end;

            let start = match range.start_bound() {
                ops::Bound::Unbounded => 0,
                ops::Bound::Included(&first) => first,
                ops::Bound::Excluded(before) => before
                    .checked_add(1)
                    .expect("attempted to index slice from after maximum usize"),
            };

            let end = match range.end_bound() {
                ops::Bound::Unbounded => len,
                ops::Bound::Excluded(&past) => past,
                ops::Bound::Included(last) => last
                    .checked_add(1)
                    .expect("attempted to index slice up to maximum usize"),
            };

            assert!(start <= end, "slice index starts at {start} but ends at {end}");
            assert!(
                end <= len,
                "range end index {end} out of range for slice of length {len}"
            );

            start..end
        }

        let colnames = self.get_column_names_owned();
        let positions = get_range(range, ..colnames.len());

        self.select_impl(&colnames[positions])
    }

    /// Return the position of the first column called `name`, or `None` when no
    /// column carries that name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
    ///                         "Health" => &[100, 200, 500],
    ///                         "Mana" => &[250, 100, 0],
    ///                         "Strength" => &[30, 150, 300])?;
    ///
    /// assert_eq!(df.find_idx_by_name("Name"), Some(0));
    /// assert_eq!(df.find_idx_by_name("Health"), Some(1));
    /// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
    /// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
    /// assert_eq!(df.find_idx_by_name("Haste"), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
        self.columns
            .iter()
            .enumerate()
            .find(|(_, col)| col.name() == name)
            .map(|(idx, _)| idx)
    }

    /// Borrow the column called `name`.
    ///
    /// # Errors
    ///
    /// Returns `PolarsError::NotFound` when no column carries that name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
    /// let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
    ///
    /// assert_eq!(df.column("Password")?, &s1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        match self.find_idx_by_name(name) {
            // `idx` comes from a search over `self.columns`, so it is in range.
            Some(idx) => Ok(&self.columns[idx]),
            None => Err(PolarsError::NotFound(name.to_string().into())),
        }
    }

    /// Select multiple columns by name, in the order the names are given.
    ///
    /// # Errors
    ///
    /// Fails with the error of [`DataFrame::column`] for the first name that
    /// cannot be found.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let mut found = Vec::new();
        for name in names {
            found.push(self.column(name.as_ref())?);
        }
        Ok(found)
    }

    /// Build a new `DataFrame` holding only the named columns.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        // Materialize owned names so the non-generic implementation can be shared.
        let mut names: Vec<String> = Vec::new();
        for item in selection {
            names.push(item.as_ref().to_string());
        }
        self.select_impl(&names)
    }

    /// Non-generic part of [`DataFrame::select`]: reject duplicate names, then
    /// gather the requested columns into a new frame.
    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        self.select_series_impl(cols)
            .map(DataFrame::new_no_checks)
    }

    /// Like [`DataFrame::select`], but every selected column is converted to its
    /// physical representation.
    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let mut names: Vec<String> = Vec::new();
        for item in selection {
            names.push(item.as_ref().to_string());
        }
        self.select_physical_impl(&names)
    }

    /// Non-generic part of [`DataFrame::select_physical`]: reject duplicate names,
    /// then gather the physical representation of the requested columns.
    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        self.select_series_physical_impl(cols)
            .map(DataFrame::new_no_checks)
    }

    /// Fail on the first name that occurs more than once in `cols`.
    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut seen = PlHashSet::with_capacity(cols.len());
        for name in cols {
            // `insert` reports `false` when the name was already present.
            let first_occurrence = seen.insert(name.as_str());
            if !first_occurrence {
                _duplicate_err(name)?
            }
        }
        Ok(())
    }

    /// Clone the named columns out of this `DataFrame` into a `Vec`, in the
    /// order the names are given.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        let names: Vec<String> = selection.into_vec();
        self.select_series_impl(names.as_slice())
    }

    /// Build a lookup table from column name to column position.
    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        self.columns
            .iter()
            .map(|col| col.name())
            .zip(0usize..)
            .collect()
    }

    /// Gather the physical representation of the named columns.
    ///
    /// Kept non-generic to reduce compiler bloat. For more than one requested
    /// name on a frame wider than 10 columns, names are resolved through a hash
    /// map instead of a linear search per name.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let use_hash_lookup = cols.len() > 1 && self.columns.len() > 10;

        if !use_hash_lookup {
            return cols
                .iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect();
        }

        let lookup = self._names_to_idx_map();
        cols.iter()
            .map(|name| match lookup.get(name.as_str()) {
                // Positions stored in the lookup table are valid column indices.
                Some(&idx) => Ok(self.columns[idx].to_physical_repr().into_owned()),
                None => Err(PolarsError::NotFound(name.to_string().into())),
            })
            .collect()
    }

    /// Clone the named columns into a `Vec`.
    ///
    /// Kept non-generic to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let use_hash_lookup = cols.len() > 1 && self.columns.len() > 10;

        if !use_hash_lookup {
            return cols
                .iter()
                .map(|c| self.column(c).map(Clone::clone))
                .collect();
        }

        // we hash, because there are user that having millions of columns.
        // # https://github.com/pola-rs/polars/issues/1023
        let lookup = self._names_to_idx_map();
        cols.iter()
            .map(|name| match lookup.get(name.as_str()) {
                // Positions stored in the lookup table are valid column indices.
                Some(&idx) => Ok(self.columns[idx].clone()),
                None => Err(PolarsError::NotFound(name.to_string().into())),
            })
            .collect()
    }

    /// Mutably borrow the column called `name`, or `None` when it does not exist.
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        let idx = self.find_idx_by_name(name)?;
        self.select_at_idx_mut(idx)
    }

    /// Filter by splitting both the frame and the mask into one part per thread,
    /// filtering each part on the pool, and stacking the results back together.
    ///
    /// The original documentation describes this as splitting "vertically instead
    /// of horizontally", yielding a DataFrame with `n_chunks == n_threads`.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let mask_parts = split_ca(mask, n_threads).unwrap();
        let frame_parts = split_df(self, n_threads).unwrap();
        let filtered: PolarsResult<Vec<_>> = POOL.install(|| {
            mask_parts
                .par_iter()
                .zip(frame_parts)
                .map(|(part_mask, part_df)| {
                    let cols = part_df
                        .columns
                        .iter()
                        .map(|s| s.filter(part_mask))
                        .collect::<PolarsResult<_>>()?;
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        // Stack the filtered parts in their original order.
        let mut parts = filtered?.into_iter();
        let mut stacked = parts.next().unwrap();
        for part in parts {
            stacked.vstack_mut(&part).unwrap();
        }
        Ok(stacked)
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            return self.clone().filter_vertical(mask);
        }
        let new_col = self.try_apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s.filter_threaded(mask, true),
            _ => s.filter(mask),
        })?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Sequential counterpart of `filter`: applies the mask column by column
    /// without parallelizing.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        self.try_apply_columns(&|s| s.filter(mask))
            .map(DataFrame::new_no_checks)
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        let new_col = self.try_apply_columns_par(&|s| {
            let mut i = iter.clone();
            s.take_iter(&mut i)
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` rows at the indexes produced by an iterator, without
    /// bounds checking.
    ///
    /// The indexes are first collected into an index array when `POLARS_VERT_PAR`
    /// is set, when the frame has a single chunk and more than one column, or
    /// when any column is `Utf8`. Otherwise every column consumes the iterator
    /// (or a clone of it) directly.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: NoNull<IdxCa> = iter.map(|idx| idx as IdxSize).collect();
            return self.take_unchecked_vectical(&idx_ca.into_inner());
        }

        let single_chunk = self.n_chunks() == 1;
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (single_chunk && self.width() > 1) || has_utf8 {
            let idx_ca: NoNull<IdxCa> = iter.map(|idx| idx as IdxSize).collect();
            return self.take_unchecked(&idx_ca.into_inner());
        }

        let new_col = match self.width() {
            // A single column can consume the iterator directly; no clone needed.
            1 => self
                .columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>(),
            // Otherwise every column works on its own clone, in parallel.
            _ => self.apply_columns_par(&|s| {
                let mut indexes = iter.clone();
                s.take_iter_unchecked(&mut indexes)
            }),
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` rows at the optional indexes produced by an iterator,
    /// without bounds checking.
    ///
    /// The dispatch mirrors `take_iter_unchecked`: the indexes are collected into
    /// an index array when `POLARS_VERT_PAR` is set, when the frame has a single
    /// chunk and more than one column, or when any column is `Utf8`.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: IdxCa = iter.map(|opt| opt.map(|v| v as IdxSize)).collect();
            return self.take_unchecked_vectical(&idx_ca);
        }

        let single_chunk = self.n_chunks() == 1;
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (single_chunk && self.width() > 1) || has_utf8 {
            let idx_ca: IdxCa = iter.map(|opt| opt.map(|v| v as IdxSize)).collect();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = match self.width() {
            // A single column can consume the iterator directly; no clone needed.
            1 => self
                .columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>(),
            // Otherwise every column works on its own clone, in parallel.
            _ => self.apply_columns_par(&|s| {
                let mut indexes = iter.clone();
                s.take_opt_iter_unchecked(&mut indexes)
            }),
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Take the rows of the `DataFrame` at the given index values.
    ///
    /// An index array spread over several chunks is rechunked first.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        let indices = match indices.chunks.len() {
            0 | 1 => Cow::Borrowed(indices),
            _ => Cow::Owned(indices.rechunk()),
        };

        // Utf8 columns go through the threaded take; all others use the plain one.
        let new_col = POOL.install(|| {
            self.try_apply_columns_par(&|s| {
                if matches!(s.dtype(), DataType::Utf8) {
                    s.take_threaded(&indices, true)
                } else {
                    s.take(&indices)
                }
            })
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take rows by index without bounds checking, with threading allowed.
    ///
    /// # Safety
    ///
    /// Forwards to `take_unchecked_impl`, which performs no bounds checks itself;
    /// presumably every index in `idx` must be in bounds (confirm against callers).
    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        let allow_threads = true;
        self.take_unchecked_impl(idx, allow_threads)
    }

    /// Take rows by index without bounds checking, either on the thread pool
    /// (`allow_threads == true`) or sequentially on the calling thread.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed here; presumably every index in `idx` must
    /// be in bounds for every column (confirm against callers).
    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        if !allow_threads {
            let cols = self
                .columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect();
            return DataFrame::new_no_checks(cols);
        }

        // Utf8 columns go through the threaded take; all others use the plain one.
        let cols = POOL.install(|| {
            self.apply_columns_par(&|s| {
                if matches!(s.dtype(), DataType::Utf8) {
                    s.take_unchecked_threaded(idx, true).unwrap()
                } else {
                    s.take_unchecked(idx).unwrap()
                }
            })
        });
        DataFrame::new_no_checks(cols)
    }

    /// Take rows by index without bounds checking, splitting the index array into
    /// one part per thread and stacking the per-part results back together.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed here; presumably every index in `indices`
    /// must be in bounds for every column (confirm against callers).
    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let index_parts = split_ca(indices, n_threads).unwrap();

        let frames: Vec<_> = POOL.install(|| {
            index_parts
                .par_iter()
                .map(|part| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(part).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        // Stack the partial frames in their original order.
        let mut parts = frames.into_iter();
        let mut stacked = parts.next().unwrap();
        for part in parts {
            stacked.vstack_mut(&part).unwrap();
        }
        stacked
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// # Errors
    ///
    /// * `PolarsError::NotFound` when no column is called `column`.
    /// * `PolarsError::SchemaMisMatch` when the rename would leave the frame with
    ///   duplicate column names.
    ///
    /// The frame is left unmodified when an error is returned.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        let idx = self
            .find_idx_by_name(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))?;

        // Validate the prospective set of names *before* mutating anything, so a
        // failed rename cannot leave the frame with duplicate column names.
        let unique_names: AHashSet<&str, ahash::RandomState> = AHashSet::from_iter(
            self.columns
                .iter()
                .enumerate()
                .map(|(i, s)| if i == idx { name } else { s.name() }),
        );
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }

        self.columns[idx].rename(name);
        Ok(self)
    }

    /// Sort this `DataFrame` in place by the given column(s).
    ///
    /// `reverse` holds one flag per sort column. The frame is rechunked to a
    /// single chunk before sorting.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // a lot of indirection in both sorting and take
        self.as_single_chunk_par();

        let sort_keys = self.select_series(by_column)?;
        let sorted = self.sort_impl(sort_keys, reverse.into_vec(), false, None)?;
        self.columns = sorted.columns;
        Ok(self)
    }

    /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
    ///
    /// Computes the sort order from `by_column` (argsort), optionally slices that
    /// order with `slice` as `(offset, length)`, and takes the rows of `self` in
    /// that order. `reverse` holds one descending flag per sort column.
    ///
    /// Sorting by more than one column requires the `sort_multiple` feature;
    /// without it this method panics for multiple sort columns.
    ///
    /// # Panics
    ///
    /// Panics when `by_column` or `reverse` is empty, because the first element
    /// of each is indexed unconditionally.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // note that the by_column argument also contains evaluated expression from polars-lazy
        // that may not even be present in this dataframe.

        // therefore when we try to set the first columns as sorted, we ignore the error
        // as expressions are not present (they are renamed to _POLARS_SORT_COLUMN_i.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();
        // `take` holds the row indices in sorted order.
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: reverse[0],
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        // Slice the sorted indices rather than the result, so only the requested rows are taken.
        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted
        // if the column did not exists it is ok, because we sorted by an expression
        // not present in the dataframe
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted copy of this `DataFrame`; `self` is left untouched.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        let mut sorted = self.clone();
        sorted.sort_in_place(by_column, reverse)?;
        Ok(sorted)
    }

    /// Return a copy of the `DataFrame` sorted by one column, honoring the
    /// `descending` and `nulls_last` settings of `options`.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut out = self.clone();
        // a lot of indirection in both sorting and take
        out.as_single_chunk_par();

        let sort_keys = vec![out.column(by_column)?.clone()];
        let descending = vec![options.descending];
        let sorted = out.sort_impl(sort_keys, descending, options.nulls_last, None)?;
        out.columns = sorted.columns;
        Ok(out)
    }

    /// Replace a column with a `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Country" => &["United States", "China"],
    ///                         "Area (km²)" => &[9_833_520, 9_596_961])?;
    /// let s: Series = Series::new("Country", &["USA", "PRC"]);
    ///
    /// assert!(df.replace("Nation", s.clone()).is_err());
    /// assert!(df.replace("Country", s).is_ok());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace<S: IntoSeries>(&mut self, column: &str, new_col: S) -> PolarsResult<&mut Self> {
        self.apply(column, |_| new_col.into_series())
    }

    /// Replace or add a column under the name `column`.
    ///
    /// Unlike [DataFrame::with_column], the resulting column name is taken from
    /// the `column` argument rather than from the name of the `Series` passed in:
    /// the Series is renamed to `column` before it is inserted.
    pub fn replace_or_add<S: IntoSeries>(
        &mut self,
        column: &str,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut renamed = new_col.into_series();
        renamed.rename(column);
        self.with_column(renamed)
    }

    /// Replace the column at position `idx` with `new_col`.
    ///
    /// # Errors
    ///
    /// * `ShapeMisMatch` when the length of `new_col` differs from the height of the frame.
    /// * `ComputeError` when `idx` is not a valid column position.
    ///
    /// # Example
    ///
    /// ```ignore
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace_at_idx<S: IntoSeries>(
        &mut self,
        idx: usize,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let replacement = new_col.into_series();

        // The shape is validated before the index, so a shape error takes precedence.
        if replacement.len() != self.height() {
            let msg = format!("Cannot replace Series at index {}. The shape of Series {} does not match that of the DataFrame {}",
                idx, replacement.len(), self.height()
            );
            return Err(PolarsError::ShapeMisMatch(msg.into()));
        }
        if idx >= self.width() {
            let msg = format!(
                "Column index: {} outside of DataFrame with {} columns",
                idx,
                self.width()
            );
            return Err(PolarsError::ComputeError(msg.into()));
        }

        // Overwrite the slot; the previous column is dropped.
        self.columns[idx] = replacement;
        Ok(self)
    }

    /// Apply a closure to the column called `name`. This is the recommended way to do in place
    /// modification.
    ///
    /// The column is looked up by name and the work is delegated to
    /// [`DataFrame::apply_at_idx`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("names", &["Jean", "Claude", "van"]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// fn str_to_len(str_val: &Series) -> Series {
    ///     str_val.utf8()
    ///         .unwrap()
    ///         .into_iter()
    ///         .map(|opt_name: Option<&str>| {
    ///             opt_name.map(|name: &str| name.len() as u32)
    ///          })
    ///         .collect::<UInt32Chunked>()
    ///         .into_series()
    /// }
    ///
    /// // Replace the names column by the length of the names.
    /// df.apply("names", str_to_len);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | names |
    /// | ---    | ---   |
    /// | str    | u32   |
    /// +========+=======+
    /// | "ham"  | 4     |
    /// +--------+-------+
    /// | "spam" | 6     |
    /// +--------+-------+
    /// | "egg"  | 3     |
    /// +--------+-------+
    /// ```
    pub fn apply<F, S>(&mut self, name: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        match self.check_name_to_idx(name) {
            Ok(col_idx) => self.apply_at_idx(col_idx, f),
            Err(err) => Err(err),
        }
    }

    /// Apply a closure to the column at position `idx`. This is the recommended way to do
    /// in place modification.
    ///
    /// The closure result must either have the same length as the `DataFrame` or
    /// length 1, in which case its single value is repeated to the frame height.
    /// Any other length is a `ShapeMisMatch` error. The column keeps its original
    /// name regardless of the name of the returned `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.apply_at_idx(1, |s| s + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | ascii |
    /// | ---    | ---   |
    /// | str    | i32   |
    /// +========+=======+
    /// | "ham"  | 102   |
    /// +--------+-------+
    /// | "spam" | 111   |
    /// +--------+-------+
    /// | "egg"  | 111   |
    /// +--------+-------+
    /// ```
    pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let df_height = self.height();
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        // Remember the name before the closure result takes the column's place.
        let name = col.name().to_string();
        let new_col = f(col).into_series();
        let new_len = new_col.len();

        *col = if new_len == 1 {
            // Broadcast the single value to the height of the frame.
            new_col.new_from_index(0, df_height)
        } else if new_len == df_height {
            new_col
        } else {
            return Err(PolarsError::ShapeMisMatch(
                format!(
                    "Result Series has shape {} where the DataFrame has height {}",
                    new_len, df_height
                )
                .into(),
            ));
        };

        // make sure the name remains the same after applying the closure
        col.rename(&name);
        Ok(self)
    }

    /// Apply a closure that may fail to the column at position `idx`. This is the recommended
    /// way to do in place modification.
    ///
    /// An error returned by the closure is propagated and the column is left as it was.
    /// The column keeps its original name. The length of the returned `Series` is not
    /// checked here.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values in a column of a `DataFrame` given a
    /// range of indexes.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// let idx = vec![0, 1, 4];
    ///
    /// df.try_apply_at_idx(0, |s| {
    ///     s.utf8()?
    ///     .set_at_idx_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "ham-is-modified"   | 1      |
    /// +---------------------+--------+
    /// | "spam-is-modified"  | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "quack-is-modified" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();

        // Bail out before touching the column if the closure fails.
        let new_col = f(col)?.into_series();
        *col = new_col;

        // make sure the name remains the same after applying the closure
        col.rename(&name);
        Ok(self)
    }

    /// Apply a closure that may fail to the column called `column`. This is the recommended
    /// way to do in place modification.
    ///
    /// Returns a `NotFound` error when no column has that name; otherwise the work is
    /// delegated to [`DataFrame::try_apply_at_idx`].
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values in a column of a `DataFrame` given a
    /// boolean mask.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // create a mask
    /// let values = df.column("values")?;
    /// let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?;
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set(&mask, Some("not_within_bounds"))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "not_within_bounds" | 1      |
    /// +---------------------+--------+
    /// | "spam"              | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "not_within_bounds" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply<F, S>(&mut self, column: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        match self.find_idx_by_name(column) {
            Some(col_idx) => self.try_apply_at_idx(col_idx, f),
            None => Err(PolarsError::NotFound(column.to_string().into())),
        }
    }

    /// Slice the `DataFrame` along the rows.
    ///
    /// Every column is sliced with the same `offset` and `length`. When the request
    /// covers the whole frame (`offset == 0` and `length == height`) a plain clone
    /// is returned instead.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Fruit" => &["Apple", "Grape", "Grape", "Fig", "Fig"],
    ///                         "Color" => &["Green", "Red", "White", "White", "Red"])?;
    /// let sl: DataFrame = df.slice(2, 3);
    ///
    /// assert_eq!(sl.shape(), (3, 2));
    /// println!("{}", sl);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Output:
    /// ```text
    /// shape: (3, 2)
    /// +-------+-------+
    /// | Fruit | Color |
    /// | ---   | ---   |
    /// | str   | str   |
    /// +=======+=======+
    /// | Grape | White |
    /// +-------+-------+
    /// | Fig   | White |
    /// +-------+-------+
    /// | Fig   | Red   |
    /// +-------+-------+
    /// ```
    #[must_use]
    pub fn slice(&self, offset: i64, length: usize) -> Self {
        let covers_whole_frame = offset == 0 && length == self.height();
        if covers_whole_frame {
            self.clone()
        } else {
            let mut sliced = Vec::with_capacity(self.columns.len());
            for s in &self.columns {
                sliced.push(s.slice(offset, length));
            }
            DataFrame::new_no_checks(sliced)
        }
    }

    /// Like `slice`, but the per-column slicing goes through `apply_columns_par`.
    #[must_use]
    pub fn slice_par(&self, offset: i64, length: usize) -> Self {
        if offset != 0 || length != self.height() {
            let sliced = self.apply_columns_par(&|s| s.slice(offset, length));
            return DataFrame::new_no_checks(sliced);
        }
        // The requested window is the whole frame: nothing to slice.
        self.clone()
    }

    /// Like `slice`, but additionally calls `shrink_to_fit` on every sliced column.
    #[must_use]
    pub fn _slice_and_realloc(&self, offset: i64, length: usize) -> Self {
        if offset != 0 || length != self.height() {
            let columns = self.apply_columns(&|s| {
                let mut sliced = s.slice(offset, length);
                sliced.shrink_to_fit();
                sliced
            });
            return DataFrame::new_no_checks(columns);
        }
        // The requested window is the whole frame: return a clone untouched.
        self.clone()
    }

Additional examples can be found in:

source

fn filter(&self, _filter: &BooleanChunked) -> PolarsResult<Series>

Filter by boolean mask. This operation clones data.

Examples found in repository:

src/series/series_trait.rs (line 359)

    /// Return a `Series` without null values.
    ///
    /// When there are no nulls the inner series is cloned as is; otherwise the
    /// series is filtered by its `is_not_null` mask.
    fn drop_nulls(&self) -> Series {
        match self.null_count() {
            0 => Series(self.clone_inner()),
            _ => self.filter(&self.is_not_null()).unwrap(),
        }
    }

More examples

Hide additional examples

src/series/mod.rs (line 491)

    /// Filter this `Series` by `filter`, splitting the work over the thread pool.
    ///
    /// Both the mask and the series are split into as many parts as the pool has
    /// threads; the parts are filtered in parallel and recombined through
    /// `finish_take_threaded`.
    pub fn filter_threaded(&self, filter: &BooleanChunked, rechunk: bool) -> PolarsResult<Series> {
        // A length-1 (broadcasting) filter cannot be split over threads, and it is
        // a no-op anyway, so fall back to the standard filter.
        if filter.len() == 1 {
            return self.filter(filter);
        }
        let n_threads = POOL.current_num_threads();
        let mask_parts = split_ca(filter, n_threads).unwrap();
        let series_parts = split_series(self, n_threads).unwrap();

        let filtered = POOL.install(|| {
            mask_parts
                .par_iter()
                .zip(series_parts)
                .map(|(mask, part)| part.filter(mask))
                .collect::<PolarsResult<Vec<_>>>()
        })?;

        Ok(self.finish_take_threaded(filtered, rechunk))
    }

src/frame/mod.rs (line 1555)

    /// Filter the frame by splitting it (and the mask) into row-wise parts, filtering
    /// each part on the thread pool and stacking the results back together.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let mask_parts = split_ca(mask, n_threads).unwrap();
        let frame_parts = split_df(self, n_threads).unwrap();
        let filtered: PolarsResult<Vec<_>> = POOL.install(|| {
            mask_parts
                .par_iter()
                .zip(frame_parts)
                .map(|(mask_part, frame_part)| {
                    // Filter every column of this part with the matching mask part,
                    // stopping at the first error.
                    let mut cols = Vec::with_capacity(frame_part.columns.len());
                    for s in &frame_part.columns {
                        cols.push(s.filter(mask_part)?);
                    }
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        // Stack all parts onto the first one, in order.
        let mut parts = filtered?.into_iter();
        let mut stacked = parts.next().unwrap();
        for part in parts {
            stacked.vstack_mut(&part).unwrap();
        }
        Ok(stacked)
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// When the environment variable `POLARS_VERT_PAR` is set, a clone of the frame
    /// is filtered through the vertical (row-split) path. Otherwise the columns are
    /// filtered in parallel, with `Utf8` columns using the threaded filter.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let vertical = std::env::var("POLARS_VERT_PAR").is_ok();
        if vertical {
            return self.clone().filter_vertical(mask);
        }
        let filtered = self.try_apply_columns_par(&|s| {
            if matches!(s.dtype(), DataType::Utf8) {
                s.filter_threaded(mask, true)
            } else {
                s.filter(mask)
            }
        })?;
        Ok(DataFrame::new_no_checks(filtered))
    }

    /// Sequential counterpart of `filter`: every column is filtered by `mask`
    /// without parallelization.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        self.try_apply_columns(&|s| s.filter(mask))
            .map(DataFrame::new_no_checks)
    }

source

fn is_empty(&self) -> bool

Check if Series is empty.

Examples found in repository:

src/series/mod.rs (line 521)

    pub fn sum_as_series(&self) -> Series {
        use DataType::*;
        if self.is_empty() && self.dtype().is_numeric() {
            return Series::new("", [0])
                .cast(self.dtype())
                .unwrap()
                .sum_as_series();
        }
        match self.dtype() {
            Int8 | UInt8 | Int16 | UInt16 => self.cast(&Int64).unwrap().sum_as_series(),
            _ => self._sum_as_series(),
        }
    }

More examples

Hide additional examples

src/chunked_array/ops/apply.rs (line 678)

    /// Apply `f` to the sub-list `Series` of this list array and collect the results
    /// into a new `ListChunked` (the iteration itself is done by the `apply!` macro).
    ///
    /// An empty array is returned as a clone without calling `f`.
    fn apply<F>(&'a self, f: F) -> Self
    where
        F: Fn(Series) -> Series + Copy,
    {
        if self.is_empty() {
            return self.clone();
        }
        // Stays `true` only while every output produced by `f` is non-empty.
        let mut fast_explode = true;
        // Wrap `f` so that empty outputs are noticed as they are produced.
        let mut function = |s: Series| {
            let out = f(s);
            if out.is_empty() {
                fast_explode = false;
            }
            out
        };
        let mut ca: ListChunked = apply!(self, &mut function);
        if fast_explode {
            ca.set_fast_explode()
        }
        ca
    }

    /// Fallible variant of `apply`: applies `f` to the sub-list `Series` through the
    /// `try_apply!` macro and propagates the error if the macro result is an `Err`.
    ///
    /// An empty array is returned as a clone without calling `f`.
    fn try_apply<F>(&'a self, f: F) -> PolarsResult<Self>
    where
        F: Fn(Series) -> PolarsResult<Series> + Copy,
    {
        if self.is_empty() {
            return Ok(self.clone());
        }

        // Stays `true` only while every successful output is non-empty.
        let mut fast_explode = true;
        // Wrap `f` so that empty outputs are noticed; errors pass through untouched.
        let mut function = |s: Series| {
            let out = f(s);
            if let Ok(out) = &out {
                if out.is_empty() {
                    fast_explode = false;
                }
            }
            out
        };
        let ca: PolarsResult<ListChunked> = try_apply!(self, &mut function);
        let mut ca = ca?;
        if fast_explode {
            ca.set_fast_explode()
        }
        Ok(ca)
    }

    /// Apply `f` to every optional sub-list and collect the results into a new list array.
    ///
    /// An empty array is returned as a clone without calling `f`.
    fn apply_on_opt<F>(&'a self, f: F) -> Self
    where
        F: Fn(Option<Series>) -> Option<Series> + Copy,
    {
        if !self.is_empty() {
            self.into_iter().map(f).collect_trusted()
        } else {
            self.clone()
        }
    }

    /// Apply a closure elementwise. The closure gets the index of the element as first argument.
    ///
    /// An empty array is returned as a clone without calling `f`; the iteration itself is
    /// done by the `apply_enumerate!` macro.
    fn apply_with_idx<F>(&'a self, f: F) -> Self
    where
        F: Fn((usize, Series)) -> Series + Copy,
    {
        if self.is_empty() {
            return self.clone();
        }
        // Stays `true` only while every output produced by `f` is non-empty.
        let mut fast_explode = true;
        // Wrap `f` so that empty outputs are noticed as they are produced.
        let mut function = |(idx, s)| {
            let out = f((idx, s));
            if out.is_empty() {
                fast_explode = false;
            }
            out
        };
        let mut ca: ListChunked = apply_enumerate!(self, function);
        if fast_explode {
            ca.set_fast_explode()
        }
        ca
    }

    /// Apply a closure to every optional element. The closure receives the index of the
    /// element as the first member of its tuple argument.
    ///
    /// An empty array is returned as a clone without calling `f`.
    fn apply_with_idx_on_opt<F>(&'a self, f: F) -> Self
    where
        F: Fn((usize, Option<Series>)) -> Option<Series> + Copy,
    {
        if self.is_empty() {
            return self.clone();
        }
        // Stays `true` only while no `Some` output is empty.
        let mut fast_explode = true;
        let mut ca: ListChunked = self
            .into_iter()
            .enumerate()
            .map(|(idx, opt_s)| {
                let out = f((idx, opt_s));
                if matches!(&out, Some(s) if s.is_empty()) {
                    fast_explode = false;
                }
                out
            })
            .collect_trusted();
        if fast_explode {
            ca.set_fast_explode();
        }
        ca
    }

src/chunked_array/list/iterator.rs (line 138)

    /// Apply `f` to every non-null sub-list obtained from `amortized_iter` and collect
    /// the results into a new list array carrying the same name.
    ///
    /// Null entries stay null and `f` is not called for them. An empty array is
    /// returned as a clone.
    pub fn apply_amortized<'a, F>(&'a self, mut f: F) -> Self
    where
        F: FnMut(UnstableSeries<'a>) -> Series,
    {
        if self.is_empty() {
            return self.clone();
        }
        // Starts out `true` only without nulls; cleared as soon as `f` returns an empty series.
        let mut fast_explode = self.null_count() == 0;
        let mut ca: ListChunked = self
            .amortized_iter()
            .map(|opt_v| match opt_v {
                Some(v) => {
                    let out = f(v);
                    fast_explode &= !out.is_empty();
                    Some(out)
                }
                None => None,
            })
            .collect_trusted();

        ca.rename(self.name());
        if fast_explode {
            ca.set_fast_explode();
        }
        ca
    }

    /// Fallible variant of `apply_amortized`: the first error returned by `f` stops the
    /// collection and is returned.
    ///
    /// Null entries stay null and `f` is not called for them. An empty array is
    /// returned as a clone.
    pub fn try_apply_amortized<'a, F>(&'a self, mut f: F) -> PolarsResult<Self>
    where
        F: FnMut(UnstableSeries<'a>) -> PolarsResult<Series>,
    {
        if self.is_empty() {
            return Ok(self.clone());
        }
        // Starts out `true` only without nulls; cleared as soon as `f` yields an empty series.
        let mut fast_explode = self.null_count() == 0;
        let mut ca: ListChunked = self
            .amortized_iter()
            .map(|opt_v| -> PolarsResult<Option<Series>> {
                match opt_v {
                    None => Ok(None),
                    Some(v) => {
                        let out = f(v)?;
                        if out.is_empty() {
                            fast_explode = false;
                        }
                        Ok(Some(out))
                    }
                }
            })
            .collect::<PolarsResult<_>>()?;
        ca.rename(self.name());
        if fast_explode {
            ca.set_fast_explode();
        }
        Ok(ca)
    }

src/chunked_array/builder/list.rs (line 156)

    /// Append the values of `s` as one valid sub-list.
    ///
    /// Panics if the physical representation of `s` cannot be unpacked as `T`.
    fn append_series(&mut self, s: &Series) {
        // An empty sub-list rules out the fast-explode flag.
        if s.is_empty() {
            self.fast_explode = false;
        }
        let physical = s.to_physical_repr();
        let ca = physical.unpack::<T>().unwrap();
        let values = self.builder.mut_values();

        // Copy the values chunk by chunk into the shared values buffer.
        ca.downcast_iter().for_each(|arr| {
            if !arr.has_validity() {
                // No validity bitmap: the raw value slice can be copied directly.
                values.extend_from_slice(arr.values().as_slice())
            } else {
                // Safety:
                // Arrow arrays are trusted length iterators.
                unsafe { values.extend_trusted_len_unchecked(arr.into_iter()) }
            }
        });
        // overflow of i64 is far beyond polars capable lengths.
        unsafe { self.builder.try_push_valid().unwrap_unchecked() };
    }

    /// Finish the builder and return the resulting `ListChunked`
    /// (delegates to the `finish_list_builder!` macro).
    fn finish(&mut self) -> ListChunked {
        finish_list_builder!(self)
    }
}

// Shorthands for the mutable arrow list arrays (all parameterised with `i64`)
// that back the list builders in this module.

/// `MutableListArray` over primitive values of type `T`.
type LargePrimitiveBuilder<T> = MutableListArray<i64, MutablePrimitiveArray<T>>;
/// `MutableListArray` over utf8 string values.
type LargeListUtf8Builder = MutableListArray<i64, MutableUtf8Array<i64>>;
/// `MutableListArray` over binary values (only with the `dtype-binary` feature).
#[cfg(feature = "dtype-binary")]
type LargeListBinaryBuilder = MutableListArray<i64, MutableBinaryArray<i64>>;
/// `MutableListArray` over boolean values.
type LargeListBooleanBuilder = MutableListArray<i64, MutableBooleanArray>;

/// Builder for a list array whose inner dtype is `Utf8`.
pub struct ListUtf8ChunkedBuilder {
    // Underlying mutable arrow list array that collects the sub-lists.
    builder: LargeListUtf8Builder,
    // Name and `List(Utf8)` dtype of the array being built.
    field: Field,
    // Starts as `true`; cleared when a null or (possibly) empty sub-list is appended.
    fast_explode: bool,
}

impl ListUtf8ChunkedBuilder {
    /// Create a builder named `name` with room for `capacity` sub-lists and
    /// `values_capacity` string values.
    pub fn new(name: &str, capacity: usize, values_capacity: usize) -> Self {
        let values = MutableUtf8Array::<i64>::with_capacity(values_capacity);
        let builder = LargeListUtf8Builder::new_with_capacity(values, capacity);
        let field = Field::new(name, DataType::List(Box::new(DataType::Utf8)));

        ListUtf8ChunkedBuilder {
            builder,
            field,
            fast_explode: true,
        }
    }

    /// Append the optional strings yielded by `iter` as one valid sub-list.
    pub fn append_trusted_len_iter<'a, I: Iterator<Item = Option<&'a str>> + TrustedLen>(
        &mut self,
        iter: I,
    ) {
        let values = self.builder.mut_values();

        // A lower size bound of zero means the sub-list may be empty.
        if iter.size_hint().0 == 0 {
            self.fast_explode = false;
        }
        // Safety
        // trusted len, trust the type system
        unsafe { values.extend_trusted_len_unchecked(iter) };
        self.builder.try_push_valid().unwrap();
    }

    /// Append the (non-null) strings yielded by `iter` as one valid sub-list.
    pub fn append_values_iter<'a, I: Iterator<Item = &'a str>>(&mut self, iter: I) {
        let values = self.builder.mut_values();

        // A lower size bound of zero means the sub-list may be empty.
        if iter.size_hint().0 == 0 {
            self.fast_explode = false;
        }
        values.extend_values(iter);
        self.builder.try_push_valid().unwrap();
    }

    /// Append all values of `ca` as one valid sub-list.
    pub(crate) fn append(&mut self, ca: &Utf8Chunked) {
        // An empty sub-list rules out fast explode. Check it here as well, so that
        // callers other than `append_series` cannot leave the flag set by accident
        // (this mirrors `ListBooleanChunkedBuilder::append`).
        if ca.is_empty() {
            self.fast_explode = false;
        }
        let value_builder = self.builder.mut_values();
        value_builder.try_extend(ca).unwrap();
        self.builder.try_push_valid().unwrap();
    }
}

impl ListBuilderTrait for ListUtf8ChunkedBuilder {
    /// Append `opt_s` as a sub-list, or a null entry when it is `None`.
    fn append_opt_series(&mut self, opt_s: Option<&Series>) {
        if let Some(s) = opt_s {
            self.append_series(s);
        } else {
            self.append_null();
        }
    }

    /// Append a null entry; this rules out fast explode.
    #[inline]
    fn append_null(&mut self) {
        self.fast_explode = false;
        self.builder.push_null();
    }

    /// Append the strings of `s` as one valid sub-list.
    ///
    /// Panics if `s` is not a `Utf8` series.
    fn append_series(&mut self, s: &Series) {
        // An empty sub-list rules out fast explode.
        self.fast_explode &= !s.is_empty();
        self.append(s.utf8().unwrap())
    }

    /// Finish the builder (delegates to the `finish_list_builder!` macro).
    fn finish(&mut self) -> ListChunked {
        finish_list_builder!(self)
    }
}

/// Builder for a list array whose inner dtype is `Binary`
/// (only available with the `dtype-binary` feature).
#[cfg(feature = "dtype-binary")]
pub struct ListBinaryChunkedBuilder {
    // Underlying mutable arrow list array that collects the sub-lists.
    builder: LargeListBinaryBuilder,
    // Name and `List(Binary)` dtype of the array being built.
    field: Field,
    // Starts as `true`; cleared when a null or (possibly) empty sub-list is appended.
    fast_explode: bool,
}

#[cfg(feature = "dtype-binary")]
impl ListBinaryChunkedBuilder {
    /// Create a builder named `name` with room for `capacity` sub-lists and
    /// `values_capacity` binary values.
    pub fn new(name: &str, capacity: usize, values_capacity: usize) -> Self {
        let values = MutableBinaryArray::<i64>::with_capacity(values_capacity);
        let builder = LargeListBinaryBuilder::new_with_capacity(values, capacity);
        let field = Field::new(name, DataType::List(Box::new(DataType::Binary)));

        ListBinaryChunkedBuilder {
            builder,
            field,
            fast_explode: true,
        }
    }

    /// Append the optional byte slices yielded by `iter` as one valid sub-list.
    pub fn append_trusted_len_iter<'a, I: Iterator<Item = Option<&'a [u8]>> + TrustedLen>(
        &mut self,
        iter: I,
    ) {
        let values = self.builder.mut_values();

        // A lower size bound of zero means the sub-list may be empty.
        if iter.size_hint().0 == 0 {
            self.fast_explode = false;
        }
        // Safety
        // trusted len, trust the type system
        unsafe { values.extend_trusted_len_unchecked(iter) };
        self.builder.try_push_valid().unwrap();
    }

    /// Append the (non-null) byte slices yielded by `iter` as one valid sub-list.
    pub fn append_values_iter<'a, I: Iterator<Item = &'a [u8]>>(&mut self, iter: I) {
        let values = self.builder.mut_values();

        // A lower size bound of zero means the sub-list may be empty.
        if iter.size_hint().0 == 0 {
            self.fast_explode = false;
        }
        values.extend_values(iter);
        self.builder.try_push_valid().unwrap();
    }

    /// Append all values of `ca` as one valid sub-list.
    pub(crate) fn append(&mut self, ca: &BinaryChunked) {
        // An empty sub-list rules out fast explode. Check it here as well, so that
        // callers other than `append_series` cannot leave the flag set by accident
        // (this mirrors `ListBooleanChunkedBuilder::append`).
        if ca.is_empty() {
            self.fast_explode = false;
        }
        let value_builder = self.builder.mut_values();
        value_builder.try_extend(ca).unwrap();
        self.builder.try_push_valid().unwrap();
    }
}

#[cfg(feature = "dtype-binary")]
impl ListBuilderTrait for ListBinaryChunkedBuilder {
    /// Append `opt_s` as a sub-list, or a null entry when it is `None`.
    fn append_opt_series(&mut self, opt_s: Option<&Series>) {
        if let Some(s) = opt_s {
            self.append_series(s);
        } else {
            self.append_null();
        }
    }

    /// Append a null entry; this rules out fast explode.
    #[inline]
    fn append_null(&mut self) {
        self.fast_explode = false;
        self.builder.push_null();
    }

    /// Append the binary values of `s` as one valid sub-list.
    ///
    /// Panics if `s` is not a `Binary` series.
    fn append_series(&mut self, s: &Series) {
        // An empty sub-list rules out fast explode.
        self.fast_explode &= !s.is_empty();
        self.append(s.binary().unwrap())
    }

    /// Finish the builder (delegates to the `finish_list_builder!` macro).
    fn finish(&mut self) -> ListChunked {
        finish_list_builder!(self)
    }
}

/// Builder for a list array whose inner dtype is `Boolean`.
pub struct ListBooleanChunkedBuilder {
    // Underlying mutable arrow list array that collects the sub-lists.
    builder: LargeListBooleanBuilder,
    // Name and `List(Boolean)` dtype of the array being built.
    field: Field,
    // Starts as `true`; cleared when a null or (possibly) empty sub-list is appended.
    fast_explode: bool,
}

impl ListBooleanChunkedBuilder {
    /// Create a builder named `name` with room for `capacity` sub-lists and
    /// `values_capacity` boolean values.
    pub fn new(name: &str, capacity: usize, values_capacity: usize) -> Self {
        let inner = MutableBooleanArray::with_capacity(values_capacity);
        let list_dtype = DataType::List(Box::new(DataType::Boolean));

        Self {
            builder: LargeListBooleanBuilder::new_with_capacity(inner, capacity),
            field: Field::new(name, list_dtype),
            fast_explode: true,
        }
    }

    /// Append the optional booleans yielded by `iter` as one valid sub-list.
    #[inline]
    pub fn append_iter<I: Iterator<Item = Option<bool>> + TrustedLen>(&mut self, iter: I) {
        // A lower size bound of zero means the sub-list may be empty.
        let (lower_bound, _) = iter.size_hint();
        if lower_bound == 0 {
            self.fast_explode = false;
        }
        let values = self.builder.mut_values();
        // Safety
        // trusted len, trust the type system
        unsafe { values.extend_trusted_len_unchecked(iter) };
        self.builder.try_push_valid().unwrap();
    }

    /// Append all values of `ca` as one valid sub-list.
    #[inline]
    pub(crate) fn append(&mut self, ca: &BooleanChunked) {
        // An empty sub-list rules out fast explode.
        self.fast_explode &= !ca.is_empty();
        let value_builder = self.builder.mut_values();
        value_builder.extend(ca);
        self.builder.try_push_valid().unwrap();
    }
}

impl ListBuilderTrait for ListBooleanChunkedBuilder {
    /// Append `opt_s` as a sub-list, or a null entry when it is `None`.
    fn append_opt_series(&mut self, opt_s: Option<&Series>) {
        if let Some(s) = opt_s {
            self.append_series(s);
        } else {
            self.append_null();
        }
    }

    /// Append a null entry; this rules out fast explode.
    #[inline]
    fn append_null(&mut self) {
        self.fast_explode = false;
        self.builder.push_null();
    }

    /// Append the booleans of `s` as one valid sub-list.
    ///
    /// Panics if `s` is not a `Boolean` series.
    #[inline]
    fn append_series(&mut self, s: &Series) {
        let bools = s.bool().unwrap();
        self.append(bools)
    }

    /// Finish the builder (delegates to the `finish_list_builder!` macro).
    fn finish(&mut self) -> ListChunked {
        finish_list_builder!(self)
    }
}

/// Return a boxed list builder suited to the physical representation of `dt`.
///
/// * `dt` - inner dtype of the list to build.
/// * `value_capacity` - capacity hint for the inner values.
/// * `list_capacity` - capacity hint for the number of sub-lists.
/// * `name` - name of the resulting list array.
///
/// Returns a `ComputeError` for `Object` dtypes (with the `object` feature enabled).
pub fn get_list_builder(
    dt: &DataType,
    value_capacity: usize,
    list_capacity: usize,
    name: &str,
) -> PolarsResult<Box<dyn ListBuilderTrait>> {
    // The builder is selected on the physical type, not on the logical one.
    let physical_type = dt.to_physical();

    // Error for dtypes that have no list builder.
    let _err = || -> PolarsResult<Box<dyn ListBuilderTrait>> {
        Err(PolarsError::ComputeError(
            format!(
                "list builder not supported for this dtype: {}",
                &physical_type
            )
            .into(),
        ))
    };

    match &physical_type {
        #[cfg(feature = "object")]
        DataType::Object(_) => _err(),
        // Struct and nested list values go through the anonymous builder.
        #[cfg(feature = "dtype-struct")]
        DataType::Struct(_) => Ok(Box::new(AnonymousOwnedListBuilder::new(
            name,
            list_capacity,
            Some(physical_type),
        ))),
        DataType::List(_) => Ok(Box::new(AnonymousOwnedListBuilder::new(
            name,
            list_capacity,
            Some(physical_type),
        ))),
        _ => {
            // One small macro per builder family; `match_dtype_to_logical_apply_macro!`
            // below picks the one that matches `physical_type`.
            // Note that the primitive builder receives the original (logical) `dt`.
            macro_rules! get_primitive_builder {
                ($type:ty) => {{
                    let builder = ListPrimitiveChunkedBuilder::<$type>::new(
                        name,
                        list_capacity,
                        value_capacity,
                        dt.clone(),
                    );
                    Box::new(builder)
                }};
            }
            macro_rules! get_bool_builder {
                () => {{
                    let builder =
                        ListBooleanChunkedBuilder::new(&name, list_capacity, value_capacity);
                    Box::new(builder)
                }};
            }
            // NOTE(review): the `5 *` factor looks like an estimate of the bytes needed
            // per string value; confirm.
            macro_rules! get_utf8_builder {
                () => {{
                    let builder =
                        ListUtf8ChunkedBuilder::new(&name, list_capacity, 5 * value_capacity);
                    Box::new(builder)
                }};
            }
            // NOTE(review): this macro only exists with `dtype-binary`, yet it is named
            // unconditionally below; presumably the dispatch macro gates its use; confirm.
            #[cfg(feature = "dtype-binary")]
            macro_rules! get_binary_builder {
                () => {{
                    let builder =
                        ListBinaryChunkedBuilder::new(&name, list_capacity, 5 * value_capacity);
                    Box::new(builder)
                }};
            }
            Ok(match_dtype_to_logical_apply_macro!(
                physical_type,
                get_primitive_builder,
                get_utf8_builder,
                get_binary_builder,
                get_bool_builder
            ))
        }
    }
}

/// List builder that collects borrowed arrow arrays (lifetime `'a`) and only
/// assembles them into a `ListChunked` in `finish`.
pub struct AnonymousListBuilder<'a> {
    // Name given to the finished list array.
    name: String,
    // Collects the borrowed arrays, nulls and empties that were pushed.
    builder: AnonymousBuilder<'a>,
    // Whether the fast-explode flag may be set on the finished array.
    fast_explode: bool,
    /// Optional inner dtype; used in `finish` when given.
    pub dtype: Option<DataType>,
}

impl Default for AnonymousListBuilder<'_> {
    fn default() -> Self {
        Self::new("", 0, None)
    }
}

impl<'a> AnonymousListBuilder<'a> {
    /// Create a builder named `name` with room for `capacity` sub-lists.
    ///
    /// `inner_dtype` is the dtype of the list values, if it is known up front.
    pub fn new(name: &str, capacity: usize, inner_dtype: Option<DataType>) -> Self {
        Self {
            name: name.into(),
            builder: AnonymousBuilder::new(capacity),
            fast_explode: true,
            dtype: inner_dtype,
        }
    }

    /// Append `opt_s` as a sub-list, or a null entry when it is `None`.
    pub fn append_opt_series(&mut self, opt_s: Option<&'a Series>) {
        match opt_s {
            Some(s) => self.append_series(s),
            None => {
                self.append_null();
            }
        }
    }

    /// Append `opt_s` as a sub-list, or a null entry when it is `None`.
    pub fn append_opt_array(&mut self, opt_s: Option<&'a dyn Array>) {
        match opt_s {
            Some(s) => self.append_array(s),
            None => {
                self.append_null();
            }
        }
    }

    /// Append the borrowed array `arr` as one sub-list.
    pub fn append_array(&mut self, arr: &'a dyn Array) {
        // An empty sub-list rules out fast explode, as in the other list builders.
        if arr.len() == 0 {
            self.fast_explode = false;
        }
        self.builder.push(arr)
    }

    /// Append a null entry.
    #[inline]
    pub fn append_null(&mut self) {
        // A null entry rules out fast explode, as in the other list builders.
        self.fast_explode = false;
        self.builder.push_null();
    }

    /// Append an empty sub-list.
    #[inline]
    pub fn append_empty(&mut self) {
        self.fast_explode = false;
        self.builder.push_empty()
    }

    /// Append the chunks of `s` as one sub-list.
    pub fn append_series(&mut self, s: &'a Series) {
        // empty arrays tend to be null type and thus differ
        // if we would push it the concat would fail.
        if s.is_empty() && matches!(s.dtype(), DataType::Null) {
            self.append_empty();
        } else {
            // An empty series of any other dtype still produces an empty sub-list.
            if s.is_empty() {
                self.fast_explode = false;
            }
            match s.dtype() {
                #[cfg(feature = "dtype-struct")]
                DataType::Struct(_) => {
                    let arr = &**s.array_ref(0);
                    self.builder.push(arr)
                }
                _ => {
                    self.builder.push_multiple(s.chunks());
                }
            }
        }
    }

    /// Finish the builder and return the resulting `ListChunked`.
    ///
    /// The builder itself is reset to its default state. Panics if the underlying
    /// arrow builder fails to assemble the array.
    pub fn finish(&mut self) -> ListChunked {
        // Move the state out and leave `self` reset. From here on only `slf` holds
        // the state that was built up; `self` is a fresh default builder.
        let slf = std::mem::take(self);
        if slf.builder.is_empty() {
            ListChunked::full_null_with_dtype(&slf.name, 0, &slf.dtype.unwrap_or(DataType::Null))
        } else {
            let dtype = slf.dtype.map(|dt| dt.to_physical().to_arrow());
            let arr = slf.builder.finish(dtype.as_ref()).unwrap();
            let dtype = DataType::from(arr.data_type());
            let mut ca = ListChunked::from_chunks("", vec![Box::new(arr)]);

            // Read the flag from the moved-out state: `self.fast_explode` was just
            // reset to `true` by `mem::take` and says nothing about what was appended.
            if slf.fast_explode {
                ca.set_fast_explode();
            }

            ca.field = Arc::new(Field::new(&slf.name, dtype));
            ca
        }
    }
}

/// Variant of the anonymous list builder that keeps clones of the appended
/// `Series`, so the arrays referenced by `builder` stay alive.
pub struct AnonymousOwnedListBuilder {
    // Name given to the finished list array.
    name: String,
    // Holds array references whose lifetime has been erased to `'static`.
    builder: AnonymousBuilder<'static>,
    // Clones of every appended series; they keep the referenced arrays alive.
    owned: Vec<Series>,
    // Optional inner dtype of the list values.
    inner_dtype: Option<DataType>,
    // Whether the fast-explode flag may be set on the finished array.
    fast_explode: bool,
}

impl Default for AnonymousOwnedListBuilder {
    /// Equivalent to `Self::new("", 0, None)`.
    // NOTE(review): presumably empty name, zero capacity and no inner dtype — confirm against `new`
    fn default() -> Self {
        Self::new("", 0, None)
    }
}

impl ListBuilderTrait for AnonymousOwnedListBuilder {
    /// Append a `Series` as one list entry; an empty `Series` is appended as an empty entry.
    ///
    /// A clone of every non-empty `Series` is stored in `self.owned`, which is what allows the
    /// builder to hold references to its arrays beyond the lifetime of `s`.
    fn append_series(&mut self, s: &Series) {
        if s.is_empty() {
            self.append_empty();
        } else {
            // SAFETY:
            // We dereference raw pointers to extend the lifetime of the borrowed arrays
            // beyond that of `s` (the builder is typed `'static`).
            // This is sound because the Series is cloned below (Arc += 1), so the `&dyn Array`s
            // are not dropped until the owned series are dropped.
            unsafe {
                match s.dtype() {
                    #[cfg(feature = "dtype-struct")]
                    DataType::Struct(_) => {
                        self.builder.push(&*(&**s.array_ref(0) as *const dyn Array))
                    }
                    _ => {
                        self.builder
                            .push_multiple(&*(s.chunks().as_ref() as *const [ArrayRef]));
                    }
                }
            }
            // this makes sure that the underlying ArrayRefs are not dropped
            self.owned.push(s.clone());
        }
    }

src/frame/row.rs (line 98)

    /// Build a `DataFrame` from an iterator of rows, taking column names and dtypes from `schema`.
    ///
    /// Row values are zipped with one buffer per schema dtype. A column whose buffer stays
    /// empty is replaced by a null column as long as the number of rows seen.
    ///
    /// # Errors
    /// Returns the error of the first value a buffer refuses, or whatever `DataFrame::new` reports.
    pub fn from_rows_iter_and_schema<'a, I>(mut rows: I, schema: &Schema) -> PolarsResult<Self>
    where
        I: Iterator<Item = &'a Row<'a>>,
    {
        // the lower bound of the size hint serves as the initial buffer capacity
        let capacity = rows.size_hint().0;

        let mut buffers: Vec<_> = schema
            .iter_dtypes()
            .map(|dtype| {
                let buffer: AnyValueBuffer = (dtype, capacity).into();
                buffer
            })
            .collect();

        let mut expected_len = 0;
        for row in rows.by_ref() {
            expected_len += 1;
            for (value, buffer) in row.0.iter().zip(&mut buffers) {
                buffer.add_fallible(value)?;
            }
        }

        let columns = buffers
            .into_iter()
            .zip(schema.iter_names())
            .map(|(buffer, name)| {
                let mut column = buffer.into_series();
                if !column.is_empty() {
                    column.rename(name);
                    column
                } else {
                    // the schema adds a column that is not in the rows:
                    // fill it with nulls
                    Series::full_null(name, expected_len, column.dtype())
                }
            })
            .collect();
        DataFrame::new(columns)
    }

src/functions.rs (line 146)

/// Concatenate the string representation of several series row-wise, separated by `delimiter`.
///
/// Every series is cast to `Utf8`. Series of length 1 are broadcast to the longest length.
/// A row becomes null as soon as one of its inputs is null. If any input series is empty,
/// an empty result named after the first series is returned.
///
/// # Errors
/// Returns `NoData` when `s` is empty, `ComputeError` when the lengths are neither equal
/// nor 1, and any error raised by the cast to `Utf8`.
pub fn concat_str(s: &[Series], delimiter: &str) -> PolarsResult<Utf8Chunked> {
    if s.is_empty() {
        return Err(PolarsError::NoData(
            "expected multiple series in concat_str function".into(),
        ));
    }
    if s.iter().any(|series| series.is_empty()) {
        return Ok(Utf8Chunked::full_null(s[0].name(), 0));
    }

    let len = s.iter().map(|series| series.len()).max().unwrap();

    let mut cas = Vec::with_capacity(s.len());
    for series in s {
        let casted = series.cast(&DataType::Utf8)?;
        let ca = casted.utf8()?.clone();
        // broadcast unit-length input to the common length
        cas.push(if ca.len() == 1 && len > 1 {
            ca.new_from_index(0, len)
        } else {
            ca
        });
    }

    if !s.iter().all(|series| series.len() == 1 || series.len() == len) {
        return Err(PolarsError::ComputeError(
            "All series in concat_str function should have equal length or unit length".into(),
        ));
    }

    let mut iters = Vec::with_capacity(cas.len());
    for ca in cas.iter() {
        iters.push(match ca.len() {
            1 => IterBroadCast::Value(ca.get(0)),
            _ => IterBroadCast::Column(ca.into_iter()),
        });
    }

    let bytes_cap = cas.iter().map(|ca| ca.get_values_size()).sum();
    let mut builder = Utf8ChunkedBuilder::new(s[0].name(), len, bytes_cap);

    // one string buffer reused for every row, to amortize allocations
    let mut buf = String::with_capacity(128);

    for _ in 0..len {
        let mut has_null = false;

        for (i, it) in iters.iter_mut().enumerate() {
            if i > 0 {
                buf.push_str(delimiter);
            }

            match it.next() {
                Some(Some(value)) => buf.push_str(value),
                Some(None) => has_null = true,
                // should not happen as the outer loop counts to length
                None => unreachable!(),
            }
        }

        if has_null {
            builder.append_null();
        } else {
            builder.append_value(&buf)
        }
        buf.clear();
    }
    Ok(builder.finish())
}

Additional examples can be found in:

src/chunked_array/upstream_traits.rs

source

fn rechunk(&self) -> Series

Aggregate all chunks to a contiguous array of memory.

Examples found in repository ?

src/frame/mod.rs (line 408)

    /// Return a new `DataFrame` in which every column has been rechunked.
    pub fn agg_chunks(&self) -> Self {
        // Don't parallelize this. Memory overhead
        let rechunked = self.columns.iter().map(|col| col.rechunk()).collect();
        DataFrame::new_no_checks(rechunked)
    }

    /// Shrink the capacity of every column of this DataFrame to fit its length.
    pub fn shrink_to_fit(&mut self) {
        // Don't parallelize this. Memory overhead
        self.columns.iter_mut().for_each(|col| col.shrink_to_fit());
    }

    /// Rechunk every column in place so that each consists of a single chunk.
    pub fn as_single_chunk(&mut self) -> &mut Self {
        // Don't parallelize this. Memory overhead
        self.columns.iter_mut().for_each(|col| *col = col.rechunk());
        self
    }

    /// Rechunk every column to a single chunk, processing the columns in parallel.
    /// This may lead to more peak memory consumption.
    ///
    /// Nothing is done when no column has more than one chunk.
    pub fn as_single_chunk_par(&mut self) -> &mut Self {
        let needs_rechunk = self.columns.iter().any(|col| col.n_chunks() > 1);
        if needs_rechunk {
            self.columns = self.apply_columns_par(&|col| col.rechunk());
        }
        self
    }

More examples

Hide additional examples

src/series/mod.rs (line 402)

    /// Append the partial results in `s` into one `Series`, optionally rechunking the result.
    ///
    /// # Panics
    /// Panics if `s` is empty or if appending one of the parts fails.
    fn finish_take_threaded(&self, s: Vec<Series>, rechunk: bool) -> Series {
        let mut parts = s.into_iter();
        let mut out = parts.next().unwrap();
        for part in parts {
            out.append(&part).unwrap();
        }

        if rechunk {
            out.rechunk()
        } else {
            out
        }
    }

src/utils/mod.rs (line 945)

/// Give both series the same validity: a slot is valid only if it is valid in `a` and in `b`.
///
/// When neither series contains nulls, plain clones are returned. Otherwise both are
/// rechunked first and the combined validity is applied chunk by chunk.
pub fn coalesce_nulls_series(a: &Series, b: &Series) -> (Series, Series) {
    // without nulls on either side there is nothing to combine
    if a.null_count() == 0 && b.null_count() == 0 {
        return (a.clone(), b.clone());
    }

    let mut a = a.rechunk();
    let mut b = b.rechunk();
    for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
        let validity = match (arr_a.validity(), arr_b.validity()) {
            (Some(lhs), Some(rhs)) => Some(lhs & rhs),
            (Some(mask), None) | (None, Some(mask)) => Some(mask.clone()),
            (None, None) => None,
        };
        *arr_a = arr_a.with_validity(validity.clone());
        *arr_b = arr_b.with_validity(validity);
    }
    (a, b)
}

src/series/ops/to_list.rs (line 33)

    /// Wrap the whole `Series` into a `ListChunked` holding a single list entry.
    ///
    /// The values are rechunked first. When the dtype is not its own physical
    /// representation, the logical inner dtype is restored on the result.
    pub fn to_list(&self) -> PolarsResult<ListChunked> {
        let rechunked = self.rechunk();
        let values = rechunked.array_ref(0);

        let inner_type = self.dtype();
        let physical_type = inner_type.to_physical();
        let data_type = ListArray::<i64>::default_datatype(physical_type.to_arrow());

        // one list entry spanning all values
        let offsets = vec![0i64, values.len() as i64];

        // Safety:
        // offsets are correct;
        let arr = unsafe {
            ListArray::new(
                data_type,
                Offsets::new_unchecked(offsets).into(),
                values.clone(),
                None,
            )
        };

        let mut ca = ListChunked::from_chunks(self.name(), vec![Box::new(arr)]);
        if inner_type != &physical_type {
            ca.to_logical(inner_type.clone())
        }
        ca.set_fast_explode();

        Ok(ca)
    }

src/frame/hash_join/mod.rs (line 474)

    /// Left join `self` with `other` on the given key series.
    ///
    /// Both frames (and their key series) are rechunked first when they report that they
    /// should be. The right key column is dropped from `other` before the join is finished.
    ///
    /// # Panics
    /// Panics if `other` has no column named like `s_right`.
    pub fn _left_join_from_series(
        &self,
        other: &DataFrame,
        s_left: &Series,
        s_right: &Series,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
        verbose: bool,
    ) -> PolarsResult<DataFrame> {
        #[cfg(feature = "dtype-categorical")]
        _check_categorical_src(s_left.dtype(), s_right.dtype())?;

        // the chunks must be aligned, otherwise we go out of bounds
        let mut left_df = self.clone();
        let mut left_key = s_left.clone();
        if left_df.should_rechunk() {
            left_df.as_single_chunk_par();
            left_key = left_key.rechunk();
        }

        let mut right_df = other.clone();
        let mut right_key = s_right.clone();
        if right_df.should_rechunk() {
            right_df.as_single_chunk_par();
            right_key = right_key.rechunk();
        }

        let ids = sort_or_hash_left(&left_key, &right_key, verbose);
        let right_without_key = right_df.drop(right_key.name()).unwrap();
        left_df._finish_left_join(ids, &right_without_key, suffix, slice)
    }

src/chunked_array/ops/sort/mod.rs (line 694)

pub(crate) fn prepare_argsort(
    columns: Vec<Series>,
    mut reverse: Vec<bool>,
) -> PolarsResult<(Series, Vec<Series>, Vec<bool>)> {
    let n_cols = columns.len();

    let mut columns = columns
        .iter()
        .map(|s| {
            use DataType::*;
            match s.dtype() {
                Float32 | Float64 | Int32 | Int64 | Utf8 | UInt32 | UInt64 => s.clone(),
                #[cfg(feature = "dtype-categorical")]
                Categorical(_) => s.rechunk(),
                _ => {
                    // small integers i8, u8 etc are casted to reduce compiler bloat
                    // not that we don't expect any logical types at this point
                    if s.bit_repr_is_large() {
                        s.cast(&DataType::Int64).unwrap()
                    } else {
                        s.cast(&DataType::Int32).unwrap()
                    }
                }
            }
        })
        .collect::<Vec<_>>();

    let first = columns.remove(0);

    // broadcast ordering
    if n_cols > reverse.len() && reverse.len() == 1 {
        while n_cols != reverse.len() {
            reverse.push(reverse[0]);
        }
    }
    Ok((first, columns, reverse))
}

Additional examples can be found in:

src/chunked_array/ndarray.rs

source

fn drop_nulls(&self) -> Series

Drop all null values and return a new Series.

source

fn mean(&self) -> Option<f64>

Returns the mean value in the array. Returns an option because the array is nullable.

Examples found in repository ?

src/series/mod.rs (line 856)

    /// Compute the mean and return it as a one-element `Series` with the same name.
    ///
    /// `Float32` input yields an `f32` mean, other numeric and boolean input an `f64` mean,
    /// and `Duration` input a mean cast back to the duration dtype. Any other dtype yields a
    /// single null of the original dtype.
    pub fn mean_as_series(&self) -> Series {
        let name = self.name();
        match self.dtype() {
            DataType::Float32 => Series::new(name, &[self.mean().map(|m| m as f32)]),
            dt if dt.is_numeric() || matches!(dt, DataType::Boolean) => {
                Series::new(name, &[self.mean()])
            }
            dt @ DataType::Duration(_) => Series::new(name, &[self.mean().map(|v| v as i64)])
                .cast(dt)
                .unwrap(),
            dt => Series::full_null(name, 1, dt),
        }
    }

More examples

Hide additional examples

src/series/ops/moment.rs (line 35)

/// Compute the `moment`-th central moment of `s` around the given, precomputed `mean`.
///
/// Moment 0 is `1.0` and moment 1 is `0.0` by definition. Higher moments are obtained by
/// raising the mean-centered values to the requested power through repeated squaring.
fn moment_precomputed_mean(s: &Series, moment: usize, mean: f64) -> PolarsResult<Option<f64>> {
    // see: https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L922
    match moment {
        0 => return Ok(Some(1.0)),
        1 => return Ok(Some(0.0)),
        _ => {}
    }

    // chain of exponents obtained by repeatedly halving `moment`
    let mut n_list = vec![moment];
    let mut current_n = moment;
    while current_n > 2 {
        // integer halving; for odd values this equals (n - 1) / 2
        current_n /= 2;
        n_list.push(current_n);
    }

    let a_zero_mean = s.cast(&DataType::Float64)? - mean;

    let mut powered = if n_list.pop().unwrap() == 1 {
        a_zero_mean.clone()
    } else {
        &a_zero_mean * &a_zero_mean
    };

    for n in n_list.into_iter().rev() {
        powered = &powered * &powered;
        if n % 2 == 1 {
            powered = &powered * &a_zero_mean;
        }
    }
    Ok(powered.mean())
}

impl Series {
    /// Compute the sample skewness of a data set.
    ///
    /// For normally distributed data, the skewness should be about zero. For
    /// uni-modal continuous distributions, a skewness value greater than zero means
    /// that there is more weight in the right tail of the distribution. The
    /// function `skewtest` can be used to determine if the skewness value
    /// is close enough to zero, statistically speaking.
    ///
    /// Returns `Ok(None)` when the mean is `None`. With `bias == false` the result is
    /// corrected using the number of non-null values.
    ///
    /// see: https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1024
    #[cfg_attr(docsrs, doc(cfg(feature = "moment")))]
    pub fn skew(&self, bias: bool) -> PolarsResult<Option<f64>> {
        let mean = if let Some(mean) = self.mean() {
            mean
        } else {
            return Ok(None);
        };
        // unwrapping is fine: a missing mean already returned above
        let m2 = moment_precomputed_mean(self, 2, mean)?.unwrap();
        let m3 = moment_precomputed_mean(self, 3, mean)?.unwrap();

        let biased = m3 / m2.powf(1.5);
        if bias {
            return Ok(Some(biased));
        }

        let n = (self.len() - self.null_count()) as f64;
        let correction = ((n - 1.0) * n).sqrt() / (n - 2.0);
        Ok(Some(correction * biased))
    }

    /// Compute the kurtosis (Fisher or Pearson) of a dataset.
    ///
    /// Kurtosis is the fourth central moment divided by the square of the
    /// variance. If Fisher's definition is used, then 3.0 is subtracted from
    /// the result to give 0.0 for a normal distribution.
    /// If bias is `false` then the kurtosis is calculated using k statistics to
    /// eliminate bias coming from biased moment estimators.
    ///
    /// Returns `Ok(None)` when the mean is `None`.
    ///
    /// see: https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1027
    #[cfg_attr(docsrs, doc(cfg(feature = "moment")))]
    pub fn kurtosis(&self, fisher: bool, bias: bool) -> PolarsResult<Option<f64>> {
        let mean = if let Some(mean) = self.mean() {
            mean
        } else {
            return Ok(None);
        };
        // unwrapping is fine: a missing mean already returned above
        let m2 = moment_precomputed_mean(self, 2, mean)?.unwrap();
        let m4 = moment_precomputed_mean(self, 4, mean)?.unwrap();

        let pearson = if bias {
            m4 / m2.powf(2.0)
        } else {
            let n = (self.len() - self.null_count()) as f64;
            let scale = 1.0 / (n - 2.0) / (n - 3.0);
            let adjusted = (n.powf(2.0) - 1.0) * m4 / m2.powf(2.0) - 3.0 * (n - 1.0).powf(2.0);
            3.0 + scale * adjusted
        };

        let out = if fisher { pearson - 3.0 } else { pearson };
        Ok(Some(out))
    }

source

fn median(&self) -> Option<f64>

Returns the median value in the array. Returns an option because the array is nullable.

source

fn new_from_index(&self, _index: usize, _length: usize) -> Series

Create a new Series filled with values from the given index.

Example

use polars_core::prelude::*;
let s = Series::new("a", [0i32, 1, 8]);
let s2 = s.new_from_index(2, 4);
assert_eq!(Vec::from(s2.i32().unwrap()), &[Some(8), Some(8), Some(8), Some(8)])

Examples found in repository ?

src/series/ops/extend.rs (line 20)

    /// Return a new `Series` consisting of this one followed by `n` copies of `value`.
    ///
    /// The value is first turned into a one-element `Series`, cast to the dtype of `self`,
    /// repeated `n` times and finally appended to a clone of `self`.
    ///
    /// # Errors
    /// Returns a `ComputeError` when `value` is an `AnyValue` variant that is not supported
    /// here, and propagates any error from the cast or the append.
    pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult<Self> {
        use AnyValue::*;
        let s = match value {
            Float32(v) => Series::new("", vec![v]),
            Float64(v) => Series::new("", vec![v]),
            UInt32(v) => Series::new("", vec![v]),
            UInt64(v) => Series::new("", vec![v]),
            Int32(v) => Series::new("", vec![v]),
            Int64(v) => Series::new("", vec![v]),
            Utf8(v) => Series::new("", vec![v]),
            Boolean(v) => Series::new("", vec![v]),
            Null => BooleanChunked::full_null("", 1).into_series(),
            // the function is fallible, so report unsupported input instead of panicking
            dt => {
                return Err(PolarsError::ComputeError(
                    format!("{dt:?} not supported").into(),
                ))
            }
        };
        let s = s.cast(self.dtype())?;
        let to_append = s.new_from_index(0, n);

        let mut out = self.clone();
        out.append(&to_append)?;
        Ok(out)
    }

More examples

Hide additional examples

src/frame/mod.rs (line 1126)

        // Non-generic worker: adds `series` to `df`, broadcasting a unit-length series to the
        // frame height and erroring when the lengths cannot be reconciled.
        fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();
            // broadcast a unit-length series to the height of the frame
            if series.len() == 1 && height > 1 {
                series = series.new_from_index(0, height);
            }

            if series.len() == height || df.is_empty() {
                df.add_column_by_search(series)?;
                return Ok(df);
            }

            // special case for literals
            if height == 0 && series.len() == 1 {
                let empty = series.slice(0, 0);
                df.add_column_by_search(empty)?;
                return Ok(df);
            }

            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    df.height()
                )
                    .into(),
            ))
        }
        let series = column.into_series();
        inner(self, series)
    }

    /// Add or replace column `s`, using `schema` to locate an existing column of that name.
    ///
    /// If the schema knows the name but the column at that index has a different name, the
    /// column is added by search instead. A name unknown to the schema is pushed at the end.
    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let name = s.name();
        match schema.get_full(name) {
            Some((idx, _, _)) => {
                let schema_is_correct =
                    self.columns.get(idx).map(|col| col.name()) == Some(name);
                if schema_is_correct {
                    self.replace_at_idx(idx, s)?;
                } else {
                    // schema is incorrect: fall back to search
                    self.add_column_by_search(s)?;
                }
            }
            None => self.columns.push(s),
        }
        Ok(())
    }

    /// Add several columns to this `DataFrame`, using `schema` to speed up lookups.
    ///
    /// The first column and every column whose name is known to `schema` go through
    /// `with_column_and_schema`; all others go through `with_column`.
    ///
    /// # Errors
    /// Propagates the first error raised while adding a column.
    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                // `s` is owned and not used afterwards, so it is moved instead of cloned
                self.with_column(s)?;
            }
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    /// Uses an existing schema to amortize lookups.
    /// If the schema is incorrect, we will fallback to linear search.
    ///
    /// A unit-length column is broadcast to the height of the frame.
    ///
    /// # Errors
    /// Returns `ShapeMisMatch` when the column length cannot be reconciled with the height.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let mut series = column.into_series();

        let height = self.height();
        // broadcast a unit-length series to the height of the frame
        if series.len() == 1 && height > 1 {
            series = series.new_from_index(0, height);
        }

        if series.len() == height || self.is_empty() {
            self.add_column_by_schema(series, schema)?;
            return Ok(self);
        }

        // special case for literals
        if height == 0 && series.len() == 1 {
            let empty = series.slice(0, 0);
            self.add_column_by_schema(empty, schema)?;
            return Ok(self);
        }

        Err(PolarsError::ShapeMisMatch(
            format!(
                "Could not add column. The Series length {} differs from the DataFrame height: {}",
                series.len(),
                self.height()
            )
                .into(),
        ))
    }

    /// Get a row in the `DataFrame`. Beware this is slow.
    ///
    /// Returns `None` when there are no columns or `idx` is not below the length of the
    /// first column.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    ///     df.get(idx)
    /// }
    /// ```
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        let in_bounds = self.columns.first().map_or(false, |s| idx < s.len());
        if !in_bounds {
            return None;
        }
        // safety: we just checked bounds (against the first column)
        let row = self
            .columns
            .iter()
            .map(|s| unsafe { s.get_unchecked(idx) })
            .collect();
        Some(row)
    }

    /// Select a `Series` by index.
    ///
    /// Returns `None` when no column is stored at `idx`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
    ///                         "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
    ///
    /// let s1: Option<&Series> = df.select_at_idx(0);
    /// let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
    ///
    /// assert_eq!(s1, Some(&s2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_at_idx(&self, idx: usize) -> Option<&Series> {
        self.columns.get(idx)
    }

    /// Select a mutable series by index.
    ///
    /// Returns `None` when no column is stored at `idx`.
    ///
    /// *Note: the length of the Series should remain the same, otherwise the DataFrame is invalid.*
    /// For this reason the method is not public.
    fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> {
        self.columns.get_mut(idx)
    }

    /// Select column(s) from this `DataFrame` by range and return a new DataFrame
    ///
    /// # Panics
    /// Panics when the range starts after its end or ends beyond the number of columns.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///     "0" => &[0, 0, 0],
    ///     "1" => &[1, 1, 1],
    ///     "2" => &[2, 2, 2]
    /// }?;
    ///
    /// assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
    /// assert!(df.frame_equal(&df.select_by_range(..)?));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // Copied from std::slice::range (https://doc.rust-lang.org/std/slice/fn.range.html),
        // which is a nightly-only feature. Switch to the std function once it is stable.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let len = bounds.end;

            // resolve the lower bound to an inclusive start index
            let start = match range.start_bound() {
                ops::Bound::Unbounded => 0,
                ops::Bound::Included(&start) => start,
                ops::Bound::Excluded(start) => start.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice from after maximum usize");
                }),
            };

            // resolve the upper bound to an exclusive end index
            let end = match range.end_bound() {
                ops::Bound::Unbounded => len,
                ops::Bound::Excluded(&end) => end,
                ops::Bound::Included(end) => end.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice up to maximum usize");
                }),
            };

            if start > end {
                panic!("slice index starts at {start} but ends at {end}");
            }
            if end > len {
                panic!("range end index {end} out of range for slice of length {len}",);
            }

            start..end
        }

        let colnames = self.get_column_names_owned();
        let selected = get_range(range, ..colnames.len());

        self.select_impl(&colnames[selected])
    }

    /// Get column index of a `Series` by name.
    ///
    /// Returns the position of the first column whose name equals `name`, or `None` when
    /// there is no such column.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
    ///                         "Health" => &[100, 200, 500],
    ///                         "Mana" => &[250, 100, 0],
    ///                         "Strength" => &[30, 150, 300])?;
    ///
    /// assert_eq!(df.find_idx_by_name("Name"), Some(0));
    /// assert_eq!(df.find_idx_by_name("Health"), Some(1));
    /// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
    /// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
    /// assert_eq!(df.find_idx_by_name("Haste"), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
        self.columns.iter().position(|s| s.name() == name)
    }

    /// Select a single column by name.
    ///
    /// # Errors
    /// Returns `PolarsError::NotFound` carrying `name` when there is no such column.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
    /// let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
    ///
    /// assert_eq!(df.column("Password")?, &s1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        match self.find_idx_by_name(name) {
            Some(idx) => Ok(self.select_at_idx(idx).unwrap()),
            None => Err(PolarsError::NotFound(name.to_string().into())),
        }
    }

    /// Select multiple columns by name.
    ///
    /// The columns are returned in the order of `names`.
    ///
    /// # Errors
    /// Returns the error of the first name for which `column` fails.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        names
            .into_iter()
            .map(|name| self.column(name.as_ref()))
            .collect()
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// The names are copied into owned strings and handed to the non-generic implementation.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let names: Vec<String> = selection
            .into_iter()
            .map(|name| name.as_ref().to_owned())
            .collect();
        self.select_impl(&names)
    }

    /// Non-generic part of `select`: rejects duplicate names, then clones the named columns
    /// into a new `DataFrame`.
    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        self.select_series_impl(cols)
            .map(DataFrame::new_no_checks)
    }

    /// Select column(s) by name and return them, in their physical representation, as a new
    /// `DataFrame`.
    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let names: Vec<String> = selection
            .into_iter()
            .map(|name| name.as_ref().to_owned())
            .collect();
        self.select_physical_impl(&names)
    }

    /// Non-generic part of `select_physical`: rejects duplicate names, then collects the
    /// physical representation of the named columns into a new `DataFrame`.
    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        self.select_series_physical_impl(cols)
            .map(DataFrame::new_no_checks)
    }

    /// Check that `cols` contains no name twice, reporting the first repeated name through
    /// `_duplicate_err`.
    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut seen = PlHashSet::with_capacity(cols.len());
        cols.iter().try_for_each(|name| -> PolarsResult<()> {
            // `insert` returns false when the name was already present
            if !seen.insert(name.as_str()) {
                _duplicate_err(name)?;
            }
            Ok(())
        })
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Errors
    /// Returns `PolarsError::NotFound` for the first name that is not a column.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        let names = selection.into_vec();
        self.select_series_impl(&names)
    }

    /// Build a map from column name to column position.
    // NOTE(review): with duplicate column names the later position presumably wins — confirm
    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        self.columns
            .iter()
            .enumerate()
            .map(|(i, s)| (s.name(), i))
            .collect()
    }

    /// A non generic implementation to reduce compiler bloat.
    ///
    /// Returns the physical representation of the named columns. A name-to-index map is
    /// built only when several names are requested from a frame with more than 10 columns;
    /// otherwise every name is looked up by linear search.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let use_hash_lookup = cols.len() > 1 && self.columns.len() > 10;
        if !use_hash_lookup {
            return cols
                .iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect();
        }

        let name_to_idx = self._names_to_idx_map();
        cols.iter()
            .map(|name| {
                let idx = *name_to_idx
                    .get(name.as_str())
                    .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                let column = self.select_at_idx(idx).unwrap();
                Ok(column.to_physical_repr().into_owned())
            })
            .collect()
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            // we hash, because there are user that having millions of columns.
            // # https://github.com/pola-rs/polars/issues/1023
            let name_to_idx = self._names_to_idx_map();

            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self.select_at_idx(idx).unwrap().clone())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.clone()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// Select a mutable series by name, or `None` when there is no column of that name.
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        let idx = self.find_idx_by_name(name)?;
        self.select_at_idx_mut(idx)
    }

    /// Does a filter but splits thread chunks vertically instead of horizontally
    /// This yields a DataFrame with `n_chunks == n_threads`.
    ///
    /// # Panics
    /// Panics if splitting the mask or the frame fails, if no part is produced, or if
    /// stacking the filtered parts fails.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let masks = split_ca(mask, n_threads).unwrap();
        let dfs = split_df(self, n_threads).unwrap();
        let filtered: PolarsResult<Vec<_>> = POOL.install(|| {
            masks
                .par_iter()
                .zip(dfs)
                .map(|(mask, df)| {
                    let cols = df
                        .columns
                        .iter()
                        .map(|s| s.filter(mask))
                        .collect::<PolarsResult<_>>()?;
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        // stack the filtered parts back together, in order
        let stacked = filtered?
            .into_iter()
            .reduce(|mut acc, df| {
                acc.vstack_mut(&df).unwrap();
                acc
            })
            .unwrap();
        Ok(stacked)
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            return self.clone().filter_vertical(mask);
        }
        let new_col = self.try_apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s.filter_threaded(mask, true),
            _ => s.filter(mask),
        })?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Non-parallel counterpart of `filter`: every column is filtered with `mask` through
    /// `try_apply_columns`.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        self.try_apply_columns(&|s| s.filter(mask))
            .map(DataFrame::new_no_checks)
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        let new_col = self.try_apply_columns_par(&|s| {
            let mut i = iter.clone();
            s.take_iter(&mut i)
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Gather `DataFrame` rows at the positions produced by an iterator, without bounds checks.
    ///
    /// Depending on the `POLARS_VERT_PAR` environment variable, the chunk count, the width
    /// and the presence of `Utf8` columns, the indexes are either materialized into an
    /// `IdxCa` first or consumed directly by the columns.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        let vertical = std::env::var("POLARS_VERT_PAR").is_ok();
        if vertical {
            let idx = iter
                .map(|i| i as IdxSize)
                .collect::<NoNull<IdxCa>>()
                .into_inner();
            return self.take_unchecked_vectical(&idx);
        }

        let single_chunk = self.n_chunks() == 1;
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));
        let width = self.width();

        // Materialize the indexes once and gather through the `IdxCa` based path.
        if (single_chunk && width > 1) || has_utf8 {
            let idx = iter
                .map(|i| i as IdxSize)
                .collect::<NoNull<IdxCa>>()
                .into_inner();
            return self.take_unchecked(&idx);
        }

        let columns = if width == 1 {
            // A single column can consume the iterator directly; no clone required.
            self.columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut local = iter.clone();
                s.take_iter_unchecked(&mut local)
            })
        };
        DataFrame::new_no_checks(columns)
    }

    /// Gather `DataFrame` rows at the optional positions produced by an iterator, without
    /// bounds checks. The iterator may contain `None` values.
    ///
    /// Follows the same dispatch as `take_iter_unchecked`: the indexes are either
    /// materialized into an `IdxCa` first or consumed directly by the columns.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        let vertical = std::env::var("POLARS_VERT_PAR").is_ok();
        if vertical {
            let idx = iter
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect::<IdxCa>();
            return self.take_unchecked_vectical(&idx);
        }

        let single_chunk = self.n_chunks() == 1;
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));
        let width = self.width();

        // Materialize the indexes once and gather through the `IdxCa` based path.
        if (single_chunk && width > 1) || has_utf8 {
            let idx = iter
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect::<IdxCa>();
            return self.take_unchecked(&idx);
        }

        let columns = if width == 1 {
            // A single column can consume the iterator directly; no clone required.
            self.columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut local = iter.clone();
                s.take_opt_iter_unchecked(&mut local)
            })
        };

        DataFrame::new_no_checks(columns)
    }

    /// Gather `DataFrame` rows at the given index values.
    ///
    /// An index array made of more than one chunk is rechunked first. The columns are then
    /// gathered in parallel, with `Utf8` columns going through `take_threaded`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        // Only pay for a rechunk when the indexes really are spread over several chunks.
        let indices = match indices.chunks.len() {
            0 | 1 => Cow::Borrowed(indices),
            _ => Cow::Owned(indices.rechunk()),
        };

        let columns = POOL.install(|| {
            self.try_apply_columns_par(&|s| {
                if matches!(s.dtype(), DataType::Utf8) {
                    s.take_threaded(&indices, true)
                } else {
                    s.take(&indices)
                }
            })
        })?;

        Ok(DataFrame::new_no_checks(columns))
    }

    /// Gather rows by index through `take_unchecked_impl` with threading allowed.
    ///
    /// # Safety
    ///
    /// Forwards to the unchecked implementation; NOTE(review): presumably `idx` must be in
    /// bounds for every column - confirm against the `Series` level contract.
    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        let allow_threads = true;
        self.take_unchecked_impl(idx, allow_threads)
    }

    /// Gather the rows at `idx` from every column and assemble a new frame without checks.
    ///
    /// With `allow_threads` the columns are processed through `apply_columns_par` inside the
    /// thread pool, and `Utf8` columns use the threaded take. Without it the columns are
    /// processed by a plain iterator.
    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        if !allow_threads {
            let cols = self
                .columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect();
            return DataFrame::new_no_checks(cols);
        }

        let cols = POOL.install(|| {
            self.apply_columns_par(&|s| {
                if matches!(s.dtype(), DataType::Utf8) {
                    s.take_unchecked_threaded(idx, true).unwrap()
                } else {
                    s.take_unchecked(idx).unwrap()
                }
            })
        });
        DataFrame::new_no_checks(cols)
    }

    /// Vertically parallelized take: the index array is split into one part per thread,
    /// each part gathers from all columns, and the partial frames are stacked in order.
    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let index_parts = split_ca(indices, n_threads).unwrap();

        let pieces: Vec<_> = POOL.install(|| {
            index_parts
                .par_iter()
                .map(|part| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(part).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        // The first piece serves as the accumulator the remaining pieces are appended to.
        let mut pieces = pieces.into_iter();
        let mut out = pieces.next().unwrap();
        for piece in pieces {
            out.vstack_mut(&piece).unwrap();
        }
        out
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// The new name is validated *before* anything is changed, so a failed call leaves the
    /// `DataFrame` untouched.
    ///
    /// # Errors
    ///
    /// * `PolarsError::NotFound` when there is no column called `column`.
    /// * `PolarsError::SchemaMisMatch` when another column is already called `name`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        let idx = self
            .find_idx_by_name(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))?;

        // Check for a clash first: renaming and only then detecting the duplicate would
        // return an error while leaving two columns with the same name behind.
        let clashes = self
            .columns
            .iter()
            .enumerate()
            .any(|(i, s)| i != idx && s.name() == name);
        if clashes {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }

        self.columns[idx].rename(name);
        Ok(self)
    }

    /// Sort the `DataFrame` in place by the given column(s).
    ///
    /// The frame is first brought to a single chunk, then its columns are replaced by the
    /// columns of the sorted result.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // a lot of indirection in both sorting and take
        self.as_single_chunk_par();

        let keys = self.select_series(by_column)?;
        let sorted = self.sort_impl(keys, reverse.into_vec(), false, None)?;
        self.columns = sorted.columns;
        Ok(self)
    }

    /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
    ///
    /// Computes the sort indices for `by_column` (optionally narrowed by `slice`), gathers the
    /// rows of `self` in that order and flags the first sort column as sorted.
    ///
    /// # Panics
    ///
    /// Indexes `by_column[0]` and `reverse[0]`, so both vectors must be non-empty. Sorting by
    /// more than one column panics when the `sort_multiple` feature is not enabled.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // `by_column` may also hold evaluated expressions from polars-lazy that are not
        // present in this dataframe (they are renamed to _POLARS_SORT_COLUMN_i). That is why
        // the attempt to flag the first sort column as sorted, further down, ignores the error.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();

        let mut take = if by_column.len() == 1 {
            let s = &by_column[0];
            let options = SortOptions {
                descending: first_reverse,
                nulls_last,
            };
            // Fast path for a frame made of just the sort column: sort the series itself
            // instead of computing sort indices and gathering by them.
            if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                let sorted = s.sort_with(options);
                let sorted = match slice {
                    Some((offset, len)) => sorted.slice(offset, len),
                    None => sorted,
                };
                return Ok(sorted.into_frame());
            }
            s.argsort(options)
        } else {
            #[cfg(feature = "sort_multiple")]
            {
                let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                first.argsort_multiple(&by_column, &reverse)?
            }
            #[cfg(not(feature = "sort_multiple"))]
            {
                panic!("activate `sort_multiple` feature gate to enable this functionality");
            }
        };

        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        let vertical = std::env::var("POLARS_VERT_PAR").is_ok();
        // Safety:
        // the created indices are in bounds
        let mut df = unsafe {
            if vertical {
                self.take_unchecked_vectical(&take)
            } else {
                self.take_unchecked(&take)
            }
        };

        // Mark the first sort column as sorted. A missing column is fine: it means we sorted
        // by an expression that is not present in the dataframe.
        let sorted_flag = if first_reverse {
            IsSorted::Descending
        } else {
            IsSorted::Ascending
        };
        let _ = df.apply(&first_by_column, |s| {
            let mut flagged = s.clone();
            flagged.set_sorted(sorted_flag);
            flagged
        });
        Ok(df)
    }

    /// Produce a sorted copy of this `DataFrame`; `self` is left untouched.
    ///
    /// The frame is cloned and the clone is sorted through `sort_in_place`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        let mut sorted = self.clone();
        sorted.sort_in_place(by_column, reverse)?;
        Ok(sorted)
    }

    /// Sort a copy of the `DataFrame` by a single column, honoring `options.descending` and
    /// `options.nulls_last`.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut out = self.clone();
        // a lot of indirection in both sorting and take
        out.as_single_chunk_par();

        let key = out.column(by_column)?.clone();
        let sorted = out.sort_impl(
            vec![key],
            vec![options.descending],
            options.nulls_last,
            None,
        )?;
        out.columns = sorted.columns;
        Ok(out)
    }

    /// Replace a column with a `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Country" => &["United States", "China"],
    ///                         "Area (km²)" => &[9_833_520, 9_596_961])?;
    /// let s: Series = Series::new("Country", &["USA", "PRC"]);
    ///
    /// assert!(df.replace("Nation", s.clone()).is_err());
    /// assert!(df.replace("Country", s).is_ok());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace<S: IntoSeries>(&mut self, column: &str, new_col: S) -> PolarsResult<&mut Self> {
        self.apply(column, |_| new_col.into_series())
    }

    /// Replace or update a column. Unlike [DataFrame::with_column], the resulting column is
    /// named after `column: &str` rather than after the `Series` passed to this method: the
    /// series is renamed to `column` before it is handed to `with_column`.
    pub fn replace_or_add<S: IntoSeries>(
        &mut self,
        column: &str,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut renamed = new_col.into_series();
        renamed.rename(column);
        self.with_column(renamed)
    }

    /// Put `new_col` in the place of the column at position `idx`.
    ///
    /// # Errors
    ///
    /// * `PolarsError::ShapeMisMatch` when the new column's length differs from the frame height.
    /// * `PolarsError::ComputeError` when `idx` is not smaller than the frame width.
    ///
    /// # Example
    ///
    /// ```ignore
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace_at_idx<S: IntoSeries>(
        &mut self,
        idx: usize,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let replacement = new_col.into_series();

        let height = self.height();
        if replacement.len() != height {
            return Err(PolarsError::ShapeMisMatch(
                format!("Cannot replace Series at index {}. The shape of Series {} does not match that of the DataFrame {}",
                idx, replacement.len(), height
                ).into()));
        }

        let width = self.width();
        if idx >= width {
            return Err(PolarsError::ComputeError(
                format!(
                    "Column index: {} outside of DataFrame with {} columns",
                    idx, width
                )
                .into(),
            ));
        }

        self.columns[idx] = replacement;
        Ok(self)
    }

    /// Run a closure over the column called `name` and store the result in its place.
    /// This is the recommended way to do in place modification.
    ///
    /// The name is resolved to a position with `check_name_to_idx`; the rest is handled by
    /// `apply_at_idx`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("names", &["Jean", "Claude", "van"]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// fn str_to_len(str_val: &Series) -> Series {
    ///     str_val.utf8()
    ///         .unwrap()
    ///         .into_iter()
    ///         .map(|opt_name: Option<&str>| {
    ///             opt_name.map(|name: &str| name.len() as u32)
    ///          })
    ///         .collect::<UInt32Chunked>()
    ///         .into_series()
    /// }
    ///
    /// // Replace the names column by the length of the names.
    /// df.apply("names", str_to_len);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | names |
    /// | ---    | ---   |
    /// | str    | u32   |
    /// +========+=======+
    /// | "ham"  | 4     |
    /// +--------+-------+
    /// | "spam" | 6     |
    /// +--------+-------+
    /// | "egg"  | 3     |
    /// +--------+-------+
    /// ```
    pub fn apply<F, S>(&mut self, name: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let position = self.check_name_to_idx(name)?;
        self.apply_at_idx(position, f)
    }

    /// Run a closure over the column at position `idx` and store the result in its place.
    /// This is the recommended way to do in place modification.
    ///
    /// A result of length 1 is expanded to the height of the frame; a result whose length
    /// equals the height is stored as is. The stored column always keeps the name of the
    /// column it replaces.
    ///
    /// # Errors
    ///
    /// * `PolarsError::ComputeError` when there is no column at `idx`.
    /// * `PolarsError::ShapeMisMatch` when the result has any other length.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.apply_at_idx(1, |s| s + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | ascii |
    /// | ---    | ---   |
    /// | str    | i32   |
    /// +========+=======+
    /// | "ham"  | 102   |
    /// +--------+-------+
    /// | "spam" | 111   |
    /// +--------+-------+
    /// | "egg"  | 111   |
    /// +--------+-------+
    /// ```
    pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let df_height = self.height();
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;

        // Remember the name up front so it can be restored on whatever the closure returns.
        let name = col.name().to_string();
        let produced = f(col).into_series();

        let mut replacement = match produced.len() {
            // a single value is repeated to fill the whole column
            1 => produced.new_from_index(0, df_height),
            len if len == df_height => produced,
            len => {
                return Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Result Series has shape {} where the DataFrame has height {}",
                        len, df_height
                    )
                    .into(),
                ));
            }
        };

        // make sure the name remains the same after applying the closure
        replacement.rename(&name);
        *col = replacement;
        Ok(self)
    }

src/frame/arithmetic.rs (line 153)

    /// Apply the binary operation `f` column by column to two frames that may differ in
    /// height and width.
    ///
    /// Columns that exist in both frames are paired by position, cast to their common
    /// supertype, padded with nulls up to the larger height and then combined with `f`.
    /// Columns that only exist in the wider frame produce all-null columns that keep the
    /// name and dtype of the original.
    ///
    /// # Errors
    ///
    /// Propagates errors from supertype resolution, casting, null extension, `f` itself and
    /// the final `DataFrame::new`.
    fn binary_aligned(
        &self,
        other: &DataFrame,
        f: &(dyn Fn(&Series, &Series) -> PolarsResult<Series> + Sync + Send),
    ) -> PolarsResult<DataFrame> {
        let max_len = std::cmp::max(self.height(), other.height());
        let max_width = std::cmp::max(self.width(), other.width());
        let mut cols = self
            .get_columns()
            .par_iter()
            .zip(other.get_columns().par_iter())
            .map(|(l, r)| {
                let diff_l = max_len - l.len();
                let diff_r = max_len - r.len();

                let st = try_get_supertype(l.dtype(), r.dtype())?;
                let mut l = l.cast(&st)?;
                let mut r = r.cast(&st)?;

                // pad the shorter side with nulls so both operands have `max_len` rows
                if diff_l > 0 {
                    l = l.extend_constant(AnyValue::Null, diff_l)?;
                };
                if diff_r > 0 {
                    r = r.extend_constant(AnyValue::Null, diff_r)?;
                };

                f(&l, &r)
            })
            .collect::<PolarsResult<Vec<_>>>()?;

        let col_len = cols.len();
        if col_len < max_width {
            // the frame that still has unpaired columns is the wider one
            let df = if col_len < self.width() { self } else { other };

            // Walk the remaining *columns*: the upper bound is the width of the wider frame,
            // not the row count.
            for s in &df.get_columns()[col_len..max_width] {
                let name = s.name();
                let dtype = s.dtype();

                // trick to fill a series with nulls
                let vals: &[Option<i32>] = &[None];
                let nulls = Series::new(name, vals).cast(dtype)?;
                cols.push(nulls.new_from_index(0, max_len))
            }
        }
        DataFrame::new(cols)
    }

src/frame/groupby/mod.rs (line 101)

    /// Build a `GroupBy` for this frame from key `Series` that are passed in directly.
    ///
    /// One key is grouped through `Series::group_tuples`. Two keys take a packed fast path
    /// when both are numeric, or both are `Utf8` and short enough on average. All other
    /// combinations go through `groupby_threaded_multiple_keys_flat`.
    ///
    /// # Errors
    ///
    /// * `PolarsError::ComputeError` when `by` is empty.
    /// * `PolarsError::ShapeMisMatch` when the frame has columns and the first key has a
    ///   length that is neither the frame height nor 1.
    pub fn groupby_with_series(
        &self,
        mut by: Vec<Series>,
        multithreaded: bool,
        sorted: bool,
    ) -> PolarsResult<GroupBy> {
        if by.is_empty() {
            return Err(PolarsError::ComputeError(
                "expected keys in groupby operation, got nothing".into(),
            ));
        }

        // Note: this macro expands to an early `return` from the enclosing function.
        macro_rules! finish_packed_bit_path {
            ($ca0:expr, $ca1:expr, $pack_fn:expr) => {{
                let n_partitions = _set_partition_size();

                // we split so that we can prepare the data over multiple threads.
                // pack the bit values together and add a final byte that will be 0
                // when there are no null values.
                // otherwise we use two bits of this byte to represent null values.
                let splits = _split_offsets($ca0.len(), n_partitions);

                let keys = POOL.install(|| {
                    splits
                        .into_par_iter()
                        .map(|(offset, len)| {
                            let ca0 = $ca0.slice(offset as i64, len);
                            let ca1 = $ca1.slice(offset as i64, len);
                            ca0.into_iter()
                                .zip(ca1.into_iter())
                                .map(|(l, r)| $pack_fn(l, r))
                                .collect_trusted::<Vec<_>>()
                        })
                        .collect::<Vec<_>>()
                });

                return Ok(GroupBy::new(
                    self,
                    by,
                    groupby_threaded_num(keys, 0, n_partitions as u64, sorted),
                    None,
                ));
            }};
        }

        let by_len = by[0].len();

        // we only throw this error if self.width > 0
        // so that we can still call this on a dummy dataframe where we provide the keys
        if (by_len != self.height()) && (self.width() > 0) {
            // a key of length 1 is expanded to the height of the frame
            if by_len == 1 {
                by[0] = by[0].new_from_index(0, self.height())
            } else {
                return Err(PolarsError::ShapeMisMatch(
                    "the Series used as keys should have the same length as the DataFrame".into(),
                ));
            }
        };

        let n_partitions = _set_partition_size();

        let groups = match by.len() {
            1 => {
                let series = &by[0];
                series.group_tuples(multithreaded, sorted)
            }
            2 => {
                // multiple keys is always multi-threaded
                // reduce code paths
                let keys_df = prepare_dataframe_unsorted(&by);

                let s0 = &keys_df.get_columns()[0];
                let s1 = &keys_df.get_columns()[1];

                // fast path for numeric data
                // uses the bit values to tightly pack those into arrays.
                if s0.dtype().is_numeric() && s1.dtype().is_numeric() {
                    match (s0.bit_repr_is_large(), s1.bit_repr_is_large()) {
                        (false, false) => {
                            let ca0 = s0.bit_repr_small();
                            let ca1 = s1.bit_repr_small();
                            finish_packed_bit_path!(ca0, ca1, pack_u32_tuples)
                        }
                        (true, true) => {
                            let ca0 = s0.bit_repr_large();
                            let ca1 = s1.bit_repr_large();
                            finish_packed_bit_path!(ca0, ca1, pack_u64_tuples)
                        }
                        (true, false) => {
                            let ca0 = s0.bit_repr_large();
                            let ca1 = s1.bit_repr_small();
                            // small first
                            finish_packed_bit_path!(ca1, ca0, pack_u32_u64_tuples)
                        }
                        (false, true) => {
                            let ca0 = s0.bit_repr_small();
                            let ca1 = s1.bit_repr_large();
                            // small first
                            finish_packed_bit_path!(ca0, ca1, pack_u32_u64_tuples)
                        }
                    }
                } else if matches!((s0.dtype(), s1.dtype()), (DataType::Utf8, DataType::Utf8)) {
                    let lhs = s0.utf8().unwrap();
                    let rhs = s1.utf8().unwrap();

                    // arbitrarily chosen bound, if avg no of bytes to encode is larger than this
                    // value we fall back to default groupby
                    // (the `+ 1` keeps the division well-defined for empty keys)
                    if (lhs.get_values_size() + rhs.get_values_size()) / (lhs.len() + 1) < 128 {
                        Ok(pack_utf8_columns(lhs, rhs, n_partitions, sorted))
                    } else {
                        groupby_threaded_multiple_keys_flat(keys_df, n_partitions, sorted)
                    }
                } else {
                    groupby_threaded_multiple_keys_flat(keys_df, n_partitions, sorted)
                }
            }
            _ => {
                let keys_df = prepare_dataframe_unsorted(&by);
                groupby_threaded_multiple_keys_flat(keys_df, n_partitions, sorted)
            }
        };
        Ok(GroupBy::new(self, by, groups?, None))
    }

source

fn cast(&self, _data_type: &DataType) -> PolarsResult<Series>

Examples found in repository ?

src/series/mod.rs (line 231)

230
231
232

    /// Cast the series to `dtype` by delegating to the `cast` of the wrapped inner value
    /// (`self.0`); any error from that call is returned unchanged.
    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Self> {
        self.0.cast(dtype)
    }

source

fn get(&self, _index: usize) -> PolarsResult<AnyValue<'_>>

Get a single value by index. Don’t use this operation in loops, as a runtime cast is needed on every iteration.

Examples found in repository ?

src/frame/row.rs (line 26)

    /// Collect the value at row `idx` of every column into a freshly allocated `Row`.
    ///
    /// Stops at the first column whose `get` fails and returns that error.
    pub fn get_row(&self, idx: usize) -> PolarsResult<Row> {
        let mut values = Vec::with_capacity(self.columns.len());
        for s in &self.columns {
            values.push(s.get(idx)?);
        }
        Ok(Row(values))
    }

    /// Write the values of row `idx` into an existing `Row`, so the allocation can be reused
    /// across calls.
    /// The caller is responsible to make sure that the row has at least the capacity for the number
    /// of columns in the DataFrame
    #[cfg_attr(docsrs, doc(cfg(feature = "rows")))]
    pub fn get_row_amortized<'a>(&'a self, idx: usize, row: &mut Row<'a>) -> PolarsResult<()> {
        self.columns
            .iter()
            .zip(&mut row.0)
            .try_for_each(|(s, slot)| s.get(idx).map(|value| *slot = value))
    }

More examples

Hide additional examples

src/series/mod.rs (line 827)

    /// Render the value at `index` as a string.
    ///
    /// `Utf8` values are borrowed, nulls are rendered as `"null"`, and (with the
    /// `dtype-categorical` feature) categoricals are resolved through their reverse mapping.
    /// Every other value is formatted into an owned string.
    pub fn str_value(&self, index: usize) -> PolarsResult<Cow<str>> {
        let value = self.0.get(index)?;
        Ok(match value {
            AnyValue::Utf8(s) => Cow::Borrowed(s),
            AnyValue::Null => Cow::Borrowed("null"),
            #[cfg(feature = "dtype-categorical")]
            AnyValue::Categorical(idx, rev) => Cow::Borrowed(rev.get(idx)),
            av => Cow::Owned(format!("{av}")),
        })
    }

source

unsafe fn get_unchecked(&self, _index: usize) -> AnyValue<'_>

Get a single value by index. Don’t use this operation in loops, as a runtime cast is needed on every iteration.

This may refer to physical types

Safety

Does not do any bounds checking

Examples found in repository ?

src/frame/row.rs (line 55)

    /// Write the values of row `idx` into an existing `Row` using the unchecked getter of
    /// every column.
    ///
    /// # Safety
    ///
    /// Relies on `get_unchecked`, which does not do any bounds checking.
    pub unsafe fn get_row_amortized_unchecked<'a>(&'a self, idx: usize, row: &mut Row<'a>) {
        for (s, slot) in self.columns.iter().zip(&mut row.0) {
            *slot = s.get_unchecked(idx);
        }
    }

More examples

Hide additional examples

src/frame/hash_join/multiple_keys.rs (line 24)

/// Compare row `left_idx` of `left` with row `right_idx` of `right`, column by column.
///
/// Columns are paired by position; the comparison stops at the first pair of values that
/// is not equal.
///
/// # Safety
///
/// Relies on `get_unchecked`, which does not do any bounds checking.
pub(crate) unsafe fn compare_df_rows2(
    left: &DataFrame,
    right: &DataFrame,
    left_idx: usize,
    right_idx: usize,
) -> bool {
    left.get_columns()
        .iter()
        .zip(right.get_columns())
        .all(|(l, r)| l.get_unchecked(left_idx) == r.get_unchecked(right_idx))
}

src/frame/mod.rs (line 1237)

    /// Fetch the values of row `idx` across all columns.
    ///
    /// Returns `None` when the frame has no columns or when `idx` is not smaller than the
    /// length of the first column.
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        let first = self.columns.first()?;
        if idx >= first.len() {
            return None;
        }
        // safety: we just checked bounds
        unsafe { Some(self.columns.iter().map(|s| s.get_unchecked(idx)).collect()) }
    }

source

fn sort_with(&self, _options: SortOptions) -> Series

Examples found in repository ?

src/series/mod.rs (lines 218-221)

    /// Sort the series through `sort_with`; `reverse` selects descending order and all other
    /// options keep their defaults.
    pub fn sort(&self, reverse: bool) -> Self {
        let options = SortOptions {
            descending: reverse,
            ..Default::default()
        };
        self.sort_with(options)
    }

More examples

Hide additional examples

src/frame/mod.rs (line 1852)

    /// Computes the sort indices for `by_column` (optionally narrowed by `slice`), gathers the
    /// rows of `self` in that order and flags the first sort column as sorted.
    ///
    /// # Panics
    ///
    /// Indexes `by_column[0]` and `reverse[0]`, so both vectors must be non-empty. Sorting by
    /// more than one column panics when the `sort_multiple` feature is not enabled.
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // `by_column` may also hold evaluated expressions from polars-lazy that are not
        // present in this dataframe (they are renamed to _POLARS_SORT_COLUMN_i). That is why
        // the attempt to flag the first sort column as sorted, further down, ignores the error.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();

        let mut take = if by_column.len() == 1 {
            let s = &by_column[0];
            let options = SortOptions {
                descending: first_reverse,
                nulls_last,
            };
            // Fast path for a frame made of just the sort column: sort the series itself
            // instead of computing sort indices and gathering by them.
            if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                let sorted = s.sort_with(options);
                let sorted = match slice {
                    Some((offset, len)) => sorted.slice(offset, len),
                    None => sorted,
                };
                return Ok(sorted.into_frame());
            }
            s.argsort(options)
        } else {
            #[cfg(feature = "sort_multiple")]
            {
                let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                first.argsort_multiple(&by_column, &reverse)?
            }
            #[cfg(not(feature = "sort_multiple"))]
            {
                panic!("activate `sort_multiple` feature gate to enable this functionality");
            }
        };

        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        let vertical = std::env::var("POLARS_VERT_PAR").is_ok();
        // Safety:
        // the created indices are in bounds
        let mut df = unsafe {
            if vertical {
                self.take_unchecked_vectical(&take)
            } else {
                self.take_unchecked(&take)
            }
        };

        // Mark the first sort column as sorted. A missing column is fine: it means we sorted
        // by an expression that is not present in the dataframe.
        let sorted_flag = if first_reverse {
            IsSorted::Descending
        } else {
            IsSorted::Ascending
        };
        let _ = df.apply(&first_by_column, |s| {
            let mut flagged = s.clone();
            flagged.set_sorted(sorted_flag);
            flagged
        });
        Ok(df)
    }

source

fn argsort(&self, options: SortOptions) -> IdxCa

Retrieve the indexes needed for a sort.

Examples found in repository ?

src/frame/mod.rs (line 1859)

    /// Sort this frame by one or more already-evaluated key series.
    ///
    /// * `by_column` - the sort keys. They may be the result of expressions and therefore
    ///   need not be columns of this frame.
    /// * `reverse` - per-key descending flags.
    /// * `nulls_last` - forwarded to the single-key sort options.
    /// * `slice` - optional `(offset, length)` applied to the sorted result.
    ///
    /// # Panics
    /// Panics when `by_column` or `reverse` is empty (both are indexed at `0`), and when
    /// several keys are given while the `sort_multiple` feature is disabled.
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // Remember the leading key up front: the key vectors may be consumed below, and we
        // still want to flag that column as sorted at the very end.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();

        let take = if by_column.len() == 1 {
            let key = &by_column[0];
            let options = SortOptions {
                descending: first_reverse,
                nulls_last,
            };

            // Single-column frame sorted by its own column: sort the series directly
            // instead of computing indices and gathering with them.
            if self.width() == 1 && self.check_name_to_idx(key.name()).is_ok() {
                let sorted = key.sort_with(options);
                let sorted = match slice {
                    Some((offset, len)) => sorted.slice(offset, len),
                    None => sorted,
                };
                return Ok(sorted.into_frame());
            }
            key.argsort(options)
        } else {
            #[cfg(feature = "sort_multiple")]
            {
                let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                first.argsort_multiple(&by_column, &reverse)?
            }
            #[cfg(not(feature = "sort_multiple"))]
            {
                panic!("activate `sort_multiple` feature gate to enable this functionality");
            }
        };

        let take = match slice {
            Some((offset, len)) => take.slice(offset, len),
            None => take,
        };

        let vertical = std::env::var("POLARS_VERT_PAR").is_ok();
        // Safety:
        // the indices were produced by an argsort (and optionally sliced), so they are in bounds
        let mut df = unsafe {
            if vertical {
                self.take_unchecked_vectical(&take)
            } else {
                self.take_unchecked(&take)
            }
        };

        // Flag the leading sort column as sorted. The result is deliberately ignored: when we
        // sorted by an expression, no column with that name exists in the frame.
        let _ = df.apply(&first_by_column, |s| {
            let mut flagged = s.clone();
            flagged.set_sorted(if first_reverse {
                IsSorted::Descending
            } else {
                IsSorted::Ascending
            });
            flagged
        });
        Ok(df)
    }

More examples

Hide additional examples

src/chunked_array/ops/unique/rank.rs (lines 80-83)

/// Compute the rank of every value in `s` according to `method`.
///
/// * `s` - the series to rank.
/// * `method` - how ties are resolved (`Average`, `Min`, `Max`, `Dense`, `Ordinal`, and
///   `Random` behind the `random` feature).
/// * `reverse` - forwarded as `descending` to the argsort, i.e. rank in descending order.
///
/// Series of length 0 or 1 are answered directly. `Average` yields a `Float32` result;
/// the other methods yield an `IdxSize` based result.
///
/// When `s` contains nulls they are first filled with the type's min/max bound, the filled
/// series is ranked recursively, and the original null mask is put back on the output.
pub(crate) fn rank(s: &Series, method: RankMethod, reverse: bool) -> Series {
    // Trivial lengths: a single value always has rank 1, an empty series has no ranks.
    match s.len() {
        1 => {
            return match method {
                Average => Series::new(s.name(), &[1.0f32]),
                _ => Series::new(s.name(), &[1 as IdxSize]),
            };
        }
        0 => {
            return match method {
                Average => Float32Chunked::from_slice(s.name(), &[]).into_series(),
                _ => IdxCa::from_slice(s.name(), &[]).into_series(),
            };
        }
        _ => {}
    }

    if s.null_count() > 0 {
        // The values of the `is_not_null` mask double as the validity bitmap of the output.
        let nulls = s.is_not_null().rechunk();
        let arr = nulls.downcast_iter().next().unwrap();
        let validity = arr.values();
        // Currently, nulls tie with the minimum or maximum bound for a type, depending on reverse.
        // TODO: Need to expose nulls_last in argsort to prevent this.
        // Fill using MaxBound/MinBound to give nulls last rank.
        // we will replace them later.
        let null_strategy = if reverse {
            FillNullStrategy::MinBound
        } else {
            FillNullStrategy::MaxBound
        };
        let s = s.fill_null(null_strategy).unwrap();

        // Rank the null-free series, then mask the originally-null slots out again.
        let mut out = rank(&s, method, reverse);
        unsafe {
            let arr = &mut out.chunks_mut()[0];
            *arr = arr.with_validity(Some(validity.clone()))
        }
        return out;
    }

    // See: https://github.com/scipy/scipy/blob/v1.7.1/scipy/stats/stats.py#L8631-L8737

    let len = s.len();
    // NOTE(review): the branch above returns whenever `s.null_count() > 0`, so this looks
    // like it is always 0 from here on - confirm before relying on the null-aware paths below.
    let null_count = s.null_count();
    let sort_idx_ca = s.argsort(SortOptions {
        descending: reverse,
        ..Default::default()
    });
    let sort_idx = sort_idx_ca.downcast_iter().next().unwrap().values();

    // `inv[i]` will hold the position of element `i` in sorted order.
    let mut inv: Vec<IdxSize> = Vec::with_capacity(len);
    // Safety:
    // Values will be filled next and there is only primitive data
    #[allow(clippy::uninit_vec)]
    unsafe {
        inv.set_len(len)
    }
    let inv_values = inv.as_mut_slice();

    // Ordinal (and Random) ranks are 1-based; for the other methods `inv` is used as a
    // 0-based index into `dense`.
    #[cfg(feature = "random")]
    let mut count = if let RankMethod::Ordinal | RankMethod::Random = method {
        1 as IdxSize
    } else {
        0
    };

    #[cfg(not(feature = "random"))]
    let mut count = if let RankMethod::Ordinal = method {
        1 as IdxSize
    } else {
        0
    };

    // Safety:
    // we are in bounds
    unsafe {
        sort_idx.iter().for_each(|&i| {
            *inv_values.get_unchecked_mut(i as usize) = count;
            count += 1;
        });
    }

    use RankMethod::*;
    match method {
        Ordinal => {
            let inv_ca = IdxCa::from_vec(s.name(), inv);
            inv_ca.into_series()
        }
        #[cfg(feature = "random")]
        Random => {
            // Safety:
            // in bounds
            let arr = unsafe { s.take_unchecked(&sort_idx_ca).unwrap() };
            // `true` where a sorted value differs from its predecessor, i.e. where a new
            // group of ties starts.
            let not_consecutive_same = arr
                .slice(1, len - 1)
                .not_equal(&arr.slice(0, len - 1))
                .unwrap()
                .rechunk();
            let obs = not_consecutive_same.downcast_iter().next().unwrap();

            // Collect slice indices for sort_idx which point to ties in the original series.
            let mut ties_indices = Vec::with_capacity(len + 1);
            let mut ties_index: usize = 0;

            ties_indices.push(ties_index);
            obs.iter().for_each(|b| {
                if let Some(b) = b {
                    ties_index += 1;
                    if b {
                        ties_indices.push(ties_index)
                    }
                }
            });
            // Close last slice (if there were nulls in the original series, they will always be in the last slice).
            ties_indices.push(len);

            let mut sort_idx = sort_idx.to_vec();

            let mut thread_rng = thread_rng();
            let rng = &mut SmallRng::from_rng(&mut thread_rng).unwrap();

            // Shuffle sort_idx positions which point to ties in the original series.
            for i in 0..(ties_indices.len() - 1) {
                let ties_index_start = ties_indices[i];
                let ties_index_end = ties_indices[i + 1];
                if ties_index_end - ties_index_start > 1 {
                    sort_idx[ties_index_start..ties_index_end].shuffle(rng);
                }
            }

            // Recreate inv_ca (where ties are randomly shuffled compared with Ordinal).
            let mut count = 1 as IdxSize;
            unsafe {
                sort_idx.iter().for_each(|&i| {
                    *inv_values.get_unchecked_mut(i as usize) = count;
                    count += 1;
                });
            }

            let inv_ca = IdxCa::from_vec(s.name(), inv);
            inv_ca.into_series()
        }
        _ => {
            let inv_ca = IdxCa::from_vec(s.name(), inv);
            // Safety:
            // in bounds
            let arr = unsafe { s.take_unchecked(&sort_idx_ca).unwrap() };
            let validity = arr.chunks()[0].validity().cloned();
            let not_consecutive_same = arr
                .slice(1, len - 1)
                .not_equal(&arr.slice(0, len - 1))
                .unwrap()
                .rechunk();
            // this obs is shorter than that of scipy stats, because we can just start the cumsum by 1
            // instead of 0
            let obs = not_consecutive_same.downcast_iter().next().unwrap();
            let mut dense = Vec::with_capacity(len);

            // this offset saves an offset on the whole column, what scipy does in:
            //
            // ```python
            //     if method == 'min':
            //         return count[dense - 1] + 1
            // ```
            // INVALID LINT REMOVE LATER
            #[allow(clippy::bool_to_int_with_if)]
            let mut cumsum: IdxSize = if let RankMethod::Min = method {
                0
            } else {
                // nulls will be first, rank, but we will replace them (with null)
                // so this ensures the second rank will be 1
                if matches!(method, RankMethod::Dense) && s.null_count() > 0 {
                    0
                } else {
                    1
                }
            };

            // Running count of distinct values seen so far, in sorted order.
            dense.push(cumsum);
            obs.values_iter().for_each(|b| {
                if b {
                    cumsum += 1;
                }
                dense.push(cumsum)
            });
            let arr = IdxArr::from_data_default(dense.into(), validity);
            let dense: IdxCa = (s.name(), arr).into();
            // Map the dense ranks from sorted order back to the original element order.
            // Safety:
            // in bounds
            let dense = unsafe { dense.take_unchecked((&inv_ca).into()) };

            if let RankMethod::Dense = method {
                return if s.null_count() == 0 {
                    dense.into_series()
                } else {
                    // null will be the first rank
                    // we restore original nulls and shift all ranks by one
                    let validity = s.is_null().rechunk();
                    let validity = validity.downcast_iter().next().unwrap();
                    let validity = validity.values().clone();

                    let arr = dense.downcast_iter().next().unwrap();
                    let arr = arr.with_validity(Some(validity));
                    let dtype = arr.data_type().clone();

                    // Safety:
                    // given dtype is correct
                    unsafe {
                        Series::try_from_arrow_unchecked(s.name(), vec![arr], &dtype).unwrap()
                    }
                };
            }

            // `count` collects the sorted positions at which a new distinct value starts,
            // with a leading 0 and a closing total; it is indexed by `dense` below.
            let bitmap = obs.values();
            let cap = bitmap.len() - bitmap.unset_bits();
            let mut count = Vec::with_capacity(cap + 1);
            let mut cnt: IdxSize = 0;
            count.push(cnt);

            if null_count > 0 {
                obs.iter().for_each(|b| {
                    if let Some(b) = b {
                        cnt += 1;
                        if b {
                            count.push(cnt)
                        }
                    }
                });
            } else {
                obs.values_iter().for_each(|b| {
                    cnt += 1;
                    if b {
                        count.push(cnt)
                    }
                });
            }

            count.push((len - null_count) as IdxSize);
            let count = IdxCa::from_vec(s.name(), count);

            match method {
                Max => {
                    // Safety:
                    // within bounds
                    unsafe { count.take_unchecked((&dense).into()).into_series() }
                }
                Min => {
                    // Safety:
                    // within bounds
                    unsafe { (count.take_unchecked((&dense).into()) + 1).into_series() }
                }
                Average => {
                    // Mean of the max rank (`a`) and the min rank (`b`) of each tie group.
                    // Safety:
                    // in bounds
                    let a = unsafe { count.take_unchecked((&dense).into()) }
                        .cast(&DataType::Float32)
                        .unwrap();
                    let b = unsafe { count.take_unchecked((&(dense - 1)).into()) }
                        .cast(&DataType::Float32)
                        .unwrap()
                        + 1.0;
                    (&a + &b) * 0.5
                }
                // These methods returned earlier in this function.
                #[cfg(feature = "random")]
                Dense | Ordinal | Random => unimplemented!(),
                #[cfg(not(feature = "random"))]
                Dense | Ordinal => unimplemented!(),
            }
        }
    }
}

source

fn null_count(&self) -> usize

Count the null values.

Examples found in repository ?

src/series/series_trait.rs (line 356)

    /// Return a series without its null values.
    ///
    /// When there are no nulls the inner series is cloned as is; otherwise the series is
    /// filtered with its own `is_not_null` mask.
    fn drop_nulls(&self) -> Series {
        if self.null_count() > 0 {
            let keep = self.is_not_null();
            return self.filter(&keep).unwrap();
        }
        Series(self.clone_inner())
    }

More examples

Hide additional examples

src/frame/mod.rs (line 3168)

    /// Build a one-row frame holding, per column, the number of null values in that column.
    ///
    /// Every output column keeps the name of the column it was computed from.
    pub fn null_count(&self) -> Self {
        let counts = self
            .columns
            .iter()
            .map(|col| {
                let n_nulls = col.null_count() as IdxSize;
                Series::new(col.name(), &[n_nulls])
            })
            .collect();
        Self::new_no_checks(counts)
    }

src/testing.rs (line 9)

    /// Check if all values in both series are equal.
    ///
    /// Returns `false` right away when either series contains a null or when the dtypes
    /// differ; otherwise defers to `series_equal_missing`.
    pub fn series_equal(&self, other: &Series) -> bool {
        let any_nulls = self.null_count() > 0 || other.null_count() > 0;
        if any_nulls || self.dtype() != other.dtype() {
            return false;
        }
        self.series_equal_missing(other)
    }

    /// Check if all values in series are equal where `None == None` evaluates to `true`.
    /// Two `Datetime` series are *not* equal if their timezones are different, regardless
    /// if they represent the same UTC time or not.
    ///
    /// Length, name and null count must match as well. A failing element-wise comparison
    /// counts as "not equal".
    pub fn series_equal_missing(&self, other: &Series) -> bool {
        // TODO! remove this? Default behavior already includes equal missing
        #[cfg(feature = "timezones")]
        {
            use crate::datatypes::DataType::Datetime;

            // A Datetime on the left only matches a Datetime with the same timezone.
            if let Datetime(_, tz_lhs) = self.dtype() {
                match other.dtype() {
                    Datetime(_, tz_rhs) if tz_lhs == tz_rhs => {}
                    _ => return false,
                }
            }
        }

        // differs from PartialEq::eq in that the numerical dtype may be different
        if self.len() != other.len()
            || self.name() != other.name()
            || self.null_count() != other.null_count()
        {
            return false;
        }

        match self.equal(other) {
            // every position must compare as equal
            Ok(mask) => mask.sum().map(|s| s as usize).unwrap_or(0) == self.len(),
            Err(_) => false,
        }
    }

    /// Get a pointer to the underlying data of this Series.
    /// Can be useful for fast comparisons.
    pub fn get_data_ptr(&self) -> usize {
        let object = self.0.deref();

        // Safety:
        // A fat pointer consists of a data ptr and a ptr to the vtable.
        // we specifically check that we only transmute &dyn SeriesTrait e.g.
        // a trait object, therefore this is sound.
        #[allow(clippy::transmute_undefined_repr)]
        let (data_ptr, _vtable_ptr) =
            unsafe { std::mem::transmute::<&dyn SeriesTrait, (usize, usize)>(object) };
        data_ptr
    }
}

impl PartialEq for Series {
    /// Two series are equal when length, field (name and dtype) and null count match and
    /// the element-wise comparison is `true` at every position.
    ///
    /// Panics if the element-wise comparison itself fails.
    fn eq(&self, other: &Self) -> bool {
        if self.len() != other.len()
            || self.field() != other.field()
            || self.null_count() != other.null_count()
        {
            return false;
        }

        let n_equal = self
            .equal(other)
            .unwrap()
            .sum()
            .map(|s| s as usize)
            .unwrap_or(0);
        n_equal == self.len()
    }

src/frame/row.rs (line 257)

/// Whether `av` is null, a list made up of nulls only, or (with `dtype-struct`) a struct
/// whose fields are all nested-null themselves.
fn is_nested_null(av: &AnyValue) -> bool {
    match av {
        AnyValue::Null => true,
        AnyValue::List(inner) => inner.len() == inner.null_count(),
        #[cfg(feature = "dtype-struct")]
        AnyValue::Struct(_, _, _) => !av
            ._iter_struct_av()
            .any(|field| !is_nested_null(&field)),
        _ => false,
    }
}

// Nested dtypes whose values are all null get `Null` as their leaf dtype.
// Struct values are inferred field by field (with unnamed fields); everything else uses
// the plain `AnyValue` -> `DataType` conversion.
fn infer_dtype_dynamic(av: &AnyValue) -> DataType {
    match av {
        AnyValue::List(inner) if inner.len() == inner.null_count() => {
            let leaf = Box::new(DataType::Null);
            DataType::List(leaf)
        }
        #[cfg(feature = "dtype-struct")]
        AnyValue::Struct(_, _, _) => {
            let fields = av
                ._iter_struct_av()
                .map(|field_av| Field::new("", infer_dtype_dynamic(&field_av)))
                .collect();
            DataType::Struct(fields)
        }
        other => other.into(),
    }
}

/// Infer a single dtype for a column of `AnyValue`s by collecting the dtype of every value
/// and reducing them to a common supertype.
pub fn any_values_to_dtype(column: &[AnyValue]) -> PolarsResult<DataType> {
    // An index-set keeps first-seen order, and the order of dtypes influences how the
    // struct fields are constructed.
    let mut types_set = PlIndexSet::new();
    column
        .iter()
        .map(|val| infer_dtype_dynamic(val))
        .for_each(|dtype| {
            types_set.insert(dtype);
        });
    types_set_to_dtype(types_set)
}

/// Reduce a set of dtypes to one dtype by repeatedly taking the supertype of the
/// accumulated dtype and the next one.
///
/// The reduction result is unwrapped, so this panics when it is absent
/// (presumably for an empty `types_set` - verify against `fold_first_`).
fn types_set_to_dtype(types_set: PlIndexSet<DataType>) -> PolarsResult<DataType> {
    let candidates = types_set.into_iter().map(Ok);
    let reduced = candidates.fold_first_(|acc, next| try_get_supertype(&acc?, &next?));
    reduced.unwrap()
}

/// Infer schema from rows and set the supertypes of the columns as column data type.
pub fn rows_to_schema_supertypes(
    rows: &[Row],
    infer_schema_length: Option<usize>,
) -> PolarsResult<Schema> {
    // no of rows to use to infer dtype
    let max_infer = infer_schema_length.unwrap_or(rows.len());

    let mut dtypes: Vec<PlIndexSet<DataType>> = vec![PlIndexSet::new(); rows[0].0.len()];

    for row in rows.iter().take(max_infer) {
        for (val, types_set) in row.0.iter().zip(dtypes.iter_mut()) {
            let dtype = infer_dtype_dynamic(val);
            types_set.insert(dtype);
        }
    }

    dtypes
        .into_iter()
        .enumerate()
        .map(|(i, types_set)| {
            let dtype = types_set_to_dtype(types_set)?;
            Ok(Field::new(format!("column_{i}").as_ref(), dtype))
        })
        .collect::<PolarsResult<_>>()
}

/// Infer schema from rows and set the first non-null type as column data type.
///
/// The schema starts out as the schema of the first row. Following rows (up to
/// `infer_schema_length`, or all rows when `None`) are only consulted for columns whose
/// dtype is still `Null`, a list, or a struct.
///
/// # Panics
/// Panics when `rows` is empty, because the first row is indexed directly.
pub fn rows_to_schema_first_non_null(rows: &[Row], infer_schema_length: Option<usize>) -> Schema {
    // number of rows to use to infer dtype
    let max_infer = infer_schema_length.unwrap_or(rows.len());
    let mut schema: Schema = (&rows[0]).into();

    for row in rows.iter().take(max_infer).skip(1) {
        // Columns worth another look: nulls, plus list/struct types, since nested null
        // values can be wrongly inferred by front ends.
        let mut unresolved = Vec::new();
        for (idx, dtype) in schema.iter_dtypes().enumerate() {
            let needs_check = match dtype {
                DataType::Null | DataType::List(_) => true,
                #[cfg(feature = "dtype-struct")]
                DataType::Struct(_) => true,
                _ => false,
            };
            if needs_check {
                unresolved.push(idx);
            }
        }

        // Every column has a settled dtype: nothing left to refine.
        if unresolved.is_empty() {
            break;
        }

        for idx in unresolved {
            let val = &row.0[idx];
            if is_nested_null(val) {
                continue;
            }
            schema.coerce_by_index(idx, val.into()).unwrap();
        }
    }
    schema
}

/// An unnamed field whose dtype is derived from the value.
impl<'a> From<&AnyValue<'a>> for Field {
    fn from(val: &AnyValue<'a>) -> Self {
        let dtype = val.into();
        Field::new("", dtype)
    }
}

/// A schema with one field per value of the row, named `column_{i}` by position and typed
/// after the value found there.
impl From<&Row<'_>> for Schema {
    fn from(row: &Row) -> Self {
        Schema::from(row.0.iter().enumerate().map(|(idx, value)| {
            let name = format!("column_{idx}");
            Field::new(name.as_ref(), value.into())
        }))
    }
}

/// A column under construction that accepts `AnyValue`s one at a time.
///
/// Most variants wrap a typed chunked-array builder. The temporal variants are gated
/// behind their respective `dtype-*` features and additionally carry the time unit
/// (and, for `Datetime`, the optional time zone) of the column being built.
pub enum AnyValueBuffer<'a> {
    Boolean(BooleanChunkedBuilder),
    Int32(PrimitiveChunkedBuilder<Int32Type>),
    Int64(PrimitiveChunkedBuilder<Int64Type>),
    UInt32(PrimitiveChunkedBuilder<UInt32Type>),
    UInt64(PrimitiveChunkedBuilder<UInt64Type>),
    #[cfg(feature = "dtype-date")]
    Date(PrimitiveChunkedBuilder<Int32Type>),
    #[cfg(feature = "dtype-datetime")]
    Datetime(
        PrimitiveChunkedBuilder<Int64Type>,
        TimeUnit,
        Option<TimeZone>,
    ),
    #[cfg(feature = "dtype-duration")]
    Duration(PrimitiveChunkedBuilder<Int64Type>, TimeUnit),
    #[cfg(feature = "dtype-time")]
    Time(PrimitiveChunkedBuilder<Int64Type>),
    Float32(PrimitiveChunkedBuilder<Float32Type>),
    Float64(PrimitiveChunkedBuilder<Float64Type>),
    Utf8(Utf8ChunkedBuilder),
    /// Catch-all: keeps the target dtype together with the raw values.
    All(DataType, Vec<AnyValue<'a>>),
}

impl<'a> AnyValueBuffer<'a> {
    /// Append `val` to the buffer.
    ///
    /// Returns `Some(())` on success and `None` when `val` cannot be stored in this
    /// buffer's variant (no matching arm, or a failed numeric extraction).
    ///
    /// The arms are order-sensitive: for each variant the specific `AnyValue` patterns
    /// come before the generic fall-through for that variant.
    #[inline]
    pub fn add(&mut self, val: AnyValue<'a>) -> Option<()> {
        use AnyValueBuffer::*;
        match (self, val) {
            (Boolean(builder), AnyValue::Null) => builder.append_null(),
            (Boolean(builder), AnyValue::Boolean(v)) => builder.append_value(v),
            // any other value is read as a `u8`, where exactly `1` means `true`
            (Boolean(builder), val) => {
                let v = val.extract::<u8>()?;
                builder.append_value(v == 1)
            }
            (Int32(builder), AnyValue::Null) => builder.append_null(),
            (Int32(builder), val) => builder.append_value(val.extract()?),
            (Int64(builder), AnyValue::Null) => builder.append_null(),
            (Int64(builder), val) => builder.append_value(val.extract()?),
            (UInt32(builder), AnyValue::Null) => builder.append_null(),
            (UInt32(builder), val) => builder.append_value(val.extract()?),
            (UInt64(builder), AnyValue::Null) => builder.append_null(),
            (UInt64(builder), val) => builder.append_value(val.extract()?),
            #[cfg(feature = "dtype-date")]
            (Date(builder), AnyValue::Null) => builder.append_null(),
            #[cfg(feature = "dtype-date")]
            (Date(builder), AnyValue::Date(v)) => builder.append_value(v),
            #[cfg(feature = "dtype-datetime")]
            (Datetime(builder, _, _), AnyValue::Null) => builder.append_null(),
            #[cfg(feature = "dtype-datetime")]
            (Datetime(builder, tu_l, _), AnyValue::Datetime(v, tu_r, _)) => {
                // we convert right tu to left tu
                // so we swap.
                let v = convert_time_units(v, tu_r, *tu_l);
                builder.append_value(v)
            }
            #[cfg(feature = "dtype-duration")]
            (Duration(builder, _), AnyValue::Null) => builder.append_null(),
            #[cfg(feature = "dtype-duration")]
            (Duration(builder, tu_l), AnyValue::Duration(v, tu_r)) => {
                // same as for Datetime: bring the value to the buffer's time unit
                let v = convert_time_units(v, tu_r, *tu_l);
                builder.append_value(v)
            }
            #[cfg(feature = "dtype-time")]
            (Time(builder), AnyValue::Time(v)) => builder.append_value(v),
            #[cfg(feature = "dtype-time")]
            (Time(builder), AnyValue::Null) => builder.append_null(),
            (Float32(builder), AnyValue::Null) => builder.append_null(),
            (Float64(builder), AnyValue::Null) => builder.append_null(),
            (Float32(builder), val) => builder.append_value(val.extract()?),
            (Float64(builder), val) => builder.append_value(val.extract()?),
            (Utf8(builder), AnyValue::Utf8(v)) => builder.append_value(v),
            (Utf8(builder), AnyValue::Utf8Owned(v)) => builder.append_value(v),
            (Utf8(builder), AnyValue::Null) => builder.append_null(),
            // Struct and List can be recursive so use anyvalues for that
            (All(_, vals), v) => vals.push(v),

            // dynamic types: a few scalar values are stringified into a Utf8 buffer
            (Utf8(builder), av) => match av {
                AnyValue::Int64(v) => builder.append_value(&format!("{v}")),
                AnyValue::Float64(v) => builder.append_value(&format!("{v}")),
                AnyValue::Boolean(true) => builder.append_value("true"),
                AnyValue::Boolean(false) => builder.append_value("false"),
                _ => return None,
            },
            _ => return None,
        };
        Some(())
    }

    /// Like [`add`](Self::add), but turns a rejected value into a `ComputeError` that
    /// names the offending value.
    pub(crate) fn add_fallible(&mut self, val: &AnyValue<'a>) -> PolarsResult<()> {
        self.add(val.clone()).ok_or_else(|| {
            PolarsError::ComputeError(format!("Could not append {val:?} to builder; make sure that all rows have the same schema.\n\
            Or consider increasing the 'schema_inference_length' argument.").into())
        })
    }

    /// Finish the buffer and turn it into a `Series`.
    ///
    /// Temporal variants are converted back to their logical type. The `All` variant
    /// builds an unnamed series from the collected values and panics if that fails.
    pub fn into_series(self) -> Series {
        use AnyValueBuffer::*;
        match self {
            Boolean(b) => b.finish().into_series(),
            Int32(b) => b.finish().into_series(),
            Int64(b) => b.finish().into_series(),
            UInt32(b) => b.finish().into_series(),
            UInt64(b) => b.finish().into_series(),
            #[cfg(feature = "dtype-date")]
            Date(b) => b.finish().into_date().into_series(),
            #[cfg(feature = "dtype-datetime")]
            Datetime(b, tu, tz) => b.finish().into_datetime(tu, tz).into_series(),
            #[cfg(feature = "dtype-duration")]
            Duration(b, tu) => b.finish().into_duration(tu).into_series(),
            #[cfg(feature = "dtype-time")]
            Time(b) => b.finish().into_time().into_series(),
            Float32(b) => b.finish().into_series(),
            Float64(b) => b.finish().into_series(),
            Utf8(b) => b.finish().into_series(),
            All(dtype, vals) => Series::from_any_values_and_dtype("", &vals, &dtype).unwrap(),
        }
    }

    /// Create an empty buffer for `dtype` with room for `capacity` values.
    pub fn new(dtype: &DataType, capacity: usize) -> AnyValueBuffer<'a> {
        (dtype, capacity).into()
    }
}

// Build an empty buffer from a (datatype, capacity) pair.
impl From<(&DataType, usize)> for AnyValueBuffer<'_> {
    fn from((dtype, capacity): (&DataType, usize)) -> Self {
        use DataType::*;
        match dtype {
            Boolean => AnyValueBuffer::Boolean(BooleanChunkedBuilder::new("", capacity)),
            Int32 => AnyValueBuffer::Int32(PrimitiveChunkedBuilder::new("", capacity)),
            Int64 => AnyValueBuffer::Int64(PrimitiveChunkedBuilder::new("", capacity)),
            UInt32 => AnyValueBuffer::UInt32(PrimitiveChunkedBuilder::new("", capacity)),
            UInt64 => AnyValueBuffer::UInt64(PrimitiveChunkedBuilder::new("", capacity)),
            #[cfg(feature = "dtype-date")]
            Date => AnyValueBuffer::Date(PrimitiveChunkedBuilder::new("", capacity)),
            #[cfg(feature = "dtype-datetime")]
            Datetime(time_unit, time_zone) => AnyValueBuffer::Datetime(
                PrimitiveChunkedBuilder::new("", capacity),
                *time_unit,
                time_zone.clone(),
            ),
            #[cfg(feature = "dtype-duration")]
            Duration(time_unit) => {
                AnyValueBuffer::Duration(PrimitiveChunkedBuilder::new("", capacity), *time_unit)
            }
            #[cfg(feature = "dtype-time")]
            Time => AnyValueBuffer::Time(PrimitiveChunkedBuilder::new("", capacity)),
            Float32 => AnyValueBuffer::Float32(PrimitiveChunkedBuilder::new("", capacity)),
            Float64 => AnyValueBuffer::Float64(PrimitiveChunkedBuilder::new("", capacity)),
            // the string builder also gets a byte capacity of five bytes per value
            Utf8 => AnyValueBuffer::Utf8(Utf8ChunkedBuilder::new("", capacity, capacity * 5)),
            // Struct and List can be recursive, so those (and anything else) keep raw values
            other => AnyValueBuffer::All(other.clone(), Vec::with_capacity(capacity)),
        }
    }
}

/// Write `value` into slot `row_idx` of column `col_idx` of the `Vec<Vec<T>>` that
/// `values_buf_ptr` is the address of.
///
/// # Safety
/// `values_buf_ptr` must be the address of a live `Vec<Vec<T>>`, `col_idx` must be a valid
/// index into it, and `row_idx` must lie within the allocated capacity of that column.
/// Neither index is checked.
#[inline]
unsafe fn add_value<T: NumericNative>(
    values_buf_ptr: usize,
    col_idx: usize,
    row_idx: usize,
    value: T,
) {
    let buffers = values_buf_ptr as *mut Vec<Vec<T>>;
    let slot = (*buffers)
        .get_unchecked_mut(col_idx)
        .as_mut_ptr()
        .add(row_idx);
    *slot = value;
}

/// Transpose `cols` into a new `DataFrame` whose columns all have numeric type `T`.
///
/// Every input series is cast to `T` and becomes one row of the output. The output has
/// `cols[0].len()` columns named `column_{i}`, each `cols.len()` values long. Validity is
/// only tracked when at least one input series contains nulls.
///
/// # Panics
/// Panics when `cols` is empty (`cols[0]`), or when a cast to `T` fails.
///
/// NOTE(review): the unchecked writes below index the output buffers by the position
/// within each input series, so this looks like it relies on all series having the same
/// length as `cols[0]` - confirm that callers guarantee this.
fn numeric_transpose<T>(cols: &[Series]) -> PolarsResult<DataFrame>
where
    T: PolarsNumericType,
    ChunkedArray<T>: IntoSeries,
{
    let new_width = cols[0].len();
    let new_height = cols.len();

    let has_nulls = cols.iter().any(|s| s.null_count() > 0);

    // One output column per input row position. Only capacity is reserved here; the
    // elements are written through raw pointers and the length is set afterwards.
    let mut values_buf: Vec<Vec<T::Native>> = (0..new_width)
        .map(|_| Vec::with_capacity(new_height))
        .collect();
    let mut validity_buf: Vec<_> = if has_nulls {
        // we first use bools instead of bits, because we can access these in parallel without aliasing
        (0..new_width).map(|_| vec![true; new_height]).collect()
    } else {
        (0..new_width).map(|_| vec![]).collect()
    };

    // work with *mut pointers because it is UB to write through & references.
    // The addresses are carried around as `usize`.
    let values_buf_ptr = &mut values_buf as *mut Vec<Vec<T::Native>> as usize;
    let validity_buf_ptr = &mut validity_buf as *mut Vec<Vec<bool>> as usize;

    // NOTE(review): the loop inside uses `iter()`, not a parallel iterator, although the
    // comments speak of parallel access - confirm whether this is intended.
    POOL.install(|| {
        cols.iter().enumerate().for_each(|(row_idx, s)| {
            let s = s.cast(&T::get_dtype()).unwrap();
            let ca = s.unpack::<T>().unwrap();

            // Safety
            // we access in parallel, but every access is unique, so we don't break aliasing rules
            // we also ensured we allocated enough memory, so we never reallocate and thus
            // the pointers remain valid.
            if has_nulls {
                for (col_idx, opt_v) in ca.into_iter().enumerate() {
                    match opt_v {
                        None => unsafe {
                            // mark the slot as invalid ...
                            let column = (*(validity_buf_ptr as *mut Vec<Vec<bool>>))
                                .get_unchecked_mut(col_idx);
                            let el_ptr = column.as_mut_ptr();
                            *el_ptr.add(row_idx) = false;
                            // we must initialize this memory otherwise downstream code
                            // might access uninitialized memory when the masked out values
                            // are changed.
                            add_value(values_buf_ptr, col_idx, row_idx, T::Native::default());
                        },
                        Some(v) => unsafe {
                            add_value(values_buf_ptr, col_idx, row_idx, v);
                        },
                    }
                }
            } else {
                for (col_idx, v) in ca.into_no_null_iter().enumerate() {
                    unsafe {
                        let column = (*(values_buf_ptr as *mut Vec<Vec<T::Native>>))
                            .get_unchecked_mut(col_idx);
                        let el_ptr = column.as_mut_ptr();
                        *el_ptr.add(row_idx) = v;
                    }
                }
            }
        })
    });

    // Turn every (values, validity) pair into a single-chunk series.
    let series = POOL.install(|| {
        values_buf
            .into_par_iter()
            .zip(validity_buf)
            .enumerate()
            .map(|(i, (mut values, validity))| {
                // Safety:
                // all values are written we can now set len
                unsafe {
                    values.set_len(new_height);
                }

                // Only attach a validity bitmap when it actually masks something out.
                let validity = if has_nulls {
                    let validity = Bitmap::from_trusted_len_iter(validity.iter().copied());
                    if validity.unset_bits() > 0 {
                        Some(validity)
                    } else {
                        None
                    }
                } else {
                    None
                };

                let arr = PrimitiveArray::<T::Native>::new(
                    T::get_dtype().to_arrow(),
                    values.into(),
                    validity,
                );
                let name = format!("column_{i}");
                ChunkedArray::<T>::from_chunks(&name, vec![Box::new(arr) as ArrayRef]).into_series()
            })
            .collect()
    });

    Ok(DataFrame::new_no_checks(series))
}

src/frame/asof_join/mod.rs (line 40)

/// Validate the key columns of an asof-join.
///
/// Returns a `ComputeError` when the two keys differ in dtype or when either
/// key contains null values; otherwise returns `Ok(())`.
fn check_asof_columns(a: &Series, b: &Series) -> PolarsResult<()> {
    let (left_dtype, right_dtype) = (a.dtype(), b.dtype());
    if left_dtype != right_dtype {
        let msg = format!(
            "keys used in asof-join must have equal dtypes. We got: left: {:?}\tright: {:?}",
            left_dtype, right_dtype
        );
        return Err(PolarsError::ComputeError(msg.into()));
    }

    let has_nulls = a.null_count() > 0 || b.null_count() > 0;
    if has_nulls {
        return Err(PolarsError::ComputeError(
            "asof join must not have null values in 'on' arguments".into(),
        ));
    }

    Ok(())
}

src/series/ops/moment.rs (line 64)

    /// Compute the skewness from the second and third moments around the mean.
    ///
    /// Returns `Ok(None)` when `self.mean()` yields `None`. With `bias` set to
    /// `false`, the biased estimate is rescaled by `sqrt(n * (n - 1)) / (n - 2)`,
    /// where `n` is `self.len() - self.null_count()`.
    pub fn skew(&self, bias: bool) -> PolarsResult<Option<f64>> {
        let mean = if let Some(mean) = self.mean() {
            mean
        } else {
            return Ok(None);
        };
        // we can unwrap because if it were None, we already return None above
        let second_moment = moment_precomputed_mean(self, 2, mean)?.unwrap();
        let third_moment = moment_precomputed_mean(self, 3, mean)?.unwrap();

        let biased = third_moment / second_moment.powf(1.5);
        if bias {
            return Ok(Some(biased));
        }

        let n = (self.len() - self.null_count()) as f64;
        let correction = ((n - 1.0) * n).sqrt() / (n - 2.0);
        Ok(Some(correction * biased))
    }

    /// Compute the kurtosis (Fisher or Pearson) of a dataset.
    ///
    /// The biased estimate is the fourth central moment divided by the square
    /// of the second central moment. When `bias` is `false`, the estimate is
    /// corrected using k statistics to eliminate the bias coming from biased
    /// moment estimators. When `fisher` is `true`, 3.0 is subtracted from the
    /// result so that a normal distribution gives 0.0.
    ///
    /// Returns `Ok(None)` when `self.mean()` yields `None`.
    ///
    /// see: https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1027
    #[cfg_attr(docsrs, doc(cfg(feature = "moment")))]
    pub fn kurtosis(&self, fisher: bool, bias: bool) -> PolarsResult<Option<f64>> {
        let mean = if let Some(mean) = self.mean() {
            mean
        } else {
            return Ok(None);
        };
        // we can unwrap because if it were None, we already return None above
        let second_moment = moment_precomputed_mean(self, 2, mean)?.unwrap();
        let fourth_moment = moment_precomputed_mean(self, 4, mean)?.unwrap();

        let pearson = if bias {
            fourth_moment / second_moment.powf(2.0)
        } else {
            let n = (self.len() - self.null_count()) as f64;
            let scale = 1.0 / (n - 2.0) / (n - 3.0);
            let inner = (n.powf(2.0) - 1.0) * fourth_moment / second_moment.powf(2.0)
                - 3.0 * (n - 1.0).powf(2.0);
            3.0 + scale * inner
        };

        Ok(Some(if fisher { pearson - 3.0 } else { pearson }))
    }

Additional examples can be found in:

source

fn unique(&self) -> PolarsResult<Series>

Get unique values in the Series.

Examples found in repository ?

src/series/mod.rs (line 696)

    /// Cast to `data_type`, failing if the cast changes the null count.
    ///
    /// The error message lists the unique values that were non-null before the
    /// cast and null afterwards.
    pub fn strict_cast(&self, data_type: &DataType) -> PolarsResult<Series> {
        let casted = self.cast(data_type)?;
        // An unchanged null count is taken to mean that every value converted.
        if self.null_count() == casted.null_count() {
            return Ok(casted);
        }

        // Values that were present before the cast but are null afterwards.
        let failure_mask = !self.is_null() & casted.is_null();
        let failures = self.filter_threaded(&failure_mask, false)?.unique()?;
        let msg = format!(
            "Strict conversion from {:?} to {:?} failed for values {}. \
            If you were trying to cast Utf8 to Date, Time, or Datetime, \
            consider using `strptime`.",
            self.dtype(),
            data_type,
            failures.fmt_list(),
        );
        Err(PolarsError::ComputeError(msg.into()))
    }

source

fn n_unique(&self) -> PolarsResult<usize>

Get the number of unique values in the Series.

Examples found in repository ?

src/frame/groupby/aggregations/dispatch.rs (line 91)

    /// Count the unique values of every group in `groups`.
    ///
    /// Empty groups yield `None`, as do groups for which `n_unique` returns an
    /// error.
    ///
    /// # Safety
    /// NOTE(review): the group indices are gathered with
    /// `take_iter_unchecked`; presumably the caller must guarantee they are in
    /// bounds of `self` (only a `debug_assert!` checks the group length).
    pub unsafe fn agg_n_unique(&self, groups: &GroupsProxy) -> Series {
        match groups {
            GroupsProxy::Idx(groups) => agg_helper_idx_on_all::<IdxType, _>(groups, |idx| {
                debug_assert!(idx.len() <= self.len());
                if idx.is_empty() {
                    return None;
                }
                let group = self.take_iter_unchecked(&mut idx.iter().map(|&i| i as usize));
                group.n_unique().ok().map(|n| n as IdxSize)
            }),
            GroupsProxy::Slice { groups, .. } => {
                _agg_helper_slice::<IdxType, _>(groups, |[first, len]| {
                    debug_assert!(len <= self.len() as IdxSize);
                    if len == 0 {
                        return None;
                    }
                    let group = self.slice_from_offsets(first, len);
                    group.n_unique().ok().map(|n| n as IdxSize)
                })
            }
        }
    }

source

fn arg_unique(&self) -> PolarsResult<IdxCa>

Get first indexes of unique values.

Examples found in repository ?

src/series/mod.rs (line 875)

    /// Take the values at the indices returned by `arg_unique`.
    pub fn unique_stable(&self) -> PolarsResult<Series> {
        self.arg_unique().and_then(|first_occurrences| {
            // Safety:
            // Indices are in bounds.
            unsafe { self.take_unchecked(&first_occurrences) }
        })
    }

source

fn arg_min(&self) -> Option<usize>

Get the index of the minimum value.

source

fn arg_max(&self) -> Option<usize>

Get the index of the maximum value.

source

fn is_null(&self) -> BooleanChunked

Get a mask of the null values.

Examples found in repository ?

src/series/comparison.rs (line 97)

/// Compare the series `cat` against the first value of the Utf8 series `string`.
///
/// When that first value is null, the result is the null mask of `cat`;
/// otherwise the comparison is delegated to `compare_cat_to_str_value`.
fn compare_cat_to_str_series<Compare>(
    cat: &Series,
    string: &Series,
    name: &str,
    compare: Compare,
    fill_value: bool,
) -> PolarsResult<BooleanChunked>
where
    Compare: Fn(&Series, u32) -> PolarsResult<BooleanChunked>,
{
    let rhs = string.utf8()?;
    if let Some(value) = rhs.get(0) {
        compare_cat_to_str_value(cat, value, name, compare, fill_value)
    } else {
        Ok(cat.is_null())
    }
}

More examples

Hide additional examples

src/series/mod.rs (line 695)

    /// Cast to `data_type`, failing if the cast changes the null count.
    ///
    /// The error message lists the unique values that were non-null before the
    /// cast and null afterwards.
    pub fn strict_cast(&self, data_type: &DataType) -> PolarsResult<Series> {
        let casted = self.cast(data_type)?;
        // An unchanged null count is taken to mean that every value converted.
        if self.null_count() == casted.null_count() {
            return Ok(casted);
        }

        // Values that were present before the cast but are null afterwards.
        let failure_mask = !self.is_null() & casted.is_null();
        let failures = self.filter_threaded(&failure_mask, false)?.unique()?;
        let msg = format!(
            "Strict conversion from {:?} to {:?} failed for values {}. \
            If you were trying to cast Utf8 to Date, Time, or Datetime, \
            consider using `strptime`.",
            self.dtype(),
            data_type,
            failures.fmt_list(),
        );
        Err(PolarsError::ComputeError(msg.into()))
    }

src/frame/mod.rs (line 2806)

    /// Aggregate the column horizontally to their min values.
    ///
    /// Returns `Ok(None)` when the `DataFrame` has no columns.
    pub fn hmin(&self) -> PolarsResult<Option<Series>> {
        // The mask is true where `acc < s` and `acc` is non-null, or where `s` is null.
        let min_fn = |acc: &Series, s: &Series| {
            let acc_is_smaller = acc.lt(s)? & acc.is_not_null();
            let mask = acc_is_smaller | s.is_null();
            acc.zip_with(&mask, s)
        };

        let columns = &self.columns;
        match columns.len() {
            0 => Ok(None),
            1 => Ok(Some(columns[0].clone())),
            2 => min_fn(&columns[0], &columns[1]).map(Some),
            _ => {
                // the try_reduce_with is a bit slower in parallelism,
                // but I don't think it matters here as we parallelize over columns, not over elements
                POOL.install(|| {
                    columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| min_fn(&l, &r).map(Cow::Owned))
                        // this arm is only reached with 3 or more columns,
                        // so the reduction always yields a value
                        .unwrap()
                        .map(|reduced| Some(reduced.into_owned()))
                })
            }
        }
    }

    /// Aggregate the column horizontally to their max values.
    ///
    /// Returns `Ok(None)` when the `DataFrame` has no columns.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmax(&self) -> PolarsResult<Option<Series>> {
        // The mask is true where `acc > s` and `acc` is non-null, or where `s` is null.
        let max_fn = |acc: &Series, s: &Series| {
            let acc_is_larger = acc.gt(s)? & acc.is_not_null();
            let mask = acc_is_larger | s.is_null();
            acc.zip_with(&mask, s)
        };

        let columns = &self.columns;
        match columns.len() {
            0 => Ok(None),
            1 => Ok(Some(columns[0].clone())),
            2 => max_fn(&columns[0], &columns[1]).map(Some),
            _ => {
                // the try_reduce_with is a bit slower in parallelism,
                // but I don't think it matters here as we parallelize over columns, not over elements
                POOL.install(|| {
                    columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| max_fn(&l, &r).map(Cow::Owned))
                        // this arm is only reached with 3 or more columns,
                        // so the reduction always yields a value
                        .unwrap()
                        .map(|reduced| Some(reduced.into_owned()))
                })
            }
        }
    }

    /// Aggregate the column horizontally to their sum values.
    ///
    /// With `NullStrategy::Ignore`, columns that report a validity buffer have
    /// their nulls filled with zero before they are added. Returns `Ok(None)`
    /// when the `DataFrame` has no columns.
    pub fn hsum(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>> {
        let sum_fn =
            |acc: &Series, s: &Series, none_strategy: NullStrategy| -> PolarsResult<Series> {
                let ignore_nulls = matches!(none_strategy, NullStrategy::Ignore);
                // Only fill when nulls are ignored and the column may contain nulls.
                let lhs = if ignore_nulls && acc.has_validity() {
                    acc.fill_null(FillNullStrategy::Zero)?
                } else {
                    acc.clone()
                };
                let rhs = if ignore_nulls && s.has_validity() {
                    s.fill_null(FillNullStrategy::Zero)?
                } else {
                    s.clone()
                };
                Ok(&lhs + &rhs)
            };

        let columns = &self.columns;
        match columns.len() {
            0 => Ok(None),
            1 => Ok(Some(columns[0].clone())),
            2 => sum_fn(&columns[0], &columns[1], none_strategy).map(Some),
            _ => {
                // the try_reduce_with is a bit slower in parallelism,
                // but I don't think it matters here as we parallelize over columns, not over elements
                POOL.install(|| {
                    columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| sum_fn(&l, &r, none_strategy).map(Cow::Owned))
                        // this arm is only reached with 3 or more columns,
                        // so the reduction always yields a value
                        .unwrap()
                        .map(|reduced| Some(reduced.into_owned()))
                })
            }
        }
    }

    /// Aggregate the column horizontally to their mean values.
    ///
    /// With two or more columns, only numeric and boolean columns take part in
    /// the mean. The divisor per row is the number of participating columns
    /// minus the number of nulls in that row; a divisor of zero is replaced by
    /// null so that no division by zero happens.
    ///
    /// Returns `Ok(None)` when the `DataFrame` has no columns, or when none of
    /// its columns is numeric or boolean.
    pub fn hmean(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>> {
        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            _ => {
                // Filter before cloning so only the surviving columns are cloned.
                let columns: Vec<Series> = self
                    .columns
                    .iter()
                    .filter(|s| {
                        let dtype = s.dtype();
                        dtype.is_numeric() || matches!(dtype, DataType::Boolean)
                    })
                    .cloned()
                    .collect();

                // Without any numeric/boolean column there is nothing to average.
                // Bail out here: the null-count reduction below would otherwise
                // unwrap the `None` produced by reducing an empty iterator.
                if columns.is_empty() {
                    return Ok(None);
                }
                let numeric_df = DataFrame::new_no_checks(columns);

                let sum = || numeric_df.hsum(none_strategy);

                let null_count = || {
                    numeric_df
                        .columns
                        .par_iter()
                        .map(|s| s.is_null().cast(&DataType::UInt32).unwrap())
                        .reduce_with(|l, r| &l + &r)
                        // we can unwrap the option, because we checked above
                        // that there is at least one column
                        .unwrap()
                };

                let (sum, null_count) = POOL.install(|| rayon::join(sum, null_count));
                let sum = sum?;

                // value lengths: len - null_count
                let value_length: UInt32Chunked =
                    (numeric_df.width().sub(&null_count)).u32().unwrap().clone();

                // make sure that we do not divide by zero
                // by replacing with None
                let value_length = value_length
                    .set(&value_length.equal(0), None)?
                    .into_series()
                    .cast(&DataType::Float64)?;

                Ok(sum.map(|sum| &sum / &value_length))
            }
        }
    }

src/chunked_array/ops/unique/rank.rs (line 233)

/// Assign a rank to every value of `s` according to `method`.
///
/// `reverse` is forwarded as the `descending` flag of the argsort that drives
/// the ranking. Series of length 0 and 1 are answered directly. For
/// `Average` the result is a `Float32` series; for the other methods the
/// ranks are `IdxSize` values.
///
/// When `s` contains nulls, the nulls are filled with the type's max/min
/// bound, the filled series is ranked recursively, and the validity mask from
/// `s.is_not_null()` is put on the first chunk of the result.
pub(crate) fn rank(s: &Series, method: RankMethod, reverse: bool) -> Series {
    match s.len() {
        1 => {
            return match method {
                Average => Series::new(s.name(), &[1.0f32]),
                _ => Series::new(s.name(), &[1 as IdxSize]),
            };
        }
        0 => {
            return match method {
                Average => Float32Chunked::from_slice(s.name(), &[]).into_series(),
                _ => IdxCa::from_slice(s.name(), &[]).into_series(),
            };
        }
        _ => {}
    }

    if s.null_count() > 0 {
        let nulls = s.is_not_null().rechunk();
        let arr = nulls.downcast_iter().next().unwrap();
        let validity = arr.values();
        // Currently, nulls tie with the minimum or maximum bound for a type, depending on reverse.
        // TODO: Need to expose nulls_last in argsort to prevent this.
        // Fill using MaxBound/MinBound to give nulls last rank.
        // we will replace them later.
        let null_strategy = if reverse {
            FillNullStrategy::MinBound
        } else {
            FillNullStrategy::MaxBound
        };
        let s = s.fill_null(null_strategy).unwrap();

        let mut out = rank(&s, method, reverse);
        unsafe {
            let arr = &mut out.chunks_mut()[0];
            *arr = arr.with_validity(Some(validity.clone()))
        }
        return out;
    }

    // NOTE(review): the branch above always returns, so from here on
    // `s.null_count()` is 0; the null-handling paths further down look
    // unreachable — confirm before relying on them.

    // See: https://github.com/scipy/scipy/blob/v1.7.1/scipy/stats/stats.py#L8631-L8737

    let len = s.len();
    let null_count = s.null_count();
    let sort_idx_ca = s.argsort(SortOptions {
        descending: reverse,
        ..Default::default()
    });
    let sort_idx = sort_idx_ca.downcast_iter().next().unwrap().values();

    let mut inv: Vec<IdxSize> = Vec::with_capacity(len);
    // Safety:
    // Values will be filled next and there is only primitive data
    #[allow(clippy::uninit_vec)]
    unsafe {
        inv.set_len(len)
    }
    let inv_values = inv.as_mut_slice();

    // Ordinal (and Random) ranks start counting at 1, the other methods at 0.
    #[cfg(feature = "random")]
    let mut count = if let RankMethod::Ordinal | RankMethod::Random = method {
        1 as IdxSize
    } else {
        0
    };

    #[cfg(not(feature = "random"))]
    let mut count = if let RankMethod::Ordinal = method {
        1 as IdxSize
    } else {
        0
    };

    // Write, for every original position, its position in the sorted order.
    // Safety:
    // we are in bounds
    unsafe {
        sort_idx.iter().for_each(|&i| {
            *inv_values.get_unchecked_mut(i as usize) = count;
            count += 1;
        });
    }

    use RankMethod::*;
    match method {
        Ordinal => {
            let inv_ca = IdxCa::from_vec(s.name(), inv);
            inv_ca.into_series()
        }
        #[cfg(feature = "random")]
        Random => {
            // Safety:
            // in bounds
            let arr = unsafe { s.take_unchecked(&sort_idx_ca).unwrap() };
            // Compare every sorted value with its predecessor.
            let not_consecutive_same = arr
                .slice(1, len - 1)
                .not_equal(&arr.slice(0, len - 1))
                .unwrap()
                .rechunk();
            let obs = not_consecutive_same.downcast_iter().next().unwrap();

            // Collect slice indices for sort_idx which point to ties in the original series.
            let mut ties_indices = Vec::with_capacity(len + 1);
            let mut ties_index: usize = 0;

            ties_indices.push(ties_index);
            obs.iter().for_each(|b| {
                if let Some(b) = b {
                    ties_index += 1;
                    if b {
                        ties_indices.push(ties_index)
                    }
                }
            });
            // Close last slice (if there were nulls in the original series, they will always be in the last slice).
            ties_indices.push(len);

            let mut sort_idx = sort_idx.to_vec();

            let mut thread_rng = thread_rng();
            let rng = &mut SmallRng::from_rng(&mut thread_rng).unwrap();

            // Shuffle sort_idx positions which point to ties in the original series.
            for i in 0..(ties_indices.len() - 1) {
                let ties_index_start = ties_indices[i];
                let ties_index_end = ties_indices[i + 1];
                if ties_index_end - ties_index_start > 1 {
                    sort_idx[ties_index_start..ties_index_end].shuffle(rng);
                }
            }

            // Recreate inv_ca (where ties are randomly shuffled compared with Ordinal).
            let mut count = 1 as IdxSize;
            unsafe {
                sort_idx.iter().for_each(|&i| {
                    *inv_values.get_unchecked_mut(i as usize) = count;
                    count += 1;
                });
            }

            let inv_ca = IdxCa::from_vec(s.name(), inv);
            inv_ca.into_series()
        }
        _ => {
            let inv_ca = IdxCa::from_vec(s.name(), inv);
            // Safety:
            // in bounds
            let arr = unsafe { s.take_unchecked(&sort_idx_ca).unwrap() };
            let validity = arr.chunks()[0].validity().cloned();
            // Compare every sorted value with its predecessor.
            let not_consecutive_same = arr
                .slice(1, len - 1)
                .not_equal(&arr.slice(0, len - 1))
                .unwrap()
                .rechunk();
            // this obs is shorter than that of scipy stats, because we can just start the cumsum by 1
            // instead of 0
            let obs = not_consecutive_same.downcast_iter().next().unwrap();
            let mut dense = Vec::with_capacity(len);

            // this offset save an offset on the whole column, what scipy does in:
            //
            // ```python
            //     if method == 'min':
            //         return count[dense - 1] + 1
            // ```
            // INVALID LINT REMOVE LATER
            #[allow(clippy::bool_to_int_with_if)]
            let mut cumsum: IdxSize = if let RankMethod::Min = method {
                0
            } else {
                // nulls will be first, rank, but we will replace them (with null)
                // so this ensures the second rank will be 1
                if matches!(method, RankMethod::Dense) && s.null_count() > 0 {
                    0
                } else {
                    1
                }
            };

            // Running count that increases whenever a sorted value differs
            // from its predecessor.
            dense.push(cumsum);
            obs.values_iter().for_each(|b| {
                if b {
                    cumsum += 1;
                }
                dense.push(cumsum)
            });
            let arr = IdxArr::from_data_default(dense.into(), validity);
            let dense: IdxCa = (s.name(), arr).into();
            // Safety:
            // in bounds
            let dense = unsafe { dense.take_unchecked((&inv_ca).into()) };

            if let RankMethod::Dense = method {
                return if s.null_count() == 0 {
                    dense.into_series()
                } else {
                    // null will be the first rank
                    // we restore original nulls and shift all ranks by one
                    let validity = s.is_null().rechunk();
                    let validity = validity.downcast_iter().next().unwrap();
                    let validity = validity.values().clone();

                    let arr = dense.downcast_iter().next().unwrap();
                    let arr = arr.with_validity(Some(validity));
                    let dtype = arr.data_type().clone();

                    // Safety:
                    // given dtype is correct
                    unsafe {
                        Series::try_from_arrow_unchecked(s.name(), vec![arr], &dtype).unwrap()
                    }
                };
            }

            // `count` collects the sorted positions at which a new distinct
            // value starts, preceded by 0 and followed by the number of
            // non-null values.
            let bitmap = obs.values();
            let cap = bitmap.len() - bitmap.unset_bits();
            let mut count = Vec::with_capacity(cap + 1);
            let mut cnt: IdxSize = 0;
            count.push(cnt);

            if null_count > 0 {
                obs.iter().for_each(|b| {
                    if let Some(b) = b {
                        cnt += 1;
                        if b {
                            count.push(cnt)
                        }
                    }
                });
            } else {
                obs.values_iter().for_each(|b| {
                    cnt += 1;
                    if b {
                        count.push(cnt)
                    }
                });
            }

            count.push((len - null_count) as IdxSize);
            let count = IdxCa::from_vec(s.name(), count);

            match method {
                Max => {
                    // Safety:
                    // within bounds
                    unsafe { count.take_unchecked((&dense).into()).into_series() }
                }
                Min => {
                    // Safety:
                    // within bounds
                    unsafe { (count.take_unchecked((&dense).into()) + 1).into_series() }
                }
                Average => {
                    // Average of the two lookups into `count`, i.e. 0.5 * (a + b).
                    // Safety:
                    // in bounds
                    let a = unsafe { count.take_unchecked((&dense).into()) }
                        .cast(&DataType::Float32)
                        .unwrap();
                    let b = unsafe { count.take_unchecked((&(dense - 1)).into()) }
                        .cast(&DataType::Float32)
                        .unwrap()
                        + 1.0;
                    (&a + &b) * 0.5
                }
                // Dense and Ordinal (and Random) have already returned above.
                #[cfg(feature = "random")]
                Dense | Ordinal | Random => unimplemented!(),
                #[cfg(not(feature = "random"))]
                Dense | Ordinal => unimplemented!(),
            }
        }
    }
}

source

fn is_not_null(&self) -> BooleanChunked

Get a mask of the non-null values.

Examples found in repository ?

src/series/series_trait.rs (line 359)

    /// Filter the series by its `is_not_null` mask.
    ///
    /// When there are no nulls, the inner series is cloned instead of filtered.
    fn drop_nulls(&self) -> Series {
        if self.null_count() > 0 {
            let keep = self.is_not_null();
            self.filter(&keep).unwrap()
        } else {
            Series(self.clone_inner())
        }
    }

More examples

Hide additional examples

src/frame/mod.rs (line 1031)

    /// Filter the `DataFrame` by the combined `is_not_null` masks of its columns.
    ///
    /// When `subset` is given, only those columns are inspected for nulls;
    /// otherwise every column is.
    pub fn drop_nulls(&self, subset: Option<&[String]>) -> PolarsResult<Self> {
        // Declared up front so the selected columns outlive the iterator that borrows them.
        let selected_series;

        let mut iter = if let Some(cols) = subset {
            selected_series = self.select_series(cols)?;
            selected_series.iter()
        } else {
            self.columns.iter()
        };

        // fast path for no nulls in df
        if !iter.clone().any(|s| s.has_validity()) {
            return Ok(self.clone());
        }

        let first = iter
            .next()
            .ok_or_else(|| PolarsError::NoData("No data to drop nulls from".into()))?;
        // AND the non-null masks of all inspected columns together.
        let mask = iter.fold(first.is_not_null(), |mask, s| mask & s.is_not_null());
        self.filter(&mask)
    }

    /// Drop a column by name.
    /// This is a pure method and will return a new `DataFrame` instead of modifying
    /// the current one in place.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Ray type" => &["α", "β", "X", "γ"])?;
    /// let df2: DataFrame = df1.drop("Ray type")?;
    ///
    /// assert!(df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop(&self, name: &str) -> PolarsResult<Self> {
        let idx = self.check_name_to_idx(name)?;
        let mut new_cols = Vec::with_capacity(self.columns.len() - 1);

        self.columns.iter().enumerate().for_each(|(i, s)| {
            if i != idx {
                new_cols.push(s.clone())
            }
        });

        Ok(DataFrame::new_no_checks(new_cols))
    }

    /// Drop every column whose name appears in `names` and return the
    /// remaining columns as a new `DataFrame`.
    ///
    /// Names that match no column are ignored.
    pub fn drop_many<S: AsRef<str>>(&self, names: &[S]) -> Self {
        let names = names.iter().map(|s| s.as_ref()).collect();
        // Non-generic inner function, so the body is compiled only once.
        fn inner(df: &DataFrame, names: Vec<&str>) -> DataFrame {
            // `names` may hold duplicates or unknown names and so be longer than
            // the column list; saturate instead of underflowing the capacity hint.
            let mut new_cols =
                Vec::with_capacity(df.columns.len().saturating_sub(names.len()));
            df.columns.iter().for_each(|s| {
                if !names.contains(&s.name()) {
                    new_cols.push(s.clone())
                }
            });

            DataFrame::new_no_checks(new_cols)
        }
        inner(self, names)
    }

    /// Insert `series` at position `index` without checking for a name clash.
    ///
    /// Fails with `ShapeMisMatch` when the series length differs from the
    /// height of the `DataFrame`.
    fn insert_at_idx_no_name_check(
        &mut self,
        index: usize,
        series: Series,
    ) -> PolarsResult<&mut Self> {
        if series.len() != self.height() {
            let msg = format!(
                "Could not add column. The Series length {} differs from the DataFrame height: {}",
                series.len(),
                self.height()
            );
            return Err(PolarsError::ShapeMisMatch(msg.into()));
        }

        self.columns.insert(index, series);
        Ok(self)
    }

    /// Insert a new column at a given index.
    ///
    /// The column name is first passed to `check_already_present`, whose error
    /// is propagated; the length check is done by `insert_at_idx_no_name_check`.
    pub fn insert_at_idx<S: IntoSeries>(
        &mut self,
        index: usize,
        column: S,
    ) -> PolarsResult<&mut Self> {
        let new_column = column.into_series();
        self.check_already_present(new_column.name())?;
        self.insert_at_idx_no_name_check(index, new_column)
    }

    /// Replace the column that has the same name as `series`, or append
    /// `series` when no column with that name is found.
    fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> {
        match self.find_idx_by_name(series.name()) {
            Some(existing_idx) => {
                self.replace_at_idx(existing_idx, series)?;
            }
            None => self.columns.push(series),
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    ///
    /// A column of length 1 is expanded with `new_from_index(0, height)` when
    /// the `DataFrame` is taller than one row. Fails with `ShapeMisMatch` when
    /// the lengths still do not agree.
    pub fn with_column<S: IntoSeries>(&mut self, column: S) -> PolarsResult<&mut Self> {
        // Non-generic inner function, so the body is compiled only once.
        fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();
            if series.len() == 1 && height > 1 {
                series = series.new_from_index(0, height);
            }

            if series.len() == height || df.is_empty() {
                df.add_column_by_search(series)?;
                return Ok(df);
            }

            // special case for literals
            if height == 0 && series.len() == 1 {
                df.add_column_by_search(series.slice(0, 0))?;
                return Ok(df);
            }

            let msg = format!(
                "Could not add column. The Series length {} differs from the DataFrame height: {}",
                series.len(),
                df.height()
            );
            Err(PolarsError::ShapeMisMatch(msg.into()))
        }

        inner(self, column.into_series())
    }

    /// Add or replace the column `s`, using `schema` to locate its position.
    ///
    /// When the schema knows the name but the column at that index carries a
    /// different name, this falls back to `add_column_by_search`. Names that
    /// are not in the schema are appended.
    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let name = s.name();
        match schema.get_full(name) {
            Some((idx, _, _)) => {
                let schema_is_correct = self.columns.get(idx).map(|s| s.name()) == Some(name);
                if schema_is_correct {
                    self.replace_at_idx(idx, s)?;
                } else {
                    // schema is incorrect fallback to search
                    self.add_column_by_search(s)?;
                }
            }
            None => self.columns.push(s),
        }
        Ok(())
    }

    /// Add every series in `columns` to this `DataFrame`.
    ///
    /// The first series, and every series whose name is in `schema`, goes
    /// through `with_column_and_schema`; the others go through `with_column`.
    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                // `s` is owned and not used afterwards, so it is moved instead of cloned.
                self.with_column(s)?;
            }
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    /// Uses an existing schema to amortize lookups.
    /// If the schema is incorrect, we will fallback to linear search.
    ///
    /// A column of length 1 is expanded with `new_from_index(0, height)` when
    /// the `DataFrame` is taller than one row. Fails with `ShapeMisMatch` when
    /// the lengths still do not agree.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let height = self.height();
        let mut series = column.into_series();
        if series.len() == 1 && height > 1 {
            series = series.new_from_index(0, height);
        }

        if series.len() == height || self.is_empty() {
            self.add_column_by_schema(series, schema)?;
            return Ok(self);
        }

        // special case for literals
        if height == 0 && series.len() == 1 {
            self.add_column_by_schema(series.slice(0, 0), schema)?;
            return Ok(self);
        }

        let msg = format!(
            "Could not add column. The Series length {} differs from the DataFrame height: {}",
            series.len(),
            self.height()
        );
        Err(PolarsError::ShapeMisMatch(msg.into()))
    }

    /// Get a row in the `DataFrame`. Beware this is slow.
    ///
    /// Returns `None` when there are no columns or when `idx` is not smaller
    /// than the length of the first column.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    ///     df.get(idx)
    /// }
    /// ```
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        // The first column decides whether `idx` is a valid row.
        let first = self.columns.first()?;
        if idx >= first.len() {
            return None;
        }
        // safety: we just checked bounds
        let row = unsafe { self.columns.iter().map(|s| s.get_unchecked(idx)).collect() };
        Some(row)
    }

    /// Select a `Series` by index.
    ///
    /// Returns `None` when `idx` is out of bounds of the column list.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
    ///                         "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
    ///
    /// let s1: Option<&Series> = df.select_at_idx(0);
    /// let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
    ///
    /// assert_eq!(s1, Some(&s2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_at_idx(&self, idx: usize) -> Option<&Series> {
        self.columns.get(idx)
    }

    /// Select a mutable series by index.
    ///
    /// Returns `None` when `idx` is out of bounds of the column list.
    ///
    /// *Note: the length of the Series should remain the same, otherwise the DataFrame is invalid.*
    /// For this reason the method is not public.
    fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> {
        self.columns.get_mut(idx)
    }

    /// Select column(s) from this `DataFrame` by range and return a new DataFrame
    ///
    /// # Panics
    ///
    /// Panics when the range starts after its end, when its end exceeds the
    /// number of columns, or when a bound overflows `usize`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///     "0" => &[0, 0, 0],
    ///     "1" => &[1, 1, 1],
    ///     "2" => &[2, 2, 2]
    /// }?;
    ///
    /// assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
    /// assert!(df.frame_equal(&df.select_by_range(..)?));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // This function is copied from std::slice::range (https://doc.rust-lang.org/std/slice/fn.range.html)
        // because it is the nightly feature. We should change here if this function were stable.
        //
        // Turns arbitrary range bounds into a concrete half-open range that is
        // checked against `bounds.end`.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let len = bounds.end;

            let start = match range.start_bound() {
                ops::Bound::Unbounded => 0,
                ops::Bound::Included(&start) => start,
                ops::Bound::Excluded(start) => start
                    .checked_add(1)
                    .unwrap_or_else(|| panic!("attempted to index slice from after maximum usize")),
            };

            let end = match range.end_bound() {
                ops::Bound::Unbounded => len,
                ops::Bound::Excluded(&end) => end,
                ops::Bound::Included(end) => end
                    .checked_add(1)
                    .unwrap_or_else(|| panic!("attempted to index slice up to maximum usize")),
            };

            assert!(start <= end, "slice index starts at {start} but ends at {end}");
            assert!(end <= len, "range end index {end} out of range for slice of length {len}");

            start..end
        }

        let colnames = self.get_column_names_owned();
        let selected = get_range(range, ..colnames.len());

        self.select_impl(&colnames[selected])
    }

    /// Get column index of a `Series` by name.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
    ///                         "Health" => &[100, 200, 500],
    ///                         "Mana" => &[250, 100, 0],
    ///                         "Strength" => &[30, 150, 300])?;
    ///
    /// assert_eq!(df.find_idx_by_name("Name"), Some(0));
    /// assert_eq!(df.find_idx_by_name("Health"), Some(1));
    /// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
    /// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
    /// assert_eq!(df.find_idx_by_name("Haste"), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
        // Linear scan: the first column carrying `name` wins.
        for (idx, series) in self.columns.iter().enumerate() {
            if series.name() == name {
                return Some(idx);
            }
        }
        None
    }

    /// Select a single column by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
    /// let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
    ///
    /// assert_eq!(df.column("Password")?, &s1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        match self.find_idx_by_name(name) {
            // the index was just produced by a search over `self.columns`
            Some(idx) => Ok(&self.columns[idx]),
            None => Err(PolarsError::NotFound(name.to_string().into())),
        }
    }

    /// Select multiple columns by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let mut found = Vec::new();
        for name in names {
            // stop at the first name that cannot be resolved
            let series = self.column(name.as_ref())?;
            found.push(series);
        }
        Ok(found)
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        // Materialize the names so the non-generic implementation can do the work.
        let cols: Vec<String> = selection
            .into_iter()
            .map(|name| name.as_ref().to_owned())
            .collect();
        self.select_impl(&cols)
    }

    /// Non-generic backend of `select`: rejects duplicate names, then builds a
    /// new `DataFrame` from the selected columns.
    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        self.select_series_impl(cols)
            .map(DataFrame::new_no_checks)
    }

    /// Select column(s) from this `DataFrame` by name and return a new `DataFrame`
    /// holding the physical representation (`to_physical_repr`) of each of them.
    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        // Materialize the names so the non-generic implementation can do the work.
        let cols: Vec<String> = selection
            .into_iter()
            .map(|name| name.as_ref().to_owned())
            .collect();
        self.select_physical_impl(&cols)
    }

    /// Non-generic backend of `select_physical`: rejects duplicate names, then
    /// builds a new `DataFrame` from the physical representation of the columns.
    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        self.select_series_physical_impl(cols)
            .map(DataFrame::new_no_checks)
    }

    /// Verify that no name occurs more than once in `cols`.
    ///
    /// A repeated name is handed to `_duplicate_err`, whose error is propagated.
    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut seen = PlHashSet::with_capacity(cols.len());
        cols.iter().try_for_each(|name| -> PolarsResult<()> {
            // `insert` yields `false` when the name is already in the set
            let is_new = seen.insert(name.as_str());
            if !is_new {
                _duplicate_err(name)?
            }
            Ok(())
        })
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        // Note: unlike `select`, duplicate names are not rejected here.
        let names: Vec<String> = selection.into_vec();
        self.select_series_impl(names.as_slice())
    }

    /// Build a lookup table from each column name to the position of that column.
    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        // pair every name with a running counter that starts at the first column
        self.columns.iter().map(|s| s.name()).zip(0..).collect()
    }

    /// Fetch the requested columns, in the order given, as their physical representation.
    ///
    /// A non generic implementation to reduce compiler bloat.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        if cols.len() <= 1 || self.columns.len() <= 10 {
            // few lookups or few columns: search linearly for every name
            return cols
                .iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect::<PolarsResult<Vec<_>>>();
        }

        // otherwise hash the names once and resolve every request through the table
        let name_to_idx = self._names_to_idx_map();
        let mut selected = Vec::with_capacity(cols.len());
        for name in cols {
            match name_to_idx.get(name.as_str()) {
                Some(&idx) => selected.push(self.columns[idx].to_physical_repr().into_owned()),
                None => return Err(PolarsError::NotFound(name.to_string().into())),
            }
        }
        Ok(selected)
    }

    /// Clone the requested columns out of the frame, in the order given.
    ///
    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        if cols.len() <= 1 || self.columns.len() <= 10 {
            // few lookups or few columns: search linearly for every name
            return cols
                .iter()
                .map(|c| self.column(c).map(Clone::clone))
                .collect::<PolarsResult<Vec<_>>>();
        }

        // Hash the names once, because some users have millions of columns.
        // # https://github.com/pola-rs/polars/issues/1023
        let name_to_idx = self._names_to_idx_map();
        let mut selected = Vec::with_capacity(cols.len());
        for name in cols {
            match name_to_idx.get(name.as_str()) {
                Some(&idx) => selected.push(self.columns[idx].clone()),
                None => return Err(PolarsError::NotFound(name.to_string().into())),
            }
        }
        Ok(selected)
    }

    /// Borrow the column called `name` mutably, or `None` when no such column exists.
    ///
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public.
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        let idx = self.find_idx_by_name(name)?;
        self.select_at_idx_mut(idx)
    }

    /// Filter by `mask` after cutting both the frame and the mask into row-wise
    /// parts, one per thread, instead of distributing the columns over the threads.
    /// This yields a DataFrame with `n_chunks == n_threads`.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let mask_parts = split_ca(mask, n_threads).unwrap();
        let frame_parts = split_df(self, n_threads).unwrap();

        // filter every (mask part, frame part) pair on the thread pool
        let filtered = POOL.install(|| {
            mask_parts
                .par_iter()
                .zip(frame_parts)
                .map(|(part_mask, part)| {
                    let mut cols = Vec::with_capacity(part.columns.len());
                    for s in part.columns.iter() {
                        cols.push(s.filter(part_mask)?);
                    }
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect::<PolarsResult<Vec<_>>>()
        })?;

        // stitch the filtered parts back together in their original order
        let mut parts = filtered.into_iter();
        let mut acc = parts.next().unwrap();
        for df in parts {
            acc.vstack_mut(&df).unwrap();
        }
        Ok(acc)
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        // Setting the `POLARS_VERT_PAR` environment variable opts into the
        // row-wise parallel strategy.
        let vertical = std::env::var("POLARS_VERT_PAR").is_ok();
        if vertical {
            let mut owned = self.clone();
            return owned.filter_vertical(mask);
        }

        // Default: one task per column; Utf8 columns use the threaded filter.
        self.try_apply_columns_par(&|s| {
            if matches!(s.dtype(), DataType::Utf8) {
                s.filter_threaded(mask, true)
            } else {
                s.filter(mask)
            }
        })
        .map(DataFrame::new_no_checks)
    }

    /// Sequential counterpart of `filter`: the mask is applied to one column
    /// after the other, without parallelization.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        self.try_apply_columns(&|s| s.filter(mask))
            .map(DataFrame::new_no_checks)
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        // every column consumes its own copy of the index iterator
        let taken = self.try_apply_columns_par(&|s| {
            let mut indices = iter.clone();
            s.take_iter(&mut indices)
        });

        taken.map(DataFrame::new_no_checks)
    }

    /// Take `DataFrame` values by indexes from an iterator.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        // `POLARS_VERT_PAR` opts into the row-wise parallel strategy.
        let vertical = std::env::var("POLARS_VERT_PAR").is_ok();
        if vertical {
            let indices: NoNull<IdxCa> = iter.map(|i| i as IdxSize).collect();
            return self.take_unchecked_vectical(&indices.into_inner());
        }

        let single_chunk = self.n_chunks() == 1;
        let contains_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        // In these cases the indices are materialized once and shared by all columns.
        if contains_utf8 || (single_chunk && self.width() > 1) {
            let indices: NoNull<IdxCa> = iter.map(|i| i as IdxSize).collect();
            return self.take_unchecked(&indices.into_inner());
        }

        let new_col = match self.width() {
            // a lone column can drain the iterator directly
            1 => self
                .columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>(),
            // otherwise every column works on its own copy, in parallel
            _ => self.apply_columns_par(&|s| {
                let mut own = iter.clone();
                s.take_iter_unchecked(&mut own)
            }),
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` values by indexes from an iterator that may contain None values.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        // `POLARS_VERT_PAR` opts into the row-wise parallel strategy.
        let vertical = std::env::var("POLARS_VERT_PAR").is_ok();
        if vertical {
            let indices: IdxCa = iter.map(|opt| opt.map(|v| v as IdxSize)).collect();
            return self.take_unchecked_vectical(&indices);
        }

        let single_chunk = self.n_chunks() == 1;
        let contains_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        // In these cases the indices are materialized once and shared by all columns.
        if contains_utf8 || (single_chunk && self.width() > 1) {
            let indices: IdxCa = iter.map(|opt| opt.map(|v| v as IdxSize)).collect();
            return self.take_unchecked(&indices);
        }

        let new_col = match self.width() {
            // a lone column can drain the iterator directly
            1 => self
                .columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>(),
            // otherwise every column works on its own copy, in parallel
            _ => self.apply_columns_par(&|s| {
                let mut own = iter.clone();
                s.take_opt_iter_unchecked(&mut own)
            }),
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` rows by index values.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        // Rechunk multi-chunk indices into one chunk; otherwise borrow them as they are.
        let indices = match indices.chunks.len() {
            0 | 1 => Cow::Borrowed(indices),
            _ => Cow::Owned(indices.rechunk()),
        };

        // One task per column; Utf8 columns use the threaded take.
        let taken = POOL.install(|| {
            self.try_apply_columns_par(&|s| {
                if matches!(s.dtype(), DataType::Utf8) {
                    s.take_threaded(&indices, true)
                } else {
                    s.take(&indices)
                }
            })
        });

        taken.map(DataFrame::new_no_checks)
    }

    /// Take rows by the positions in `idx`, delegating to `take_unchecked_impl`
    /// with threading allowed.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably every index in `idx` must be in bounds for this
    /// frame, since the per-`Series` `take_unchecked` calls are `unsafe` — confirm.
    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        self.take_unchecked_impl(idx, true)
    }

    /// Take rows by the positions in `idx`, either column-parallel on the thread
    /// pool (`allow_threads == true`) or one column after the other.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably every index in `idx` must be in bounds for this
    /// frame, since the per-`Series` `take_unchecked` calls are `unsafe` — confirm.
    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        if !allow_threads {
            // sequential path
            let cols = self
                .columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect();
            return DataFrame::new_no_checks(cols);
        }

        // parallel path: one task per column; Utf8 columns use the threaded take
        let cols = POOL.install(|| {
            self.apply_columns_par(&|s| {
                if matches!(s.dtype(), DataType::Utf8) {
                    s.take_unchecked_threaded(idx, true).unwrap()
                } else {
                    s.take_unchecked(idx).unwrap()
                }
            })
        });
        DataFrame::new_no_checks(cols)
    }

    /// Take rows by the positions in `indices`, parallelizing over row-wise
    /// parts of the indices (one per thread) rather than over the columns, and
    /// stacking the partial frames back together.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably every index in `indices` must be in bounds for
    /// this frame, since the per-`Series` `take_unchecked` calls are `unsafe` — confirm.
    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let index_parts = split_ca(indices, n_threads).unwrap();

        // every part of the indices produces its own partial frame
        let partial: Vec<_> = POOL.install(|| {
            index_parts
                .par_iter()
                .map(|part| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(part).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        // stitch the partial frames back together in their original order
        let mut parts = partial.into_iter();
        let mut acc = parts.next().unwrap();
        for df in parts {
            acc.vstack_mut(&df).unwrap();
        }
        acc
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        // Errors with `NotFound` when `column` does not exist.
        let idx = self
            .find_idx_by_name(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))?;

        // Validate the column names as they would be *after* the rename, before
        // touching anything: a rejected rename must leave the frame unchanged
        // instead of leaving it behind with duplicate column names.
        let has_duplicates = {
            let unique_names: AHashSet<&str, ahash::RandomState> = AHashSet::from_iter(
                self.columns
                    .iter()
                    .enumerate()
                    .map(|(i, s)| if i == idx { name } else { s.name() }),
            );
            unique_names.len() != self.columns.len()
        };
        if has_duplicates {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }

        self.columns[idx].rename(name);
        Ok(self)
    }

    /// Sort this `DataFrame` in place by the given column(s).
    ///
    /// `reverse` holds the descending flag(s) for `by_column`.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // rechunk first: there is a lot of indirection in both sorting and take
        self.as_single_chunk_par();
        let keys = self.select_series(by_column)?;
        let descending = reverse.into_vec();
        let sorted = self.sort_impl(keys, descending, false, None)?;
        self.columns = sorted.columns;
        Ok(self)
    }

    /// This is the dispatch of `Self::sort`, and exists to reduce compile bloat by monomorphization.
    ///
    /// Sorts the frame by the `by_column` series, where `reverse` holds the
    /// matching descending flags, `nulls_last` is forwarded to the single-column
    /// sort options, and `slice` is an optional `(offset, length)` window applied
    /// to the sorted result.
    ///
    /// # Panics
    ///
    /// Indexes `reverse[0]` and `by_column[0]` unconditionally, so both must be
    /// non-empty. Sorting by more than one column panics when the
    /// `sort_multiple` feature is not enabled.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // Note that the by_column argument also contains evaluated expressions from
        // polars-lazy that may not even be present in this dataframe.

        // Therefore, when we try to mark the first column as sorted, we ignore the
        // error, as such expressions are not present (they are renamed to
        // _POLARS_SORT_COLUMN_i).
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();
        // `take` holds the row indices that put the frame in sorted order.
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: reverse[0],
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        // Slice the indices rather than the result, so only the requested rows are taken.
        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted.
        // If the column does not exist that is fine, because we then sorted by an
        // expression that is not present in the dataframe.
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted clone of this `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        // Sort a copy in place, so `self` stays untouched.
        let mut sorted = self.clone();
        sorted.sort_in_place(by_column, reverse)?;
        Ok(sorted)
    }

    /// Return a copy of this `DataFrame` sorted by the single column `by_column`,
    /// honoring the `descending` and `nulls_last` settings of `options`.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut out = self.clone();
        // rechunk first: there is a lot of indirection in both sorting and take
        out.as_single_chunk_par();
        let key = out.column(by_column)?.clone();
        let sorted = out.sort_impl(
            vec![key],
            vec![options.descending],
            options.nulls_last,
            None,
        )?;
        out.columns = sorted.columns;
        Ok(out)
    }

    /// Replace a column with a `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Country" => &["United States", "China"],
    ///                         "Area (km²)" => &[9_833_520, 9_596_961])?;
    /// let s: Series = Series::new("Country", &["USA", "PRC"]);
    ///
    /// assert!(df.replace("Nation", s.clone()).is_err());
    /// assert!(df.replace("Country", s).is_ok());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace<S: IntoSeries>(&mut self, column: &str, new_col: S) -> PolarsResult<&mut Self> {
        // The previous contents are ignored; `apply` swaps in the new series.
        self.apply(column, move |_previous| new_col.into_series())
    }

    /// Replace or update a column. Unlike [DataFrame::with_column], the name of the
    /// resulting column is taken from `column: &str` and not from the name of the
    /// `Series` passed to this method.
    pub fn replace_or_add<S: IntoSeries>(
        &mut self,
        column: &str,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut series = new_col.into_series();
        // force the requested name before handing the series to `with_column`
        series.rename(column);
        self.with_column(series)
    }

    /// Replace column at index `idx` with a `Series`.
    ///
    /// # Example
    ///
    /// ```ignored
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace_at_idx<S: IntoSeries>(
        &mut self,
        idx: usize,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let replacement = new_col.into_series();

        // The new column must have as many rows as the frame.
        let series_len = replacement.len();
        let height = self.height();
        if series_len != height {
            return Err(PolarsError::ShapeMisMatch(
                format!("Cannot replace Series at index {}. The shape of Series {} does not match that of the DataFrame {}",
                idx, series_len, height
                ).into()));
        }

        // The position must refer to an existing column.
        let width = self.width();
        if idx >= width {
            return Err(PolarsError::ComputeError(
                format!(
                    "Column index: {} outside of DataFrame with {} columns",
                    idx, width
                )
                .into(),
            ));
        }

        // The assignment drops the old column.
        self.columns[idx] = replacement;
        Ok(self)
    }

    /// Apply a closure to a column. This is the recommended way to do in place modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("names", &["Jean", "Claude", "van"]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// fn str_to_len(str_val: &Series) -> Series {
    ///     str_val.utf8()
    ///         .unwrap()
    ///         .into_iter()
    ///         .map(|opt_name: Option<&str>| {
    ///             opt_name.map(|name: &str| name.len() as u32)
    ///          })
    ///         .collect::<UInt32Chunked>()
    ///         .into_series()
    /// }
    ///
    /// // Replace the names column by the length of the names.
    /// df.apply("names", str_to_len);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    |       |
    /// | ---    | names |
    /// | str    | u32   |
    /// +========+=======+
    /// | "ham"  | 4     |
    /// +--------+-------+
    /// | "spam" | 6     |
    /// +--------+-------+
    /// | "egg"  | 3     |
    /// +--------+-------+
    /// ```
    pub fn apply<F, S>(&mut self, name: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        // Resolve the name to a position, then let the index-based variant do the work.
        let position = self.check_name_to_idx(name)?;
        self.apply_at_idx(position, f)
    }

    /// Apply a closure to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.apply_at_idx(1, |s| s + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | ascii |
    /// | ---    | ---   |
    /// | str    | i32   |
    /// +========+=======+
    /// | "ham"  | 102   |
    /// +--------+-------+
    /// | "spam" | 111   |
    /// +--------+-------+
    /// | "egg"  | 111   |
    /// +--------+-------+
    /// ```
    pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let df_height = self.height();
        let width = self.width();
        let slot = match self.columns.get_mut(idx) {
            Some(slot) => slot,
            None => {
                return Err(PolarsError::ComputeError(
                    format!("Column index: {idx} outside of DataFrame with {width} columns",)
                        .into(),
                ))
            }
        };
        let original_name = slot.name().to_string();

        let produced = f(slot).into_series();
        let mut replacement = match produced.len() {
            // a single value is broadcast to the height of the frame
            1 => produced.new_from_index(0, df_height),
            len if len == df_height => produced,
            len => {
                return Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Result Series has shape {} where the DataFrame has height {}",
                        len, df_height
                    )
                    .into(),
                ));
            }
        };

        // make sure the name remains the same after applying the closure
        replacement.rename(&original_name);
        *slot = replacement;
        Ok(self)
    }

    /// Apply a closure that may fail to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values in a column of a `DataFrame` given a range of indexes.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// let idx = vec![0, 1, 4];
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set_at_idx_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "ham-is-modified"   | 1      |
    /// +---------------------+--------+
    /// | "spam-is-modified"  | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "quack-is-modified" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let df_height = self.height();
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();

        // An error from the closure is propagated and leaves the column untouched.
        let new_col = f(col)?.into_series();

        // Keep all columns at the same length, following the rules of `apply_at_idx`:
        // a result of a different length would make the DataFrame invalid.
        let mut new_col = match new_col.len() {
            // the only column of a frame has no other column to disagree with
            _ if width == 1 => new_col,
            len if len == df_height => new_col,
            // a single value is broadcast to the height of the frame
            1 => new_col.new_from_index(0, df_height),
            len => {
                return Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Result Series has shape {} where the DataFrame has height {}",
                        len, df_height
                    )
                    .into(),
                ));
            }
        };

        // make sure the name remains the same after applying the closure
        new_col.rename(&name);
        *col = new_col;
        Ok(self)
    }

    /// Apply a closure that may fail to a column. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values in a column of a `DataFrame` given a boolean mask.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // create a mask
    /// let values = df.column("values")?;
    /// let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?;
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set(&mask, Some("not_within_bounds"))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "not_within_bounds" | 1      |
    /// +---------------------+--------+
    /// | "spam"              | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "not_within_bounds" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply<F, S>(&mut self, column: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        // Resolve the column name to a position, then delegate to the index-based variant.
        match self.find_idx_by_name(column) {
            Some(position) => self.try_apply_at_idx(position, f),
            None => Err(PolarsError::NotFound(column.to_string().into())),
        }
    }

    /// Slice the `DataFrame` along the rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Fruit" => &["Apple", "Grape", "Grape", "Fig", "Fig"],
    ///                         "Color" => &["Green", "Red", "White", "White", "Red"])?;
    /// let sl: DataFrame = df.slice(2, 3);
    ///
    /// assert_eq!(sl.shape(), (3, 2));
    /// println!("{}", sl);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Output:
    /// ```text
    /// shape: (3, 2)
    /// +-------+-------+
    /// | Fruit | Color |
    /// | ---   | ---   |
    /// | str   | str   |
    /// +=======+=======+
    /// | Grape | White |
    /// +-------+-------+
    /// | Fig   | White |
    /// +-------+-------+
    /// | Fig   | Red   |
    /// +-------+-------+
    /// ```
    #[must_use]
    pub fn slice(&self, offset: i64, length: usize) -> Self {
        // A window covering the whole frame is just a clone.
        let covers_whole_frame = offset == 0 && length == self.height();
        if covers_whole_frame {
            return self.clone();
        }

        let mut sliced = Vec::with_capacity(self.columns.len());
        for series in &self.columns {
            sliced.push(series.slice(offset, length));
        }
        DataFrame::new_no_checks(sliced)
    }

    /// Slice the `DataFrame` along the rows, handing the per-column slicing to
    /// `apply_columns_par`.
    ///
    /// Returns a plain clone when the requested window spans the whole frame.
    #[must_use]
    pub fn slice_par(&self, offset: i64, length: usize) -> Self {
        if offset != 0 || length != self.height() {
            let columns = self.apply_columns_par(&|series| series.slice(offset, length));
            DataFrame::new_no_checks(columns)
        } else {
            self.clone()
        }
    }

    #[must_use]
    pub fn _slice_and_realloc(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        DataFrame::new_no_checks(self.apply_columns(&|s| {
            let mut out = s.slice(offset, length);
            out.shrink_to_fit();
            out
        }))
    }

    /// Get the head of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank by GDP (2021)" => &[1, 2, 3, 4, 5],
    ///         "Continent" => &["North America", "Asia", "Asia", "Europe", "Europe"],
    ///         "Country" => &["United States", "China", "Japan", "Germany", "United Kingdom"],
    ///         "Capital" => &["Washington", "Beijing", "Tokyo", "Berlin", "London"])?;
    /// assert_eq!(countries.shape(), (5, 4));
    ///
    /// println!("{}", countries.head(Some(3)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 4)
    /// +--------------------+---------------+---------------+------------+
    /// | Rank by GDP (2021) | Continent     | Country       | Capital    |
    /// | ---                | ---           | ---           | ---        |
    /// | i32                | str           | str           | str        |
    /// +====================+===============+===============+============+
    /// | 1                  | North America | United States | Washington |
    /// +--------------------+---------------+---------------+------------+
    /// | 2                  | Asia          | China         | Beijing    |
    /// +--------------------+---------------+---------------+------------+
    /// | 3                  | Asia          | Japan         | Tokyo      |
    /// +--------------------+---------------+---------------+------------+
    /// ```
    #[must_use]
    pub fn head(&self, length: Option<usize>) -> Self {
        let col = self
            .columns
            .iter()
            .map(|s| s.head(length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Get the tail of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank (2021)" => &[105, 106, 107, 108, 109],
    ///         "Apple Price (€/kg)" => &[0.75, 0.70, 0.70, 0.65, 0.52],
    ///         "Country" => &["Kosovo", "Moldova", "North Macedonia", "Syria", "Turkey"])?;
    /// assert_eq!(countries.shape(), (5, 3));
    ///
    /// println!("{}", countries.tail(Some(2)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (2, 3)
    /// +-------------+--------------------+---------+
    /// | Rank (2021) | Apple Price (€/kg) | Country |
    /// | ---         | ---                | ---     |
    /// | i32         | f64                | str     |
    /// +=============+====================+=========+
    /// | 108         | 0.65               | Syria   |
    /// +-------------+--------------------+---------+
    /// | 109         | 0.52               | Turkey  |
    /// +-------------+--------------------+---------+
    /// ```
    #[must_use]
    pub fn tail(&self, length: Option<usize>) -> Self {
        // Take the tail of every column; `length` is forwarded unchanged.
        let mut tails = Vec::with_capacity(self.columns.len());
        for column in &self.columns {
            tails.push(column.tail(length));
        }
        DataFrame::new_no_checks(tails)
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks(&self) -> RecordBatchIter {
        let columns = &self.columns;
        let n_chunks = self.n_chunks();
        // The iterator starts at chunk 0 and is bounded by the frame's chunk count.
        RecordBatchIter {
            columns,
            idx: 0,
            n_chunks,
        }
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches as physical values.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks_physical(&self) -> PhysRecordBatchIter<'_> {
        // One chunk iterator per column.
        let iters = self
            .columns
            .iter()
            .map(|series| series.chunks().iter())
            .collect();
        PhysRecordBatchIter { iters }
    }

    /// Get a `DataFrame` with all the columns in reversed order.
    #[must_use]
    pub fn reverse(&self) -> Self {
        let mut reversed = Vec::with_capacity(self.columns.len());
        for series in &self.columns {
            reversed.push(series.reverse());
        }
        DataFrame::new_no_checks(reversed)
    }

    /// Shift the values by a given period and fill the parts that will be empty due to this operation
    /// with `Nones`.
    ///
    /// See the method on [Series](../series/trait.SeriesTrait.html#method.shift) for more info on the `shift` operation.
    #[must_use]
    pub fn shift(&self, periods: i64) -> Self {
        // Shift every column by the same number of periods.
        DataFrame::new_no_checks(self.apply_columns_par(&|series| series.shift(periods)))
    }

    /// Replace None values with one of the following strategies:
    /// * Forward fill (replace None with the previous value)
    /// * Backward fill (replace None with the next value)
    /// * Mean fill (replace None with the mean of the whole array)
    /// * Min fill (replace None with the minimum of the whole array)
    /// * Max fill (replace None with the maximum of the whole array)
    ///
    /// See the method on [Series](../series/trait.SeriesTrait.html#method.fill_null) for more info on the `fill_null` operation.
    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
        // Any per-column failure is propagated; otherwise the filled columns form the new frame.
        self.try_apply_columns_par(&|series| series.fill_null(strategy))
            .map(DataFrame::new_no_checks)
    }

    /// Summary statistics for a DataFrame. Only summarizes numeric datatypes at the moment and returns nulls for non-numeric datatypes.
    /// Tries to keep the output similar to pandas.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("categorical" => &["d","e","f"],
    ///                          "numeric" => &[1, 2, 3],
    ///                          "object" => &["a", "b", "c"])?;
    /// assert_eq!(df1.shape(), (3, 3));
    ///
    /// let df2: DataFrame = df1.describe(None);
    /// assert_eq!(df2.shape(), (8, 4));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (8, 4)
    /// ┌──────────┬─────────────┬─────────┬────────┐
    /// │ describe ┆ categorical ┆ numeric ┆ object │
    /// │ ---      ┆ ---         ┆ ---     ┆ ---    │
    /// │ str      ┆ f64         ┆ f64     ┆ f64    │
    /// ╞══════════╪═════════════╪═════════╪════════╡
    /// │ count    ┆ 3.0         ┆ 3.0     ┆ 3.0    │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ mean     ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ std      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ min      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 25%      ┆ null        ┆ 1.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 50%      ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 75%      ┆ null        ┆ 2.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ max      ┆ null        ┆ 3.0     ┆ null   │
    /// └──────────┴─────────────┴─────────┴────────┘
    /// ```
    #[must_use]
    #[cfg(feature = "describe")]
    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        // Cast every column of a one-row aggregate frame to `Float64`.
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let columns = df
                .columns
                .iter()
                .map(|s| s.cast(&DataType::Float64).expect("cast to float failed"))
                .collect::<Vec<Series>>();

            DataFrame::new(columns).unwrap()
        }

        // One-row frame holding the length of every column.
        fn count(df: &DataFrame) -> DataFrame {
            DataFrame::new_no_checks(
                df.apply_columns_par(&|s| Series::new(s.name(), [s.len() as IdxSize])),
            )
        }

        let quantiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        // Five fixed rows (count, mean, std, min, max) plus one row per requested quantile.
        let n_rows = quantiles.len() + 5;
        let mut labels: Vec<String> = Vec::with_capacity(n_rows);
        let mut frames: Vec<DataFrame> = Vec::with_capacity(n_rows);

        for label in ["count", "mean", "std", "min"].iter() {
            labels.push((*label).to_string());
        }
        frames.push(describe_cast(&count(self)));
        frames.push(describe_cast(&self.mean()));
        frames.push(describe_cast(&self.std(1)));
        frames.push(describe_cast(&self.min()));

        for &q in quantiles {
            let frame = self
                .quantile(q, QuantileInterpolOptions::Linear)
                .expect("quantile failed");
            frames.push(describe_cast(&frame));
            labels.push(format!("{}%", q * 100.0));
        }

        // Keep order same as pandas
        frames.push(describe_cast(&self.max()));
        labels.push("max".to_string());

        // Stack the one-row frames and prepend the label column.
        let mut summary = concat_df_unchecked(&frames);
        summary
            .insert_at_idx(0, Series::new("describe", labels))
            .expect("insert of header failed");

        summary
    }

    /// Aggregate the columns to their maximum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.max();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 6       | 5       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn max(&self) -> Self {
        // Reduce every column with `max_as_series`.
        DataFrame::new_no_checks(self.apply_columns_par(&|series| series.max_as_series()))
    }

    /// Aggregate the columns to their standard deviation values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.std(1);
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +-------------------+--------------------+
    /// | Die n°1           | Die n°2            |
    /// | ---               | ---                |
    /// | f64               | f64                |
    /// +===================+====================+
    /// | 2.280350850198276 | 1.0954451150103321 |
    /// +-------------------+--------------------+
    /// ```
    #[must_use]
    pub fn std(&self, ddof: u8) -> Self {
        // Reduce every column with `std_as_series`, forwarding `ddof` unchanged.
        DataFrame::new_no_checks(self.apply_columns_par(&|series| series.std_as_series(ddof)))
    }
    /// Aggregate the columns to their variance values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.var(1);
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 5.2     | 1.2     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn var(&self, ddof: u8) -> Self {
        // Reduce every column with `var_as_series`, forwarding `ddof` unchanged.
        let variances = self.apply_columns_par(&|series| series.var_as_series(ddof));

        DataFrame::new_no_checks(variances)
    }

    /// Aggregate the columns to their minimum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.min();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 1       | 2       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn min(&self) -> Self {
        // Reduce every column with `min_as_series`.
        DataFrame::new_no_checks(self.apply_columns_par(&|series| series.min_as_series()))
    }

    /// Aggregate the columns to their sum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.sum();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 16      | 16      |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn sum(&self) -> Self {
        // Reduce every column with `sum_as_series`.
        let sums = self.apply_columns_par(&|series| series.sum_as_series());

        DataFrame::new_no_checks(sums)
    }

    /// Aggregate the columns to their mean values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.mean();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 3.2     | 3.2     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn mean(&self) -> Self {
        // Reduce every column with `mean_as_series`.
        DataFrame::new_no_checks(self.apply_columns_par(&|series| series.mean_as_series()))
    }

    /// Aggregate the columns to their median values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.median();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 3       | 3       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn median(&self) -> Self {
        // Reduce every column with `median_as_series`.
        let medians = self.apply_columns_par(&|series| series.median_as_series());

        DataFrame::new_no_checks(medians)
    }

    /// Aggregate the columns to their quantile values.
    pub fn quantile(&self, quantile: f64, interpol: QuantileInterpolOptions) -> PolarsResult<Self> {
        // Any per-column failure is propagated; otherwise the results form the new frame.
        self.try_apply_columns_par(&|s| s.quantile_as_series(quantile, interpol))
            .map(DataFrame::new_no_checks)
    }

    /// Aggregate the columns horizontally to their minimum values.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmin(&self) -> PolarsResult<Option<Series>> {
        // Pairwise combine step. The mask is `(acc < s AND acc is not null) OR s is null`;
        // `zip_with` then chooses between `acc` and `s` according to that mask.
        let min_fn = |acc: &Series, s: &Series| {
            let mask = (acc.lt(s)? & acc.is_not_null()) | s.is_null();
            acc.zip_with(&mask, s)
        };

        match self.columns.as_slice() {
            // no columns: nothing to aggregate
            [] => Ok(None),
            // a single column is its own horizontal minimum
            [only] => Ok(Some(only.clone())),
            [left, right] => min_fn(left, right).map(Some),
            _ => {
                // the try_reduce_with is a bit slower in parallelism,
                // but I don't think it matters here as we parallelize over columns, not over elements
                POOL.install(|| {
                    let reduced = self
                        .columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| min_fn(&l, &r).map(Cow::Owned));
                    // we can unwrap the option, because this arm is only reached
                    // with at least 3 columns
                    reduced.unwrap().map(|cow| Some(cow.into_owned()))
                })
            }
        }
    }

    /// Aggregate the columns horizontally to their maximum values.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmax(&self) -> PolarsResult<Option<Series>> {
        // Pairwise combine step. The mask is `(acc > s AND acc is not null) OR s is null`;
        // `zip_with` then chooses between `acc` and `s` according to that mask.
        let max_fn = |acc: &Series, s: &Series| {
            let mask = (acc.gt(s)? & acc.is_not_null()) | s.is_null();
            acc.zip_with(&mask, s)
        };

        match self.columns.as_slice() {
            // no columns: nothing to aggregate
            [] => Ok(None),
            // a single column is its own horizontal maximum
            [only] => Ok(Some(only.clone())),
            [left, right] => max_fn(left, right).map(Some),
            _ => {
                // the try_reduce_with is a bit slower in parallelism,
                // but I don't think it matters here as we parallelize over columns, not over elements
                POOL.install(|| {
                    let reduced = self
                        .columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| max_fn(&l, &r).map(Cow::Owned));
                    // we can unwrap the option, because this arm is only reached
                    // with at least 3 columns
                    reduced.unwrap().map(|cow| Some(cow.into_owned()))
                })
            }
        }
    }

src/chunked_array/ops/unique/rank.rs (line 54)

/// Compute the rank of every value in `s` according to `method`.
///
/// `reverse` is forwarded as the `descending` flag of the argsort that drives the ranking.
///
/// Output dtype, as established by the branches below: `Average` produces a `Float32`
/// series, every other method produces an `IdxSize`-typed series.
///
/// Series of length 0 or 1 are answered directly. When `s` contains nulls, they are filled
/// with a bound value, the filled series is ranked recursively, and the original validity is
/// put back on the result.
pub(crate) fn rank(s: &Series, method: RankMethod, reverse: bool) -> Series {
    // Trivial lengths: nothing to sort.
    // NOTE(review): this runs before the null handling below, so a single null value
    // also receives rank 1 — confirm that this is intended.
    match s.len() {
        1 => {
            return match method {
                Average => Series::new(s.name(), &[1.0f32]),
                _ => Series::new(s.name(), &[1 as IdxSize]),
            };
        }
        0 => {
            return match method {
                Average => Float32Chunked::from_slice(s.name(), &[]).into_series(),
                _ => IdxCa::from_slice(s.name(), &[]).into_series(),
            };
        }
        _ => {}
    }

    if s.null_count() > 0 {
        // Remember which positions were non-null so they can be restored on the output.
        let nulls = s.is_not_null().rechunk();
        let arr = nulls.downcast_iter().next().unwrap();
        let validity = arr.values();
        // Currently, nulls tie with the minimum or maximum bound for a type, depending on reverse.
        // TODO: Need to expose nulls_last in argsort to prevent this.
        // Fill using MaxBound/MinBound to give nulls last rank.
        // we will replace them later.
        let null_strategy = if reverse {
            FillNullStrategy::MinBound
        } else {
            FillNullStrategy::MaxBound
        };
        let s = s.fill_null(null_strategy).unwrap();

        // Rank the null-free series, then re-apply the original validity to the first chunk.
        let mut out = rank(&s, method, reverse);
        unsafe {
            let arr = &mut out.chunks_mut()[0];
            *arr = arr.with_validity(Some(validity.clone()))
        }
        return out;
    }

    // See: https://github.com/scipy/scipy/blob/v1.7.1/scipy/stats/stats.py#L8631-L8737

    let len = s.len();
    // NOTE(review): the early return above fires whenever `s.null_count() > 0`, so
    // `null_count` looks like it is always 0 from here on and the null-specific branches
    // further down appear unreachable — confirm before relying on them.
    let null_count = s.null_count();
    let sort_idx_ca = s.argsort(SortOptions {
        descending: reverse,
        ..Default::default()
    });
    let sort_idx = sort_idx_ca.downcast_iter().next().unwrap().values();

    // `inv[i]` will hold the position of original element `i` in the sorted order
    // (offset by one for the Ordinal/Random methods, see `count` below).
    let mut inv: Vec<IdxSize> = Vec::with_capacity(len);
    // Safety:
    // Values will be filled next and there is only primitive data
    #[allow(clippy::uninit_vec)]
    unsafe {
        inv.set_len(len)
    }
    let inv_values = inv.as_mut_slice();

    // Ordinal (and Random) ranks are 1-based; the other methods use `inv` as a 0-based index.
    #[cfg(feature = "random")]
    let mut count = if let RankMethod::Ordinal | RankMethod::Random = method {
        1 as IdxSize
    } else {
        0
    };

    #[cfg(not(feature = "random"))]
    let mut count = if let RankMethod::Ordinal = method {
        1 as IdxSize
    } else {
        0
    };

    // Safety:
    // we are in bounds
    unsafe {
        sort_idx.iter().for_each(|&i| {
            *inv_values.get_unchecked_mut(i as usize) = count;
            count += 1;
        });
    }

    // This `use` is an item declaration, so the bare variant names are in scope for the
    // whole function body, including the early-return matches at the top.
    use RankMethod::*;
    match method {
        Ordinal => {
            let inv_ca = IdxCa::from_vec(s.name(), inv);
            inv_ca.into_series()
        }
        #[cfg(feature = "random")]
        Random => {
            // Safety:
            // in bounds
            let arr = unsafe { s.take_unchecked(&sort_idx_ca).unwrap() };
            // Compare each sorted value with its predecessor: `true` marks the start of a new
            // group of equal values.
            let not_consecutive_same = arr
                .slice(1, len - 1)
                .not_equal(&arr.slice(0, len - 1))
                .unwrap()
                .rechunk();
            let obs = not_consecutive_same.downcast_iter().next().unwrap();

            // Collect slice indices for sort_idx which point to ties in the original series.
            let mut ties_indices = Vec::with_capacity(len + 1);
            let mut ties_index: usize = 0;

            ties_indices.push(ties_index);
            obs.iter().for_each(|b| {
                if let Some(b) = b {
                    ties_index += 1;
                    if b {
                        ties_indices.push(ties_index)
                    }
                }
            });
            // Close last slice (if there were nulls in the original series, they will always be in the last slice).
            ties_indices.push(len);

            let mut sort_idx = sort_idx.to_vec();

            let mut thread_rng = thread_rng();
            let rng = &mut SmallRng::from_rng(&mut thread_rng).unwrap();

            // Shuffle sort_idx positions which point to ties in the original series.
            for i in 0..(ties_indices.len() - 1) {
                let ties_index_start = ties_indices[i];
                let ties_index_end = ties_indices[i + 1];
                if ties_index_end - ties_index_start > 1 {
                    sort_idx[ties_index_start..ties_index_end].shuffle(rng);
                }
            }

            // Recreate inv_ca (where ties are randomly shuffled compared with Ordinal).
            let mut count = 1 as IdxSize;
            unsafe {
                sort_idx.iter().for_each(|&i| {
                    *inv_values.get_unchecked_mut(i as usize) = count;
                    count += 1;
                });
            }

            let inv_ca = IdxCa::from_vec(s.name(), inv);
            inv_ca.into_series()
        }
        _ => {
            // Remaining methods (Dense, Min, Max, Average) are derived from a dense rank.
            let inv_ca = IdxCa::from_vec(s.name(), inv);
            // Safety:
            // in bounds
            let arr = unsafe { s.take_unchecked(&sort_idx_ca).unwrap() };
            let validity = arr.chunks()[0].validity().cloned();
            // `true` wherever a sorted value differs from its predecessor.
            let not_consecutive_same = arr
                .slice(1, len - 1)
                .not_equal(&arr.slice(0, len - 1))
                .unwrap()
                .rechunk();
            // this obs is shorter than that of scipy stats, because we can just start the cumsum by 1
            // instead of 0
            let obs = not_consecutive_same.downcast_iter().next().unwrap();
            let mut dense = Vec::with_capacity(len);

            // this offset save an offset on the whole column, what scipy does in:
            //
            // ```python
            //     if method == 'min':
            //         return count[dense - 1] + 1
            // ```
            // INVALID LINT REMOVE LATER
            #[allow(clippy::bool_to_int_with_if)]
            let mut cumsum: IdxSize = if let RankMethod::Min = method {
                0
            } else {
                // nulls will be first, rank, but we will replace them (with null)
                // so this ensures the second rank will be 1
                if matches!(method, RankMethod::Dense) && s.null_count() > 0 {
                    0
                } else {
                    1
                }
            };

            // Running count of distinct values seen so far, in sorted order.
            dense.push(cumsum);
            obs.values_iter().for_each(|b| {
                if b {
                    cumsum += 1;
                }
                dense.push(cumsum)
            });
            let arr = IdxArr::from_data_default(dense.into(), validity);
            let dense: IdxCa = (s.name(), arr).into();
            // Map the dense ranks from sorted order back to the original element order.
            // Safety:
            // in bounds
            let dense = unsafe { dense.take_unchecked((&inv_ca).into()) };

            if let RankMethod::Dense = method {
                return if s.null_count() == 0 {
                    dense.into_series()
                } else {
                    // null will be the first rank
                    // we restore original nulls and shift all ranks by one
                    let validity = s.is_null().rechunk();
                    let validity = validity.downcast_iter().next().unwrap();
                    let validity = validity.values().clone();

                    let arr = dense.downcast_iter().next().unwrap();
                    let arr = arr.with_validity(Some(validity));
                    let dtype = arr.data_type().clone();

                    // Safety:
                    // given dtype is correct
                    unsafe {
                        Series::try_from_arrow_unchecked(s.name(), vec![arr], &dtype).unwrap()
                    }
                };
            }

            // `count` collects the sorted positions at which a new group of equal values
            // starts, framed by 0 at the front and the number of non-null values at the end.
            let bitmap = obs.values();
            let cap = bitmap.len() - bitmap.unset_bits();
            let mut count = Vec::with_capacity(cap + 1);
            let mut cnt: IdxSize = 0;
            count.push(cnt);

            if null_count > 0 {
                obs.iter().for_each(|b| {
                    if let Some(b) = b {
                        cnt += 1;
                        if b {
                            count.push(cnt)
                        }
                    }
                });
            } else {
                obs.values_iter().for_each(|b| {
                    cnt += 1;
                    if b {
                        count.push(cnt)
                    }
                });
            }

            count.push((len - null_count) as IdxSize);
            let count = IdxCa::from_vec(s.name(), count);

            match method {
                Max => {
                    // Safety:
                    // within bounds
                    unsafe { count.take_unchecked((&dense).into()).into_series() }
                }
                Min => {
                    // Safety:
                    // within bounds
                    unsafe { (count.take_unchecked((&dense).into()) + 1).into_series() }
                }
                Average => {
                    // Average of the `count[dense]` and `count[dense - 1] + 1` lookups,
                    // computed in Float32.
                    // Safety:
                    // in bounds
                    let a = unsafe { count.take_unchecked((&dense).into()) }
                        .cast(&DataType::Float32)
                        .unwrap();
                    let b = unsafe { count.take_unchecked((&(dense - 1)).into()) }
                        .cast(&DataType::Float32)
                        .unwrap()
                        + 1.0;
                    (&a + &b) * 0.5
                }
                // Dense returned above; Ordinal/Random are handled by the outer match arms.
                #[cfg(feature = "random")]
                Dense | Ordinal | Random => unimplemented!(),
                #[cfg(not(feature = "random"))]
                Dense | Ordinal => unimplemented!(),
            }
        }
    }
}

source

fn is_unique(&self) -> PolarsResult<BooleanChunked>

Get a mask of all the unique values.

source

fn is_duplicated(&self) -> PolarsResult<BooleanChunked>

Get a mask of all the duplicated values.

source

fn reverse(&self) -> Series

Return a Series in reversed order.

Examples found in repository ?

src/frame/mod.rs (line 2426)

    pub fn reverse(&self) -> Self {
        // Reverse every column and rebuild the frame from the results.
        let mut reversed = Vec::with_capacity(self.columns.len());
        for series in &self.columns {
            reversed.push(series.reverse());
        }
        DataFrame::new_no_checks(reversed)
    }

source

fn as_single_ptr(&mut self) -> PolarsResult<usize>

Rechunk and return a pointer to the start of the Series. Only implemented for numeric types.

Examples found in repository ?

src/series/mod.rs (line 226)

225
226
227

    /// Forward to the inner series implementation's `as_single_ptr`.
    pub fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        let inner = self._get_inner_mut();
        inner.as_single_ptr()
    }

source

fn shift(&self, _periods: i64) -> Series

Shift the values by a given period and fill the parts that will be empty due to this operation with Nones.

NOTE: If you want to fill the Nones with a value use the shift operation on ChunkedArray<T>.

Example

fn example() -> PolarsResult<()> {
    let s = Series::new("series", &[1, 2, 3]);

    // Each case pairs a shift period with the values expected afterwards.
    let cases: [(i64, [Option<i32>; 3]); 3] = [
        (1, [None, Some(1), Some(2)]),
        (-1, [Some(2), Some(3), None]),
        (2, [None, None, Some(1)]),
    ];

    for (periods, expected) in &cases {
        let shifted = s.shift(*periods);
        assert_eq!(Vec::from(shifted.i32()?), expected);
    }

    Ok(())
}
example();

Examples found in repository ?

src/frame/mod.rs (line 2436)

    /// Shift every column by `periods`, applying the shift to the columns in parallel.
    pub fn shift(&self, periods: i64) -> Self {
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.shift(periods)))
    }

More examples

Hide additional examples

src/series/ops/diff.rs (line 8)

    /// Compute the difference between each element and the one `n` positions before it.
    ///
    /// * `NullBehavior::Ignore` subtracts the series shifted by `n` from the series itself.
    /// * `NullBehavior::Drop` subtracts the first `len - n` elements from the elements
    ///   starting at offset `n`, so the result is `n` elements shorter than the input.
    ///
    /// When `n` exceeds the series length, the `Drop` variant subtracts two zero-length
    /// slices instead of underflowing the length computation.
    pub fn diff(&self, n: usize, null_behavior: NullBehavior) -> Series {
        match null_behavior {
            NullBehavior::Ignore => self - &self.shift(n as i64),
            NullBehavior::Drop => {
                // `self.len() - n` would underflow (panic in debug builds, wrap in
                // release builds) whenever `n > self.len()`; saturate to zero instead.
                let len = self.len().saturating_sub(n);
                &self.slice(n as i64, len) - &self.slice(0, len)
            }
        }
    }

source

fn fill_null(&self, _strategy: FillNullStrategy) -> PolarsResult<Series>

Replace None values with one of the following strategies:

Forward fill (replace None with the previous value)
Backward fill (replace None with the next value)
Mean fill (replace None with the mean of the whole array)
Min fill (replace None with the minimum of the whole array)
Max fill (replace None with the maximum of the whole array)

NOTE: If you want to fill the Nones with a value use the fill_null operation on ChunkedArray<T>.

Example

fn example() -> PolarsResult<()> {
    let s = Series::new("some_missing", &[Some(1), None, Some(2)]);

    let filled = s.fill_null(FillNullStrategy::Forward(None))?;
    assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);

    let filled = s.fill_null(FillNullStrategy::Backward(None))?;
    assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(2), Some(2)]);

    let filled = s.fill_null(FillNullStrategy::Min)?;
    assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);

    let filled = s.fill_null(FillNullStrategy::Max)?;
    assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(2), Some(2)]);

    let filled = s.fill_null(FillNullStrategy::Mean)?;
    assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]);

    Ok(())
}
example();

Examples found in repository ?

src/frame/mod.rs (line 2450)

    /// Fill the nulls of every column with `strategy`, processing the columns in parallel.
    ///
    /// Propagates the error returned by `try_apply_columns_par` if filling a column fails.
    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
        self.try_apply_columns_par(&|s| s.fill_null(strategy))
            .map(DataFrame::new_no_checks)
    }

    /// Summary statistics for a DataFrame. Only summarizes numeric datatypes at the moment and returns nulls for non numeric datatypes.
    /// Tries to keep the output similar to pandas.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("categorical" => &["d","e","f"],
    ///                          "numeric" => &[1, 2, 3],
    ///                          "object" => &["a", "b", "c"])?;
    /// assert_eq!(df1.shape(), (3, 3));
    ///
    /// let df2: DataFrame = df1.describe(None);
    /// assert_eq!(df2.shape(), (8, 4));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (8, 4)
    /// ┌──────────┬─────────────┬─────────┬────────┐
    /// │ describe ┆ categorical ┆ numeric ┆ object │
    /// │ ---      ┆ ---         ┆ ---     ┆ ---    │
    /// │ str      ┆ f64         ┆ f64     ┆ f64    │
    /// ╞══════════╪═════════════╪═════════╪════════╡
    /// │ count    ┆ 3.0         ┆ 3.0     ┆ 3.0    │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ mean     ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ std      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ min      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 25%      ┆ null        ┆ 1.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 50%      ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 75%      ┆ null        ┆ 2.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ max      ┆ null        ┆ 3.0     ┆ null   │
    /// └──────────┴─────────────┴─────────┴────────┘
    /// ```
    #[must_use]
    #[cfg(feature = "describe")]
    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        // Cast every column of `df` to Float64 so all summary rows share one dtype.
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let columns: Vec<Series> = df
                .columns
                .iter()
                .map(|s| s.cast(&DataType::Float64).expect("cast to float failed"))
                .collect();

            DataFrame::new(columns).unwrap()
        }

        // Single-row frame holding the length of each column.
        fn count(df: &DataFrame) -> DataFrame {
            DataFrame::new_no_checks(
                df.apply_columns_par(&|s| Series::new(s.name(), [s.len() as IdxSize])),
            )
        }

        // Fall back to the quartiles when the caller gives no percentiles.
        let percentiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        // `headers[i]` labels the statistic stored in `rows[i]`.
        let mut headers: Vec<String> = ["count", "mean", "std", "min"]
            .iter()
            .map(|h| String::from(*h))
            .collect();

        let mut rows: Vec<DataFrame> = vec![
            describe_cast(&count(self)),
            describe_cast(&self.mean()),
            describe_cast(&self.std(1)),
            describe_cast(&self.min()),
        ];

        for &p in percentiles {
            let quantile_row = self
                .quantile(p, QuantileInterpolOptions::Linear)
                .expect("quantile failed");
            rows.push(describe_cast(&quantile_row));
            headers.push(format!("{}%", p * 100.0));
        }

        // "max" goes last so the row order matches pandas.
        rows.push(describe_cast(&self.max()));
        headers.push("max".to_string());

        let mut summary = concat_df_unchecked(&rows);

        // The statistic labels become the first column.
        summary
            .insert_at_idx(0, Series::new("describe", headers))
            .expect("insert of header failed");

        summary
    }

    /// Aggregate the columns to their maximum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.max();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 6       | 5       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn max(&self) -> Self {
        // Columns are reduced in parallel via `max_as_series`.
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.max_as_series()))
    }

    /// Aggregate the columns to their standard deviation values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.std(1);
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +-------------------+--------------------+
    /// | Die n°1           | Die n°2            |
    /// | ---               | ---                |
    /// | f64               | f64                |
    /// +===================+====================+
    /// | 2.280350850198276 | 1.0954451150103321 |
    /// +-------------------+--------------------+
    /// ```
    #[must_use]
    pub fn std(&self, ddof: u8) -> Self {
        // Columns are reduced in parallel via `std_as_series`, which receives `ddof` unchanged.
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.std_as_series(ddof)))
    }
    /// Aggregate the columns to their variance values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.var(1);
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 5.2     | 1.2     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn var(&self, ddof: u8) -> Self {
        // Columns are reduced in parallel via `var_as_series`, which receives `ddof` unchanged.
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.var_as_series(ddof)))
    }

    /// Aggregate the columns to their minimum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.min();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 1       | 2       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn min(&self) -> Self {
        // Columns are reduced in parallel via `min_as_series`.
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.min_as_series()))
    }

    /// Aggregate the columns to their sum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.sum();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 16      | 16      |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn sum(&self) -> Self {
        // Columns are reduced in parallel via `sum_as_series`.
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.sum_as_series()))
    }

    /// Aggregate the columns to their mean values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.mean();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 3.2     | 3.2     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn mean(&self) -> Self {
        // Columns are reduced in parallel via `mean_as_series`.
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.mean_as_series()))
    }

    /// Aggregate the columns to their median values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.median();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 3       | 3       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn median(&self) -> Self {
        // Columns are reduced in parallel via `median_as_series`.
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.median_as_series()))
    }

    /// Aggregate every column to its `quantile` value, using `interpol` as the
    /// interpolation option.
    ///
    /// Columns are processed in parallel; an error from any column's
    /// `quantile_as_series` call is propagated to the caller.
    pub fn quantile(&self, quantile: f64, interpol: QuantileInterpolOptions) -> PolarsResult<Self> {
        self.try_apply_columns_par(&|s| s.quantile_as_series(quantile, interpol))
            .map(DataFrame::new_no_checks)
    }

    /// Aggregate the columns horizontally (row-wise) to their minimum values.
    ///
    /// Returns `Ok(None)` when the frame has no columns and a clone of the only
    /// column when there is exactly one.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmin(&self) -> PolarsResult<Option<Series>> {
        // Combine two columns: the mask is true where `acc` is smaller than `s` and
        // not null, or where `s` is null; `zip_with` then merges `acc` and `s` by it.
        let min_fn = |acc: &Series, s: &Series| {
            let mask = (acc.lt(s)? & acc.is_not_null()) | s.is_null();
            acc.zip_with(&mask, s)
        };

        match self.columns.as_slice() {
            [] => Ok(None),
            [only] => Ok(Some(only.clone())),
            [first, second] => min_fn(first, second).map(Some),
            // `try_reduce_with` is a bit slower in parallelism, but that should not
            // matter here because the work is split over columns, not over elements.
            _ => POOL.install(|| {
                self.columns
                    .par_iter()
                    .map(|s| Ok(Cow::Borrowed(s)))
                    .try_reduce_with(|l, r| min_fn(&l, &r).map(Cow::Owned))
                    // The reduction cannot be empty: this arm only runs with
                    // at least three columns.
                    .unwrap()
                    .map(|cow| Some(cow.into_owned()))
            }),
        }
    }

    /// Aggregate the columns horizontally (row-wise) to their maximum values.
    ///
    /// Returns `Ok(None)` when the frame has no columns and a clone of the only
    /// column when there is exactly one.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmax(&self) -> PolarsResult<Option<Series>> {
        // Combine two columns: the mask is true where `acc` is greater than `s` and
        // not null, or where `s` is null; `zip_with` then merges `acc` and `s` by it.
        let max_fn = |acc: &Series, s: &Series| {
            let mask = (acc.gt(s)? & acc.is_not_null()) | s.is_null();
            acc.zip_with(&mask, s)
        };

        match self.columns.as_slice() {
            [] => Ok(None),
            [only] => Ok(Some(only.clone())),
            [first, second] => max_fn(first, second).map(Some),
            // `try_reduce_with` is a bit slower in parallelism, but that should not
            // matter here because the work is split over columns, not over elements.
            _ => POOL.install(|| {
                self.columns
                    .par_iter()
                    .map(|s| Ok(Cow::Borrowed(s)))
                    .try_reduce_with(|l, r| max_fn(&l, &r).map(Cow::Owned))
                    // The reduction cannot be empty: this arm only runs with
                    // at least three columns.
                    .unwrap()
                    .map(|cow| Some(cow.into_owned()))
            }),
        }
    }

    /// Aggregate the columns horizontally (row-wise) to their sum.
    ///
    /// With `NullStrategy::Ignore`, columns that carry a validity mask have their
    /// nulls filled with `FillNullStrategy::Zero` before they are added. With any
    /// other strategy the columns are added as they are.
    ///
    /// Returns `Ok(None)` when the frame has no columns and a clone of the only
    /// column when there is exactly one.
    pub fn hsum(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>> {
        let sum_fn =
            |acc: &Series, s: &Series, none_strategy: NullStrategy| -> PolarsResult<Series> {
                let ignore_nulls = matches!(none_strategy, NullStrategy::Ignore);

                // Only fill when nulls are ignored and the operand has a validity mask.
                let lhs = if ignore_nulls && acc.has_validity() {
                    acc.fill_null(FillNullStrategy::Zero)?
                } else {
                    acc.clone()
                };
                let rhs = if ignore_nulls && s.has_validity() {
                    s.fill_null(FillNullStrategy::Zero)?
                } else {
                    s.clone()
                };

                Ok(&lhs + &rhs)
            };

        match self.columns.as_slice() {
            [] => Ok(None),
            [only] => Ok(Some(only.clone())),
            [first, second] => sum_fn(first, second, none_strategy).map(Some),
            // `try_reduce_with` is a bit slower in parallelism, but that should not
            // matter here because the work is split over columns, not over elements.
            _ => POOL.install(|| {
                self.columns
                    .par_iter()
                    .map(|s| Ok(Cow::Borrowed(s)))
                    .try_reduce_with(|l, r| sum_fn(&l, &r, none_strategy).map(Cow::Owned))
                    // The reduction cannot be empty: this arm only runs with
                    // at least three columns.
                    .unwrap()
                    .map(|cow| Some(cow.into_owned()))
            }),
        }
    }

More examples

Hide additional examples

src/chunked_array/ops/unique/rank.rs (line 66)

/// Compute the rank of every value in `s` according to `method`.
///
/// `reverse` is forwarded as the `descending` flag of the underlying argsort.
///
/// The result is a `Float32` series for `Average`; the other methods build an
/// `IdxSize`-based result. Series of length 0 and 1 are answered directly.
/// Series containing nulls are ranked after filling the nulls with a bound value,
/// and the original null positions are then restored through the validity mask.
///
/// The tie-handling part follows the scipy implementation linked in the body.
pub(crate) fn rank(s: &Series, method: RankMethod, reverse: bool) -> Series {
    // Trivial lengths: a single element always has rank 1, an empty series has no ranks.
    // (`Average` resolves through the `use RankMethod::*` further down; `use`
    // declarations are visible in the whole enclosing block.)
    match s.len() {
        1 => {
            return match method {
                Average => Series::new(s.name(), &[1.0f32]),
                _ => Series::new(s.name(), &[1 as IdxSize]),
            };
        }
        0 => {
            return match method {
                Average => Float32Chunked::from_slice(s.name(), &[]).into_series(),
                _ => IdxCa::from_slice(s.name(), &[]).into_series(),
            };
        }
        _ => {}
    }

    // Null handling: rank a null-free copy, then mask the original null positions.
    if s.null_count() > 0 {
        let nulls = s.is_not_null().rechunk();
        let arr = nulls.downcast_iter().next().unwrap();
        // Bitmap that is set for every non-null position of the input.
        let validity = arr.values();
        // Currently, nulls tie with the minimum or maximum bound for a type, depending on reverse.
        // TODO: Need to expose nulls_last in argsort to prevent this.
        // Fill using MaxBound/MinBound to give nulls last rank.
        // we will replace them later.
        let null_strategy = if reverse {
            FillNullStrategy::MinBound
        } else {
            FillNullStrategy::MaxBound
        };
        let s = s.fill_null(null_strategy).unwrap();

        // Recurse on the filled series, which no longer takes this branch.
        let mut out = rank(&s, method, reverse);
        unsafe {
            // Only the first chunk is touched.
            // NOTE(review): presumably `out` always has a single chunk here — confirm.
            let arr = &mut out.chunks_mut()[0];
            *arr = arr.with_validity(Some(validity.clone()))
        }
        return out;
    }

    // See: https://github.com/scipy/scipy/blob/v1.7.1/scipy/stats/stats.py#L8631-L8737

    let len = s.len();
    // NOTE(review): the branch above returns whenever `s.null_count() > 0`, so this
    // looks like it is always 0 from here on — confirm before relying on the
    // null-specific branches below.
    let null_count = s.null_count();
    let sort_idx_ca = s.argsort(SortOptions {
        descending: reverse,
        ..Default::default()
    });
    let sort_idx = sort_idx_ca.downcast_iter().next().unwrap().values();

    // `inv` becomes the inverse permutation of `sort_idx`: for every original
    // position it stores that element's position in sorted order (plus the
    // starting value of `count`).
    let mut inv: Vec<IdxSize> = Vec::with_capacity(len);
    // Safety:
    // Values will be filled next and there is only primitive data
    #[allow(clippy::uninit_vec)]
    unsafe {
        inv.set_len(len)
    }
    let inv_values = inv.as_mut_slice();

    // Ordinal (and Random) ranks are 1-based and returned directly, so counting
    // starts at 1; the other methods use `inv` as 0-based take indices.
    #[cfg(feature = "random")]
    let mut count = if let RankMethod::Ordinal | RankMethod::Random = method {
        1 as IdxSize
    } else {
        0
    };

    #[cfg(not(feature = "random"))]
    let mut count = if let RankMethod::Ordinal = method {
        1 as IdxSize
    } else {
        0
    };

    // Safety:
    // we are in bounds
    unsafe {
        sort_idx.iter().for_each(|&i| {
            *inv_values.get_unchecked_mut(i as usize) = count;
            count += 1;
        });
    }

    use RankMethod::*;
    match method {
        Ordinal => {
            // The 1-based inverse permutation already is the ordinal rank.
            let inv_ca = IdxCa::from_vec(s.name(), inv);
            inv_ca.into_series()
        }
        #[cfg(feature = "random")]
        Random => {
            // Safety:
            // in bounds
            let arr = unsafe { s.take_unchecked(&sort_idx_ca).unwrap() };
            // Compare each sorted element with its predecessor: true where a new value starts.
            let not_consecutive_same = arr
                .slice(1, len - 1)
                .not_equal(&arr.slice(0, len - 1))
                .unwrap()
                .rechunk();
            let obs = not_consecutive_same.downcast_iter().next().unwrap();

            // Collect slice indices for sort_idx which point to ties in the original series.
            let mut ties_indices = Vec::with_capacity(len + 1);
            let mut ties_index: usize = 0;

            ties_indices.push(ties_index);
            obs.iter().for_each(|b| {
                if let Some(b) = b {
                    ties_index += 1;
                    if b {
                        ties_indices.push(ties_index)
                    }
                }
            });
            // Close last slice (if there were nulls in the original series, they will always be in the last slice).
            ties_indices.push(len);

            let mut sort_idx = sort_idx.to_vec();

            let mut thread_rng = thread_rng();
            let rng = &mut SmallRng::from_rng(&mut thread_rng).unwrap();

            // Shuffle sort_idx positions which point to ties in the original series.
            for i in 0..(ties_indices.len() - 1) {
                let ties_index_start = ties_indices[i];
                let ties_index_end = ties_indices[i + 1];
                // A run of length 1 has nothing to shuffle.
                if ties_index_end - ties_index_start > 1 {
                    sort_idx[ties_index_start..ties_index_end].shuffle(rng);
                }
            }

            // Recreate inv_ca (where ties are randomly shuffled compared with Ordinal).
            let mut count = 1 as IdxSize;
            unsafe {
                sort_idx.iter().for_each(|&i| {
                    *inv_values.get_unchecked_mut(i as usize) = count;
                    count += 1;
                });
            }

            let inv_ca = IdxCa::from_vec(s.name(), inv);
            inv_ca.into_series()
        }
        // Remaining methods (Average, Min, Max, Dense) need tie information.
        _ => {
            let inv_ca = IdxCa::from_vec(s.name(), inv);
            // Safety:
            // in bounds
            let arr = unsafe { s.take_unchecked(&sort_idx_ca).unwrap() };
            let validity = arr.chunks()[0].validity().cloned();
            // Compare each sorted element with its predecessor: true where a new value starts.
            let not_consecutive_same = arr
                .slice(1, len - 1)
                .not_equal(&arr.slice(0, len - 1))
                .unwrap()
                .rechunk();
            // this obs is shorter than that of scipy stats, because we can just start the cumsum by 1
            // instead of 0
            let obs = not_consecutive_same.downcast_iter().next().unwrap();
            // `dense` holds, per sorted position, the running count of distinct values.
            let mut dense = Vec::with_capacity(len);

            // this offset save an offset on the whole column, what scipy does in:
            //
            // ```python
            //     if method == 'min':
            //         return count[dense - 1] + 1
            // ```
            // INVALID LINT REMOVE LATER
            #[allow(clippy::bool_to_int_with_if)]
            let mut cumsum: IdxSize = if let RankMethod::Min = method {
                0
            } else {
                // nulls will be first, rank, but we will replace them (with null)
                // so this ensures the second rank will be 1
                if matches!(method, RankMethod::Dense) && s.null_count() > 0 {
                    0
                } else {
                    1
                }
            };

            dense.push(cumsum);
            obs.values_iter().for_each(|b| {
                if b {
                    cumsum += 1;
                }
                dense.push(cumsum)
            });
            let arr = IdxArr::from_data_default(dense.into(), validity);
            let dense: IdxCa = (s.name(), arr).into();
            // Map the dense ranks from sorted order back to the original order.
            // Safety:
            // in bounds
            let dense = unsafe { dense.take_unchecked((&inv_ca).into()) };

            if let RankMethod::Dense = method {
                return if s.null_count() == 0 {
                    dense.into_series()
                } else {
                    // null will be the first rank
                    // we restore original nulls and shift all ranks by one
                    // NOTE(review): this mask comes from `is_null()`, whereas the
                    // null branch at the top of the function uses `is_not_null()`
                    // for the validity — confirm the intended polarity.
                    let validity = s.is_null().rechunk();
                    let validity = validity.downcast_iter().next().unwrap();
                    let validity = validity.values().clone();

                    let arr = dense.downcast_iter().next().unwrap();
                    let arr = arr.with_validity(Some(validity));
                    let dtype = arr.data_type().clone();

                    // Safety:
                    // given dtype is correct
                    unsafe {
                        Series::try_from_arrow_unchecked(s.name(), vec![arr], &dtype).unwrap()
                    }
                };
            }

            // `count` collects the sorted positions at which a new distinct value
            // starts, bracketed by 0 at the front and the non-null length at the end.
            let bitmap = obs.values();
            // Number of set bits in `obs`, i.e. the number of value changes.
            let cap = bitmap.len() - bitmap.unset_bits();
            let mut count = Vec::with_capacity(cap + 1);
            let mut cnt: IdxSize = 0;
            count.push(cnt);

            if null_count > 0 {
                // Null comparisons are skipped and do not advance `cnt`.
                obs.iter().for_each(|b| {
                    if let Some(b) = b {
                        cnt += 1;
                        if b {
                            count.push(cnt)
                        }
                    }
                });
            } else {
                obs.values_iter().for_each(|b| {
                    cnt += 1;
                    if b {
                        count.push(cnt)
                    }
                });
            }

            count.push((len - null_count) as IdxSize);
            let count = IdxCa::from_vec(s.name(), count);

            match method {
                Max => {
                    // Safety:
                    // within bounds
                    unsafe { count.take_unchecked((&dense).into()).into_series() }
                }
                Min => {
                    // `cumsum` started at 0 for Min, so `dense` is already offset by one
                    // (see the scipy snippet above).
                    // Safety:
                    // within bounds
                    unsafe { (count.take_unchecked((&dense).into()) + 1).into_series() }
                }
                Average => {
                    // Mean of the Max rank (`a`) and the Min rank (`b`), as Float32.
                    // Safety:
                    // in bounds
                    let a = unsafe { count.take_unchecked((&dense).into()) }
                        .cast(&DataType::Float32)
                        .unwrap();
                    let b = unsafe { count.take_unchecked((&(dense - 1)).into()) }
                        .cast(&DataType::Float32)
                        .unwrap()
                        + 1.0;
                    (&a + &b) * 0.5
                }
                // These methods returned earlier and cannot reach this match.
                #[cfg(feature = "random")]
                Dense | Ordinal | Random => unimplemented!(),
                #[cfg(not(feature = "random"))]
                Dense | Ordinal => unimplemented!(),
            }
        }
    }
}

source

fn _sum_as_series(&self) -> Series

Get the sum of the Series as a new Series of length 1.

If the DataType is one of {Int8, UInt8, Int16, UInt16} the Series is first cast to Int64 to prevent overflow issues.

Examples found in repository ?

src/series/mod.rs (line 529)

    pub fn sum_as_series(&self) -> Series {
        use DataType::*;
        if self.is_empty() && self.dtype().is_numeric() {
            return Series::new("", [0])
                .cast(self.dtype())
                .unwrap()
                .sum_as_series();
        }
        match self.dtype() {
            Int8 | UInt8 | Int16 | UInt16 => self.cast(&Int64).unwrap().sum_as_series(),
            _ => self._sum_as_series(),
        }
    }

source

fn max_as_series(&self) -> Series

Get the max of the Series as a new Series of length 1.

Examples found in repository ?

src/frame/mod.rs (line 2585)

    /// Aggregate every column to its maximum via `max_as_series`, in parallel.
    pub fn max(&self) -> Self {
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.max_as_series()))
    }

More examples

Hide additional examples

src/series/mod.rs (line 284)

    /// Return the maximum of the series converted to `T`.
    ///
    /// Yields `None` when the cast to `Float64` fails, when the aggregated value
    /// is null, or when the value cannot be represented as `T`.
    pub fn max<T>(&self) -> Option<T>
    where
        T: NumCast,
    {
        let as_float = self.max_as_series().cast(&DataType::Float64).ok()?;
        let value = as_float.f64().unwrap().get(0)?;
        T::from(value)
    }

source

fn min_as_series(&self) -> Series

Get the min of the Series as a new Series of length 1.

Examples found in repository ?

src/frame/mod.rs (line 2688)

    /// Aggregate every column to its minimum via `min_as_series`, in parallel.
    pub fn min(&self) -> Self {
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.min_as_series()))
    }

More examples

Hide additional examples

src/series/mod.rs (line 267)

    /// Return the minimum of the series converted to `T`.
    ///
    /// Yields `None` when the cast to `Float64` fails, when the aggregated value
    /// is null, or when the value cannot be represented as `T`.
    pub fn min<T>(&self) -> Option<T>
    where
        T: NumCast,
    {
        let as_float = self.min_as_series().cast(&DataType::Float64).ok()?;
        let value = as_float.f64().unwrap().get(0)?;
        T::from(value)
    }

source

fn median_as_series(&self) -> Series

Get the median of the Series as a new Series of length 1.

Examples found in repository ?

src/frame/mod.rs (line 2790)

    /// Aggregate every column to its median via `median_as_series`, in parallel.
    pub fn median(&self) -> Self {
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.median_as_series()))
    }

More examples

Hide additional examples

src/series/implementations/boolean.rs (line 344)

    fn median_as_series(&self) -> Series {
        // The whole array is cast to f32 because that is cheaper;
        // only the single resulting value is widened to f64.
        let as_f32 = self.0.cast(&DataType::Float32).unwrap();
        let median = as_f32.median_as_series();
        median.cast(&DataType::Float64).unwrap()
    }

source

fn var_as_series(&self, _ddof: u8) -> Series

Get the variance of the Series as a new Series of length 1.

Examples found in repository ?

src/frame/mod.rs (line 2654)

    /// Aggregate every column to its variance via `var_as_series(ddof)`, in parallel.
    pub fn var(&self, ddof: u8) -> Self {
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.var_as_series(ddof)))
    }

More examples

Hide additional examples

src/series/implementations/boolean.rs (line 355)

    fn var_as_series(&self, _ddof: u8) -> Series {
        // The whole array is cast to f32 because that is cheaper;
        // only the single resulting value is widened to f64.
        let as_f32 = self.0.cast(&DataType::Float32).unwrap();
        let variance = as_f32.var_as_series(_ddof);
        variance.cast(&DataType::Float64).unwrap()
    }

source

fn std_as_series(&self, _ddof: u8) -> Series

Get the standard deviation of the Series as a new Series of length 1.

Examples found in repository ?

src/frame/mod.rs (line 2620)

    /// Aggregate every column to its standard deviation via `std_as_series(ddof)`, in parallel.
    pub fn std(&self, ddof: u8) -> Self {
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.std_as_series(ddof)))
    }

More examples

Hide additional examples

src/series/implementations/boolean.rs (line 366)

    fn std_as_series(&self, _ddof: u8) -> Series {
        // The whole array is cast to f32 because that is cheaper;
        // only the single resulting value is widened to f64.
        let as_f32 = self.0.cast(&DataType::Float32).unwrap();
        let std_dev = as_f32.std_as_series(_ddof);
        std_dev.cast(&DataType::Float64).unwrap()
    }

source

fn quantile_as_series(
    &self,
    _quantile: f64,
    _interpol: QuantileInterpolOptions
) -> PolarsResult<Series>

Get the quantile of the ChunkedArray as a new Series of length 1.

Examples found in repository ?

src/frame/mod.rs (line 2796)

    /// Build a new `DataFrame` from the result of calling
    /// `quantile_as_series(quantile, interpol)` on every column.
    ///
    /// # Errors
    /// Returns the error produced by `try_apply_columns_par` when a column's
    /// quantile computation fails.
    pub fn quantile(&self, quantile: f64, interpol: QuantileInterpolOptions) -> PolarsResult<Self> {
        self.try_apply_columns_par(&|s| s.quantile_as_series(quantile, interpol))
            .map(DataFrame::new_no_checks)
    }

source

fn fmt_list(&self) -> String

Examples found in repository ?

src/series/mod.rs (line 704)

    /// Cast to `data_type`, failing if the cast changed the null count.
    ///
    /// # Errors
    /// Returns a `ComputeError` listing the unique values that were non-null
    /// before the cast and null afterwards. Errors from the cast itself, from
    /// filtering, or from `unique` are propagated.
    pub fn strict_cast(&self, data_type: &DataType) -> PolarsResult<Series> {
        let casted = self.cast(data_type)?;
        if self.null_count() == casted.null_count() {
            return Ok(casted);
        }

        // Values that were valid before the cast but are null after it.
        let newly_null = !self.is_null() & casted.is_null();
        let offending = self.filter_threaded(&newly_null, false)?.unique()?;
        let message = format!(
            "Strict conversion from {:?} to {:?} failed for values {}. \
            If you were trying to cast Utf8 to Date, Time, or Datetime, \
            consider using `strptime`.",
            self.dtype(),
            data_type,
            offending.fmt_list(),
        );
        Err(PolarsError::ComputeError(message.into()))
    }

More examples

Hide additional examples

src/fmt.rs (line 752)

    /// Write a textual representation of this `AnyValue` to `f`.
    ///
    /// Each variant is dispatched to its own formatter; several arms only
    /// exist when the matching crate feature is enabled.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // Width handed to the integer/float formatting helpers below.
        let width = 0;
        match self {
            AnyValue::Null => write!(f, "null"),
            AnyValue::UInt8(v) => write!(f, "{v}"),
            AnyValue::UInt16(v) => write!(f, "{v}"),
            AnyValue::UInt32(v) => write!(f, "{v}"),
            AnyValue::UInt64(v) => write!(f, "{v}"),
            AnyValue::Int8(v) => fmt_integer(f, width, *v),
            AnyValue::Int16(v) => fmt_integer(f, width, *v),
            AnyValue::Int32(v) => fmt_integer(f, width, *v),
            AnyValue::Int64(v) => fmt_integer(f, width, *v),
            AnyValue::Float32(v) => fmt_float(f, width, *v),
            AnyValue::Float64(v) => fmt_float(f, width, *v),
            AnyValue::Boolean(v) => write!(f, "{}", *v),
            // Strings are written surrounded by double quotes.
            AnyValue::Utf8(v) => write!(f, "{}", format_args!("\"{v}\"")),
            AnyValue::Utf8Owned(v) => write!(f, "{}", format_args!("\"{v}\"")),
            // Binary payloads are never printed; a fixed placeholder is used.
            #[cfg(feature = "dtype-binary")]
            AnyValue::Binary(_) | AnyValue::BinaryOwned(_) => write!(f, "[binary data]"),
            #[cfg(feature = "dtype-date")]
            AnyValue::Date(v) => write!(f, "{}", date32_to_date(*v)),
            #[cfg(feature = "dtype-datetime")]
            AnyValue::Datetime(v, tu, tz) => {
                // Convert the raw timestamp using the helper for its time unit.
                let ndt = match tu {
                    TimeUnit::Nanoseconds => timestamp_ns_to_datetime(*v),
                    TimeUnit::Microseconds => timestamp_us_to_datetime(*v),
                    TimeUnit::Milliseconds => timestamp_ms_to_datetime(*v),
                };
                match tz {
                    None => write!(f, "{ndt}"),
                    Some(_tz) => {
                        #[cfg(feature = "timezones")]
                        {
                            // First try to parse the zone as a `chrono_tz::Tz`;
                            // if that fails, fall back to `parse_offset`.
                            match _tz.parse::<chrono_tz::Tz>() {
                                Ok(tz) => {
                                    let dt_utc = chrono::Utc.from_local_datetime(&ndt).unwrap();
                                    let dt_tz_aware = dt_utc.with_timezone(&tz);
                                    write!(f, "{dt_tz_aware}")
                                }
                                Err(_) => match parse_offset(_tz) {
                                    Ok(offset) => {
                                        let dt_tz_aware = offset.from_utc_datetime(&ndt);
                                        write!(f, "{dt_tz_aware}")
                                    }
                                    // Neither parse succeeded: write a marker instead of failing.
                                    Err(_) => write!(f, "invalid timezone"),
                                },
                            }
                        }
                        // A timezone-carrying value cannot be formatted without the
                        // `timezones` feature; this panics rather than returning an error.
                        #[cfg(not(feature = "timezones"))]
                        {
                            panic!("activate 'timezones' feature")
                        }
                    }
                }
            }
            #[cfg(feature = "dtype-duration")]
            AnyValue::Duration(v, tu) => match tu {
                TimeUnit::Nanoseconds => fmt_duration_ns(f, *v),
                TimeUnit::Microseconds => fmt_duration_us(f, *v),
                TimeUnit::Milliseconds => fmt_duration_ms(f, *v),
            },
            #[cfg(feature = "dtype-time")]
            AnyValue::Time(_) => {
                // Goes through the `Into<chrono::NaiveTime>` conversion of `&AnyValue`.
                let nt: chrono::NaiveTime = self.into();
                write!(f, "{nt}")
            }
            #[cfg(feature = "dtype-categorical")]
            AnyValue::Categorical(idx, rev) => {
                // Look the category string up via `rev` and print it quoted.
                let s = rev.get(*idx);
                write!(f, "\"{s}\"")
            }
            AnyValue::List(s) => write!(f, "{}", s.fmt_list()),
            #[cfg(feature = "object")]
            AnyValue::Object(v) => write!(f, "{v}"),
            #[cfg(feature = "dtype-struct")]
            av @ AnyValue::Struct(_, _, _) => {
                // Collect the struct's field values into `avs` before formatting.
                let mut avs = vec![];
                av._materialize_struct_av(&mut avs);
                fmt_struct(f, &avs)
            }
            #[cfg(feature = "dtype-struct")]
            AnyValue::StructOwned(payload) => fmt_struct(f, &payload.0),
        }
    }

source

fn clone_inner(&self) -> Arc<dyn SeriesTrait>

Clone inner ChunkedArray and wrap in a new Arc

Examples found in repository ?

src/series/series_trait.rs (line 357)

    /// Return a `Series` without null values.
    ///
    /// When there are no nulls the inner array is cloned via `clone_inner`;
    /// otherwise the series is filtered with its `is_not_null` mask.
    fn drop_nulls(&self) -> Series {
        if self.null_count() != 0 {
            return self.filter(&self.is_not_null()).unwrap();
        }
        Series(self.clone_inner())
    }

More examples

Hide additional examples

src/series/mod.rs (line 165)

    /// Get mutable access to the inner `SeriesTrait` object.
    ///
    /// If the `Arc` is shared (weak + strong count is not exactly 1), the inner
    /// value is first replaced by `clone_inner()` so `Arc::get_mut` can succeed.
    pub fn _get_inner_mut(&mut self) -> &mut dyn SeriesTrait {
        let ref_count = Arc::weak_count(&self.0) + Arc::strong_count(&self.0);
        if ref_count != 1 {
            self.0 = self.0.clone_inner();
        }
        Arc::get_mut(&mut self.0).expect("implementation error")
    }

source

fn get_object(&self, _index: usize) -> Option<&dyn PolarsObjectSafe>

Available on crate feature object only.

Get the value at this index as a downcastable Any trait ref.

source

fn as_any(&self) -> &dyn Any

Get a reference to self as an Any trait object. Only implemented for ObjectType.

Examples found in repository ?

src/chunked_array/object/extension/list.rs (line 42)

    /// Append all values of the object series `s` as one new list entry.
    ///
    /// Panics if `s` cannot be downcast to `ObjectChunked<T>`.
    fn append_series(&mut self, s: &Series) {
        let object_ca = s
            .as_any()
            .downcast_ref::<ObjectChunked<T>>()
            .expect("series of type object");

        // An empty sub-list disables the fast-explode flag.
        if object_ca.is_empty() {
            self.fast_explode = false;
        }

        object_ca
            .into_iter()
            .for_each(|opt_value| self.values_builder.append_option(opt_value.cloned()));

        // The new end offset is the previous end plus the number of appended values.
        let previous_end = self.offsets[self.offsets.len() - 1];
        self.offsets.push(previous_end + object_ca.len() as i64);
    }

source

fn as_any_mut(&mut self) -> &mut dyn Any

Get a mutable reference to self as an Any trait object. Only implemented for ObjectType.

source

fn peak_max(&self) -> BooleanChunked

Get a boolean mask of the local maximum peaks.

source

fn peak_min(&self) -> BooleanChunked

Get a boolean mask of the local minimum peaks.

source

fn is_in(&self, _other: &Series) -> PolarsResult<BooleanChunked>

Available on crate feature is_in only.

Check if elements of this Series are in the right Series, or List values of the right Series.

Examples found in repository ?

src/chunked_array/ops/is_in.rs (line 53)

    /// Build a boolean mask telling, per element of `self`, whether it is
    /// contained in `other`.
    ///
    /// Two shapes of `other` are handled:
    /// * `List` dtype: membership is tested against each list entry.
    /// * anything else: both sides are brought to a common supertype and the
    ///   work is delegated to `is_in_helper`.
    ///
    /// # Errors
    /// Fails when no supertype can be determined, when a cast fails, or when
    /// the (non-list) dtype is not one of the supported integer/float types.
    fn is_in(&self, other: &Series) -> PolarsResult<BooleanChunked> {
        // Operands are implicitly cast to their supertype before comparing.
        match other.dtype() {
            DataType::List(dt) => {
                let st = try_get_supertype(self.dtype(), dt)?;
                if &st != self.dtype() {
                    // Re-dispatch on the casted operands so that both sides share the supertype.
                    let left = self.cast(&st)?;
                    let right = other.cast(&DataType::List(Box::new(st)))?;
                    return left.is_in(&right);
                }

                let mut ca: BooleanChunked = if self.len() == 1 && other.len() != 1 {
                    // Single value on the left: test that one value against every list.
                    let value = self.get(0);

                    other
                        .list()?
                        .amortized_iter()
                        .map(|opt_s| {
                            // A missing list yields `None`, which compares unequal to
                            // `Some(true)` and therefore produces `false`.
                            opt_s.map(|s| {
                                let ca = s.as_ref().unpack::<T>().unwrap();
                                ca.into_iter().any(|a| a == value)
                            }) == Some(true)
                        })
                        .collect_trusted()
                } else {
                    // Pairwise: element i of `self` is looked up in list i of `other`.
                    self.into_iter()
                        .zip(other.list()?.amortized_iter())
                        .map(|(value, series)| match (value, series) {
                            (val, Some(series)) => {
                                let ca = series.as_ref().unpack::<T>().unwrap();
                                ca.into_iter().any(|a| a == val)
                            }
                            // No list for this row: not contained.
                            _ => false,
                        })
                        .collect_trusted()
                };
                ca.rename(self.name());
                Ok(ca)
            }
            _ => {
                // first make sure that the types are equal
                let st = try_get_supertype(self.dtype(), other.dtype())?;
                if self.dtype() != other.dtype() {
                    let left = self.cast(&st)?;
                    let right = other.cast(&st)?;
                    return left.is_in(&right);
                }
                // now that the types are equal, we coerce every 32 bit array to u32
                // and every 64 bit array to u64 (including floats)
                // this allows hashing them and greatly reduces the number of code paths.
                // NOTE(review): the soundness requirements of `is_in_helper` are not
                // visible here; presumably the bit width of `T` must match the target type.
                match self.dtype() {
                    DataType::UInt64 | DataType::Int64 | DataType::Float64 => unsafe {
                        is_in_helper::<T, u64>(self, other)
                    },
                    DataType::UInt32 | DataType::Int32 | DataType::Float32 => unsafe {
                        is_in_helper::<T, u32>(self, other)
                    },
                    DataType::UInt8 | DataType::Int8 => unsafe {
                        is_in_helper::<T, u8>(self, other)
                    },
                    DataType::UInt16 | DataType::Int16 => unsafe {
                        is_in_helper::<T, u16>(self, other)
                    },
                    _ => Err(PolarsError::ComputeError(
                        format!(
                            "Data type {:?} not supported in is_in operation",
                            self.dtype()
                        )
                        .into(),
                    )),
                }
            }
        }
        // Applied to the result of either arm: the mask carries the name of `self`.
        .map(|mut ca| {
            ca.rename(self.name());
            ca
        })
    }

source

fn repeat_by(&self, _by: &IdxCa) -> ListChunked

Available on crate feature repeat_by only.

source

fn checked_div(&self, _rhs: &Series) -> PolarsResult<Series>

Available on crate feature checked_arithmetic only.

Examples found in repository ?

src/series/arithmetic/borrowed.rs (line 217)

        /// Divide `self` by `rhs` using the checked division of the underlying
        /// series implementation.
        ///
        /// Both operands are first passed through `coerce_lhs_rhs`.
        ///
        /// # Errors
        /// Returns the coercion error when the two data types cannot be
        /// coerced (this used to panic), or the error reported by the
        /// underlying `checked_div`.
        fn checked_div(&self, rhs: &Series) -> PolarsResult<Series> {
            // Propagate instead of panicking: the function already returns a result.
            let (lhs, rhs) = coerce_lhs_rhs(self, rhs)?;
            lhs.as_ref().as_ref().checked_div(rhs.as_ref())
        }

source

fn is_first(&self) -> PolarsResult<BooleanChunked>

Available on crate feature is_first only.

Get a mask of the first unique values.

Examples found in repository ?

src/chunked_array/ops/unique/mod.rs (line 463)

        fn is_first(&self) -> PolarsResult<BooleanChunked> {
            use DataType::*;
            match self.dtype() {
                // cast types to reduce compiler bloat
                Int8 | Int16 | UInt8 | UInt16 => {
                    let s = self.cast(&DataType::Int32).unwrap();
                    s.is_first()
                }
                _ => {
                    if Self::bit_repr_is_large() {
                        let ca = self.bit_repr_large();
                        Ok(is_first(&ca))
                    } else {
                        let ca = self.bit_repr_small();
                        Ok(is_first(&ca))
                    }
                }
            }
        }

source

fn mode(&self) -> PolarsResult<Series>

Available on crate feature mode only.

Compute the most occurring element in the array.

source

fn rolling_apply(
    &self,
    _f: &dyn Fn(&Series) -> Series,
    _options: RollingOptionsFixedWindow
) -> PolarsResult<Series>

Available on crate feature rolling_window only.

Apply a custom function over a rolling/moving window of the array. This involves a fair amount of dynamic dispatch, so prefer rolling_min, max, mean, sum over this.

Examples found in repository ?

src/chunked_array/ops/rolling_window.rs (line 82)

        /// Apply the user function `f` to every rolling window of this array
        /// and collect the first value of each result into a new series.
        ///
        /// * `f` - called with a `Series` holding the current window (multiplied
        ///   by the weights when `options.weights` is set).
        /// * `options` - window size, minimum periods, centering and optional weights.
        ///
        /// Windows smaller than `options.min_periods` produce a null. If
        /// `options.window_size >= self.len()` the whole result is null.
        ///
        /// # Errors
        /// Fails when `check_input` rejects the options, when a cast fails, or when
        /// the series returned by `f` does not match the dtype of `self`.
        fn rolling_apply(
            &self,
            f: &dyn Fn(&Series) -> Series,
            options: RollingOptionsFixedWindow,
        ) -> PolarsResult<Series> {
            check_input(options.window_size, options.min_periods)?;

            let ca = self.rechunk();
            // Weighted windows are only computed on floats: other dtypes are cast
            // to Float64 and the call is re-dispatched on the casted series.
            if options.weights.is_some()
                && !matches!(self.dtype(), DataType::Float64 | DataType::Float32)
            {
                let s = self.cast(&DataType::Float64)?;
                return s.rolling_apply(f, options);
            }

            if options.window_size >= self.len() {
                return Ok(Self::full_null(self.name(), self.len()).into_series());
            }

            let len = self.len();
            // `ca` was rechunked above, so the first chunk is taken as the whole array.
            let arr = ca.downcast_iter().next().unwrap();
            // A one-element series whose backing array is overwritten with each
            // window below, so that `f` can be handed a `&Series` per window.
            let mut series_container =
                ChunkedArray::<T>::from_slice("", &[T::Native::zero()]).into_series();
            let array_ptr = series_container.array_ref(0);
            // NOTE(review): a `*const` obtained from a shared reference is cast to
            // `*mut` and written through below; soundness relies on the invariants
            // stated in the safety comments at the write sites - confirm.
            let ptr = array_ptr.as_ref() as *const dyn Array as *mut dyn Array
                as *mut PrimitiveArray<T::Native>;
            let mut builder = PrimitiveChunkedBuilder::<T>::new(self.name(), self.len());

            if let Some(weights) = options.weights {
                let weights_series = Float64Chunked::new("weights", &weights).into_series();

                // Bring the weights to the dtype of `self` so they can be multiplied.
                let weights_series = weights_series.cast(self.dtype()).unwrap();

                for idx in 0..len {
                    let (start, size) = window_edges(idx, len, options.window_size, options.center);

                    if size < options.min_periods {
                        builder.append_null();
                    } else {
                        // safety:
                        // we are in bounds
                        let arr_window = unsafe { arr.slice_unchecked(start, size) };

                        // Safety.
                        // ptr is not dropped as we are in scope
                        // We are also the only owner of the contents of the Arc
                        // we do this to reduce heap allocs.
                        unsafe {
                            *ptr = arr_window;
                        }
                        // ensure the length is correct
                        series_container._get_inner_mut().compute_len();

                        let s = if size == options.window_size {
                            f(&series_container.multiply(&weights_series).unwrap())
                        } else {
                            // Truncated window: only the first `series_container.len()`
                            // weights are used.
                            let weights_cutoff: Series = match self.dtype() {
                                DataType::Float64 => weights_series
                                    .f64()
                                    .unwrap()
                                    .into_iter()
                                    .take(series_container.len())
                                    .collect(),
                                _ => weights_series // Float32 case
                                    .f32()
                                    .unwrap()
                                    .into_iter()
                                    .take(series_container.len())
                                    .collect(),
                            };
                            f(&series_container.multiply(&weights_cutoff).unwrap())
                        };

                        // Only the first value of the returned series is kept.
                        let out = self.unpack_series_matching_type(&s)?;
                        builder.append_option(out.get(0));
                    }
                }

                Ok(builder.finish().into_series())
            } else {
                for idx in 0..len {
                    let (start, size) = window_edges(idx, len, options.window_size, options.center);

                    if size < options.min_periods {
                        builder.append_null();
                    } else {
                        // safety:
                        // we are in bounds
                        let arr_window = unsafe { arr.slice_unchecked(start, size) };

                        // Safety.
                        // ptr is not dropped as we are in scope
                        // We are also the only owner of the contents of the Arc
                        // we do this to reduce heap allocs.
                        unsafe {
                            *ptr = arr_window;
                        }
                        // ensure the length is correct
                        series_container._get_inner_mut().compute_len();

                        let s = f(&series_container);
                        // Only the first value of the returned series is kept.
                        let out = self.unpack_series_matching_type(&s)?;
                        builder.append_option(out.get(0));
                    }
                }

                Ok(builder.finish().into_series())
            }
        }

source

fn str_concat(&self, _delimiter: &str) -> Utf8Chunked

Available on crate feature concat_str only.

Concat the values into a string array.

Arguments

delimiter - A string that will act as delimiter between values.

Implementations§

source §

impl<'a> dyn SeriesTrait + 'a

source

pub fn unpack<N>(&self) -> PolarsResult<&ChunkedArray<N>>
where
    N: PolarsDataType + 'static,

Examples found in repository ?

src/chunked_array/builder/list.rs (line 160)

    /// Append the values of `s` as one new list entry.
    ///
    /// Panics if the physical representation of `s` cannot be unpacked as `T`.
    fn append_series(&mut self, s: &Series) {
        // An empty sub-list disables the fast-explode flag.
        if s.is_empty() {
            self.fast_explode = false;
        }
        let physical = s.to_physical_repr();
        let ca = physical.unpack::<T>().unwrap();
        let values = self.builder.mut_values();

        for arr in ca.downcast_iter() {
            if arr.has_validity() {
                // Safety:
                // Arrow arrays are trusted length iterators.
                unsafe { values.extend_trusted_len_unchecked(arr.into_iter()) }
            } else {
                // No validity bitmap: the raw values can be copied as a slice.
                values.extend_from_slice(arr.values().as_slice())
            }
        }
        // overflow of i64 is far beyond polars capable lengths.
        unsafe { self.builder.try_push_valid().unwrap_unchecked() };
    }

More examples

Hide additional examples

src/chunked_array/ndarray.rs (line 44)

    /// Create a 2D `ndarray` with one row per list entry.
    ///
    /// Every entry is cast to the dtype of `N`; all entries must have the same
    /// length, which becomes the number of columns.
    ///
    /// # Errors
    /// * `ComputeError` if `self` contains nulls.
    /// * `ShapeMisMatch` if an entry's length differs from the first entry's.
    /// * `NoData` if there are no entries at all.
    /// * Any error from casting or converting an entry.
    pub fn to_ndarray<N>(&self) -> PolarsResult<Array2<N::Native>>
    where
        N: PolarsNumericType,
    {
        if self.null_count() != 0 {
            Err(PolarsError::ComputeError(
                "Creation of ndarray with null values is not supported.".into(),
            ))
        } else {
            let mut iter = self.into_no_null_iter();

            // Both are assigned once the first entry has been inspected.
            let mut ndarray;
            let width;

            // first iteration determine the size
            if let Some(series) = iter.next() {
                width = series.len();

                let mut row_idx = 0;
                // Uninitialized buffer of shape (number of entries, width);
                // rows are filled in one by one below.
                ndarray = ndarray::Array::uninit((self.len(), width));

                let series = series.cast(&N::get_dtype())?;
                let ca = series.unpack::<N>()?;
                let a = ca.to_ndarray()?;
                let mut row = ndarray.slice_mut(s![row_idx, ..]);
                a.assign_to(&mut row);
                row_idx += 1;

                // Remaining entries: same steps as above, plus the length check.
                for series in iter {
                    if series.len() != width {
                        return Err(PolarsError::ShapeMisMatch(
                            "Could not create a 2D array. Series have different lengths".into(),
                        ));
                    }
                    let series = series.cast(&N::get_dtype())?;
                    let ca = series.unpack::<N>()?;
                    let a = ca.to_ndarray()?;
                    let mut row = ndarray.slice_mut(s![row_idx, ..]);
                    a.assign_to(&mut row);
                    row_idx += 1;
                }

                debug_assert_eq!(row_idx, self.len());
                // Safety:
                // We have assigned to every row and element of the array
                unsafe { Ok(ndarray.assume_init()) }
            } else {
                Err(PolarsError::NoData(
                    "cannot create ndarray of empty ListChunked".into(),
                ))
            }
        }
    }
}

impl DataFrame {
    /// Create a 2D `ndarray::Array` from this `DataFrame`.
    ///
    /// All columns are cast to the data type of `N` (if they aren't already).
    /// For floating point targets, `None` values are implicitly converted to
    /// `NaN`; for every other target the columns must not contain nulls.
    ///
    /// # Errors
    /// * Any error raised while casting a column to `N`.
    /// * `ComputeError` if a column still contains nulls after casting.
    ///
    /// # Panics
    /// Panics if a column's contiguous slice does not have exactly
    /// `self.height()` values.
    ///
    /// ```rust
    /// use polars_core::prelude::*;
    /// let a = UInt32Chunked::new("a", &[1, 2, 3]).into_series();
    /// let b = Float64Chunked::new("b", &[10., 8., 6.]).into_series();
    ///
    /// let df = DataFrame::new(vec![a, b]).unwrap();
    /// let ndarray = df.to_ndarray::<Float64Type>().unwrap();
    /// println!("{:?}", ndarray);
    /// ```
    /// Outputs:
    /// ```text
    /// [[1.0, 10.0],
    ///  [2.0, 8.0],
    ///  [3.0, 6.0]], shape=[3, 2], strides=[2, 1], layout=C (0x1), const ndim=2
    /// ```
    #[cfg_attr(docsrs, doc(cfg(feature = "ndarray")))]
    pub fn to_ndarray<N>(&self) -> PolarsResult<Array2<N::Native>>
    where
        N: PolarsNumericType,
    {
        // Cast every column to the target type, turn float nulls into NaN and
        // make each column a single contiguous chunk.
        let columns = self
            .get_columns()
            .par_iter()
            .map(|s| {
                let s = s.cast(&N::get_dtype())?;
                let s = match s.dtype() {
                    DataType::Float32 => {
                        let ca = s.f32().unwrap();
                        ca.none_to_nan().into_series()
                    }
                    DataType::Float64 => {
                        let ca = s.f64().unwrap();
                        ca.none_to_nan().into_series()
                    }
                    _ => s,
                };
                Ok(s.rechunk())
            })
            .collect::<PolarsResult<Vec<_>>>()?;

        let shape = self.shape();
        let height = self.height();
        let mut membuf: Vec<N::Native> = Vec::with_capacity(shape.0 * shape.1);
        // The pointer is written through below, so it must come from
        // `as_mut_ptr` (pointers from `as_ptr` must never be used for writes).
        // It is passed as `usize` so the parallel closure can capture it.
        let ptr = membuf.as_mut_ptr() as usize;

        columns.par_iter().enumerate().map(|(col_idx, s)| {
            if s.null_count() != 0 {
                return Err(PolarsError::ComputeError(
                    "Creation of ndarray with null values is not supported. Consider using floats and NaNs".into(),
                ));
            }

            // this is an Arc clone if already of type N
            let s = s.cast(&N::get_dtype())?;
            let ca = s.unpack::<N>()?;
            let vals = ca.cont_slice().unwrap();

            // The raw copy below writes exactly `height` values; refuse anything else
            // (the previous `copy_from_slice` panicked on a mismatch as well).
            assert_eq!(
                vals.len(),
                height,
                "column length does not match the height of the DataFrame"
            );

            // Safety:
            // - every column writes to its own, non-overlapping range
            //   `[col_idx * height, (col_idx + 1) * height)` of the buffer, which
            //   lies within the reserved capacity of `shape.0 * shape.1` elements;
            // - the destination is uninitialized, so it is only ever written to
            //   through the raw pointer and never exposed as a slice or read;
            // - `vals` holds exactly `height` initialized values (checked above).
            unsafe {
                let offset_ptr = (ptr as *mut N::Native).add(col_idx * height);
                std::ptr::copy_nonoverlapping(vals.as_ptr(), offset_ptr, height);
            }

            Ok(())
        }).collect::<PolarsResult<Vec<_>>>()?;

        // Safety:
        // we have written all data, so we can now safely set length
        unsafe {
            membuf.set_len(shape.0 * shape.1);
        }
        // The buffer was filled column by column, so it is interpreted as a
        // (width, height) array and the axes are then swapped.
        let ndarr = Array2::from_shape_vec((shape.1, shape.0), membuf).unwrap();
        Ok(ndarr.reversed_axes())
    }

src/frame/groupby/aggregations/mod.rs (line 781)

    /// Aggregate the variance per group, with `ddof` delta degrees of freedom.
    ///
    /// # Safety
    /// Group indices are used with `take_unchecked` and only verified by debug
    /// assertions; presumably the caller must guarantee that `groups` is in
    /// bounds for this array - TODO confirm against callers.
    pub(crate) unsafe fn agg_var(&self, groups: &GroupsProxy, ddof: u8) -> Series {
        let ca = &self.0;
        match groups {
            GroupsProxy::Idx(groups) => agg_helper_idx_on_all::<T, _>(groups, |idx| {
                debug_assert!(idx.len() <= ca.len());
                // An empty group aggregates to null.
                if idx.is_empty() {
                    return None;
                }
                // Gather the group's values, then reuse the series-level variance.
                let take = { ca.take_unchecked(idx.into()) };
                take.var_as_series(ddof).unpack::<T>().unwrap().get(0)
            }),
            GroupsProxy::Slice { groups, .. } => {
                if _use_rolling_kernels(groups, self.chunks()) {
                    // Rolling-kernel path: operates on the first chunk's raw values,
                    // with a separate kernel when a validity bitmap is present.
                    let arr = self.downcast_iter().next().unwrap();
                    let values = arr.values().as_slice();
                    let offset_iter = groups.iter().map(|[first, len]| (*first, *len));
                    let arr = match arr.validity() {
                        None => _rolling_apply_agg_window_no_nulls::<VarWindow<_>, _, _>(
                            values,
                            offset_iter,
                        ),
                        Some(validity) => _rolling_apply_agg_window_nulls::<
                            rolling::nulls::VarWindow<_>,
                            _,
                            _,
                        >(values, validity, offset_iter),
                    };
                    ChunkedArray::<T>::from_chunks("", vec![arr]).into_series()
                } else {
                    _agg_helper_slice::<T, _>(groups, |[first, len]| {
                        debug_assert!(len <= self.len() as IdxSize);
                        match len {
                            // Empty group: null.
                            0 => None,
                            // Single-element group: reported as zero, independent of `ddof`.
                            1 => NumCast::from(0),
                            _ => {
                                let arr_group = _slice_from_offsets(self, first, len);
                                arr_group.var(ddof).map(|flt| NumCast::from(flt).unwrap())
                            }
                        }
                    })
                }
            }
        }
    }
    /// Aggregate the standard deviation per group, with `ddof` delta degrees
    /// of freedom.
    ///
    /// # Safety
    /// Group indices are used with `take_unchecked` and only verified by debug
    /// assertions; presumably the caller must guarantee that `groups` is in
    /// bounds for this array - TODO confirm against callers.
    pub(crate) unsafe fn agg_std(&self, groups: &GroupsProxy, ddof: u8) -> Series {
        let ca = &self.0;
        match groups {
            GroupsProxy::Idx(groups) => agg_helper_idx_on_all::<T, _>(groups, |idx| {
                debug_assert!(idx.len() <= ca.len());
                // An empty group aggregates to null.
                if idx.is_empty() {
                    return None;
                }
                // Gather the group's values, then reuse the series-level standard deviation.
                let take = { ca.take_unchecked(idx.into()) };
                take.std_as_series(ddof).unpack::<T>().unwrap().get(0)
            }),
            GroupsProxy::Slice { groups, .. } => {
                if _use_rolling_kernels(groups, self.chunks()) {
                    // Rolling-kernel path: operates on the first chunk's raw values,
                    // with a separate kernel when a validity bitmap is present.
                    let arr = self.downcast_iter().next().unwrap();
                    let values = arr.values().as_slice();
                    let offset_iter = groups.iter().map(|[first, len]| (*first, *len));
                    let arr = match arr.validity() {
                        None => _rolling_apply_agg_window_no_nulls::<StdWindow<_>, _, _>(
                            values,
                            offset_iter,
                        ),
                        Some(validity) => _rolling_apply_agg_window_nulls::<
                            rolling::nulls::StdWindow<_>,
                            _,
                            _,
                        >(values, validity, offset_iter),
                    };
                    ChunkedArray::<T>::from_chunks("", vec![arr]).into_series()
                } else {
                    _agg_helper_slice::<T, _>(groups, |[first, len]| {
                        debug_assert!(len <= self.len() as IdxSize);
                        match len {
                            // Empty group: null.
                            0 => None,
                            // Single-element group: reported as zero, independent of `ddof`.
                            1 => NumCast::from(0),
                            _ => {
                                let arr_group = _slice_from_offsets(self, first, len);
                                arr_group.std(ddof).map(|flt| NumCast::from(flt).unwrap())
                            }
                        }
                    })
                }
            }
        }
    }

    /// Aggregate the `quantile` per group using the interpolation strategy
    /// `interpol`.
    ///
    /// A `quantile` outside `0.0..=1.0` (including `NaN`) yields a null for
    /// every group, for both index-based and slice-based groups.
    ///
    /// # Safety
    /// Group indices are used with `take_unchecked` and only verified by debug
    /// assertions; presumably the caller must guarantee that `groups` is in
    /// bounds for this array - TODO confirm against callers.
    pub(crate) unsafe fn agg_quantile(
        &self,
        groups: &GroupsProxy,
        quantile: f64,
        interpol: QuantileInterpolOptions,
    ) -> Series {
        let ca = &self.0;
        let invalid_quantile = !(0.0..=1.0).contains(&quantile);
        match groups {
            GroupsProxy::Idx(groups) => agg_helper_idx_on_all::<T, _>(groups, |idx| {
                debug_assert!(idx.len() <= ca.len());
                if idx.is_empty() || invalid_quantile {
                    return None;
                }
                let take = { ca.take_unchecked(idx.into()) };
                take.quantile_as_series(quantile, interpol)
                    .unwrap() // checked with invalid quantile check
                    .unpack::<T>()
                    .unwrap()
                    .get(0)
            }),
            GroupsProxy::Slice { groups, .. } => {
                // The rolling kernels are only handed a validated quantile; an
                // invalid one takes the generic path below, which returns null
                // for every group.
                if !invalid_quantile && _use_rolling_kernels(groups, self.chunks()) {
                    let arr = self.downcast_iter().next().unwrap();
                    let values = arr.values().as_slice();
                    let offset_iter = groups.iter().map(|[first, len]| (*first, *len));
                    let arr = match arr.validity() {
                        None => rolling::no_nulls::rolling_quantile_by_iter(
                            values,
                            quantile,
                            interpol,
                            offset_iter,
                        ),
                        Some(validity) => rolling::nulls::rolling_quantile_by_iter(
                            values,
                            validity,
                            quantile,
                            interpol,
                            offset_iter,
                        ),
                    };
                    ChunkedArray::<T>::from_chunks("", vec![arr]).into_series()
                } else {
                    _agg_helper_slice::<T, _>(groups, |[first, len]| {
                        debug_assert!(first + len <= self.len() as IdxSize);
                        // Mirrors the index-based branch; this check is also what
                        // makes the `unwrap` on the quantile result below safe.
                        if invalid_quantile {
                            return None;
                        }
                        match len {
                            0 => None,
                            // A single-element group is its own quantile.
                            1 => self.get(first as usize),
                            _ => {
                                let arr_group = _slice_from_offsets(self, first, len);
                                // unwrap checked with invalid quantile check
                                arr_group
                                    .quantile(quantile, interpol)
                                    .unwrap()
                                    .map(|flt| NumCast::from(flt).unwrap())
                            }
                        }
                    })
                }
            }
        }
    }
    /// Aggregate the median per group.
    ///
    /// Slice-based groups are delegated to `agg_quantile` with quantile `0.5`
    /// and linear interpolation; index-based groups gather their values and
    /// use `median_as_series`.
    ///
    /// # Safety
    /// Group indices are used with `take_unchecked` and only verified by a
    /// debug assertion; presumably the caller must guarantee that `groups` is
    /// in bounds for this array - TODO confirm against callers.
    pub(crate) unsafe fn agg_median(&self, groups: &GroupsProxy) -> Series {
        let ca = &self.0;
        match groups {
            GroupsProxy::Slice { .. } => {
                self.agg_quantile(groups, 0.5, QuantileInterpolOptions::Linear)
            }
            GroupsProxy::Idx(groups) => agg_helper_idx_on_all::<T, _>(groups, |idx| {
                debug_assert!(idx.len() <= ca.len());
                // An empty group aggregates to null.
                if idx.is_empty() {
                    return None;
                }
                let group_values = { ca.take_unchecked(idx.into()) };
                group_values.median_as_series().unpack::<T>().unwrap().get(0)
            }),
        }
    }
}

impl<T> ChunkedArray<T>
where
    T: PolarsIntegerType,
    ChunkedArray<T>: IntoSeries,
    T::Native: NumericNative + Ord,
    <T::Native as Simd>::Simd: std::ops::Add<Output = <T::Native as Simd>::Simd>
        + arrow::compute::aggregate::Sum<T::Native>
        + arrow::compute::aggregate::SimdOrd<T::Native>,
{
    /// Computes the mean of every group and returns it as a `Float64` series.
    ///
    /// * Index groups: empty groups give `None`, single-row groups convert that
    ///   row to `f64`, and larger groups sum the selected rows (counting nulls
    ///   when the array has a validity mask) and divide by the number of
    ///   non-null rows. Arrays with more than one chunk gather the rows first
    ///   and call `mean` on the result.
    /// * Slice groups: when `_use_rolling_kernels` approves, the array is cast
    ///   to `Float64` and the call is forwarded to that series; otherwise each
    ///   window is sliced out and averaged.
    ///
    /// # Safety
    /// Uses unchecked gathers and unchecked indexed kernels, so presumably all
    /// group indices must be in bounds for `self` — confirm with callers.
    pub(crate) unsafe fn agg_mean(&self, groups: &GroupsProxy) -> Series {
        match groups {
            GroupsProxy::Idx(idx_groups) => {
                _agg_helper_idx::<Float64Type, _>(idx_groups, |(first, idx)| {
                    // A bug in lazy code can break this invariant: filters inside
                    // aggregations may produce columns shorter than the original
                    // group tuples. The tuples get adjusted, but if that goes
                    // wrong the indices can point out of bounds.
                    debug_assert!(idx.len() <= self.len());
                    match idx.len() {
                        0 => None,
                        1 => self.get(first as usize).map(|v| v.to_f64().unwrap()),
                        group_len => {
                            if self.chunks.len() != 1 {
                                // multiple chunks: gather first, then aggregate
                                let gathered = self.take_unchecked(idx.into());
                                gathered.mean()
                            } else if self.has_validity() {
                                // single chunk with a validity mask: the kernel also
                                // reports how many selected rows were null
                                let arr = self.downcast_iter().next().unwrap();
                                take_agg_primitive_iter_unchecked_count_nulls::<
                                    T::Native,
                                    f64,
                                    _,
                                    _,
                                >(
                                    arr,
                                    idx.iter().map(|i| *i as usize),
                                    |a, b| a + b,
                                    0.0,
                                    group_len as IdxSize,
                                )
                                .map(|(sum, null_count)| {
                                    sum / (group_len as f64 - null_count as f64)
                                })
                            } else {
                                // single chunk without nulls
                                let arr = self.downcast_iter().next().unwrap();
                                take_agg_no_null_primitive_iter_unchecked(
                                    arr,
                                    idx.iter().map(|i| *i as usize),
                                    |a, b| a + b,
                                    0.0f64,
                                )
                                .to_f64()
                                .map(|sum| sum / group_len as f64)
                            }
                        }
                    }
                })
            }
            GroupsProxy::Slice {
                groups: groups_slice,
                ..
            } => {
                if !_use_rolling_kernels(groups_slice, self.chunks()) {
                    return _agg_helper_slice::<Float64Type, _>(groups_slice, |[first, len]| {
                        debug_assert!(first + len <= self.len() as IdxSize);
                        if len == 0 {
                            None
                        } else if len == 1 {
                            self.get(first as usize).map(|v| NumCast::from(v).unwrap())
                        } else {
                            let window = _slice_from_offsets(self, first, len);
                            window.mean()
                        }
                    });
                }
                // rolling kernels were selected: hand over to the Float64 series
                let as_float = self.cast(&DataType::Float64).unwrap();
                as_float.agg_mean(groups)
            }
        }
    }

    /// Computes the variance of every group and returns it as a `Float64` series.
    ///
    /// `ddof` is passed through unchanged to the underlying variance routines.
    /// Index groups gather their rows and read the single value produced by
    /// `var_as_series`. Slice groups either forward to the `Float64` cast of
    /// this array (when `_use_rolling_kernels` approves) or slice each window:
    /// empty windows give `None`, single-row windows give `0`, larger windows
    /// call `var`.
    ///
    /// # Safety
    /// Rows are gathered with `take_unchecked`, so presumably all group indices
    /// must be in bounds for `self` — confirm with callers.
    pub(crate) unsafe fn agg_var(&self, groups: &GroupsProxy, ddof: u8) -> Series {
        let groups_slice = match groups {
            GroupsProxy::Idx(idx_groups) => {
                return agg_helper_idx_on_all::<Float64Type, _>(idx_groups, |indices| {
                    debug_assert!(indices.len() <= self.len());
                    if indices.is_empty() {
                        return None;
                    }
                    let gathered = self.take_unchecked(indices.into());
                    gathered
                        .var_as_series(ddof)
                        .unpack::<Float64Type>()
                        .unwrap()
                        .get(0)
                });
            }
            GroupsProxy::Slice {
                groups: groups_slice,
                ..
            } => groups_slice,
        };

        if _use_rolling_kernels(groups_slice, self.chunks()) {
            // rolling kernels were selected: hand over to the Float64 series
            let as_float = self.cast(&DataType::Float64).unwrap();
            return as_float.agg_var(groups, ddof);
        }

        _agg_helper_slice::<Float64Type, _>(groups_slice, |[first, len]| {
            debug_assert!(first + len <= self.len() as IdxSize);
            if len == 0 {
                None
            } else if len == 1 {
                NumCast::from(0)
            } else {
                let window = _slice_from_offsets(self, first, len);
                window.var(ddof)
            }
        })
    }
    /// Computes the standard deviation of every group as a `Float64` series.
    ///
    /// `ddof` is passed through unchanged to the underlying routines. Index
    /// groups gather their rows and read the single value produced by
    /// `std_as_series`. Slice groups either forward to the `Float64` cast of
    /// this array (when `_use_rolling_kernels` approves) or slice each window:
    /// empty windows give `None`, single-row windows give `0`, larger windows
    /// call `std`.
    ///
    /// # Safety
    /// Rows are gathered with `take_unchecked`, so presumably all group indices
    /// must be in bounds for `self` — confirm with callers.
    pub(crate) unsafe fn agg_std(&self, groups: &GroupsProxy, ddof: u8) -> Series {
        let groups_slice = match groups {
            GroupsProxy::Idx(idx_groups) => {
                return agg_helper_idx_on_all::<Float64Type, _>(idx_groups, |indices| {
                    debug_assert!(indices.len() <= self.len());
                    if indices.is_empty() {
                        return None;
                    }
                    let gathered = self.take_unchecked(indices.into());
                    gathered
                        .std_as_series(ddof)
                        .unpack::<Float64Type>()
                        .unwrap()
                        .get(0)
                });
            }
            GroupsProxy::Slice {
                groups: groups_slice,
                ..
            } => groups_slice,
        };

        if _use_rolling_kernels(groups_slice, self.chunks()) {
            // rolling kernels were selected: hand over to the Float64 series
            let as_float = self.cast(&DataType::Float64).unwrap();
            return as_float.agg_std(groups, ddof);
        }

        _agg_helper_slice::<Float64Type, _>(groups_slice, |[first, len]| {
            debug_assert!(first + len <= self.len() as IdxSize);
            if len == 0 {
                None
            } else if len == 1 {
                NumCast::from(0)
            } else {
                let window = _slice_from_offsets(self, first, len);
                window.std(ddof)
            }
        })
    }

    /// Computes the requested quantile of every group as a `Float64` series.
    ///
    /// * Index groups: empty groups give `None`; otherwise the rows are gathered
    ///   and the single value produced by `quantile_as_series` is returned.
    /// * Slice groups: when `_use_rolling_kernels` approves, the array is cast
    ///   to `Float64` and the call is forwarded to that series; otherwise each
    ///   window is sliced out. Empty windows give `None`, single-row windows
    ///   convert that row, larger windows call `quantile`.
    ///
    /// # Panics
    /// The results of `quantile_as_series` / `quantile` are unwrapped, so an
    /// error from them panics (presumably an invalid `quantile` value — confirm
    /// that callers validate it beforehand).
    ///
    /// # Safety
    /// Rows are gathered with `take_unchecked`, so presumably all group indices
    /// must be in bounds for `self` — confirm with callers.
    pub(crate) unsafe fn agg_quantile(
        &self,
        groups: &GroupsProxy,
        quantile: f64,
        interpol: QuantileInterpolOptions,
    ) -> Series {
        match groups {
            GroupsProxy::Idx(groups) => agg_helper_idx_on_all::<Float64Type, _>(groups, |idx| {
                debug_assert!(idx.len() <= self.len());
                if idx.is_empty() {
                    return None;
                }
                let take = self.take_unchecked(idx.into());
                take.quantile_as_series(quantile, interpol)
                    .unwrap()
                    .unpack::<Float64Type>()
                    .unwrap()
                    .get(0)
            }),
            GroupsProxy::Slice {
                groups: groups_slice,
                ..
            } => {
                if _use_rolling_kernels(groups_slice, self.chunks()) {
                    // rolling kernels were selected: hand over to the Float64 series
                    let ca = self.cast(&DataType::Float64).unwrap();
                    ca.agg_quantile(groups, quantile, interpol)
                } else {
                    _agg_helper_slice::<Float64Type, _>(groups_slice, |[first, len]| {
                        // The whole window `[first, first + len)` must lie inside
                        // the array; checking `len` alone would miss a window that
                        // starts too far to the right. This matches the check used
                        // by the other slice aggregations.
                        debug_assert!(first + len <= self.len() as IdxSize);
                        match len {
                            0 => None,
                            1 => self.get(first as usize).map(|v| NumCast::from(v).unwrap()),
                            _ => {
                                let arr_group = _slice_from_offsets(self, first, len);
                                arr_group.quantile(quantile, interpol).unwrap()
                            }
                        }
                    })
                }
            }
        }
    }
    /// Computes the median of every group as a `Float64` series.
    ///
    /// * Index groups: empty groups give `None`; otherwise the rows are gathered
    ///   and the single value produced by `median_as_series` is returned.
    /// * Slice groups: when `_use_rolling_kernels` approves, the array is cast
    ///   to `Float64` and the call is forwarded to that series; otherwise each
    ///   window is sliced out. Empty windows give `None`, single-row windows
    ///   convert that row, larger windows call `median`.
    ///
    /// # Safety
    /// Rows are gathered with `take_unchecked`, so presumably all group indices
    /// must be in bounds for `self` — confirm with callers.
    pub(crate) unsafe fn agg_median(&self, groups: &GroupsProxy) -> Series {
        match groups {
            GroupsProxy::Idx(groups) => agg_helper_idx_on_all::<Float64Type, _>(groups, |idx| {
                debug_assert!(idx.len() <= self.len());
                if idx.is_empty() {
                    return None;
                }
                let take = self.take_unchecked(idx.into());
                take.median_as_series()
                    .unpack::<Float64Type>()
                    .unwrap()
                    .get(0)
            }),
            GroupsProxy::Slice {
                groups: groups_slice,
                ..
            } => {
                if _use_rolling_kernels(groups_slice, self.chunks()) {
                    // rolling kernels were selected: hand over to the Float64 series
                    let ca = self.cast(&DataType::Float64).unwrap();
                    ca.agg_median(groups)
                } else {
                    _agg_helper_slice::<Float64Type, _>(groups_slice, |[first, len]| {
                        // The whole window `[first, first + len)` must lie inside
                        // the array; checking `len` alone would miss a window that
                        // starts too far to the right. This matches the check used
                        // by the other slice aggregations.
                        debug_assert!(first + len <= self.len() as IdxSize);
                        match len {
                            0 => None,
                            1 => self.get(first as usize).map(|v| NumCast::from(v).unwrap()),
                            _ => {
                                let arr_group = _slice_from_offsets(self, first, len);
                                arr_group.median()
                            }
                        }
                    })
                }
            }
        }
    }

src/chunked_array/ops/is_in.rs (line 64)

    fn is_in(&self, other: &Series) -> PolarsResult<BooleanChunked> {
        // We check implicitly cast to supertype here
        match other.dtype() {
            DataType::List(dt) => {
                let st = try_get_supertype(self.dtype(), dt)?;
                if &st != self.dtype() {
                    let left = self.cast(&st)?;
                    let right = other.cast(&DataType::List(Box::new(st)))?;
                    return left.is_in(&right);
                }

                let mut ca: BooleanChunked = if self.len() == 1 && other.len() != 1 {
                    let value = self.get(0);

                    other
                        .list()?
                        .amortized_iter()
                        .map(|opt_s| {
                            opt_s.map(|s| {
                                let ca = s.as_ref().unpack::<T>().unwrap();
                                ca.into_iter().any(|a| a == value)
                            }) == Some(true)
                        })
                        .collect_trusted()
                } else {
                    self.into_iter()
                        .zip(other.list()?.amortized_iter())
                        .map(|(value, series)| match (value, series) {
                            (val, Some(series)) => {
                                let ca = series.as_ref().unpack::<T>().unwrap();
                                ca.into_iter().any(|a| a == val)
                            }
                            _ => false,
                        })
                        .collect_trusted()
                };
                ca.rename(self.name());
                Ok(ca)
            }
            _ => {
                // first make sure that the types are equal
                let st = try_get_supertype(self.dtype(), other.dtype())?;
                if self.dtype() != other.dtype() {
                    let left = self.cast(&st)?;
                    let right = other.cast(&st)?;
                    return left.is_in(&right);
                }
                // now that the types are equal, we coerce every 32 bit array to u32
                // and every 64 bit array to u64 (including floats)
                // this allows hashing them and greatly reduces the number of code paths.
                match self.dtype() {
                    DataType::UInt64 | DataType::Int64 | DataType::Float64 => unsafe {
                        is_in_helper::<T, u64>(self, other)
                    },
                    DataType::UInt32 | DataType::Int32 | DataType::Float32 => unsafe {
                        is_in_helper::<T, u32>(self, other)
                    },
                    DataType::UInt8 | DataType::Int8 => unsafe {
                        is_in_helper::<T, u8>(self, other)
                    },
                    DataType::UInt16 | DataType::Int16 => unsafe {
                        is_in_helper::<T, u16>(self, other)
                    },
                    _ => Err(PolarsError::ComputeError(
                        format!(
                            "Data type {:?} not supported in is_in operation",
                            self.dtype()
                        )
                        .into(),
                    )),
                }
            }
        }
        .map(|mut ca| {
            ca.rename(self.name());
            ca
        })
    }
}
/// Membership test for string columns.
impl IsIn for Utf8Chunked {
    /// Tests whether the string values occur in `other`.
    ///
    /// Supported right-hand sides:
    /// * a list of categoricals (only with the `dtype-categorical` feature):
    ///   only the first value of `self` is looked at; it is resolved through the
    ///   reverse mapping and searched in every list;
    /// * a list of strings: a single-row `self` with a multi-row `other` is
    ///   searched in every list, otherwise rows are paired up;
    /// * a string column: all its values are collected into a hash set and every
    ///   value of `self` is looked up in it.
    ///
    /// The result carries the name of `self`.
    ///
    /// # Errors
    /// Returns `SchemaMisMatch` for any other right-hand dtype.
    fn is_in(&self, other: &Series) -> PolarsResult<BooleanChunked> {
        let result: PolarsResult<BooleanChunked> = match other.dtype() {
            #[cfg(feature = "dtype-categorical")]
            DataType::List(dt) if matches!(&**dt, DataType::Categorical(_)) => {
                let rev_map = match &**dt {
                    DataType::Categorical(Some(rev_map)) => rev_map,
                    _ => unreachable!(),
                };
                let opt_val = self.get(0);
                let lists = other.list()?;

                match opt_val {
                    // a null needle matches every list that contains a null
                    None => {
                        let mask: BooleanChunked = lists
                            .amortized_iter()
                            .map(|opt_s| opt_s.map_or(false, |s| s.as_ref().null_count() > 0))
                            .collect_trusted();
                        Ok(mask)
                    }
                    Some(value) => match rev_map.find(value) {
                        // all false
                        None => Ok(BooleanChunked::full(self.name(), false, lists.len())),
                        Some(idx) => {
                            let mask: BooleanChunked = lists
                                .amortized_iter()
                                .map(|opt_s| {
                                    opt_s.map_or(false, |s| {
                                        let physical = s.as_ref().to_physical_repr();
                                        let ca = physical.as_ref().u32().unwrap();
                                        if ca.null_count() == 0 {
                                            ca.into_no_null_iter().any(|a| a == idx)
                                        } else {
                                            ca.into_iter().any(|a| a == Some(idx))
                                        }
                                    })
                                })
                                .collect_trusted();
                            Ok(mask)
                        }
                    },
                }
            }
            DataType::List(dt) if DataType::Utf8 == **dt => {
                let lists = other.list()?;
                let mask: BooleanChunked = if self.len() == 1 && other.len() != 1 {
                    // one needle, searched in every list
                    let needle = self.get(0);
                    lists
                        .amortized_iter()
                        .map(|opt_s| {
                            opt_s.map_or(false, |s| {
                                let ca = s.as_ref().unpack::<Utf8Type>().unwrap();
                                ca.into_iter().any(|a| a == needle)
                            })
                        })
                        .collect_trusted()
                } else {
                    // row-wise: each value is searched in the list on the same row
                    self.into_iter()
                        .zip(lists.amortized_iter())
                        .map(|(value, opt_s)| {
                            opt_s.map_or(false, |s| {
                                let ca = s.as_ref().unpack::<Utf8Type>().unwrap();
                                ca.into_iter().any(|a| a == value)
                            })
                        })
                        .collect_trusted()
                };
                Ok(mask)
            }
            DataType::Utf8 => {
                let mut haystack = HashSet::with_capacity(other.len());

                let rhs = other.utf8()?;
                for arr in rhs.downcast_iter() {
                    for opt_val in arr.into_iter() {
                        haystack.insert(opt_val);
                    }
                }
                let mask: BooleanChunked = self
                    .into_iter()
                    .map(|opt_val| haystack.contains(&opt_val))
                    .collect_trusted();
                Ok(mask)
            }
            _ => Err(PolarsError::SchemaMisMatch(
                format!(
                    "cannot do is_in operation with left a dtype: {:?} and right a dtype {:?}",
                    self.dtype(),
                    other.dtype()
                )
                .into(),
            )),
        };

        // every successful path ends up named after `self`
        result.map(|mut ca| {
            ca.rename(self.name());
            ca
        })
    }
}

/// Membership test for binary columns.
#[cfg(feature = "dtype-binary")]
impl IsIn for BinaryChunked {
    /// Tests whether the binary values occur in `other`.
    ///
    /// Supported right-hand sides:
    /// * a list of binaries: a single-row `self` with a multi-row `other` is
    ///   searched in every list, otherwise rows are paired up;
    /// * a binary column: all its values are collected into a hash set and every
    ///   value of `self` is looked up in it.
    ///
    /// The result carries the name of `self`.
    ///
    /// # Errors
    /// Returns `SchemaMisMatch` for any other right-hand dtype.
    fn is_in(&self, other: &Series) -> PolarsResult<BooleanChunked> {
        let result: PolarsResult<BooleanChunked> = match other.dtype() {
            DataType::List(dt) if DataType::Binary == **dt => {
                let lists = other.list()?;
                let mask: BooleanChunked = if self.len() == 1 && other.len() != 1 {
                    // one needle, searched in every list
                    let needle = self.get(0);
                    lists
                        .amortized_iter()
                        .map(|opt_b| {
                            opt_b.map_or(false, |s| {
                                let ca = s.as_ref().unpack::<BinaryType>().unwrap();
                                ca.into_iter().any(|a| a == needle)
                            })
                        })
                        .collect_trusted()
                } else {
                    // row-wise: each value is searched in the list on the same row
                    self.into_iter()
                        .zip(lists.amortized_iter())
                        .map(|(value, opt_b)| {
                            opt_b.map_or(false, |s| {
                                let ca = s.as_ref().unpack::<BinaryType>().unwrap();
                                ca.into_iter().any(|a| a == value)
                            })
                        })
                        .collect_trusted()
                };
                Ok(mask)
            }
            DataType::Binary => {
                let mut haystack = HashSet::with_capacity(other.len());

                let rhs = other.binary()?;
                for arr in rhs.downcast_iter() {
                    for opt_val in arr.into_iter() {
                        haystack.insert(opt_val);
                    }
                }
                let mask: BooleanChunked = self
                    .into_iter()
                    .map(|opt_val| haystack.contains(&opt_val))
                    .collect_trusted();
                Ok(mask)
            }
            _ => Err(PolarsError::SchemaMisMatch(
                format!(
                    "cannot do is_in operation with left a dtype: {:?} and right a dtype {:?}",
                    self.dtype(),
                    other.dtype()
                )
                .into(),
            )),
        };

        // every successful path ends up named after `self`
        result.map(|mut ca| {
            ca.rename(self.name());
            ca
        })
    }
}

impl IsIn for BooleanChunked {
    /// Tests whether the boolean values occur in `other`.
    ///
    /// Supported right-hand sides:
    /// * a list whose inner dtype equals the dtype of `self`: a single-row
    ///   `self` with a multi-row `other` is searched in every list, otherwise
    ///   rows are paired up;
    /// * a boolean column: `true` rows map to `other.any()` and `false` rows map
    ///   to `!other.all()`.
    ///
    /// The result carries the name of `self`.
    ///
    /// # Errors
    /// Returns `SchemaMisMatch` for any other right-hand dtype.
    fn is_in(&self, other: &Series) -> PolarsResult<BooleanChunked> {
        let result: PolarsResult<BooleanChunked> = match other.dtype() {
            DataType::List(dt) if self.dtype() == &**dt => {
                let lists = other.list()?;
                let mask: BooleanChunked = if self.len() == 1 && other.len() != 1 {
                    // one needle, searched in every list
                    let needle = self.get(0);
                    let n_lists = other.len();
                    // safety: we know the iterators len
                    unsafe {
                        lists
                            .amortized_iter()
                            .map(|opt_s| {
                                opt_s.map_or(false, |s| {
                                    let ca = s.as_ref().unpack::<BooleanType>().unwrap();
                                    ca.into_iter().any(|a| a == needle)
                                })
                            })
                            .trust_my_length(n_lists)
                            .collect_trusted()
                    }
                } else {
                    // row-wise: each value is searched in the list on the same row
                    self.into_iter()
                        .zip(lists.amortized_iter())
                        .map(|(value, opt_s)| {
                            opt_s.map_or(false, |s| {
                                let ca = s.as_ref().unpack::<BooleanType>().unwrap();
                                ca.into_iter().any(|a| a == value)
                            })
                        })
                        .collect_trusted()
                };
                Ok(mask)
            }
            DataType::Boolean => {
                let rhs = other.bool().unwrap();
                let has_true = rhs.any();
                let has_false = !rhs.all();
                Ok(self.apply(|v| (v && has_true) || (!v && has_false)))
            }
            _ => Err(PolarsError::SchemaMisMatch(
                format!(
                    "cannot do is_in operation with left a dtype: {:?} and right a dtype {:?}",
                    self.dtype(),
                    other.dtype()
                )
                .into(),
            )),
        };

        // every successful path ends up named after `self`
        result.map(|mut ca| {
            ca.rename(self.name());
            ca
        })
    }

src/frame/row.rs (line 579)

/// Transposes a set of numeric columns into a new `DataFrame`.
///
/// Every input series becomes one row of the output: the result has
/// `cols[0].len()` columns, each of length `cols.len()`, named `column_{i}`.
/// Each input is cast to the dtype of `T` before its values are copied.
///
/// # Panics
/// Panics when `cols` is empty (it indexes `cols[0]`) and when a cast to the
/// dtype of `T` fails (the cast result is unwrapped).
///
/// NOTE(review): the output buffers are sized from `cols[0].len()` and written
/// with unchecked indexing, so this presumably requires every series in `cols`
/// to have that same length — confirm that callers guarantee it.
fn numeric_transpose<T>(cols: &[Series]) -> PolarsResult<DataFrame>
where
    T: PolarsNumericType,
    ChunkedArray<T>: IntoSeries,
{
    // Output shape: one output column per input row, one output row per input column.
    let new_width = cols[0].len();
    let new_height = cols.len();

    let has_nulls = cols.iter().any(|s| s.null_count() > 0);

    // One value buffer per output column. Only capacity is reserved here; the
    // elements are written through raw pointers below and the length is set
    // afterwards.
    let mut values_buf: Vec<Vec<T::Native>> = (0..new_width)
        .map(|_| Vec::with_capacity(new_height))
        .collect();
    let mut validity_buf: Vec<_> = if has_nulls {
        // we first use bools instead of bits, because we can access these in parallel without aliasing
        (0..new_width).map(|_| vec![true; new_height]).collect()
    } else {
        // no nulls anywhere: validity buffers stay empty and are never read
        (0..new_width).map(|_| vec![]).collect()
    };

    // Work with *mut pointers because it is UB to write through & references.
    // The pointers are carried as `usize` so the closure below can capture them.
    let values_buf_ptr = &mut values_buf as *mut Vec<Vec<T::Native>> as usize;
    let validity_buf_ptr = &mut validity_buf as *mut Vec<Vec<bool>> as usize;

    // NOTE(review): the loop below uses a sequential `iter()` inside
    // `POOL.install`, while the comments talk about parallel access — confirm
    // which one is intended.
    POOL.install(|| {
        cols.iter().enumerate().for_each(|(row_idx, s)| {
            let s = s.cast(&T::get_dtype()).unwrap();
            let ca = s.unpack::<T>().unwrap();

            // Safety
            // we access in parallel, but every access is unique, so we don't break aliasing rules
            // we also ensured we allocated enough memory, so we never reallocate and thus
            // the pointers remain valid.
            if has_nulls {
                for (col_idx, opt_v) in ca.into_iter().enumerate() {
                    match opt_v {
                        None => unsafe {
                            // mark the slot as null in the bool-based validity buffer
                            let column = (*(validity_buf_ptr as *mut Vec<Vec<bool>>))
                                .get_unchecked_mut(col_idx);
                            let el_ptr = column.as_mut_ptr();
                            *el_ptr.add(row_idx) = false;
                            // we must initialize this memory otherwise downstream code
                            // might access uninitialized memory when the masked out values
                            // are changed.
                            add_value(values_buf_ptr, col_idx, row_idx, T::Native::default());
                        },
                        Some(v) => unsafe {
                            // `add_value` is defined elsewhere; presumably it stores `v`
                            // at (col_idx, row_idx) of the value buffers — confirm.
                            add_value(values_buf_ptr, col_idx, row_idx, v);
                        },
                    }
                }
            } else {
                for (col_idx, v) in ca.into_no_null_iter().enumerate() {
                    unsafe {
                        // write straight into the reserved capacity of output column `col_idx`
                        let column = (*(values_buf_ptr as *mut Vec<Vec<T::Native>>))
                            .get_unchecked_mut(col_idx);
                        let el_ptr = column.as_mut_ptr();
                        *el_ptr.add(row_idx) = v;
                    }
                }
            }
        })
    });

    // Turn every (values, validity) pair into a single-chunk series.
    let series = POOL.install(|| {
        values_buf
            .into_par_iter()
            .zip(validity_buf)
            .enumerate()
            .map(|(i, (mut values, validity))| {
                // Safety:
                // all values are written we can now set len
                unsafe {
                    values.set_len(new_height);
                }

                // Convert the bool buffer to a bitmap and drop it again when it
                // turns out to contain no nulls for this column.
                let validity = if has_nulls {
                    let validity = Bitmap::from_trusted_len_iter(validity.iter().copied());
                    if validity.unset_bits() > 0 {
                        Some(validity)
                    } else {
                        None
                    }
                } else {
                    None
                };

                let arr = PrimitiveArray::<T::Native>::new(
                    T::get_dtype().to_arrow(),
                    values.into(),
                    validity,
                );
                let name = format!("column_{i}");
                ChunkedArray::<T>::from_chunks(&name, vec![Box::new(arr) as ArrayRef]).into_series()
            })
            .collect()
    });

    Ok(DataFrame::new_no_checks(series))
}

Trait Implementations§

source §

impl<'a, T> AsMut<ChunkedArray<T>> for dyn SeriesTrait + 'a where
T: 'static + PolarsDataType,

source §

fn as_mut(&mut self) -> &mut ChunkedArray<T>

Converts this type into a mutable reference of the (usually inferred) input type.

source §

impl<'a, T> AsRef<ChunkedArray<T>> for dyn SeriesTrait + 'a where
T: 'static + PolarsDataType,

source §

fn as_ref(&self) -> &ChunkedArray<T>

Converts this type into a shared reference of the (usually inferred) input type.

source §

impl<'a> AsRef<dyn SeriesTrait + 'a> for Series

source §

fn as_ref(&self) -> &(dyn SeriesTrait + 'a)

Converts this type into a shared reference of the (usually inferred) input type.

Trait polars_core::prelude::SeriesTrait

Required Methods§

fn rename(&mut self, name: &str)

fn chunks(&self) -> &Vec<ArrayRef> ⓘ

fn take_iter(&self, _iter: &mut dyn TakeIterator) -> PolarsResult<Series>

unsafe fn take_iter_unchecked(&self, _iter: &mut dyn TakeIterator) -> Series

unsafe fn take_unchecked(&self, _idx: &IdxCa) -> PolarsResult<Series>

unsafe fn take_opt_iter_unchecked( &self, _iter: &mut dyn TakeIteratorNulls) -> Series

fn take(&self, _indices: &IdxCa) -> PolarsResult<Series>

fn len(&self) -> usize

fn take_every(&self, n: usize) -> Series

fn has_validity(&self) -> bool

Provided Methods§

fn is_sorted(&self) -> IsSorted

fn bitand(&self, _other: &Series) -> PolarsResult<Series>

fn bitor(&self, _other: &Series) -> PolarsResult<Series>

fn bitxor(&self, _other: &Series) -> PolarsResult<Series>

fn chunk_lengths(&self) -> ChunkIdIter<'_>

fn name(&self) -> &str

fn field(&self) -> Cow<'_, Field>

fn dtype(&self) -> &DataType

fn n_chunks(&self) -> usize

fn shrink_to_fit(&mut self)

fn limit(&self, num_elements: usize) -> Series

fn slice(&self, _offset: i64, _length: usize) -> Series

fn filter(&self, _filter: &BooleanChunked) -> PolarsResult<Series>

fn is_empty(&self) -> bool

fn rechunk(&self) -> Series

fn drop_nulls(&self) -> Series

fn mean(&self) -> Option<f64>

fn median(&self) -> Option<f64>

fn new_from_index(&self, _index: usize, _length: usize) -> Series

fn cast(&self, _data_type: &DataType) -> PolarsResult<Series>

fn get(&self, _index: usize) -> PolarsResult<AnyValue<'_>>

unsafe fn get_unchecked(&self, _index: usize) -> AnyValue<'_>

fn sort_with(&self, _options: SortOptions) -> Series

fn argsort(&self, options: SortOptions) -> IdxCa

fn null_count(&self) -> usize

fn unique(&self) -> PolarsResult<Series>

fn n_unique(&self) -> PolarsResult<usize>

fn arg_unique(&self) -> PolarsResult<IdxCa>

fn arg_min(&self) -> Option<usize>

fn arg_max(&self) -> Option<usize>

fn is_null(&self) -> BooleanChunked

fn is_not_null(&self) -> BooleanChunked

fn is_unique(&self) -> PolarsResult<BooleanChunked>

fn is_duplicated(&self) -> PolarsResult<BooleanChunked>

fn reverse(&self) -> Series

fn as_single_ptr(&mut self) -> PolarsResult<usize>

fn shift(&self, _periods: i64) -> Series

fn fill_null(&self, _strategy: FillNullStrategy) -> PolarsResult<Series>

fn _sum_as_series(&self) -> Series

fn max_as_series(&self) -> Series

fn min_as_series(&self) -> Series

fn median_as_series(&self) -> Series

fn var_as_series(&self, _ddof: u8) -> Series

fn std_as_series(&self, _ddof: u8) -> Series

fn quantile_as_series( &self, _quantile: f64, _interpol: QuantileInterpolOptions) -> PolarsResult<Series>

fn fmt_list(&self) -> String

fn clone_inner(&self) -> Arc<dyn SeriesTrait>

fn get_object(&self, _index: usize) -> Option<&dyn PolarsObjectSafe>

fn as_any(&self) -> &dyn Any

fn as_any_mut(&mut self) -> &mut dyn Any

fn peak_max(&self) -> BooleanChunked

fn peak_min(&self) -> BooleanChunked

fn is_in(&self, _other: &Series) -> PolarsResult<BooleanChunked>

fn repeat_by(&self, _by: &IdxCa) -> ListChunked

fn checked_div(&self, _rhs: &Series) -> PolarsResult<Series>

fn is_first(&self) -> PolarsResult<BooleanChunked>

fn mode(&self) -> PolarsResult<Series>

fn rolling_apply( &self, _f: &dyn Fn(&Series) -> Series, _options: RollingOptionsFixedWindow) -> PolarsResult<Series>

fn str_concat(&self, _delimiter: &str) -> Utf8Chunked

Implementations§

impl<'a> dyn SeriesTrait + 'a

pub fn unpack<N>(&self) -> PolarsResult<&ChunkedArray<N>> where N: PolarsDataType + 'static,

Trait Implementations§

impl<'a, T> AsMut<ChunkedArray<T>> for dyn SeriesTrait + 'a where T: 'static + PolarsDataType,

fn as_mut(&mut self) -> &mut ChunkedArray<T>

impl<'a, T> AsRef<ChunkedArray<T>> for dyn SeriesTrait + 'a where T: 'static + PolarsDataType,

fn as_ref(&self) -> &ChunkedArray<T>

unsafe fn take_opt_iter_unchecked(
&self,
_iter: &mut dyn TakeIteratorNulls
) -> Series

fn quantile_as_series(
&self,
_quantile: f64,
_interpol: QuantileInterpolOptions
) -> PolarsResult<Series>

fn rolling_apply(
&self,
_f: &dyn Fn(&Series) -> Series,
_options: RollingOptionsFixedWindow
) -> PolarsResult<Series>

pub fn unpack<N>(&self) -> PolarsResult<&ChunkedArray<N>> where
N: PolarsDataType + 'static,

impl<'a, T> AsMut<ChunkedArray<T>> for dyn SeriesTrait + 'a where
T: 'static + PolarsDataType,

impl<'a, T> AsRef<ChunkedArray<T>> for dyn SeriesTrait + 'a where
T: 'static + PolarsDataType,