Struct polars_core::frame::DataFrame

source ·

pub struct DataFrame { /* private fields */ }

Expand description

A contiguous growable collection of Series that have the same length.

Use declarations

All the common tools can be found in crate::prelude (or in polars::prelude).

use polars_core::prelude::*; // if the crate polars-core is used directly
// use polars::prelude::*;      if the crate polars is used

Indexing

By a number

The Index<usize> is implemented for the DataFrame.

let df = df!("Fruit" => &["Apple", "Apple", "Pear"],
             "Color" => &["Red", "Yellow", "Green"])?;

assert_eq!(df[0], Series::new("Fruit", &["Apple", "Apple", "Pear"]));
assert_eq!(df[1], Series::new("Color", &["Red", "Yellow", "Green"]));

By a `Series` name

let df = df!("Fruit" => &["Apple", "Apple", "Pear"],
             "Color" => &["Red", "Yellow", "Green"])?;

assert_eq!(df["Fruit"], Series::new("Fruit", &["Apple", "Apple", "Pear"]));
assert_eq!(df["Color"], Series::new("Color", &["Red", "Yellow", "Green"]));

Implementations§

source §

impl DataFrame

source

pub fn sample_n(
 &self,
 n: usize,
 with_replacement: bool,
 shuffle: bool,
 seed: Option<u64>
) -> PolarsResult<Self>

Available on crate feature random only.

Sample n datapoints from this DataFrame.

Examples found in repository ?

src/chunked_array/random.rs (line 199)

    pub fn sample_frac(
        &self,
        frac: f64,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) -> PolarsResult<Self> {
        let n = (self.height() as f64 * frac) as usize;
        self.sample_n(n, with_replacement, shuffle, seed)
    }

source

pub fn sample_frac(
 &self,
 frac: f64,
 with_replacement: bool,
 shuffle: bool,
 seed: Option<u64>
) -> PolarsResult<Self>

Available on crate feature random only.

Sample a fraction between 0.0-1.0 of this DataFrame.

source §

impl DataFrame

source

pub fn join_asof_by<I, S>(
 &self,
 other: &DataFrame,
 left_on: &str,
 right_on: &str,
 left_by: I,
 right_by: I,
 strategy: AsofStrategy,
 tolerance: Option<AnyValue<'static>>
) -> PolarsResult<DataFrame>where
 I: IntoIterator<Item = S>,
 S: AsRef<str>,

Available on crate feature asof_join only.

This is similar to a left-join except that we match on nearest key rather than equal keys. The keys must be sorted to perform an asof join. This is a special implementation of an asof join that searches for the nearest keys within a subgroup set by by.

source §

impl DataFrame

source

pub fn join_asof(
 &self,
 other: &DataFrame,
 left_on: &str,
 right_on: &str,
 strategy: AsofStrategy,
 tolerance: Option<AnyValue<'static>>,
 suffix: Option<String>
) -> PolarsResult<DataFrame>

Available on crate feature asof_join only.

This is similar to a left-join except that we match on nearest key rather than equal keys. The keys must be sorted to perform an asof join

source §

impl DataFrame

source

pub fn cross_join(
 &self,
 other: &DataFrame,
 suffix: Option<&str>,
 slice: Option<(i64, usize)>
) -> PolarsResult<DataFrame>

Creates the cartesian product from both frames, preserves the order of the left keys.

source §

impl DataFrame

source

pub fn explode_impl(&self, columns: Vec<Series>) -> PolarsResult<DataFrame>

Examples found in repository ?

src/frame/explode.rs (line 149)

    pub fn explode<I, S>(&self, columns: I) -> PolarsResult<DataFrame>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        // We need to sort the column by order of original occurrence. Otherwise the insert by index
        // below will panic
        let columns = self.select_series(columns)?;
        self.explode_impl(columns)
    }

source

pub fn explode<I, S>(&self, columns: I) -> PolarsResult<DataFrame>where
I: IntoIterator<Item = S>,
S: AsRef<str>,

Explode DataFrame to long format by exploding a column with Lists.

Example

let s0 = Series::new("a", &[1i64, 2, 3]);
let s1 = Series::new("b", &[1i64, 1, 1]);
let s2 = Series::new("c", &[2i64, 2, 2]);
let list = Series::new("foo", &[s0, s1, s2]);

let s0 = Series::new("B", [1, 2, 3]);
let s1 = Series::new("C", [1, 1, 1]);
let df = DataFrame::new(vec![list, s0, s1])?;
let exploded = df.explode(["foo"])?;

println!("{:?}", df);
println!("{:?}", exploded);

Outputs:

 +-------------+-----+-----+
 | foo         | B   | C   |
 | ---         | --- | --- |
 | list [i64]  | i32 | i32 |
 +=============+=====+=====+
 | "[1, 2, 3]" | 1   | 1   |
 +-------------+-----+-----+
 | "[1, 1, 1]" | 2   | 1   |
 +-------------+-----+-----+
 | "[2, 2, 2]" | 3   | 1   |
 +-------------+-----+-----+

 +-----+-----+-----+
 | foo | B   | C   |
 | --- | --- | --- |
 | i64 | i32 | i32 |
 +=====+=====+=====+
 | 1   | 1   | 1   |
 +-----+-----+-----+
 | 2   | 1   | 1   |
 +-----+-----+-----+
 | 3   | 1   | 1   |
 +-----+-----+-----+
 | 1   | 2   | 1   |
 +-----+-----+-----+
 | 1   | 2   | 1   |
 +-----+-----+-----+
 | 1   | 2   | 1   |
 +-----+-----+-----+
 | 2   | 3   | 1   |
 +-----+-----+-----+
 | 2   | 3   | 1   |
 +-----+-----+-----+
 | 2   | 3   | 1   |
 +-----+-----+-----+

source

pub fn melt<I, J>(&self, id_vars: I, value_vars: J) -> PolarsResult<Self>where
I: IntoVec<String>,
J: IntoVec<String>,

Unpivot a DataFrame from wide to long format.

Example

Arguments

id_vars - String slice that represent the columns to use as id variables.
value_vars - String slice that represent the columns to use as value variables.

If value_vars is empty all columns that are not in id_vars will be used.

let df = df!("A" => &["a", "b", "a"],
             "B" => &[1, 3, 5],
             "C" => &[10, 11, 12],
             "D" => &[2, 4, 6]
    )?;

let melted = df.melt(&["A", "B"], &["C", "D"])?;
println!("{:?}", df);
println!("{:?}", melted);

Outputs:

 +-----+-----+-----+-----+
 | A   | B   | C   | D   |
 | --- | --- | --- | --- |
 | str | i32 | i32 | i32 |
 +=====+=====+=====+=====+
 | "a" | 1   | 10  | 2   |
 +-----+-----+-----+-----+
 | "b" | 3   | 11  | 4   |
 +-----+-----+-----+-----+
 | "a" | 5   | 12  | 6   |
 +-----+-----+-----+-----+

 +-----+-----+----------+-------+
 | A   | B   | variable | value |
 | --- | --- | ---      | ---   |
 | str | i32 | str      | i32   |
 +=====+=====+==========+=======+
 | "a" | 1   | "C"      | 10    |
 +-----+-----+----------+-------+
 | "b" | 3   | "C"      | 11    |
 +-----+-----+----------+-------+
 | "a" | 5   | "C"      | 12    |
 +-----+-----+----------+-------+
 | "a" | 1   | "D"      | 2     |
 +-----+-----+----------+-------+
 | "b" | 3   | "D"      | 4     |
 +-----+-----+----------+-------+
 | "a" | 5   | "D"      | 6     |
 +-----+-----+----------+-------+

source

pub fn melt2(&self, args: MeltArgs) -> PolarsResult<Self>

Similar to melt, but without generics. This may be easier if you want to pass an empty id_vars or empty value_vars.

Examples found in repository ?

src/frame/explode.rs (lines 216-220)

    pub fn melt<I, J>(&self, id_vars: I, value_vars: J) -> PolarsResult<Self>
    where
        I: IntoVec<String>,
        J: IntoVec<String>,
    {
        let id_vars = id_vars.into_vec();
        let value_vars = value_vars.into_vec();
        self.melt2(MeltArgs {
            id_vars,
            value_vars,
            ..Default::default()
        })
    }

source §

impl DataFrame

source

pub fn groupby_with_series(
 &self,
 by: Vec<Series>,
 multithreaded: bool,
 sorted: bool
) -> PolarsResult<GroupBy<'_>>

Examples found in repository ?

src/frame/groupby/mod.rs (line 192)

    pub fn groupby<I, S>(&self, by: I) -> PolarsResult<GroupBy>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let selected_keys = self.select_series(by)?;
        self.groupby_with_series(selected_keys, true, false)
    }

    /// Group DataFrame using a Series column.
    /// The groups are ordered by their smallest row index.
    pub fn groupby_stable<I, S>(&self, by: I) -> PolarsResult<GroupBy>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let selected_keys = self.select_series(by)?;
        self.groupby_with_series(selected_keys, true, true)
    }

source

pub fn groupby<I, S>(&self, by: I) -> PolarsResult<GroupBy<'_>>where
I: IntoIterator<Item = S>,
S: AsRef<str>,

Group DataFrame using a Series column.

Example

use polars_core::prelude::*;
fn groupby_sum(df: &DataFrame) -> PolarsResult<DataFrame> {
    df.groupby(["column_name"])?
    .select(["agg_column_name"])
    .sum()
}

Examples found in repository ?

src/frame/mod.rs (line 3087)

    fn unique_impl(
        &self,
        maintain_order: bool,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<Self> {
        use UniqueKeepStrategy::*;
        let names = match &subset {
            Some(s) => s.iter().map(|s| &**s).collect(),
            None => self.get_column_names(),
        };

        let columns = match (keep, maintain_order) {
            (First, true) => {
                let gb = self.groupby_stable(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
            }
            (Last, true) => {
                // maintain order by last values, so the sorted groups are not correct as they
                // are sorted by the first value
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                let last_idx: NoNull<IdxCa> = groups
                    .iter()
                    .map(|g| match g {
                        GroupsIndicator::Idx((_first, idx)) => idx[idx.len() - 1],
                        GroupsIndicator::Slice([first, len]) => first + len,
                    })
                    .collect();

                let last_idx = last_idx.sort(false);
                return Ok(unsafe { self.take_unchecked(&last_idx) });
            }
            (First, false) => {
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
            }
            (Last, false) => {
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_last(groups) })
            }
        };
        Ok(DataFrame::new_no_checks(columns))
    }

    /// Get a mask of all the unique rows in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Company" => &["Apple", "Microsoft"],
    ///                         "ISIN" => &["US0378331005", "US5949181045"])?;
    /// let ca: ChunkedArray<BooleanType> = df.is_unique()?;
    ///
    /// assert!(ca.all());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_unique(&self) -> PolarsResult<BooleanChunked> {
        let gb = self.groupby(self.get_column_names())?;
        let groups = gb.take_groups();
        Ok(is_unique_helper(
            groups,
            self.height() as IdxSize,
            true,
            false,
        ))
    }

    /// Get a mask of all the duplicated rows in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Company" => &["Alphabet", "Alphabet"],
    ///                         "ISIN" => &["US02079K3059", "US02079K1079"])?;
    /// let ca: ChunkedArray<BooleanType> = df.is_duplicated()?;
    ///
    /// assert!(!ca.all());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_duplicated(&self) -> PolarsResult<BooleanChunked> {
        let gb = self.groupby(self.get_column_names())?;
        let groups = gb.take_groups();
        Ok(is_unique_helper(
            groups,
            self.height() as IdxSize,
            false,
            true,
        ))
    }

    /// Create a new `DataFrame` that shows the null counts per column.
    #[must_use]
    pub fn null_count(&self) -> Self {
        let cols = self
            .columns
            .iter()
            .map(|s| Series::new(s.name(), &[s.null_count() as IdxSize]))
            .collect();
        Self::new_no_checks(cols)
    }

    /// Hash and combine the row values
    #[cfg(feature = "row_hash")]
    pub fn hash_rows(
        &mut self,
        hasher_builder: Option<RandomState>,
    ) -> PolarsResult<UInt64Chunked> {
        let dfs = split_df(self, POOL.current_num_threads())?;
        let (cas, _) = df_rows_to_hashes_threaded(&dfs, hasher_builder)?;

        let mut iter = cas.into_iter();
        let mut acc_ca = iter.next().unwrap();
        for ca in iter {
            acc_ca.append(&ca);
        }
        Ok(acc_ca.rechunk())
    }

    /// Get the supertype of the columns in this DataFrame
    pub fn get_supertype(&self) -> Option<PolarsResult<DataType>> {
        self.columns
            .iter()
            .map(|s| Ok(s.dtype().clone()))
            .reduce(|acc, b| try_get_supertype(&acc?, &b.unwrap()))
    }

    #[cfg(feature = "chunked_ids")]
    #[doc(hidden)]
    //// Take elements by a slice of [`ChunkId`]s.
    /// # Safety
    /// Does not do any bound checks.
    /// `sorted` indicates if the chunks are sorted.
    #[doc(hidden)]
    pub unsafe fn _take_chunked_unchecked_seq(&self, idx: &[ChunkId], sorted: IsSorted) -> Self {
        let cols = self.apply_columns(&|s| s._take_chunked_unchecked(idx, sorted));

        DataFrame::new_no_checks(cols)
    }
    #[cfg(feature = "chunked_ids")]
    //// Take elements by a slice of optional [`ChunkId`]s.
    /// # Safety
    /// Does not do any bound checks.
    #[doc(hidden)]
    pub unsafe fn _take_opt_chunked_unchecked_seq(&self, idx: &[Option<ChunkId>]) -> Self {
        let cols = self.apply_columns(&|s| match s.dtype() {
            DataType::Utf8 => s._take_opt_chunked_unchecked_threaded(idx, true),
            _ => s._take_opt_chunked_unchecked(idx),
        });

        DataFrame::new_no_checks(cols)
    }

    #[cfg(feature = "chunked_ids")]
    pub(crate) unsafe fn take_chunked_unchecked(&self, idx: &[ChunkId], sorted: IsSorted) -> Self {
        let cols = self.apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s._take_chunked_unchecked_threaded(idx, sorted, true),
            _ => s._take_chunked_unchecked(idx, sorted),
        });

        DataFrame::new_no_checks(cols)
    }

    #[cfg(feature = "chunked_ids")]
    pub(crate) unsafe fn take_opt_chunked_unchecked(&self, idx: &[Option<ChunkId>]) -> Self {
        let cols = self.apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s._take_opt_chunked_unchecked_threaded(idx, true),
            _ => s._take_opt_chunked_unchecked(idx),
        });

        DataFrame::new_no_checks(cols)
    }

    /// Be careful with allowing threads when calling this in a large hot loop
    /// every thread split may be on rayon stack and lead to SO
    #[doc(hidden)]
    pub unsafe fn _take_unchecked_slice(&self, idx: &[IdxSize], allow_threads: bool) -> Self {
        self._take_unchecked_slice2(idx, allow_threads, IsSorted::Not)
    }

    #[doc(hidden)]
    pub unsafe fn _take_unchecked_slice2(
        &self,
        idx: &[IdxSize],
        allow_threads: bool,
        sorted: IsSorted,
    ) -> Self {
        #[cfg(debug_assertions)]
        {
            if idx.len() > 2 {
                match sorted {
                    IsSorted::Ascending => {
                        assert!(idx[0] <= idx[idx.len() - 1]);
                    }
                    IsSorted::Descending => {
                        assert!(idx[0] >= idx[idx.len() - 1]);
                    }
                    _ => {}
                }
            }
        }
        let ptr = idx.as_ptr() as *mut IdxSize;
        let len = idx.len();

        // create a temporary vec. we will not drop it.
        let mut ca = IdxCa::from_vec("", Vec::from_raw_parts(ptr, len, len));
        ca.set_sorted2(sorted);
        let out = self.take_unchecked_impl(&ca, allow_threads);

        // ref count of buffers should be one because we dropped all allocations
        let arr = {
            let arr_ref = std::mem::take(&mut ca.chunks).pop().unwrap();
            arr_ref
                .as_any()
                .downcast_ref::<PrimitiveArray<IdxSize>>()
                .unwrap()
                .clone()
        };
        // the only owned heap allocation is the `Vec` we created and must not be dropped
        let _ = std::mem::ManuallyDrop::new(arr.into_mut().right().unwrap());
        out
    }

    #[cfg(feature = "partition_by")]
    #[doc(hidden)]
    pub fn _partition_by_impl(
        &self,
        cols: &[String],
        stable: bool,
    ) -> PolarsResult<Vec<DataFrame>> {
        let groups = if stable {
            self.groupby_stable(cols)?.take_groups()
        } else {
            self.groupby(cols)?.take_groups()
        };

        // don't parallelize this
        // there is a lot of parallelization in take and this may easily SO
        POOL.install(|| {
            match groups {
                GroupsProxy::Idx(idx) => {
                    Ok(idx
                        .into_par_iter()
                        .map(|(_, group)| {
                            // groups are in bounds
                            unsafe { self._take_unchecked_slice(&group, false) }
                        })
                        .collect())
                }
                GroupsProxy::Slice { groups, .. } => Ok(groups
                    .into_par_iter()
                    .map(|[first, len]| self.slice(first as i64, len as usize))
                    .collect()),
            }
        })
    }

source

pub fn groupby_stable<I, S>(&self, by: I) -> PolarsResult<GroupBy<'_>>where
I: IntoIterator<Item = S>,
S: AsRef<str>,

Group DataFrame using a Series column. The groups are ordered by their smallest row index.

Examples found in repository ?

src/frame/mod.rs (line 3080)

    fn unique_impl(
        &self,
        maintain_order: bool,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<Self> {
        use UniqueKeepStrategy::*;
        let names = match &subset {
            Some(s) => s.iter().map(|s| &**s).collect(),
            None => self.get_column_names(),
        };

        let columns = match (keep, maintain_order) {
            (First, true) => {
                let gb = self.groupby_stable(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
            }
            (Last, true) => {
                // maintain order by last values, so the sorted groups are not correct as they
                // are sorted by the first value
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                let last_idx: NoNull<IdxCa> = groups
                    .iter()
                    .map(|g| match g {
                        GroupsIndicator::Idx((_first, idx)) => idx[idx.len() - 1],
                        GroupsIndicator::Slice([first, len]) => first + len,
                    })
                    .collect();

                let last_idx = last_idx.sort(false);
                return Ok(unsafe { self.take_unchecked(&last_idx) });
            }
            (First, false) => {
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
            }
            (Last, false) => {
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_last(groups) })
            }
        };
        Ok(DataFrame::new_no_checks(columns))
    }

    /// Get a mask of all the unique rows in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Company" => &["Apple", "Microsoft"],
    ///                         "ISIN" => &["US0378331005", "US5949181045"])?;
    /// let ca: ChunkedArray<BooleanType> = df.is_unique()?;
    ///
    /// assert!(ca.all());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_unique(&self) -> PolarsResult<BooleanChunked> {
        let gb = self.groupby(self.get_column_names())?;
        let groups = gb.take_groups();
        Ok(is_unique_helper(
            groups,
            self.height() as IdxSize,
            true,
            false,
        ))
    }

    /// Get a mask of all the duplicated rows in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Company" => &["Alphabet", "Alphabet"],
    ///                         "ISIN" => &["US02079K3059", "US02079K1079"])?;
    /// let ca: ChunkedArray<BooleanType> = df.is_duplicated()?;
    ///
    /// assert!(!ca.all());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_duplicated(&self) -> PolarsResult<BooleanChunked> {
        let gb = self.groupby(self.get_column_names())?;
        let groups = gb.take_groups();
        Ok(is_unique_helper(
            groups,
            self.height() as IdxSize,
            false,
            true,
        ))
    }

    /// Create a new `DataFrame` that shows the null counts per column.
    #[must_use]
    pub fn null_count(&self) -> Self {
        let cols = self
            .columns
            .iter()
            .map(|s| Series::new(s.name(), &[s.null_count() as IdxSize]))
            .collect();
        Self::new_no_checks(cols)
    }

    /// Hash and combine the row values
    #[cfg(feature = "row_hash")]
    pub fn hash_rows(
        &mut self,
        hasher_builder: Option<RandomState>,
    ) -> PolarsResult<UInt64Chunked> {
        let dfs = split_df(self, POOL.current_num_threads())?;
        let (cas, _) = df_rows_to_hashes_threaded(&dfs, hasher_builder)?;

        let mut iter = cas.into_iter();
        let mut acc_ca = iter.next().unwrap();
        for ca in iter {
            acc_ca.append(&ca);
        }
        Ok(acc_ca.rechunk())
    }

    /// Get the supertype of the columns in this DataFrame
    pub fn get_supertype(&self) -> Option<PolarsResult<DataType>> {
        self.columns
            .iter()
            .map(|s| Ok(s.dtype().clone()))
            .reduce(|acc, b| try_get_supertype(&acc?, &b.unwrap()))
    }

    #[cfg(feature = "chunked_ids")]
    #[doc(hidden)]
    //// Take elements by a slice of [`ChunkId`]s.
    /// # Safety
    /// Does not do any bound checks.
    /// `sorted` indicates if the chunks are sorted.
    #[doc(hidden)]
    pub unsafe fn _take_chunked_unchecked_seq(&self, idx: &[ChunkId], sorted: IsSorted) -> Self {
        let cols = self.apply_columns(&|s| s._take_chunked_unchecked(idx, sorted));

        DataFrame::new_no_checks(cols)
    }
    #[cfg(feature = "chunked_ids")]
    //// Take elements by a slice of optional [`ChunkId`]s.
    /// # Safety
    /// Does not do any bound checks.
    #[doc(hidden)]
    pub unsafe fn _take_opt_chunked_unchecked_seq(&self, idx: &[Option<ChunkId>]) -> Self {
        let cols = self.apply_columns(&|s| match s.dtype() {
            DataType::Utf8 => s._take_opt_chunked_unchecked_threaded(idx, true),
            _ => s._take_opt_chunked_unchecked(idx),
        });

        DataFrame::new_no_checks(cols)
    }

    #[cfg(feature = "chunked_ids")]
    pub(crate) unsafe fn take_chunked_unchecked(&self, idx: &[ChunkId], sorted: IsSorted) -> Self {
        let cols = self.apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s._take_chunked_unchecked_threaded(idx, sorted, true),
            _ => s._take_chunked_unchecked(idx, sorted),
        });

        DataFrame::new_no_checks(cols)
    }

    #[cfg(feature = "chunked_ids")]
    pub(crate) unsafe fn take_opt_chunked_unchecked(&self, idx: &[Option<ChunkId>]) -> Self {
        let cols = self.apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s._take_opt_chunked_unchecked_threaded(idx, true),
            _ => s._take_opt_chunked_unchecked(idx),
        });

        DataFrame::new_no_checks(cols)
    }

    /// Be careful with allowing threads when calling this in a large hot loop
    /// every thread split may be on rayon stack and lead to SO
    #[doc(hidden)]
    pub unsafe fn _take_unchecked_slice(&self, idx: &[IdxSize], allow_threads: bool) -> Self {
        self._take_unchecked_slice2(idx, allow_threads, IsSorted::Not)
    }

    #[doc(hidden)]
    pub unsafe fn _take_unchecked_slice2(
        &self,
        idx: &[IdxSize],
        allow_threads: bool,
        sorted: IsSorted,
    ) -> Self {
        #[cfg(debug_assertions)]
        {
            if idx.len() > 2 {
                match sorted {
                    IsSorted::Ascending => {
                        assert!(idx[0] <= idx[idx.len() - 1]);
                    }
                    IsSorted::Descending => {
                        assert!(idx[0] >= idx[idx.len() - 1]);
                    }
                    _ => {}
                }
            }
        }
        let ptr = idx.as_ptr() as *mut IdxSize;
        let len = idx.len();

        // create a temporary vec. we will not drop it.
        let mut ca = IdxCa::from_vec("", Vec::from_raw_parts(ptr, len, len));
        ca.set_sorted2(sorted);
        let out = self.take_unchecked_impl(&ca, allow_threads);

        // ref count of buffers should be one because we dropped all allocations
        let arr = {
            let arr_ref = std::mem::take(&mut ca.chunks).pop().unwrap();
            arr_ref
                .as_any()
                .downcast_ref::<PrimitiveArray<IdxSize>>()
                .unwrap()
                .clone()
        };
        // the only owned heap allocation is the `Vec` we created and must not be dropped
        let _ = std::mem::ManuallyDrop::new(arr.into_mut().right().unwrap());
        out
    }

    #[cfg(feature = "partition_by")]
    #[doc(hidden)]
    pub fn _partition_by_impl(
        &self,
        cols: &[String],
        stable: bool,
    ) -> PolarsResult<Vec<DataFrame>> {
        let groups = if stable {
            self.groupby_stable(cols)?.take_groups()
        } else {
            self.groupby(cols)?.take_groups()
        };

        // don't parallelize this
        // there is a lot of parallelization in take and this may easily SO
        POOL.install(|| {
            match groups {
                GroupsProxy::Idx(idx) => {
                    Ok(idx
                        .into_par_iter()
                        .map(|(_, group)| {
                            // groups are in bounds
                            unsafe { self._take_unchecked_slice(&group, false) }
                        })
                        .collect())
                }
                GroupsProxy::Slice { groups, .. } => Ok(groups
                    .into_par_iter()
                    .map(|[first, len]| self.slice(first as i64, len as usize))
                    .collect()),
            }
        })
    }

source §

impl DataFrame

source

pub unsafe fn _create_left_df_from_slice(
    &self,
    join_tuples: &[IdxSize],
    left_join: bool,
    sorted: bool
) -> DataFrame

Safety

Join tuples must be in bounds

Examples found in repository ?

src/frame/hash_join/mod.rs (line 419)

    pub fn _finish_left_join(
        &self,
        ids: LeftJoinIds,
        other: &DataFrame,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        let (left_idx, right_idx) = ids;
        let materialize_left = || match left_idx {
            JoinIds::Left(left_idx) => {
                let mut left_idx = &*left_idx;
                if let Some((offset, len)) = slice {
                    left_idx = slice_slice(left_idx, offset, len);
                }
                unsafe { self._create_left_df_from_slice(left_idx, true, true) }
            }
            JoinIds::Right(left_idx) => {
                let mut left_idx = &*left_idx;
                if let Some((offset, len)) = slice {
                    left_idx = slice_slice(left_idx, offset, len);
                }
                unsafe { self.create_left_df_chunked(left_idx, true) }
            }
        };

        let materialize_right = || match right_idx {
            JoinOptIds::Left(right_idx) => {
                let mut right_idx = &*right_idx;
                if let Some((offset, len)) = slice {
                    right_idx = slice_slice(right_idx, offset, len);
                }
                unsafe {
                    other.take_opt_iter_unchecked(
                        right_idx.iter().map(|opt_i| opt_i.map(|i| i as usize)),
                    )
                }
            }
            JoinOptIds::Right(right_idx) => {
                let mut right_idx = &*right_idx;
                if let Some((offset, len)) = slice {
                    right_idx = slice_slice(right_idx, offset, len);
                }
                unsafe { other.take_opt_chunked_unchecked(right_idx) }
            }
        };
        let (df_left, df_right) = POOL.join(materialize_left, materialize_right);

        _finish_join(df_left, df_right, suffix.as_deref())
    }

source

pub fn _finish_left_join(
 &self,
 ids: LeftJoinIds,
 other: &DataFrame,
 suffix: Option<String>,
 slice: Option<(i64, usize)>
) -> PolarsResult<DataFrame>

Examples found in repository ?

src/frame/hash_join/mod.rs (line 481)

    pub fn _left_join_from_series(
        &self,
        other: &DataFrame,
        s_left: &Series,
        s_right: &Series,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
        verbose: bool,
    ) -> PolarsResult<DataFrame> {
        #[cfg(feature = "dtype-categorical")]
        _check_categorical_src(s_left.dtype(), s_right.dtype())?;

        // ensure that the chunks are aligned otherwise we go OOB
        let mut left = self.clone();
        let mut s_left = s_left.clone();
        let mut right = other.clone();
        let mut s_right = s_right.clone();
        if left.should_rechunk() {
            left.as_single_chunk_par();
            s_left = s_left.rechunk();
        }
        if right.should_rechunk() {
            right.as_single_chunk_par();
            s_right = s_right.rechunk();
        }
        let ids = sort_or_hash_left(&s_left, &s_right, verbose);
        left._finish_left_join(ids, &right.drop(s_right.name()).unwrap(), suffix, slice)
    }

source

pub fn _left_join_from_series(
 &self,
 other: &DataFrame,
 s_left: &Series,
 s_right: &Series,
 suffix: Option<String>,
 slice: Option<(i64, usize)>,
 verbose: bool
) -> PolarsResult<DataFrame>

source

pub unsafe fn _finish_anti_semi_join(
 &self,
 idx: &[IdxSize],
 slice: Option<(i64, usize)>
) -> DataFrame

Safety

idx must be in bounds

Examples found in repository ?

src/frame/hash_join/mod.rs (line 513)

    pub fn _semi_anti_join_from_series(
        &self,
        s_left: &Series,
        s_right: &Series,
        slice: Option<(i64, usize)>,
        anti: bool,
    ) -> PolarsResult<DataFrame> {
        #[cfg(feature = "dtype-categorical")]
        _check_categorical_src(s_left.dtype(), s_right.dtype())?;

        let idx = s_left.hash_join_semi_anti(s_right, anti);
        // Safety:
        // indices are in bounds
        Ok(unsafe { self._finish_anti_semi_join(&idx, slice) })
    }

source

pub fn _semi_anti_join_from_series(
 &self,
 s_left: &Series,
 s_right: &Series,
 slice: Option<(i64, usize)>,
 anti: bool
) -> PolarsResult<DataFrame>

source

pub fn _outer_join_from_series(
 &self,
 other: &DataFrame,
 s_left: &Series,
 s_right: &Series,
 suffix: Option<String>,
 slice: Option<(i64, usize)>
) -> PolarsResult<DataFrame>

source §

impl DataFrame

source

pub fn get_row(&self, idx: usize) -> PolarsResult<Row<'_>>

Available on crate feature rows only.

Get a row from a DataFrame. Use of this is discouraged as it will likely be slow.

source

pub fn get_row_amortized<'a>(
 &'a self,
 idx: usize,
 row: &mut Row<'a>
) -> PolarsResult<()>

Available on crate feature rows only.

Amortize allocations by reusing a row. The caller is responsible to make sure that the row has at least the capacity for the number of columns in the DataFrame

source

pub unsafe fn get_row_amortized_unchecked<'a>(
 &'a self,
 idx: usize,
 row: &mut Row<'a>
)

Available on crate feature rows only.

Amortize allocations by reusing a row. The caller is responsible to make sure that the row has at least the capacity for the number of columns in the DataFrame

Safety

Does not do any bounds checking.

source

pub fn from_rows_and_schema(
rows: &[Row<'_>],
schema: &Schema
) -> PolarsResult<Self>

Available on crate feature rows only.

Create a new DataFrame from rows. This should only be used when you have row wise data, as this is a lot slower than creating the Series in a columnar fashion

Examples found in repository ?

src/frame/row.rs (line 122)

    pub fn from_rows(rows: &[Row]) -> PolarsResult<Self> {
        let schema = rows_to_schema_first_non_null(rows, Some(50));
        let has_nulls = schema
            .iter_dtypes()
            .any(|dtype| matches!(dtype, DataType::Null));
        if has_nulls {
            return Err(PolarsError::ComputeError(
                "Could not infer row types, because of the null values".into(),
            ));
        }
        Self::from_rows_and_schema(rows, &schema)
    }

source

pub fn from_rows_iter_and_schema<'a, I>(
 rows: I,
 schema: &Schema
) -> PolarsResult<Self>where
 I: Iterator<Item = &'a Row<'a>>,

Available on crate feature rows only.

Create a new DataFrame from an iterator over rows. This should only be used when you have row wise data, as this is a lot slower than creating the Series in a columnar fashion

Examples found in repository ?

src/frame/row.rs (line 63)

62
63
64

    pub fn from_rows_and_schema(rows: &[Row], schema: &Schema) -> PolarsResult<Self> {
        Self::from_rows_iter_and_schema(rows.iter(), schema)
    }

source

pub fn from_rows(rows: &[Row<'_>]) -> PolarsResult<Self>

Available on crate feature rows only.

Create a new DataFrame from rows. This should only be used when you have row wise data, as this is a lot slower than creating the Series in a columnar fashion

source

pub fn transpose(&self) -> PolarsResult<DataFrame>

Available on crate feature rows only.

Transpose a DataFrame. This is a very expensive operation.

source §

impl DataFrame

source

pub fn estimated_size(&self) -> usize

Returns an estimation of the total (heap) allocated size of the DataFrame in bytes.

Implementation

This estimation is the sum of the size of its buffers, validity, including nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the size of 2 arrays is not the sum of the sizes computed from this function. In particular, StructArray’s size is an upper bound.

When an array is sliced, its allocated size remains constant because the buffer unchanged. However, this function will yield a smaller number. This is because this function returns the visible size of the buffer, not its total capacity.

FFI buffers are included in this estimation.

source

pub fn new<S: IntoSeries>(columns: Vec<S>) -> PolarsResult<Self>

Create a DataFrame from a Vector of Series.

Example

let s0 = Series::new("days", [0, 1, 2].as_ref());
let s1 = Series::new("temp", [22.1, 19.9, 7.].as_ref());

let df = DataFrame::new(vec![s0, s1])?;

Examples found in repository ?

src/frame/upstream_traits.rs (line 12)

    fn from_iter<T: IntoIterator<Item = Series>>(iter: T) -> Self {
        let v = iter.into_iter().collect();
        DataFrame::new(v).expect("could not create DataFrame from iterator")
    }

More examples

Hide additional examples

src/frame/chunks.rs (line 19)

    fn try_from(arg: (ArrowChunk, &[ArrowField])) -> PolarsResult<DataFrame> {
        let columns: PolarsResult<Vec<Series>> = arg
            .0
            .columns()
            .iter()
            .zip(arg.1)
            .map(|(arr, field)| Series::try_from((field.name.as_ref(), arr.clone())))
            .collect();

        DataFrame::new(columns?)
    }

src/frame/groupby/mod.rs (line 435)

    pub fn mean(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Mean);
            let mut agg = unsafe { agg_col.agg_mean(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped series and compute the sum per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).sum()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+----------+
    /// | date       | temp_sum |
    /// | ---        | ---      |
    /// | Date       | i32      |
    /// +============+==========+
    /// | 2020-08-23 | 9        |
    /// +------------+----------+
    /// | 2020-08-22 | 8        |
    /// +------------+----------+
    /// | 2020-08-21 | 30       |
    /// +------------+----------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn sum(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Sum);
            let mut agg = unsafe { agg_col.agg_sum(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped series and compute the minimal value per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).min()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+----------+
    /// | date       | temp_min |
    /// | ---        | ---      |
    /// | Date       | i32      |
    /// +============+==========+
    /// | 2020-08-23 | 9        |
    /// +------------+----------+
    /// | 2020-08-22 | 1        |
    /// +------------+----------+
    /// | 2020-08-21 | 10       |
    /// +------------+----------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn min(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Min);
            let mut agg = unsafe { agg_col.agg_min(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped series and compute the maximum value per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).max()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+----------+
    /// | date       | temp_max |
    /// | ---        | ---      |
    /// | Date       | i32      |
    /// +============+==========+
    /// | 2020-08-23 | 9        |
    /// +------------+----------+
    /// | 2020-08-22 | 7        |
    /// +------------+----------+
    /// | 2020-08-21 | 20       |
    /// +------------+----------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn max(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Max);
            let mut agg = unsafe { agg_col.agg_max(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and find the first value per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).first()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+------------+
    /// | date       | temp_first |
    /// | ---        | ---        |
    /// | Date       | i32        |
    /// +============+============+
    /// | 2020-08-23 | 9          |
    /// +------------+------------+
    /// | 2020-08-22 | 7          |
    /// +------------+------------+
    /// | 2020-08-21 | 20         |
    /// +------------+------------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn first(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::First);
            let mut agg = unsafe { agg_col.agg_first(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and return the last value per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).last()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+------------+
    /// | date       | temp_last |
    /// | ---        | ---        |
    /// | Date       | i32        |
    /// +============+============+
    /// | 2020-08-23 | 9          |
    /// +------------+------------+
    /// | 2020-08-22 | 1          |
    /// +------------+------------+
    /// | 2020-08-21 | 10         |
    /// +------------+------------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn last(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Last);
            let mut agg = unsafe { agg_col.agg_last(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` by counting the number of unique values.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).n_unique()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+---------------+
    /// | date       | temp_n_unique |
    /// | ---        | ---           |
    /// | Date       | u32           |
    /// +============+===============+
    /// | 2020-08-23 | 1             |
    /// +------------+---------------+
    /// | 2020-08-22 | 2             |
    /// +------------+---------------+
    /// | 2020-08-21 | 2             |
    /// +------------+---------------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn n_unique(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::NUnique);
            let mut agg = unsafe { agg_col.agg_n_unique(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg.into_series());
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and determine the quantile per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// # use polars_arrow::prelude::QuantileInterpolOptions;
    ///
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).quantile(0.2, QuantileInterpolOptions::default())
    /// }
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn quantile(
        &self,
        quantile: f64,
        interpol: QuantileInterpolOptions,
    ) -> PolarsResult<DataFrame> {
        if !(0.0..=1.0).contains(&quantile) {
            return Err(PolarsError::ComputeError(
                "quantile should be within 0.0 and 1.0".into(),
            ));
        }
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name =
                fmt_groupby_column(agg_col.name(), GroupByMethod::Quantile(quantile, interpol));
            let mut agg = unsafe { agg_col.agg_quantile(&self.groups, quantile, interpol) };
            agg.rename(&new_name);
            cols.push(agg.into_series());
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and determine the median per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).median()
    /// }
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn median(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Median);
            let mut agg = unsafe { agg_col.agg_median(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg.into_series());
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and determine the variance per group.
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn var(&self, ddof: u8) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Var(ddof));
            let mut agg = unsafe { agg_col.agg_var(&self.groups, ddof) };
            agg.rename(&new_name);
            cols.push(agg.into_series());
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and determine the standard deviation per group.
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn std(&self, ddof: u8) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Std(ddof));
            let mut agg = unsafe { agg_col.agg_std(&self.groups, ddof) };
            agg.rename(&new_name);
            cols.push(agg.into_series());
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped series and compute the number of values per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).count()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+------------+
    /// | date       | temp_count |
    /// | ---        | ---        |
    /// | Date       | u32        |
    /// +============+============+
    /// | 2020-08-23 | 1          |
    /// +------------+------------+
    /// | 2020-08-22 | 2          |
    /// +------------+------------+
    /// | 2020-08-21 | 2          |
    /// +------------+------------+
    /// ```
    pub fn count(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Count);
            let mut ca = self.groups.group_count();
            ca.rename(&new_name);
            cols.push(ca.into_series());
        }
        DataFrame::new(cols)
    }

    /// Get the groupby group indexes.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.groups()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +--------------+------------+
    /// | date         | groups     |
    /// | ---          | ---        |
    /// | Date(days)   | list [u32] |
    /// +==============+============+
    /// | 2020-08-23   | "[3]"      |
    /// +--------------+------------+
    /// | 2020-08-22   | "[2, 4]"   |
    /// +--------------+------------+
    /// | 2020-08-21   | "[0, 1]"   |
    /// +--------------+------------+
    /// ```
    pub fn groups(&self) -> PolarsResult<DataFrame> {
        let mut cols = self.keys();
        let mut column = self.groups.as_list_chunked();
        let new_name = fmt_groupby_column("", GroupByMethod::Groups);
        column.rename(&new_name);
        cols.push(column.into_series());
        DataFrame::new(cols)
    }

    /// Aggregate the groups of the groupby operation into lists.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     // GroupBy and aggregate to Lists
    ///     df.groupby(["date"])?.select(["temp"]).agg_list()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+------------------------+
    /// | date       | temp_agg_list          |
    /// | ---        | ---                    |
    /// | Date       | list [i32]             |
    /// +============+========================+
    /// | 2020-08-23 | "[Some(9)]"            |
    /// +------------+------------------------+
    /// | 2020-08-22 | "[Some(7), Some(1)]"   |
    /// +------------+------------------------+
    /// | 2020-08-21 | "[Some(20), Some(10)]" |
    /// +------------+------------------------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn agg_list(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::List);
            let mut agg = unsafe { agg_col.agg_list(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

src/frame/mod.rs (line 377)

    pub fn with_row_count(&self, name: &str, offset: Option<IdxSize>) -> PolarsResult<Self> {
        let mut columns = Vec::with_capacity(self.columns.len() + 1);
        let offset = offset.unwrap_or(0);

        let mut ca = IdxCa::from_vec(
            name,
            (offset..(self.height() as IdxSize) + offset).collect(),
        );
        ca.set_sorted(false);
        columns.push(ca.into_series());

        columns.extend_from_slice(&self.columns);
        DataFrame::new(columns)
    }

    /// Add a row count in place.
    pub fn with_row_count_mut(&mut self, name: &str, offset: Option<IdxSize>) -> &mut Self {
        let offset = offset.unwrap_or(0);
        let mut ca = IdxCa::from_vec(
            name,
            (offset..(self.height() as IdxSize) + offset).collect(),
        );
        ca.set_sorted(false);

        self.columns.insert(0, ca.into_series());
        self
    }

    /// Create a new `DataFrame` but does not check the length or duplicate occurrence of the `Series`.
    ///
    /// It is advised to use [Series::new](Series::new) in favor of this method.
    ///
    /// # Panic
    /// It is the callers responsibility to uphold the contract of all `Series`
    /// having an equal length, if not this may panic down the line.
    pub const fn new_no_checks(columns: Vec<Series>) -> DataFrame {
        DataFrame { columns }
    }

    /// Aggregate all chunks to contiguous memory.
    #[must_use]
    pub fn agg_chunks(&self) -> Self {
        // Don't parallelize this. Memory overhead
        let f = |s: &Series| s.rechunk();
        let cols = self.columns.iter().map(f).collect();
        DataFrame::new_no_checks(cols)
    }

    /// Shrink the capacity of this DataFrame to fit its length.
    pub fn shrink_to_fit(&mut self) {
        // Don't parallelize this. Memory overhead
        for s in &mut self.columns {
            s.shrink_to_fit();
        }
    }

    /// Aggregate all the chunks in the DataFrame to a single chunk.
    pub fn as_single_chunk(&mut self) -> &mut Self {
        // Don't parallelize this. Memory overhead
        for s in &mut self.columns {
            *s = s.rechunk();
        }
        self
    }

    /// Aggregate all the chunks in the DataFrame to a single chunk in parallel.
    /// This may lead to more peak memory consumption.
    pub fn as_single_chunk_par(&mut self) -> &mut Self {
        if self.columns.iter().any(|s| s.n_chunks() > 1) {
            self.columns = self.apply_columns_par(&|s| s.rechunk());
        }
        self
    }

    /// Estimates of the DataFrames columns consist of the same chunk sizes
    pub fn should_rechunk(&self) -> bool {
        let hb = RandomState::default();
        let hb2 = RandomState::with_seeds(392498, 98132457, 0, 412059);
        !self
            .columns
            .iter()
            // The idea is that we create a hash of the chunk lengths.
            // Consisting of the combined hash + the sum (assuming collision probability is nihil)
            // if not, we can add more hashes or at worst case we do an extra rechunk.
            // the old solution to this was clone all lengths to a vec and compare the vecs
            .map(|s| {
                s.chunk_lengths().map(|i| i as u64).fold(
                    (0u64, 0u64, s.n_chunks()),
                    |(lhash, lh2, n), rval| {
                        let mut h = hb.build_hasher();
                        rval.hash(&mut h);
                        let rhash = h.finish();
                        let mut h = hb2.build_hasher();
                        rval.hash(&mut h);
                        let rh2 = h.finish();
                        (
                            _boost_hash_combine(lhash, rhash),
                            _boost_hash_combine(lh2, rh2),
                            n,
                        )
                    },
                )
            })
            .all_equal()
    }

    /// Ensure all the chunks in the DataFrame are aligned.
    pub fn rechunk(&mut self) -> &mut Self {
        if self.should_rechunk() {
            self.as_single_chunk_par()
        } else {
            self
        }
    }

    /// Get the `DataFrame` schema.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Thing" => &["Observable universe", "Human stupidity"],
    ///                         "Diameter (m)" => &[8.8e26, f64::INFINITY])?;
    ///
    /// let f1: Field = Field::new("Thing", DataType::Utf8);
    /// let f2: Field = Field::new("Diameter (m)", DataType::Float64);
    /// let sc: Schema = Schema::from(vec![f1, f2].into_iter());
    ///
    /// assert_eq!(df.schema(), sc);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn schema(&self) -> Schema {
        Schema::from(self.iter().map(|s| s.field().into_owned()))
    }

    /// Get a reference to the `DataFrame` columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Adenine", "Cytosine", "Guanine", "Thymine"],
    ///                         "Symbol" => &["A", "C", "G", "T"])?;
    /// let columns: &Vec<Series> = df.get_columns();
    ///
    /// assert_eq!(columns[0].name(), "Name");
    /// assert_eq!(columns[1].name(), "Symbol");
    /// # Ok::<(), PolarsError>(())
    /// ```
    #[inline]
    pub fn get_columns(&self) -> &Vec<Series> {
        &self.columns
    }

    #[cfg(feature = "private")]
    #[inline]
    pub fn get_columns_mut(&mut self) -> &mut Vec<Series> {
        &mut self.columns
    }

    /// Iterator over the columns as `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Name", &["Pythagoras' theorem", "Shannon entropy"]);
    /// let s2: Series = Series::new("Formula", &["a²+b²=c²", "H=-Σ[P(x)log|P(x)|]"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2.clone()])?;
    ///
    /// let mut iterator = df.iter();
    ///
    /// assert_eq!(iterator.next(), Some(&s1));
    /// assert_eq!(iterator.next(), Some(&s2));
    /// assert_eq!(iterator.next(), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn iter(&self) -> std::slice::Iter<'_, Series> {
        self.columns.iter()
    }

    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Language" => &["Rust", "Python"],
    ///                         "Designer" => &["Graydon Hoare", "Guido van Rossum"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Language", "Designer"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn get_column_names(&self) -> Vec<&str> {
        self.columns.iter().map(|s| s.name()).collect()
    }

    /// Get the `Vec<String>` representing the column names.
    pub fn get_column_names_owned(&self) -> Vec<String> {
        self.columns.iter().map(|s| s.name().to_string()).collect()
    }

    /// Set the column names.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Mathematical set" => &["ℕ", "ℤ", "𝔻", "ℚ", "ℝ", "ℂ"])?;
    /// df.set_column_names(&["Set"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Set"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn set_column_names<S: AsRef<str>>(&mut self, names: &[S]) -> PolarsResult<()> {
        if names.len() != self.columns.len() {
            return Err(PolarsError::ShapeMisMatch("the provided slice with column names has not the same size as the DataFrame's width".into()));
        }
        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(names.iter().map(|name| name.as_ref()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }

        let columns = mem::take(&mut self.columns);
        self.columns = columns
            .into_iter()
            .zip(names)
            .map(|(s, name)| {
                let mut s = s;
                s.rename(name.as_ref());
                s
            })
            .collect();
        Ok(())
    }

    /// Get the data types of the columns in the DataFrame.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let venus_air: DataFrame = df!("Element" => &["Carbon dioxide", "Nitrogen"],
    ///                                "Fraction" => &[0.965, 0.035])?;
    ///
    /// assert_eq!(venus_air.dtypes(), &[DataType::Utf8, DataType::Float64]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn dtypes(&self) -> Vec<DataType> {
        self.columns.iter().map(|s| s.dtype().clone()).collect()
    }

    /// The number of chunks per column
    pub fn n_chunks(&self) -> usize {
        match self.columns.get(0) {
            None => 0,
            Some(s) => s.n_chunks(),
        }
    }

    /// Get a reference to the schema fields of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let earth: DataFrame = df!("Surface type" => &["Water", "Land"],
    ///                            "Fraction" => &[0.708, 0.292])?;
    ///
    /// let f1: Field = Field::new("Surface type", DataType::Utf8);
    /// let f2: Field = Field::new("Fraction", DataType::Float64);
    ///
    /// assert_eq!(earth.fields(), &[f1, f2]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn fields(&self) -> Vec<Field> {
        self.columns
            .iter()
            .map(|s| s.field().into_owned())
            .collect()
    }

    /// Get (height, width) of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("1" => &[1, 2, 3, 4, 5])?;
    /// let df2: DataFrame = df!("1" => &[1, 2, 3, 4, 5],
    ///                          "2" => &[1, 2, 3, 4, 5])?;
    ///
    /// assert_eq!(df0.shape(), (0 ,0));
    /// assert_eq!(df1.shape(), (5, 1));
    /// assert_eq!(df2.shape(), (5, 2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn shape(&self) -> (usize, usize) {
        match self.columns.as_slice() {
            &[] => (0, 0),
            v => (v[0].len(), v.len()),
        }
    }

    /// Get the width of the `DataFrame` which is the number of columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("Series 1" => &[0; 0])?;
    /// let df2: DataFrame = df!("Series 1" => &[0; 0],
    ///                          "Series 2" => &[0; 0])?;
    ///
    /// assert_eq!(df0.width(), 0);
    /// assert_eq!(df1.width(), 1);
    /// assert_eq!(df2.width(), 2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn width(&self) -> usize {
        self.columns.len()
    }

    /// Get the height of the `DataFrame` which is the number of rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("Currency" => &["€", "$"])?;
    /// let df2: DataFrame = df!("Currency" => &["€", "$", "¥", "£", "₿"])?;
    ///
    /// assert_eq!(df0.height(), 0);
    /// assert_eq!(df1.height(), 2);
    /// assert_eq!(df2.height(), 5);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn height(&self) -> usize {
        self.shape().0
    }

    /// Check if the `DataFrame` is empty.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = DataFrame::default();
    /// assert!(df1.is_empty());
    ///
    /// let df2: DataFrame = df!("First name" => &["Forever"],
    ///                          "Last name" => &["Alone"])?;
    /// assert!(!df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_empty(&self) -> bool {
        self.columns.is_empty()
    }

    pub(crate) fn hstack_mut_no_checks(&mut self, columns: &[Series]) -> &mut Self {
        for col in columns {
            self.columns.push(col.clone());
        }
        self
    }

    /// Add multiple `Series` to a `DataFrame`.
    /// The added `Series` are required to have the same length.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn stack(df: &mut DataFrame, columns: &[Series]) {
    ///     df.hstack_mut(columns);
    /// }
    /// ```
    pub fn hstack_mut(&mut self, columns: &[Series]) -> PolarsResult<&mut Self> {
        let mut names = PlHashSet::with_capacity(self.columns.len());
        for s in &self.columns {
            names.insert(s.name());
        }

        let height = self.height();
        // first loop check validity. We don't do this in a single pass otherwise
        // this DataFrame is already modified when an error occurs.
        for col in columns {
            if col.len() != height && height != 0 {
                return Err(PolarsError::ShapeMisMatch(
                    format!("Could not horizontally stack Series. The Series length {} differs from the DataFrame height: {height}", col.len()).into()));
            }

            let name = col.name();
            if names.contains(name) {
                return Err(PolarsError::Duplicate(
                    format!("Cannot do hstack operation. Column with name: {name} already exists",)
                        .into(),
                ));
            }
            names.insert(name);
        }
        drop(names);
        Ok(self.hstack_mut_no_checks(columns))
    }

    /// Add multiple `Series` to a `DataFrame`.
    /// The added `Series` are required to have the same length.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"])?;
    /// let s1: Series = Series::new("Proton", &[29, 47, 79]);
    /// let s2: Series = Series::new("Electron", &[29, 47, 79]);
    ///
    /// let df2: DataFrame = df1.hstack(&[s1, s2])?;
    /// assert_eq!(df2.shape(), (3, 3));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 3)
    /// +---------+--------+----------+
    /// | Element | Proton | Electron |
    /// | ---     | ---    | ---      |
    /// | str     | i32    | i32      |
    /// +=========+========+==========+
    /// | Copper  | 29     | 29       |
    /// +---------+--------+----------+
    /// | Silver  | 47     | 47       |
    /// +---------+--------+----------+
    /// | Gold    | 79     | 79       |
    /// +---------+--------+----------+
    /// ```
    pub fn hstack(&self, columns: &[Series]) -> PolarsResult<Self> {
        let mut new_cols = self.columns.clone();
        new_cols.extend_from_slice(columns);
        DataFrame::new(new_cols)
    }

    /// Concatenate a `DataFrame` to this `DataFrame` and return as newly allocated `DataFrame`.
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// let df3: DataFrame = df1.vstack(&df2)?;
    ///
    /// assert_eq!(df3.shape(), (5, 2));
    /// println!("{}", df3);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    pub fn vstack(&self, other: &DataFrame) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.vstack_mut(other)?;
        Ok(df)
    }

    /// Concatenate a DataFrame to this DataFrame
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// df1.vstack_mut(&df2)?;
    ///
    /// assert_eq!(df1.shape(), (5, 2));
    /// println!("{}", df1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    pub fn vstack_mut(&mut self, other: &DataFrame) -> PolarsResult<&mut Self> {
        if self.width() != other.width() {
            if self.width() == 0 {
                self.columns = other.columns.clone();
                return Ok(self);
            }

            return Err(PolarsError::ShapeMisMatch(
                format!("Could not vertically stack DataFrame. The DataFrames appended width {} differs from the parent DataFrames width {}", self.width(), other.width()).into()
            ));
        }

        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .try_for_each::<_, PolarsResult<_>>(|(left, right)| {
                can_extend(left, right)?;
                left.append(right).expect("should not fail");
                Ok(())
            })?;
        Ok(self)
    }

    /// Does not check if schema is correct
    pub(crate) fn vstack_mut_unchecked(&mut self, other: &DataFrame) {
        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .for_each(|(left, right)| {
                left.append(right).expect("should not fail");
            });
    }

    /// Extend the memory backed by this [`DataFrame`] with the values from `other`.
    ///
    /// Different from [`vstack`](Self::vstack) which adds the chunks from `other` to the chunks of this [`DataFrame`]
    /// `extend` appends the data from `other` to the underlying memory locations and thus may cause a reallocation.
    ///
    /// If this does not cause a reallocation, the resulting data structure will not have any extra chunks
    /// and thus will yield faster queries.
    ///
    /// Prefer `extend` over `vstack` when you want to do a query after a single append. For instance during
    /// online operations where you add `n` rows and rerun a query.
    ///
    /// Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance
    /// when you read in multiple files and when to store them in a single `DataFrame`. In the latter case, finish the sequence
    /// of `append` operations with a [`rechunk`](Self::rechunk).
    pub fn extend(&mut self, other: &DataFrame) -> PolarsResult<()> {
        if self.width() != other.width() {
            return Err(PolarsError::ShapeMisMatch(
                format!("Could not extend DataFrame. The DataFrames extended width {} differs from the parent DataFrames width {}", self.width(), other.width()).into()
            ));
        }

        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .try_for_each::<_, PolarsResult<_>>(|(left, right)| {
                can_extend(left, right)?;
                left.extend(right).unwrap();
                Ok(())
            })?;
        Ok(())
    }

    /// Remove a column by name and return the column removed.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Animal" => &["Tiger", "Lion", "Great auk"],
    ///                             "IUCN" => &["Endangered", "Vulnerable", "Extinct"])?;
    ///
    /// let s1: PolarsResult<Series> = df.drop_in_place("Average weight");
    /// assert!(s1.is_err());
    ///
    /// let s2: Series = df.drop_in_place("Animal")?;
    /// assert_eq!(s2, Series::new("Animal", &["Tiger", "Lion", "Great auk"]));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop_in_place(&mut self, name: &str) -> PolarsResult<Series> {
        let idx = self.check_name_to_idx(name)?;
        Ok(self.columns.remove(idx))
    }

    /// Return a new `DataFrame` where all null values are dropped.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Country" => ["Malta", "Liechtenstein", "North Korea"],
    ///                         "Tax revenue (% GDP)" => [Some(32.7), None, None])?;
    /// assert_eq!(df1.shape(), (3, 2));
    ///
    /// let df2: DataFrame = df1.drop_nulls(None)?;
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------------------+
    /// | Country | Tax revenue (% GDP) |
    /// | ---     | ---                 |
    /// | str     | f64                 |
    /// +=========+=====================+
    /// | Malta   | 32.7                |
    /// +---------+---------------------+
    /// ```
    pub fn drop_nulls(&self, subset: Option<&[String]>) -> PolarsResult<Self> {
        let selected_series;

        let mut iter = match subset {
            Some(cols) => {
                selected_series = self.select_series(cols)?;
                selected_series.iter()
            }
            None => self.columns.iter(),
        };

        // fast path for no nulls in df
        if iter.clone().all(|s| !s.has_validity()) {
            return Ok(self.clone());
        }

        let mask = iter
            .next()
            .ok_or_else(|| PolarsError::NoData("No data to drop nulls from".into()))?;
        let mut mask = mask.is_not_null();

        for s in iter {
            mask = mask & s.is_not_null();
        }
        self.filter(&mask)
    }

    /// Drop a column by name.
    /// This is a pure method and will return a new `DataFrame` instead of modifying
    /// the current one in place.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Ray type" => &["α", "β", "X", "γ"])?;
    /// let df2: DataFrame = df1.drop("Ray type")?;
    ///
    /// assert!(df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop(&self, name: &str) -> PolarsResult<Self> {
        let idx = self.check_name_to_idx(name)?;
        let mut new_cols = Vec::with_capacity(self.columns.len() - 1);

        self.columns.iter().enumerate().for_each(|(i, s)| {
            if i != idx {
                new_cols.push(s.clone())
            }
        });

        Ok(DataFrame::new_no_checks(new_cols))
    }

    pub fn drop_many<S: AsRef<str>>(&self, names: &[S]) -> Self {
        let names = names.iter().map(|s| s.as_ref()).collect();
        fn inner(df: &DataFrame, names: Vec<&str>) -> DataFrame {
            let mut new_cols = Vec::with_capacity(df.columns.len() - names.len());
            df.columns.iter().for_each(|s| {
                if !names.contains(&s.name()) {
                    new_cols.push(s.clone())
                }
            });

            DataFrame::new_no_checks(new_cols)
        }
        inner(self, names)
    }

    fn insert_at_idx_no_name_check(
        &mut self,
        index: usize,
        series: Series,
    ) -> PolarsResult<&mut Self> {
        if series.len() == self.height() {
            self.columns.insert(index, series);
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                .into(),
            ))
        }
    }

    /// Insert a new column at a given index.
    pub fn insert_at_idx<S: IntoSeries>(
        &mut self,
        index: usize,
        column: S,
    ) -> PolarsResult<&mut Self> {
        let series = column.into_series();
        self.check_already_present(series.name())?;
        self.insert_at_idx_no_name_check(index, series)
    }

    fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> {
        if let Some(idx) = self.find_idx_by_name(series.name()) {
            self.replace_at_idx(idx, series)?;
        } else {
            self.columns.push(series);
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    pub fn with_column<S: IntoSeries>(&mut self, column: S) -> PolarsResult<&mut Self> {
        fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();
            if series.len() == 1 && height > 1 {
                series = series.new_from_index(0, height);
            }

            if series.len() == height || df.is_empty() {
                df.add_column_by_search(series)?;
                Ok(df)
            }
            // special case for literals
            else if height == 0 && series.len() == 1 {
                let s = series.slice(0, 0);
                df.add_column_by_search(s)?;
                Ok(df)
            } else {
                Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Could not add column. The Series length {} differs from the DataFrame height: {}",
                        series.len(),
                        df.height()
                    )
                        .into(),
                ))
            }
        }
        let series = column.into_series();
        inner(self, series)
    }

    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let name = s.name();
        if let Some((idx, _, _)) = schema.get_full(name) {
            // schema is incorrect fallback to search
            if self.columns.get(idx).map(|s| s.name()) != Some(name) {
                self.add_column_by_search(s)?;
            } else {
                self.replace_at_idx(idx, s)?;
            }
        } else {
            self.columns.push(s);
        }
        Ok(())
    }

    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                self.with_column(s.clone())?;
            }
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    /// Uses an existing schema to amortize lookups.
    /// If the schema is incorrect, we will fallback to linear search.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let mut series = column.into_series();

        let height = self.height();
        if series.len() == 1 && height > 1 {
            series = series.new_from_index(0, height);
        }

        if series.len() == height || self.is_empty() {
            self.add_column_by_schema(series, schema)?;
            Ok(self)
        }
        // special case for literals
        else if height == 0 && series.len() == 1 {
            let s = series.slice(0, 0);
            self.add_column_by_schema(s, schema)?;
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                    .into(),
            ))
        }
    }

    /// Get a row in the `DataFrame`. Beware this is slow.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    ///     df.get(idx)
    /// }
    /// ```
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        match self.columns.get(0) {
            Some(s) => {
                if s.len() <= idx {
                    return None;
                }
            }
            None => return None,
        }
        // safety: we just checked bounds
        unsafe { Some(self.columns.iter().map(|s| s.get_unchecked(idx)).collect()) }
    }

    /// Select a `Series` by index.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
    ///                         "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
    ///
    /// let s1: Option<&Series> = df.select_at_idx(0);
    /// let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
    ///
    /// assert_eq!(s1, Some(&s2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_at_idx(&self, idx: usize) -> Option<&Series> {
        self.columns.get(idx)
    }

    /// Select a mutable series by index.
    ///
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> {
        self.columns.get_mut(idx)
    }

    /// Select column(s) from this `DataFrame` by range and return a new DataFrame
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///     "0" => &[0, 0, 0],
    ///     "1" => &[1, 1, 1],
    ///     "2" => &[2, 2, 2]
    /// }?;
    ///
    /// assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
    /// assert!(df.frame_equal(&df.select_by_range(..)?));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // This function is copied from std::slice::range (https://doc.rust-lang.org/std/slice/fn.range.html)
        // because it is the nightly feature. We should change here if this function were stable.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let len = bounds.end;

            let start: ops::Bound<&usize> = range.start_bound();
            let start = match start {
                ops::Bound::Included(&start) => start,
                ops::Bound::Excluded(start) => start.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice from after maximum usize");
                }),
                ops::Bound::Unbounded => 0,
            };

            let end: ops::Bound<&usize> = range.end_bound();
            let end = match end {
                ops::Bound::Included(end) => end.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice up to maximum usize");
                }),
                ops::Bound::Excluded(&end) => end,
                ops::Bound::Unbounded => len,
            };

            if start > end {
                panic!("slice index starts at {start} but ends at {end}");
            }
            if end > len {
                panic!("range end index {end} out of range for slice of length {len}",);
            }

            ops::Range { start, end }
        }

        let colnames = self.get_column_names_owned();
        let range = get_range(range, ..colnames.len());

        self.select_impl(&colnames[range])
    }

    /// Get column index of a `Series` by name.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
    ///                         "Health" => &[100, 200, 500],
    ///                         "Mana" => &[250, 100, 0],
    ///                         "Strength" => &[30, 150, 300])?;
    ///
    /// assert_eq!(df.find_idx_by_name("Name"), Some(0));
    /// assert_eq!(df.find_idx_by_name("Health"), Some(1));
    /// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
    /// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
    /// assert_eq!(df.find_idx_by_name("Haste"), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
        self.columns.iter().position(|s| s.name() == name)
    }

    /// Select a single column by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
    /// let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
    ///
    /// assert_eq!(df.column("Password")?, &s1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        let idx = self
            .find_idx_by_name(name)
            .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
        Ok(self.select_at_idx(idx).unwrap())
    }

    /// Selected multiple columns by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        names
            .into_iter()
            .map(|name| self.column(name.as_ref()))
            .collect()
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_impl(&cols)
    }

    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_physical_impl(&cols)
    }

    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_physical_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut names = PlHashSet::with_capacity(cols.len());
        for name in cols {
            if !names.insert(name.as_str()) {
                _duplicate_err(name)?
            }
        }
        Ok(())
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        let cols = selection.into_vec();
        self.select_series_impl(&cols)
    }

    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        self.columns
            .iter()
            .enumerate()
            .map(|(i, s)| (s.name(), i))
            .collect()
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            let name_to_idx = self._names_to_idx_map();
            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self
                        .select_at_idx(idx)
                        .unwrap()
                        .to_physical_repr()
                        .into_owned())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            // we hash, because there are user that having millions of columns.
            // # https://github.com/pola-rs/polars/issues/1023
            let name_to_idx = self._names_to_idx_map();

            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self.select_at_idx(idx).unwrap().clone())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.clone()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// Select a mutable series by name.
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        let opt_idx = self.find_idx_by_name(name);

        match opt_idx {
            Some(idx) => self.select_at_idx_mut(idx),
            None => None,
        }
    }

    /// Does a filter but splits thread chunks vertically instead of horizontally
    /// This yields a DataFrame with `n_chunks == n_threads`.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let masks = split_ca(mask, n_threads).unwrap();
        let dfs = split_df(self, n_threads).unwrap();
        let dfs: PolarsResult<Vec<_>> = POOL.install(|| {
            masks
                .par_iter()
                .zip(dfs)
                .map(|(mask, df)| {
                    let cols = df
                        .columns
                        .iter()
                        .map(|s| s.filter(mask))
                        .collect::<PolarsResult<_>>()?;
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        let mut iter = dfs?.into_iter();
        let first = iter.next().unwrap();
        Ok(iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        }))
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            return self.clone().filter_vertical(mask);
        }
        let new_col = self.try_apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s.filter_threaded(mask, true),
            _ => s.filter(mask),
        })?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Same as `filter` but does not parallelize.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let new_col = self.try_apply_columns(&|s| s.filter(mask))?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        let new_col = self.try_apply_columns_par(&|s| {
            let mut i = iter.clone();
            s.take_iter(&mut i)
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` values by indexes from an iterator.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            return self.take_unchecked_vectical(&idx_ca.into_inner());
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            let idx_ca = idx_ca.into_inner();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_iter_unchecked(&mut i)
            })
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` values by indexes from an iterator that may contain None values.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked_vectical(&idx_ca);
        }

        let n_chunks = self.n_chunks();

        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_opt_iter_unchecked(&mut i)
            })
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` rows by index values.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        let indices = if indices.chunks.len() > 1 {
            Cow::Owned(indices.rechunk())
        } else {
            Cow::Borrowed(indices)
        };
        let new_col = POOL.install(|| {
            self.try_apply_columns_par(&|s| match s.dtype() {
                DataType::Utf8 => s.take_threaded(&indices, true),
                _ => s.take(&indices),
            })
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        self.take_unchecked_impl(idx, true)
    }

    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        let cols = if allow_threads {
            POOL.install(|| {
                self.apply_columns_par(&|s| match s.dtype() {
                    DataType::Utf8 => s.take_unchecked_threaded(idx, true).unwrap(),
                    _ => s.take_unchecked(idx).unwrap(),
                })
            })
        } else {
            self.columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect()
        };
        DataFrame::new_no_checks(cols)
    }

    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let idxs = split_ca(indices, n_threads).unwrap();

        let dfs: Vec<_> = POOL.install(|| {
            idxs.par_iter()
                .map(|idx| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(idx).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        let mut iter = dfs.into_iter();
        let first = iter.next().unwrap();
        iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        })
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        self.select_mut(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))
            .map(|s| s.rename(name))?;

        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(self.columns.iter().map(|s| s.name()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }
        Ok(self)
    }

    /// Sort `DataFrame` in place by a column.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // a lot of indirection in both sorting and take
        self.as_single_chunk_par();
        let by_column = self.select_series(by_column)?;
        let reverse = reverse.into_vec();
        self.columns = self.sort_impl(by_column, reverse, false, None)?.columns;
        Ok(self)
    }

    /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // note that the by_column argument also contains evaluated expression from polars-lazy
        // that may not even be present in this dataframe.

        // therefore when we try to set the first columns as sorted, we ignore the error
        // as expressions are not present (they are renamed to _POLARS_SORT_COLUMN_i.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: reverse[0],
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted
        // if the column did not exists it is ok, because we sorted by an expression
        // not present in the dataframe
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted clone of this `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.sort_in_place(by_column, reverse)?;
        Ok(df)
    }

    /// Sort the `DataFrame` by a single column with extra options.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut df = self.clone();
        // a lot of indirection in both sorting and take
        df.as_single_chunk_par();
        let by_column = vec![df.column(by_column)?.clone()];
        let reverse = vec![options.descending];
        df.columns = df
            .sort_impl(by_column, reverse, options.nulls_last, None)?
            .columns;
        Ok(df)
    }

    /// Replace a column with a `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Country" => &["United States", "China"],
    ///                         "Area (km²)" => &[9_833_520, 9_596_961])?;
    /// let s: Series = Series::new("Country", &["USA", "PRC"]);
    ///
    /// assert!(df.replace("Nation", s.clone()).is_err());
    /// assert!(df.replace("Country", s).is_ok());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace<S: IntoSeries>(&mut self, column: &str, new_col: S) -> PolarsResult<&mut Self> {
        self.apply(column, |_| new_col.into_series())
    }

    /// Replace or update a column. The difference between this method and [DataFrame::with_column]
    /// is that now the value of `column: &str` determines the name of the column and not the name
    /// of the `Series` passed to this method.
    pub fn replace_or_add<S: IntoSeries>(
        &mut self,
        column: &str,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut new_col = new_col.into_series();
        new_col.rename(column);
        self.with_column(new_col)
    }

    /// Replace column at index `idx` with a `Series`.
    ///
    /// # Example
    ///
    /// ```ignored
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace_at_idx<S: IntoSeries>(
        &mut self,
        idx: usize,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut new_column = new_col.into_series();
        if new_column.len() != self.height() {
            return Err(PolarsError::ShapeMisMatch(
                format!("Cannot replace Series at index {}. The shape of Series {} does not match that of the DataFrame {}",
                idx, new_column.len(), self.height()
                ).into()));
        };
        if idx >= self.width() {
            return Err(PolarsError::ComputeError(
                format!(
                    "Column index: {} outside of DataFrame with {} columns",
                    idx,
                    self.width()
                )
                .into(),
            ));
        }
        let old_col = &mut self.columns[idx];
        mem::swap(old_col, &mut new_column);
        Ok(self)
    }

    /// Apply a closure to a column. This is the recommended way to do in place modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("names", &["Jean", "Claude", "van"]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// fn str_to_len(str_val: &Series) -> Series {
    ///     str_val.utf8()
    ///         .unwrap()
    ///         .into_iter()
    ///         .map(|opt_name: Option<&str>| {
    ///             opt_name.map(|name: &str| name.len() as u32)
    ///          })
    ///         .collect::<UInt32Chunked>()
    ///         .into_series()
    /// }
    ///
    /// // Replace the names column by the length of the names.
    /// df.apply("names", str_to_len);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    |       |
    /// | ---    | names |
    /// | str    | u32   |
    /// +========+=======+
    /// | "ham"  | 4     |
    /// +--------+-------+
    /// | "spam" | 6     |
    /// +--------+-------+
    /// | "egg"  | 3     |
    /// +--------+-------+
    /// ```
    pub fn apply<F, S>(&mut self, name: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let idx = self.check_name_to_idx(name)?;
        self.apply_at_idx(idx, f)
    }

    /// Apply a closure to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.apply_at_idx(1, |s| s + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | ascii |
    /// | ---    | ---   |
    /// | str    | i32   |
    /// +========+=======+
    /// | "ham"  | 102   |
    /// +--------+-------+
    /// | "spam" | 111   |
    /// +--------+-------+
    /// | "egg"  | 111   |
    /// +--------+-------+
    /// ```
    pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let df_height = self.height();
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();
        let new_col = f(col).into_series();
        match new_col.len() {
            1 => {
                let new_col = new_col.new_from_index(0, df_height);
                let _ = mem::replace(col, new_col);
            }
            len if (len == df_height) => {
                let _ = mem::replace(col, new_col);
            }
            len => {
                return Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Result Series has shape {} where the DataFrame has height {}",
                        len,
                        self.height()
                    )
                    .into(),
                ));
            }
        }

        // make sure the name remains the same after applying the closure
        unsafe {
            let col = self.columns.get_unchecked_mut(idx);
            col.rename(&name);
        }
        Ok(self)
    }

    /// Apply a closure that may fail to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values a column of a `DataFrame` given range of indexes.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// let idx = vec![0, 1, 4];
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set_at_idx_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "ham-is-modified"   | 1      |
    /// +---------------------+--------+
    /// | "spam-is-modified"  | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "quack-is-modified" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();

        let _ = mem::replace(col, f(col).map(|s| s.into_series())?);

        // make sure the name remains the same after applying the closure
        unsafe {
            let col = self.columns.get_unchecked_mut(idx);
            col.rename(&name);
        }
        Ok(self)
    }

    /// Apply a closure that may fail to a column. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values a column of a `DataFrame` given a boolean mask.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // create a mask
    /// let values = df.column("values")?;
    /// let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?;
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set(&mask, Some("not_within_bounds"))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "not_within_bounds" | 1      |
    /// +---------------------+--------+
    /// | "spam"              | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "not_within_bounds" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply<F, S>(&mut self, column: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let idx = self
            .find_idx_by_name(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))?;
        self.try_apply_at_idx(idx, f)
    }

    /// Slice the `DataFrame` along the rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Fruit" => &["Apple", "Grape", "Grape", "Fig", "Fig"],
    ///                         "Color" => &["Green", "Red", "White", "White", "Red"])?;
    /// let sl: DataFrame = df.slice(2, 3);
    ///
    /// assert_eq!(sl.shape(), (3, 2));
    /// println!("{}", sl);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Output:
    /// ```text
    /// shape: (3, 2)
    /// +-------+-------+
    /// | Fruit | Color |
    /// | ---   | ---   |
    /// | str   | str   |
    /// +=======+=======+
    /// | Grape | White |
    /// +-------+-------+
    /// | Fig   | White |
    /// +-------+-------+
    /// | Fig   | Red   |
    /// +-------+-------+
    /// ```
    #[must_use]
    pub fn slice(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        let col = self
            .columns
            .iter()
            .map(|s| s.slice(offset, length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    #[must_use]
    pub fn slice_par(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.slice(offset, length)))
    }

    #[must_use]
    pub fn _slice_and_realloc(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        DataFrame::new_no_checks(self.apply_columns(&|s| {
            let mut out = s.slice(offset, length);
            out.shrink_to_fit();
            out
        }))
    }

    /// Get the head of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank by GDP (2021)" => &[1, 2, 3, 4, 5],
    ///         "Continent" => &["North America", "Asia", "Asia", "Europe", "Europe"],
    ///         "Country" => &["United States", "China", "Japan", "Germany", "United Kingdom"],
    ///         "Capital" => &["Washington", "Beijing", "Tokyo", "Berlin", "London"])?;
    /// assert_eq!(countries.shape(), (5, 4));
    ///
    /// println!("{}", countries.head(Some(3)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 4)
    /// +--------------------+---------------+---------------+------------+
    /// | Rank by GDP (2021) | Continent     | Country       | Capital    |
    /// | ---                | ---           | ---           | ---        |
    /// | i32                | str           | str           | str        |
    /// +====================+===============+===============+============+
    /// | 1                  | North America | United States | Washington |
    /// +--------------------+---------------+---------------+------------+
    /// | 2                  | Asia          | China         | Beijing    |
    /// +--------------------+---------------+---------------+------------+
    /// | 3                  | Asia          | Japan         | Tokyo      |
    /// +--------------------+---------------+---------------+------------+
    /// ```
    #[must_use]
    pub fn head(&self, length: Option<usize>) -> Self {
        let col = self
            .columns
            .iter()
            .map(|s| s.head(length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Get the tail of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank (2021)" => &[105, 106, 107, 108, 109],
    ///         "Apple Price (€/kg)" => &[0.75, 0.70, 0.70, 0.65, 0.52],
    ///         "Country" => &["Kosovo", "Moldova", "North Macedonia", "Syria", "Turkey"])?;
    /// assert_eq!(countries.shape(), (5, 3));
    ///
    /// println!("{}", countries.tail(Some(2)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (2, 3)
    /// +-------------+--------------------+---------+
    /// | Rank (2021) | Apple Price (€/kg) | Country |
    /// | ---         | ---                | ---     |
    /// | i32         | f64                | str     |
    /// +=============+====================+=========+
    /// | 108         | 0.63               | Syria   |
    /// +-------------+--------------------+---------+
    /// | 109         | 0.63               | Turkey  |
    /// +-------------+--------------------+---------+
    /// ```
    #[must_use]
    pub fn tail(&self, length: Option<usize>) -> Self {
        let col = self
            .columns
            .iter()
            .map(|s| s.tail(length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks(&self) -> RecordBatchIter {
        RecordBatchIter {
            columns: &self.columns,
            idx: 0,
            n_chunks: self.n_chunks(),
        }
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches as physical values.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks_physical(&self) -> PhysRecordBatchIter<'_> {
        PhysRecordBatchIter {
            iters: self.columns.iter().map(|s| s.chunks().iter()).collect(),
        }
    }

    /// Get a `DataFrame` with all the columns in reversed order.
    #[must_use]
    pub fn reverse(&self) -> Self {
        let col = self.columns.iter().map(|s| s.reverse()).collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Shift the values by a given period and fill the parts that will be empty due to this operation
    /// with `Nones`.
    ///
    /// See the method on [Series](../series/trait.SeriesTrait.html#method.shift) for more info on the `shift` operation.
    #[must_use]
    pub fn shift(&self, periods: i64) -> Self {
        let col = self.apply_columns_par(&|s| s.shift(periods));

        DataFrame::new_no_checks(col)
    }

    /// Replace None values with one of the following strategies:
    /// * Forward fill (replace None with the previous value)
    /// * Backward fill (replace None with the next value)
    /// * Mean fill (replace None with the mean of the whole array)
    /// * Min fill (replace None with the minimum of the whole array)
    /// * Max fill (replace None with the maximum of the whole array)
    ///
    /// See the method on [Series](../series/trait.SeriesTrait.html#method.fill_null) for more info on the `fill_null` operation.
    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
        let col = self.try_apply_columns_par(&|s| s.fill_null(strategy))?;

        Ok(DataFrame::new_no_checks(col))
    }

    /// Summary statistics for a DataFrame. Only summarizes numeric datatypes at the moment and returns nulls for non numeric datatypes.
    /// Try in keep output similar to pandas
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("categorical" => &["d","e","f"],
    ///                          "numeric" => &[1, 2, 3],
    ///                          "object" => &["a", "b", "c"])?;
    /// assert_eq!(df1.shape(), (3, 3));
    ///
    /// let df2: DataFrame = df1.describe(None);
    /// assert_eq!(df2.shape(), (8, 4));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (8, 4)
    /// ┌──────────┬─────────────┬─────────┬────────┐
    /// │ describe ┆ categorical ┆ numeric ┆ object │
    /// │ ---      ┆ ---         ┆ ---     ┆ ---    │
    /// │ str      ┆ f64         ┆ f64     ┆ f64    │
    /// ╞══════════╪═════════════╪═════════╪════════╡
    /// │ count    ┆ 3.0         ┆ 3.0     ┆ 3.0    │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ mean     ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ std      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ min      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 25%      ┆ null        ┆ 1.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 50%      ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 75%      ┆ null        ┆ 2.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ max      ┆ null        ┆ 3.0     ┆ null   │
    /// └──────────┴─────────────┴─────────┴────────┘
    /// ```
    #[must_use]
    #[cfg(feature = "describe")]
    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let mut columns: Vec<Series> = vec![];

            for s in df.columns.iter() {
                columns.push(s.cast(&DataType::Float64).expect("cast to float failed"));
            }

            DataFrame::new(columns).unwrap()
        }

src/frame/from.rs (line 24)

    fn try_from(arr: StructArray) -> PolarsResult<Self> {
        let (fld, arrs, nulls) = arr.into_data();
        if nulls.is_some() {
            return Err(PolarsError::ComputeError(
                "cannot deserialize struct with nulls into a DataFrame".into(),
            ));
        }
        let columns = fld
            .iter()
            .zip(arrs)
            .map(|(fld, arr)| {
                // Safety
                // reported data type is correct
                unsafe { Series::try_from_arrow_unchecked(&fld.name, vec![arr], fld.data_type()) }
            })
            .collect::<PolarsResult<Vec<_>>>()?;
        DataFrame::new(columns)
    }

src/frame/row.rs (line 106)

    pub fn from_rows_iter_and_schema<'a, I>(mut rows: I, schema: &Schema) -> PolarsResult<Self>
    where
        I: Iterator<Item = &'a Row<'a>>,
    {
        let capacity = rows.size_hint().0;

        let mut buffers: Vec<_> = schema
            .iter_dtypes()
            .map(|dtype| {
                let buf: AnyValueBuffer = (dtype, capacity).into();
                buf
            })
            .collect();

        let mut expected_len = 0;
        rows.try_for_each::<_, PolarsResult<()>>(|row| {
            expected_len += 1;
            for (value, buf) in row.0.iter().zip(&mut buffers) {
                buf.add_fallible(value)?
            }
            Ok(())
        })?;
        let v = buffers
            .into_iter()
            .zip(schema.iter_names())
            .map(|(b, name)| {
                let mut s = b.into_series();
                // if the schema adds a column not in the rows, we
                // fill it with nulls
                if s.is_empty() {
                    Series::full_null(name, expected_len, s.dtype())
                } else {
                    s.rename(name);
                    s
                }
            })
            .collect();
        DataFrame::new(v)
    }

Additional examples can be found in:

src/frame/arithmetic.rs

source

pub const fn empty() -> Self

Creates an empty DataFrame usable in a compile time context (such as static initializers).

Example

use polars_core::prelude::DataFrame;
static EMPTY: DataFrame = DataFrame::empty();

source

pub fn pop(&mut self) -> Option<Series>

Removes the last Series from the DataFrame and returns it, or None if it is empty.

Example

let s1 = Series::new("Ocean", &["Atlantic", "Indian"]);
let s2 = Series::new("Area (km²)", &[106_460_000, 70_560_000]);
let mut df = DataFrame::new(vec![s1.clone(), s2.clone()])?;

assert_eq!(df.pop(), Some(s2));
assert_eq!(df.pop(), Some(s1));
assert_eq!(df.pop(), None);
assert!(df.is_empty());

source

pub fn with_row_count(
 &self,
 name: &str,
 offset: Option<IdxSize>
) -> PolarsResult<Self>

Add a new column at index 0 that counts the rows.

Example

let df1: DataFrame = df!("Name" => &["James", "Mary", "John", "Patricia"])?;
assert_eq!(df1.shape(), (4, 1));

let df2: DataFrame = df1.with_row_count("Id", None)?;
assert_eq!(df2.shape(), (4, 2));
println!("{}", df2);

Output:

 shape: (4, 2)
 +-----+----------+
 | Id  | Name     |
 | --- | ---      |
 | u32 | str      |
 +=====+==========+
 | 0   | James    |
 +-----+----------+
 | 1   | Mary     |
 +-----+----------+
 | 2   | John     |
 +-----+----------+
 | 3   | Patricia |
 +-----+----------+

source

pub fn with_row_count_mut(
 &mut self,
 name: &str,
 offset: Option<IdxSize>
) -> &mut Self

Add a row count in place.

source

pub const fn new_no_checks(columns: Vec<Series>) -> DataFrame

Create a new DataFrame but does not check the length or duplicate occurrence of the Series.

It is advised to use Series::new in favor of this method.

Panic

It is the callers responsibility to uphold the contract of all Series having an equal length, if not this may panic down the line.

Examples found in repository ?

src/frame/mod.rs (line 308)

    pub const fn empty() -> Self {
        DataFrame::new_no_checks(Vec::new())
    }

    /// Removes the last `Series` from the `DataFrame` and returns it, or [`None`] if it is empty.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1 = Series::new("Ocean", &["Atlantic", "Indian"]);
    /// let s2 = Series::new("Area (km²)", &[106_460_000, 70_560_000]);
    /// let mut df = DataFrame::new(vec![s1.clone(), s2.clone()])?;
    ///
    /// assert_eq!(df.pop(), Some(s2));
    /// assert_eq!(df.pop(), Some(s1));
    /// assert_eq!(df.pop(), None);
    /// assert!(df.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn pop(&mut self) -> Option<Series> {
        self.columns.pop()
    }

    /// Add a new column at index 0 that counts the rows.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Name" => &["James", "Mary", "John", "Patricia"])?;
    /// assert_eq!(df1.shape(), (4, 1));
    ///
    /// let df2: DataFrame = df1.with_row_count("Id", None)?;
    /// assert_eq!(df2.shape(), (4, 2));
    /// println!("{}", df2);
    ///
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    ///  shape: (4, 2)
    ///  +-----+----------+
    ///  | Id  | Name     |
    ///  | --- | ---      |
    ///  | u32 | str      |
    ///  +=====+==========+
    ///  | 0   | James    |
    ///  +-----+----------+
    ///  | 1   | Mary     |
    ///  +-----+----------+
    ///  | 2   | John     |
    ///  +-----+----------+
    ///  | 3   | Patricia |
    ///  +-----+----------+
    /// ```
    pub fn with_row_count(&self, name: &str, offset: Option<IdxSize>) -> PolarsResult<Self> {
        let mut columns = Vec::with_capacity(self.columns.len() + 1);
        let offset = offset.unwrap_or(0);

        let mut ca = IdxCa::from_vec(
            name,
            (offset..(self.height() as IdxSize) + offset).collect(),
        );
        ca.set_sorted(false);
        columns.push(ca.into_series());

        columns.extend_from_slice(&self.columns);
        DataFrame::new(columns)
    }

    /// Add a row count in place.
    pub fn with_row_count_mut(&mut self, name: &str, offset: Option<IdxSize>) -> &mut Self {
        let offset = offset.unwrap_or(0);
        let mut ca = IdxCa::from_vec(
            name,
            (offset..(self.height() as IdxSize) + offset).collect(),
        );
        ca.set_sorted(false);

        self.columns.insert(0, ca.into_series());
        self
    }

    /// Create a new `DataFrame` but does not check the length or duplicate occurrence of the `Series`.
    ///
    /// It is advised to use [Series::new](Series::new) in favor of this method.
    ///
    /// # Panic
    /// It is the callers responsibility to uphold the contract of all `Series`
    /// having an equal length, if not this may panic down the line.
    pub const fn new_no_checks(columns: Vec<Series>) -> DataFrame {
        DataFrame { columns }
    }

    /// Aggregate all chunks to contiguous memory.
    #[must_use]
    pub fn agg_chunks(&self) -> Self {
        // Don't parallelize this. Memory overhead
        let f = |s: &Series| s.rechunk();
        let cols = self.columns.iter().map(f).collect();
        DataFrame::new_no_checks(cols)
    }

    /// Shrink the capacity of this DataFrame to fit its length.
    pub fn shrink_to_fit(&mut self) {
        // Don't parallelize this. Memory overhead
        for s in &mut self.columns {
            s.shrink_to_fit();
        }
    }

    /// Aggregate all the chunks in the DataFrame to a single chunk.
    pub fn as_single_chunk(&mut self) -> &mut Self {
        // Don't parallelize this. Memory overhead
        for s in &mut self.columns {
            *s = s.rechunk();
        }
        self
    }

    /// Aggregate all the chunks in the DataFrame to a single chunk in parallel.
    /// This may lead to more peak memory consumption.
    pub fn as_single_chunk_par(&mut self) -> &mut Self {
        if self.columns.iter().any(|s| s.n_chunks() > 1) {
            self.columns = self.apply_columns_par(&|s| s.rechunk());
        }
        self
    }

    /// Estimates of the DataFrames columns consist of the same chunk sizes
    pub fn should_rechunk(&self) -> bool {
        let hb = RandomState::default();
        let hb2 = RandomState::with_seeds(392498, 98132457, 0, 412059);
        !self
            .columns
            .iter()
            // The idea is that we create a hash of the chunk lengths.
            // Consisting of the combined hash + the sum (assuming collision probability is nihil)
            // if not, we can add more hashes or at worst case we do an extra rechunk.
            // the old solution to this was clone all lengths to a vec and compare the vecs
            .map(|s| {
                s.chunk_lengths().map(|i| i as u64).fold(
                    (0u64, 0u64, s.n_chunks()),
                    |(lhash, lh2, n), rval| {
                        let mut h = hb.build_hasher();
                        rval.hash(&mut h);
                        let rhash = h.finish();
                        let mut h = hb2.build_hasher();
                        rval.hash(&mut h);
                        let rh2 = h.finish();
                        (
                            _boost_hash_combine(lhash, rhash),
                            _boost_hash_combine(lh2, rh2),
                            n,
                        )
                    },
                )
            })
            .all_equal()
    }

    /// Ensure all the chunks in the DataFrame are aligned.
    pub fn rechunk(&mut self) -> &mut Self {
        if self.should_rechunk() {
            self.as_single_chunk_par()
        } else {
            self
        }
    }

    /// Get the `DataFrame` schema.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Thing" => &["Observable universe", "Human stupidity"],
    ///                         "Diameter (m)" => &[8.8e26, f64::INFINITY])?;
    ///
    /// let f1: Field = Field::new("Thing", DataType::Utf8);
    /// let f2: Field = Field::new("Diameter (m)", DataType::Float64);
    /// let sc: Schema = Schema::from(vec![f1, f2].into_iter());
    ///
    /// assert_eq!(df.schema(), sc);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn schema(&self) -> Schema {
        Schema::from(self.iter().map(|s| s.field().into_owned()))
    }

    /// Get a reference to the `DataFrame` columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Adenine", "Cytosine", "Guanine", "Thymine"],
    ///                         "Symbol" => &["A", "C", "G", "T"])?;
    /// let columns: &Vec<Series> = df.get_columns();
    ///
    /// assert_eq!(columns[0].name(), "Name");
    /// assert_eq!(columns[1].name(), "Symbol");
    /// # Ok::<(), PolarsError>(())
    /// ```
    #[inline]
    pub fn get_columns(&self) -> &Vec<Series> {
        &self.columns
    }

    #[cfg(feature = "private")]
    #[inline]
    pub fn get_columns_mut(&mut self) -> &mut Vec<Series> {
        &mut self.columns
    }

    /// Iterator over the columns as `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Name", &["Pythagoras' theorem", "Shannon entropy"]);
    /// let s2: Series = Series::new("Formula", &["a²+b²=c²", "H=-Σ[P(x)log|P(x)|]"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2.clone()])?;
    ///
    /// let mut iterator = df.iter();
    ///
    /// assert_eq!(iterator.next(), Some(&s1));
    /// assert_eq!(iterator.next(), Some(&s2));
    /// assert_eq!(iterator.next(), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn iter(&self) -> std::slice::Iter<'_, Series> {
        self.columns.iter()
    }

    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Language" => &["Rust", "Python"],
    ///                         "Designer" => &["Graydon Hoare", "Guido van Rossum"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Language", "Designer"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn get_column_names(&self) -> Vec<&str> {
        self.columns.iter().map(|s| s.name()).collect()
    }

    /// Get the `Vec<String>` representing the column names.
    pub fn get_column_names_owned(&self) -> Vec<String> {
        self.columns.iter().map(|s| s.name().to_string()).collect()
    }

    /// Set the column names.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Mathematical set" => &["ℕ", "ℤ", "𝔻", "ℚ", "ℝ", "ℂ"])?;
    /// df.set_column_names(&["Set"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Set"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn set_column_names<S: AsRef<str>>(&mut self, names: &[S]) -> PolarsResult<()> {
        if names.len() != self.columns.len() {
            return Err(PolarsError::ShapeMisMatch("the provided slice with column names has not the same size as the DataFrame's width".into()));
        }
        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(names.iter().map(|name| name.as_ref()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }

        let columns = mem::take(&mut self.columns);
        self.columns = columns
            .into_iter()
            .zip(names)
            .map(|(s, name)| {
                let mut s = s;
                s.rename(name.as_ref());
                s
            })
            .collect();
        Ok(())
    }

    /// Get the data types of the columns in the DataFrame.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let venus_air: DataFrame = df!("Element" => &["Carbon dioxide", "Nitrogen"],
    ///                                "Fraction" => &[0.965, 0.035])?;
    ///
    /// assert_eq!(venus_air.dtypes(), &[DataType::Utf8, DataType::Float64]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn dtypes(&self) -> Vec<DataType> {
        self.columns.iter().map(|s| s.dtype().clone()).collect()
    }

    /// The number of chunks per column
    pub fn n_chunks(&self) -> usize {
        match self.columns.get(0) {
            None => 0,
            Some(s) => s.n_chunks(),
        }
    }

    /// Get a reference to the schema fields of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let earth: DataFrame = df!("Surface type" => &["Water", "Land"],
    ///                            "Fraction" => &[0.708, 0.292])?;
    ///
    /// let f1: Field = Field::new("Surface type", DataType::Utf8);
    /// let f2: Field = Field::new("Fraction", DataType::Float64);
    ///
    /// assert_eq!(earth.fields(), &[f1, f2]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn fields(&self) -> Vec<Field> {
        self.columns
            .iter()
            .map(|s| s.field().into_owned())
            .collect()
    }

    /// Get (height, width) of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("1" => &[1, 2, 3, 4, 5])?;
    /// let df2: DataFrame = df!("1" => &[1, 2, 3, 4, 5],
    ///                          "2" => &[1, 2, 3, 4, 5])?;
    ///
    /// assert_eq!(df0.shape(), (0 ,0));
    /// assert_eq!(df1.shape(), (5, 1));
    /// assert_eq!(df2.shape(), (5, 2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn shape(&self) -> (usize, usize) {
        match self.columns.as_slice() {
            &[] => (0, 0),
            v => (v[0].len(), v.len()),
        }
    }

    /// Get the width of the `DataFrame` which is the number of columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("Series 1" => &[0; 0])?;
    /// let df2: DataFrame = df!("Series 1" => &[0; 0],
    ///                          "Series 2" => &[0; 0])?;
    ///
    /// assert_eq!(df0.width(), 0);
    /// assert_eq!(df1.width(), 1);
    /// assert_eq!(df2.width(), 2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn width(&self) -> usize {
        self.columns.len()
    }

    /// Get the height of the `DataFrame` which is the number of rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("Currency" => &["€", "$"])?;
    /// let df2: DataFrame = df!("Currency" => &["€", "$", "¥", "£", "₿"])?;
    ///
    /// assert_eq!(df0.height(), 0);
    /// assert_eq!(df1.height(), 2);
    /// assert_eq!(df2.height(), 5);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn height(&self) -> usize {
        self.shape().0
    }

    /// Check if the `DataFrame` is empty.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = DataFrame::default();
    /// assert!(df1.is_empty());
    ///
    /// let df2: DataFrame = df!("First name" => &["Forever"],
    ///                          "Last name" => &["Alone"])?;
    /// assert!(!df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_empty(&self) -> bool {
        self.columns.is_empty()
    }

    pub(crate) fn hstack_mut_no_checks(&mut self, columns: &[Series]) -> &mut Self {
        for col in columns {
            self.columns.push(col.clone());
        }
        self
    }

    /// Add multiple `Series` to a `DataFrame`.
    /// The added `Series` are required to have the same length.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn stack(df: &mut DataFrame, columns: &[Series]) {
    ///     df.hstack_mut(columns);
    /// }
    /// ```
    pub fn hstack_mut(&mut self, columns: &[Series]) -> PolarsResult<&mut Self> {
        let mut names = PlHashSet::with_capacity(self.columns.len());
        for s in &self.columns {
            names.insert(s.name());
        }

        let height = self.height();
        // first loop check validity. We don't do this in a single pass otherwise
        // this DataFrame is already modified when an error occurs.
        for col in columns {
            if col.len() != height && height != 0 {
                return Err(PolarsError::ShapeMisMatch(
                    format!("Could not horizontally stack Series. The Series length {} differs from the DataFrame height: {height}", col.len()).into()));
            }

            let name = col.name();
            if names.contains(name) {
                return Err(PolarsError::Duplicate(
                    format!("Cannot do hstack operation. Column with name: {name} already exists",)
                        .into(),
                ));
            }
            names.insert(name);
        }
        drop(names);
        Ok(self.hstack_mut_no_checks(columns))
    }

    /// Add multiple `Series` to a `DataFrame`.
    /// The added `Series` are required to have the same length.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"])?;
    /// let s1: Series = Series::new("Proton", &[29, 47, 79]);
    /// let s2: Series = Series::new("Electron", &[29, 47, 79]);
    ///
    /// let df2: DataFrame = df1.hstack(&[s1, s2])?;
    /// assert_eq!(df2.shape(), (3, 3));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 3)
    /// +---------+--------+----------+
    /// | Element | Proton | Electron |
    /// | ---     | ---    | ---      |
    /// | str     | i32    | i32      |
    /// +=========+========+==========+
    /// | Copper  | 29     | 29       |
    /// +---------+--------+----------+
    /// | Silver  | 47     | 47       |
    /// +---------+--------+----------+
    /// | Gold    | 79     | 79       |
    /// +---------+--------+----------+
    /// ```
    pub fn hstack(&self, columns: &[Series]) -> PolarsResult<Self> {
        let mut new_cols = self.columns.clone();
        new_cols.extend_from_slice(columns);
        DataFrame::new(new_cols)
    }

    /// Concatenate a `DataFrame` to this `DataFrame` and return as newly allocated `DataFrame`.
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// let df3: DataFrame = df1.vstack(&df2)?;
    ///
    /// assert_eq!(df3.shape(), (5, 2));
    /// println!("{}", df3);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    pub fn vstack(&self, other: &DataFrame) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.vstack_mut(other)?;
        Ok(df)
    }

    /// Concatenate a DataFrame to this DataFrame
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// df1.vstack_mut(&df2)?;
    ///
    /// assert_eq!(df1.shape(), (5, 2));
    /// println!("{}", df1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    pub fn vstack_mut(&mut self, other: &DataFrame) -> PolarsResult<&mut Self> {
        if self.width() != other.width() {
            if self.width() == 0 {
                self.columns = other.columns.clone();
                return Ok(self);
            }

            return Err(PolarsError::ShapeMisMatch(
                format!("Could not vertically stack DataFrame. The DataFrames appended width {} differs from the parent DataFrames width {}", self.width(), other.width()).into()
            ));
        }

        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .try_for_each::<_, PolarsResult<_>>(|(left, right)| {
                can_extend(left, right)?;
                left.append(right).expect("should not fail");
                Ok(())
            })?;
        Ok(self)
    }

    /// Does not check if schema is correct
    pub(crate) fn vstack_mut_unchecked(&mut self, other: &DataFrame) {
        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .for_each(|(left, right)| {
                left.append(right).expect("should not fail");
            });
    }

    /// Extend the memory backed by this [`DataFrame`] with the values from `other`.
    ///
    /// Different from [`vstack`](Self::vstack) which adds the chunks from `other` to the chunks of this [`DataFrame`]
    /// `extend` appends the data from `other` to the underlying memory locations and thus may cause a reallocation.
    ///
    /// If this does not cause a reallocation, the resulting data structure will not have any extra chunks
    /// and thus will yield faster queries.
    ///
    /// Prefer `extend` over `vstack` when you want to do a query after a single append. For instance during
    /// online operations where you add `n` rows and rerun a query.
    ///
    /// Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance
    /// when you read in multiple files and when to store them in a single `DataFrame`. In the latter case, finish the sequence
    /// of `append` operations with a [`rechunk`](Self::rechunk).
    pub fn extend(&mut self, other: &DataFrame) -> PolarsResult<()> {
        if self.width() != other.width() {
            return Err(PolarsError::ShapeMisMatch(
                format!("Could not extend DataFrame. The DataFrames extended width {} differs from the parent DataFrames width {}", self.width(), other.width()).into()
            ));
        }

        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .try_for_each::<_, PolarsResult<_>>(|(left, right)| {
                can_extend(left, right)?;
                left.extend(right).unwrap();
                Ok(())
            })?;
        Ok(())
    }

    /// Remove a column by name and return the column removed.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Animal" => &["Tiger", "Lion", "Great auk"],
    ///                             "IUCN" => &["Endangered", "Vulnerable", "Extinct"])?;
    ///
    /// let s1: PolarsResult<Series> = df.drop_in_place("Average weight");
    /// assert!(s1.is_err());
    ///
    /// let s2: Series = df.drop_in_place("Animal")?;
    /// assert_eq!(s2, Series::new("Animal", &["Tiger", "Lion", "Great auk"]));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop_in_place(&mut self, name: &str) -> PolarsResult<Series> {
        let idx = self.check_name_to_idx(name)?;
        Ok(self.columns.remove(idx))
    }

    /// Return a new `DataFrame` where all null values are dropped.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Country" => ["Malta", "Liechtenstein", "North Korea"],
    ///                         "Tax revenue (% GDP)" => [Some(32.7), None, None])?;
    /// assert_eq!(df1.shape(), (3, 2));
    ///
    /// let df2: DataFrame = df1.drop_nulls(None)?;
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------------------+
    /// | Country | Tax revenue (% GDP) |
    /// | ---     | ---                 |
    /// | str     | f64                 |
    /// +=========+=====================+
    /// | Malta   | 32.7                |
    /// +---------+---------------------+
    /// ```
    pub fn drop_nulls(&self, subset: Option<&[String]>) -> PolarsResult<Self> {
        let selected_series;

        let mut iter = match subset {
            Some(cols) => {
                selected_series = self.select_series(cols)?;
                selected_series.iter()
            }
            None => self.columns.iter(),
        };

        // fast path for no nulls in df
        if iter.clone().all(|s| !s.has_validity()) {
            return Ok(self.clone());
        }

        let mask = iter
            .next()
            .ok_or_else(|| PolarsError::NoData("No data to drop nulls from".into()))?;
        let mut mask = mask.is_not_null();

        for s in iter {
            mask = mask & s.is_not_null();
        }
        self.filter(&mask)
    }

    /// Drop a column by name.
    /// This is a pure method and will return a new `DataFrame` instead of modifying
    /// the current one in place.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Ray type" => &["α", "β", "X", "γ"])?;
    /// let df2: DataFrame = df1.drop("Ray type")?;
    ///
    /// assert!(df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop(&self, name: &str) -> PolarsResult<Self> {
        let idx = self.check_name_to_idx(name)?;
        let mut new_cols = Vec::with_capacity(self.columns.len() - 1);

        self.columns.iter().enumerate().for_each(|(i, s)| {
            if i != idx {
                new_cols.push(s.clone())
            }
        });

        Ok(DataFrame::new_no_checks(new_cols))
    }

    pub fn drop_many<S: AsRef<str>>(&self, names: &[S]) -> Self {
        let names = names.iter().map(|s| s.as_ref()).collect();
        fn inner(df: &DataFrame, names: Vec<&str>) -> DataFrame {
            let mut new_cols = Vec::with_capacity(df.columns.len() - names.len());
            df.columns.iter().for_each(|s| {
                if !names.contains(&s.name()) {
                    new_cols.push(s.clone())
                }
            });

            DataFrame::new_no_checks(new_cols)
        }
        inner(self, names)
    }

    fn insert_at_idx_no_name_check(
        &mut self,
        index: usize,
        series: Series,
    ) -> PolarsResult<&mut Self> {
        if series.len() == self.height() {
            self.columns.insert(index, series);
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                .into(),
            ))
        }
    }

    /// Insert a new column at a given index.
    pub fn insert_at_idx<S: IntoSeries>(
        &mut self,
        index: usize,
        column: S,
    ) -> PolarsResult<&mut Self> {
        let series = column.into_series();
        self.check_already_present(series.name())?;
        self.insert_at_idx_no_name_check(index, series)
    }

    fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> {
        if let Some(idx) = self.find_idx_by_name(series.name()) {
            self.replace_at_idx(idx, series)?;
        } else {
            self.columns.push(series);
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    pub fn with_column<S: IntoSeries>(&mut self, column: S) -> PolarsResult<&mut Self> {
        fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();
            if series.len() == 1 && height > 1 {
                series = series.new_from_index(0, height);
            }

            if series.len() == height || df.is_empty() {
                df.add_column_by_search(series)?;
                Ok(df)
            }
            // special case for literals
            else if height == 0 && series.len() == 1 {
                let s = series.slice(0, 0);
                df.add_column_by_search(s)?;
                Ok(df)
            } else {
                Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Could not add column. The Series length {} differs from the DataFrame height: {}",
                        series.len(),
                        df.height()
                    )
                        .into(),
                ))
            }
        }
        let series = column.into_series();
        inner(self, series)
    }

    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let name = s.name();
        if let Some((idx, _, _)) = schema.get_full(name) {
            // schema is incorrect fallback to search
            if self.columns.get(idx).map(|s| s.name()) != Some(name) {
                self.add_column_by_search(s)?;
            } else {
                self.replace_at_idx(idx, s)?;
            }
        } else {
            self.columns.push(s);
        }
        Ok(())
    }

    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                self.with_column(s.clone())?;
            }
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    /// Uses an existing schema to amortize lookups.
    /// If the schema is incorrect, we will fallback to linear search.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let mut series = column.into_series();

        let height = self.height();
        if series.len() == 1 && height > 1 {
            series = series.new_from_index(0, height);
        }

        if series.len() == height || self.is_empty() {
            self.add_column_by_schema(series, schema)?;
            Ok(self)
        }
        // special case for literals
        else if height == 0 && series.len() == 1 {
            let s = series.slice(0, 0);
            self.add_column_by_schema(s, schema)?;
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                    .into(),
            ))
        }
    }

    /// Get a row in the `DataFrame`. Beware this is slow.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    ///     df.get(idx)
    /// }
    /// ```
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        match self.columns.get(0) {
            Some(s) => {
                if s.len() <= idx {
                    return None;
                }
            }
            None => return None,
        }
        // safety: we just checked bounds
        unsafe { Some(self.columns.iter().map(|s| s.get_unchecked(idx)).collect()) }
    }

    /// Select a `Series` by index.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
    ///                         "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
    ///
    /// let s1: Option<&Series> = df.select_at_idx(0);
    /// let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
    ///
    /// assert_eq!(s1, Some(&s2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_at_idx(&self, idx: usize) -> Option<&Series> {
        self.columns.get(idx)
    }

    /// Select a mutable series by index.
    ///
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> {
        self.columns.get_mut(idx)
    }

    /// Select column(s) from this `DataFrame` by range and return a new DataFrame
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///     "0" => &[0, 0, 0],
    ///     "1" => &[1, 1, 1],
    ///     "2" => &[2, 2, 2]
    /// }?;
    ///
    /// assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
    /// assert!(df.frame_equal(&df.select_by_range(..)?));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // This function is copied from std::slice::range (https://doc.rust-lang.org/std/slice/fn.range.html)
        // because it is the nightly feature. We should change here if this function were stable.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let len = bounds.end;

            let start: ops::Bound<&usize> = range.start_bound();
            let start = match start {
                ops::Bound::Included(&start) => start,
                ops::Bound::Excluded(start) => start.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice from after maximum usize");
                }),
                ops::Bound::Unbounded => 0,
            };

            let end: ops::Bound<&usize> = range.end_bound();
            let end = match end {
                ops::Bound::Included(end) => end.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice up to maximum usize");
                }),
                ops::Bound::Excluded(&end) => end,
                ops::Bound::Unbounded => len,
            };

            if start > end {
                panic!("slice index starts at {start} but ends at {end}");
            }
            if end > len {
                panic!("range end index {end} out of range for slice of length {len}",);
            }

            ops::Range { start, end }
        }

        let colnames = self.get_column_names_owned();
        let range = get_range(range, ..colnames.len());

        self.select_impl(&colnames[range])
    }

    /// Get column index of a `Series` by name.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
    ///                         "Health" => &[100, 200, 500],
    ///                         "Mana" => &[250, 100, 0],
    ///                         "Strength" => &[30, 150, 300])?;
    ///
    /// assert_eq!(df.find_idx_by_name("Name"), Some(0));
    /// assert_eq!(df.find_idx_by_name("Health"), Some(1));
    /// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
    /// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
    /// assert_eq!(df.find_idx_by_name("Haste"), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
        self.columns.iter().position(|s| s.name() == name)
    }

    /// Select a single column by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
    /// let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
    ///
    /// assert_eq!(df.column("Password")?, &s1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        let idx = self
            .find_idx_by_name(name)
            .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
        Ok(self.select_at_idx(idx).unwrap())
    }

    /// Selected multiple columns by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        names
            .into_iter()
            .map(|name| self.column(name.as_ref()))
            .collect()
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_impl(&cols)
    }

    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_physical_impl(&cols)
    }

    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_physical_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut names = PlHashSet::with_capacity(cols.len());
        for name in cols {
            if !names.insert(name.as_str()) {
                _duplicate_err(name)?
            }
        }
        Ok(())
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        let cols = selection.into_vec();
        self.select_series_impl(&cols)
    }

    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        self.columns
            .iter()
            .enumerate()
            .map(|(i, s)| (s.name(), i))
            .collect()
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            let name_to_idx = self._names_to_idx_map();
            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self
                        .select_at_idx(idx)
                        .unwrap()
                        .to_physical_repr()
                        .into_owned())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            // we hash, because there are user that having millions of columns.
            // # https://github.com/pola-rs/polars/issues/1023
            let name_to_idx = self._names_to_idx_map();

            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self.select_at_idx(idx).unwrap().clone())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.clone()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// Select a mutable series by name.
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        let opt_idx = self.find_idx_by_name(name);

        match opt_idx {
            Some(idx) => self.select_at_idx_mut(idx),
            None => None,
        }
    }

    /// Does a filter but splits thread chunks vertically instead of horizontally
    /// This yields a DataFrame with `n_chunks == n_threads`.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let masks = split_ca(mask, n_threads).unwrap();
        let dfs = split_df(self, n_threads).unwrap();
        let dfs: PolarsResult<Vec<_>> = POOL.install(|| {
            masks
                .par_iter()
                .zip(dfs)
                .map(|(mask, df)| {
                    let cols = df
                        .columns
                        .iter()
                        .map(|s| s.filter(mask))
                        .collect::<PolarsResult<_>>()?;
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        let mut iter = dfs?.into_iter();
        let first = iter.next().unwrap();
        Ok(iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        }))
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            return self.clone().filter_vertical(mask);
        }
        let new_col = self.try_apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s.filter_threaded(mask, true),
            _ => s.filter(mask),
        })?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Same as `filter` but does not parallelize.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let new_col = self.try_apply_columns(&|s| s.filter(mask))?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        let new_col = self.try_apply_columns_par(&|s| {
            let mut i = iter.clone();
            s.take_iter(&mut i)
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` values by indexes from an iterator.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            return self.take_unchecked_vectical(&idx_ca.into_inner());
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            let idx_ca = idx_ca.into_inner();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_iter_unchecked(&mut i)
            })
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` values by indexes from an iterator that may contain None values.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked_vectical(&idx_ca);
        }

        let n_chunks = self.n_chunks();

        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_opt_iter_unchecked(&mut i)
            })
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` rows by index values.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        let indices = if indices.chunks.len() > 1 {
            Cow::Owned(indices.rechunk())
        } else {
            Cow::Borrowed(indices)
        };
        let new_col = POOL.install(|| {
            self.try_apply_columns_par(&|s| match s.dtype() {
                DataType::Utf8 => s.take_threaded(&indices, true),
                _ => s.take(&indices),
            })
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        self.take_unchecked_impl(idx, true)
    }

    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        let cols = if allow_threads {
            POOL.install(|| {
                self.apply_columns_par(&|s| match s.dtype() {
                    DataType::Utf8 => s.take_unchecked_threaded(idx, true).unwrap(),
                    _ => s.take_unchecked(idx).unwrap(),
                })
            })
        } else {
            self.columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect()
        };
        DataFrame::new_no_checks(cols)
    }

    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let idxs = split_ca(indices, n_threads).unwrap();

        let dfs: Vec<_> = POOL.install(|| {
            idxs.par_iter()
                .map(|idx| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(idx).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        let mut iter = dfs.into_iter();
        let first = iter.next().unwrap();
        iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        })
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        self.select_mut(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))
            .map(|s| s.rename(name))?;

        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(self.columns.iter().map(|s| s.name()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }
        Ok(self)
    }

    /// Sort `DataFrame` in place by a column.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // a lot of indirection in both sorting and take
        self.as_single_chunk_par();
        let by_column = self.select_series(by_column)?;
        let reverse = reverse.into_vec();
        self.columns = self.sort_impl(by_column, reverse, false, None)?.columns;
        Ok(self)
    }

    /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // note that the by_column argument also contains evaluated expression from polars-lazy
        // that may not even be present in this dataframe.

        // therefore when we try to set the first columns as sorted, we ignore the error
        // as expressions are not present (they are renamed to _POLARS_SORT_COLUMN_i.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: reverse[0],
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted
        // if the column did not exists it is ok, because we sorted by an expression
        // not present in the dataframe
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted clone of this `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.sort_in_place(by_column, reverse)?;
        Ok(df)
    }

    /// Sort the `DataFrame` by a single column with extra options.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut df = self.clone();
        // a lot of indirection in both sorting and take
        df.as_single_chunk_par();
        let by_column = vec![df.column(by_column)?.clone()];
        let reverse = vec![options.descending];
        df.columns = df
            .sort_impl(by_column, reverse, options.nulls_last, None)?
            .columns;
        Ok(df)
    }

    /// Replace a column with a `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Country" => &["United States", "China"],
    ///                         "Area (km²)" => &[9_833_520, 9_596_961])?;
    /// let s: Series = Series::new("Country", &["USA", "PRC"]);
    ///
    /// assert!(df.replace("Nation", s.clone()).is_err());
    /// assert!(df.replace("Country", s).is_ok());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace<S: IntoSeries>(&mut self, column: &str, new_col: S) -> PolarsResult<&mut Self> {
        self.apply(column, |_| new_col.into_series())
    }

    /// Replace or update a column. The difference between this method and [DataFrame::with_column]
    /// is that now the value of `column: &str` determines the name of the column and not the name
    /// of the `Series` passed to this method.
    pub fn replace_or_add<S: IntoSeries>(
        &mut self,
        column: &str,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut new_col = new_col.into_series();
        new_col.rename(column);
        self.with_column(new_col)
    }

    /// Replace column at index `idx` with a `Series`.
    ///
    /// # Example
    ///
    /// ```ignored
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace_at_idx<S: IntoSeries>(
        &mut self,
        idx: usize,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut new_column = new_col.into_series();
        if new_column.len() != self.height() {
            return Err(PolarsError::ShapeMisMatch(
                format!("Cannot replace Series at index {}. The shape of Series {} does not match that of the DataFrame {}",
                idx, new_column.len(), self.height()
                ).into()));
        };
        if idx >= self.width() {
            return Err(PolarsError::ComputeError(
                format!(
                    "Column index: {} outside of DataFrame with {} columns",
                    idx,
                    self.width()
                )
                .into(),
            ));
        }
        let old_col = &mut self.columns[idx];
        mem::swap(old_col, &mut new_column);
        Ok(self)
    }

    /// Apply a closure to a column. This is the recommended way to do in place modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("names", &["Jean", "Claude", "van"]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// fn str_to_len(str_val: &Series) -> Series {
    ///     str_val.utf8()
    ///         .unwrap()
    ///         .into_iter()
    ///         .map(|opt_name: Option<&str>| {
    ///             opt_name.map(|name: &str| name.len() as u32)
    ///          })
    ///         .collect::<UInt32Chunked>()
    ///         .into_series()
    /// }
    ///
    /// // Replace the names column by the length of the names.
    /// df.apply("names", str_to_len);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    |       |
    /// | ---    | names |
    /// | str    | u32   |
    /// +========+=======+
    /// | "ham"  | 4     |
    /// +--------+-------+
    /// | "spam" | 6     |
    /// +--------+-------+
    /// | "egg"  | 3     |
    /// +--------+-------+
    /// ```
    pub fn apply<F, S>(&mut self, name: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let idx = self.check_name_to_idx(name)?;
        self.apply_at_idx(idx, f)
    }

    /// Apply a closure to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.apply_at_idx(1, |s| s + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | ascii |
    /// | ---    | ---   |
    /// | str    | i32   |
    /// +========+=======+
    /// | "ham"  | 102   |
    /// +--------+-------+
    /// | "spam" | 111   |
    /// +--------+-------+
    /// | "egg"  | 111   |
    /// +--------+-------+
    /// ```
    pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let df_height = self.height();
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();
        let new_col = f(col).into_series();
        match new_col.len() {
            1 => {
                let new_col = new_col.new_from_index(0, df_height);
                let _ = mem::replace(col, new_col);
            }
            len if (len == df_height) => {
                let _ = mem::replace(col, new_col);
            }
            len => {
                return Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Result Series has shape {} where the DataFrame has height {}",
                        len,
                        self.height()
                    )
                    .into(),
                ));
            }
        }

        // make sure the name remains the same after applying the closure
        unsafe {
            let col = self.columns.get_unchecked_mut(idx);
            col.rename(&name);
        }
        Ok(self)
    }

    /// Apply a closure that may fail to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values a column of a `DataFrame` given range of indexes.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// let idx = vec![0, 1, 4];
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set_at_idx_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "ham-is-modified"   | 1      |
    /// +---------------------+--------+
    /// | "spam-is-modified"  | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "quack-is-modified" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();

        let _ = mem::replace(col, f(col).map(|s| s.into_series())?);

        // make sure the name remains the same after applying the closure
        unsafe {
            let col = self.columns.get_unchecked_mut(idx);
            col.rename(&name);
        }
        Ok(self)
    }

    /// Apply a closure that may fail to a column. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values a column of a `DataFrame` given a boolean mask.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // create a mask
    /// let values = df.column("values")?;
    /// let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?;
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set(&mask, Some("not_within_bounds"))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "not_within_bounds" | 1      |
    /// +---------------------+--------+
    /// | "spam"              | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "not_within_bounds" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply<F, S>(&mut self, column: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let idx = self
            .find_idx_by_name(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))?;
        self.try_apply_at_idx(idx, f)
    }

    /// Slice the `DataFrame` along the rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Fruit" => &["Apple", "Grape", "Grape", "Fig", "Fig"],
    ///                         "Color" => &["Green", "Red", "White", "White", "Red"])?;
    /// let sl: DataFrame = df.slice(2, 3);
    ///
    /// assert_eq!(sl.shape(), (3, 2));
    /// println!("{}", sl);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Output:
    /// ```text
    /// shape: (3, 2)
    /// +-------+-------+
    /// | Fruit | Color |
    /// | ---   | ---   |
    /// | str   | str   |
    /// +=======+=======+
    /// | Grape | White |
    /// +-------+-------+
    /// | Fig   | White |
    /// +-------+-------+
    /// | Fig   | Red   |
    /// +-------+-------+
    /// ```
    #[must_use]
    pub fn slice(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        let col = self
            .columns
            .iter()
            .map(|s| s.slice(offset, length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    #[must_use]
    pub fn slice_par(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.slice(offset, length)))
    }

    #[must_use]
    pub fn _slice_and_realloc(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        DataFrame::new_no_checks(self.apply_columns(&|s| {
            let mut out = s.slice(offset, length);
            out.shrink_to_fit();
            out
        }))
    }

    /// Get the head of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank by GDP (2021)" => &[1, 2, 3, 4, 5],
    ///         "Continent" => &["North America", "Asia", "Asia", "Europe", "Europe"],
    ///         "Country" => &["United States", "China", "Japan", "Germany", "United Kingdom"],
    ///         "Capital" => &["Washington", "Beijing", "Tokyo", "Berlin", "London"])?;
    /// assert_eq!(countries.shape(), (5, 4));
    ///
    /// println!("{}", countries.head(Some(3)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 4)
    /// +--------------------+---------------+---------------+------------+
    /// | Rank by GDP (2021) | Continent     | Country       | Capital    |
    /// | ---                | ---           | ---           | ---        |
    /// | i32                | str           | str           | str        |
    /// +====================+===============+===============+============+
    /// | 1                  | North America | United States | Washington |
    /// +--------------------+---------------+---------------+------------+
    /// | 2                  | Asia          | China         | Beijing    |
    /// +--------------------+---------------+---------------+------------+
    /// | 3                  | Asia          | Japan         | Tokyo      |
    /// +--------------------+---------------+---------------+------------+
    /// ```
    #[must_use]
    pub fn head(&self, length: Option<usize>) -> Self {
        let col = self
            .columns
            .iter()
            .map(|s| s.head(length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Get the tail of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank (2021)" => &[105, 106, 107, 108, 109],
    ///         "Apple Price (€/kg)" => &[0.75, 0.70, 0.70, 0.65, 0.52],
    ///         "Country" => &["Kosovo", "Moldova", "North Macedonia", "Syria", "Turkey"])?;
    /// assert_eq!(countries.shape(), (5, 3));
    ///
    /// println!("{}", countries.tail(Some(2)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (2, 3)
    /// +-------------+--------------------+---------+
    /// | Rank (2021) | Apple Price (€/kg) | Country |
    /// | ---         | ---                | ---     |
    /// | i32         | f64                | str     |
    /// +=============+====================+=========+
    /// | 108         | 0.63               | Syria   |
    /// +-------------+--------------------+---------+
    /// | 109         | 0.63               | Turkey  |
    /// +-------------+--------------------+---------+
    /// ```
    #[must_use]
    pub fn tail(&self, length: Option<usize>) -> Self {
        let col = self
            .columns
            .iter()
            .map(|s| s.tail(length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks(&self) -> RecordBatchIter {
        RecordBatchIter {
            columns: &self.columns,
            idx: 0,
            n_chunks: self.n_chunks(),
        }
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches as physical values.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks_physical(&self) -> PhysRecordBatchIter<'_> {
        PhysRecordBatchIter {
            iters: self.columns.iter().map(|s| s.chunks().iter()).collect(),
        }
    }

    /// Get a `DataFrame` with all the columns in reversed order.
    #[must_use]
    pub fn reverse(&self) -> Self {
        let col = self.columns.iter().map(|s| s.reverse()).collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Shift the values by a given period and fill the parts that will be empty due to this operation
    /// with `Nones`.
    ///
    /// See the method on [Series](../series/trait.SeriesTrait.html#method.shift) for more info on the `shift` operation.
    #[must_use]
    pub fn shift(&self, periods: i64) -> Self {
        let col = self.apply_columns_par(&|s| s.shift(periods));

        DataFrame::new_no_checks(col)
    }

    /// Replace None values with one of the following strategies:
    /// * Forward fill (replace None with the previous value)
    /// * Backward fill (replace None with the next value)
    /// * Mean fill (replace None with the mean of the whole array)
    /// * Min fill (replace None with the minimum of the whole array)
    /// * Max fill (replace None with the maximum of the whole array)
    ///
    /// See the method on [Series](../series/trait.SeriesTrait.html#method.fill_null) for more info on the `fill_null` operation.
    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
        let col = self.try_apply_columns_par(&|s| s.fill_null(strategy))?;

        Ok(DataFrame::new_no_checks(col))
    }

    /// Summary statistics for a DataFrame. Only summarizes numeric datatypes at the moment and returns nulls for non numeric datatypes.
    /// Try in keep output similar to pandas
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("categorical" => &["d","e","f"],
    ///                          "numeric" => &[1, 2, 3],
    ///                          "object" => &["a", "b", "c"])?;
    /// assert_eq!(df1.shape(), (3, 3));
    ///
    /// let df2: DataFrame = df1.describe(None);
    /// assert_eq!(df2.shape(), (8, 4));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (8, 4)
    /// ┌──────────┬─────────────┬─────────┬────────┐
    /// │ describe ┆ categorical ┆ numeric ┆ object │
    /// │ ---      ┆ ---         ┆ ---     ┆ ---    │
    /// │ str      ┆ f64         ┆ f64     ┆ f64    │
    /// ╞══════════╪═════════════╪═════════╪════════╡
    /// │ count    ┆ 3.0         ┆ 3.0     ┆ 3.0    │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ mean     ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ std      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ min      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 25%      ┆ null        ┆ 1.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 50%      ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 75%      ┆ null        ┆ 2.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ max      ┆ null        ┆ 3.0     ┆ null   │
    /// └──────────┴─────────────┴─────────┴────────┘
    /// ```
    #[must_use]
    #[cfg(feature = "describe")]
    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let mut columns: Vec<Series> = vec![];

            for s in df.columns.iter() {
                columns.push(s.cast(&DataType::Float64).expect("cast to float failed"));
            }

            DataFrame::new(columns).unwrap()
        }

        fn count(df: &DataFrame) -> DataFrame {
            let columns = df.apply_columns_par(&|s| Series::new(s.name(), [s.len() as IdxSize]));
            DataFrame::new_no_checks(columns)
        }

        let percentiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        let mut headers: Vec<String> = vec![
            "count".to_string(),
            "mean".to_string(),
            "std".to_string(),
            "min".to_string(),
        ];

        let mut tmp: Vec<DataFrame> = vec![
            describe_cast(&count(self)),
            describe_cast(&self.mean()),
            describe_cast(&self.std(1)),
            describe_cast(&self.min()),
        ];

        for p in percentiles {
            tmp.push(describe_cast(
                &self
                    .quantile(*p, QuantileInterpolOptions::Linear)
                    .expect("quantile failed"),
            ));
            headers.push(format!("{}%", *p * 100.0));
        }

        // Keep order same as pandas
        tmp.push(describe_cast(&self.max()));
        headers.push("max".to_string());

        let mut summary = concat_df_unchecked(&tmp);

        summary
            .insert_at_idx(0, Series::new("describe", headers))
            .expect("insert of header failed");

        summary
    }

    /// Aggregate the columns to their maximum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.max();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 6       | 5       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn max(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.max_as_series());

        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their standard deviation values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.std(1);
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +-------------------+--------------------+
    /// | Die n°1           | Die n°2            |
    /// | ---               | ---                |
    /// | f64               | f64                |
    /// +===================+====================+
    /// | 2.280350850198276 | 1.0954451150103321 |
    /// +-------------------+--------------------+
    /// ```
    #[must_use]
    pub fn std(&self, ddof: u8) -> Self {
        let columns = self.apply_columns_par(&|s| s.std_as_series(ddof));

        DataFrame::new_no_checks(columns)
    }
    /// Aggregate the columns to their variation values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.var(1);
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 5.2     | 1.2     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn var(&self, ddof: u8) -> Self {
        let columns = self.apply_columns_par(&|s| s.var_as_series(ddof));
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their minimum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.min();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 1       | 2       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn min(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.min_as_series());
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their sum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.sum();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 16      | 16      |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn sum(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.sum_as_series());
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their mean values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.mean();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 3.2     | 3.2     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn mean(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.mean_as_series());
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their median values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.median();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 3       | 3       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn median(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.median_as_series());
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their quantile values.
    pub fn quantile(&self, quantile: f64, interpol: QuantileInterpolOptions) -> PolarsResult<Self> {
        let columns = self.try_apply_columns_par(&|s| s.quantile_as_series(quantile, interpol))?;

        Ok(DataFrame::new_no_checks(columns))
    }

    /// Aggregate the column horizontally to their min values.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmin(&self) -> PolarsResult<Option<Series>> {
        let min_fn = |acc: &Series, s: &Series| {
            let mask = acc.lt(s)? & acc.is_not_null() | s.is_null();
            acc.zip_with(&mask, s)
        };

        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            2 => min_fn(&self.columns[0], &self.columns[1]).map(Some),
            _ => {
                // the try_reduce_with is a bit slower in parallelism,
                // but I don't think it matters here as we parallelize over columns, not over elements
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| min_fn(&l, &r).map(Cow::Owned))
                        // we can unwrap the option, because we are certain there is a column
                        // we started this operation on 3 columns
                        .unwrap()
                        .map(|cow| Some(cow.into_owned()))
                })
            }
        }
    }

    /// Aggregate the column horizontally to their max values.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmax(&self) -> PolarsResult<Option<Series>> {
        let max_fn = |acc: &Series, s: &Series| {
            let mask = acc.gt(s)? & acc.is_not_null() | s.is_null();
            acc.zip_with(&mask, s)
        };

        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            2 => max_fn(&self.columns[0], &self.columns[1]).map(Some),
            _ => {
                // the try_reduce_with is a bit slower in parallelism,
                // but I don't think it matters here as we parallelize over columns, not over elements
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| max_fn(&l, &r).map(Cow::Owned))
                        // we can unwrap the option, because we are certain there is a column
                        // we started this operation on 3 columns
                        .unwrap()
                        .map(|cow| Some(cow.into_owned()))
                })
            }
        }
    }

    /// Aggregate the column horizontally to their sum values.
    pub fn hsum(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>> {
        let sum_fn =
            |acc: &Series, s: &Series, none_strategy: NullStrategy| -> PolarsResult<Series> {
                let mut acc = acc.clone();
                let mut s = s.clone();
                if let NullStrategy::Ignore = none_strategy {
                    // if has nulls
                    if acc.has_validity() {
                        acc = acc.fill_null(FillNullStrategy::Zero)?;
                    }
                    if s.has_validity() {
                        s = s.fill_null(FillNullStrategy::Zero)?;
                    }
                }
                Ok(&acc + &s)
            };

        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            2 => sum_fn(&self.columns[0], &self.columns[1], none_strategy).map(Some),
            _ => {
                // the try_reduce_with is a bit slower in parallelism,
                // but I don't think it matters here as we parallelize over columns, not over elements
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| sum_fn(&l, &r, none_strategy).map(Cow::Owned))
                        // we can unwrap the option, because we are certain there is a column
                        // we started this operation on 3 columns
                        .unwrap()
                        .map(|cow| Some(cow.into_owned()))
                })
            }
        }
    }

    /// Aggregate the column horizontally to their mean values.
    pub fn hmean(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>> {
        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            _ => {
                let columns = self
                    .columns
                    .iter()
                    .cloned()
                    .filter(|s| {
                        let dtype = s.dtype();
                        dtype.is_numeric() || matches!(dtype, DataType::Boolean)
                    })
                    .collect();
                let numeric_df = DataFrame::new_no_checks(columns);

                let sum = || numeric_df.hsum(none_strategy);

                let null_count = || {
                    numeric_df
                        .columns
                        .par_iter()
                        .map(|s| s.is_null().cast(&DataType::UInt32).unwrap())
                        .reduce_with(|l, r| &l + &r)
                        // we can unwrap the option, because we are certain there is a column
                        // we started this operation on 2 columns
                        .unwrap()
                };

                let (sum, null_count) = POOL.install(|| rayon::join(sum, null_count));
                let sum = sum?;

                // value lengths: len - null_count
                let value_length: UInt32Chunked =
                    (numeric_df.width().sub(&null_count)).u32().unwrap().clone();

                // make sure that we do not divide by zero
                // by replacing with None
                let value_length = value_length
                    .set(&value_length.equal(0), None)?
                    .into_series()
                    .cast(&DataType::Float64)?;

                Ok(sum.map(|sum| &sum / &value_length))
            }
        }
    }

    /// Pipe different functions/ closure operations that work on a DataFrame together.
    pub fn pipe<F, B>(self, f: F) -> PolarsResult<B>
    where
        F: Fn(DataFrame) -> PolarsResult<B>,
    {
        f(self)
    }

    /// Pipe different functions/ closure operations that work on a DataFrame together.
    pub fn pipe_mut<F, B>(&mut self, f: F) -> PolarsResult<B>
    where
        F: Fn(&mut DataFrame) -> PolarsResult<B>,
    {
        f(self)
    }

    /// Pipe different functions/ closure operations that work on a DataFrame together.
    pub fn pipe_with_args<F, B, Args>(self, f: F, args: Args) -> PolarsResult<B>
    where
        F: Fn(DataFrame, Args) -> PolarsResult<B>,
    {
        f(self, args)
    }

    /// Drop duplicate rows from a `DataFrame`.
    /// *This fails when there is a column of type List in DataFrame*
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///               "flt" => [1., 1., 2., 2., 3., 3.],
    ///               "int" => [1, 1, 2, 2, 3, 3, ],
    ///               "str" => ["a", "a", "b", "b", "c", "c"]
    ///           }?;
    ///
    /// println!("{}", df.drop_duplicates(true, None)?);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Returns
    ///
    /// ```text
    /// +-----+-----+-----+
    /// | flt | int | str |
    /// | --- | --- | --- |
    /// | f64 | i32 | str |
    /// +=====+=====+=====+
    /// | 1   | 1   | "a" |
    /// +-----+-----+-----+
    /// | 2   | 2   | "b" |
    /// +-----+-----+-----+
    /// | 3   | 3   | "c" |
    /// +-----+-----+-----+
    /// ```
    #[deprecated(note = "use DataFrame::unique")]
    pub fn drop_duplicates(
        &self,
        maintain_order: bool,
        subset: Option<&[String]>,
    ) -> PolarsResult<Self> {
        match maintain_order {
            true => self.unique_stable(subset, UniqueKeepStrategy::First),
            false => self.unique(subset, UniqueKeepStrategy::First),
        }
    }

    /// Drop duplicate rows from a `DataFrame`.
    /// *This fails when there is a column of type List in DataFrame*
    ///
    /// Stable means that the order is maintained. This has a higher cost than an unstable distinct.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///               "flt" => [1., 1., 2., 2., 3., 3.],
    ///               "int" => [1, 1, 2, 2, 3, 3, ],
    ///               "str" => ["a", "a", "b", "b", "c", "c"]
    ///           }?;
    ///
    /// println!("{}", df.unique_stable(None, UniqueKeepStrategy::First)?);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Returns
    ///
    /// ```text
    /// +-----+-----+-----+
    /// | flt | int | str |
    /// | --- | --- | --- |
    /// | f64 | i32 | str |
    /// +=====+=====+=====+
    /// | 1   | 1   | "a" |
    /// +-----+-----+-----+
    /// | 2   | 2   | "b" |
    /// +-----+-----+-----+
    /// | 3   | 3   | "c" |
    /// +-----+-----+-----+
    /// ```
    pub fn unique_stable(
        &self,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<DataFrame> {
        self.unique_impl(true, subset, keep)
    }

    /// Unstable distinct. See [`DataFrame::unique_stable`].
    pub fn unique(
        &self,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<DataFrame> {
        self.unique_impl(false, subset, keep)
    }

    fn unique_impl(
        &self,
        maintain_order: bool,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<Self> {
        use UniqueKeepStrategy::*;
        let names = match &subset {
            Some(s) => s.iter().map(|s| &**s).collect(),
            None => self.get_column_names(),
        };

        let columns = match (keep, maintain_order) {
            (First, true) => {
                let gb = self.groupby_stable(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
            }
            (Last, true) => {
                // maintain order by last values, so the sorted groups are not correct as they
                // are sorted by the first value
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                let last_idx: NoNull<IdxCa> = groups
                    .iter()
                    .map(|g| match g {
                        GroupsIndicator::Idx((_first, idx)) => idx[idx.len() - 1],
                        GroupsIndicator::Slice([first, len]) => first + len,
                    })
                    .collect();

                let last_idx = last_idx.sort(false);
                return Ok(unsafe { self.take_unchecked(&last_idx) });
            }
            (First, false) => {
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
            }
            (Last, false) => {
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_last(groups) })
            }
        };
        Ok(DataFrame::new_no_checks(columns))
    }

    /// Get a mask of all the unique rows in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Company" => &["Apple", "Microsoft"],
    ///                         "ISIN" => &["US0378331005", "US5949181045"])?;
    /// let ca: ChunkedArray<BooleanType> = df.is_unique()?;
    ///
    /// assert!(ca.all());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_unique(&self) -> PolarsResult<BooleanChunked> {
        let gb = self.groupby(self.get_column_names())?;
        let groups = gb.take_groups();
        Ok(is_unique_helper(
            groups,
            self.height() as IdxSize,
            true,
            false,
        ))
    }

    /// Get a mask of all the duplicated rows in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Company" => &["Alphabet", "Alphabet"],
    ///                         "ISIN" => &["US02079K3059", "US02079K1079"])?;
    /// let ca: ChunkedArray<BooleanType> = df.is_duplicated()?;
    ///
    /// assert!(!ca.all());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_duplicated(&self) -> PolarsResult<BooleanChunked> {
        let gb = self.groupby(self.get_column_names())?;
        let groups = gb.take_groups();
        Ok(is_unique_helper(
            groups,
            self.height() as IdxSize,
            false,
            true,
        ))
    }

    /// Create a new `DataFrame` that shows the null counts per column.
    #[must_use]
    pub fn null_count(&self) -> Self {
        let cols = self
            .columns
            .iter()
            .map(|s| Series::new(s.name(), &[s.null_count() as IdxSize]))
            .collect();
        Self::new_no_checks(cols)
    }

    /// Hash and combine the row values
    #[cfg(feature = "row_hash")]
    pub fn hash_rows(
        &mut self,
        hasher_builder: Option<RandomState>,
    ) -> PolarsResult<UInt64Chunked> {
        let dfs = split_df(self, POOL.current_num_threads())?;
        let (cas, _) = df_rows_to_hashes_threaded(&dfs, hasher_builder)?;

        let mut iter = cas.into_iter();
        let mut acc_ca = iter.next().unwrap();
        for ca in iter {
            acc_ca.append(&ca);
        }
        Ok(acc_ca.rechunk())
    }

    /// Get the supertype of the columns in this DataFrame
    pub fn get_supertype(&self) -> Option<PolarsResult<DataType>> {
        self.columns
            .iter()
            .map(|s| Ok(s.dtype().clone()))
            .reduce(|acc, b| try_get_supertype(&acc?, &b.unwrap()))
    }

    #[cfg(feature = "chunked_ids")]
    #[doc(hidden)]
    //// Take elements by a slice of [`ChunkId`]s.
    /// # Safety
    /// Does not do any bound checks.
    /// `sorted` indicates if the chunks are sorted.
    #[doc(hidden)]
    pub unsafe fn _take_chunked_unchecked_seq(&self, idx: &[ChunkId], sorted: IsSorted) -> Self {
        let cols = self.apply_columns(&|s| s._take_chunked_unchecked(idx, sorted));

        DataFrame::new_no_checks(cols)
    }
    #[cfg(feature = "chunked_ids")]
    //// Take elements by a slice of optional [`ChunkId`]s.
    /// # Safety
    /// Does not do any bound checks.
    #[doc(hidden)]
    pub unsafe fn _take_opt_chunked_unchecked_seq(&self, idx: &[Option<ChunkId>]) -> Self {
        let cols = self.apply_columns(&|s| match s.dtype() {
            DataType::Utf8 => s._take_opt_chunked_unchecked_threaded(idx, true),
            _ => s._take_opt_chunked_unchecked(idx),
        });

        DataFrame::new_no_checks(cols)
    }

    #[cfg(feature = "chunked_ids")]
    pub(crate) unsafe fn take_chunked_unchecked(&self, idx: &[ChunkId], sorted: IsSorted) -> Self {
        let cols = self.apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s._take_chunked_unchecked_threaded(idx, sorted, true),
            _ => s._take_chunked_unchecked(idx, sorted),
        });

        DataFrame::new_no_checks(cols)
    }

    #[cfg(feature = "chunked_ids")]
    pub(crate) unsafe fn take_opt_chunked_unchecked(&self, idx: &[Option<ChunkId>]) -> Self {
        let cols = self.apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s._take_opt_chunked_unchecked_threaded(idx, true),
            _ => s._take_opt_chunked_unchecked(idx),
        });

        DataFrame::new_no_checks(cols)
    }

    /// Be careful with allowing threads when calling this in a large hot loop
    /// every thread split may be on rayon stack and lead to SO
    #[doc(hidden)]
    pub unsafe fn _take_unchecked_slice(&self, idx: &[IdxSize], allow_threads: bool) -> Self {
        self._take_unchecked_slice2(idx, allow_threads, IsSorted::Not)
    }

    #[doc(hidden)]
    pub unsafe fn _take_unchecked_slice2(
        &self,
        idx: &[IdxSize],
        allow_threads: bool,
        sorted: IsSorted,
    ) -> Self {
        #[cfg(debug_assertions)]
        {
            if idx.len() > 2 {
                match sorted {
                    IsSorted::Ascending => {
                        assert!(idx[0] <= idx[idx.len() - 1]);
                    }
                    IsSorted::Descending => {
                        assert!(idx[0] >= idx[idx.len() - 1]);
                    }
                    _ => {}
                }
            }
        }
        let ptr = idx.as_ptr() as *mut IdxSize;
        let len = idx.len();

        // create a temporary vec. we will not drop it.
        let mut ca = IdxCa::from_vec("", Vec::from_raw_parts(ptr, len, len));
        ca.set_sorted2(sorted);
        let out = self.take_unchecked_impl(&ca, allow_threads);

        // ref count of buffers should be one because we dropped all allocations
        let arr = {
            let arr_ref = std::mem::take(&mut ca.chunks).pop().unwrap();
            arr_ref
                .as_any()
                .downcast_ref::<PrimitiveArray<IdxSize>>()
                .unwrap()
                .clone()
        };
        // the only owned heap allocation is the `Vec` we created and must not be dropped
        let _ = std::mem::ManuallyDrop::new(arr.into_mut().right().unwrap());
        out
    }

    #[cfg(feature = "partition_by")]
    #[doc(hidden)]
    pub fn _partition_by_impl(
        &self,
        cols: &[String],
        stable: bool,
    ) -> PolarsResult<Vec<DataFrame>> {
        let groups = if stable {
            self.groupby_stable(cols)?.take_groups()
        } else {
            self.groupby(cols)?.take_groups()
        };

        // don't parallelize this
        // there is a lot of parallelization in take and this may easily SO
        POOL.install(|| {
            match groups {
                GroupsProxy::Idx(idx) => {
                    Ok(idx
                        .into_par_iter()
                        .map(|(_, group)| {
                            // groups are in bounds
                            unsafe { self._take_unchecked_slice(&group, false) }
                        })
                        .collect())
                }
                GroupsProxy::Slice { groups, .. } => Ok(groups
                    .into_par_iter()
                    .map(|[first, len]| self.slice(first as i64, len as usize))
                    .collect()),
            }
        })
    }

    /// Split into multiple DataFrames partitioned by groups
    #[cfg(feature = "partition_by")]
    #[cfg_attr(docsrs, doc(cfg(feature = "partition_by")))]
    pub fn partition_by(&self, cols: impl IntoVec<String>) -> PolarsResult<Vec<DataFrame>> {
        let cols = cols.into_vec();
        self._partition_by_impl(&cols, false)
    }

    /// Split into multiple DataFrames partitioned by groups
    /// Order of the groups are maintained.
    #[cfg(feature = "partition_by")]
    #[cfg_attr(docsrs, doc(cfg(feature = "partition_by")))]
    pub fn partition_by_stable(&self, cols: impl IntoVec<String>) -> PolarsResult<Vec<DataFrame>> {
        let cols = cols.into_vec();
        self._partition_by_impl(&cols, true)
    }

    /// Unnest the given `Struct` columns. This means that the fields of the `Struct` type will be
    /// inserted as columns.
    #[cfg(feature = "dtype-struct")]
    #[cfg_attr(docsrs, doc(cfg(feature = "dtype-struct")))]
    pub fn unnest<I: IntoVec<String>>(&self, cols: I) -> PolarsResult<DataFrame> {
        let cols = cols.into_vec();
        self.unnest_impl(cols.into_iter().collect())
    }

    #[cfg(feature = "dtype-struct")]
    fn unnest_impl(&self, cols: PlHashSet<String>) -> PolarsResult<DataFrame> {
        let mut new_cols = Vec::with_capacity(std::cmp::min(self.width() * 2, self.width() + 128));
        let mut count = 0;
        for s in &self.columns {
            if cols.contains(s.name()) {
                let ca = s.struct_()?;
                new_cols.extend_from_slice(ca.fields());
                count += 1;
            } else {
                new_cols.push(s.clone())
            }
        }
        if count != cols.len() {
            // one or more columns not found
            // the code below will return an error with the missing name
            let schema = self.schema();
            for col in cols {
                let _ = schema
                    .get(&col)
                    .ok_or_else(|| PolarsError::NotFound(col.into()))?;
            }
        }
        DataFrame::new(new_cols)
    }
}

pub struct RecordBatchIter<'a> {
    columns: &'a Vec<Series>,
    idx: usize,
    n_chunks: usize,
}

impl<'a> Iterator for RecordBatchIter<'a> {
    type Item = ArrowChunk;

    fn next(&mut self) -> Option<Self::Item> {
        if self.idx >= self.n_chunks {
            None
        } else {
            // create a batch of the columns with the same chunk no.
            let batch_cols = self.columns.iter().map(|s| s.to_arrow(self.idx)).collect();
            self.idx += 1;

            Some(ArrowChunk::new(batch_cols))
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let n = self.n_chunks - self.idx;
        (n, Some(n))
    }
}

pub struct PhysRecordBatchIter<'a> {
    iters: Vec<std::slice::Iter<'a, ArrayRef>>,
}

impl Iterator for PhysRecordBatchIter<'_> {
    type Item = ArrowChunk;

    fn next(&mut self) -> Option<Self::Item> {
        self.iters
            .iter_mut()
            .map(|phys_iter| phys_iter.next().cloned())
            .collect::<Option<Vec<_>>>()
            .map(ArrowChunk::new)
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        if let Some(iter) = self.iters.first() {
            iter.size_hint()
        } else {
            (0, None)
        }
    }
}

impl Default for DataFrame {
    fn default() -> Self {
        DataFrame::new_no_checks(vec![])
    }

More examples

Hide additional examples

src/series/mod.rs (line 187)

186
187
188

    pub fn into_frame(self) -> DataFrame {
        DataFrame::new_no_checks(vec![self])
    }

src/frame/from.rs (line 34)

    fn from(schema: &Schema) -> Self {
        let cols = schema
            .iter()
            .map(|(name, dtype)| Series::new_empty(name, dtype))
            .collect();
        DataFrame::new_no_checks(cols)
    }

src/frame/hash_join/multiple_keys.rs (line 250)

pub fn private_left_join_multiple_keys(
    a: &DataFrame,
    b: &DataFrame,
    // map the global indices to [chunk_idx, array_idx]
    // only needed if we have non contiguous memory
    chunk_mapping_left: Option<&[ChunkId]>,
    chunk_mapping_right: Option<&[ChunkId]>,
) -> LeftJoinIds {
    let mut a = DataFrame::new_no_checks(_to_physical_and_bit_repr(a.get_columns()));
    let mut b = DataFrame::new_no_checks(_to_physical_and_bit_repr(b.get_columns()));
    _left_join_multiple_keys(&mut a, &mut b, chunk_mapping_left, chunk_mapping_right)
}

src/chunked_array/logical/categorical/ops/unique.rs (line 63)

    pub fn value_counts(&self) -> PolarsResult<DataFrame> {
        let groups = self.logical().group_tuples(true, false).unwrap();
        let logical_values = unsafe {
            self.logical()
                .clone()
                .into_series()
                .agg_first(&groups)
                .u32()
                .unwrap()
                .clone()
        };

        let mut values = self.clone();
        *values.logical_mut() = logical_values;

        let mut counts = groups.group_count();
        counts.rename("counts");
        let cols = vec![values.into_series(), counts.into_series()];
        let df = DataFrame::new_no_checks(cols);
        df.sort(["counts"], true)
    }

src/utils/mod.rs (lines 145-158)

fn flatten_df(df: &DataFrame) -> impl Iterator<Item = DataFrame> + '_ {
    df.iter_chunks_physical().flat_map(|chunk| {
        let df = DataFrame::new_no_checks(
            df.iter()
                .zip(chunk.into_arrays())
                .map(|(s, arr)| {
                    // Safety:
                    // datatypes are correct
                    let mut out = unsafe {
                        Series::from_chunks_and_dtype_unchecked(s.name(), vec![arr], s.dtype())
                    };
                    out.set_sorted(s.is_sorted());
                    out
                })
                .collect(),
        );
        if df.height() == 0 {
            None
        } else {
            Some(df)
        }
    })
}

Additional examples can be found in:

source

pub fn agg_chunks(&self) -> Self

Aggregate all chunks to contiguous memory.

source

pub fn shrink_to_fit(&mut self)

Shrink the capacity of this DataFrame to fit its length.

source

pub fn as_single_chunk(&mut self) -> &mut Self

Aggregate all the chunks in the DataFrame to a single chunk.

Examples found in repository ?

src/frame/groupby/mod.rs (line 887)

    pub fn par_apply<F>(&self, f: F) -> PolarsResult<DataFrame>
    where
        F: Fn(DataFrame) -> PolarsResult<DataFrame> + Send + Sync,
    {
        let df = self.prepare_apply()?;
        let dfs = self
            .get_groups()
            .par_iter()
            .map(|g| {
                // safety
                // groups are in bounds
                let sub_df = unsafe { take_df(&df, g) };
                f(sub_df)
            })
            .collect::<PolarsResult<Vec<_>>>()?;

        let mut df = accumulate_dataframes_vertical(dfs)?;
        df.as_single_chunk();
        Ok(df)
    }

    /// Apply a closure over the groups as a new DataFrame.
    pub fn apply<F>(&self, mut f: F) -> PolarsResult<DataFrame>
    where
        F: FnMut(DataFrame) -> PolarsResult<DataFrame> + Send + Sync,
    {
        let df = self.prepare_apply()?;
        let dfs = self
            .get_groups()
            .iter()
            .map(|g| {
                // safety
                // groups are in bounds
                let sub_df = unsafe { take_df(&df, g) };
                f(sub_df)
            })
            .collect::<PolarsResult<Vec<_>>>()?;

        let mut df = accumulate_dataframes_vertical(dfs)?;
        df.as_single_chunk();
        Ok(df)
    }

source

pub fn as_single_chunk_par(&mut self) -> &mut Self

Aggregate all the chunks in the DataFrame to a single chunk in parallel. This may lead to more peak memory consumption.

Examples found in repository ?

src/frame/mod.rs (line 474)

    pub fn rechunk(&mut self) -> &mut Self {
        if self.should_rechunk() {
            self.as_single_chunk_par()
        } else {
            self
        }
    }

    /// Get the `DataFrame` schema.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Thing" => &["Observable universe", "Human stupidity"],
    ///                         "Diameter (m)" => &[8.8e26, f64::INFINITY])?;
    ///
    /// let f1: Field = Field::new("Thing", DataType::Utf8);
    /// let f2: Field = Field::new("Diameter (m)", DataType::Float64);
    /// let sc: Schema = Schema::from(vec![f1, f2].into_iter());
    ///
    /// assert_eq!(df.schema(), sc);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn schema(&self) -> Schema {
        Schema::from(self.iter().map(|s| s.field().into_owned()))
    }

    /// Get a reference to the `DataFrame` columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Adenine", "Cytosine", "Guanine", "Thymine"],
    ///                         "Symbol" => &["A", "C", "G", "T"])?;
    /// let columns: &Vec<Series> = df.get_columns();
    ///
    /// assert_eq!(columns[0].name(), "Name");
    /// assert_eq!(columns[1].name(), "Symbol");
    /// # Ok::<(), PolarsError>(())
    /// ```
    #[inline]
    pub fn get_columns(&self) -> &Vec<Series> {
        &self.columns
    }

    #[cfg(feature = "private")]
    #[inline]
    pub fn get_columns_mut(&mut self) -> &mut Vec<Series> {
        &mut self.columns
    }

    /// Iterator over the columns as `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Name", &["Pythagoras' theorem", "Shannon entropy"]);
    /// let s2: Series = Series::new("Formula", &["a²+b²=c²", "H=-Σ[P(x)log|P(x)|]"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2.clone()])?;
    ///
    /// let mut iterator = df.iter();
    ///
    /// assert_eq!(iterator.next(), Some(&s1));
    /// assert_eq!(iterator.next(), Some(&s2));
    /// assert_eq!(iterator.next(), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn iter(&self) -> std::slice::Iter<'_, Series> {
        self.columns.iter()
    }

    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Language" => &["Rust", "Python"],
    ///                         "Designer" => &["Graydon Hoare", "Guido van Rossum"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Language", "Designer"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn get_column_names(&self) -> Vec<&str> {
        self.columns.iter().map(|s| s.name()).collect()
    }

    /// Get the `Vec<String>` representing the column names.
    pub fn get_column_names_owned(&self) -> Vec<String> {
        self.columns.iter().map(|s| s.name().to_string()).collect()
    }

    /// Set the column names.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Mathematical set" => &["ℕ", "ℤ", "𝔻", "ℚ", "ℝ", "ℂ"])?;
    /// df.set_column_names(&["Set"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Set"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn set_column_names<S: AsRef<str>>(&mut self, names: &[S]) -> PolarsResult<()> {
        if names.len() != self.columns.len() {
            return Err(PolarsError::ShapeMisMatch("the provided slice with column names has not the same size as the DataFrame's width".into()));
        }
        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(names.iter().map(|name| name.as_ref()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }

        let columns = mem::take(&mut self.columns);
        self.columns = columns
            .into_iter()
            .zip(names)
            .map(|(s, name)| {
                let mut s = s;
                s.rename(name.as_ref());
                s
            })
            .collect();
        Ok(())
    }

    /// Get the data types of the columns in the DataFrame.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let venus_air: DataFrame = df!("Element" => &["Carbon dioxide", "Nitrogen"],
    ///                                "Fraction" => &[0.965, 0.035])?;
    ///
    /// assert_eq!(venus_air.dtypes(), &[DataType::Utf8, DataType::Float64]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn dtypes(&self) -> Vec<DataType> {
        self.columns.iter().map(|s| s.dtype().clone()).collect()
    }

    /// The number of chunks per column
    pub fn n_chunks(&self) -> usize {
        match self.columns.get(0) {
            None => 0,
            Some(s) => s.n_chunks(),
        }
    }

    /// Get a reference to the schema fields of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let earth: DataFrame = df!("Surface type" => &["Water", "Land"],
    ///                            "Fraction" => &[0.708, 0.292])?;
    ///
    /// let f1: Field = Field::new("Surface type", DataType::Utf8);
    /// let f2: Field = Field::new("Fraction", DataType::Float64);
    ///
    /// assert_eq!(earth.fields(), &[f1, f2]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn fields(&self) -> Vec<Field> {
        self.columns
            .iter()
            .map(|s| s.field().into_owned())
            .collect()
    }

    /// Get (height, width) of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("1" => &[1, 2, 3, 4, 5])?;
    /// let df2: DataFrame = df!("1" => &[1, 2, 3, 4, 5],
    ///                          "2" => &[1, 2, 3, 4, 5])?;
    ///
    /// assert_eq!(df0.shape(), (0 ,0));
    /// assert_eq!(df1.shape(), (5, 1));
    /// assert_eq!(df2.shape(), (5, 2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn shape(&self) -> (usize, usize) {
        match self.columns.as_slice() {
            &[] => (0, 0),
            v => (v[0].len(), v.len()),
        }
    }

    /// Get the width of the `DataFrame` which is the number of columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("Series 1" => &[0; 0])?;
    /// let df2: DataFrame = df!("Series 1" => &[0; 0],
    ///                          "Series 2" => &[0; 0])?;
    ///
    /// assert_eq!(df0.width(), 0);
    /// assert_eq!(df1.width(), 1);
    /// assert_eq!(df2.width(), 2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn width(&self) -> usize {
        self.columns.len()
    }

    /// Get the height of the `DataFrame` which is the number of rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("Currency" => &["€", "$"])?;
    /// let df2: DataFrame = df!("Currency" => &["€", "$", "¥", "£", "₿"])?;
    ///
    /// assert_eq!(df0.height(), 0);
    /// assert_eq!(df1.height(), 2);
    /// assert_eq!(df2.height(), 5);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn height(&self) -> usize {
        self.shape().0
    }

    /// Check if the `DataFrame` is empty.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = DataFrame::default();
    /// assert!(df1.is_empty());
    ///
    /// let df2: DataFrame = df!("First name" => &["Forever"],
    ///                          "Last name" => &["Alone"])?;
    /// assert!(!df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_empty(&self) -> bool {
        self.columns.is_empty()
    }

    pub(crate) fn hstack_mut_no_checks(&mut self, columns: &[Series]) -> &mut Self {
        for col in columns {
            self.columns.push(col.clone());
        }
        self
    }

    /// Add multiple `Series` to a `DataFrame`.
    /// The added `Series` are required to have the same length.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn stack(df: &mut DataFrame, columns: &[Series]) {
    ///     df.hstack_mut(columns);
    /// }
    /// ```
    pub fn hstack_mut(&mut self, columns: &[Series]) -> PolarsResult<&mut Self> {
        let mut names = PlHashSet::with_capacity(self.columns.len());
        for s in &self.columns {
            names.insert(s.name());
        }

        let height = self.height();
        // first loop check validity. We don't do this in a single pass otherwise
        // this DataFrame is already modified when an error occurs.
        for col in columns {
            if col.len() != height && height != 0 {
                return Err(PolarsError::ShapeMisMatch(
                    format!("Could not horizontally stack Series. The Series length {} differs from the DataFrame height: {height}", col.len()).into()));
            }

            let name = col.name();
            if names.contains(name) {
                return Err(PolarsError::Duplicate(
                    format!("Cannot do hstack operation. Column with name: {name} already exists",)
                        .into(),
                ));
            }
            names.insert(name);
        }
        drop(names);
        Ok(self.hstack_mut_no_checks(columns))
    }

    /// Add multiple `Series` to a `DataFrame`.
    /// The added `Series` are required to have the same length.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"])?;
    /// let s1: Series = Series::new("Proton", &[29, 47, 79]);
    /// let s2: Series = Series::new("Electron", &[29, 47, 79]);
    ///
    /// let df2: DataFrame = df1.hstack(&[s1, s2])?;
    /// assert_eq!(df2.shape(), (3, 3));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 3)
    /// +---------+--------+----------+
    /// | Element | Proton | Electron |
    /// | ---     | ---    | ---      |
    /// | str     | i32    | i32      |
    /// +=========+========+==========+
    /// | Copper  | 29     | 29       |
    /// +---------+--------+----------+
    /// | Silver  | 47     | 47       |
    /// +---------+--------+----------+
    /// | Gold    | 79     | 79       |
    /// +---------+--------+----------+
    /// ```
    pub fn hstack(&self, columns: &[Series]) -> PolarsResult<Self> {
        let mut new_cols = self.columns.clone();
        new_cols.extend_from_slice(columns);
        DataFrame::new(new_cols)
    }

    /// Concatenate a `DataFrame` to this `DataFrame` and return as newly allocated `DataFrame`.
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// let df3: DataFrame = df1.vstack(&df2)?;
    ///
    /// assert_eq!(df3.shape(), (5, 2));
    /// println!("{}", df3);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    pub fn vstack(&self, other: &DataFrame) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.vstack_mut(other)?;
        Ok(df)
    }

    /// Concatenate a DataFrame to this DataFrame
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// df1.vstack_mut(&df2)?;
    ///
    /// assert_eq!(df1.shape(), (5, 2));
    /// println!("{}", df1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    pub fn vstack_mut(&mut self, other: &DataFrame) -> PolarsResult<&mut Self> {
        if self.width() != other.width() {
            if self.width() == 0 {
                self.columns = other.columns.clone();
                return Ok(self);
            }

            return Err(PolarsError::ShapeMisMatch(
                format!("Could not vertically stack DataFrame. The DataFrames appended width {} differs from the parent DataFrames width {}", self.width(), other.width()).into()
            ));
        }

        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .try_for_each::<_, PolarsResult<_>>(|(left, right)| {
                can_extend(left, right)?;
                left.append(right).expect("should not fail");
                Ok(())
            })?;
        Ok(self)
    }

    /// Does not check if schema is correct
    pub(crate) fn vstack_mut_unchecked(&mut self, other: &DataFrame) {
        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .for_each(|(left, right)| {
                left.append(right).expect("should not fail");
            });
    }

    /// Extend the memory backed by this [`DataFrame`] with the values from `other`.
    ///
    /// Different from [`vstack`](Self::vstack) which adds the chunks from `other` to the chunks of this [`DataFrame`]
    /// `extend` appends the data from `other` to the underlying memory locations and thus may cause a reallocation.
    ///
    /// If this does not cause a reallocation, the resulting data structure will not have any extra chunks
    /// and thus will yield faster queries.
    ///
    /// Prefer `extend` over `vstack` when you want to do a query after a single append. For instance during
    /// online operations where you add `n` rows and rerun a query.
    ///
    /// Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance
    /// when you read in multiple files and when to store them in a single `DataFrame`. In the latter case, finish the sequence
    /// of `append` operations with a [`rechunk`](Self::rechunk).
    pub fn extend(&mut self, other: &DataFrame) -> PolarsResult<()> {
        if self.width() != other.width() {
            return Err(PolarsError::ShapeMisMatch(
                format!("Could not extend DataFrame. The DataFrames extended width {} differs from the parent DataFrames width {}", self.width(), other.width()).into()
            ));
        }

        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .try_for_each::<_, PolarsResult<_>>(|(left, right)| {
                can_extend(left, right)?;
                left.extend(right).unwrap();
                Ok(())
            })?;
        Ok(())
    }

    /// Remove a column by name and return the column removed.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Animal" => &["Tiger", "Lion", "Great auk"],
    ///                             "IUCN" => &["Endangered", "Vulnerable", "Extinct"])?;
    ///
    /// let s1: PolarsResult<Series> = df.drop_in_place("Average weight");
    /// assert!(s1.is_err());
    ///
    /// let s2: Series = df.drop_in_place("Animal")?;
    /// assert_eq!(s2, Series::new("Animal", &["Tiger", "Lion", "Great auk"]));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop_in_place(&mut self, name: &str) -> PolarsResult<Series> {
        let idx = self.check_name_to_idx(name)?;
        Ok(self.columns.remove(idx))
    }

    /// Return a new `DataFrame` where all null values are dropped.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Country" => ["Malta", "Liechtenstein", "North Korea"],
    ///                         "Tax revenue (% GDP)" => [Some(32.7), None, None])?;
    /// assert_eq!(df1.shape(), (3, 2));
    ///
    /// let df2: DataFrame = df1.drop_nulls(None)?;
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------------------+
    /// | Country | Tax revenue (% GDP) |
    /// | ---     | ---                 |
    /// | str     | f64                 |
    /// +=========+=====================+
    /// | Malta   | 32.7                |
    /// +---------+---------------------+
    /// ```
    pub fn drop_nulls(&self, subset: Option<&[String]>) -> PolarsResult<Self> {
        let selected_series;

        let mut iter = match subset {
            Some(cols) => {
                selected_series = self.select_series(cols)?;
                selected_series.iter()
            }
            None => self.columns.iter(),
        };

        // fast path for no nulls in df
        if iter.clone().all(|s| !s.has_validity()) {
            return Ok(self.clone());
        }

        let mask = iter
            .next()
            .ok_or_else(|| PolarsError::NoData("No data to drop nulls from".into()))?;
        let mut mask = mask.is_not_null();

        for s in iter {
            mask = mask & s.is_not_null();
        }
        self.filter(&mask)
    }

    /// Drop a column by name.
    /// This is a pure method and will return a new `DataFrame` instead of modifying
    /// the current one in place.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Ray type" => &["α", "β", "X", "γ"])?;
    /// let df2: DataFrame = df1.drop("Ray type")?;
    ///
    /// assert!(df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop(&self, name: &str) -> PolarsResult<Self> {
        let idx = self.check_name_to_idx(name)?;
        let mut new_cols = Vec::with_capacity(self.columns.len() - 1);

        self.columns.iter().enumerate().for_each(|(i, s)| {
            if i != idx {
                new_cols.push(s.clone())
            }
        });

        Ok(DataFrame::new_no_checks(new_cols))
    }

    pub fn drop_many<S: AsRef<str>>(&self, names: &[S]) -> Self {
        let names = names.iter().map(|s| s.as_ref()).collect();
        fn inner(df: &DataFrame, names: Vec<&str>) -> DataFrame {
            let mut new_cols = Vec::with_capacity(df.columns.len() - names.len());
            df.columns.iter().for_each(|s| {
                if !names.contains(&s.name()) {
                    new_cols.push(s.clone())
                }
            });

            DataFrame::new_no_checks(new_cols)
        }
        inner(self, names)
    }

    fn insert_at_idx_no_name_check(
        &mut self,
        index: usize,
        series: Series,
    ) -> PolarsResult<&mut Self> {
        if series.len() == self.height() {
            self.columns.insert(index, series);
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                .into(),
            ))
        }
    }

    /// Insert a new column at a given index.
    pub fn insert_at_idx<S: IntoSeries>(
        &mut self,
        index: usize,
        column: S,
    ) -> PolarsResult<&mut Self> {
        let series = column.into_series();
        self.check_already_present(series.name())?;
        self.insert_at_idx_no_name_check(index, series)
    }

    fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> {
        if let Some(idx) = self.find_idx_by_name(series.name()) {
            self.replace_at_idx(idx, series)?;
        } else {
            self.columns.push(series);
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    pub fn with_column<S: IntoSeries>(&mut self, column: S) -> PolarsResult<&mut Self> {
        fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();
            if series.len() == 1 && height > 1 {
                series = series.new_from_index(0, height);
            }

            if series.len() == height || df.is_empty() {
                df.add_column_by_search(series)?;
                Ok(df)
            }
            // special case for literals
            else if height == 0 && series.len() == 1 {
                let s = series.slice(0, 0);
                df.add_column_by_search(s)?;
                Ok(df)
            } else {
                Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Could not add column. The Series length {} differs from the DataFrame height: {}",
                        series.len(),
                        df.height()
                    )
                        .into(),
                ))
            }
        }
        let series = column.into_series();
        inner(self, series)
    }

    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let name = s.name();
        if let Some((idx, _, _)) = schema.get_full(name) {
            // schema is incorrect fallback to search
            if self.columns.get(idx).map(|s| s.name()) != Some(name) {
                self.add_column_by_search(s)?;
            } else {
                self.replace_at_idx(idx, s)?;
            }
        } else {
            self.columns.push(s);
        }
        Ok(())
    }

    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                self.with_column(s.clone())?;
            }
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    /// Uses an existing schema to amortize lookups.
    /// If the schema is incorrect, we will fallback to linear search.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let mut series = column.into_series();

        let height = self.height();
        if series.len() == 1 && height > 1 {
            series = series.new_from_index(0, height);
        }

        if series.len() == height || self.is_empty() {
            self.add_column_by_schema(series, schema)?;
            Ok(self)
        }
        // special case for literals
        else if height == 0 && series.len() == 1 {
            let s = series.slice(0, 0);
            self.add_column_by_schema(s, schema)?;
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                    .into(),
            ))
        }
    }

    /// Get a row in the `DataFrame`. Beware this is slow.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    ///     df.get(idx)
    /// }
    /// ```
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        match self.columns.get(0) {
            Some(s) => {
                if s.len() <= idx {
                    return None;
                }
            }
            None => return None,
        }
        // safety: we just checked bounds
        unsafe { Some(self.columns.iter().map(|s| s.get_unchecked(idx)).collect()) }
    }

    /// Select a `Series` by index.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
    ///                         "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
    ///
    /// let s1: Option<&Series> = df.select_at_idx(0);
    /// let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
    ///
    /// assert_eq!(s1, Some(&s2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_at_idx(&self, idx: usize) -> Option<&Series> {
        self.columns.get(idx)
    }

    /// Select a mutable series by index.
    ///
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> {
        self.columns.get_mut(idx)
    }

    /// Select column(s) from this `DataFrame` by range and return a new DataFrame
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///     "0" => &[0, 0, 0],
    ///     "1" => &[1, 1, 1],
    ///     "2" => &[2, 2, 2]
    /// }?;
    ///
    /// assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
    /// assert!(df.frame_equal(&df.select_by_range(..)?));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // This function is copied from std::slice::range (https://doc.rust-lang.org/std/slice/fn.range.html)
        // because it is the nightly feature. We should change here if this function were stable.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let len = bounds.end;

            let start: ops::Bound<&usize> = range.start_bound();
            let start = match start {
                ops::Bound::Included(&start) => start,
                ops::Bound::Excluded(start) => start.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice from after maximum usize");
                }),
                ops::Bound::Unbounded => 0,
            };

            let end: ops::Bound<&usize> = range.end_bound();
            let end = match end {
                ops::Bound::Included(end) => end.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice up to maximum usize");
                }),
                ops::Bound::Excluded(&end) => end,
                ops::Bound::Unbounded => len,
            };

            if start > end {
                panic!("slice index starts at {start} but ends at {end}");
            }
            if end > len {
                panic!("range end index {end} out of range for slice of length {len}",);
            }

            ops::Range { start, end }
        }

        let colnames = self.get_column_names_owned();
        let range = get_range(range, ..colnames.len());

        self.select_impl(&colnames[range])
    }

    /// Get column index of a `Series` by name.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
    ///                         "Health" => &[100, 200, 500],
    ///                         "Mana" => &[250, 100, 0],
    ///                         "Strength" => &[30, 150, 300])?;
    ///
    /// assert_eq!(df.find_idx_by_name("Name"), Some(0));
    /// assert_eq!(df.find_idx_by_name("Health"), Some(1));
    /// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
    /// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
    /// assert_eq!(df.find_idx_by_name("Haste"), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
        self.columns.iter().position(|s| s.name() == name)
    }

    /// Select a single column by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
    /// let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
    ///
    /// assert_eq!(df.column("Password")?, &s1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        let idx = self
            .find_idx_by_name(name)
            .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
        Ok(self.select_at_idx(idx).unwrap())
    }

    /// Selected multiple columns by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        names
            .into_iter()
            .map(|name| self.column(name.as_ref()))
            .collect()
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_impl(&cols)
    }

    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_physical_impl(&cols)
    }

    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_physical_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut names = PlHashSet::with_capacity(cols.len());
        for name in cols {
            if !names.insert(name.as_str()) {
                _duplicate_err(name)?
            }
        }
        Ok(())
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        let cols = selection.into_vec();
        self.select_series_impl(&cols)
    }

    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        self.columns
            .iter()
            .enumerate()
            .map(|(i, s)| (s.name(), i))
            .collect()
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            let name_to_idx = self._names_to_idx_map();
            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self
                        .select_at_idx(idx)
                        .unwrap()
                        .to_physical_repr()
                        .into_owned())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            // we hash, because there are user that having millions of columns.
            // # https://github.com/pola-rs/polars/issues/1023
            let name_to_idx = self._names_to_idx_map();

            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self.select_at_idx(idx).unwrap().clone())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.clone()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// Select a mutable series by name.
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        let opt_idx = self.find_idx_by_name(name);

        match opt_idx {
            Some(idx) => self.select_at_idx_mut(idx),
            None => None,
        }
    }

    /// Does a filter but splits thread chunks vertically instead of horizontally
    /// This yields a DataFrame with `n_chunks == n_threads`.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let masks = split_ca(mask, n_threads).unwrap();
        let dfs = split_df(self, n_threads).unwrap();
        let dfs: PolarsResult<Vec<_>> = POOL.install(|| {
            masks
                .par_iter()
                .zip(dfs)
                .map(|(mask, df)| {
                    let cols = df
                        .columns
                        .iter()
                        .map(|s| s.filter(mask))
                        .collect::<PolarsResult<_>>()?;
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        let mut iter = dfs?.into_iter();
        let first = iter.next().unwrap();
        Ok(iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        }))
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            return self.clone().filter_vertical(mask);
        }
        let new_col = self.try_apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s.filter_threaded(mask, true),
            _ => s.filter(mask),
        })?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Same as `filter` but does not parallelize.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let new_col = self.try_apply_columns(&|s| s.filter(mask))?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        let new_col = self.try_apply_columns_par(&|s| {
            let mut i = iter.clone();
            s.take_iter(&mut i)
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` values by indexes from an iterator.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            return self.take_unchecked_vectical(&idx_ca.into_inner());
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            let idx_ca = idx_ca.into_inner();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_iter_unchecked(&mut i)
            })
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` values by indexes from an iterator that may contain None values.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked_vectical(&idx_ca);
        }

        let n_chunks = self.n_chunks();

        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_opt_iter_unchecked(&mut i)
            })
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` rows by index values.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        let indices = if indices.chunks.len() > 1 {
            Cow::Owned(indices.rechunk())
        } else {
            Cow::Borrowed(indices)
        };
        let new_col = POOL.install(|| {
            self.try_apply_columns_par(&|s| match s.dtype() {
                DataType::Utf8 => s.take_threaded(&indices, true),
                _ => s.take(&indices),
            })
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        self.take_unchecked_impl(idx, true)
    }

    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        let cols = if allow_threads {
            POOL.install(|| {
                self.apply_columns_par(&|s| match s.dtype() {
                    DataType::Utf8 => s.take_unchecked_threaded(idx, true).unwrap(),
                    _ => s.take_unchecked(idx).unwrap(),
                })
            })
        } else {
            self.columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect()
        };
        DataFrame::new_no_checks(cols)
    }

    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let idxs = split_ca(indices, n_threads).unwrap();

        let dfs: Vec<_> = POOL.install(|| {
            idxs.par_iter()
                .map(|idx| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(idx).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        let mut iter = dfs.into_iter();
        let first = iter.next().unwrap();
        iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        })
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        self.select_mut(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))
            .map(|s| s.rename(name))?;

        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(self.columns.iter().map(|s| s.name()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }
        Ok(self)
    }

    /// Sort `DataFrame` in place by a column.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // a lot of indirection in both sorting and take
        self.as_single_chunk_par();
        let by_column = self.select_series(by_column)?;
        let reverse = reverse.into_vec();
        self.columns = self.sort_impl(by_column, reverse, false, None)?.columns;
        Ok(self)
    }

    /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // note that the by_column argument also contains evaluated expression from polars-lazy
        // that may not even be present in this dataframe.

        // therefore when we try to set the first columns as sorted, we ignore the error
        // as expressions are not present (they are renamed to _POLARS_SORT_COLUMN_i.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: reverse[0],
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted
        // if the column did not exists it is ok, because we sorted by an expression
        // not present in the dataframe
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted clone of this `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.sort_in_place(by_column, reverse)?;
        Ok(df)
    }

    /// Sort the `DataFrame` by a single column with extra options.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut df = self.clone();
        // a lot of indirection in both sorting and take
        df.as_single_chunk_par();
        let by_column = vec![df.column(by_column)?.clone()];
        let reverse = vec![options.descending];
        df.columns = df
            .sort_impl(by_column, reverse, options.nulls_last, None)?
            .columns;
        Ok(df)
    }

More examples

Hide additional examples

src/frame/hash_join/mod.rs (line 473)

    pub fn _left_join_from_series(
        &self,
        other: &DataFrame,
        s_left: &Series,
        s_right: &Series,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
        verbose: bool,
    ) -> PolarsResult<DataFrame> {
        #[cfg(feature = "dtype-categorical")]
        _check_categorical_src(s_left.dtype(), s_right.dtype())?;

        // ensure that the chunks are aligned otherwise we go OOB
        let mut left = self.clone();
        let mut s_left = s_left.clone();
        let mut right = other.clone();
        let mut s_right = s_right.clone();
        if left.should_rechunk() {
            left.as_single_chunk_par();
            s_left = s_left.rechunk();
        }
        if right.should_rechunk() {
            right.as_single_chunk_par();
            s_right = s_right.rechunk();
        }
        let ids = sort_or_hash_left(&s_left, &s_right, verbose);
        left._finish_left_join(ids, &right.drop(s_right.name()).unwrap(), suffix, slice)
    }

src/frame/explode.rs (line 277)

    pub fn melt2(&self, args: MeltArgs) -> PolarsResult<Self> {
        let id_vars = args.id_vars;
        let mut value_vars = args.value_vars;

        let value_name = args.value_name.as_deref().unwrap_or("value");
        let variable_name = args.variable_name.as_deref().unwrap_or("variable");

        let len = self.height();

        // if value vars is empty we take all columns that are not in id_vars.
        if value_vars.is_empty() {
            let id_vars_set = PlHashSet::from_iter(id_vars.iter().map(|s| s.as_str()));
            value_vars = self
                .get_columns()
                .iter()
                .filter_map(|s| {
                    if id_vars_set.contains(s.name()) {
                        None
                    } else {
                        Some(s.name().to_string())
                    }
                })
                .collect();
        }

        // values will all be placed in single column, so we must find their supertype
        let schema = self.schema();
        let mut iter = value_vars.iter().map(|v| {
            schema
                .get(v)
                .ok_or_else(|| PolarsError::NotFound(v.to_string().into()))
        });
        let mut st = iter.next().unwrap()?.clone();
        for dt in iter {
            st = try_get_supertype(&st, dt?)?;
        }

        let values_len = value_vars.iter().map(|name| name.len()).sum::<usize>();

        // The column name of the variable that is melted
        let mut variable_col = MutableUtf8Array::<i64>::with_capacities(
            len * value_vars.len() + 1,
            len * values_len + 1,
        );
        // prepare ids
        let ids_ = self.select(id_vars)?;
        let mut ids = ids_.clone();
        if ids.width() > 0 {
            for _ in 0..value_vars.len() - 1 {
                ids.vstack_mut_unchecked(&ids_)
            }
        }
        ids.as_single_chunk_par();
        drop(ids_);

        let mut values = Vec::with_capacity(value_vars.len());

        for value_column_name in &value_vars {
            variable_col.extend_trusted_len_values(std::iter::repeat(value_column_name).take(len));
            let value_col = self.column(value_column_name)?.cast(&st)?;
            values.extend_from_slice(value_col.chunks())
        }
        let values_arr = concatenate_owned_unchecked(&values)?;
        // Safety
        // The give dtype is correct
        let values =
            unsafe { Series::from_chunks_and_dtype_unchecked(value_name, vec![values_arr], &st) };

        let variable_col = variable_col.as_box();
        // Safety
        // The give dtype is correct
        let variables = unsafe {
            Series::from_chunks_and_dtype_unchecked(
                variable_name,
                vec![variable_col],
                &DataType::Utf8,
            )
        };

        ids.hstack_mut(&[variables, values])?;

        Ok(ids)
    }

source

pub fn should_rechunk(&self) -> bool

Estimates of the DataFrames columns consist of the same chunk sizes

Examples found in repository ?

src/frame/mod.rs (line 473)

    pub fn rechunk(&mut self) -> &mut Self {
        if self.should_rechunk() {
            self.as_single_chunk_par()
        } else {
            self
        }
    }

More examples

Hide additional examples

src/frame/hash_join/mod.rs (line 472)

    pub fn _left_join_from_series(
        &self,
        other: &DataFrame,
        s_left: &Series,
        s_right: &Series,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
        verbose: bool,
    ) -> PolarsResult<DataFrame> {
        #[cfg(feature = "dtype-categorical")]
        _check_categorical_src(s_left.dtype(), s_right.dtype())?;

        // ensure that the chunks are aligned otherwise we go OOB
        let mut left = self.clone();
        let mut s_left = s_left.clone();
        let mut right = other.clone();
        let mut s_right = s_right.clone();
        if left.should_rechunk() {
            left.as_single_chunk_par();
            s_left = s_left.rechunk();
        }
        if right.should_rechunk() {
            right.as_single_chunk_par();
            s_right = s_right.rechunk();
        }
        let ids = sort_or_hash_left(&s_left, &s_right, verbose);
        left._finish_left_join(ids, &right.drop(s_right.name()).unwrap(), suffix, slice)
    }

source

pub fn rechunk(&mut self) -> &mut Self

Ensure all the chunks in the DataFrame are aligned.

Examples found in repository ?

src/utils/mod.rs (line 220)

pub fn split_df(df: &mut DataFrame, n: usize) -> PolarsResult<Vec<DataFrame>> {
    if n == 0 || df.height() == 0 {
        return Ok(vec![df.clone()]);
    }
    // make sure that chunks are aligned.
    df.rechunk();
    split_df_as_ref(df, n)
}

source

pub fn schema(&self) -> Schema

Get the DataFrame schema.

Example

let df: DataFrame = df!("Thing" => &["Observable universe", "Human stupidity"],
                        "Diameter (m)" => &[8.8e26, f64::INFINITY])?;

let f1: Field = Field::new("Thing", DataType::Utf8);
let f2: Field = Field::new("Diameter (m)", DataType::Float64);
let sc: Schema = Schema::from(vec![f1, f2].into_iter());

assert_eq!(df.schema(), sc);

Examples found in repository ?

src/frame/explode.rs (line 251)

    pub fn melt2(&self, args: MeltArgs) -> PolarsResult<Self> {
        let id_vars = args.id_vars;
        let mut value_vars = args.value_vars;

        let value_name = args.value_name.as_deref().unwrap_or("value");
        let variable_name = args.variable_name.as_deref().unwrap_or("variable");

        let len = self.height();

        // if value vars is empty we take all columns that are not in id_vars.
        if value_vars.is_empty() {
            let id_vars_set = PlHashSet::from_iter(id_vars.iter().map(|s| s.as_str()));
            value_vars = self
                .get_columns()
                .iter()
                .filter_map(|s| {
                    if id_vars_set.contains(s.name()) {
                        None
                    } else {
                        Some(s.name().to_string())
                    }
                })
                .collect();
        }

        // values will all be placed in single column, so we must find their supertype
        let schema = self.schema();
        let mut iter = value_vars.iter().map(|v| {
            schema
                .get(v)
                .ok_or_else(|| PolarsError::NotFound(v.to_string().into()))
        });
        let mut st = iter.next().unwrap()?.clone();
        for dt in iter {
            st = try_get_supertype(&st, dt?)?;
        }

        let values_len = value_vars.iter().map(|name| name.len()).sum::<usize>();

        // The column name of the variable that is melted
        let mut variable_col = MutableUtf8Array::<i64>::with_capacities(
            len * value_vars.len() + 1,
            len * values_len + 1,
        );
        // prepare ids
        let ids_ = self.select(id_vars)?;
        let mut ids = ids_.clone();
        if ids.width() > 0 {
            for _ in 0..value_vars.len() - 1 {
                ids.vstack_mut_unchecked(&ids_)
            }
        }
        ids.as_single_chunk_par();
        drop(ids_);

        let mut values = Vec::with_capacity(value_vars.len());

        for value_column_name in &value_vars {
            variable_col.extend_trusted_len_values(std::iter::repeat(value_column_name).take(len));
            let value_col = self.column(value_column_name)?.cast(&st)?;
            values.extend_from_slice(value_col.chunks())
        }
        let values_arr = concatenate_owned_unchecked(&values)?;
        // Safety
        // The give dtype is correct
        let values =
            unsafe { Series::from_chunks_and_dtype_unchecked(value_name, vec![values_arr], &st) };

        let variable_col = variable_col.as_box();
        // Safety
        // The give dtype is correct
        let variables = unsafe {
            Series::from_chunks_and_dtype_unchecked(
                variable_name,
                vec![variable_col],
                &DataType::Utf8,
            )
        };

        ids.hstack_mut(&[variables, values])?;

        Ok(ids)
    }

source

pub fn get_columns(&self) -> &Vec<Series> ⓘ

Get a reference to the DataFrame columns.

Example

let df: DataFrame = df!("Name" => &["Adenine", "Cytosine", "Guanine", "Thymine"],
                        "Symbol" => &["A", "C", "G", "T"])?;
let columns: &Vec<Series> = df.get_columns();

assert_eq!(columns[0].name(), "Name");
assert_eq!(columns[1].name(), "Symbol");

Examples found in repository ?

src/frame/groupby/hashing.rs (line 167)

pub(crate) unsafe fn compare_df_rows(keys: &DataFrame, idx_a: usize, idx_b: usize) -> bool {
    for s in keys.get_columns() {
        if !s.equal_element(idx_a, idx_b, s) {
            return false;
        }
    }
    true
}

More examples

Hide additional examples

src/frame/hash_join/multiple_keys.rs (line 23)

pub(crate) unsafe fn compare_df_rows2(
    left: &DataFrame,
    right: &DataFrame,
    left_idx: usize,
    right_idx: usize,
) -> bool {
    for (l, r) in left.get_columns().iter().zip(right.get_columns()) {
        if !(l.get_unchecked(left_idx) == r.get_unchecked(right_idx)) {
            return false;
        }
    }
    true
}

pub(crate) fn create_probe_table(
    hashes: &[UInt64Chunked],
    keys: &DataFrame,
) -> Vec<HashMap<IdxHash, Vec<IdxSize>, IdBuildHasher>> {
    let n_partitions = _set_partition_size();

    // We will create a hashtable in every thread.
    // We use the hash to partition the keys to the matching hashtable.
    // Every thread traverses all keys/hashes and ignores the ones that doesn't fall in that partition.
    POOL.install(|| {
        (0..n_partitions).into_par_iter().map(|part_no| {
            let part_no = part_no as u64;
            let mut hash_tbl: HashMap<IdxHash, Vec<IdxSize>, IdBuildHasher> =
                HashMap::with_capacity_and_hasher(HASHMAP_INIT_SIZE, Default::default());

            let n_partitions = n_partitions as u64;
            let mut offset = 0;
            for hashes in hashes {
                for hashes in hashes.data_views() {
                    let len = hashes.len();
                    let mut idx = 0;
                    hashes.iter().for_each(|h| {
                        // partition hashes by thread no.
                        // So only a part of the hashes go to this hashmap
                        if this_partition(*h, part_no, n_partitions) {
                            let idx = idx + offset;
                            populate_multiple_key_hashmap(
                                &mut hash_tbl,
                                idx,
                                *h,
                                keys,
                                || vec![idx],
                                |v| v.push(idx),
                            )
                        }
                        idx += 1;
                    });

                    offset += len as IdxSize;
                }
            }
            hash_tbl
        })
    })
    .collect()
}

fn create_build_table_outer(
    hashes: &[UInt64Chunked],
    keys: &DataFrame,
) -> Vec<HashMap<IdxHash, (bool, Vec<IdxSize>), IdBuildHasher>> {
    // Outer join equivalent of create_build_table() adds a bool in the hashmap values for tracking
    // whether a value in the hash table has already been matched to a value in the probe hashes.
    let n_partitions = _set_partition_size();

    // We will create a hashtable in every thread.
    // We use the hash to partition the keys to the matching hashtable.
    // Every thread traverses all keys/hashes and ignores the ones that doesn't fall in that partition.
    POOL.install(|| {
        (0..n_partitions).into_par_iter().map(|part_no| {
            let part_no = part_no as u64;
            let mut hash_tbl: HashMap<IdxHash, (bool, Vec<IdxSize>), IdBuildHasher> =
                HashMap::with_capacity_and_hasher(HASHMAP_INIT_SIZE, Default::default());

            let n_partitions = n_partitions as u64;
            let mut offset = 0;
            for hashes in hashes {
                for hashes in hashes.data_views() {
                    let len = hashes.len();
                    let mut idx = 0;
                    hashes.iter().for_each(|h| {
                        // partition hashes by thread no.
                        // So only a part of the hashes go to this hashmap
                        if this_partition(*h, part_no, n_partitions) {
                            let idx = idx + offset;
                            populate_multiple_key_hashmap(
                                &mut hash_tbl,
                                idx,
                                *h,
                                keys,
                                || (false, vec![idx]),
                                |v| v.1.push(idx),
                            )
                        }
                        idx += 1;
                    });

                    offset += len as IdxSize;
                }
            }
            hash_tbl
        })
    })
    .collect()
}

/// Probe the build table and add tuples to the results (inner join)
#[allow(clippy::too_many_arguments)]
fn probe_inner<F>(
    probe_hashes: &UInt64Chunked,
    hash_tbls: &[HashMap<IdxHash, Vec<IdxSize>, IdBuildHasher>],
    results: &mut Vec<(IdxSize, IdxSize)>,
    local_offset: usize,
    n_tables: u64,
    a: &DataFrame,
    b: &DataFrame,
    swap_fn: F,
) where
    F: Fn(IdxSize, IdxSize) -> (IdxSize, IdxSize),
{
    let mut idx_a = local_offset as IdxSize;
    for probe_hashes in probe_hashes.data_views() {
        for &h in probe_hashes {
            // probe table that contains the hashed value
            let current_probe_table =
                unsafe { get_hash_tbl_threaded_join_partitioned(h, hash_tbls, n_tables) };

            let entry = current_probe_table.raw_entry().from_hash(h, |idx_hash| {
                let idx_b = idx_hash.idx;
                // Safety:
                // indices in a join operation are always in bounds.
                unsafe { compare_df_rows2(a, b, idx_a as usize, idx_b as usize) }
            });

            if let Some((_, indexes_b)) = entry {
                let tuples = indexes_b.iter().map(|&idx_b| swap_fn(idx_a, idx_b));
                results.extend(tuples);
            }
            idx_a += 1;
        }
    }
}

pub(crate) fn get_offsets(probe_hashes: &[UInt64Chunked]) -> Vec<usize> {
    probe_hashes
        .iter()
        .map(|ph| ph.len())
        .scan(0, |state, val| {
            let out = *state;
            *state += val;
            Some(out)
        })
        .collect()
}

pub fn _inner_join_multiple_keys(
    a: &mut DataFrame,
    b: &mut DataFrame,
    swap: bool,
) -> (Vec<IdxSize>, Vec<IdxSize>) {
    // we assume that the b DataFrame is the shorter relation.
    // b will be used for the build phase.

    let n_threads = POOL.current_num_threads();
    let dfs_a = split_df(a, n_threads).unwrap();
    let dfs_b = split_df(b, n_threads).unwrap();

    let (build_hashes, random_state) = df_rows_to_hashes_threaded(&dfs_b, None).unwrap();
    let (probe_hashes, _) = df_rows_to_hashes_threaded(&dfs_a, Some(random_state)).unwrap();

    let hash_tbls = create_probe_table(&build_hashes, b);
    // early drop to reduce memory pressure
    drop(build_hashes);

    let n_tables = hash_tbls.len() as u64;
    let offsets = get_offsets(&probe_hashes);
    // next we probe the other relation
    // code duplication is because we want to only do the swap check once
    POOL.install(|| {
        probe_hashes
            .into_par_iter()
            .zip(offsets)
            .map(|(probe_hashes, offset)| {
                // local reference
                let hash_tbls = &hash_tbls;
                let mut results =
                    Vec::with_capacity(probe_hashes.len() / POOL.current_num_threads());
                let local_offset = offset;
                // code duplication is to hoist swap out of the inner loop.
                if swap {
                    probe_inner(
                        &probe_hashes,
                        hash_tbls,
                        &mut results,
                        local_offset,
                        n_tables,
                        a,
                        b,
                        |idx_a, idx_b| (idx_b, idx_a),
                    )
                } else {
                    probe_inner(
                        &probe_hashes,
                        hash_tbls,
                        &mut results,
                        local_offset,
                        n_tables,
                        a,
                        b,
                        |idx_a, idx_b| (idx_a, idx_b),
                    )
                }

                results
            })
            .flatten()
            .unzip()
    })
}

#[cfg(feature = "private")]
pub fn private_left_join_multiple_keys(
    a: &DataFrame,
    b: &DataFrame,
    // map the global indices to [chunk_idx, array_idx]
    // only needed if we have non contiguous memory
    chunk_mapping_left: Option<&[ChunkId]>,
    chunk_mapping_right: Option<&[ChunkId]>,
) -> LeftJoinIds {
    let mut a = DataFrame::new_no_checks(_to_physical_and_bit_repr(a.get_columns()));
    let mut b = DataFrame::new_no_checks(_to_physical_and_bit_repr(b.get_columns()));
    _left_join_multiple_keys(&mut a, &mut b, chunk_mapping_left, chunk_mapping_right)
}

src/testing.rs (line 99)

    pub fn frame_equal(&self, other: &DataFrame) -> bool {
        if self.shape() != other.shape() {
            return false;
        }
        for (left, right) in self.get_columns().iter().zip(other.get_columns()) {
            if !left.series_equal(right) {
                return false;
            }
        }
        true
    }

    /// Check if all values in `DataFrames` are equal where `None == None` evaluates to `true`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Atomic number" => &[1, 51, 300],
    ///                         "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
    /// let df2: DataFrame = df!("Atomic number" => &[1, 51, 300],
    ///                         "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
    ///
    /// assert!(df1.frame_equal_missing(&df2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn frame_equal_missing(&self, other: &DataFrame) -> bool {
        if self.shape() != other.shape() {
            return false;
        }
        for (left, right) in self.get_columns().iter().zip(other.get_columns()) {
            if !left.series_equal_missing(right) {
                return false;
            }
        }
        true
    }

src/utils/mod.rs (line 183)

pub fn split_df_as_ref(df: &DataFrame, n: usize) -> PolarsResult<Vec<DataFrame>> {
    let total_len = df.height();
    let chunk_size = total_len / n;

    if df.n_chunks() == n
        && df.get_columns()[0]
            .chunk_lengths()
            .all(|len| len.abs_diff(chunk_size) < 100)
    {
        return Ok(flatten_df(df).collect());
    }

    let mut out = Vec::with_capacity(n);

    for i in 0..n {
        let offset = i * chunk_size;
        let len = if i == (n - 1) {
            total_len - offset
        } else {
            chunk_size
        };
        let df = df.slice((i * chunk_size) as i64, len);
        if df.n_chunks() > 1 {
            // we add every chunk as separate dataframe. This make sure that every partition
            // deals with it.
            out.extend(flatten_df(&df))
        } else {
            out.push(df)
        }
    }

    Ok(out)
}

#[cfg(feature = "private")]
#[doc(hidden)]
/// Split a [`DataFrame`] into `n` parts. We take a `&mut` to be able to repartition/align chunks.
pub fn split_df(df: &mut DataFrame, n: usize) -> PolarsResult<Vec<DataFrame>> {
    if n == 0 || df.height() == 0 {
        return Ok(vec![df.clone()]);
    }
    // make sure that chunks are aligned.
    df.rechunk();
    split_df_as_ref(df, n)
}

pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
    let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
    &vals[raw_offset..raw_offset + slice_len]
}

#[inline]
#[cfg(feature = "private")]
#[doc(hidden)]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    let abs_offset = offset.unsigned_abs() as usize;

    // The offset counted from the start of the array
    // negative index
    if offset < 0 {
        if abs_offset <= array_len {
            (array_len - abs_offset, std::cmp::min(length, abs_offset))
            // negative index larger that array: slice from start
        } else {
            (0, std::cmp::min(length, array_len))
        }
        // positive index
    } else if abs_offset <= array_len {
        (abs_offset, std::cmp::min(length, array_len - abs_offset))
        // empty slice
    } else {
        (array_len, 0)
    }
}

/// Apply a macro on the Series
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($obj:expr, $macro:ident, $macro_utf8:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            DataType::Utf8 => $macro_utf8!($($opt_args)*),
            DataType::Boolean => $macro_bool!($($opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(u8 $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(u16 $(, $opt_args)*),
            DataType::UInt32 => $macro!(u32 $(, $opt_args)*),
            DataType::UInt64 => $macro!(u64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(i8 $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(i16 $(, $opt_args)*),
            DataType::Int32 => $macro!(i32 $(, $opt_args)*),
            DataType::Int64 => $macro!(i64 $(, $opt_args)*),
            DataType::Float32 => $macro!(f32 $(, $opt_args)*),
            DataType::Float64 => $macro!(f64 $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}

/// Apply a macro on the Series
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($obj:expr, $macro:ident, $macro_utf8:ident, $macro_binary:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            DataType::Utf8 => $macro_utf8!($($opt_args)*),
            #[cfg(feature = "dtype-binary")]
            DataType::Binary => $macro_binary!($($opt_args)*),
            DataType::Boolean => $macro_bool!($($opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(UInt8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(UInt16Type $(, $opt_args)*),
            DataType::UInt32 => $macro!(UInt32Type $(, $opt_args)*),
            DataType::UInt64 => $macro!(UInt64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(Int8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(Int16Type $(, $opt_args)*),
            DataType::Int32 => $macro!(Int32Type $(, $opt_args)*),
            DataType::Int64 => $macro!(Int64Type $(, $opt_args)*),
            DataType::Float32 => $macro!(Float32Type $(, $opt_args)*),
            DataType::Float64 => $macro!(Float64Type $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}

/// Apply a macro on the Downcasted ChunkedArray's
#[macro_export]
macro_rules! match_arrow_data_type_apply_macro_ca {
    ($self:expr, $macro:ident, $macro_utf8:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            DataType::Utf8 => $macro_utf8!($self.utf8().unwrap() $(, $opt_args)*),
            DataType::Boolean => $macro_bool!($self.bool().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            _ => unimplemented!(),
        }
    }};
}

#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        Int8 => __with_ty__! { i8 },
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        UInt8 => __with_ty__! { u8 },
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        _ => unimplemented!()
    }
})}

#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        Int8 => __with_ty__! { Int8Type },
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        UInt8 => __with_ty__! { UInt8Type },
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        _ => unimplemented!()
    }
})}

#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    use $crate::datatypes::*;
    match $key_type {
            #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
            #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
            #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
            #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        _ => unimplemented!()
    }
})}

/// Apply a macro on the Downcasted ChunkedArray's of DataTypes that are logical numerics.
/// So no logical.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}

/// Apply a macro on the Downcasted ChunkedArray's of DataTypes that are logical numerics.
/// So no logical.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        // clone so that we do not borrow
        match $self.dtype().clone() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let ca: &mut UInt8Chunked = $self.as_mut();
                $macro!(UInt8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let ca: &mut UInt16Chunked = $self.as_mut();
                $macro!(UInt16Type, ca $(, $opt_args)*)
            },
            DataType::UInt32 => {
                let ca: &mut UInt32Chunked = $self.as_mut();
                $macro!(UInt32Type, ca $(, $opt_args)*)
            },
            DataType::UInt64 => {
                let ca: &mut UInt64Chunked = $self.as_mut();
                $macro!(UInt64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let ca: &mut Int8Chunked = $self.as_mut();
                $macro!(Int8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let ca: &mut Int16Chunked = $self.as_mut();
                $macro!(Int16Type, ca $(, $opt_args)*)
            },
            DataType::Int32 => {
                let ca: &mut Int32Chunked = $self.as_mut();
                $macro!(Int32Type, ca $(, $opt_args)*)
            },
            DataType::Int64 => {
                let ca: &mut Int64Chunked = $self.as_mut();
                $macro!(Int64Type, ca $(, $opt_args)*)
            },
            DataType::Float32 => {
                let ca: &mut Float32Chunked = $self.as_mut();
                $macro!(Float32Type, ca $(, $opt_args)*)
            },
            DataType::Float64 => {
                let ca: &mut Float64Chunked = $self.as_mut();
                $macro!(Float64Type, ca $(, $opt_args)*)
            },
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}

#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Boolean => $self.bool().unwrap().$method($($args),*),
            DataType::Utf8 => $self.utf8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            DataType::Time => $self.time().unwrap().$method($($args),*),
            DataType::Date => $self.date().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
            DataType::List(_) => $self.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}

#[macro_export]
macro_rules! apply_method_physical_integer {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            _ => unimplemented!(),
        }
    }
}

// doesn't include Bool and Utf8
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            _ => apply_method_physical_integer!($self, $method, $($args),*),
        }
    }
}

#[macro_export]
macro_rules! df {
    ($($col_name:expr => $slice:expr), + $(,)?) => {
        {
            $crate::prelude::DataFrame::new(vec![$($crate::prelude::Series::new($col_name, $slice),)+])
        }
    }
}

#[cfg(feature = "private")]
pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
    use TimeUnit::*;
    match (tu_l, tu_r) {
        (Nanoseconds, Microseconds) => Microseconds,
        (_, Milliseconds) => Milliseconds,
        _ => *tu_l,
    }
}

/// This takes ownership of the DataFrame so that drop is called earlier.
/// Does not check if schema is correct
pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
where
    I: IntoIterator<Item = DataFrame>,
{
    let mut iter = dfs.into_iter();
    let additional = iter.size_hint().0;
    let mut acc_df = iter.next().unwrap();
    acc_df.reserve_chunks(additional);

    for df in iter {
        acc_df.vstack_mut_unchecked(&df);
    }
    acc_df
}

/// This takes ownership of the DataFrame so that drop is called earlier.
pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
where
    I: IntoIterator<Item = DataFrame>,
{
    let mut iter = dfs.into_iter();
    let additional = iter.size_hint().0;
    let mut acc_df = iter.next().unwrap();
    acc_df.reserve_chunks(additional);
    for df in iter {
        acc_df.vstack_mut(&df)?;
    }
    Ok(acc_df)
}

/// Concat the DataFrames to a single DataFrame.
pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
where
    I: IntoIterator<Item = &'a DataFrame>,
{
    let mut iter = dfs.into_iter();
    let additional = iter.size_hint().0;
    let mut acc_df = iter.next().unwrap().clone();
    acc_df.reserve_chunks(additional);
    for df in iter {
        acc_df.vstack_mut(df)?;
    }
    Ok(acc_df)
}

/// Concat the DataFrames to a single DataFrame.
pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
where
    I: IntoIterator<Item = &'a DataFrame>,
{
    let mut iter = dfs.into_iter();
    let additional = iter.size_hint().0;
    let mut acc_df = iter.next().unwrap().clone();
    acc_df.reserve_chunks(additional);
    for df in iter {
        acc_df.vstack_mut_unchecked(df);
    }
    acc_df
}

pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
    let mut iter = dfs.into_iter();
    let mut acc_df = iter.next().unwrap();
    for df in iter {
        acc_df.hstack_mut(df.get_columns())?;
    }
    Ok(acc_df)
}

src/functions.rs (line 249)

pub fn hor_concat_df(dfs: &[DataFrame]) -> PolarsResult<DataFrame> {
    let max_len = dfs
        .iter()
        .map(|df| df.height())
        .max()
        .ok_or_else(|| PolarsError::ComputeError("cannot concat empty dataframes".into()))?;

    let owned_df;

    // if not all equal length, extend the DataFrame with nulls
    let dfs = if !dfs.iter().all(|df| df.height() == max_len) {
        owned_df = dfs
            .iter()
            .cloned()
            .map(|mut df| {
                if df.height() != max_len {
                    let diff = max_len - df.height();
                    df.columns
                        .iter_mut()
                        .for_each(|s| *s = s.extend_constant(AnyValue::Null, diff).unwrap());
                }
                df
            })
            .collect::<Vec<_>>();
        owned_df.as_slice()
    } else {
        dfs
    };

    let mut first_df = dfs[0].clone();

    for df in &dfs[1..] {
        first_df.hstack_mut(df.get_columns())?;
    }
    Ok(first_df)
}

/// Concat `[DataFrame]`s diagonally.
#[cfg(feature = "diagonal_concat")]
#[cfg_attr(docsrs, doc(cfg(feature = "diagonal_concat")))]
/// Concat diagonally thereby combining different schemas.
pub fn diag_concat_df(dfs: &[DataFrame]) -> PolarsResult<DataFrame> {
    // TODO! replace with lazy only?
    let upper_bound_width = dfs.iter().map(|df| df.width()).sum();
    let mut column_names = AHashSet::with_capacity(upper_bound_width);
    let mut schema = Vec::with_capacity(upper_bound_width);

    for df in dfs {
        df.get_columns().iter().for_each(|s| {
            let name = s.name();
            if column_names.insert(name) {
                schema.push((name, s.dtype()))
            }
        });
    }

    let dfs = dfs
        .iter()
        .map(|df| {
            let height = df.height();
            let mut columns = Vec::with_capacity(schema.len());

            for (name, dtype) in &schema {
                match df.column(name).ok() {
                    Some(s) => columns.push(s.clone()),
                    None => columns.push(Series::full_null(name, height, dtype)),
                }
            }
            DataFrame::new_no_checks(columns)
        })
        .collect::<Vec<_>>();

    concat_df(&dfs)
}

src/frame/arithmetic.rs (line 119)

    fn binary_aligned(
        &self,
        other: &DataFrame,
        f: &(dyn Fn(&Series, &Series) -> PolarsResult<Series> + Sync + Send),
    ) -> PolarsResult<DataFrame> {
        let max_len = std::cmp::max(self.height(), other.height());
        let max_width = std::cmp::max(self.width(), other.width());
        let mut cols = self
            .get_columns()
            .par_iter()
            .zip(other.get_columns().par_iter())
            .map(|(l, r)| {
                let diff_l = max_len - l.len();
                let diff_r = max_len - r.len();

                let st = try_get_supertype(l.dtype(), r.dtype())?;
                let mut l = l.cast(&st)?;
                let mut r = r.cast(&st)?;

                if diff_l > 0 {
                    l = l.extend_constant(AnyValue::Null, diff_l)?;
                };
                if diff_r > 0 {
                    r = r.extend_constant(AnyValue::Null, diff_r)?;
                };

                f(&l, &r)
            })
            .collect::<PolarsResult<Vec<_>>>()?;

        let col_len = cols.len();
        if col_len < max_width {
            let df = if col_len < self.width() { self } else { other };

            for i in col_len..max_len {
                let s = &df.get_columns()[i];
                let name = s.name();
                let dtype = s.dtype();

                // trick to fill a series with nulls
                let vals: &[Option<i32>] = &[None];
                let s = Series::new(name, vals).cast(dtype)?;
                cols.push(s.new_from_index(0, max_len))
            }
        }
        DataFrame::new(cols)
    }

Additional examples can be found in:

source

pub fn get_columns_mut(&mut self) -> &mut Vec<Series> ⓘ

Examples found in repository ?

src/frame/cross_join.rs (line 99)

    pub fn _cross_join_with_names(
        &self,
        other: &DataFrame,
        names: &[String],
    ) -> PolarsResult<DataFrame> {
        let (mut l_df, r_df) = self.cross_join_dfs(other, None, false)?;
        l_df.get_columns_mut().extend_from_slice(&r_df.columns);

        l_df.get_columns_mut()
            .iter_mut()
            .zip(names)
            .for_each(|(s, name)| {
                if s.name() != name {
                    s.rename(name);
                }
            });
        Ok(l_df)
    }

More examples

Hide additional examples

src/frame/hash_join/mod.rs (line 579)

    pub fn _outer_join_from_series(
        &self,
        other: &DataFrame,
        s_left: &Series,
        s_right: &Series,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        #[cfg(feature = "dtype-categorical")]
        _check_categorical_src(s_left.dtype(), s_right.dtype())?;

        // store this so that we can keep original column order.
        let join_column_index = self.iter().position(|s| s.name() == s_left.name()).unwrap();

        // Get the indexes of the joined relations
        let opt_join_tuples = s_left.hash_join_outer(s_right);
        let mut opt_join_tuples = &*opt_join_tuples;

        if let Some((offset, len)) = slice {
            opt_join_tuples = slice_slice(opt_join_tuples, offset, len);
        }

        // Take the left and right dataframes by join tuples
        let (mut df_left, df_right) = POOL.join(
            || unsafe {
                self.drop(s_left.name()).unwrap().take_opt_iter_unchecked(
                    opt_join_tuples
                        .iter()
                        .map(|(left, _right)| left.map(|i| i as usize)),
                )
            },
            || unsafe {
                other.drop(s_right.name()).unwrap().take_opt_iter_unchecked(
                    opt_join_tuples
                        .iter()
                        .map(|(_left, right)| right.map(|i| i as usize)),
                )
            },
        );

        let mut s = s_left
            .to_physical_repr()
            .zip_outer_join_column(&s_right.to_physical_repr(), opt_join_tuples);
        s.rename(s_left.name());
        let s = match s_left.dtype() {
            #[cfg(feature = "dtype-categorical")]
            DataType::Categorical(_) => {
                let ca_left = s_left.categorical().unwrap();
                let new_rev_map = ca_left.merge_categorical_map(s_right.categorical().unwrap())?;
                let logical = s.u32().unwrap().clone();
                // safety:
                // categorical maps are merged
                unsafe {
                    CategoricalChunked::from_cats_and_rev_map_unchecked(logical, new_rev_map)
                        .into_series()
                }
            }
            dt @ DataType::Datetime(_, _)
            | dt @ DataType::Time
            | dt @ DataType::Date
            | dt @ DataType::Duration(_) => s.cast(dt).unwrap(),
            _ => s,
        };

        df_left.get_columns_mut().insert(join_column_index, s);
        _finish_join(df_left, df_right, suffix.as_deref())
    }

source

pub fn iter(&self) -> Iter<'_, Series>

Iterator over the columns as Series.

Example

let s1: Series = Series::new("Name", &["Pythagoras' theorem", "Shannon entropy"]);
let s2: Series = Series::new("Formula", &["a²+b²=c²", "H=-Σ[P(x)log|P(x)|]"]);
let df: DataFrame = DataFrame::new(vec![s1.clone(), s2.clone()])?;

let mut iterator = df.iter();

assert_eq!(iterator.next(), Some(&s1));
assert_eq!(iterator.next(), Some(&s2));
assert_eq!(iterator.next(), None);

Examples found in repository ?

src/frame/mod.rs (line 497)

496
497
498

    pub fn schema(&self) -> Schema {
        Schema::from(self.iter().map(|s| s.field().into_owned()))
    }

More examples

Hide additional examples

src/vector_hasher.rs (line 733)

pub(crate) fn df_rows_to_hashes(
    keys: &DataFrame,
    build_hasher: Option<RandomState>,
) -> PolarsResult<(UInt64Chunked, RandomState)> {
    let build_hasher = build_hasher.unwrap_or_default();

    let mut iter = keys.iter();
    let first = iter.next().expect("at least one key");
    let mut hashes = vec![];
    first.vec_hash(build_hasher.clone(), &mut hashes)?;
    let hslice = hashes.as_mut_slice();

    for keys in iter {
        keys.vec_hash_combine(build_hasher.clone(), hslice)?;
    }

    let chunks = vec![Box::new(PrimitiveArray::new(
        ArrowDataType::UInt64,
        hashes.into(),
        None,
    )) as ArrayRef];
    Ok((UInt64Chunked::from_chunks("", chunks), build_hasher))
}

src/utils/mod.rs (line 146)

fn flatten_df(df: &DataFrame) -> impl Iterator<Item = DataFrame> + '_ {
    df.iter_chunks_physical().flat_map(|chunk| {
        let df = DataFrame::new_no_checks(
            df.iter()
                .zip(chunk.into_arrays())
                .map(|(s, arr)| {
                    // Safety:
                    // datatypes are correct
                    let mut out = unsafe {
                        Series::from_chunks_and_dtype_unchecked(s.name(), vec![arr], s.dtype())
                    };
                    out.set_sorted(s.is_sorted());
                    out
                })
                .collect(),
        );
        if df.height() == 0 {
            None
        } else {
            Some(df)
        }
    })
}

src/frame/groupby/hashing.rs (line 299)

pub(crate) fn groupby_threaded_multiple_keys_flat(
    mut keys: DataFrame,
    n_partitions: usize,
    sorted: bool,
) -> PolarsResult<GroupsProxy> {
    let dfs = split_df(&mut keys, n_partitions).unwrap();
    let (hashes, _random_state) = df_rows_to_hashes_threaded(&dfs, None)?;
    let n_partitions = n_partitions as u64;

    // trait object to compare inner types.
    let keys_cmp = keys
        .iter()
        .map(|s| s.into_partial_eq_inner())
        .collect::<Vec<_>>();

    // We will create a hashtable in every thread.
    // We use the hash to partition the keys to the matching hashtable.
    // Every thread traverses all keys/hashes and ignores the ones that doesn't fall in that partition.
    let groups = POOL
        .install(|| {
            (0..n_partitions).into_par_iter().map(|thread_no| {
                let hashes = &hashes;

                let mut hash_tbl: HashMap<IdxHash, (IdxSize, Vec<IdxSize>), IdBuildHasher> =
                    HashMap::with_capacity_and_hasher(HASHMAP_INIT_SIZE, Default::default());

                let mut offset = 0;
                for hashes in hashes {
                    let len = hashes.len() as IdxSize;

                    let mut idx = 0;
                    for hashes_chunk in hashes.data_views() {
                        for &h in hashes_chunk {
                            // partition hashes by thread no.
                            // So only a part of the hashes go to this hashmap
                            if this_partition(h, thread_no, n_partitions) {
                                let idx = idx + offset;
                                populate_multiple_key_hashmap2(
                                    &mut hash_tbl,
                                    idx,
                                    h,
                                    &keys_cmp,
                                    || (idx, vec![idx]),
                                    |v| v.1.push(idx),
                                );
                            }
                            idx += 1;
                        }
                    }

                    offset += len;
                }
                hash_tbl.into_iter().map(|(_k, v)| v).collect::<Vec<_>>()
            })
        })
        .collect::<Vec<_>>();
    Ok(finish_group_order(groups, sorted))
}

src/frame/hash_join/mod.rs (line 527)

    pub fn _outer_join_from_series(
        &self,
        other: &DataFrame,
        s_left: &Series,
        s_right: &Series,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        #[cfg(feature = "dtype-categorical")]
        _check_categorical_src(s_left.dtype(), s_right.dtype())?;

        // store this so that we can keep original column order.
        let join_column_index = self.iter().position(|s| s.name() == s_left.name()).unwrap();

        // Get the indexes of the joined relations
        let opt_join_tuples = s_left.hash_join_outer(s_right);
        let mut opt_join_tuples = &*opt_join_tuples;

        if let Some((offset, len)) = slice {
            opt_join_tuples = slice_slice(opt_join_tuples, offset, len);
        }

        // Take the left and right dataframes by join tuples
        let (mut df_left, df_right) = POOL.join(
            || unsafe {
                self.drop(s_left.name()).unwrap().take_opt_iter_unchecked(
                    opt_join_tuples
                        .iter()
                        .map(|(left, _right)| left.map(|i| i as usize)),
                )
            },
            || unsafe {
                other.drop(s_right.name()).unwrap().take_opt_iter_unchecked(
                    opt_join_tuples
                        .iter()
                        .map(|(_left, right)| right.map(|i| i as usize)),
                )
            },
        );

        let mut s = s_left
            .to_physical_repr()
            .zip_outer_join_column(&s_right.to_physical_repr(), opt_join_tuples);
        s.rename(s_left.name());
        let s = match s_left.dtype() {
            #[cfg(feature = "dtype-categorical")]
            DataType::Categorical(_) => {
                let ca_left = s_left.categorical().unwrap();
                let new_rev_map = ca_left.merge_categorical_map(s_right.categorical().unwrap())?;
                let logical = s.u32().unwrap().clone();
                // safety:
                // categorical maps are merged
                unsafe {
                    CategoricalChunked::from_cats_and_rev_map_unchecked(logical, new_rev_map)
                        .into_series()
                }
            }
            dt @ DataType::Datetime(_, _)
            | dt @ DataType::Time
            | dt @ DataType::Date
            | dt @ DataType::Duration(_) => s.cast(dt).unwrap(),
            _ => s,
        };

        df_left.get_columns_mut().insert(join_column_index, s);
        _finish_join(df_left, df_right, suffix.as_deref())
    }

src/frame/hash_join/multiple_keys.rs (line 264)

pub fn _left_join_multiple_keys(
    a: &mut DataFrame,
    b: &mut DataFrame,
    // map the global indices to [chunk_idx, array_idx]
    // only needed if we have non contiguous memory
    chunk_mapping_left: Option<&[ChunkId]>,
    chunk_mapping_right: Option<&[ChunkId]>,
) -> LeftJoinIds {
    // we should not join on logical types
    debug_assert!(!a.iter().any(|s| s.dtype().is_logical()));
    debug_assert!(!b.iter().any(|s| s.dtype().is_logical()));

    let n_threads = POOL.current_num_threads();
    let dfs_a = split_df(a, n_threads).unwrap();
    let dfs_b = split_df(b, n_threads).unwrap();

    let (build_hashes, random_state) = df_rows_to_hashes_threaded(&dfs_b, None).unwrap();
    let (probe_hashes, _) = df_rows_to_hashes_threaded(&dfs_a, Some(random_state)).unwrap();

    let hash_tbls = create_probe_table(&build_hashes, b);
    // early drop to reduce memory pressure
    drop(build_hashes);

    let n_tables = hash_tbls.len() as u64;
    let offsets = get_offsets(&probe_hashes);

    // next we probe the other relation
    // code duplication is because we want to only do the swap check once
    let results = POOL.install(move || {
        probe_hashes
            .into_par_iter()
            .zip(offsets)
            .map(move |(probe_hashes, offset)| {
                // local reference
                let hash_tbls = &hash_tbls;

                let len = probe_hashes.len() / POOL.current_num_threads();
                let mut result_idx_left = Vec::with_capacity(len);
                let mut result_idx_right = Vec::with_capacity(len);
                let local_offset = offset;

                let mut idx_a = local_offset as IdxSize;
                for probe_hashes in probe_hashes.data_views() {
                    for &h in probe_hashes {
                        // probe table that contains the hashed value
                        let current_probe_table = unsafe {
                            get_hash_tbl_threaded_join_partitioned(h, hash_tbls, n_tables)
                        };

                        let entry = current_probe_table.raw_entry().from_hash(h, |idx_hash| {
                            let idx_b = idx_hash.idx;
                            // Safety:
                            // indices in a join operation are always in bounds.
                            unsafe { compare_df_rows2(a, b, idx_a as usize, idx_b as usize) }
                        });

                        match entry {
                            // left and right matches
                            Some((_, indexes_b)) => {
                                result_idx_left
                                    .extend(std::iter::repeat(idx_a).take(indexes_b.len()));
                                result_idx_right.extend(indexes_b.iter().copied().map(Some))
                            }
                            // only left values, right = null
                            None => {
                                result_idx_left.push(idx_a);
                                result_idx_right.push(None);
                            }
                        }
                        idx_a += 1;
                    }
                }

                finish_left_join_mappings(
                    result_idx_left,
                    result_idx_right,
                    chunk_mapping_left,
                    chunk_mapping_right,
                )
            })
            .collect::<Vec<_>>()
    });
    flatten_left_join_ids(results)
}

#[cfg(feature = "semi_anti_join")]
pub(crate) fn create_build_table_semi_anti(
    hashes: &[UInt64Chunked],
    keys: &DataFrame,
) -> Vec<HashMap<IdxHash, (), IdBuildHasher>> {
    let n_partitions = _set_partition_size();

    // We will create a hashtable in every thread.
    // We use the hash to partition the keys to the matching hashtable.
    // Every thread traverses all keys/hashes and ignores the ones that doesn't fall in that partition.
    POOL.install(|| {
        (0..n_partitions).into_par_iter().map(|part_no| {
            let part_no = part_no as u64;
            let mut hash_tbl: HashMap<IdxHash, (), IdBuildHasher> =
                HashMap::with_capacity_and_hasher(HASHMAP_INIT_SIZE, Default::default());

            let n_partitions = n_partitions as u64;
            let mut offset = 0;
            for hashes in hashes {
                for hashes in hashes.data_views() {
                    let len = hashes.len();
                    let mut idx = 0;
                    hashes.iter().for_each(|h| {
                        // partition hashes by thread no.
                        // So only a part of the hashes go to this hashmap
                        if this_partition(*h, part_no, n_partitions) {
                            let idx = idx + offset;
                            populate_multiple_key_hashmap(
                                &mut hash_tbl,
                                idx,
                                *h,
                                keys,
                                || (),
                                |_| (),
                            )
                        }
                        idx += 1;
                    });

                    offset += len as IdxSize;
                }
            }
            hash_tbl
        })
    })
    .collect()
}

#[cfg(feature = "semi_anti_join")]
pub(crate) fn semi_anti_join_multiple_keys_impl<'a>(
    a: &'a mut DataFrame,
    b: &'a mut DataFrame,
) -> impl ParallelIterator<Item = (IdxSize, bool)> + 'a {
    // we should not join on logical types
    debug_assert!(!a.iter().any(|s| s.dtype().is_logical()));
    debug_assert!(!b.iter().any(|s| s.dtype().is_logical()));

    let n_threads = POOL.current_num_threads();
    let dfs_a = split_df(a, n_threads).unwrap();
    let dfs_b = split_df(b, n_threads).unwrap();

    let (build_hashes, random_state) = df_rows_to_hashes_threaded(&dfs_b, None).unwrap();
    let (probe_hashes, _) = df_rows_to_hashes_threaded(&dfs_a, Some(random_state)).unwrap();

    let hash_tbls = create_build_table_semi_anti(&build_hashes, b);
    // early drop to reduce memory pressure
    drop(build_hashes);

    let n_tables = hash_tbls.len() as u64;
    let offsets = get_offsets(&probe_hashes);

    // next we probe the other relation
    // code duplication is because we want to only do the swap check once
    POOL.install(move || {
        probe_hashes
            .into_par_iter()
            .zip(offsets)
            .map(move |(probe_hashes, offset)| {
                // local reference
                let hash_tbls = &hash_tbls;
                let mut results =
                    Vec::with_capacity(probe_hashes.len() / POOL.current_num_threads());
                let local_offset = offset;

                let mut idx_a = local_offset as IdxSize;
                for probe_hashes in probe_hashes.data_views() {
                    for &h in probe_hashes {
                        // probe table that contains the hashed value
                        let current_probe_table = unsafe {
                            get_hash_tbl_threaded_join_partitioned(h, hash_tbls, n_tables)
                        };

                        let entry = current_probe_table.raw_entry().from_hash(h, |idx_hash| {
                            let idx_b = idx_hash.idx;
                            // Safety:
                            // indices in a join operation are always in bounds.
                            unsafe { compare_df_rows2(a, b, idx_a as usize, idx_b as usize) }
                        });

                        match entry {
                            // left and right matches
                            Some((_, _)) => results.push((idx_a, true)),
                            // only left values, right = null
                            None => results.push((idx_a, false)),
                        }
                        idx_a += 1;
                    }
                }

                results
            })
            .flatten()
    })
}

source

pub fn get_column_names(&self) -> Vec<&str> ⓘ

Example

let df: DataFrame = df!("Language" => &["Rust", "Python"],
                        "Designer" => &["Graydon Hoare", "Guido van Rossum"])?;

assert_eq!(df.get_column_names(), &["Language", "Designer"]);

Examples found in repository ?

src/frame/groupby/mod.rs (line 387)

    fn prepare_agg(&self) -> PolarsResult<(Vec<Series>, Vec<Series>)> {
        let selection = match &self.selected_agg {
            Some(selection) => selection.clone(),
            None => {
                let by: Vec<_> = self.selected_keys.iter().map(|s| s.name()).collect();
                self.df
                    .get_column_names()
                    .into_iter()
                    .filter(|a| !by.contains(a))
                    .map(|s| s.to_string())
                    .collect()
            }
        };

        let keys = self.keys();
        let agg_col = self.df.select_series(selection)?;
        Ok((keys, agg_col))
    }

More examples

Hide additional examples

src/frame/mod.rs (line 3075)

    fn unique_impl(
        &self,
        maintain_order: bool,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<Self> {
        use UniqueKeepStrategy::*;
        let names = match &subset {
            Some(s) => s.iter().map(|s| &**s).collect(),
            None => self.get_column_names(),
        };

        let columns = match (keep, maintain_order) {
            (First, true) => {
                let gb = self.groupby_stable(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
            }
            (Last, true) => {
                // maintain order by last values, so the sorted groups are not correct as they
                // are sorted by the first value
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                let last_idx: NoNull<IdxCa> = groups
                    .iter()
                    .map(|g| match g {
                        GroupsIndicator::Idx((_first, idx)) => idx[idx.len() - 1],
                        GroupsIndicator::Slice([first, len]) => first + len,
                    })
                    .collect();

                let last_idx = last_idx.sort(false);
                return Ok(unsafe { self.take_unchecked(&last_idx) });
            }
            (First, false) => {
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
            }
            (Last, false) => {
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_last(groups) })
            }
        };
        Ok(DataFrame::new_no_checks(columns))
    }

    /// Get a mask of all the unique rows in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Company" => &["Apple", "Microsoft"],
    ///                         "ISIN" => &["US0378331005", "US5949181045"])?;
    /// let ca: ChunkedArray<BooleanType> = df.is_unique()?;
    ///
    /// assert!(ca.all());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_unique(&self) -> PolarsResult<BooleanChunked> {
        let gb = self.groupby(self.get_column_names())?;
        let groups = gb.take_groups();
        Ok(is_unique_helper(
            groups,
            self.height() as IdxSize,
            true,
            false,
        ))
    }

    /// Get a mask of all the duplicated rows in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Company" => &["Alphabet", "Alphabet"],
    ///                         "ISIN" => &["US02079K3059", "US02079K1079"])?;
    /// let ca: ChunkedArray<BooleanType> = df.is_duplicated()?;
    ///
    /// assert!(!ca.all());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_duplicated(&self) -> PolarsResult<BooleanChunked> {
        let gb = self.groupby(self.get_column_names())?;
        let groups = gb.take_groups();
        Ok(is_unique_helper(
            groups,
            self.height() as IdxSize,
            false,
            true,
        ))
    }

src/frame/asof_join/groups.rs (line 668)

    pub fn _join_asof_by(
        &self,
        other: &DataFrame,
        left_on: &str,
        right_on: &str,
        left_by: Vec<String>,
        right_by: Vec<String>,
        strategy: AsofStrategy,
        tolerance: Option<AnyValue<'static>>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        let left_asof = self.column(left_on)?.to_physical_repr();
        let right_asof = other.column(right_on)?.to_physical_repr();
        let right_asof_name = right_asof.name();
        let left_asof_name = left_asof.name();

        check_asof_columns(&left_asof, &right_asof)?;

        let mut left_by = self.select_physical(left_by)?;
        let mut right_by = other.select_physical(right_by)?;

        let left_by_s = left_by.get_columns()[0].to_physical_repr().into_owned();
        let right_by_s = right_by.get_columns()[0].to_physical_repr().into_owned();

        let right_join_tuples = with_match_physical_numeric_polars_type!(left_asof.dtype(), |$T| {
            let left_asof: &ChunkedArray<$T> = left_asof.as_ref().as_ref().as_ref();
            let right_asof: &ChunkedArray<$T> = right_asof.as_ref().as_ref().as_ref();

            dispatch_join(
                left_asof,
                right_asof,
                &left_by_s,
                &right_by_s,
                &mut left_by,
                &mut right_by,
                strategy,
                tolerance
            )
        })?;

        let mut drop_these = right_by.get_column_names();
        if left_asof_name == right_asof_name {
            drop_these.push(right_asof_name);
        }

        let cols = other
            .get_columns()
            .iter()
            .filter_map(|s| {
                if drop_these.contains(&s.name()) {
                    None
                } else {
                    Some(s.clone())
                }
            })
            .collect();
        let other = DataFrame::new_no_checks(cols);

        let mut left = self.clone();
        let mut right_join_tuples = &*right_join_tuples;

        if let Some((offset, len)) = slice {
            left = left.slice(offset, len);
            right_join_tuples = slice_slice(right_join_tuples, offset, len);
        }

        // Safety:
        // join tuples are in bounds
        let right_df = unsafe {
            other.take_opt_iter_unchecked(
                right_join_tuples
                    .iter()
                    .map(|opt_idx| opt_idx.map(|idx| idx as usize)),
            )
        };

        _finish_join(left, right_df, None)
    }

source

pub fn get_column_names_owned(&self) -> Vec<String> ⓘ

Get the Vec<String> representing the column names.

Examples found in repository ?

src/frame/mod.rs (line 1323)

    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // This function is copied from std::slice::range (https://doc.rust-lang.org/std/slice/fn.range.html)
        // because it is the nightly feature. We should change here if this function were stable.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let len = bounds.end;

            let start: ops::Bound<&usize> = range.start_bound();
            let start = match start {
                ops::Bound::Included(&start) => start,
                ops::Bound::Excluded(start) => start.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice from after maximum usize");
                }),
                ops::Bound::Unbounded => 0,
            };

            let end: ops::Bound<&usize> = range.end_bound();
            let end = match end {
                ops::Bound::Included(end) => end.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice up to maximum usize");
                }),
                ops::Bound::Excluded(&end) => end,
                ops::Bound::Unbounded => len,
            };

            if start > end {
                panic!("slice index starts at {start} but ends at {end}");
            }
            if end > len {
                panic!("range end index {end} out of range for slice of length {len}",);
            }

            ops::Range { start, end }
        }

        let colnames = self.get_column_names_owned();
        let range = get_range(range, ..colnames.len());

        self.select_impl(&colnames[range])
    }

source

pub fn set_column_names<S: AsRef<str>>(
&mut self,
names: &[S]
) -> PolarsResult<()>

Set the column names.

Example

let mut df: DataFrame = df!("Mathematical set" => &["ℕ", "ℤ", "𝔻", "ℚ", "ℝ", "ℂ"])?;
df.set_column_names(&["Set"])?;

assert_eq!(df.get_column_names(), &["Set"]);

source

pub fn dtypes(&self) -> Vec<DataType> ⓘ

Get the data types of the columns in the DataFrame.

Example

let venus_air: DataFrame = df!("Element" => &["Carbon dioxide", "Nitrogen"],
                               "Fraction" => &[0.965, 0.035])?;

assert_eq!(venus_air.dtypes(), &[DataType::Utf8, DataType::Float64]);

source

pub fn n_chunks(&self) -> usize

The number of chunks per column

Examples found in repository ?

src/utils/mod.rs (line 182)

pub fn split_df_as_ref(df: &DataFrame, n: usize) -> PolarsResult<Vec<DataFrame>> {
    let total_len = df.height();
    let chunk_size = total_len / n;

    if df.n_chunks() == n
        && df.get_columns()[0]
            .chunk_lengths()
            .all(|len| len.abs_diff(chunk_size) < 100)
    {
        return Ok(flatten_df(df).collect());
    }

    let mut out = Vec::with_capacity(n);

    for i in 0..n {
        let offset = i * chunk_size;
        let len = if i == (n - 1) {
            total_len - offset
        } else {
            chunk_size
        };
        let df = df.slice((i * chunk_size) as i64, len);
        if df.n_chunks() > 1 {
            // we add every chunk as separate dataframe. This make sure that every partition
            // deals with it.
            out.extend(flatten_df(&df))
        } else {
            out.push(df)
        }
    }

    Ok(out)
}

More examples

Hide additional examples

src/frame/mod.rs (line 1636)

    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            return self.take_unchecked_vectical(&idx_ca.into_inner());
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            let idx_ca = idx_ca.into_inner();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_iter_unchecked(&mut i)
            })
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` values by indexes from an iterator that may contain None values.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked_vectical(&idx_ca);
        }

        let n_chunks = self.n_chunks();

        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_opt_iter_unchecked(&mut i)
            })
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` rows by index values.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        let indices = if indices.chunks.len() > 1 {
            Cow::Owned(indices.rechunk())
        } else {
            Cow::Borrowed(indices)
        };
        let new_col = POOL.install(|| {
            self.try_apply_columns_par(&|s| match s.dtype() {
                DataType::Utf8 => s.take_threaded(&indices, true),
                _ => s.take(&indices),
            })
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        self.take_unchecked_impl(idx, true)
    }

    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        let cols = if allow_threads {
            POOL.install(|| {
                self.apply_columns_par(&|s| match s.dtype() {
                    DataType::Utf8 => s.take_unchecked_threaded(idx, true).unwrap(),
                    _ => s.take_unchecked(idx).unwrap(),
                })
            })
        } else {
            self.columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect()
        };
        DataFrame::new_no_checks(cols)
    }

    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let idxs = split_ca(indices, n_threads).unwrap();

        let dfs: Vec<_> = POOL.install(|| {
            idxs.par_iter()
                .map(|idx| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(idx).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        let mut iter = dfs.into_iter();
        let first = iter.next().unwrap();
        iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        })
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        self.select_mut(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))
            .map(|s| s.rename(name))?;

        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(self.columns.iter().map(|s| s.name()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }
        Ok(self)
    }

    /// Sort `DataFrame` in place by a column.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // a lot of indirection in both sorting and take
        self.as_single_chunk_par();
        let by_column = self.select_series(by_column)?;
        let reverse = reverse.into_vec();
        self.columns = self.sort_impl(by_column, reverse, false, None)?.columns;
        Ok(self)
    }

    /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // note that the by_column argument also contains evaluated expression from polars-lazy
        // that may not even be present in this dataframe.

        // therefore when we try to set the first columns as sorted, we ignore the error
        // as expressions are not present (they are renamed to _POLARS_SORT_COLUMN_i.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: reverse[0],
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted
        // if the column did not exists it is ok, because we sorted by an expression
        // not present in the dataframe
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted clone of this `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.sort_in_place(by_column, reverse)?;
        Ok(df)
    }

    /// Sort the `DataFrame` by a single column with extra options.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut df = self.clone();
        // a lot of indirection in both sorting and take
        df.as_single_chunk_par();
        let by_column = vec![df.column(by_column)?.clone()];
        let reverse = vec![options.descending];
        df.columns = df
            .sort_impl(by_column, reverse, options.nulls_last, None)?
            .columns;
        Ok(df)
    }

    /// Replace a column with a `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Country" => &["United States", "China"],
    ///                         "Area (km²)" => &[9_833_520, 9_596_961])?;
    /// let s: Series = Series::new("Country", &["USA", "PRC"]);
    ///
    /// assert!(df.replace("Nation", s.clone()).is_err());
    /// assert!(df.replace("Country", s).is_ok());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace<S: IntoSeries>(&mut self, column: &str, new_col: S) -> PolarsResult<&mut Self> {
        self.apply(column, |_| new_col.into_series())
    }

    /// Replace or update a column. The difference between this method and [DataFrame::with_column]
    /// is that now the value of `column: &str` determines the name of the column and not the name
    /// of the `Series` passed to this method.
    pub fn replace_or_add<S: IntoSeries>(
        &mut self,
        column: &str,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut new_col = new_col.into_series();
        new_col.rename(column);
        self.with_column(new_col)
    }

    /// Replace column at index `idx` with a `Series`.
    ///
    /// # Example
    ///
    /// ```ignored
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace_at_idx<S: IntoSeries>(
        &mut self,
        idx: usize,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut new_column = new_col.into_series();
        if new_column.len() != self.height() {
            return Err(PolarsError::ShapeMisMatch(
                format!("Cannot replace Series at index {}. The shape of Series {} does not match that of the DataFrame {}",
                idx, new_column.len(), self.height()
                ).into()));
        };
        if idx >= self.width() {
            return Err(PolarsError::ComputeError(
                format!(
                    "Column index: {} outside of DataFrame with {} columns",
                    idx,
                    self.width()
                )
                .into(),
            ));
        }
        let old_col = &mut self.columns[idx];
        mem::swap(old_col, &mut new_column);
        Ok(self)
    }

    /// Apply a closure to a column. This is the recommended way to do in place modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("names", &["Jean", "Claude", "van"]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// fn str_to_len(str_val: &Series) -> Series {
    ///     str_val.utf8()
    ///         .unwrap()
    ///         .into_iter()
    ///         .map(|opt_name: Option<&str>| {
    ///             opt_name.map(|name: &str| name.len() as u32)
    ///          })
    ///         .collect::<UInt32Chunked>()
    ///         .into_series()
    /// }
    ///
    /// // Replace the names column by the length of the names.
    /// df.apply("names", str_to_len);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    |       |
    /// | ---    | names |
    /// | str    | u32   |
    /// +========+=======+
    /// | "ham"  | 4     |
    /// +--------+-------+
    /// | "spam" | 6     |
    /// +--------+-------+
    /// | "egg"  | 3     |
    /// +--------+-------+
    /// ```
    pub fn apply<F, S>(&mut self, name: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let idx = self.check_name_to_idx(name)?;
        self.apply_at_idx(idx, f)
    }

    /// Apply a closure to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.apply_at_idx(1, |s| s + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | ascii |
    /// | ---    | ---   |
    /// | str    | i32   |
    /// +========+=======+
    /// | "ham"  | 102   |
    /// +--------+-------+
    /// | "spam" | 111   |
    /// +--------+-------+
    /// | "egg"  | 111   |
    /// +--------+-------+
    /// ```
    pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let df_height = self.height();
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();
        let new_col = f(col).into_series();
        match new_col.len() {
            1 => {
                let new_col = new_col.new_from_index(0, df_height);
                let _ = mem::replace(col, new_col);
            }
            len if (len == df_height) => {
                let _ = mem::replace(col, new_col);
            }
            len => {
                return Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Result Series has shape {} where the DataFrame has height {}",
                        len,
                        self.height()
                    )
                    .into(),
                ));
            }
        }

        // make sure the name remains the same after applying the closure
        unsafe {
            let col = self.columns.get_unchecked_mut(idx);
            col.rename(&name);
        }
        Ok(self)
    }

    /// Apply a closure that may fail to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values a column of a `DataFrame` given range of indexes.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// let idx = vec![0, 1, 4];
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set_at_idx_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "ham-is-modified"   | 1      |
    /// +---------------------+--------+
    /// | "spam-is-modified"  | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "quack-is-modified" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();

        let _ = mem::replace(col, f(col).map(|s| s.into_series())?);

        // make sure the name remains the same after applying the closure
        unsafe {
            let col = self.columns.get_unchecked_mut(idx);
            col.rename(&name);
        }
        Ok(self)
    }

    /// Apply a closure that may fail to a column. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values a column of a `DataFrame` given a boolean mask.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // create a mask
    /// let values = df.column("values")?;
    /// let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?;
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set(&mask, Some("not_within_bounds"))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "not_within_bounds" | 1      |
    /// +---------------------+--------+
    /// | "spam"              | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "not_within_bounds" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply<F, S>(&mut self, column: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let idx = self
            .find_idx_by_name(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))?;
        self.try_apply_at_idx(idx, f)
    }

    /// Slice the `DataFrame` along the rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Fruit" => &["Apple", "Grape", "Grape", "Fig", "Fig"],
    ///                         "Color" => &["Green", "Red", "White", "White", "Red"])?;
    /// let sl: DataFrame = df.slice(2, 3);
    ///
    /// assert_eq!(sl.shape(), (3, 2));
    /// println!("{}", sl);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Output:
    /// ```text
    /// shape: (3, 2)
    /// +-------+-------+
    /// | Fruit | Color |
    /// | ---   | ---   |
    /// | str   | str   |
    /// +=======+=======+
    /// | Grape | White |
    /// +-------+-------+
    /// | Fig   | White |
    /// +-------+-------+
    /// | Fig   | Red   |
    /// +-------+-------+
    /// ```
    #[must_use]
    pub fn slice(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        let col = self
            .columns
            .iter()
            .map(|s| s.slice(offset, length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    #[must_use]
    pub fn slice_par(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.slice(offset, length)))
    }

    #[must_use]
    pub fn _slice_and_realloc(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        DataFrame::new_no_checks(self.apply_columns(&|s| {
            let mut out = s.slice(offset, length);
            out.shrink_to_fit();
            out
        }))
    }

    /// Get the head of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank by GDP (2021)" => &[1, 2, 3, 4, 5],
    ///         "Continent" => &["North America", "Asia", "Asia", "Europe", "Europe"],
    ///         "Country" => &["United States", "China", "Japan", "Germany", "United Kingdom"],
    ///         "Capital" => &["Washington", "Beijing", "Tokyo", "Berlin", "London"])?;
    /// assert_eq!(countries.shape(), (5, 4));
    ///
    /// println!("{}", countries.head(Some(3)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 4)
    /// +--------------------+---------------+---------------+------------+
    /// | Rank by GDP (2021) | Continent     | Country       | Capital    |
    /// | ---                | ---           | ---           | ---        |
    /// | i32                | str           | str           | str        |
    /// +====================+===============+===============+============+
    /// | 1                  | North America | United States | Washington |
    /// +--------------------+---------------+---------------+------------+
    /// | 2                  | Asia          | China         | Beijing    |
    /// +--------------------+---------------+---------------+------------+
    /// | 3                  | Asia          | Japan         | Tokyo      |
    /// +--------------------+---------------+---------------+------------+
    /// ```
    #[must_use]
    pub fn head(&self, length: Option<usize>) -> Self {
        let col = self
            .columns
            .iter()
            .map(|s| s.head(length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Get the tail of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank (2021)" => &[105, 106, 107, 108, 109],
    ///         "Apple Price (€/kg)" => &[0.75, 0.70, 0.70, 0.65, 0.52],
    ///         "Country" => &["Kosovo", "Moldova", "North Macedonia", "Syria", "Turkey"])?;
    /// assert_eq!(countries.shape(), (5, 3));
    ///
    /// println!("{}", countries.tail(Some(2)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (2, 3)
    /// +-------------+--------------------+---------+
    /// | Rank (2021) | Apple Price (€/kg) | Country |
    /// | ---         | ---                | ---     |
    /// | i32         | f64                | str     |
    /// +=============+====================+=========+
    /// | 108         | 0.63               | Syria   |
    /// +-------------+--------------------+---------+
    /// | 109         | 0.63               | Turkey  |
    /// +-------------+--------------------+---------+
    /// ```
    #[must_use]
    pub fn tail(&self, length: Option<usize>) -> Self {
        let col = self
            .columns
            .iter()
            .map(|s| s.tail(length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks(&self) -> RecordBatchIter {
        RecordBatchIter {
            columns: &self.columns,
            idx: 0,
            n_chunks: self.n_chunks(),
        }
    }

source

pub fn fields(&self) -> Vec<Field> ⓘ

Get a reference to the schema fields of the DataFrame.

Example

let earth: DataFrame = df!("Surface type" => &["Water", "Land"],
                           "Fraction" => &[0.708, 0.292])?;

let f1: Field = Field::new("Surface type", DataType::Utf8);
let f2: Field = Field::new("Fraction", DataType::Float64);

assert_eq!(earth.fields(), &[f1, f2]);

source

pub fn shape(&self) -> (usize, usize)

Get (height, width) of the DataFrame.

Example

let df0: DataFrame = DataFrame::default();
let df1: DataFrame = df!("1" => &[1, 2, 3, 4, 5])?;
let df2: DataFrame = df!("1" => &[1, 2, 3, 4, 5],
                         "2" => &[1, 2, 3, 4, 5])?;

assert_eq!(df0.shape(), (0 ,0));
assert_eq!(df1.shape(), (5, 1));
assert_eq!(df2.shape(), (5, 2));

Examples found in repository ?

src/frame/mod.rs (line 706)

705
706
707

    pub fn height(&self) -> usize {
        self.shape().0
    }

More examples

Hide additional examples

src/testing.rs (line 96)

    pub fn frame_equal(&self, other: &DataFrame) -> bool {
        if self.shape() != other.shape() {
            return false;
        }
        for (left, right) in self.get_columns().iter().zip(other.get_columns()) {
            if !left.series_equal(right) {
                return false;
            }
        }
        true
    }

    /// Check if all values in `DataFrames` are equal where `None == None` evaluates to `true`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Atomic number" => &[1, 51, 300],
    ///                         "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
    /// let df2: DataFrame = df!("Atomic number" => &[1, 51, 300],
    ///                         "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
    ///
    /// assert!(df1.frame_equal_missing(&df2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn frame_equal_missing(&self, other: &DataFrame) -> bool {
        if self.shape() != other.shape() {
            return false;
        }
        for (left, right) in self.get_columns().iter().zip(other.get_columns()) {
            if !left.series_equal_missing(right) {
                return false;
            }
        }
        true
    }

    /// Checks if the Arc ptrs of the Series are equal
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Atomic number" => &[1, 51, 300],
    ///                         "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
    /// let df2: &DataFrame = &df1;
    ///
    /// assert!(df1.ptr_equal(df2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn ptr_equal(&self, other: &DataFrame) -> bool {
        self.columns
            .iter()
            .zip(other.columns.iter())
            .all(|(a, b)| a.get_data_ptr() == b.get_data_ptr())
    }
}

impl PartialEq for DataFrame {
    fn eq(&self, other: &Self) -> bool {
        self.shape() == other.shape()
            && self
                .columns
                .iter()
                .zip(other.columns.iter())
                .all(|(s1, s2)| s1 == s2)
    }

src/chunked_array/ndarray.rs (line 124)

    pub fn to_ndarray<N>(&self) -> PolarsResult<Array2<N::Native>>
    where
        N: PolarsNumericType,
    {
        let columns = self
            .get_columns()
            .par_iter()
            .map(|s| {
                let s = s.cast(&N::get_dtype())?;
                let s = match s.dtype() {
                    DataType::Float32 => {
                        let ca = s.f32().unwrap();
                        ca.none_to_nan().into_series()
                    }
                    DataType::Float64 => {
                        let ca = s.f64().unwrap();
                        ca.none_to_nan().into_series()
                    }
                    _ => s,
                };
                Ok(s.rechunk())
            })
            .collect::<PolarsResult<Vec<_>>>()?;

        let shape = self.shape();
        let height = self.height();
        let mut membuf = Vec::with_capacity(shape.0 * shape.1);
        let ptr = membuf.as_ptr() as usize;

        columns.par_iter().enumerate().map(|(col_idx, s)| {
            if s.null_count() != 0 {
                return Err(PolarsError::ComputeError(
                    "Creation of ndarray with null values is not supported. Consider using floats and NaNs".into(),
                ));
            }

            // this is an Arc clone if already of type N
            let s = s.cast(&N::get_dtype())?;
            let ca = s.unpack::<N>()?;
            let vals = ca.cont_slice().unwrap();

            // Safety:
            // we get parallel access to the vector
            // but we make sure that we don't get aliased access by offsetting the column indices + length
            unsafe {
                let offset_ptr = (ptr as *mut N::Native).add(col_idx * height) ;
                // Safety:
                // this is uninitialized memory, so we must never read from this data
                // copy_from_slice does not read
                let buf = std::slice::from_raw_parts_mut(offset_ptr, height);
                buf.copy_from_slice(vals)
            }

            Ok(())
        }).collect::<PolarsResult<Vec<_>>>()?;

        // Safety:
        // we have written all data, so we can now safely set length
        unsafe {
            membuf.set_len(shape.0 * shape.1);
        }
        let ndarr = Array2::from_shape_vec((shape.1, shape.0), membuf).unwrap();
        Ok(ndarr.reversed_axes())
    }

src/fmt.rs (line 556)

    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        #[cfg(any(feature = "fmt", feature = "fmt_no_tty"))]
        {
            let height = self.height();
            assert!(
                self.columns.iter().all(|s| s.len() == height),
                "The column lengths in the DataFrame are not equal."
            );
            let str_truncate = std::env::var(FMT_STR_LEN)
                .as_deref()
                .unwrap_or("")
                .parse()
                .unwrap_or(32);

            let max_n_cols = std::env::var(FMT_MAX_COLS)
                .as_deref()
                .unwrap_or("")
                .parse()
                .map_or(8, |n: i64| if n < 0 { self.width() } else { n as usize });

            let max_n_rows = std::env::var(FMT_MAX_ROWS)
                .as_deref()
                .unwrap_or("")
                .parse()
                .map_or(8, |n: i64| if n < 0 { height } else { n as usize });

            let (n_first, n_last) = if self.width() > max_n_cols {
                ((max_n_cols + 1) / 2, max_n_cols / 2)
            } else {
                (self.width(), 0)
            };
            let reduce_columns = n_first + n_last < self.width();
            let mut names = Vec::with_capacity(n_first + n_last + reduce_columns as usize);

            let field_to_str = |f: &Field| {
                let name = make_str_val(f.name(), str_truncate);
                let lower_bounds = name.len().clamp(5, 12);
                let mut column_name = name;
                if env_is_true(FMT_TABLE_HIDE_COLUMN_NAMES) {
                    column_name = "".to_string();
                }
                let column_data_type = if env_is_true(FMT_TABLE_HIDE_COLUMN_DATA_TYPES) {
                    "".to_string()
                } else if env_is_true(FMT_TABLE_INLINE_COLUMN_DATA_TYPE)
                    | env_is_true(FMT_TABLE_HIDE_COLUMN_NAMES)
                {
                    format!("{}", f.data_type())
                } else {
                    format!("\n{}", f.data_type())
                };
                let mut column_separator = "\n---";
                if env_is_true(FMT_TABLE_HIDE_COLUMN_SEPARATOR)
                    | env_is_true(FMT_TABLE_HIDE_COLUMN_NAMES)
                    | env_is_true(FMT_TABLE_HIDE_COLUMN_DATA_TYPES)
                {
                    column_separator = ""
                }
                let s = if env_is_true(FMT_TABLE_INLINE_COLUMN_DATA_TYPE)
                    & !env_is_true(FMT_TABLE_HIDE_COLUMN_DATA_TYPES)
                {
                    format!("{column_name} ({column_data_type})")
                } else {
                    format!("{column_name}{column_separator}{column_data_type}")
                };
                (s, lower_bounds)
            };
            let tbl_lower_bounds =
                |l: usize| ColumnConstraint::LowerBoundary(comfy_table::Width::Fixed(l as u16));

            let mut constraints = Vec::with_capacity(n_first + n_last + reduce_columns as usize);
            let fields = self.fields();
            for field in fields[0..n_first].iter() {
                let (s, l) = field_to_str(field);
                names.push(s);
                constraints.push(tbl_lower_bounds(l));
            }
            if reduce_columns {
                names.push("...".into());
                constraints.push(tbl_lower_bounds(5));
            }
            for field in fields[self.width() - n_last..].iter() {
                let (s, l) = field_to_str(field);
                names.push(s);
                constraints.push(tbl_lower_bounds(l));
            }
            let preset = match std::env::var(FMT_TABLE_FORMATTING)
                .as_deref()
                .unwrap_or("DEFAULT")
            {
                "ASCII_FULL" => ASCII_FULL,
                "ASCII_NO_BORDERS" => ASCII_NO_BORDERS,
                "ASCII_BORDERS_ONLY" => ASCII_BORDERS_ONLY,
                "ASCII_BORDERS_ONLY_CONDENSED" => ASCII_BORDERS_ONLY_CONDENSED,
                "ASCII_HORIZONTAL_ONLY" => ASCII_HORIZONTAL_ONLY,
                "ASCII_MARKDOWN" => ASCII_MARKDOWN,
                "UTF8_FULL" => UTF8_FULL,
                "UTF8_FULL_CONDENSED" => UTF8_FULL_CONDENSED,
                "UTF8_NO_BORDERS" => UTF8_NO_BORDERS,
                "UTF8_BORDERS_ONLY" => UTF8_BORDERS_ONLY,
                "UTF8_HORIZONTAL_ONLY" => UTF8_HORIZONTAL_ONLY,
                "NOTHING" => NOTHING,
                "DEFAULT" => UTF8_FULL,
                _ => UTF8_FULL,
            };

            let mut table = Table::new();
            table
                .load_preset(preset)
                .set_content_arrangement(ContentArrangement::Dynamic);

            if max_n_rows > 0 {
                if height > max_n_rows {
                    let mut rows = Vec::with_capacity(std::cmp::max(max_n_rows, 2));
                    for i in 0..std::cmp::max(max_n_rows / 2, 1) {
                        let row = self
                            .columns
                            .iter()
                            .map(|s| s.str_value(i).unwrap())
                            .collect();
                        rows.push(prepare_row(row, n_first, n_last, str_truncate));
                    }
                    let dots = rows[0].iter().map(|_| "...".to_string()).collect();
                    rows.push(dots);
                    if max_n_rows > 1 {
                        for i in (height - (max_n_rows + 1) / 2)..height {
                            let row = self
                                .columns
                                .iter()
                                .map(|s| s.str_value(i).unwrap())
                                .collect();
                            rows.push(prepare_row(row, n_first, n_last, str_truncate));
                        }
                    }
                    table.add_rows(rows);
                } else {
                    for i in 0..height {
                        if self.width() > 0 {
                            let row = self
                                .columns
                                .iter()
                                .map(|s| s.str_value(i).unwrap())
                                .collect();
                            table.add_row(prepare_row(row, n_first, n_last, str_truncate));
                        } else {
                            break;
                        }
                    }
                }
            } else if height > 0 {
                let dots: Vec<String> = self.columns.iter().map(|_| "...".to_string()).collect();
                table.add_row(dots);
            }

            // insert a header row, unless both column names and column data types are already hidden
            if !(env_is_true(FMT_TABLE_HIDE_COLUMN_NAMES)
                && env_is_true(FMT_TABLE_HIDE_COLUMN_DATA_TYPES))
            {
                table.set_header(names).set_constraints(constraints);
            }
            let tbl_width = std::env::var("POLARS_TABLE_WIDTH")
                .map(|s| {
                    Some(
                        s.parse::<u16>()
                            .expect("could not parse table width argument"),
                    )
                })
                .unwrap_or(None);

            // if tbl_width is explicitly set, use it
            if let Some(w) = tbl_width {
                table.set_width(w);
            }

            // if no tbl_width (its not-tty && it is not explicitly set), then set default.
            // this is needed to support non-tty applications
            #[cfg(feature = "fmt")]
            if table.width().is_none() && !table.is_tty() {
                table.set_width(100);
            }
            #[cfg(feature = "fmt_no_tty")]
            if table.width().is_none() {
                table.set_width(100);
            }

            // set alignment of cells, if defined
            if std::env::var(FMT_TABLE_CELL_ALIGNMENT).is_ok() {
                // for (column_index, column) in table.column_iter_mut().enumerate() {
                let str_preset = std::env::var(FMT_TABLE_CELL_ALIGNMENT)
                    .unwrap_or_else(|_| "DEFAULT".to_string());
                for column in table.column_iter_mut() {
                    if str_preset == "RIGHT" {
                        column.set_cell_alignment(CellAlignment::Right);
                    } else if str_preset == "LEFT" {
                        column.set_cell_alignment(CellAlignment::Left);
                    } else if str_preset == "CENTER" {
                        column.set_cell_alignment(CellAlignment::Center);
                    } else {
                        column.set_cell_alignment(CellAlignment::Left);
                    }
                }
            }

            // establish 'shape' information (above/below/hidden)
            if env_is_true(FMT_TABLE_HIDE_DATAFRAME_SHAPE_INFORMATION) {
                write!(f, "{table}")?;
            } else if env_is_true(FMT_TABLE_DATAFRAME_SHAPE_BELOW) {
                write!(f, "{table}\nshape: {:?}", self.shape())?;
            } else {
                write!(f, "shape: {:?}\n{}", self.shape(), table)?;
            }
        }

        #[cfg(not(any(feature = "fmt", feature = "fmt_no_tty")))]
        {
            write!(
                f,
                "shape: {:?}\nto see more, compile with the 'fmt' or 'fmt_no_tty' feature",
                self.shape()
            )?;
        }

        Ok(())
    }

source

pub fn width(&self) -> usize

Get the width of the DataFrame which is the number of columns.

Example

let df0: DataFrame = DataFrame::default();
let df1: DataFrame = df!("Series 1" => &[0; 0])?;
let df2: DataFrame = df!("Series 1" => &[0; 0],
                         "Series 2" => &[0; 0])?;

assert_eq!(df0.width(), 0);
assert_eq!(df1.width(), 1);
assert_eq!(df2.width(), 2);

Examples found in repository ?

src/frame/hash_join/mod.rs (line 307)

pub fn _finish_join(
    mut df_left: DataFrame,
    mut df_right: DataFrame,
    suffix: Option<&str>,
) -> PolarsResult<DataFrame> {
    let mut left_names = PlHashSet::with_capacity(df_left.width());

    df_left.columns.iter().for_each(|series| {
        left_names.insert(series.name());
    });

    let mut rename_strs = Vec::with_capacity(df_right.width());

    df_right.columns.iter().for_each(|series| {
        if left_names.contains(series.name()) {
            rename_strs.push(series.name().to_owned())
        }
    });
    let suffix = suffix.unwrap_or("_right");

    for name in rename_strs {
        df_right.rename(&name, &_join_suffix_name(&name, suffix))?;
    }

    drop(left_names);
    df_left.hstack_mut(&df_right.columns)?;
    Ok(df_left)
}

More examples

Hide additional examples

src/frame/mod.rs (line 899)

    pub fn vstack_mut(&mut self, other: &DataFrame) -> PolarsResult<&mut Self> {
        if self.width() != other.width() {
            if self.width() == 0 {
                self.columns = other.columns.clone();
                return Ok(self);
            }

            return Err(PolarsError::ShapeMisMatch(
                format!("Could not vertically stack DataFrame. The DataFrames appended width {} differs from the parent DataFrames width {}", self.width(), other.width()).into()
            ));
        }

        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .try_for_each::<_, PolarsResult<_>>(|(left, right)| {
                can_extend(left, right)?;
                left.append(right).expect("should not fail");
                Ok(())
            })?;
        Ok(self)
    }

    /// Does not check if schema is correct
    pub(crate) fn vstack_mut_unchecked(&mut self, other: &DataFrame) {
        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .for_each(|(left, right)| {
                left.append(right).expect("should not fail");
            });
    }

    /// Extend the memory backed by this [`DataFrame`] with the values from `other`.
    ///
    /// Different from [`vstack`](Self::vstack) which adds the chunks from `other` to the chunks of this [`DataFrame`]
    /// `extend` appends the data from `other` to the underlying memory locations and thus may cause a reallocation.
    ///
    /// If this does not cause a reallocation, the resulting data structure will not have any extra chunks
    /// and thus will yield faster queries.
    ///
    /// Prefer `extend` over `vstack` when you want to do a query after a single append. For instance during
    /// online operations where you add `n` rows and rerun a query.
    ///
    /// Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance
    /// when you read in multiple files and when to store them in a single `DataFrame`. In the latter case, finish the sequence
    /// of `append` operations with a [`rechunk`](Self::rechunk).
    pub fn extend(&mut self, other: &DataFrame) -> PolarsResult<()> {
        if self.width() != other.width() {
            return Err(PolarsError::ShapeMisMatch(
                format!("Could not extend DataFrame. The DataFrames extended width {} differs from the parent DataFrames width {}", self.width(), other.width()).into()
            ));
        }

        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .try_for_each::<_, PolarsResult<_>>(|(left, right)| {
                can_extend(left, right)?;
                left.extend(right).unwrap();
                Ok(())
            })?;
        Ok(())
    }

    /// Remove a column by name and return the column removed.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Animal" => &["Tiger", "Lion", "Great auk"],
    ///                             "IUCN" => &["Endangered", "Vulnerable", "Extinct"])?;
    ///
    /// let s1: PolarsResult<Series> = df.drop_in_place("Average weight");
    /// assert!(s1.is_err());
    ///
    /// let s2: Series = df.drop_in_place("Animal")?;
    /// assert_eq!(s2, Series::new("Animal", &["Tiger", "Lion", "Great auk"]));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop_in_place(&mut self, name: &str) -> PolarsResult<Series> {
        let idx = self.check_name_to_idx(name)?;
        Ok(self.columns.remove(idx))
    }

    /// Return a new `DataFrame` where all null values are dropped.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Country" => ["Malta", "Liechtenstein", "North Korea"],
    ///                         "Tax revenue (% GDP)" => [Some(32.7), None, None])?;
    /// assert_eq!(df1.shape(), (3, 2));
    ///
    /// let df2: DataFrame = df1.drop_nulls(None)?;
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------------------+
    /// | Country | Tax revenue (% GDP) |
    /// | ---     | ---                 |
    /// | str     | f64                 |
    /// +=========+=====================+
    /// | Malta   | 32.7                |
    /// +---------+---------------------+
    /// ```
    pub fn drop_nulls(&self, subset: Option<&[String]>) -> PolarsResult<Self> {
        let selected_series;

        let mut iter = match subset {
            Some(cols) => {
                selected_series = self.select_series(cols)?;
                selected_series.iter()
            }
            None => self.columns.iter(),
        };

        // fast path for no nulls in df
        if iter.clone().all(|s| !s.has_validity()) {
            return Ok(self.clone());
        }

        let mask = iter
            .next()
            .ok_or_else(|| PolarsError::NoData("No data to drop nulls from".into()))?;
        let mut mask = mask.is_not_null();

        for s in iter {
            mask = mask & s.is_not_null();
        }
        self.filter(&mask)
    }

    /// Drop a column by name.
    /// This is a pure method and will return a new `DataFrame` instead of modifying
    /// the current one in place.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Ray type" => &["α", "β", "X", "γ"])?;
    /// let df2: DataFrame = df1.drop("Ray type")?;
    ///
    /// assert!(df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop(&self, name: &str) -> PolarsResult<Self> {
        let idx = self.check_name_to_idx(name)?;
        let mut new_cols = Vec::with_capacity(self.columns.len() - 1);

        self.columns.iter().enumerate().for_each(|(i, s)| {
            if i != idx {
                new_cols.push(s.clone())
            }
        });

        Ok(DataFrame::new_no_checks(new_cols))
    }

    pub fn drop_many<S: AsRef<str>>(&self, names: &[S]) -> Self {
        let names = names.iter().map(|s| s.as_ref()).collect();
        fn inner(df: &DataFrame, names: Vec<&str>) -> DataFrame {
            let mut new_cols = Vec::with_capacity(df.columns.len() - names.len());
            df.columns.iter().for_each(|s| {
                if !names.contains(&s.name()) {
                    new_cols.push(s.clone())
                }
            });

            DataFrame::new_no_checks(new_cols)
        }
        inner(self, names)
    }

    fn insert_at_idx_no_name_check(
        &mut self,
        index: usize,
        series: Series,
    ) -> PolarsResult<&mut Self> {
        if series.len() == self.height() {
            self.columns.insert(index, series);
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                .into(),
            ))
        }
    }

    /// Insert a new column at a given index.
    pub fn insert_at_idx<S: IntoSeries>(
        &mut self,
        index: usize,
        column: S,
    ) -> PolarsResult<&mut Self> {
        let series = column.into_series();
        self.check_already_present(series.name())?;
        self.insert_at_idx_no_name_check(index, series)
    }

    fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> {
        if let Some(idx) = self.find_idx_by_name(series.name()) {
            self.replace_at_idx(idx, series)?;
        } else {
            self.columns.push(series);
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    pub fn with_column<S: IntoSeries>(&mut self, column: S) -> PolarsResult<&mut Self> {
        fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();
            if series.len() == 1 && height > 1 {
                series = series.new_from_index(0, height);
            }

            if series.len() == height || df.is_empty() {
                df.add_column_by_search(series)?;
                Ok(df)
            }
            // special case for literals
            else if height == 0 && series.len() == 1 {
                let s = series.slice(0, 0);
                df.add_column_by_search(s)?;
                Ok(df)
            } else {
                Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Could not add column. The Series length {} differs from the DataFrame height: {}",
                        series.len(),
                        df.height()
                    )
                        .into(),
                ))
            }
        }
        let series = column.into_series();
        inner(self, series)
    }

    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let name = s.name();
        if let Some((idx, _, _)) = schema.get_full(name) {
            // schema is incorrect fallback to search
            if self.columns.get(idx).map(|s| s.name()) != Some(name) {
                self.add_column_by_search(s)?;
            } else {
                self.replace_at_idx(idx, s)?;
            }
        } else {
            self.columns.push(s);
        }
        Ok(())
    }

    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                self.with_column(s.clone())?;
            }
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    /// Uses an existing schema to amortize lookups.
    /// If the schema is incorrect, we will fallback to linear search.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let mut series = column.into_series();

        let height = self.height();
        if series.len() == 1 && height > 1 {
            series = series.new_from_index(0, height);
        }

        if series.len() == height || self.is_empty() {
            self.add_column_by_schema(series, schema)?;
            Ok(self)
        }
        // special case for literals
        else if height == 0 && series.len() == 1 {
            let s = series.slice(0, 0);
            self.add_column_by_schema(s, schema)?;
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                    .into(),
            ))
        }
    }

    /// Get a row in the `DataFrame`. Beware this is slow.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    ///     df.get(idx)
    /// }
    /// ```
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        match self.columns.get(0) {
            Some(s) => {
                if s.len() <= idx {
                    return None;
                }
            }
            None => return None,
        }
        // safety: we just checked bounds
        unsafe { Some(self.columns.iter().map(|s| s.get_unchecked(idx)).collect()) }
    }

    /// Select a `Series` by index.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
    ///                         "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
    ///
    /// let s1: Option<&Series> = df.select_at_idx(0);
    /// let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
    ///
    /// assert_eq!(s1, Some(&s2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_at_idx(&self, idx: usize) -> Option<&Series> {
        self.columns.get(idx)
    }

    /// Select a mutable series by index.
    ///
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> {
        self.columns.get_mut(idx)
    }

    /// Select column(s) from this `DataFrame` by range and return a new DataFrame
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///     "0" => &[0, 0, 0],
    ///     "1" => &[1, 1, 1],
    ///     "2" => &[2, 2, 2]
    /// }?;
    ///
    /// assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
    /// assert!(df.frame_equal(&df.select_by_range(..)?));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // This function is copied from std::slice::range (https://doc.rust-lang.org/std/slice/fn.range.html)
        // because it is the nightly feature. We should change here if this function were stable.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let len = bounds.end;

            let start: ops::Bound<&usize> = range.start_bound();
            let start = match start {
                ops::Bound::Included(&start) => start,
                ops::Bound::Excluded(start) => start.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice from after maximum usize");
                }),
                ops::Bound::Unbounded => 0,
            };

            let end: ops::Bound<&usize> = range.end_bound();
            let end = match end {
                ops::Bound::Included(end) => end.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice up to maximum usize");
                }),
                ops::Bound::Excluded(&end) => end,
                ops::Bound::Unbounded => len,
            };

            if start > end {
                panic!("slice index starts at {start} but ends at {end}");
            }
            if end > len {
                panic!("range end index {end} out of range for slice of length {len}",);
            }

            ops::Range { start, end }
        }

        let colnames = self.get_column_names_owned();
        let range = get_range(range, ..colnames.len());

        self.select_impl(&colnames[range])
    }

    /// Get column index of a `Series` by name.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
    ///                         "Health" => &[100, 200, 500],
    ///                         "Mana" => &[250, 100, 0],
    ///                         "Strength" => &[30, 150, 300])?;
    ///
    /// assert_eq!(df.find_idx_by_name("Name"), Some(0));
    /// assert_eq!(df.find_idx_by_name("Health"), Some(1));
    /// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
    /// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
    /// assert_eq!(df.find_idx_by_name("Haste"), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
        self.columns.iter().position(|s| s.name() == name)
    }

    /// Select a single column by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
    /// let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
    ///
    /// assert_eq!(df.column("Password")?, &s1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        let idx = self
            .find_idx_by_name(name)
            .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
        Ok(self.select_at_idx(idx).unwrap())
    }

    /// Selected multiple columns by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        names
            .into_iter()
            .map(|name| self.column(name.as_ref()))
            .collect()
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_impl(&cols)
    }

    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_physical_impl(&cols)
    }

    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_physical_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut names = PlHashSet::with_capacity(cols.len());
        for name in cols {
            if !names.insert(name.as_str()) {
                _duplicate_err(name)?
            }
        }
        Ok(())
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        let cols = selection.into_vec();
        self.select_series_impl(&cols)
    }

    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        self.columns
            .iter()
            .enumerate()
            .map(|(i, s)| (s.name(), i))
            .collect()
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            let name_to_idx = self._names_to_idx_map();
            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self
                        .select_at_idx(idx)
                        .unwrap()
                        .to_physical_repr()
                        .into_owned())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            // we hash, because there are user that having millions of columns.
            // # https://github.com/pola-rs/polars/issues/1023
            let name_to_idx = self._names_to_idx_map();

            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self.select_at_idx(idx).unwrap().clone())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.clone()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// Select a mutable series by name.
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        let opt_idx = self.find_idx_by_name(name);

        match opt_idx {
            Some(idx) => self.select_at_idx_mut(idx),
            None => None,
        }
    }

    /// Does a filter but splits thread chunks vertically instead of horizontally
    /// This yields a DataFrame with `n_chunks == n_threads`.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let masks = split_ca(mask, n_threads).unwrap();
        let dfs = split_df(self, n_threads).unwrap();
        let dfs: PolarsResult<Vec<_>> = POOL.install(|| {
            masks
                .par_iter()
                .zip(dfs)
                .map(|(mask, df)| {
                    let cols = df
                        .columns
                        .iter()
                        .map(|s| s.filter(mask))
                        .collect::<PolarsResult<_>>()?;
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        let mut iter = dfs?.into_iter();
        let first = iter.next().unwrap();
        Ok(iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        }))
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            return self.clone().filter_vertical(mask);
        }
        let new_col = self.try_apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s.filter_threaded(mask, true),
            _ => s.filter(mask),
        })?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Same as `filter` but does not parallelize.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let new_col = self.try_apply_columns(&|s| s.filter(mask))?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        let new_col = self.try_apply_columns_par(&|s| {
            let mut i = iter.clone();
            s.take_iter(&mut i)
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` values by indexes from an iterator.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            return self.take_unchecked_vectical(&idx_ca.into_inner());
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            let idx_ca = idx_ca.into_inner();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_iter_unchecked(&mut i)
            })
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` values by indexes from an iterator that may contain None values.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked_vectical(&idx_ca);
        }

        let n_chunks = self.n_chunks();

        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_opt_iter_unchecked(&mut i)
            })
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` rows by index values.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        let indices = if indices.chunks.len() > 1 {
            Cow::Owned(indices.rechunk())
        } else {
            Cow::Borrowed(indices)
        };
        let new_col = POOL.install(|| {
            self.try_apply_columns_par(&|s| match s.dtype() {
                DataType::Utf8 => s.take_threaded(&indices, true),
                _ => s.take(&indices),
            })
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        self.take_unchecked_impl(idx, true)
    }

    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        let cols = if allow_threads {
            POOL.install(|| {
                self.apply_columns_par(&|s| match s.dtype() {
                    DataType::Utf8 => s.take_unchecked_threaded(idx, true).unwrap(),
                    _ => s.take_unchecked(idx).unwrap(),
                })
            })
        } else {
            self.columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect()
        };
        DataFrame::new_no_checks(cols)
    }

    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let idxs = split_ca(indices, n_threads).unwrap();

        let dfs: Vec<_> = POOL.install(|| {
            idxs.par_iter()
                .map(|idx| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(idx).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        let mut iter = dfs.into_iter();
        let first = iter.next().unwrap();
        iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        })
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        self.select_mut(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))
            .map(|s| s.rename(name))?;

        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(self.columns.iter().map(|s| s.name()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }
        Ok(self)
    }

    /// Sort `DataFrame` in place by a column.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // a lot of indirection in both sorting and take
        self.as_single_chunk_par();
        let by_column = self.select_series(by_column)?;
        let reverse = reverse.into_vec();
        self.columns = self.sort_impl(by_column, reverse, false, None)?.columns;
        Ok(self)
    }

    /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // note that the by_column argument also contains evaluated expression from polars-lazy
        // that may not even be present in this dataframe.

        // therefore when we try to set the first columns as sorted, we ignore the error
        // as expressions are not present (they are renamed to _POLARS_SORT_COLUMN_i.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: reverse[0],
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted
        // if the column did not exists it is ok, because we sorted by an expression
        // not present in the dataframe
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted clone of this `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.sort_in_place(by_column, reverse)?;
        Ok(df)
    }

    /// Sort the `DataFrame` by a single column with extra options.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut df = self.clone();
        // a lot of indirection in both sorting and take
        df.as_single_chunk_par();
        let by_column = vec![df.column(by_column)?.clone()];
        let reverse = vec![options.descending];
        df.columns = df
            .sort_impl(by_column, reverse, options.nulls_last, None)?
            .columns;
        Ok(df)
    }

    /// Replace a column with a `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Country" => &["United States", "China"],
    ///                         "Area (km²)" => &[9_833_520, 9_596_961])?;
    /// let s: Series = Series::new("Country", &["USA", "PRC"]);
    ///
    /// assert!(df.replace("Nation", s.clone()).is_err());
    /// assert!(df.replace("Country", s).is_ok());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace<S: IntoSeries>(&mut self, column: &str, new_col: S) -> PolarsResult<&mut Self> {
        self.apply(column, |_| new_col.into_series())
    }

    /// Replace or update a column. The difference between this method and [DataFrame::with_column]
    /// is that now the value of `column: &str` determines the name of the column and not the name
    /// of the `Series` passed to this method.
    pub fn replace_or_add<S: IntoSeries>(
        &mut self,
        column: &str,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut new_col = new_col.into_series();
        new_col.rename(column);
        self.with_column(new_col)
    }

    /// Replace column at index `idx` with a `Series`.
    ///
    /// # Example
    ///
    /// ```ignored
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace_at_idx<S: IntoSeries>(
        &mut self,
        idx: usize,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut new_column = new_col.into_series();
        if new_column.len() != self.height() {
            return Err(PolarsError::ShapeMisMatch(
                format!("Cannot replace Series at index {}. The shape of Series {} does not match that of the DataFrame {}",
                idx, new_column.len(), self.height()
                ).into()));
        };
        if idx >= self.width() {
            return Err(PolarsError::ComputeError(
                format!(
                    "Column index: {} outside of DataFrame with {} columns",
                    idx,
                    self.width()
                )
                .into(),
            ));
        }
        let old_col = &mut self.columns[idx];
        mem::swap(old_col, &mut new_column);
        Ok(self)
    }

    /// Apply a closure to a column. This is the recommended way to do in place modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("names", &["Jean", "Claude", "van"]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// fn str_to_len(str_val: &Series) -> Series {
    ///     str_val.utf8()
    ///         .unwrap()
    ///         .into_iter()
    ///         .map(|opt_name: Option<&str>| {
    ///             opt_name.map(|name: &str| name.len() as u32)
    ///          })
    ///         .collect::<UInt32Chunked>()
    ///         .into_series()
    /// }
    ///
    /// // Replace the names column by the length of the names.
    /// df.apply("names", str_to_len);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    |       |
    /// | ---    | names |
    /// | str    | u32   |
    /// +========+=======+
    /// | "ham"  | 4     |
    /// +--------+-------+
    /// | "spam" | 6     |
    /// +--------+-------+
    /// | "egg"  | 3     |
    /// +--------+-------+
    /// ```
    pub fn apply<F, S>(&mut self, name: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let idx = self.check_name_to_idx(name)?;
        self.apply_at_idx(idx, f)
    }

    /// Apply a closure to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.apply_at_idx(1, |s| s + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | ascii |
    /// | ---    | ---   |
    /// | str    | i32   |
    /// +========+=======+
    /// | "ham"  | 102   |
    /// +--------+-------+
    /// | "spam" | 111   |
    /// +--------+-------+
    /// | "egg"  | 111   |
    /// +--------+-------+
    /// ```
    pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let df_height = self.height();
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();
        let new_col = f(col).into_series();
        match new_col.len() {
            1 => {
                let new_col = new_col.new_from_index(0, df_height);
                let _ = mem::replace(col, new_col);
            }
            len if (len == df_height) => {
                let _ = mem::replace(col, new_col);
            }
            len => {
                return Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Result Series has shape {} where the DataFrame has height {}",
                        len,
                        self.height()
                    )
                    .into(),
                ));
            }
        }

        // make sure the name remains the same after applying the closure
        unsafe {
            let col = self.columns.get_unchecked_mut(idx);
            col.rename(&name);
        }
        Ok(self)
    }

    /// Apply a closure that may fail to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values a column of a `DataFrame` given range of indexes.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// let idx = vec![0, 1, 4];
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set_at_idx_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "ham-is-modified"   | 1      |
    /// +---------------------+--------+
    /// | "spam-is-modified"  | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "quack-is-modified" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();

        let _ = mem::replace(col, f(col).map(|s| s.into_series())?);

        // make sure the name remains the same after applying the closure
        unsafe {
            let col = self.columns.get_unchecked_mut(idx);
            col.rename(&name);
        }
        Ok(self)
    }

    /// Apply a closure that may fail to a column. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values a column of a `DataFrame` given a boolean mask.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // create a mask
    /// let values = df.column("values")?;
    /// let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?;
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set(&mask, Some("not_within_bounds"))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "not_within_bounds" | 1      |
    /// +---------------------+--------+
    /// | "spam"              | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "not_within_bounds" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply<F, S>(&mut self, column: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let idx = self
            .find_idx_by_name(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))?;
        self.try_apply_at_idx(idx, f)
    }

    /// Slice the `DataFrame` along the rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Fruit" => &["Apple", "Grape", "Grape", "Fig", "Fig"],
    ///                         "Color" => &["Green", "Red", "White", "White", "Red"])?;
    /// let sl: DataFrame = df.slice(2, 3);
    ///
    /// assert_eq!(sl.shape(), (3, 2));
    /// println!("{}", sl);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Output:
    /// ```text
    /// shape: (3, 2)
    /// +-------+-------+
    /// | Fruit | Color |
    /// | ---   | ---   |
    /// | str   | str   |
    /// +=======+=======+
    /// | Grape | White |
    /// +-------+-------+
    /// | Fig   | White |
    /// +-------+-------+
    /// | Fig   | Red   |
    /// +-------+-------+
    /// ```
    #[must_use]
    pub fn slice(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        let col = self
            .columns
            .iter()
            .map(|s| s.slice(offset, length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    #[must_use]
    pub fn slice_par(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.slice(offset, length)))
    }

    #[must_use]
    pub fn _slice_and_realloc(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        DataFrame::new_no_checks(self.apply_columns(&|s| {
            let mut out = s.slice(offset, length);
            out.shrink_to_fit();
            out
        }))
    }

    /// Get the head of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank by GDP (2021)" => &[1, 2, 3, 4, 5],
    ///         "Continent" => &["North America", "Asia", "Asia", "Europe", "Europe"],
    ///         "Country" => &["United States", "China", "Japan", "Germany", "United Kingdom"],
    ///         "Capital" => &["Washington", "Beijing", "Tokyo", "Berlin", "London"])?;
    /// assert_eq!(countries.shape(), (5, 4));
    ///
    /// println!("{}", countries.head(Some(3)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 4)
    /// +--------------------+---------------+---------------+------------+
    /// | Rank by GDP (2021) | Continent     | Country       | Capital    |
    /// | ---                | ---           | ---           | ---        |
    /// | i32                | str           | str           | str        |
    /// +====================+===============+===============+============+
    /// | 1                  | North America | United States | Washington |
    /// +--------------------+---------------+---------------+------------+
    /// | 2                  | Asia          | China         | Beijing    |
    /// +--------------------+---------------+---------------+------------+
    /// | 3                  | Asia          | Japan         | Tokyo      |
    /// +--------------------+---------------+---------------+------------+
    /// ```
    #[must_use]
    pub fn head(&self, length: Option<usize>) -> Self {
        let col = self
            .columns
            .iter()
            .map(|s| s.head(length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Get the tail of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank (2021)" => &[105, 106, 107, 108, 109],
    ///         "Apple Price (€/kg)" => &[0.75, 0.70, 0.70, 0.65, 0.52],
    ///         "Country" => &["Kosovo", "Moldova", "North Macedonia", "Syria", "Turkey"])?;
    /// assert_eq!(countries.shape(), (5, 3));
    ///
    /// println!("{}", countries.tail(Some(2)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (2, 3)
    /// +-------------+--------------------+---------+
    /// | Rank (2021) | Apple Price (€/kg) | Country |
    /// | ---         | ---                | ---     |
    /// | i32         | f64                | str     |
    /// +=============+====================+=========+
    /// | 108         | 0.63               | Syria   |
    /// +-------------+--------------------+---------+
    /// | 109         | 0.63               | Turkey  |
    /// +-------------+--------------------+---------+
    /// ```
    #[must_use]
    pub fn tail(&self, length: Option<usize>) -> Self {
        let col = self
            .columns
            .iter()
            .map(|s| s.tail(length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks(&self) -> RecordBatchIter {
        RecordBatchIter {
            columns: &self.columns,
            idx: 0,
            n_chunks: self.n_chunks(),
        }
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches as physical values.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks_physical(&self) -> PhysRecordBatchIter<'_> {
        PhysRecordBatchIter {
            iters: self.columns.iter().map(|s| s.chunks().iter()).collect(),
        }
    }

    /// Get a `DataFrame` with all the columns in reversed order.
    #[must_use]
    pub fn reverse(&self) -> Self {
        let col = self.columns.iter().map(|s| s.reverse()).collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Shift the values by a given period and fill the parts that will be empty due to this operation
    /// with `Nones`.
    ///
    /// See the method on [Series](../series/trait.SeriesTrait.html#method.shift) for more info on the `shift` operation.
    #[must_use]
    pub fn shift(&self, periods: i64) -> Self {
        let col = self.apply_columns_par(&|s| s.shift(periods));

        DataFrame::new_no_checks(col)
    }

    /// Replace None values with one of the following strategies:
    /// * Forward fill (replace None with the previous value)
    /// * Backward fill (replace None with the next value)
    /// * Mean fill (replace None with the mean of the whole array)
    /// * Min fill (replace None with the minimum of the whole array)
    /// * Max fill (replace None with the maximum of the whole array)
    ///
    /// See the method on [Series](../series/trait.SeriesTrait.html#method.fill_null) for more info on the `fill_null` operation.
    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
        let col = self.try_apply_columns_par(&|s| s.fill_null(strategy))?;

        Ok(DataFrame::new_no_checks(col))
    }

    /// Summary statistics for a DataFrame. Only summarizes numeric datatypes at the moment and returns nulls for non numeric datatypes.
    /// Try in keep output similar to pandas
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("categorical" => &["d","e","f"],
    ///                          "numeric" => &[1, 2, 3],
    ///                          "object" => &["a", "b", "c"])?;
    /// assert_eq!(df1.shape(), (3, 3));
    ///
    /// let df2: DataFrame = df1.describe(None);
    /// assert_eq!(df2.shape(), (8, 4));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (8, 4)
    /// ┌──────────┬─────────────┬─────────┬────────┐
    /// │ describe ┆ categorical ┆ numeric ┆ object │
    /// │ ---      ┆ ---         ┆ ---     ┆ ---    │
    /// │ str      ┆ f64         ┆ f64     ┆ f64    │
    /// ╞══════════╪═════════════╪═════════╪════════╡
    /// │ count    ┆ 3.0         ┆ 3.0     ┆ 3.0    │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ mean     ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ std      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ min      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 25%      ┆ null        ┆ 1.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 50%      ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 75%      ┆ null        ┆ 2.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ max      ┆ null        ┆ 3.0     ┆ null   │
    /// └──────────┴─────────────┴─────────┴────────┘
    /// ```
    #[must_use]
    #[cfg(feature = "describe")]
    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let mut columns: Vec<Series> = vec![];

            for s in df.columns.iter() {
                columns.push(s.cast(&DataType::Float64).expect("cast to float failed"));
            }

            DataFrame::new(columns).unwrap()
        }

        fn count(df: &DataFrame) -> DataFrame {
            let columns = df.apply_columns_par(&|s| Series::new(s.name(), [s.len() as IdxSize]));
            DataFrame::new_no_checks(columns)
        }

        let percentiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        let mut headers: Vec<String> = vec![
            "count".to_string(),
            "mean".to_string(),
            "std".to_string(),
            "min".to_string(),
        ];

        let mut tmp: Vec<DataFrame> = vec![
            describe_cast(&count(self)),
            describe_cast(&self.mean()),
            describe_cast(&self.std(1)),
            describe_cast(&self.min()),
        ];

        for p in percentiles {
            tmp.push(describe_cast(
                &self
                    .quantile(*p, QuantileInterpolOptions::Linear)
                    .expect("quantile failed"),
            ));
            headers.push(format!("{}%", *p * 100.0));
        }

        // Keep order same as pandas
        tmp.push(describe_cast(&self.max()));
        headers.push("max".to_string());

        let mut summary = concat_df_unchecked(&tmp);

        summary
            .insert_at_idx(0, Series::new("describe", headers))
            .expect("insert of header failed");

        summary
    }

    /// Aggregate the columns to their maximum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.max();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 6       | 5       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn max(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.max_as_series());

        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their standard deviation values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.std(1);
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +-------------------+--------------------+
    /// | Die n°1           | Die n°2            |
    /// | ---               | ---                |
    /// | f64               | f64                |
    /// +===================+====================+
    /// | 2.280350850198276 | 1.0954451150103321 |
    /// +-------------------+--------------------+
    /// ```
    #[must_use]
    pub fn std(&self, ddof: u8) -> Self {
        let columns = self.apply_columns_par(&|s| s.std_as_series(ddof));

        DataFrame::new_no_checks(columns)
    }
    /// Aggregate the columns to their variation values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.var(1);
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 5.2     | 1.2     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn var(&self, ddof: u8) -> Self {
        let columns = self.apply_columns_par(&|s| s.var_as_series(ddof));
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their minimum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.min();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 1       | 2       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn min(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.min_as_series());
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their sum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.sum();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 16      | 16      |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn sum(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.sum_as_series());
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their mean values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.mean();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 3.2     | 3.2     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn mean(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.mean_as_series());
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their median values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.median();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 3       | 3       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn median(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.median_as_series());
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their quantile values.
    pub fn quantile(&self, quantile: f64, interpol: QuantileInterpolOptions) -> PolarsResult<Self> {
        let columns = self.try_apply_columns_par(&|s| s.quantile_as_series(quantile, interpol))?;

        Ok(DataFrame::new_no_checks(columns))
    }

    /// Aggregate the column horizontally to their min values.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmin(&self) -> PolarsResult<Option<Series>> {
        let min_fn = |acc: &Series, s: &Series| {
            let mask = acc.lt(s)? & acc.is_not_null() | s.is_null();
            acc.zip_with(&mask, s)
        };

        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            2 => min_fn(&self.columns[0], &self.columns[1]).map(Some),
            _ => {
                // the try_reduce_with is a bit slower in parallelism,
                // but I don't think it matters here as we parallelize over columns, not over elements
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| min_fn(&l, &r).map(Cow::Owned))
                        // we can unwrap the option, because we are certain there is a column
                        // we started this operation on 3 columns
                        .unwrap()
                        .map(|cow| Some(cow.into_owned()))
                })
            }
        }
    }

    /// Aggregate the column horizontally to their max values.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmax(&self) -> PolarsResult<Option<Series>> {
        let max_fn = |acc: &Series, s: &Series| {
            let mask = acc.gt(s)? & acc.is_not_null() | s.is_null();
            acc.zip_with(&mask, s)
        };

        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            2 => max_fn(&self.columns[0], &self.columns[1]).map(Some),
            _ => {
                // the try_reduce_with is a bit slower in parallelism,
                // but I don't think it matters here as we parallelize over columns, not over elements
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| max_fn(&l, &r).map(Cow::Owned))
                        // we can unwrap the option, because we are certain there is a column
                        // we started this operation on 3 columns
                        .unwrap()
                        .map(|cow| Some(cow.into_owned()))
                })
            }
        }
    }

    /// Aggregate the column horizontally to their sum values.
    pub fn hsum(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>> {
        let sum_fn =
            |acc: &Series, s: &Series, none_strategy: NullStrategy| -> PolarsResult<Series> {
                let mut acc = acc.clone();
                let mut s = s.clone();
                if let NullStrategy::Ignore = none_strategy {
                    // if has nulls
                    if acc.has_validity() {
                        acc = acc.fill_null(FillNullStrategy::Zero)?;
                    }
                    if s.has_validity() {
                        s = s.fill_null(FillNullStrategy::Zero)?;
                    }
                }
                Ok(&acc + &s)
            };

        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            2 => sum_fn(&self.columns[0], &self.columns[1], none_strategy).map(Some),
            _ => {
                // the try_reduce_with is a bit slower in parallelism,
                // but I don't think it matters here as we parallelize over columns, not over elements
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| sum_fn(&l, &r, none_strategy).map(Cow::Owned))
                        // we can unwrap the option, because we are certain there is a column
                        // we started this operation on 3 columns
                        .unwrap()
                        .map(|cow| Some(cow.into_owned()))
                })
            }
        }
    }

    /// Aggregate the column horizontally to their mean values.
    pub fn hmean(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>> {
        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            _ => {
                let columns = self
                    .columns
                    .iter()
                    .cloned()
                    .filter(|s| {
                        let dtype = s.dtype();
                        dtype.is_numeric() || matches!(dtype, DataType::Boolean)
                    })
                    .collect();
                let numeric_df = DataFrame::new_no_checks(columns);

                let sum = || numeric_df.hsum(none_strategy);

                let null_count = || {
                    numeric_df
                        .columns
                        .par_iter()
                        .map(|s| s.is_null().cast(&DataType::UInt32).unwrap())
                        .reduce_with(|l, r| &l + &r)
                        // we can unwrap the option, because we are certain there is a column
                        // we started this operation on 2 columns
                        .unwrap()
                };

                let (sum, null_count) = POOL.install(|| rayon::join(sum, null_count));
                let sum = sum?;

                // value lengths: len - null_count
                let value_length: UInt32Chunked =
                    (numeric_df.width().sub(&null_count)).u32().unwrap().clone();

                // make sure that we do not divide by zero
                // by replacing with None
                let value_length = value_length
                    .set(&value_length.equal(0), None)?
                    .into_series()
                    .cast(&DataType::Float64)?;

                Ok(sum.map(|sum| &sum / &value_length))
            }
        }
    }

src/functions.rs (line 260)

pub fn diag_concat_df(dfs: &[DataFrame]) -> PolarsResult<DataFrame> {
    // TODO! replace with lazy only?
    let upper_bound_width = dfs.iter().map(|df| df.width()).sum();
    let mut column_names = AHashSet::with_capacity(upper_bound_width);
    let mut schema = Vec::with_capacity(upper_bound_width);

    for df in dfs {
        df.get_columns().iter().for_each(|s| {
            let name = s.name();
            if column_names.insert(name) {
                schema.push((name, s.dtype()))
            }
        });
    }

    let dfs = dfs
        .iter()
        .map(|df| {
            let height = df.height();
            let mut columns = Vec::with_capacity(schema.len());

            for (name, dtype) in &schema {
                match df.column(name).ok() {
                    Some(s) => columns.push(s.clone()),
                    None => columns.push(Series::full_null(name, height, dtype)),
                }
            }
            DataFrame::new_no_checks(columns)
        })
        .collect::<Vec<_>>();

    concat_df(&dfs)
}

src/frame/arithmetic.rs (line 117)

    fn binary_aligned(
        &self,
        other: &DataFrame,
        f: &(dyn Fn(&Series, &Series) -> PolarsResult<Series> + Sync + Send),
    ) -> PolarsResult<DataFrame> {
        let max_len = std::cmp::max(self.height(), other.height());
        let max_width = std::cmp::max(self.width(), other.width());
        let mut cols = self
            .get_columns()
            .par_iter()
            .zip(other.get_columns().par_iter())
            .map(|(l, r)| {
                let diff_l = max_len - l.len();
                let diff_r = max_len - r.len();

                let st = try_get_supertype(l.dtype(), r.dtype())?;
                let mut l = l.cast(&st)?;
                let mut r = r.cast(&st)?;

                if diff_l > 0 {
                    l = l.extend_constant(AnyValue::Null, diff_l)?;
                };
                if diff_r > 0 {
                    r = r.extend_constant(AnyValue::Null, diff_r)?;
                };

                f(&l, &r)
            })
            .collect::<PolarsResult<Vec<_>>>()?;

        let col_len = cols.len();
        if col_len < max_width {
            let df = if col_len < self.width() { self } else { other };

            for i in col_len..max_len {
                let s = &df.get_columns()[i];
                let name = s.name();
                let dtype = s.dtype();

                // trick to fill a series with nulls
                let vals: &[Option<i32>] = &[None];
                let s = Series::new(name, vals).cast(dtype)?;
                cols.push(s.new_from_index(0, max_len))
            }
        }
        DataFrame::new(cols)
    }

src/frame/asof_join/groups.rs (line 585)

fn dispatch_join<T: PolarsNumericType>(
    left_asof: &ChunkedArray<T>,
    right_asof: &ChunkedArray<T>,
    left_by_s: &Series,
    right_by_s: &Series,
    left_by: &mut DataFrame,
    right_by: &mut DataFrame,
    strategy: AsofStrategy,
    tolerance: Option<AnyValue<'static>>,
) -> PolarsResult<Vec<Option<IdxSize>>> {
    let out = if left_by.width() == 1 {
        match left_by_s.dtype() {
            DataType::Utf8 => asof_join_by_utf8(
                left_by_s.utf8().unwrap(),
                right_by_s.utf8().unwrap(),
                left_asof,
                right_asof,
                tolerance,
                strategy,
            ),
            _ => {
                if left_by_s.bit_repr_is_large() {
                    let left_by = left_by_s.bit_repr_large();
                    let right_by = right_by_s.bit_repr_large();
                    asof_join_by_numeric(
                        &left_by, &right_by, left_asof, right_asof, tolerance, strategy,
                    )?
                } else {
                    let left_by = left_by_s.bit_repr_small();
                    let right_by = right_by_s.bit_repr_small();
                    asof_join_by_numeric(
                        &left_by, &right_by, left_asof, right_asof, tolerance, strategy,
                    )?
                }
            }
        }
    } else {
        for (lhs, rhs) in left_by.get_columns().iter().zip(right_by.get_columns()) {
            check_asof_columns(lhs, rhs)?;
            #[cfg(feature = "dtype-categorical")]
            _check_categorical_src(lhs.dtype(), rhs.dtype())?;
        }
        asof_join_by_multiple(
            left_by, right_by, left_asof, right_asof, tolerance, strategy,
        )
    };
    Ok(out)
}

src/frame/explode.rs (line 67)

    pub fn explode_impl(&self, mut columns: Vec<Series>) -> PolarsResult<DataFrame> {
        let mut df = self.clone();
        if self.height() == 0 {
            for s in &columns {
                df.with_column(s.explode()?)?;
            }
            return Ok(df);
        }
        columns.sort_by(|sa, sb| {
            self.check_name_to_idx(sa.name())
                .expect("checked above")
                .partial_cmp(&self.check_name_to_idx(sb.name()).expect("checked above"))
                .expect("cmp usize -> Ordering")
        });

        // first remove all the exploded columns
        for s in &columns {
            df = df.drop(s.name())?;
        }

        for (i, s) in columns.iter().enumerate() {
            // Safety:
            // offsets don't have indices exceeding Series length.
            if let Ok((exploded, offsets)) = get_exploded(s) {
                let col_idx = self.check_name_to_idx(s.name())?;

                // expand all the other columns based the exploded first column
                if i == 0 {
                    let row_idx = offsets_to_indexes(offsets.as_slice(), exploded.len());
                    let mut row_idx = IdxCa::from_vec("", row_idx);
                    row_idx.set_sorted(false);

                    // Safety
                    // We just created indices that are in bounds.
                    df = unsafe { df.take_unchecked(&row_idx) };
                }
                if exploded.len() == df.height() || df.width() == 0 {
                    df.columns.insert(col_idx, exploded);
                } else {
                    return Err(PolarsError::ShapeMisMatch(
                        format!("The exploded column(s) don't have the same length. Length DataFrame: {}. Length exploded column {}: {}", df.height(), exploded.name(), exploded.len()).into(),
                    ));
                }
            } else {
                return Err(PolarsError::InvalidOperation(
                    format!("cannot explode dtype: {:?}", s.dtype()).into(),
                ));
            }
        }
        Ok(df)
    }
    /// Explode `DataFrame` to long format by exploding a column with Lists.
    ///
    /// # Example
    ///
    /// ```ignore
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("a", &[1i64, 2, 3]);
    /// let s1 = Series::new("b", &[1i64, 1, 1]);
    /// let s2 = Series::new("c", &[2i64, 2, 2]);
    /// let list = Series::new("foo", &[s0, s1, s2]);
    ///
    /// let s0 = Series::new("B", [1, 2, 3]);
    /// let s1 = Series::new("C", [1, 1, 1]);
    /// let df = DataFrame::new(vec![list, s0, s1])?;
    /// let exploded = df.explode(["foo"])?;
    ///
    /// println!("{:?}", df);
    /// println!("{:?}", exploded);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Outputs:
    ///
    /// ```text
    ///  +-------------+-----+-----+
    ///  | foo         | B   | C   |
    ///  | ---         | --- | --- |
    ///  | list [i64]  | i32 | i32 |
    ///  +=============+=====+=====+
    ///  | "[1, 2, 3]" | 1   | 1   |
    ///  +-------------+-----+-----+
    ///  | "[1, 1, 1]" | 2   | 1   |
    ///  +-------------+-----+-----+
    ///  | "[2, 2, 2]" | 3   | 1   |
    ///  +-------------+-----+-----+
    ///
    ///  +-----+-----+-----+
    ///  | foo | B   | C   |
    ///  | --- | --- | --- |
    ///  | i64 | i32 | i32 |
    ///  +=====+=====+=====+
    ///  | 1   | 1   | 1   |
    ///  +-----+-----+-----+
    ///  | 2   | 1   | 1   |
    ///  +-----+-----+-----+
    ///  | 3   | 1   | 1   |
    ///  +-----+-----+-----+
    ///  | 1   | 2   | 1   |
    ///  +-----+-----+-----+
    ///  | 1   | 2   | 1   |
    ///  +-----+-----+-----+
    ///  | 1   | 2   | 1   |
    ///  +-----+-----+-----+
    ///  | 2   | 3   | 1   |
    ///  +-----+-----+-----+
    ///  | 2   | 3   | 1   |
    ///  +-----+-----+-----+
    ///  | 2   | 3   | 1   |
    ///  +-----+-----+-----+
    /// ```
    pub fn explode<I, S>(&self, columns: I) -> PolarsResult<DataFrame>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        // We need to sort the column by order of original occurrence. Otherwise the insert by index
        // below will panic
        let columns = self.select_series(columns)?;
        self.explode_impl(columns)
    }

    ///
    /// Unpivot a `DataFrame` from wide to long format.
    ///
    /// # Example
    ///
    /// # Arguments
    ///
    /// * `id_vars` - String slice that represent the columns to use as id variables.
    /// * `value_vars` - String slice that represent the columns to use as value variables.
    ///
    /// If `value_vars` is empty all columns that are not in `id_vars` will be used.
    ///
    /// ```ignore
    /// # use polars_core::prelude::*;
    /// let df = df!("A" => &["a", "b", "a"],
    ///              "B" => &[1, 3, 5],
    ///              "C" => &[10, 11, 12],
    ///              "D" => &[2, 4, 6]
    ///     )?;
    ///
    /// let melted = df.melt(&["A", "B"], &["C", "D"])?;
    /// println!("{:?}", df);
    /// println!("{:?}", melted);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Outputs:
    /// ```text
    ///  +-----+-----+-----+-----+
    ///  | A   | B   | C   | D   |
    ///  | --- | --- | --- | --- |
    ///  | str | i32 | i32 | i32 |
    ///  +=====+=====+=====+=====+
    ///  | "a" | 1   | 10  | 2   |
    ///  +-----+-----+-----+-----+
    ///  | "b" | 3   | 11  | 4   |
    ///  +-----+-----+-----+-----+
    ///  | "a" | 5   | 12  | 6   |
    ///  +-----+-----+-----+-----+
    ///
    ///  +-----+-----+----------+-------+
    ///  | A   | B   | variable | value |
    ///  | --- | --- | ---      | ---   |
    ///  | str | i32 | str      | i32   |
    ///  +=====+=====+==========+=======+
    ///  | "a" | 1   | "C"      | 10    |
    ///  +-----+-----+----------+-------+
    ///  | "b" | 3   | "C"      | 11    |
    ///  +-----+-----+----------+-------+
    ///  | "a" | 5   | "C"      | 12    |
    ///  +-----+-----+----------+-------+
    ///  | "a" | 1   | "D"      | 2     |
    ///  +-----+-----+----------+-------+
    ///  | "b" | 3   | "D"      | 4     |
    ///  +-----+-----+----------+-------+
    ///  | "a" | 5   | "D"      | 6     |
    ///  +-----+-----+----------+-------+
    /// ```
    pub fn melt<I, J>(&self, id_vars: I, value_vars: J) -> PolarsResult<Self>
    where
        I: IntoVec<String>,
        J: IntoVec<String>,
    {
        let id_vars = id_vars.into_vec();
        let value_vars = value_vars.into_vec();
        self.melt2(MeltArgs {
            id_vars,
            value_vars,
            ..Default::default()
        })
    }

    /// Similar to melt, but without generics. This may be easier if you want to pass
    /// an empty `id_vars` or empty `value_vars`.
    pub fn melt2(&self, args: MeltArgs) -> PolarsResult<Self> {
        let id_vars = args.id_vars;
        let mut value_vars = args.value_vars;

        let value_name = args.value_name.as_deref().unwrap_or("value");
        let variable_name = args.variable_name.as_deref().unwrap_or("variable");

        let len = self.height();

        // if value vars is empty we take all columns that are not in id_vars.
        if value_vars.is_empty() {
            let id_vars_set = PlHashSet::from_iter(id_vars.iter().map(|s| s.as_str()));
            value_vars = self
                .get_columns()
                .iter()
                .filter_map(|s| {
                    if id_vars_set.contains(s.name()) {
                        None
                    } else {
                        Some(s.name().to_string())
                    }
                })
                .collect();
        }

        // values will all be placed in single column, so we must find their supertype
        let schema = self.schema();
        let mut iter = value_vars.iter().map(|v| {
            schema
                .get(v)
                .ok_or_else(|| PolarsError::NotFound(v.to_string().into()))
        });
        let mut st = iter.next().unwrap()?.clone();
        for dt in iter {
            st = try_get_supertype(&st, dt?)?;
        }

        let values_len = value_vars.iter().map(|name| name.len()).sum::<usize>();

        // The column name of the variable that is melted
        let mut variable_col = MutableUtf8Array::<i64>::with_capacities(
            len * value_vars.len() + 1,
            len * values_len + 1,
        );
        // prepare ids
        let ids_ = self.select(id_vars)?;
        let mut ids = ids_.clone();
        if ids.width() > 0 {
            for _ in 0..value_vars.len() - 1 {
                ids.vstack_mut_unchecked(&ids_)
            }
        }
        ids.as_single_chunk_par();
        drop(ids_);

        let mut values = Vec::with_capacity(value_vars.len());

        for value_column_name in &value_vars {
            variable_col.extend_trusted_len_values(std::iter::repeat(value_column_name).take(len));
            let value_col = self.column(value_column_name)?.cast(&st)?;
            values.extend_from_slice(value_col.chunks())
        }
        let values_arr = concatenate_owned_unchecked(&values)?;
        // Safety
        // The give dtype is correct
        let values =
            unsafe { Series::from_chunks_and_dtype_unchecked(value_name, vec![values_arr], &st) };

        let variable_col = variable_col.as_box();
        // Safety
        // The give dtype is correct
        let variables = unsafe {
            Series::from_chunks_and_dtype_unchecked(
                variable_name,
                vec![variable_col],
                &DataType::Utf8,
            )
        };

        ids.hstack_mut(&[variables, values])?;

        Ok(ids)
    }

Additional examples can be found in:

source

pub fn height(&self) -> usize

Get the height of the DataFrame which is the number of rows.

Example

let df0: DataFrame = DataFrame::default();
let df1: DataFrame = df!("Currency" => &["€", "$"])?;
let df2: DataFrame = df!("Currency" => &["€", "$", "¥", "£", "₿"])?;

assert_eq!(df0.height(), 0);
assert_eq!(df1.height(), 2);
assert_eq!(df2.height(), 5);

Examples found in repository ?

src/frame/hash_join/mod.rs (line 336)

    unsafe fn create_left_df_chunked(&self, chunk_ids: &[ChunkId], left_join: bool) -> DataFrame {
        if left_join && chunk_ids.len() == self.height() {
            self.clone()
        } else {
            // left join keys are in ascending order
            let sorted = if left_join {
                IsSorted::Ascending
            } else {
                IsSorted::Not
            };
            self.take_chunked_unchecked(chunk_ids, sorted)
        }
    }

    /// # Safety
    /// Join tuples must be in bounds
    pub unsafe fn _create_left_df_from_slice(
        &self,
        join_tuples: &[IdxSize],
        left_join: bool,
        sorted: bool,
    ) -> DataFrame {
        if left_join && join_tuples.len() == self.height() {
            self.clone()
        } else {
            let sorted = if left_join || sorted {
                IsSorted::Ascending
            } else {
                IsSorted::Not
            };

            // left join tuples are always in ascending order
            self._take_unchecked_slice2(join_tuples, true, sorted)
        }
    }

More examples

Hide additional examples

src/frame/mod.rs (line 371)

    pub fn with_row_count(&self, name: &str, offset: Option<IdxSize>) -> PolarsResult<Self> {
        let mut columns = Vec::with_capacity(self.columns.len() + 1);
        let offset = offset.unwrap_or(0);

        let mut ca = IdxCa::from_vec(
            name,
            (offset..(self.height() as IdxSize) + offset).collect(),
        );
        ca.set_sorted(false);
        columns.push(ca.into_series());

        columns.extend_from_slice(&self.columns);
        DataFrame::new(columns)
    }

    /// Add a row count in place.
    pub fn with_row_count_mut(&mut self, name: &str, offset: Option<IdxSize>) -> &mut Self {
        let offset = offset.unwrap_or(0);
        let mut ca = IdxCa::from_vec(
            name,
            (offset..(self.height() as IdxSize) + offset).collect(),
        );
        ca.set_sorted(false);

        self.columns.insert(0, ca.into_series());
        self
    }

    /// Create a new `DataFrame` but does not check the length or duplicate occurrence of the `Series`.
    ///
    /// It is advised to use [Series::new](Series::new) in favor of this method.
    ///
    /// # Panic
    /// It is the callers responsibility to uphold the contract of all `Series`
    /// having an equal length, if not this may panic down the line.
    pub const fn new_no_checks(columns: Vec<Series>) -> DataFrame {
        DataFrame { columns }
    }

    /// Aggregate all chunks to contiguous memory.
    #[must_use]
    pub fn agg_chunks(&self) -> Self {
        // Don't parallelize this. Memory overhead
        let f = |s: &Series| s.rechunk();
        let cols = self.columns.iter().map(f).collect();
        DataFrame::new_no_checks(cols)
    }

    /// Shrink the capacity of this DataFrame to fit its length.
    pub fn shrink_to_fit(&mut self) {
        // Don't parallelize this. Memory overhead
        for s in &mut self.columns {
            s.shrink_to_fit();
        }
    }

    /// Aggregate all the chunks in the DataFrame to a single chunk.
    pub fn as_single_chunk(&mut self) -> &mut Self {
        // Don't parallelize this. Memory overhead
        for s in &mut self.columns {
            *s = s.rechunk();
        }
        self
    }

    /// Aggregate all the chunks in the DataFrame to a single chunk in parallel.
    /// This may lead to more peak memory consumption.
    pub fn as_single_chunk_par(&mut self) -> &mut Self {
        if self.columns.iter().any(|s| s.n_chunks() > 1) {
            self.columns = self.apply_columns_par(&|s| s.rechunk());
        }
        self
    }

    /// Estimates of the DataFrames columns consist of the same chunk sizes
    pub fn should_rechunk(&self) -> bool {
        let hb = RandomState::default();
        let hb2 = RandomState::with_seeds(392498, 98132457, 0, 412059);
        !self
            .columns
            .iter()
            // The idea is that we create a hash of the chunk lengths.
            // Consisting of the combined hash + the sum (assuming collision probability is nihil)
            // if not, we can add more hashes or at worst case we do an extra rechunk.
            // the old solution to this was clone all lengths to a vec and compare the vecs
            .map(|s| {
                s.chunk_lengths().map(|i| i as u64).fold(
                    (0u64, 0u64, s.n_chunks()),
                    |(lhash, lh2, n), rval| {
                        let mut h = hb.build_hasher();
                        rval.hash(&mut h);
                        let rhash = h.finish();
                        let mut h = hb2.build_hasher();
                        rval.hash(&mut h);
                        let rh2 = h.finish();
                        (
                            _boost_hash_combine(lhash, rhash),
                            _boost_hash_combine(lh2, rh2),
                            n,
                        )
                    },
                )
            })
            .all_equal()
    }

    /// Ensure all the chunks in the DataFrame are aligned.
    pub fn rechunk(&mut self) -> &mut Self {
        if self.should_rechunk() {
            self.as_single_chunk_par()
        } else {
            self
        }
    }

    /// Get the `DataFrame` schema.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Thing" => &["Observable universe", "Human stupidity"],
    ///                         "Diameter (m)" => &[8.8e26, f64::INFINITY])?;
    ///
    /// let f1: Field = Field::new("Thing", DataType::Utf8);
    /// let f2: Field = Field::new("Diameter (m)", DataType::Float64);
    /// let sc: Schema = Schema::from(vec![f1, f2].into_iter());
    ///
    /// assert_eq!(df.schema(), sc);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn schema(&self) -> Schema {
        Schema::from(self.iter().map(|s| s.field().into_owned()))
    }

    /// Get a reference to the `DataFrame` columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Adenine", "Cytosine", "Guanine", "Thymine"],
    ///                         "Symbol" => &["A", "C", "G", "T"])?;
    /// let columns: &Vec<Series> = df.get_columns();
    ///
    /// assert_eq!(columns[0].name(), "Name");
    /// assert_eq!(columns[1].name(), "Symbol");
    /// # Ok::<(), PolarsError>(())
    /// ```
    #[inline]
    pub fn get_columns(&self) -> &Vec<Series> {
        &self.columns
    }

    #[cfg(feature = "private")]
    #[inline]
    pub fn get_columns_mut(&mut self) -> &mut Vec<Series> {
        &mut self.columns
    }

    /// Iterator over the columns as `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Name", &["Pythagoras' theorem", "Shannon entropy"]);
    /// let s2: Series = Series::new("Formula", &["a²+b²=c²", "H=-Σ[P(x)log|P(x)|]"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2.clone()])?;
    ///
    /// let mut iterator = df.iter();
    ///
    /// assert_eq!(iterator.next(), Some(&s1));
    /// assert_eq!(iterator.next(), Some(&s2));
    /// assert_eq!(iterator.next(), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn iter(&self) -> std::slice::Iter<'_, Series> {
        self.columns.iter()
    }

    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Language" => &["Rust", "Python"],
    ///                         "Designer" => &["Graydon Hoare", "Guido van Rossum"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Language", "Designer"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn get_column_names(&self) -> Vec<&str> {
        self.columns.iter().map(|s| s.name()).collect()
    }

    /// Get the `Vec<String>` representing the column names.
    pub fn get_column_names_owned(&self) -> Vec<String> {
        self.columns.iter().map(|s| s.name().to_string()).collect()
    }

    /// Set the column names.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Mathematical set" => &["ℕ", "ℤ", "𝔻", "ℚ", "ℝ", "ℂ"])?;
    /// df.set_column_names(&["Set"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Set"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn set_column_names<S: AsRef<str>>(&mut self, names: &[S]) -> PolarsResult<()> {
        if names.len() != self.columns.len() {
            return Err(PolarsError::ShapeMisMatch("the provided slice with column names has not the same size as the DataFrame's width".into()));
        }
        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(names.iter().map(|name| name.as_ref()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }

        let columns = mem::take(&mut self.columns);
        self.columns = columns
            .into_iter()
            .zip(names)
            .map(|(s, name)| {
                let mut s = s;
                s.rename(name.as_ref());
                s
            })
            .collect();
        Ok(())
    }

    /// Get the data types of the columns in the DataFrame.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let venus_air: DataFrame = df!("Element" => &["Carbon dioxide", "Nitrogen"],
    ///                                "Fraction" => &[0.965, 0.035])?;
    ///
    /// assert_eq!(venus_air.dtypes(), &[DataType::Utf8, DataType::Float64]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn dtypes(&self) -> Vec<DataType> {
        self.columns.iter().map(|s| s.dtype().clone()).collect()
    }

    /// The number of chunks per column
    pub fn n_chunks(&self) -> usize {
        match self.columns.get(0) {
            None => 0,
            Some(s) => s.n_chunks(),
        }
    }

    /// Get a reference to the schema fields of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let earth: DataFrame = df!("Surface type" => &["Water", "Land"],
    ///                            "Fraction" => &[0.708, 0.292])?;
    ///
    /// let f1: Field = Field::new("Surface type", DataType::Utf8);
    /// let f2: Field = Field::new("Fraction", DataType::Float64);
    ///
    /// assert_eq!(earth.fields(), &[f1, f2]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn fields(&self) -> Vec<Field> {
        self.columns
            .iter()
            .map(|s| s.field().into_owned())
            .collect()
    }

    /// Get (height, width) of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("1" => &[1, 2, 3, 4, 5])?;
    /// let df2: DataFrame = df!("1" => &[1, 2, 3, 4, 5],
    ///                          "2" => &[1, 2, 3, 4, 5])?;
    ///
    /// assert_eq!(df0.shape(), (0 ,0));
    /// assert_eq!(df1.shape(), (5, 1));
    /// assert_eq!(df2.shape(), (5, 2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn shape(&self) -> (usize, usize) {
        match self.columns.as_slice() {
            &[] => (0, 0),
            v => (v[0].len(), v.len()),
        }
    }

    /// Get the width of the `DataFrame` which is the number of columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("Series 1" => &[0; 0])?;
    /// let df2: DataFrame = df!("Series 1" => &[0; 0],
    ///                          "Series 2" => &[0; 0])?;
    ///
    /// assert_eq!(df0.width(), 0);
    /// assert_eq!(df1.width(), 1);
    /// assert_eq!(df2.width(), 2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn width(&self) -> usize {
        self.columns.len()
    }

    /// Get the height of the `DataFrame` which is the number of rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("Currency" => &["€", "$"])?;
    /// let df2: DataFrame = df!("Currency" => &["€", "$", "¥", "£", "₿"])?;
    ///
    /// assert_eq!(df0.height(), 0);
    /// assert_eq!(df1.height(), 2);
    /// assert_eq!(df2.height(), 5);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn height(&self) -> usize {
        self.shape().0
    }

    /// Check if the `DataFrame` is empty.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = DataFrame::default();
    /// assert!(df1.is_empty());
    ///
    /// let df2: DataFrame = df!("First name" => &["Forever"],
    ///                          "Last name" => &["Alone"])?;
    /// assert!(!df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_empty(&self) -> bool {
        self.columns.is_empty()
    }

    pub(crate) fn hstack_mut_no_checks(&mut self, columns: &[Series]) -> &mut Self {
        for col in columns {
            self.columns.push(col.clone());
        }
        self
    }

    /// Add multiple `Series` to a `DataFrame`.
    /// The added `Series` are required to have the same length.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn stack(df: &mut DataFrame, columns: &[Series]) {
    ///     df.hstack_mut(columns);
    /// }
    /// ```
    pub fn hstack_mut(&mut self, columns: &[Series]) -> PolarsResult<&mut Self> {
        let mut names = PlHashSet::with_capacity(self.columns.len());
        for s in &self.columns {
            names.insert(s.name());
        }

        let height = self.height();
        // first loop check validity. We don't do this in a single pass otherwise
        // this DataFrame is already modified when an error occurs.
        for col in columns {
            if col.len() != height && height != 0 {
                return Err(PolarsError::ShapeMisMatch(
                    format!("Could not horizontally stack Series. The Series length {} differs from the DataFrame height: {height}", col.len()).into()));
            }

            let name = col.name();
            if names.contains(name) {
                return Err(PolarsError::Duplicate(
                    format!("Cannot do hstack operation. Column with name: {name} already exists",)
                        .into(),
                ));
            }
            names.insert(name);
        }
        drop(names);
        Ok(self.hstack_mut_no_checks(columns))
    }

    /// Add multiple `Series` to a `DataFrame`.
    /// The added `Series` are required to have the same length.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"])?;
    /// let s1: Series = Series::new("Proton", &[29, 47, 79]);
    /// let s2: Series = Series::new("Electron", &[29, 47, 79]);
    ///
    /// let df2: DataFrame = df1.hstack(&[s1, s2])?;
    /// assert_eq!(df2.shape(), (3, 3));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 3)
    /// +---------+--------+----------+
    /// | Element | Proton | Electron |
    /// | ---     | ---    | ---      |
    /// | str     | i32    | i32      |
    /// +=========+========+==========+
    /// | Copper  | 29     | 29       |
    /// +---------+--------+----------+
    /// | Silver  | 47     | 47       |
    /// +---------+--------+----------+
    /// | Gold    | 79     | 79       |
    /// +---------+--------+----------+
    /// ```
    pub fn hstack(&self, columns: &[Series]) -> PolarsResult<Self> {
        let mut new_cols = self.columns.clone();
        new_cols.extend_from_slice(columns);
        DataFrame::new(new_cols)
    }

    /// Concatenate a `DataFrame` to this `DataFrame` and return as newly allocated `DataFrame`.
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// let df3: DataFrame = df1.vstack(&df2)?;
    ///
    /// assert_eq!(df3.shape(), (5, 2));
    /// println!("{}", df3);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    pub fn vstack(&self, other: &DataFrame) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.vstack_mut(other)?;
        Ok(df)
    }

    /// Concatenate a DataFrame to this DataFrame
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// df1.vstack_mut(&df2)?;
    ///
    /// assert_eq!(df1.shape(), (5, 2));
    /// println!("{}", df1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    pub fn vstack_mut(&mut self, other: &DataFrame) -> PolarsResult<&mut Self> {
        if self.width() != other.width() {
            if self.width() == 0 {
                self.columns = other.columns.clone();
                return Ok(self);
            }

            return Err(PolarsError::ShapeMisMatch(
                format!("Could not vertically stack DataFrame. The DataFrames appended width {} differs from the parent DataFrames width {}", self.width(), other.width()).into()
            ));
        }

        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .try_for_each::<_, PolarsResult<_>>(|(left, right)| {
                can_extend(left, right)?;
                left.append(right).expect("should not fail");
                Ok(())
            })?;
        Ok(self)
    }

    /// Does not check if schema is correct
    pub(crate) fn vstack_mut_unchecked(&mut self, other: &DataFrame) {
        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .for_each(|(left, right)| {
                left.append(right).expect("should not fail");
            });
    }

    /// Extend the memory backed by this [`DataFrame`] with the values from `other`.
    ///
    /// Different from [`vstack`](Self::vstack) which adds the chunks from `other` to the chunks of this [`DataFrame`]
    /// `extend` appends the data from `other` to the underlying memory locations and thus may cause a reallocation.
    ///
    /// If this does not cause a reallocation, the resulting data structure will not have any extra chunks
    /// and thus will yield faster queries.
    ///
    /// Prefer `extend` over `vstack` when you want to do a query after a single append. For instance during
    /// online operations where you add `n` rows and rerun a query.
    ///
    /// Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance
    /// when you read in multiple files and when to store them in a single `DataFrame`. In the latter case, finish the sequence
    /// of `append` operations with a [`rechunk`](Self::rechunk).
    pub fn extend(&mut self, other: &DataFrame) -> PolarsResult<()> {
        if self.width() != other.width() {
            return Err(PolarsError::ShapeMisMatch(
                format!("Could not extend DataFrame. The DataFrames extended width {} differs from the parent DataFrames width {}", self.width(), other.width()).into()
            ));
        }

        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .try_for_each::<_, PolarsResult<_>>(|(left, right)| {
                can_extend(left, right)?;
                left.extend(right).unwrap();
                Ok(())
            })?;
        Ok(())
    }

    /// Remove a column by name and return the column removed.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Animal" => &["Tiger", "Lion", "Great auk"],
    ///                             "IUCN" => &["Endangered", "Vulnerable", "Extinct"])?;
    ///
    /// let s1: PolarsResult<Series> = df.drop_in_place("Average weight");
    /// assert!(s1.is_err());
    ///
    /// let s2: Series = df.drop_in_place("Animal")?;
    /// assert_eq!(s2, Series::new("Animal", &["Tiger", "Lion", "Great auk"]));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop_in_place(&mut self, name: &str) -> PolarsResult<Series> {
        let idx = self.check_name_to_idx(name)?;
        Ok(self.columns.remove(idx))
    }

    /// Return a new `DataFrame` where all null values are dropped.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Country" => ["Malta", "Liechtenstein", "North Korea"],
    ///                         "Tax revenue (% GDP)" => [Some(32.7), None, None])?;
    /// assert_eq!(df1.shape(), (3, 2));
    ///
    /// let df2: DataFrame = df1.drop_nulls(None)?;
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------------------+
    /// | Country | Tax revenue (% GDP) |
    /// | ---     | ---                 |
    /// | str     | f64                 |
    /// +=========+=====================+
    /// | Malta   | 32.7                |
    /// +---------+---------------------+
    /// ```
    pub fn drop_nulls(&self, subset: Option<&[String]>) -> PolarsResult<Self> {
        let selected_series;

        let mut iter = match subset {
            Some(cols) => {
                selected_series = self.select_series(cols)?;
                selected_series.iter()
            }
            None => self.columns.iter(),
        };

        // fast path for no nulls in df
        if iter.clone().all(|s| !s.has_validity()) {
            return Ok(self.clone());
        }

        let mask = iter
            .next()
            .ok_or_else(|| PolarsError::NoData("No data to drop nulls from".into()))?;
        let mut mask = mask.is_not_null();

        for s in iter {
            mask = mask & s.is_not_null();
        }
        self.filter(&mask)
    }

    /// Drop a column by name.
    /// This is a pure method and will return a new `DataFrame` instead of modifying
    /// the current one in place.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Ray type" => &["α", "β", "X", "γ"])?;
    /// let df2: DataFrame = df1.drop("Ray type")?;
    ///
    /// assert!(df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop(&self, name: &str) -> PolarsResult<Self> {
        let idx = self.check_name_to_idx(name)?;
        let mut new_cols = Vec::with_capacity(self.columns.len() - 1);

        self.columns.iter().enumerate().for_each(|(i, s)| {
            if i != idx {
                new_cols.push(s.clone())
            }
        });

        Ok(DataFrame::new_no_checks(new_cols))
    }

    pub fn drop_many<S: AsRef<str>>(&self, names: &[S]) -> Self {
        let names = names.iter().map(|s| s.as_ref()).collect();
        fn inner(df: &DataFrame, names: Vec<&str>) -> DataFrame {
            let mut new_cols = Vec::with_capacity(df.columns.len() - names.len());
            df.columns.iter().for_each(|s| {
                if !names.contains(&s.name()) {
                    new_cols.push(s.clone())
                }
            });

            DataFrame::new_no_checks(new_cols)
        }
        inner(self, names)
    }

    fn insert_at_idx_no_name_check(
        &mut self,
        index: usize,
        series: Series,
    ) -> PolarsResult<&mut Self> {
        if series.len() == self.height() {
            self.columns.insert(index, series);
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                .into(),
            ))
        }
    }

    /// Insert a new column at a given index.
    pub fn insert_at_idx<S: IntoSeries>(
        &mut self,
        index: usize,
        column: S,
    ) -> PolarsResult<&mut Self> {
        let series = column.into_series();
        self.check_already_present(series.name())?;
        self.insert_at_idx_no_name_check(index, series)
    }

    fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> {
        if let Some(idx) = self.find_idx_by_name(series.name()) {
            self.replace_at_idx(idx, series)?;
        } else {
            self.columns.push(series);
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    pub fn with_column<S: IntoSeries>(&mut self, column: S) -> PolarsResult<&mut Self> {
        fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();
            if series.len() == 1 && height > 1 {
                series = series.new_from_index(0, height);
            }

            if series.len() == height || df.is_empty() {
                df.add_column_by_search(series)?;
                Ok(df)
            }
            // special case for literals
            else if height == 0 && series.len() == 1 {
                let s = series.slice(0, 0);
                df.add_column_by_search(s)?;
                Ok(df)
            } else {
                Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Could not add column. The Series length {} differs from the DataFrame height: {}",
                        series.len(),
                        df.height()
                    )
                        .into(),
                ))
            }
        }
        let series = column.into_series();
        inner(self, series)
    }

    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let name = s.name();
        if let Some((idx, _, _)) = schema.get_full(name) {
            // schema is incorrect fallback to search
            if self.columns.get(idx).map(|s| s.name()) != Some(name) {
                self.add_column_by_search(s)?;
            } else {
                self.replace_at_idx(idx, s)?;
            }
        } else {
            self.columns.push(s);
        }
        Ok(())
    }

    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                self.with_column(s.clone())?;
            }
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    /// Uses an existing schema to amortize lookups.
    /// If the schema is incorrect, we will fallback to linear search.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let mut series = column.into_series();

        let height = self.height();
        if series.len() == 1 && height > 1 {
            series = series.new_from_index(0, height);
        }

        if series.len() == height || self.is_empty() {
            self.add_column_by_schema(series, schema)?;
            Ok(self)
        }
        // special case for literals
        else if height == 0 && series.len() == 1 {
            let s = series.slice(0, 0);
            self.add_column_by_schema(s, schema)?;
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                    .into(),
            ))
        }
    }

    /// Get a row in the `DataFrame`. Beware this is slow.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    ///     df.get(idx)
    /// }
    /// ```
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        match self.columns.get(0) {
            Some(s) => {
                if s.len() <= idx {
                    return None;
                }
            }
            None => return None,
        }
        // safety: we just checked bounds
        unsafe { Some(self.columns.iter().map(|s| s.get_unchecked(idx)).collect()) }
    }

    /// Select a `Series` by index.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
    ///                         "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
    ///
    /// let s1: Option<&Series> = df.select_at_idx(0);
    /// let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
    ///
    /// assert_eq!(s1, Some(&s2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_at_idx(&self, idx: usize) -> Option<&Series> {
        self.columns.get(idx)
    }

    /// Select a mutable series by index.
    ///
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> {
        self.columns.get_mut(idx)
    }

    /// Select column(s) from this `DataFrame` by range and return a new DataFrame
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///     "0" => &[0, 0, 0],
    ///     "1" => &[1, 1, 1],
    ///     "2" => &[2, 2, 2]
    /// }?;
    ///
    /// assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
    /// assert!(df.frame_equal(&df.select_by_range(..)?));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // This function is copied from std::slice::range (https://doc.rust-lang.org/std/slice/fn.range.html)
        // because it is the nightly feature. We should change here if this function were stable.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let len = bounds.end;

            let start: ops::Bound<&usize> = range.start_bound();
            let start = match start {
                ops::Bound::Included(&start) => start,
                ops::Bound::Excluded(start) => start.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice from after maximum usize");
                }),
                ops::Bound::Unbounded => 0,
            };

            let end: ops::Bound<&usize> = range.end_bound();
            let end = match end {
                ops::Bound::Included(end) => end.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice up to maximum usize");
                }),
                ops::Bound::Excluded(&end) => end,
                ops::Bound::Unbounded => len,
            };

            if start > end {
                panic!("slice index starts at {start} but ends at {end}");
            }
            if end > len {
                panic!("range end index {end} out of range for slice of length {len}",);
            }

            ops::Range { start, end }
        }

        let colnames = self.get_column_names_owned();
        let range = get_range(range, ..colnames.len());

        self.select_impl(&colnames[range])
    }

    /// Get column index of a `Series` by name.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
    ///                         "Health" => &[100, 200, 500],
    ///                         "Mana" => &[250, 100, 0],
    ///                         "Strength" => &[30, 150, 300])?;
    ///
    /// assert_eq!(df.find_idx_by_name("Name"), Some(0));
    /// assert_eq!(df.find_idx_by_name("Health"), Some(1));
    /// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
    /// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
    /// assert_eq!(df.find_idx_by_name("Haste"), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
        self.columns.iter().position(|s| s.name() == name)
    }

    /// Select a single column by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
    /// let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
    ///
    /// assert_eq!(df.column("Password")?, &s1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        let idx = self
            .find_idx_by_name(name)
            .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
        Ok(self.select_at_idx(idx).unwrap())
    }

    /// Selected multiple columns by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        names
            .into_iter()
            .map(|name| self.column(name.as_ref()))
            .collect()
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_impl(&cols)
    }

    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_physical_impl(&cols)
    }

    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_physical_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut names = PlHashSet::with_capacity(cols.len());
        for name in cols {
            if !names.insert(name.as_str()) {
                _duplicate_err(name)?
            }
        }
        Ok(())
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        let cols = selection.into_vec();
        self.select_series_impl(&cols)
    }

    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        self.columns
            .iter()
            .enumerate()
            .map(|(i, s)| (s.name(), i))
            .collect()
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            let name_to_idx = self._names_to_idx_map();
            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self
                        .select_at_idx(idx)
                        .unwrap()
                        .to_physical_repr()
                        .into_owned())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            // we hash, because there are user that having millions of columns.
            // # https://github.com/pola-rs/polars/issues/1023
            let name_to_idx = self._names_to_idx_map();

            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self.select_at_idx(idx).unwrap().clone())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.clone()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// Select a mutable series by name.
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        let opt_idx = self.find_idx_by_name(name);

        match opt_idx {
            Some(idx) => self.select_at_idx_mut(idx),
            None => None,
        }
    }

    /// Does a filter but splits thread chunks vertically instead of horizontally
    /// This yields a DataFrame with `n_chunks == n_threads`.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let masks = split_ca(mask, n_threads).unwrap();
        let dfs = split_df(self, n_threads).unwrap();
        let dfs: PolarsResult<Vec<_>> = POOL.install(|| {
            masks
                .par_iter()
                .zip(dfs)
                .map(|(mask, df)| {
                    let cols = df
                        .columns
                        .iter()
                        .map(|s| s.filter(mask))
                        .collect::<PolarsResult<_>>()?;
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        let mut iter = dfs?.into_iter();
        let first = iter.next().unwrap();
        Ok(iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        }))
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            return self.clone().filter_vertical(mask);
        }
        let new_col = self.try_apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s.filter_threaded(mask, true),
            _ => s.filter(mask),
        })?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Same as `filter` but does not parallelize.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let new_col = self.try_apply_columns(&|s| s.filter(mask))?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        let new_col = self.try_apply_columns_par(&|s| {
            let mut i = iter.clone();
            s.take_iter(&mut i)
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` values by indexes from an iterator.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            return self.take_unchecked_vectical(&idx_ca.into_inner());
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            let idx_ca = idx_ca.into_inner();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_iter_unchecked(&mut i)
            })
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` values by indexes from an iterator that may contain None values.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked_vectical(&idx_ca);
        }

        let n_chunks = self.n_chunks();

        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_opt_iter_unchecked(&mut i)
            })
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` rows by index values.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        let indices = if indices.chunks.len() > 1 {
            Cow::Owned(indices.rechunk())
        } else {
            Cow::Borrowed(indices)
        };
        let new_col = POOL.install(|| {
            self.try_apply_columns_par(&|s| match s.dtype() {
                DataType::Utf8 => s.take_threaded(&indices, true),
                _ => s.take(&indices),
            })
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        self.take_unchecked_impl(idx, true)
    }

    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        let cols = if allow_threads {
            POOL.install(|| {
                self.apply_columns_par(&|s| match s.dtype() {
                    DataType::Utf8 => s.take_unchecked_threaded(idx, true).unwrap(),
                    _ => s.take_unchecked(idx).unwrap(),
                })
            })
        } else {
            self.columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect()
        };
        DataFrame::new_no_checks(cols)
    }

    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let idxs = split_ca(indices, n_threads).unwrap();

        let dfs: Vec<_> = POOL.install(|| {
            idxs.par_iter()
                .map(|idx| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(idx).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        let mut iter = dfs.into_iter();
        let first = iter.next().unwrap();
        iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        })
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        self.select_mut(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))
            .map(|s| s.rename(name))?;

        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(self.columns.iter().map(|s| s.name()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }
        Ok(self)
    }

    /// Sort `DataFrame` in place by a column.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // a lot of indirection in both sorting and take
        self.as_single_chunk_par();
        let by_column = self.select_series(by_column)?;
        let reverse = reverse.into_vec();
        self.columns = self.sort_impl(by_column, reverse, false, None)?.columns;
        Ok(self)
    }

    /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // note that the by_column argument also contains evaluated expression from polars-lazy
        // that may not even be present in this dataframe.

        // therefore when we try to set the first columns as sorted, we ignore the error
        // as expressions are not present (they are renamed to _POLARS_SORT_COLUMN_i.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: reverse[0],
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted
        // if the column did not exists it is ok, because we sorted by an expression
        // not present in the dataframe
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted clone of this `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.sort_in_place(by_column, reverse)?;
        Ok(df)
    }

    /// Sort the `DataFrame` by a single column with extra options.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut df = self.clone();
        // a lot of indirection in both sorting and take
        df.as_single_chunk_par();
        let by_column = vec![df.column(by_column)?.clone()];
        let reverse = vec![options.descending];
        df.columns = df
            .sort_impl(by_column, reverse, options.nulls_last, None)?
            .columns;
        Ok(df)
    }

    /// Replace a column with a `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Country" => &["United States", "China"],
    ///                         "Area (km²)" => &[9_833_520, 9_596_961])?;
    /// let s: Series = Series::new("Country", &["USA", "PRC"]);
    ///
    /// assert!(df.replace("Nation", s.clone()).is_err());
    /// assert!(df.replace("Country", s).is_ok());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace<S: IntoSeries>(&mut self, column: &str, new_col: S) -> PolarsResult<&mut Self> {
        self.apply(column, |_| new_col.into_series())
    }

    /// Replace or update a column. The difference between this method and [DataFrame::with_column]
    /// is that now the value of `column: &str` determines the name of the column and not the name
    /// of the `Series` passed to this method.
    pub fn replace_or_add<S: IntoSeries>(
        &mut self,
        column: &str,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut new_col = new_col.into_series();
        new_col.rename(column);
        self.with_column(new_col)
    }

    /// Replace column at index `idx` with a `Series`.
    ///
    /// # Example
    ///
    /// ```ignored
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace_at_idx<S: IntoSeries>(
        &mut self,
        idx: usize,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut new_column = new_col.into_series();
        if new_column.len() != self.height() {
            return Err(PolarsError::ShapeMisMatch(
                format!("Cannot replace Series at index {}. The shape of Series {} does not match that of the DataFrame {}",
                idx, new_column.len(), self.height()
                ).into()));
        };
        if idx >= self.width() {
            return Err(PolarsError::ComputeError(
                format!(
                    "Column index: {} outside of DataFrame with {} columns",
                    idx,
                    self.width()
                )
                .into(),
            ));
        }
        let old_col = &mut self.columns[idx];
        mem::swap(old_col, &mut new_column);
        Ok(self)
    }

    /// Apply a closure to a column. This is the recommended way to do in place modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("names", &["Jean", "Claude", "van"]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// fn str_to_len(str_val: &Series) -> Series {
    ///     str_val.utf8()
    ///         .unwrap()
    ///         .into_iter()
    ///         .map(|opt_name: Option<&str>| {
    ///             opt_name.map(|name: &str| name.len() as u32)
    ///          })
    ///         .collect::<UInt32Chunked>()
    ///         .into_series()
    /// }
    ///
    /// // Replace the names column by the length of the names.
    /// df.apply("names", str_to_len);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    |       |
    /// | ---    | names |
    /// | str    | u32   |
    /// +========+=======+
    /// | "ham"  | 4     |
    /// +--------+-------+
    /// | "spam" | 6     |
    /// +--------+-------+
    /// | "egg"  | 3     |
    /// +--------+-------+
    /// ```
    pub fn apply<F, S>(&mut self, name: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let idx = self.check_name_to_idx(name)?;
        self.apply_at_idx(idx, f)
    }

    /// Apply a closure to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.apply_at_idx(1, |s| s + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | ascii |
    /// | ---    | ---   |
    /// | str    | i32   |
    /// +========+=======+
    /// | "ham"  | 102   |
    /// +--------+-------+
    /// | "spam" | 111   |
    /// +--------+-------+
    /// | "egg"  | 111   |
    /// +--------+-------+
    /// ```
    pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let df_height = self.height();
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();
        let new_col = f(col).into_series();
        match new_col.len() {
            1 => {
                let new_col = new_col.new_from_index(0, df_height);
                let _ = mem::replace(col, new_col);
            }
            len if (len == df_height) => {
                let _ = mem::replace(col, new_col);
            }
            len => {
                return Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Result Series has shape {} where the DataFrame has height {}",
                        len,
                        self.height()
                    )
                    .into(),
                ));
            }
        }

        // make sure the name remains the same after applying the closure
        unsafe {
            let col = self.columns.get_unchecked_mut(idx);
            col.rename(&name);
        }
        Ok(self)
    }

    /// Apply a closure that may fail to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values a column of a `DataFrame` given range of indexes.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// let idx = vec![0, 1, 4];
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set_at_idx_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "ham-is-modified"   | 1      |
    /// +---------------------+--------+
    /// | "spam-is-modified"  | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "quack-is-modified" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();

        let _ = mem::replace(col, f(col).map(|s| s.into_series())?);

        // make sure the name remains the same after applying the closure
        unsafe {
            let col = self.columns.get_unchecked_mut(idx);
            col.rename(&name);
        }
        Ok(self)
    }

    /// Apply a closure that may fail to a column. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values a column of a `DataFrame` given a boolean mask.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // create a mask
    /// let values = df.column("values")?;
    /// let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?;
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set(&mask, Some("not_within_bounds"))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "not_within_bounds" | 1      |
    /// +---------------------+--------+
    /// | "spam"              | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "not_within_bounds" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply<F, S>(&mut self, column: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let idx = self
            .find_idx_by_name(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))?;
        self.try_apply_at_idx(idx, f)
    }

    /// Slice the `DataFrame` along the rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Fruit" => &["Apple", "Grape", "Grape", "Fig", "Fig"],
    ///                         "Color" => &["Green", "Red", "White", "White", "Red"])?;
    /// let sl: DataFrame = df.slice(2, 3);
    ///
    /// assert_eq!(sl.shape(), (3, 2));
    /// println!("{}", sl);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Output:
    /// ```text
    /// shape: (3, 2)
    /// +-------+-------+
    /// | Fruit | Color |
    /// | ---   | ---   |
    /// | str   | str   |
    /// +=======+=======+
    /// | Grape | White |
    /// +-------+-------+
    /// | Fig   | White |
    /// +-------+-------+
    /// | Fig   | Red   |
    /// +-------+-------+
    /// ```
    #[must_use]
    pub fn slice(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        let col = self
            .columns
            .iter()
            .map(|s| s.slice(offset, length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    #[must_use]
    pub fn slice_par(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        DataFrame::new_no_checks(self.apply_columns_par(&|s| s.slice(offset, length)))
    }

    #[must_use]
    pub fn _slice_and_realloc(&self, offset: i64, length: usize) -> Self {
        if offset == 0 && length == self.height() {
            return self.clone();
        }
        DataFrame::new_no_checks(self.apply_columns(&|s| {
            let mut out = s.slice(offset, length);
            out.shrink_to_fit();
            out
        }))
    }

    /// Get the head of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank by GDP (2021)" => &[1, 2, 3, 4, 5],
    ///         "Continent" => &["North America", "Asia", "Asia", "Europe", "Europe"],
    ///         "Country" => &["United States", "China", "Japan", "Germany", "United Kingdom"],
    ///         "Capital" => &["Washington", "Beijing", "Tokyo", "Berlin", "London"])?;
    /// assert_eq!(countries.shape(), (5, 4));
    ///
    /// println!("{}", countries.head(Some(3)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 4)
    /// +--------------------+---------------+---------------+------------+
    /// | Rank by GDP (2021) | Continent     | Country       | Capital    |
    /// | ---                | ---           | ---           | ---        |
    /// | i32                | str           | str           | str        |
    /// +====================+===============+===============+============+
    /// | 1                  | North America | United States | Washington |
    /// +--------------------+---------------+---------------+------------+
    /// | 2                  | Asia          | China         | Beijing    |
    /// +--------------------+---------------+---------------+------------+
    /// | 3                  | Asia          | Japan         | Tokyo      |
    /// +--------------------+---------------+---------------+------------+
    /// ```
    #[must_use]
    pub fn head(&self, length: Option<usize>) -> Self {
        let col = self
            .columns
            .iter()
            .map(|s| s.head(length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Get the tail of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let countries: DataFrame =
    ///     df!("Rank (2021)" => &[105, 106, 107, 108, 109],
    ///         "Apple Price (€/kg)" => &[0.75, 0.70, 0.70, 0.65, 0.52],
    ///         "Country" => &["Kosovo", "Moldova", "North Macedonia", "Syria", "Turkey"])?;
    /// assert_eq!(countries.shape(), (5, 3));
    ///
    /// println!("{}", countries.tail(Some(2)));
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (2, 3)
    /// +-------------+--------------------+---------+
    /// | Rank (2021) | Apple Price (€/kg) | Country |
    /// | ---         | ---                | ---     |
    /// | i32         | f64                | str     |
    /// +=============+====================+=========+
    /// | 108         | 0.63               | Syria   |
    /// +-------------+--------------------+---------+
    /// | 109         | 0.63               | Turkey  |
    /// +-------------+--------------------+---------+
    /// ```
    #[must_use]
    pub fn tail(&self, length: Option<usize>) -> Self {
        let col = self
            .columns
            .iter()
            .map(|s| s.tail(length))
            .collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks(&self) -> RecordBatchIter {
        RecordBatchIter {
            columns: &self.columns,
            idx: 0,
            n_chunks: self.n_chunks(),
        }
    }

    /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches as physical values.
    ///
    /// # Panics
    ///
    /// Panics if the `DataFrame` that is passed is not rechunked.
    ///
    /// This responsibility is left to the caller as we don't want to take mutable references here,
    /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller
    /// as well.
    pub fn iter_chunks_physical(&self) -> PhysRecordBatchIter<'_> {
        PhysRecordBatchIter {
            iters: self.columns.iter().map(|s| s.chunks().iter()).collect(),
        }
    }

    /// Get a `DataFrame` with all the columns in reversed order.
    #[must_use]
    pub fn reverse(&self) -> Self {
        let col = self.columns.iter().map(|s| s.reverse()).collect::<Vec<_>>();
        DataFrame::new_no_checks(col)
    }

    /// Shift the values by a given period and fill the parts that will be empty due to this operation
    /// with `Nones`.
    ///
    /// See the method on [Series](../series/trait.SeriesTrait.html#method.shift) for more info on the `shift` operation.
    #[must_use]
    pub fn shift(&self, periods: i64) -> Self {
        let col = self.apply_columns_par(&|s| s.shift(periods));

        DataFrame::new_no_checks(col)
    }

    /// Replace None values with one of the following strategies:
    /// * Forward fill (replace None with the previous value)
    /// * Backward fill (replace None with the next value)
    /// * Mean fill (replace None with the mean of the whole array)
    /// * Min fill (replace None with the minimum of the whole array)
    /// * Max fill (replace None with the maximum of the whole array)
    ///
    /// See the method on [Series](../series/trait.SeriesTrait.html#method.fill_null) for more info on the `fill_null` operation.
    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
        let col = self.try_apply_columns_par(&|s| s.fill_null(strategy))?;

        Ok(DataFrame::new_no_checks(col))
    }

    /// Summary statistics for a DataFrame. Only summarizes numeric datatypes at the moment and returns nulls for non numeric datatypes.
    /// Try in keep output similar to pandas
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("categorical" => &["d","e","f"],
    ///                          "numeric" => &[1, 2, 3],
    ///                          "object" => &["a", "b", "c"])?;
    /// assert_eq!(df1.shape(), (3, 3));
    ///
    /// let df2: DataFrame = df1.describe(None);
    /// assert_eq!(df2.shape(), (8, 4));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (8, 4)
    /// ┌──────────┬─────────────┬─────────┬────────┐
    /// │ describe ┆ categorical ┆ numeric ┆ object │
    /// │ ---      ┆ ---         ┆ ---     ┆ ---    │
    /// │ str      ┆ f64         ┆ f64     ┆ f64    │
    /// ╞══════════╪═════════════╪═════════╪════════╡
    /// │ count    ┆ 3.0         ┆ 3.0     ┆ 3.0    │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ mean     ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ std      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ min      ┆ null        ┆ 1.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 25%      ┆ null        ┆ 1.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 50%      ┆ null        ┆ 2.0     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ 75%      ┆ null        ┆ 2.5     ┆ null   │
    /// ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
    /// │ max      ┆ null        ┆ 3.0     ┆ null   │
    /// └──────────┴─────────────┴─────────┴────────┘
    /// ```
    #[must_use]
    #[cfg(feature = "describe")]
    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let mut columns: Vec<Series> = vec![];

            for s in df.columns.iter() {
                columns.push(s.cast(&DataType::Float64).expect("cast to float failed"));
            }

            DataFrame::new(columns).unwrap()
        }

        fn count(df: &DataFrame) -> DataFrame {
            let columns = df.apply_columns_par(&|s| Series::new(s.name(), [s.len() as IdxSize]));
            DataFrame::new_no_checks(columns)
        }

        let percentiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        let mut headers: Vec<String> = vec![
            "count".to_string(),
            "mean".to_string(),
            "std".to_string(),
            "min".to_string(),
        ];

        let mut tmp: Vec<DataFrame> = vec![
            describe_cast(&count(self)),
            describe_cast(&self.mean()),
            describe_cast(&self.std(1)),
            describe_cast(&self.min()),
        ];

        for p in percentiles {
            tmp.push(describe_cast(
                &self
                    .quantile(*p, QuantileInterpolOptions::Linear)
                    .expect("quantile failed"),
            ));
            headers.push(format!("{}%", *p * 100.0));
        }

        // Keep order same as pandas
        tmp.push(describe_cast(&self.max()));
        headers.push("max".to_string());

        let mut summary = concat_df_unchecked(&tmp);

        summary
            .insert_at_idx(0, Series::new("describe", headers))
            .expect("insert of header failed");

        summary
    }

    /// Aggregate the columns to their maximum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.max();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 6       | 5       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn max(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.max_as_series());

        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their standard deviation values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.std(1);
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +-------------------+--------------------+
    /// | Die n°1           | Die n°2            |
    /// | ---               | ---                |
    /// | f64               | f64                |
    /// +===================+====================+
    /// | 2.280350850198276 | 1.0954451150103321 |
    /// +-------------------+--------------------+
    /// ```
    #[must_use]
    pub fn std(&self, ddof: u8) -> Self {
        let columns = self.apply_columns_par(&|s| s.std_as_series(ddof));

        DataFrame::new_no_checks(columns)
    }
    /// Aggregate the columns to their variation values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.var(1);
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 5.2     | 1.2     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn var(&self, ddof: u8) -> Self {
        let columns = self.apply_columns_par(&|s| s.var_as_series(ddof));
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their minimum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.min();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 1       | 2       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn min(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.min_as_series());
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their sum values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.sum();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 16      | 16      |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn sum(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.sum_as_series());
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their mean values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.mean();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | f64     | f64     |
    /// +=========+=========+
    /// | 3.2     | 3.2     |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn mean(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.mean_as_series());
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their median values.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
    ///                          "Die n°2" => &[3, 2, 3, 5, 3])?;
    /// assert_eq!(df1.shape(), (5, 2));
    ///
    /// let df2: DataFrame = df1.median();
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------+
    /// | Die n°1 | Die n°2 |
    /// | ---     | ---     |
    /// | i32     | i32     |
    /// +=========+=========+
    /// | 3       | 3       |
    /// +---------+---------+
    /// ```
    #[must_use]
    pub fn median(&self) -> Self {
        let columns = self.apply_columns_par(&|s| s.median_as_series());
        DataFrame::new_no_checks(columns)
    }

    /// Aggregate the columns to their quantile values.
    pub fn quantile(&self, quantile: f64, interpol: QuantileInterpolOptions) -> PolarsResult<Self> {
        let columns = self.try_apply_columns_par(&|s| s.quantile_as_series(quantile, interpol))?;

        Ok(DataFrame::new_no_checks(columns))
    }

    /// Aggregate the column horizontally to their min values.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmin(&self) -> PolarsResult<Option<Series>> {
        let min_fn = |acc: &Series, s: &Series| {
            let mask = acc.lt(s)? & acc.is_not_null() | s.is_null();
            acc.zip_with(&mask, s)
        };

        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            2 => min_fn(&self.columns[0], &self.columns[1]).map(Some),
            _ => {
                // the try_reduce_with is a bit slower in parallelism,
                // but I don't think it matters here as we parallelize over columns, not over elements
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| min_fn(&l, &r).map(Cow::Owned))
                        // we can unwrap the option, because we are certain there is a column
                        // we started this operation on 3 columns
                        .unwrap()
                        .map(|cow| Some(cow.into_owned()))
                })
            }
        }
    }

    /// Aggregate the column horizontally to their max values.
    #[cfg(feature = "zip_with")]
    #[cfg_attr(docsrs, doc(cfg(feature = "zip_with")))]
    pub fn hmax(&self) -> PolarsResult<Option<Series>> {
        let max_fn = |acc: &Series, s: &Series| {
            let mask = acc.gt(s)? & acc.is_not_null() | s.is_null();
            acc.zip_with(&mask, s)
        };

        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            2 => max_fn(&self.columns[0], &self.columns[1]).map(Some),
            _ => {
                // the try_reduce_with is a bit slower in parallelism,
                // but I don't think it matters here as we parallelize over columns, not over elements
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| max_fn(&l, &r).map(Cow::Owned))
                        // we can unwrap the option, because we are certain there is a column
                        // we started this operation on 3 columns
                        .unwrap()
                        .map(|cow| Some(cow.into_owned()))
                })
            }
        }
    }

    /// Aggregate the column horizontally to their sum values.
    pub fn hsum(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>> {
        let sum_fn =
            |acc: &Series, s: &Series, none_strategy: NullStrategy| -> PolarsResult<Series> {
                let mut acc = acc.clone();
                let mut s = s.clone();
                if let NullStrategy::Ignore = none_strategy {
                    // if has nulls
                    if acc.has_validity() {
                        acc = acc.fill_null(FillNullStrategy::Zero)?;
                    }
                    if s.has_validity() {
                        s = s.fill_null(FillNullStrategy::Zero)?;
                    }
                }
                Ok(&acc + &s)
            };

        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            2 => sum_fn(&self.columns[0], &self.columns[1], none_strategy).map(Some),
            _ => {
                // the try_reduce_with is a bit slower in parallelism,
                // but I don't think it matters here as we parallelize over columns, not over elements
                POOL.install(|| {
                    self.columns
                        .par_iter()
                        .map(|s| Ok(Cow::Borrowed(s)))
                        .try_reduce_with(|l, r| sum_fn(&l, &r, none_strategy).map(Cow::Owned))
                        // we can unwrap the option, because we are certain there is a column
                        // we started this operation on 3 columns
                        .unwrap()
                        .map(|cow| Some(cow.into_owned()))
                })
            }
        }
    }

    /// Aggregate the column horizontally to their mean values.
    pub fn hmean(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>> {
        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            _ => {
                let columns = self
                    .columns
                    .iter()
                    .cloned()
                    .filter(|s| {
                        let dtype = s.dtype();
                        dtype.is_numeric() || matches!(dtype, DataType::Boolean)
                    })
                    .collect();
                let numeric_df = DataFrame::new_no_checks(columns);

                let sum = || numeric_df.hsum(none_strategy);

                let null_count = || {
                    numeric_df
                        .columns
                        .par_iter()
                        .map(|s| s.is_null().cast(&DataType::UInt32).unwrap())
                        .reduce_with(|l, r| &l + &r)
                        // we can unwrap the option, because we are certain there is a column
                        // we started this operation on 2 columns
                        .unwrap()
                };

                let (sum, null_count) = POOL.install(|| rayon::join(sum, null_count));
                let sum = sum?;

                // value lengths: len - null_count
                let value_length: UInt32Chunked =
                    (numeric_df.width().sub(&null_count)).u32().unwrap().clone();

                // make sure that we do not divide by zero
                // by replacing with None
                let value_length = value_length
                    .set(&value_length.equal(0), None)?
                    .into_series()
                    .cast(&DataType::Float64)?;

                Ok(sum.map(|sum| &sum / &value_length))
            }
        }
    }

    /// Pipe different functions/ closure operations that work on a DataFrame together.
    pub fn pipe<F, B>(self, f: F) -> PolarsResult<B>
    where
        F: Fn(DataFrame) -> PolarsResult<B>,
    {
        f(self)
    }

    /// Pipe different functions/ closure operations that work on a DataFrame together.
    pub fn pipe_mut<F, B>(&mut self, f: F) -> PolarsResult<B>
    where
        F: Fn(&mut DataFrame) -> PolarsResult<B>,
    {
        f(self)
    }

    /// Pipe different functions/ closure operations that work on a DataFrame together.
    pub fn pipe_with_args<F, B, Args>(self, f: F, args: Args) -> PolarsResult<B>
    where
        F: Fn(DataFrame, Args) -> PolarsResult<B>,
    {
        f(self, args)
    }

    /// Drop duplicate rows from a `DataFrame`.
    /// *This fails when there is a column of type List in DataFrame*
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///               "flt" => [1., 1., 2., 2., 3., 3.],
    ///               "int" => [1, 1, 2, 2, 3, 3, ],
    ///               "str" => ["a", "a", "b", "b", "c", "c"]
    ///           }?;
    ///
    /// println!("{}", df.drop_duplicates(true, None)?);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Returns
    ///
    /// ```text
    /// +-----+-----+-----+
    /// | flt | int | str |
    /// | --- | --- | --- |
    /// | f64 | i32 | str |
    /// +=====+=====+=====+
    /// | 1   | 1   | "a" |
    /// +-----+-----+-----+
    /// | 2   | 2   | "b" |
    /// +-----+-----+-----+
    /// | 3   | 3   | "c" |
    /// +-----+-----+-----+
    /// ```
    #[deprecated(note = "use DataFrame::unique")]
    pub fn drop_duplicates(
        &self,
        maintain_order: bool,
        subset: Option<&[String]>,
    ) -> PolarsResult<Self> {
        match maintain_order {
            true => self.unique_stable(subset, UniqueKeepStrategy::First),
            false => self.unique(subset, UniqueKeepStrategy::First),
        }
    }

    /// Drop duplicate rows from a `DataFrame`.
    /// *This fails when there is a column of type List in DataFrame*
    ///
    /// Stable means that the order is maintained. This has a higher cost than an unstable distinct.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///               "flt" => [1., 1., 2., 2., 3., 3.],
    ///               "int" => [1, 1, 2, 2, 3, 3, ],
    ///               "str" => ["a", "a", "b", "b", "c", "c"]
    ///           }?;
    ///
    /// println!("{}", df.unique_stable(None, UniqueKeepStrategy::First)?);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Returns
    ///
    /// ```text
    /// +-----+-----+-----+
    /// | flt | int | str |
    /// | --- | --- | --- |
    /// | f64 | i32 | str |
    /// +=====+=====+=====+
    /// | 1   | 1   | "a" |
    /// +-----+-----+-----+
    /// | 2   | 2   | "b" |
    /// +-----+-----+-----+
    /// | 3   | 3   | "c" |
    /// +-----+-----+-----+
    /// ```
    pub fn unique_stable(
        &self,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<DataFrame> {
        self.unique_impl(true, subset, keep)
    }

    /// Unstable distinct. See [`DataFrame::unique_stable`].
    pub fn unique(
        &self,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<DataFrame> {
        self.unique_impl(false, subset, keep)
    }

    fn unique_impl(
        &self,
        maintain_order: bool,
        subset: Option<&[String]>,
        keep: UniqueKeepStrategy,
    ) -> PolarsResult<Self> {
        use UniqueKeepStrategy::*;
        let names = match &subset {
            Some(s) => s.iter().map(|s| &**s).collect(),
            None => self.get_column_names(),
        };

        let columns = match (keep, maintain_order) {
            (First, true) => {
                let gb = self.groupby_stable(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
            }
            (Last, true) => {
                // maintain order by last values, so the sorted groups are not correct as they
                // are sorted by the first value
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                let last_idx: NoNull<IdxCa> = groups
                    .iter()
                    .map(|g| match g {
                        GroupsIndicator::Idx((_first, idx)) => idx[idx.len() - 1],
                        GroupsIndicator::Slice([first, len]) => first + len,
                    })
                    .collect();

                let last_idx = last_idx.sort(false);
                return Ok(unsafe { self.take_unchecked(&last_idx) });
            }
            (First, false) => {
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
            }
            (Last, false) => {
                let gb = self.groupby(names)?;
                let groups = gb.get_groups();
                self.apply_columns_par(&|s| unsafe { s.agg_last(groups) })
            }
        };
        Ok(DataFrame::new_no_checks(columns))
    }

    /// Get a mask of all the unique rows in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Company" => &["Apple", "Microsoft"],
    ///                         "ISIN" => &["US0378331005", "US5949181045"])?;
    /// let ca: ChunkedArray<BooleanType> = df.is_unique()?;
    ///
    /// assert!(ca.all());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_unique(&self) -> PolarsResult<BooleanChunked> {
        let gb = self.groupby(self.get_column_names())?;
        let groups = gb.take_groups();
        Ok(is_unique_helper(
            groups,
            self.height() as IdxSize,
            true,
            false,
        ))
    }

    /// Get a mask of all the duplicated rows in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Company" => &["Alphabet", "Alphabet"],
    ///                         "ISIN" => &["US02079K3059", "US02079K1079"])?;
    /// let ca: ChunkedArray<BooleanType> = df.is_duplicated()?;
    ///
    /// assert!(!ca.all());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_duplicated(&self) -> PolarsResult<BooleanChunked> {
        let gb = self.groupby(self.get_column_names())?;
        let groups = gb.take_groups();
        Ok(is_unique_helper(
            groups,
            self.height() as IdxSize,
            false,
            true,
        ))
    }

src/utils/mod.rs (line 159)

fn flatten_df(df: &DataFrame) -> impl Iterator<Item = DataFrame> + '_ {
    df.iter_chunks_physical().flat_map(|chunk| {
        let df = DataFrame::new_no_checks(
            df.iter()
                .zip(chunk.into_arrays())
                .map(|(s, arr)| {
                    // Safety:
                    // datatypes are correct
                    let mut out = unsafe {
                        Series::from_chunks_and_dtype_unchecked(s.name(), vec![arr], s.dtype())
                    };
                    out.set_sorted(s.is_sorted());
                    out
                })
                .collect(),
        );
        if df.height() == 0 {
            None
        } else {
            Some(df)
        }
    })
}

pub fn flatten_series(s: &Series) -> Vec<Series> {
    let name = s.name();
    let dtype = s.dtype();
    unsafe {
        s.chunks()
            .iter()
            .map(|arr| Series::from_chunks_and_dtype_unchecked(name, vec![arr.clone()], dtype))
            .collect()
    }
}

pub fn split_df_as_ref(df: &DataFrame, n: usize) -> PolarsResult<Vec<DataFrame>> {
    let total_len = df.height();
    let chunk_size = total_len / n;

    if df.n_chunks() == n
        && df.get_columns()[0]
            .chunk_lengths()
            .all(|len| len.abs_diff(chunk_size) < 100)
    {
        return Ok(flatten_df(df).collect());
    }

    let mut out = Vec::with_capacity(n);

    for i in 0..n {
        let offset = i * chunk_size;
        let len = if i == (n - 1) {
            total_len - offset
        } else {
            chunk_size
        };
        let df = df.slice((i * chunk_size) as i64, len);
        if df.n_chunks() > 1 {
            // we add every chunk as separate dataframe. This make sure that every partition
            // deals with it.
            out.extend(flatten_df(&df))
        } else {
            out.push(df)
        }
    }

    Ok(out)
}

#[cfg(feature = "private")]
#[doc(hidden)]
/// Split a [`DataFrame`] into `n` parts. We take a `&mut` to be able to repartition/align chunks.
pub fn split_df(df: &mut DataFrame, n: usize) -> PolarsResult<Vec<DataFrame>> {
    if n == 0 || df.height() == 0 {
        return Ok(vec![df.clone()]);
    }
    // make sure that chunks are aligned.
    df.rechunk();
    split_df_as_ref(df, n)
}

src/chunked_array/random.rs (line 174)

    pub fn sample_n(
        &self,
        n: usize,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) -> PolarsResult<Self> {
        if !with_replacement && n > self.height() {
            return Err(PolarsError::ShapeMisMatch(
                "cannot take a larger sample than the total population when `with_replacement=false`"
                    .into(),
            ));
        }
        // all columns should used the same indices. So we first create the indices.
        let idx = match with_replacement {
            true => create_rand_index_with_replacement(n, self.height(), seed),
            false => create_rand_index_no_replacement(n, self.height(), seed, shuffle),
        };
        // Safety:
        // indices are within bounds
        Ok(unsafe { self.take_unchecked(&idx) })
    }

    /// Sample a fraction between 0.0-1.0 of this DataFrame.
    pub fn sample_frac(
        &self,
        frac: f64,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) -> PolarsResult<Self> {
        let n = (self.height() as f64 * frac) as usize;
        self.sample_n(n, with_replacement, shuffle, seed)
    }

src/functions.rs (line 220)

pub fn hor_concat_df(dfs: &[DataFrame]) -> PolarsResult<DataFrame> {
    let max_len = dfs
        .iter()
        .map(|df| df.height())
        .max()
        .ok_or_else(|| PolarsError::ComputeError("cannot concat empty dataframes".into()))?;

    let owned_df;

    // if not all equal length, extend the DataFrame with nulls
    let dfs = if !dfs.iter().all(|df| df.height() == max_len) {
        owned_df = dfs
            .iter()
            .cloned()
            .map(|mut df| {
                if df.height() != max_len {
                    let diff = max_len - df.height();
                    df.columns
                        .iter_mut()
                        .for_each(|s| *s = s.extend_constant(AnyValue::Null, diff).unwrap());
                }
                df
            })
            .collect::<Vec<_>>();
        owned_df.as_slice()
    } else {
        dfs
    };

    let mut first_df = dfs[0].clone();

    for df in &dfs[1..] {
        first_df.hstack_mut(df.get_columns())?;
    }
    Ok(first_df)
}

/// Concat `[DataFrame]`s diagonally.
#[cfg(feature = "diagonal_concat")]
#[cfg_attr(docsrs, doc(cfg(feature = "diagonal_concat")))]
/// Concat diagonally thereby combining different schemas.
pub fn diag_concat_df(dfs: &[DataFrame]) -> PolarsResult<DataFrame> {
    // TODO! replace with lazy only?
    let upper_bound_width = dfs.iter().map(|df| df.width()).sum();
    let mut column_names = AHashSet::with_capacity(upper_bound_width);
    let mut schema = Vec::with_capacity(upper_bound_width);

    for df in dfs {
        df.get_columns().iter().for_each(|s| {
            let name = s.name();
            if column_names.insert(name) {
                schema.push((name, s.dtype()))
            }
        });
    }

    let dfs = dfs
        .iter()
        .map(|df| {
            let height = df.height();
            let mut columns = Vec::with_capacity(schema.len());

            for (name, dtype) in &schema {
                match df.column(name).ok() {
                    Some(s) => columns.push(s.clone()),
                    None => columns.push(Series::full_null(name, height, dtype)),
                }
            }
            DataFrame::new_no_checks(columns)
        })
        .collect::<Vec<_>>();

    concat_df(&dfs)
}

src/frame/arithmetic.rs (line 116)

    fn binary_aligned(
        &self,
        other: &DataFrame,
        f: &(dyn Fn(&Series, &Series) -> PolarsResult<Series> + Sync + Send),
    ) -> PolarsResult<DataFrame> {
        let max_len = std::cmp::max(self.height(), other.height());
        let max_width = std::cmp::max(self.width(), other.width());
        let mut cols = self
            .get_columns()
            .par_iter()
            .zip(other.get_columns().par_iter())
            .map(|(l, r)| {
                let diff_l = max_len - l.len();
                let diff_r = max_len - r.len();

                let st = try_get_supertype(l.dtype(), r.dtype())?;
                let mut l = l.cast(&st)?;
                let mut r = r.cast(&st)?;

                if diff_l > 0 {
                    l = l.extend_constant(AnyValue::Null, diff_l)?;
                };
                if diff_r > 0 {
                    r = r.extend_constant(AnyValue::Null, diff_r)?;
                };

                f(&l, &r)
            })
            .collect::<PolarsResult<Vec<_>>>()?;

        let col_len = cols.len();
        if col_len < max_width {
            let df = if col_len < self.width() { self } else { other };

            for i in col_len..max_len {
                let s = &df.get_columns()[i];
                let name = s.name();
                let dtype = s.dtype();

                // trick to fill a series with nulls
                let vals: &[Option<i32>] = &[None];
                let s = Series::new(name, vals).cast(dtype)?;
                cols.push(s.new_from_index(0, max_len))
            }
        }
        DataFrame::new(cols)
    }

Additional examples can be found in:

source

pub fn is_empty(&self) -> bool

Check if the DataFrame is empty.

Example

let df1: DataFrame = DataFrame::default();
assert!(df1.is_empty());

let df2: DataFrame = df!("First name" => &["Forever"],
                         "Last name" => &["Alone"])?;
assert!(!df2.is_empty());

Examples found in repository ?

src/frame/mod.rs (line 1129)

        fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();
            if series.len() == 1 && height > 1 {
                series = series.new_from_index(0, height);
            }

            if series.len() == height || df.is_empty() {
                df.add_column_by_search(series)?;
                Ok(df)
            }
            // special case for literals
            else if height == 0 && series.len() == 1 {
                let s = series.slice(0, 0);
                df.add_column_by_search(s)?;
                Ok(df)
            } else {
                Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Could not add column. The Series length {} differs from the DataFrame height: {}",
                        series.len(),
                        df.height()
                    )
                        .into(),
                ))
            }
        }
        let series = column.into_series();
        inner(self, series)
    }

    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let name = s.name();
        if let Some((idx, _, _)) = schema.get_full(name) {
            // schema is incorrect fallback to search
            if self.columns.get(idx).map(|s| s.name()) != Some(name) {
                self.add_column_by_search(s)?;
            } else {
                self.replace_at_idx(idx, s)?;
            }
        } else {
            self.columns.push(s);
        }
        Ok(())
    }

    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                self.with_column(s.clone())?;
            }
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    /// Uses an existing schema to amortize lookups.
    /// If the schema is incorrect, we will fallback to linear search.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let mut series = column.into_series();

        let height = self.height();
        if series.len() == 1 && height > 1 {
            series = series.new_from_index(0, height);
        }

        if series.len() == height || self.is_empty() {
            self.add_column_by_schema(series, schema)?;
            Ok(self)
        }
        // special case for literals
        else if height == 0 && series.len() == 1 {
            let s = series.slice(0, 0);
            self.add_column_by_schema(s, schema)?;
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                    .into(),
            ))
        }
    }

source

pub fn hstack_mut(&mut self, columns: &[Series]) -> PolarsResult<&mut Self>

Add multiple Series to a DataFrame. The added Series are required to have the same length.

Example

fn stack(df: &mut DataFrame, columns: &[Series]) {
    df.hstack_mut(columns);
}

Examples found in repository ?

src/utils/mod.rs (line 628)

pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
    let mut iter = dfs.into_iter();
    let mut acc_df = iter.next().unwrap();
    for df in iter {
        acc_df.hstack_mut(df.get_columns())?;
    }
    Ok(acc_df)
}

More examples

Hide additional examples

src/frame/hash_join/mod.rs (line 327)

pub fn _finish_join(
    mut df_left: DataFrame,
    mut df_right: DataFrame,
    suffix: Option<&str>,
) -> PolarsResult<DataFrame> {
    let mut left_names = PlHashSet::with_capacity(df_left.width());

    df_left.columns.iter().for_each(|series| {
        left_names.insert(series.name());
    });

    let mut rename_strs = Vec::with_capacity(df_right.width());

    df_right.columns.iter().for_each(|series| {
        if left_names.contains(series.name()) {
            rename_strs.push(series.name().to_owned())
        }
    });
    let suffix = suffix.unwrap_or("_right");

    for name in rename_strs {
        df_right.rename(&name, &_join_suffix_name(&name, suffix))?;
    }

    drop(left_names);
    df_left.hstack_mut(&df_right.columns)?;
    Ok(df_left)
}

src/functions.rs (line 249)

pub fn hor_concat_df(dfs: &[DataFrame]) -> PolarsResult<DataFrame> {
    let max_len = dfs
        .iter()
        .map(|df| df.height())
        .max()
        .ok_or_else(|| PolarsError::ComputeError("cannot concat empty dataframes".into()))?;

    let owned_df;

    // if not all equal length, extend the DataFrame with nulls
    let dfs = if !dfs.iter().all(|df| df.height() == max_len) {
        owned_df = dfs
            .iter()
            .cloned()
            .map(|mut df| {
                if df.height() != max_len {
                    let diff = max_len - df.height();
                    df.columns
                        .iter_mut()
                        .for_each(|s| *s = s.extend_constant(AnyValue::Null, diff).unwrap());
                }
                df
            })
            .collect::<Vec<_>>();
        owned_df.as_slice()
    } else {
        dfs
    };

    let mut first_df = dfs[0].clone();

    for df in &dfs[1..] {
        first_df.hstack_mut(df.get_columns())?;
    }
    Ok(first_df)
}

src/frame/explode.rs (line 304)

    pub fn melt2(&self, args: MeltArgs) -> PolarsResult<Self> {
        let id_vars = args.id_vars;
        let mut value_vars = args.value_vars;

        let value_name = args.value_name.as_deref().unwrap_or("value");
        let variable_name = args.variable_name.as_deref().unwrap_or("variable");

        let len = self.height();

        // if value vars is empty we take all columns that are not in id_vars.
        if value_vars.is_empty() {
            let id_vars_set = PlHashSet::from_iter(id_vars.iter().map(|s| s.as_str()));
            value_vars = self
                .get_columns()
                .iter()
                .filter_map(|s| {
                    if id_vars_set.contains(s.name()) {
                        None
                    } else {
                        Some(s.name().to_string())
                    }
                })
                .collect();
        }

        // values will all be placed in single column, so we must find their supertype
        let schema = self.schema();
        let mut iter = value_vars.iter().map(|v| {
            schema
                .get(v)
                .ok_or_else(|| PolarsError::NotFound(v.to_string().into()))
        });
        let mut st = iter.next().unwrap()?.clone();
        for dt in iter {
            st = try_get_supertype(&st, dt?)?;
        }

        let values_len = value_vars.iter().map(|name| name.len()).sum::<usize>();

        // The column name of the variable that is melted
        let mut variable_col = MutableUtf8Array::<i64>::with_capacities(
            len * value_vars.len() + 1,
            len * values_len + 1,
        );
        // prepare ids
        let ids_ = self.select(id_vars)?;
        let mut ids = ids_.clone();
        if ids.width() > 0 {
            for _ in 0..value_vars.len() - 1 {
                ids.vstack_mut_unchecked(&ids_)
            }
        }
        ids.as_single_chunk_par();
        drop(ids_);

        let mut values = Vec::with_capacity(value_vars.len());

        for value_column_name in &value_vars {
            variable_col.extend_trusted_len_values(std::iter::repeat(value_column_name).take(len));
            let value_col = self.column(value_column_name)?.cast(&st)?;
            values.extend_from_slice(value_col.chunks())
        }
        let values_arr = concatenate_owned_unchecked(&values)?;
        // Safety
        // The give dtype is correct
        let values =
            unsafe { Series::from_chunks_and_dtype_unchecked(value_name, vec![values_arr], &st) };

        let variable_col = variable_col.as_box();
        // Safety
        // The give dtype is correct
        let variables = unsafe {
            Series::from_chunks_and_dtype_unchecked(
                variable_name,
                vec![variable_col],
                &DataType::Utf8,
            )
        };

        ids.hstack_mut(&[variables, values])?;

        Ok(ids)
    }

source

pub fn hstack(&self, columns: &[Series]) -> PolarsResult<Self>

Add multiple Series to a DataFrame. The added Series are required to have the same length.

Example

let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"])?;
let s1: Series = Series::new("Proton", &[29, 47, 79]);
let s2: Series = Series::new("Electron", &[29, 47, 79]);

let df2: DataFrame = df1.hstack(&[s1, s2])?;
assert_eq!(df2.shape(), (3, 3));
println!("{}", df2);

Output:

shape: (3, 3)
+---------+--------+----------+
| Element | Proton | Electron |
| ---     | ---    | ---      |
| str     | i32    | i32      |
+=========+========+==========+
| Copper  | 29     | 29       |
+---------+--------+----------+
| Silver  | 47     | 47       |
+---------+--------+----------+
| Gold    | 79     | 79       |
+---------+--------+----------+

source

pub fn vstack(&self, other: &DataFrame) -> PolarsResult<Self>

Concatenate a DataFrame to this DataFrame and return as newly allocated DataFrame.

If many vstack operations are done, it is recommended to call DataFrame::rechunk.

Example

let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
                         "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
                         "Melting Point (K)" => &[2041.4, 1828.05])?;

let df3: DataFrame = df1.vstack(&df2)?;

assert_eq!(df3.shape(), (5, 2));
println!("{}", df3);

Output:

shape: (5, 2)
+-----------+-------------------+
| Element   | Melting Point (K) |
| ---       | ---               |
| str       | f64               |
+===========+===================+
| Copper    | 1357.77           |
+-----------+-------------------+
| Silver    | 1234.93           |
+-----------+-------------------+
| Gold      | 1337.33           |
+-----------+-------------------+
| Platinum  | 2041.4            |
+-----------+-------------------+
| Palladium | 1828.05           |
+-----------+-------------------+

source

pub fn vstack_mut(&mut self, other: &DataFrame) -> PolarsResult<&mut Self>

Concatenate a DataFrame to this DataFrame

If many vstack operations are done, it is recommended to call DataFrame::rechunk.

Example

let mut df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
                         "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
                         "Melting Point (K)" => &[2041.4, 1828.05])?;

df1.vstack_mut(&df2)?;

assert_eq!(df1.shape(), (5, 2));
println!("{}", df1);

Output:

shape: (5, 2)
+-----------+-------------------+
| Element   | Melting Point (K) |
| ---       | ---               |
| str       | f64               |
+===========+===================+
| Copper    | 1357.77           |
+-----------+-------------------+
| Silver    | 1234.93           |
+-----------+-------------------+
| Gold      | 1337.33           |
+-----------+-------------------+
| Platinum  | 2041.4            |
+-----------+-------------------+
| Palladium | 1828.05           |
+-----------+-------------------+

Examples found in repository ?

src/frame/mod.rs (line 854)

    pub fn vstack(&self, other: &DataFrame) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.vstack_mut(other)?;
        Ok(df)
    }

    /// Concatenate a DataFrame to this DataFrame
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// df1.vstack_mut(&df2)?;
    ///
    /// assert_eq!(df1.shape(), (5, 2));
    /// println!("{}", df1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    pub fn vstack_mut(&mut self, other: &DataFrame) -> PolarsResult<&mut Self> {
        if self.width() != other.width() {
            if self.width() == 0 {
                self.columns = other.columns.clone();
                return Ok(self);
            }

            return Err(PolarsError::ShapeMisMatch(
                format!("Could not vertically stack DataFrame. The DataFrames appended width {} differs from the parent DataFrames width {}", self.width(), other.width()).into()
            ));
        }

        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .try_for_each::<_, PolarsResult<_>>(|(left, right)| {
                can_extend(left, right)?;
                left.append(right).expect("should not fail");
                Ok(())
            })?;
        Ok(self)
    }

    /// Does not check if schema is correct
    pub(crate) fn vstack_mut_unchecked(&mut self, other: &DataFrame) {
        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .for_each(|(left, right)| {
                left.append(right).expect("should not fail");
            });
    }

    /// Extend the memory backed by this [`DataFrame`] with the values from `other`.
    ///
    /// Different from [`vstack`](Self::vstack) which adds the chunks from `other` to the chunks of this [`DataFrame`]
    /// `extend` appends the data from `other` to the underlying memory locations and thus may cause a reallocation.
    ///
    /// If this does not cause a reallocation, the resulting data structure will not have any extra chunks
    /// and thus will yield faster queries.
    ///
    /// Prefer `extend` over `vstack` when you want to do a query after a single append. For instance during
    /// online operations where you add `n` rows and rerun a query.
    ///
    /// Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance
    /// when you read in multiple files and when to store them in a single `DataFrame`. In the latter case, finish the sequence
    /// of `append` operations with a [`rechunk`](Self::rechunk).
    pub fn extend(&mut self, other: &DataFrame) -> PolarsResult<()> {
        if self.width() != other.width() {
            return Err(PolarsError::ShapeMisMatch(
                format!("Could not extend DataFrame. The DataFrames extended width {} differs from the parent DataFrames width {}", self.width(), other.width()).into()
            ));
        }

        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .try_for_each::<_, PolarsResult<_>>(|(left, right)| {
                can_extend(left, right)?;
                left.extend(right).unwrap();
                Ok(())
            })?;
        Ok(())
    }

    /// Remove a column by name and return the column removed.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Animal" => &["Tiger", "Lion", "Great auk"],
    ///                             "IUCN" => &["Endangered", "Vulnerable", "Extinct"])?;
    ///
    /// let s1: PolarsResult<Series> = df.drop_in_place("Average weight");
    /// assert!(s1.is_err());
    ///
    /// let s2: Series = df.drop_in_place("Animal")?;
    /// assert_eq!(s2, Series::new("Animal", &["Tiger", "Lion", "Great auk"]));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop_in_place(&mut self, name: &str) -> PolarsResult<Series> {
        let idx = self.check_name_to_idx(name)?;
        Ok(self.columns.remove(idx))
    }

    /// Return a new `DataFrame` where all null values are dropped.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Country" => ["Malta", "Liechtenstein", "North Korea"],
    ///                         "Tax revenue (% GDP)" => [Some(32.7), None, None])?;
    /// assert_eq!(df1.shape(), (3, 2));
    ///
    /// let df2: DataFrame = df1.drop_nulls(None)?;
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------------------+
    /// | Country | Tax revenue (% GDP) |
    /// | ---     | ---                 |
    /// | str     | f64                 |
    /// +=========+=====================+
    /// | Malta   | 32.7                |
    /// +---------+---------------------+
    /// ```
    pub fn drop_nulls(&self, subset: Option<&[String]>) -> PolarsResult<Self> {
        let selected_series;

        let mut iter = match subset {
            Some(cols) => {
                selected_series = self.select_series(cols)?;
                selected_series.iter()
            }
            None => self.columns.iter(),
        };

        // fast path for no nulls in df
        if iter.clone().all(|s| !s.has_validity()) {
            return Ok(self.clone());
        }

        let mask = iter
            .next()
            .ok_or_else(|| PolarsError::NoData("No data to drop nulls from".into()))?;
        let mut mask = mask.is_not_null();

        for s in iter {
            mask = mask & s.is_not_null();
        }
        self.filter(&mask)
    }

    /// Drop a column by name.
    /// This is a pure method and will return a new `DataFrame` instead of modifying
    /// the current one in place.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Ray type" => &["α", "β", "X", "γ"])?;
    /// let df2: DataFrame = df1.drop("Ray type")?;
    ///
    /// assert!(df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop(&self, name: &str) -> PolarsResult<Self> {
        let idx = self.check_name_to_idx(name)?;
        let mut new_cols = Vec::with_capacity(self.columns.len() - 1);

        self.columns.iter().enumerate().for_each(|(i, s)| {
            if i != idx {
                new_cols.push(s.clone())
            }
        });

        Ok(DataFrame::new_no_checks(new_cols))
    }

    pub fn drop_many<S: AsRef<str>>(&self, names: &[S]) -> Self {
        let names = names.iter().map(|s| s.as_ref()).collect();
        fn inner(df: &DataFrame, names: Vec<&str>) -> DataFrame {
            let mut new_cols = Vec::with_capacity(df.columns.len() - names.len());
            df.columns.iter().for_each(|s| {
                if !names.contains(&s.name()) {
                    new_cols.push(s.clone())
                }
            });

            DataFrame::new_no_checks(new_cols)
        }
        inner(self, names)
    }

    fn insert_at_idx_no_name_check(
        &mut self,
        index: usize,
        series: Series,
    ) -> PolarsResult<&mut Self> {
        if series.len() == self.height() {
            self.columns.insert(index, series);
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                .into(),
            ))
        }
    }

    /// Insert a new column at a given index.
    pub fn insert_at_idx<S: IntoSeries>(
        &mut self,
        index: usize,
        column: S,
    ) -> PolarsResult<&mut Self> {
        let series = column.into_series();
        self.check_already_present(series.name())?;
        self.insert_at_idx_no_name_check(index, series)
    }

    fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> {
        if let Some(idx) = self.find_idx_by_name(series.name()) {
            self.replace_at_idx(idx, series)?;
        } else {
            self.columns.push(series);
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    pub fn with_column<S: IntoSeries>(&mut self, column: S) -> PolarsResult<&mut Self> {
        fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();
            if series.len() == 1 && height > 1 {
                series = series.new_from_index(0, height);
            }

            if series.len() == height || df.is_empty() {
                df.add_column_by_search(series)?;
                Ok(df)
            }
            // special case for literals
            else if height == 0 && series.len() == 1 {
                let s = series.slice(0, 0);
                df.add_column_by_search(s)?;
                Ok(df)
            } else {
                Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Could not add column. The Series length {} differs from the DataFrame height: {}",
                        series.len(),
                        df.height()
                    )
                        .into(),
                ))
            }
        }
        let series = column.into_series();
        inner(self, series)
    }

    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let name = s.name();
        if let Some((idx, _, _)) = schema.get_full(name) {
            // schema is incorrect fallback to search
            if self.columns.get(idx).map(|s| s.name()) != Some(name) {
                self.add_column_by_search(s)?;
            } else {
                self.replace_at_idx(idx, s)?;
            }
        } else {
            self.columns.push(s);
        }
        Ok(())
    }

    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                self.with_column(s.clone())?;
            }
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    /// Uses an existing schema to amortize lookups.
    /// If the schema is incorrect, we will fallback to linear search.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let mut series = column.into_series();

        let height = self.height();
        if series.len() == 1 && height > 1 {
            series = series.new_from_index(0, height);
        }

        if series.len() == height || self.is_empty() {
            self.add_column_by_schema(series, schema)?;
            Ok(self)
        }
        // special case for literals
        else if height == 0 && series.len() == 1 {
            let s = series.slice(0, 0);
            self.add_column_by_schema(s, schema)?;
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                    .into(),
            ))
        }
    }

    /// Get a row in the `DataFrame`. Beware this is slow.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    ///     df.get(idx)
    /// }
    /// ```
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        match self.columns.get(0) {
            Some(s) => {
                if s.len() <= idx {
                    return None;
                }
            }
            None => return None,
        }
        // safety: we just checked bounds
        unsafe { Some(self.columns.iter().map(|s| s.get_unchecked(idx)).collect()) }
    }

    /// Select a `Series` by index.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
    ///                         "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
    ///
    /// let s1: Option<&Series> = df.select_at_idx(0);
    /// let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
    ///
    /// assert_eq!(s1, Some(&s2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_at_idx(&self, idx: usize) -> Option<&Series> {
        self.columns.get(idx)
    }

    /// Select a mutable series by index.
    ///
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> {
        self.columns.get_mut(idx)
    }

    /// Select column(s) from this `DataFrame` by range and return a new DataFrame
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///     "0" => &[0, 0, 0],
    ///     "1" => &[1, 1, 1],
    ///     "2" => &[2, 2, 2]
    /// }?;
    ///
    /// assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
    /// assert!(df.frame_equal(&df.select_by_range(..)?));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // This function is copied from std::slice::range (https://doc.rust-lang.org/std/slice/fn.range.html)
        // because it is the nightly feature. We should change here if this function were stable.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let len = bounds.end;

            let start: ops::Bound<&usize> = range.start_bound();
            let start = match start {
                ops::Bound::Included(&start) => start,
                ops::Bound::Excluded(start) => start.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice from after maximum usize");
                }),
                ops::Bound::Unbounded => 0,
            };

            let end: ops::Bound<&usize> = range.end_bound();
            let end = match end {
                ops::Bound::Included(end) => end.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice up to maximum usize");
                }),
                ops::Bound::Excluded(&end) => end,
                ops::Bound::Unbounded => len,
            };

            if start > end {
                panic!("slice index starts at {start} but ends at {end}");
            }
            if end > len {
                panic!("range end index {end} out of range for slice of length {len}",);
            }

            ops::Range { start, end }
        }

        let colnames = self.get_column_names_owned();
        let range = get_range(range, ..colnames.len());

        self.select_impl(&colnames[range])
    }

    /// Get column index of a `Series` by name.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
    ///                         "Health" => &[100, 200, 500],
    ///                         "Mana" => &[250, 100, 0],
    ///                         "Strength" => &[30, 150, 300])?;
    ///
    /// assert_eq!(df.find_idx_by_name("Name"), Some(0));
    /// assert_eq!(df.find_idx_by_name("Health"), Some(1));
    /// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
    /// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
    /// assert_eq!(df.find_idx_by_name("Haste"), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
        self.columns.iter().position(|s| s.name() == name)
    }

    /// Select a single column by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
    /// let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
    ///
    /// assert_eq!(df.column("Password")?, &s1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        let idx = self
            .find_idx_by_name(name)
            .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
        Ok(self.select_at_idx(idx).unwrap())
    }

    /// Selected multiple columns by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        names
            .into_iter()
            .map(|name| self.column(name.as_ref()))
            .collect()
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_impl(&cols)
    }

    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_physical_impl(&cols)
    }

    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_physical_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut names = PlHashSet::with_capacity(cols.len());
        for name in cols {
            if !names.insert(name.as_str()) {
                _duplicate_err(name)?
            }
        }
        Ok(())
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        let cols = selection.into_vec();
        self.select_series_impl(&cols)
    }

    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        self.columns
            .iter()
            .enumerate()
            .map(|(i, s)| (s.name(), i))
            .collect()
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            let name_to_idx = self._names_to_idx_map();
            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self
                        .select_at_idx(idx)
                        .unwrap()
                        .to_physical_repr()
                        .into_owned())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            // we hash, because there are user that having millions of columns.
            // # https://github.com/pola-rs/polars/issues/1023
            let name_to_idx = self._names_to_idx_map();

            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self.select_at_idx(idx).unwrap().clone())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.clone()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// Select a mutable series by name.
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        let opt_idx = self.find_idx_by_name(name);

        match opt_idx {
            Some(idx) => self.select_at_idx_mut(idx),
            None => None,
        }
    }

    /// Does a filter but splits thread chunks vertically instead of horizontally
    /// This yields a DataFrame with `n_chunks == n_threads`.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let masks = split_ca(mask, n_threads).unwrap();
        let dfs = split_df(self, n_threads).unwrap();
        let dfs: PolarsResult<Vec<_>> = POOL.install(|| {
            masks
                .par_iter()
                .zip(dfs)
                .map(|(mask, df)| {
                    let cols = df
                        .columns
                        .iter()
                        .map(|s| s.filter(mask))
                        .collect::<PolarsResult<_>>()?;
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        let mut iter = dfs?.into_iter();
        let first = iter.next().unwrap();
        Ok(iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        }))
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            return self.clone().filter_vertical(mask);
        }
        let new_col = self.try_apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s.filter_threaded(mask, true),
            _ => s.filter(mask),
        })?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Same as `filter` but does not parallelize.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let new_col = self.try_apply_columns(&|s| s.filter(mask))?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        let new_col = self.try_apply_columns_par(&|s| {
            let mut i = iter.clone();
            s.take_iter(&mut i)
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` values by indexes from an iterator.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            return self.take_unchecked_vectical(&idx_ca.into_inner());
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            let idx_ca = idx_ca.into_inner();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_iter_unchecked(&mut i)
            })
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` values by indexes from an iterator that may contain None values.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked_vectical(&idx_ca);
        }

        let n_chunks = self.n_chunks();

        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_opt_iter_unchecked(&mut i)
            })
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` rows by index values.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        let indices = if indices.chunks.len() > 1 {
            Cow::Owned(indices.rechunk())
        } else {
            Cow::Borrowed(indices)
        };
        let new_col = POOL.install(|| {
            self.try_apply_columns_par(&|s| match s.dtype() {
                DataType::Utf8 => s.take_threaded(&indices, true),
                _ => s.take(&indices),
            })
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        self.take_unchecked_impl(idx, true)
    }

    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        let cols = if allow_threads {
            POOL.install(|| {
                self.apply_columns_par(&|s| match s.dtype() {
                    DataType::Utf8 => s.take_unchecked_threaded(idx, true).unwrap(),
                    _ => s.take_unchecked(idx).unwrap(),
                })
            })
        } else {
            self.columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect()
        };
        DataFrame::new_no_checks(cols)
    }

    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let idxs = split_ca(indices, n_threads).unwrap();

        let dfs: Vec<_> = POOL.install(|| {
            idxs.par_iter()
                .map(|idx| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(idx).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        let mut iter = dfs.into_iter();
        let first = iter.next().unwrap();
        iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        })
    }

More examples

Hide additional examples

src/utils/mod.rs (line 589)

pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
where
    I: IntoIterator<Item = DataFrame>,
{
    let mut iter = dfs.into_iter();
    let additional = iter.size_hint().0;
    let mut acc_df = iter.next().unwrap();
    acc_df.reserve_chunks(additional);
    for df in iter {
        acc_df.vstack_mut(&df)?;
    }
    Ok(acc_df)
}

/// Concat the DataFrames to a single DataFrame.
pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
where
    I: IntoIterator<Item = &'a DataFrame>,
{
    let mut iter = dfs.into_iter();
    let additional = iter.size_hint().0;
    let mut acc_df = iter.next().unwrap().clone();
    acc_df.reserve_chunks(additional);
    for df in iter {
        acc_df.vstack_mut(df)?;
    }
    Ok(acc_df)
}

source

pub fn extend(&mut self, other: &DataFrame) -> PolarsResult<()>

Extend the memory backed by this DataFrame with the values from other.

Different from vstack which adds the chunks from other to the chunks of this DataFrame extend appends the data from other to the underlying memory locations and thus may cause a reallocation.

If this does not cause a reallocation, the resulting data structure will not have any extra chunks and thus will yield faster queries.

Prefer extend over vstack when you want to do a query after a single append. For instance during online operations where you add n rows and rerun a query.

Prefer vstack over extend when you want to append many times before doing a query. For instance when you read in multiple files and when to store them in a single DataFrame. In the latter case, finish the sequence of append operations with a rechunk.

source

pub fn drop_in_place(&mut self, name: &str) -> PolarsResult<Series>

Remove a column by name and return the column removed.

Example

let mut df: DataFrame = df!("Animal" => &["Tiger", "Lion", "Great auk"],
                            "IUCN" => &["Endangered", "Vulnerable", "Extinct"])?;

let s1: PolarsResult<Series> = df.drop_in_place("Average weight");
assert!(s1.is_err());

let s2: Series = df.drop_in_place("Animal")?;
assert_eq!(s2, Series::new("Animal", &["Tiger", "Lion", "Great auk"]));

source

pub fn drop_nulls(&self, subset: Option<&[String]>) -> PolarsResult<Self>

Return a new DataFrame where all null values are dropped.

Example

let df1: DataFrame = df!("Country" => ["Malta", "Liechtenstein", "North Korea"],
                        "Tax revenue (% GDP)" => [Some(32.7), None, None])?;
assert_eq!(df1.shape(), (3, 2));

let df2: DataFrame = df1.drop_nulls(None)?;
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);

Output:

shape: (1, 2)
+---------+---------------------+
| Country | Tax revenue (% GDP) |
| ---     | ---                 |
| str     | f64                 |
+=========+=====================+
| Malta   | 32.7                |
+---------+---------------------+

source

pub fn drop(&self, name: &str) -> PolarsResult<Self>

Drop a column by name. This is a pure method and will return a new DataFrame instead of modifying the current one in place.

Example

let df1: DataFrame = df!("Ray type" => &["α", "β", "X", "γ"])?;
let df2: DataFrame = df1.drop("Ray type")?;

assert!(df2.is_empty());

Examples found in repository ?

src/frame/hash_join/mod.rs (line 481)

    pub fn _left_join_from_series(
        &self,
        other: &DataFrame,
        s_left: &Series,
        s_right: &Series,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
        verbose: bool,
    ) -> PolarsResult<DataFrame> {
        #[cfg(feature = "dtype-categorical")]
        _check_categorical_src(s_left.dtype(), s_right.dtype())?;

        // ensure that the chunks are aligned otherwise we go OOB
        let mut left = self.clone();
        let mut s_left = s_left.clone();
        let mut right = other.clone();
        let mut s_right = s_right.clone();
        if left.should_rechunk() {
            left.as_single_chunk_par();
            s_left = s_left.rechunk();
        }
        if right.should_rechunk() {
            right.as_single_chunk_par();
            s_right = s_right.rechunk();
        }
        let ids = sort_or_hash_left(&s_left, &s_right, verbose);
        left._finish_left_join(ids, &right.drop(s_right.name()).unwrap(), suffix, slice)
    }

    #[cfg(feature = "semi_anti_join")]
    /// # Safety
    /// `idx` must be in bounds
    pub unsafe fn _finish_anti_semi_join(
        &self,
        mut idx: &[IdxSize],
        slice: Option<(i64, usize)>,
    ) -> DataFrame {
        if let Some((offset, len)) = slice {
            idx = slice_slice(idx, offset, len);
        }
        // idx from anti-semi join should always be sorted
        self._take_unchecked_slice2(idx, true, IsSorted::Ascending)
    }

    #[cfg(feature = "semi_anti_join")]
    pub fn _semi_anti_join_from_series(
        &self,
        s_left: &Series,
        s_right: &Series,
        slice: Option<(i64, usize)>,
        anti: bool,
    ) -> PolarsResult<DataFrame> {
        #[cfg(feature = "dtype-categorical")]
        _check_categorical_src(s_left.dtype(), s_right.dtype())?;

        let idx = s_left.hash_join_semi_anti(s_right, anti);
        // Safety:
        // indices are in bounds
        Ok(unsafe { self._finish_anti_semi_join(&idx, slice) })
    }
    pub fn _outer_join_from_series(
        &self,
        other: &DataFrame,
        s_left: &Series,
        s_right: &Series,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        #[cfg(feature = "dtype-categorical")]
        _check_categorical_src(s_left.dtype(), s_right.dtype())?;

        // store this so that we can keep original column order.
        let join_column_index = self.iter().position(|s| s.name() == s_left.name()).unwrap();

        // Get the indexes of the joined relations
        let opt_join_tuples = s_left.hash_join_outer(s_right);
        let mut opt_join_tuples = &*opt_join_tuples;

        if let Some((offset, len)) = slice {
            opt_join_tuples = slice_slice(opt_join_tuples, offset, len);
        }

        // Take the left and right dataframes by join tuples
        let (mut df_left, df_right) = POOL.join(
            || unsafe {
                self.drop(s_left.name()).unwrap().take_opt_iter_unchecked(
                    opt_join_tuples
                        .iter()
                        .map(|(left, _right)| left.map(|i| i as usize)),
                )
            },
            || unsafe {
                other.drop(s_right.name()).unwrap().take_opt_iter_unchecked(
                    opt_join_tuples
                        .iter()
                        .map(|(_left, right)| right.map(|i| i as usize)),
                )
            },
        );

        let mut s = s_left
            .to_physical_repr()
            .zip_outer_join_column(&s_right.to_physical_repr(), opt_join_tuples);
        s.rename(s_left.name());
        let s = match s_left.dtype() {
            #[cfg(feature = "dtype-categorical")]
            DataType::Categorical(_) => {
                let ca_left = s_left.categorical().unwrap();
                let new_rev_map = ca_left.merge_categorical_map(s_right.categorical().unwrap())?;
                let logical = s.u32().unwrap().clone();
                // safety:
                // categorical maps are merged
                unsafe {
                    CategoricalChunked::from_cats_and_rev_map_unchecked(logical, new_rev_map)
                        .into_series()
                }
            }
            dt @ DataType::Datetime(_, _)
            | dt @ DataType::Time
            | dt @ DataType::Date
            | dt @ DataType::Duration(_) => s.cast(dt).unwrap(),
            _ => s,
        };

        df_left.get_columns_mut().insert(join_column_index, s);
        _finish_join(df_left, df_right, suffix.as_deref())
    }

More examples

Hide additional examples

src/frame/explode.rs (line 48)

    pub fn explode_impl(&self, mut columns: Vec<Series>) -> PolarsResult<DataFrame> {
        let mut df = self.clone();
        if self.height() == 0 {
            for s in &columns {
                df.with_column(s.explode()?)?;
            }
            return Ok(df);
        }
        columns.sort_by(|sa, sb| {
            self.check_name_to_idx(sa.name())
                .expect("checked above")
                .partial_cmp(&self.check_name_to_idx(sb.name()).expect("checked above"))
                .expect("cmp usize -> Ordering")
        });

        // first remove all the exploded columns
        for s in &columns {
            df = df.drop(s.name())?;
        }

        for (i, s) in columns.iter().enumerate() {
            // Safety:
            // offsets don't have indices exceeding Series length.
            if let Ok((exploded, offsets)) = get_exploded(s) {
                let col_idx = self.check_name_to_idx(s.name())?;

                // expand all the other columns based the exploded first column
                if i == 0 {
                    let row_idx = offsets_to_indexes(offsets.as_slice(), exploded.len());
                    let mut row_idx = IdxCa::from_vec("", row_idx);
                    row_idx.set_sorted(false);

                    // Safety
                    // We just created indices that are in bounds.
                    df = unsafe { df.take_unchecked(&row_idx) };
                }
                if exploded.len() == df.height() || df.width() == 0 {
                    df.columns.insert(col_idx, exploded);
                } else {
                    return Err(PolarsError::ShapeMisMatch(
                        format!("The exploded column(s) don't have the same length. Length DataFrame: {}. Length exploded column {}: {}", df.height(), exploded.name(), exploded.len()).into(),
                    ));
                }
            } else {
                return Err(PolarsError::InvalidOperation(
                    format!("cannot explode dtype: {:?}", s.dtype()).into(),
                ));
            }
        }
        Ok(df)
    }

src/frame/asof_join/mod.rs (line 171)

    pub fn _join_asof(
        &self,
        other: &DataFrame,
        left_on: &str,
        right_on: &str,
        strategy: AsofStrategy,
        tolerance: Option<AnyValue<'static>>,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        let left_key = self.column(left_on)?;
        let right_key = other.column(right_on)?;

        check_asof_columns(left_key, right_key)?;
        let left_key = left_key.to_physical_repr();
        let right_key = right_key.to_physical_repr();

        let take_idx = match left_key.dtype() {
            DataType::Int64 => left_key
                .i64()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::Int32 => left_key
                .i32()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::UInt64 => left_key
                .u64()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::UInt32 => left_key
                .u32()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::Float32 => left_key
                .f32()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::Float64 => left_key
                .f64()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            _ => {
                let left_key = left_key.cast(&DataType::Int32).unwrap();
                let right_key = right_key.cast(&DataType::Int32).unwrap();
                left_key
                    .i32()
                    .unwrap()
                    .join_asof(&right_key, strategy, tolerance)
            }
        }?;

        // take_idx are sorted so this is a bound check for all
        if let Some(Some(idx)) = take_idx.last() {
            assert!((*idx as usize) < other.height())
        }

        // drop right join column
        let other = if left_on == right_on {
            Cow::Owned(other.drop(right_on)?)
        } else {
            Cow::Borrowed(other)
        };

        let mut left = self.clone();
        let mut take_idx = &*take_idx;

        if let Some((offset, len)) = slice {
            left = left.slice(offset, len);
            take_idx = slice_slice(take_idx, offset, len);
        }

        // Safety:
        // join tuples are in bounds
        let right_df = unsafe {
            other.take_opt_iter_unchecked(
                take_idx
                    .iter()
                    .map(|opt_idx| opt_idx.map(|idx| idx as usize)),
            )
        };

        _finish_join(left, right_df, suffix.as_deref())
    }

source

pub fn drop_many<S: AsRef<str>>(&self, names: &[S]) -> Self

source

pub fn insert_at_idx<S: IntoSeries>(
 &mut self,
 index: usize,
 column: S
) -> PolarsResult<&mut Self>

Insert a new column at a given index.

Examples found in repository ?

src/frame/mod.rs (line 2549)

    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let mut columns: Vec<Series> = vec![];

            for s in df.columns.iter() {
                columns.push(s.cast(&DataType::Float64).expect("cast to float failed"));
            }

            DataFrame::new(columns).unwrap()
        }

        fn count(df: &DataFrame) -> DataFrame {
            let columns = df.apply_columns_par(&|s| Series::new(s.name(), [s.len() as IdxSize]));
            DataFrame::new_no_checks(columns)
        }

        let percentiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        let mut headers: Vec<String> = vec![
            "count".to_string(),
            "mean".to_string(),
            "std".to_string(),
            "min".to_string(),
        ];

        let mut tmp: Vec<DataFrame> = vec![
            describe_cast(&count(self)),
            describe_cast(&self.mean()),
            describe_cast(&self.std(1)),
            describe_cast(&self.min()),
        ];

        for p in percentiles {
            tmp.push(describe_cast(
                &self
                    .quantile(*p, QuantileInterpolOptions::Linear)
                    .expect("quantile failed"),
            ));
            headers.push(format!("{}%", *p * 100.0));
        }

        // Keep order same as pandas
        tmp.push(describe_cast(&self.max()));
        headers.push("max".to_string());

        let mut summary = concat_df_unchecked(&tmp);

        summary
            .insert_at_idx(0, Series::new("describe", headers))
            .expect("insert of header failed");

        summary
    }

source

pub fn with_column<S: IntoSeries>(
&mut self,
column: S
) -> PolarsResult<&mut Self>

Add a new column to this DataFrame or replace an existing one.

Examples found in repository ?

src/frame/mod.rs (line 1175)

    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                self.with_column(s.clone())?;
            }
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    /// Uses an existing schema to amortize lookups.
    /// If the schema is incorrect, we will fallback to linear search.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let mut series = column.into_series();

        let height = self.height();
        if series.len() == 1 && height > 1 {
            series = series.new_from_index(0, height);
        }

        if series.len() == height || self.is_empty() {
            self.add_column_by_schema(series, schema)?;
            Ok(self)
        }
        // special case for literals
        else if height == 0 && series.len() == 1 {
            let s = series.slice(0, 0);
            self.add_column_by_schema(s, schema)?;
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                    .into(),
            ))
        }
    }

    /// Get a row in the `DataFrame`. Beware this is slow.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    ///     df.get(idx)
    /// }
    /// ```
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        match self.columns.get(0) {
            Some(s) => {
                if s.len() <= idx {
                    return None;
                }
            }
            None => return None,
        }
        // safety: we just checked bounds
        unsafe { Some(self.columns.iter().map(|s| s.get_unchecked(idx)).collect()) }
    }

    /// Select a `Series` by index.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
    ///                         "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
    ///
    /// let s1: Option<&Series> = df.select_at_idx(0);
    /// let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
    ///
    /// assert_eq!(s1, Some(&s2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_at_idx(&self, idx: usize) -> Option<&Series> {
        self.columns.get(idx)
    }

    /// Select a mutable series by index.
    ///
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> {
        self.columns.get_mut(idx)
    }

    /// Select column(s) from this `DataFrame` by range and return a new DataFrame
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///     "0" => &[0, 0, 0],
    ///     "1" => &[1, 1, 1],
    ///     "2" => &[2, 2, 2]
    /// }?;
    ///
    /// assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
    /// assert!(df.frame_equal(&df.select_by_range(..)?));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // This function is copied from std::slice::range (https://doc.rust-lang.org/std/slice/fn.range.html)
        // because it is the nightly feature. We should change here if this function were stable.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let len = bounds.end;

            let start: ops::Bound<&usize> = range.start_bound();
            let start = match start {
                ops::Bound::Included(&start) => start,
                ops::Bound::Excluded(start) => start.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice from after maximum usize");
                }),
                ops::Bound::Unbounded => 0,
            };

            let end: ops::Bound<&usize> = range.end_bound();
            let end = match end {
                ops::Bound::Included(end) => end.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice up to maximum usize");
                }),
                ops::Bound::Excluded(&end) => end,
                ops::Bound::Unbounded => len,
            };

            if start > end {
                panic!("slice index starts at {start} but ends at {end}");
            }
            if end > len {
                panic!("range end index {end} out of range for slice of length {len}",);
            }

            ops::Range { start, end }
        }

        let colnames = self.get_column_names_owned();
        let range = get_range(range, ..colnames.len());

        self.select_impl(&colnames[range])
    }

    /// Get column index of a `Series` by name.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
    ///                         "Health" => &[100, 200, 500],
    ///                         "Mana" => &[250, 100, 0],
    ///                         "Strength" => &[30, 150, 300])?;
    ///
    /// assert_eq!(df.find_idx_by_name("Name"), Some(0));
    /// assert_eq!(df.find_idx_by_name("Health"), Some(1));
    /// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
    /// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
    /// assert_eq!(df.find_idx_by_name("Haste"), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
        self.columns.iter().position(|s| s.name() == name)
    }

    /// Select a single column by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
    /// let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
    ///
    /// assert_eq!(df.column("Password")?, &s1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        let idx = self
            .find_idx_by_name(name)
            .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
        Ok(self.select_at_idx(idx).unwrap())
    }

    /// Selected multiple columns by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        names
            .into_iter()
            .map(|name| self.column(name.as_ref()))
            .collect()
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_impl(&cols)
    }

    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_physical_impl(&cols)
    }

    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_physical_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut names = PlHashSet::with_capacity(cols.len());
        for name in cols {
            if !names.insert(name.as_str()) {
                _duplicate_err(name)?
            }
        }
        Ok(())
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        let cols = selection.into_vec();
        self.select_series_impl(&cols)
    }

    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        self.columns
            .iter()
            .enumerate()
            .map(|(i, s)| (s.name(), i))
            .collect()
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            let name_to_idx = self._names_to_idx_map();
            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self
                        .select_at_idx(idx)
                        .unwrap()
                        .to_physical_repr()
                        .into_owned())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            // we hash, because there are user that having millions of columns.
            // # https://github.com/pola-rs/polars/issues/1023
            let name_to_idx = self._names_to_idx_map();

            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self.select_at_idx(idx).unwrap().clone())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.clone()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// Select a mutable series by name.
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        let opt_idx = self.find_idx_by_name(name);

        match opt_idx {
            Some(idx) => self.select_at_idx_mut(idx),
            None => None,
        }
    }

    /// Does a filter but splits thread chunks vertically instead of horizontally
    /// This yields a DataFrame with `n_chunks == n_threads`.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let masks = split_ca(mask, n_threads).unwrap();
        let dfs = split_df(self, n_threads).unwrap();
        let dfs: PolarsResult<Vec<_>> = POOL.install(|| {
            masks
                .par_iter()
                .zip(dfs)
                .map(|(mask, df)| {
                    let cols = df
                        .columns
                        .iter()
                        .map(|s| s.filter(mask))
                        .collect::<PolarsResult<_>>()?;
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        let mut iter = dfs?.into_iter();
        let first = iter.next().unwrap();
        Ok(iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        }))
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            return self.clone().filter_vertical(mask);
        }
        let new_col = self.try_apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s.filter_threaded(mask, true),
            _ => s.filter(mask),
        })?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Same as `filter` but does not parallelize.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let new_col = self.try_apply_columns(&|s| s.filter(mask))?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        let new_col = self.try_apply_columns_par(&|s| {
            let mut i = iter.clone();
            s.take_iter(&mut i)
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` values by indexes from an iterator.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            return self.take_unchecked_vectical(&idx_ca.into_inner());
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            let idx_ca = idx_ca.into_inner();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_iter_unchecked(&mut i)
            })
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` values by indexes from an iterator that may contain None values.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked_vectical(&idx_ca);
        }

        let n_chunks = self.n_chunks();

        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_opt_iter_unchecked(&mut i)
            })
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` rows by index values.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        let indices = if indices.chunks.len() > 1 {
            Cow::Owned(indices.rechunk())
        } else {
            Cow::Borrowed(indices)
        };
        let new_col = POOL.install(|| {
            self.try_apply_columns_par(&|s| match s.dtype() {
                DataType::Utf8 => s.take_threaded(&indices, true),
                _ => s.take(&indices),
            })
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        self.take_unchecked_impl(idx, true)
    }

    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        let cols = if allow_threads {
            POOL.install(|| {
                self.apply_columns_par(&|s| match s.dtype() {
                    DataType::Utf8 => s.take_unchecked_threaded(idx, true).unwrap(),
                    _ => s.take_unchecked(idx).unwrap(),
                })
            })
        } else {
            self.columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect()
        };
        DataFrame::new_no_checks(cols)
    }

    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let idxs = split_ca(indices, n_threads).unwrap();

        let dfs: Vec<_> = POOL.install(|| {
            idxs.par_iter()
                .map(|idx| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(idx).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        let mut iter = dfs.into_iter();
        let first = iter.next().unwrap();
        iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        })
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        self.select_mut(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))
            .map(|s| s.rename(name))?;

        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(self.columns.iter().map(|s| s.name()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }
        Ok(self)
    }

    /// Sort `DataFrame` in place by a column.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // a lot of indirection in both sorting and take
        self.as_single_chunk_par();
        let by_column = self.select_series(by_column)?;
        let reverse = reverse.into_vec();
        self.columns = self.sort_impl(by_column, reverse, false, None)?.columns;
        Ok(self)
    }

    /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // note that the by_column argument also contains evaluated expression from polars-lazy
        // that may not even be present in this dataframe.

        // therefore when we try to set the first columns as sorted, we ignore the error
        // as expressions are not present (they are renamed to _POLARS_SORT_COLUMN_i.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: reverse[0],
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted
        // if the column did not exists it is ok, because we sorted by an expression
        // not present in the dataframe
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted clone of this `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.sort_in_place(by_column, reverse)?;
        Ok(df)
    }

    /// Sort the `DataFrame` by a single column with extra options.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut df = self.clone();
        // a lot of indirection in both sorting and take
        df.as_single_chunk_par();
        let by_column = vec![df.column(by_column)?.clone()];
        let reverse = vec![options.descending];
        df.columns = df
            .sort_impl(by_column, reverse, options.nulls_last, None)?
            .columns;
        Ok(df)
    }

    /// Replace a column with a `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Country" => &["United States", "China"],
    ///                         "Area (km²)" => &[9_833_520, 9_596_961])?;
    /// let s: Series = Series::new("Country", &["USA", "PRC"]);
    ///
    /// assert!(df.replace("Nation", s.clone()).is_err());
    /// assert!(df.replace("Country", s).is_ok());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace<S: IntoSeries>(&mut self, column: &str, new_col: S) -> PolarsResult<&mut Self> {
        self.apply(column, |_| new_col.into_series())
    }

    /// Replace or update a column. The difference between this method and [DataFrame::with_column]
    /// is that now the value of `column: &str` determines the name of the column and not the name
    /// of the `Series` passed to this method.
    pub fn replace_or_add<S: IntoSeries>(
        &mut self,
        column: &str,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut new_col = new_col.into_series();
        new_col.rename(column);
        self.with_column(new_col)
    }

More examples

Hide additional examples

src/frame/explode.rs (line 35)

    pub fn explode_impl(&self, mut columns: Vec<Series>) -> PolarsResult<DataFrame> {
        let mut df = self.clone();
        if self.height() == 0 {
            for s in &columns {
                df.with_column(s.explode()?)?;
            }
            return Ok(df);
        }
        columns.sort_by(|sa, sb| {
            self.check_name_to_idx(sa.name())
                .expect("checked above")
                .partial_cmp(&self.check_name_to_idx(sb.name()).expect("checked above"))
                .expect("cmp usize -> Ordering")
        });

        // first remove all the exploded columns
        for s in &columns {
            df = df.drop(s.name())?;
        }

        for (i, s) in columns.iter().enumerate() {
            // Safety:
            // offsets don't have indices exceeding Series length.
            if let Ok((exploded, offsets)) = get_exploded(s) {
                let col_idx = self.check_name_to_idx(s.name())?;

                // expand all the other columns based the exploded first column
                if i == 0 {
                    let row_idx = offsets_to_indexes(offsets.as_slice(), exploded.len());
                    let mut row_idx = IdxCa::from_vec("", row_idx);
                    row_idx.set_sorted(false);

                    // Safety
                    // We just created indices that are in bounds.
                    df = unsafe { df.take_unchecked(&row_idx) };
                }
                if exploded.len() == df.height() || df.width() == 0 {
                    df.columns.insert(col_idx, exploded);
                } else {
                    return Err(PolarsError::ShapeMisMatch(
                        format!("The exploded column(s) don't have the same length. Length DataFrame: {}. Length exploded column {}: {}", df.height(), exploded.name(), exploded.len()).into(),
                    ));
                }
            } else {
                return Err(PolarsError::InvalidOperation(
                    format!("cannot explode dtype: {:?}", s.dtype()).into(),
                ));
            }
        }
        Ok(df)
    }

source

pub fn _add_columns(
 &mut self,
 columns: Vec<Series>,
 schema: &Schema
) -> PolarsResult<()>

source

pub fn with_column_and_schema<S: IntoSeries>(
 &mut self,
 column: S,
 schema: &Schema
) -> PolarsResult<&mut Self>

Add a new column to this DataFrame or replace an existing one. Uses an existing schema to amortize lookups. If the schema is incorrect, we will fallback to linear search.

Examples found in repository ?

src/frame/mod.rs (line 1173)

    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                self.with_column(s.clone())?;
            }
        }
        Ok(())
    }

source

pub fn get(&self, idx: usize) -> Option<Vec<AnyValue<'_>>>

Get a row in the DataFrame. Beware this is slow.

Example

fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    df.get(idx)
}

source

pub fn select_at_idx(&self, idx: usize) -> Option<&Series>

Select a Series by index.

Example

let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
                        "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;

let s1: Option<&Series> = df.select_at_idx(0);
let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);

assert_eq!(s1, Some(&s2));

Examples found in repository ?

src/frame/mod.rs (line 1367)

    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        let idx = self
            .find_idx_by_name(name)
            .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
        Ok(self.select_at_idx(idx).unwrap())
    }

    /// Selected multiple columns by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        names
            .into_iter()
            .map(|name| self.column(name.as_ref()))
            .collect()
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_impl(&cols)
    }

    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_physical_impl(&cols)
    }

    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_physical_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut names = PlHashSet::with_capacity(cols.len());
        for name in cols {
            if !names.insert(name.as_str()) {
                _duplicate_err(name)?
            }
        }
        Ok(())
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        let cols = selection.into_vec();
        self.select_series_impl(&cols)
    }

    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        self.columns
            .iter()
            .enumerate()
            .map(|(i, s)| (s.name(), i))
            .collect()
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            let name_to_idx = self._names_to_idx_map();
            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self
                        .select_at_idx(idx)
                        .unwrap()
                        .to_physical_repr()
                        .into_owned())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            // we hash, because there are user that having millions of columns.
            // # https://github.com/pola-rs/polars/issues/1023
            let name_to_idx = self._names_to_idx_map();

            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self.select_at_idx(idx).unwrap().clone())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.clone()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

source

pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>where
R: RangeBounds<usize>,

Select column(s) from this DataFrame by range and return a new DataFrame

Examples

let df = df! {
    "0" => &[0, 0, 0],
    "1" => &[1, 1, 1],
    "2" => &[2, 2, 2]
}?;

assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
assert!(df.frame_equal(&df.select_by_range(..)?));

source

pub fn find_idx_by_name(&self, name: &str) -> Option<usize>

Get column index of a Series by name.

Example

let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
                        "Health" => &[100, 200, 500],
                        "Mana" => &[250, 100, 0],
                        "Strength" => &[30, 150, 300])?;

assert_eq!(df.find_idx_by_name("Name"), Some(0));
assert_eq!(df.find_idx_by_name("Health"), Some(1));
assert_eq!(df.find_idx_by_name("Mana"), Some(2));
assert_eq!(df.find_idx_by_name("Strength"), Some(3));
assert_eq!(df.find_idx_by_name("Haste"), None);

Examples found in repository ?

src/frame/mod.rs (line 190)

    fn check_name_to_idx(&self, name: &str) -> PolarsResult<usize> {
        self.find_idx_by_name(name)
            .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))
    }

    fn check_already_present(&self, name: &str) -> PolarsResult<()> {
        if self.columns.iter().any(|s| s.name() == name) {
            Err(PolarsError::Duplicate(
                format!("column with name: '{name}' already present in DataFrame").into(),
            ))
        } else {
            Ok(())
        }
    }

    /// Reserve additional slots into the chunks of the series.
    pub(crate) fn reserve_chunks(&mut self, additional: usize) {
        for s in &mut self.columns {
            // Safety
            // do not modify the data, simply resize.
            unsafe { s.chunks_mut().reserve(additional) }
        }
    }

    /// Create a DataFrame from a Vector of Series.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("days", [0, 1, 2].as_ref());
    /// let s1 = Series::new("temp", [22.1, 19.9, 7.].as_ref());
    ///
    /// let df = DataFrame::new(vec![s0, s1])?;
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn new<S: IntoSeries>(columns: Vec<S>) -> PolarsResult<Self> {
        let mut first_len = None;

        let shape_err = |s: &[Series]| {
            let msg = format!(
                "Could not create a new DataFrame from Series. \
            The Series have different lengths. \
            Got {s:?}",
            );
            Err(PolarsError::ShapeMisMatch(msg.into()))
        };

        let series_cols = if S::is_series() {
            // Safety:
            // we are guarded by the type system here.
            #[allow(clippy::transmute_undefined_repr)]
            let series_cols = unsafe { std::mem::transmute::<Vec<S>, Vec<Series>>(columns) };
            let mut names = PlHashSet::with_capacity(series_cols.len());

            for s in &series_cols {
                match first_len {
                    Some(len) => {
                        if s.len() != len {
                            return shape_err(&series_cols);
                        }
                    }
                    None => first_len = Some(s.len()),
                }
                let name = s.name();

                if names.contains(name) {
                    _duplicate_err(name)?
                }

                names.insert(name);
            }
            // we drop early as the brchk thinks the &str borrows are used when calling the drop
            // of both `series_cols` and `names`
            drop(names);
            series_cols
        } else {
            let mut series_cols = Vec::with_capacity(columns.len());
            let mut names = PlHashSet::with_capacity(columns.len());

            // check for series length equality and convert into series in one pass
            for s in columns {
                let series = s.into_series();
                match first_len {
                    Some(len) => {
                        if series.len() != len {
                            return shape_err(&series_cols);
                        }
                    }
                    None => first_len = Some(series.len()),
                }
                // we have aliasing borrows so we must allocate a string
                let name = series.name().to_string();

                if names.contains(&name) {
                    _duplicate_err(&name)?
                }

                series_cols.push(series);
                names.insert(name);
            }
            drop(names);
            series_cols
        };

        Ok(DataFrame {
            columns: series_cols,
        })
    }

    /// Creates an empty `DataFrame` usable in a compile time context (such as static initializers).
    ///
    /// # Example
    ///
    /// ```rust
    /// use polars_core::prelude::DataFrame;
    /// static EMPTY: DataFrame = DataFrame::empty();
    /// ```
    pub const fn empty() -> Self {
        DataFrame::new_no_checks(Vec::new())
    }

    /// Removes the last `Series` from the `DataFrame` and returns it, or [`None`] if it is empty.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1 = Series::new("Ocean", &["Atlantic", "Indian"]);
    /// let s2 = Series::new("Area (km²)", &[106_460_000, 70_560_000]);
    /// let mut df = DataFrame::new(vec![s1.clone(), s2.clone()])?;
    ///
    /// assert_eq!(df.pop(), Some(s2));
    /// assert_eq!(df.pop(), Some(s1));
    /// assert_eq!(df.pop(), None);
    /// assert!(df.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn pop(&mut self) -> Option<Series> {
        self.columns.pop()
    }

    /// Add a new column at index 0 that counts the rows.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Name" => &["James", "Mary", "John", "Patricia"])?;
    /// assert_eq!(df1.shape(), (4, 1));
    ///
    /// let df2: DataFrame = df1.with_row_count("Id", None)?;
    /// assert_eq!(df2.shape(), (4, 2));
    /// println!("{}", df2);
    ///
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    ///  shape: (4, 2)
    ///  +-----+----------+
    ///  | Id  | Name     |
    ///  | --- | ---      |
    ///  | u32 | str      |
    ///  +=====+==========+
    ///  | 0   | James    |
    ///  +-----+----------+
    ///  | 1   | Mary     |
    ///  +-----+----------+
    ///  | 2   | John     |
    ///  +-----+----------+
    ///  | 3   | Patricia |
    ///  +-----+----------+
    /// ```
    pub fn with_row_count(&self, name: &str, offset: Option<IdxSize>) -> PolarsResult<Self> {
        let mut columns = Vec::with_capacity(self.columns.len() + 1);
        let offset = offset.unwrap_or(0);

        let mut ca = IdxCa::from_vec(
            name,
            (offset..(self.height() as IdxSize) + offset).collect(),
        );
        ca.set_sorted(false);
        columns.push(ca.into_series());

        columns.extend_from_slice(&self.columns);
        DataFrame::new(columns)
    }

    /// Add a row count in place.
    pub fn with_row_count_mut(&mut self, name: &str, offset: Option<IdxSize>) -> &mut Self {
        let offset = offset.unwrap_or(0);
        let mut ca = IdxCa::from_vec(
            name,
            (offset..(self.height() as IdxSize) + offset).collect(),
        );
        ca.set_sorted(false);

        self.columns.insert(0, ca.into_series());
        self
    }

    /// Create a new `DataFrame` but does not check the length or duplicate occurrence of the `Series`.
    ///
    /// It is advised to use [Series::new](Series::new) in favor of this method.
    ///
    /// # Panic
    /// It is the callers responsibility to uphold the contract of all `Series`
    /// having an equal length, if not this may panic down the line.
    pub const fn new_no_checks(columns: Vec<Series>) -> DataFrame {
        DataFrame { columns }
    }

    /// Aggregate all chunks to contiguous memory.
    #[must_use]
    pub fn agg_chunks(&self) -> Self {
        // Don't parallelize this. Memory overhead
        let f = |s: &Series| s.rechunk();
        let cols = self.columns.iter().map(f).collect();
        DataFrame::new_no_checks(cols)
    }

    /// Shrink the capacity of this DataFrame to fit its length.
    pub fn shrink_to_fit(&mut self) {
        // Don't parallelize this. Memory overhead
        for s in &mut self.columns {
            s.shrink_to_fit();
        }
    }

    /// Aggregate all the chunks in the DataFrame to a single chunk.
    pub fn as_single_chunk(&mut self) -> &mut Self {
        // Don't parallelize this. Memory overhead
        for s in &mut self.columns {
            *s = s.rechunk();
        }
        self
    }

    /// Aggregate all the chunks in the DataFrame to a single chunk in parallel.
    /// This may lead to more peak memory consumption.
    pub fn as_single_chunk_par(&mut self) -> &mut Self {
        if self.columns.iter().any(|s| s.n_chunks() > 1) {
            self.columns = self.apply_columns_par(&|s| s.rechunk());
        }
        self
    }

    /// Estimates of the DataFrames columns consist of the same chunk sizes
    pub fn should_rechunk(&self) -> bool {
        let hb = RandomState::default();
        let hb2 = RandomState::with_seeds(392498, 98132457, 0, 412059);
        !self
            .columns
            .iter()
            // The idea is that we create a hash of the chunk lengths.
            // Consisting of the combined hash + the sum (assuming collision probability is nihil)
            // if not, we can add more hashes or at worst case we do an extra rechunk.
            // the old solution to this was clone all lengths to a vec and compare the vecs
            .map(|s| {
                s.chunk_lengths().map(|i| i as u64).fold(
                    (0u64, 0u64, s.n_chunks()),
                    |(lhash, lh2, n), rval| {
                        let mut h = hb.build_hasher();
                        rval.hash(&mut h);
                        let rhash = h.finish();
                        let mut h = hb2.build_hasher();
                        rval.hash(&mut h);
                        let rh2 = h.finish();
                        (
                            _boost_hash_combine(lhash, rhash),
                            _boost_hash_combine(lh2, rh2),
                            n,
                        )
                    },
                )
            })
            .all_equal()
    }

    /// Ensure all the chunks in the DataFrame are aligned.
    pub fn rechunk(&mut self) -> &mut Self {
        if self.should_rechunk() {
            self.as_single_chunk_par()
        } else {
            self
        }
    }

    /// Get the `DataFrame` schema.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Thing" => &["Observable universe", "Human stupidity"],
    ///                         "Diameter (m)" => &[8.8e26, f64::INFINITY])?;
    ///
    /// let f1: Field = Field::new("Thing", DataType::Utf8);
    /// let f2: Field = Field::new("Diameter (m)", DataType::Float64);
    /// let sc: Schema = Schema::from(vec![f1, f2].into_iter());
    ///
    /// assert_eq!(df.schema(), sc);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn schema(&self) -> Schema {
        Schema::from(self.iter().map(|s| s.field().into_owned()))
    }

    /// Get a reference to the `DataFrame` columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Adenine", "Cytosine", "Guanine", "Thymine"],
    ///                         "Symbol" => &["A", "C", "G", "T"])?;
    /// let columns: &Vec<Series> = df.get_columns();
    ///
    /// assert_eq!(columns[0].name(), "Name");
    /// assert_eq!(columns[1].name(), "Symbol");
    /// # Ok::<(), PolarsError>(())
    /// ```
    #[inline]
    pub fn get_columns(&self) -> &Vec<Series> {
        &self.columns
    }

    #[cfg(feature = "private")]
    #[inline]
    pub fn get_columns_mut(&mut self) -> &mut Vec<Series> {
        &mut self.columns
    }

    /// Iterator over the columns as `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Name", &["Pythagoras' theorem", "Shannon entropy"]);
    /// let s2: Series = Series::new("Formula", &["a²+b²=c²", "H=-Σ[P(x)log|P(x)|]"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2.clone()])?;
    ///
    /// let mut iterator = df.iter();
    ///
    /// assert_eq!(iterator.next(), Some(&s1));
    /// assert_eq!(iterator.next(), Some(&s2));
    /// assert_eq!(iterator.next(), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn iter(&self) -> std::slice::Iter<'_, Series> {
        self.columns.iter()
    }

    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Language" => &["Rust", "Python"],
    ///                         "Designer" => &["Graydon Hoare", "Guido van Rossum"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Language", "Designer"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn get_column_names(&self) -> Vec<&str> {
        self.columns.iter().map(|s| s.name()).collect()
    }

    /// Get the `Vec<String>` representing the column names.
    pub fn get_column_names_owned(&self) -> Vec<String> {
        self.columns.iter().map(|s| s.name().to_string()).collect()
    }

    /// Set the column names.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Mathematical set" => &["ℕ", "ℤ", "𝔻", "ℚ", "ℝ", "ℂ"])?;
    /// df.set_column_names(&["Set"])?;
    ///
    /// assert_eq!(df.get_column_names(), &["Set"]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn set_column_names<S: AsRef<str>>(&mut self, names: &[S]) -> PolarsResult<()> {
        if names.len() != self.columns.len() {
            return Err(PolarsError::ShapeMisMatch("the provided slice with column names has not the same size as the DataFrame's width".into()));
        }
        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(names.iter().map(|name| name.as_ref()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }

        let columns = mem::take(&mut self.columns);
        self.columns = columns
            .into_iter()
            .zip(names)
            .map(|(s, name)| {
                let mut s = s;
                s.rename(name.as_ref());
                s
            })
            .collect();
        Ok(())
    }

    /// Get the data types of the columns in the DataFrame.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let venus_air: DataFrame = df!("Element" => &["Carbon dioxide", "Nitrogen"],
    ///                                "Fraction" => &[0.965, 0.035])?;
    ///
    /// assert_eq!(venus_air.dtypes(), &[DataType::Utf8, DataType::Float64]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn dtypes(&self) -> Vec<DataType> {
        self.columns.iter().map(|s| s.dtype().clone()).collect()
    }

    /// The number of chunks per column
    pub fn n_chunks(&self) -> usize {
        match self.columns.get(0) {
            None => 0,
            Some(s) => s.n_chunks(),
        }
    }

    /// Get a reference to the schema fields of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let earth: DataFrame = df!("Surface type" => &["Water", "Land"],
    ///                            "Fraction" => &[0.708, 0.292])?;
    ///
    /// let f1: Field = Field::new("Surface type", DataType::Utf8);
    /// let f2: Field = Field::new("Fraction", DataType::Float64);
    ///
    /// assert_eq!(earth.fields(), &[f1, f2]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn fields(&self) -> Vec<Field> {
        self.columns
            .iter()
            .map(|s| s.field().into_owned())
            .collect()
    }

    /// Get (height, width) of the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("1" => &[1, 2, 3, 4, 5])?;
    /// let df2: DataFrame = df!("1" => &[1, 2, 3, 4, 5],
    ///                          "2" => &[1, 2, 3, 4, 5])?;
    ///
    /// assert_eq!(df0.shape(), (0 ,0));
    /// assert_eq!(df1.shape(), (5, 1));
    /// assert_eq!(df2.shape(), (5, 2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn shape(&self) -> (usize, usize) {
        match self.columns.as_slice() {
            &[] => (0, 0),
            v => (v[0].len(), v.len()),
        }
    }

    /// Get the width of the `DataFrame` which is the number of columns.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("Series 1" => &[0; 0])?;
    /// let df2: DataFrame = df!("Series 1" => &[0; 0],
    ///                          "Series 2" => &[0; 0])?;
    ///
    /// assert_eq!(df0.width(), 0);
    /// assert_eq!(df1.width(), 1);
    /// assert_eq!(df2.width(), 2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn width(&self) -> usize {
        self.columns.len()
    }

    /// Get the height of the `DataFrame` which is the number of rows.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df0: DataFrame = DataFrame::default();
    /// let df1: DataFrame = df!("Currency" => &["€", "$"])?;
    /// let df2: DataFrame = df!("Currency" => &["€", "$", "¥", "£", "₿"])?;
    ///
    /// assert_eq!(df0.height(), 0);
    /// assert_eq!(df1.height(), 2);
    /// assert_eq!(df2.height(), 5);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn height(&self) -> usize {
        self.shape().0
    }

    /// Check if the `DataFrame` is empty.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = DataFrame::default();
    /// assert!(df1.is_empty());
    ///
    /// let df2: DataFrame = df!("First name" => &["Forever"],
    ///                          "Last name" => &["Alone"])?;
    /// assert!(!df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn is_empty(&self) -> bool {
        self.columns.is_empty()
    }

    pub(crate) fn hstack_mut_no_checks(&mut self, columns: &[Series]) -> &mut Self {
        for col in columns {
            self.columns.push(col.clone());
        }
        self
    }

    /// Add multiple `Series` to a `DataFrame`.
    /// The added `Series` are required to have the same length.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn stack(df: &mut DataFrame, columns: &[Series]) {
    ///     df.hstack_mut(columns);
    /// }
    /// ```
    pub fn hstack_mut(&mut self, columns: &[Series]) -> PolarsResult<&mut Self> {
        let mut names = PlHashSet::with_capacity(self.columns.len());
        for s in &self.columns {
            names.insert(s.name());
        }

        let height = self.height();
        // first loop check validity. We don't do this in a single pass otherwise
        // this DataFrame is already modified when an error occurs.
        for col in columns {
            if col.len() != height && height != 0 {
                return Err(PolarsError::ShapeMisMatch(
                    format!("Could not horizontally stack Series. The Series length {} differs from the DataFrame height: {height}", col.len()).into()));
            }

            let name = col.name();
            if names.contains(name) {
                return Err(PolarsError::Duplicate(
                    format!("Cannot do hstack operation. Column with name: {name} already exists",)
                        .into(),
                ));
            }
            names.insert(name);
        }
        drop(names);
        Ok(self.hstack_mut_no_checks(columns))
    }

    /// Add multiple `Series` to a `DataFrame`.
    /// The added `Series` are required to have the same length.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"])?;
    /// let s1: Series = Series::new("Proton", &[29, 47, 79]);
    /// let s2: Series = Series::new("Electron", &[29, 47, 79]);
    ///
    /// let df2: DataFrame = df1.hstack(&[s1, s2])?;
    /// assert_eq!(df2.shape(), (3, 3));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (3, 3)
    /// +---------+--------+----------+
    /// | Element | Proton | Electron |
    /// | ---     | ---    | ---      |
    /// | str     | i32    | i32      |
    /// +=========+========+==========+
    /// | Copper  | 29     | 29       |
    /// +---------+--------+----------+
    /// | Silver  | 47     | 47       |
    /// +---------+--------+----------+
    /// | Gold    | 79     | 79       |
    /// +---------+--------+----------+
    /// ```
    pub fn hstack(&self, columns: &[Series]) -> PolarsResult<Self> {
        let mut new_cols = self.columns.clone();
        new_cols.extend_from_slice(columns);
        DataFrame::new(new_cols)
    }

    /// Concatenate a `DataFrame` to this `DataFrame` and return as newly allocated `DataFrame`.
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// let df3: DataFrame = df1.vstack(&df2)?;
    ///
    /// assert_eq!(df3.shape(), (5, 2));
    /// println!("{}", df3);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    pub fn vstack(&self, other: &DataFrame) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.vstack_mut(other)?;
        Ok(df)
    }

    /// Concatenate a DataFrame to this DataFrame
    ///
    /// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
    ///                          "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
    /// let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
    ///                          "Melting Point (K)" => &[2041.4, 1828.05])?;
    ///
    /// df1.vstack_mut(&df2)?;
    ///
    /// assert_eq!(df1.shape(), (5, 2));
    /// println!("{}", df1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (5, 2)
    /// +-----------+-------------------+
    /// | Element   | Melting Point (K) |
    /// | ---       | ---               |
    /// | str       | f64               |
    /// +===========+===================+
    /// | Copper    | 1357.77           |
    /// +-----------+-------------------+
    /// | Silver    | 1234.93           |
    /// +-----------+-------------------+
    /// | Gold      | 1337.33           |
    /// +-----------+-------------------+
    /// | Platinum  | 2041.4            |
    /// +-----------+-------------------+
    /// | Palladium | 1828.05           |
    /// +-----------+-------------------+
    /// ```
    pub fn vstack_mut(&mut self, other: &DataFrame) -> PolarsResult<&mut Self> {
        if self.width() != other.width() {
            if self.width() == 0 {
                self.columns = other.columns.clone();
                return Ok(self);
            }

            return Err(PolarsError::ShapeMisMatch(
                format!("Could not vertically stack DataFrame. The DataFrames appended width {} differs from the parent DataFrames width {}", self.width(), other.width()).into()
            ));
        }

        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .try_for_each::<_, PolarsResult<_>>(|(left, right)| {
                can_extend(left, right)?;
                left.append(right).expect("should not fail");
                Ok(())
            })?;
        Ok(self)
    }

    /// Does not check if schema is correct
    pub(crate) fn vstack_mut_unchecked(&mut self, other: &DataFrame) {
        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .for_each(|(left, right)| {
                left.append(right).expect("should not fail");
            });
    }

    /// Extend the memory backed by this [`DataFrame`] with the values from `other`.
    ///
    /// Different from [`vstack`](Self::vstack) which adds the chunks from `other` to the chunks of this [`DataFrame`]
    /// `extend` appends the data from `other` to the underlying memory locations and thus may cause a reallocation.
    ///
    /// If this does not cause a reallocation, the resulting data structure will not have any extra chunks
    /// and thus will yield faster queries.
    ///
    /// Prefer `extend` over `vstack` when you want to do a query after a single append. For instance during
    /// online operations where you add `n` rows and rerun a query.
    ///
    /// Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance
    /// when you read in multiple files and when to store them in a single `DataFrame`. In the latter case, finish the sequence
    /// of `append` operations with a [`rechunk`](Self::rechunk).
    pub fn extend(&mut self, other: &DataFrame) -> PolarsResult<()> {
        if self.width() != other.width() {
            return Err(PolarsError::ShapeMisMatch(
                format!("Could not extend DataFrame. The DataFrames extended width {} differs from the parent DataFrames width {}", self.width(), other.width()).into()
            ));
        }

        self.columns
            .iter_mut()
            .zip(other.columns.iter())
            .try_for_each::<_, PolarsResult<_>>(|(left, right)| {
                can_extend(left, right)?;
                left.extend(right).unwrap();
                Ok(())
            })?;
        Ok(())
    }

    /// Remove a column by name and return the column removed.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Animal" => &["Tiger", "Lion", "Great auk"],
    ///                             "IUCN" => &["Endangered", "Vulnerable", "Extinct"])?;
    ///
    /// let s1: PolarsResult<Series> = df.drop_in_place("Average weight");
    /// assert!(s1.is_err());
    ///
    /// let s2: Series = df.drop_in_place("Animal")?;
    /// assert_eq!(s2, Series::new("Animal", &["Tiger", "Lion", "Great auk"]));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop_in_place(&mut self, name: &str) -> PolarsResult<Series> {
        let idx = self.check_name_to_idx(name)?;
        Ok(self.columns.remove(idx))
    }

    /// Return a new `DataFrame` where all null values are dropped.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Country" => ["Malta", "Liechtenstein", "North Korea"],
    ///                         "Tax revenue (% GDP)" => [Some(32.7), None, None])?;
    /// assert_eq!(df1.shape(), (3, 2));
    ///
    /// let df2: DataFrame = df1.drop_nulls(None)?;
    /// assert_eq!(df2.shape(), (1, 2));
    /// println!("{}", df2);
    /// # Ok::<(), PolarsError>(())
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// shape: (1, 2)
    /// +---------+---------------------+
    /// | Country | Tax revenue (% GDP) |
    /// | ---     | ---                 |
    /// | str     | f64                 |
    /// +=========+=====================+
    /// | Malta   | 32.7                |
    /// +---------+---------------------+
    /// ```
    pub fn drop_nulls(&self, subset: Option<&[String]>) -> PolarsResult<Self> {
        let selected_series;

        let mut iter = match subset {
            Some(cols) => {
                selected_series = self.select_series(cols)?;
                selected_series.iter()
            }
            None => self.columns.iter(),
        };

        // fast path for no nulls in df
        if iter.clone().all(|s| !s.has_validity()) {
            return Ok(self.clone());
        }

        let mask = iter
            .next()
            .ok_or_else(|| PolarsError::NoData("No data to drop nulls from".into()))?;
        let mut mask = mask.is_not_null();

        for s in iter {
            mask = mask & s.is_not_null();
        }
        self.filter(&mask)
    }

    /// Drop a column by name.
    /// This is a pure method and will return a new `DataFrame` instead of modifying
    /// the current one in place.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Ray type" => &["α", "β", "X", "γ"])?;
    /// let df2: DataFrame = df1.drop("Ray type")?;
    ///
    /// assert!(df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop(&self, name: &str) -> PolarsResult<Self> {
        let idx = self.check_name_to_idx(name)?;
        let mut new_cols = Vec::with_capacity(self.columns.len() - 1);

        self.columns.iter().enumerate().for_each(|(i, s)| {
            if i != idx {
                new_cols.push(s.clone())
            }
        });

        Ok(DataFrame::new_no_checks(new_cols))
    }

    pub fn drop_many<S: AsRef<str>>(&self, names: &[S]) -> Self {
        let names = names.iter().map(|s| s.as_ref()).collect();
        fn inner(df: &DataFrame, names: Vec<&str>) -> DataFrame {
            let mut new_cols = Vec::with_capacity(df.columns.len() - names.len());
            df.columns.iter().for_each(|s| {
                if !names.contains(&s.name()) {
                    new_cols.push(s.clone())
                }
            });

            DataFrame::new_no_checks(new_cols)
        }
        inner(self, names)
    }

    fn insert_at_idx_no_name_check(
        &mut self,
        index: usize,
        series: Series,
    ) -> PolarsResult<&mut Self> {
        if series.len() == self.height() {
            self.columns.insert(index, series);
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                .into(),
            ))
        }
    }

    /// Insert a new column at a given index.
    pub fn insert_at_idx<S: IntoSeries>(
        &mut self,
        index: usize,
        column: S,
    ) -> PolarsResult<&mut Self> {
        let series = column.into_series();
        self.check_already_present(series.name())?;
        self.insert_at_idx_no_name_check(index, series)
    }

    fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> {
        if let Some(idx) = self.find_idx_by_name(series.name()) {
            self.replace_at_idx(idx, series)?;
        } else {
            self.columns.push(series);
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    pub fn with_column<S: IntoSeries>(&mut self, column: S) -> PolarsResult<&mut Self> {
        fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();
            if series.len() == 1 && height > 1 {
                series = series.new_from_index(0, height);
            }

            if series.len() == height || df.is_empty() {
                df.add_column_by_search(series)?;
                Ok(df)
            }
            // special case for literals
            else if height == 0 && series.len() == 1 {
                let s = series.slice(0, 0);
                df.add_column_by_search(s)?;
                Ok(df)
            } else {
                Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Could not add column. The Series length {} differs from the DataFrame height: {}",
                        series.len(),
                        df.height()
                    )
                        .into(),
                ))
            }
        }
        let series = column.into_series();
        inner(self, series)
    }

    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let name = s.name();
        if let Some((idx, _, _)) = schema.get_full(name) {
            // schema is incorrect fallback to search
            if self.columns.get(idx).map(|s| s.name()) != Some(name) {
                self.add_column_by_search(s)?;
            } else {
                self.replace_at_idx(idx, s)?;
            }
        } else {
            self.columns.push(s);
        }
        Ok(())
    }

    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                self.with_column(s.clone())?;
            }
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    /// Uses an existing schema to amortize lookups.
    /// If the schema is incorrect, we will fallback to linear search.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let mut series = column.into_series();

        let height = self.height();
        if series.len() == 1 && height > 1 {
            series = series.new_from_index(0, height);
        }

        if series.len() == height || self.is_empty() {
            self.add_column_by_schema(series, schema)?;
            Ok(self)
        }
        // special case for literals
        else if height == 0 && series.len() == 1 {
            let s = series.slice(0, 0);
            self.add_column_by_schema(s, schema)?;
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                    .into(),
            ))
        }
    }

    /// Get a row in the `DataFrame`. Beware this is slow.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    ///     df.get(idx)
    /// }
    /// ```
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        match self.columns.get(0) {
            Some(s) => {
                if s.len() <= idx {
                    return None;
                }
            }
            None => return None,
        }
        // safety: we just checked bounds
        unsafe { Some(self.columns.iter().map(|s| s.get_unchecked(idx)).collect()) }
    }

    /// Select a `Series` by index.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
    ///                         "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
    ///
    /// let s1: Option<&Series> = df.select_at_idx(0);
    /// let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
    ///
    /// assert_eq!(s1, Some(&s2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_at_idx(&self, idx: usize) -> Option<&Series> {
        self.columns.get(idx)
    }

    /// Select a mutable series by index.
    ///
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> {
        self.columns.get_mut(idx)
    }

    /// Select column(s) from this `DataFrame` by range and return a new DataFrame
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///     "0" => &[0, 0, 0],
    ///     "1" => &[1, 1, 1],
    ///     "2" => &[2, 2, 2]
    /// }?;
    ///
    /// assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
    /// assert!(df.frame_equal(&df.select_by_range(..)?));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // This function is copied from std::slice::range (https://doc.rust-lang.org/std/slice/fn.range.html)
        // because it is the nightly feature. We should change here if this function were stable.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let len = bounds.end;

            let start: ops::Bound<&usize> = range.start_bound();
            let start = match start {
                ops::Bound::Included(&start) => start,
                ops::Bound::Excluded(start) => start.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice from after maximum usize");
                }),
                ops::Bound::Unbounded => 0,
            };

            let end: ops::Bound<&usize> = range.end_bound();
            let end = match end {
                ops::Bound::Included(end) => end.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice up to maximum usize");
                }),
                ops::Bound::Excluded(&end) => end,
                ops::Bound::Unbounded => len,
            };

            if start > end {
                panic!("slice index starts at {start} but ends at {end}");
            }
            if end > len {
                panic!("range end index {end} out of range for slice of length {len}",);
            }

            ops::Range { start, end }
        }

        let colnames = self.get_column_names_owned();
        let range = get_range(range, ..colnames.len());

        self.select_impl(&colnames[range])
    }

    /// Get column index of a `Series` by name.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
    ///                         "Health" => &[100, 200, 500],
    ///                         "Mana" => &[250, 100, 0],
    ///                         "Strength" => &[30, 150, 300])?;
    ///
    /// assert_eq!(df.find_idx_by_name("Name"), Some(0));
    /// assert_eq!(df.find_idx_by_name("Health"), Some(1));
    /// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
    /// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
    /// assert_eq!(df.find_idx_by_name("Haste"), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
        self.columns.iter().position(|s| s.name() == name)
    }

    /// Select a single column by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
    /// let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
    ///
    /// assert_eq!(df.column("Password")?, &s1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        let idx = self
            .find_idx_by_name(name)
            .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
        Ok(self.select_at_idx(idx).unwrap())
    }

    /// Selected multiple columns by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        names
            .into_iter()
            .map(|name| self.column(name.as_ref()))
            .collect()
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_impl(&cols)
    }

    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_physical_impl(&cols)
    }

    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_physical_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut names = PlHashSet::with_capacity(cols.len());
        for name in cols {
            if !names.insert(name.as_str()) {
                _duplicate_err(name)?
            }
        }
        Ok(())
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        let cols = selection.into_vec();
        self.select_series_impl(&cols)
    }

    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        self.columns
            .iter()
            .enumerate()
            .map(|(i, s)| (s.name(), i))
            .collect()
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            let name_to_idx = self._names_to_idx_map();
            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self
                        .select_at_idx(idx)
                        .unwrap()
                        .to_physical_repr()
                        .into_owned())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            // we hash, because there are user that having millions of columns.
            // # https://github.com/pola-rs/polars/issues/1023
            let name_to_idx = self._names_to_idx_map();

            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self.select_at_idx(idx).unwrap().clone())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.clone()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// Select a mutable series by name.
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        let opt_idx = self.find_idx_by_name(name);

        match opt_idx {
            Some(idx) => self.select_at_idx_mut(idx),
            None => None,
        }
    }

    /// Does a filter but splits thread chunks vertically instead of horizontally
    /// This yields a DataFrame with `n_chunks == n_threads`.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let masks = split_ca(mask, n_threads).unwrap();
        let dfs = split_df(self, n_threads).unwrap();
        let dfs: PolarsResult<Vec<_>> = POOL.install(|| {
            masks
                .par_iter()
                .zip(dfs)
                .map(|(mask, df)| {
                    let cols = df
                        .columns
                        .iter()
                        .map(|s| s.filter(mask))
                        .collect::<PolarsResult<_>>()?;
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        let mut iter = dfs?.into_iter();
        let first = iter.next().unwrap();
        Ok(iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        }))
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            return self.clone().filter_vertical(mask);
        }
        let new_col = self.try_apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s.filter_threaded(mask, true),
            _ => s.filter(mask),
        })?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Same as `filter` but does not parallelize.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let new_col = self.try_apply_columns(&|s| s.filter(mask))?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        let new_col = self.try_apply_columns_par(&|s| {
            let mut i = iter.clone();
            s.take_iter(&mut i)
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` values by indexes from an iterator.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            return self.take_unchecked_vectical(&idx_ca.into_inner());
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            let idx_ca = idx_ca.into_inner();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_iter_unchecked(&mut i)
            })
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` values by indexes from an iterator that may contain None values.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked_vectical(&idx_ca);
        }

        let n_chunks = self.n_chunks();

        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_opt_iter_unchecked(&mut i)
            })
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` rows by index values.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        let indices = if indices.chunks.len() > 1 {
            Cow::Owned(indices.rechunk())
        } else {
            Cow::Borrowed(indices)
        };
        let new_col = POOL.install(|| {
            self.try_apply_columns_par(&|s| match s.dtype() {
                DataType::Utf8 => s.take_threaded(&indices, true),
                _ => s.take(&indices),
            })
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        self.take_unchecked_impl(idx, true)
    }

    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        let cols = if allow_threads {
            POOL.install(|| {
                self.apply_columns_par(&|s| match s.dtype() {
                    DataType::Utf8 => s.take_unchecked_threaded(idx, true).unwrap(),
                    _ => s.take_unchecked(idx).unwrap(),
                })
            })
        } else {
            self.columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect()
        };
        DataFrame::new_no_checks(cols)
    }

    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let idxs = split_ca(indices, n_threads).unwrap();

        let dfs: Vec<_> = POOL.install(|| {
            idxs.par_iter()
                .map(|idx| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(idx).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        let mut iter = dfs.into_iter();
        let first = iter.next().unwrap();
        iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        })
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        self.select_mut(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))
            .map(|s| s.rename(name))?;

        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(self.columns.iter().map(|s| s.name()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }
        Ok(self)
    }

    /// Sort `DataFrame` in place by a column.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // a lot of indirection in both sorting and take
        self.as_single_chunk_par();
        let by_column = self.select_series(by_column)?;
        let reverse = reverse.into_vec();
        self.columns = self.sort_impl(by_column, reverse, false, None)?.columns;
        Ok(self)
    }

    /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // note that the by_column argument also contains evaluated expression from polars-lazy
        // that may not even be present in this dataframe.

        // therefore when we try to set the first columns as sorted, we ignore the error
        // as expressions are not present (they are renamed to _POLARS_SORT_COLUMN_i.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: reverse[0],
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted
        // if the column did not exists it is ok, because we sorted by an expression
        // not present in the dataframe
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted clone of this `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.sort_in_place(by_column, reverse)?;
        Ok(df)
    }

    /// Sort the `DataFrame` by a single column with extra options.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut df = self.clone();
        // a lot of indirection in both sorting and take
        df.as_single_chunk_par();
        let by_column = vec![df.column(by_column)?.clone()];
        let reverse = vec![options.descending];
        df.columns = df
            .sort_impl(by_column, reverse, options.nulls_last, None)?
            .columns;
        Ok(df)
    }

    /// Replace a column with a `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Country" => &["United States", "China"],
    ///                         "Area (km²)" => &[9_833_520, 9_596_961])?;
    /// let s: Series = Series::new("Country", &["USA", "PRC"]);
    ///
    /// assert!(df.replace("Nation", s.clone()).is_err());
    /// assert!(df.replace("Country", s).is_ok());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace<S: IntoSeries>(&mut self, column: &str, new_col: S) -> PolarsResult<&mut Self> {
        self.apply(column, |_| new_col.into_series())
    }

    /// Replace or update a column. The difference between this method and [DataFrame::with_column]
    /// is that now the value of `column: &str` determines the name of the column and not the name
    /// of the `Series` passed to this method.
    pub fn replace_or_add<S: IntoSeries>(
        &mut self,
        column: &str,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut new_col = new_col.into_series();
        new_col.rename(column);
        self.with_column(new_col)
    }

    /// Replace column at index `idx` with a `Series`.
    ///
    /// # Example
    ///
    /// ```ignored
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace_at_idx<S: IntoSeries>(
        &mut self,
        idx: usize,
        new_col: S,
    ) -> PolarsResult<&mut Self> {
        let mut new_column = new_col.into_series();
        if new_column.len() != self.height() {
            return Err(PolarsError::ShapeMisMatch(
                format!("Cannot replace Series at index {}. The shape of Series {} does not match that of the DataFrame {}",
                idx, new_column.len(), self.height()
                ).into()));
        };
        if idx >= self.width() {
            return Err(PolarsError::ComputeError(
                format!(
                    "Column index: {} outside of DataFrame with {} columns",
                    idx,
                    self.width()
                )
                .into(),
            ));
        }
        let old_col = &mut self.columns[idx];
        mem::swap(old_col, &mut new_column);
        Ok(self)
    }

    /// Apply a closure to a column. This is the recommended way to do in place modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("names", &["Jean", "Claude", "van"]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// fn str_to_len(str_val: &Series) -> Series {
    ///     str_val.utf8()
    ///         .unwrap()
    ///         .into_iter()
    ///         .map(|opt_name: Option<&str>| {
    ///             opt_name.map(|name: &str| name.len() as u32)
    ///          })
    ///         .collect::<UInt32Chunked>()
    ///         .into_series()
    /// }
    ///
    /// // Replace the names column by the length of the names.
    /// df.apply("names", str_to_len);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    |       |
    /// | ---    | names |
    /// | str    | u32   |
    /// +========+=======+
    /// | "ham"  | 4     |
    /// +--------+-------+
    /// | "spam" | 6     |
    /// +--------+-------+
    /// | "egg"  | 3     |
    /// +--------+-------+
    /// ```
    pub fn apply<F, S>(&mut self, name: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let idx = self.check_name_to_idx(name)?;
        self.apply_at_idx(idx, f)
    }

    /// Apply a closure to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg"]);
    /// let s1 = Series::new("ascii", &[70, 79, 79]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // Add 32 to get lowercase ascii values
    /// df.apply_at_idx(1, |s| s + 32);
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +--------+-------+
    /// | foo    | ascii |
    /// | ---    | ---   |
    /// | str    | i32   |
    /// +========+=======+
    /// | "ham"  | 102   |
    /// +--------+-------+
    /// | "spam" | 111   |
    /// +--------+-------+
    /// | "egg"  | 111   |
    /// +--------+-------+
    /// ```
    pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let df_height = self.height();
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();
        let new_col = f(col).into_series();
        match new_col.len() {
            1 => {
                let new_col = new_col.new_from_index(0, df_height);
                let _ = mem::replace(col, new_col);
            }
            len if (len == df_height) => {
                let _ = mem::replace(col, new_col);
            }
            len => {
                return Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Result Series has shape {} where the DataFrame has height {}",
                        len,
                        self.height()
                    )
                    .into(),
                ));
            }
        }

        // make sure the name remains the same after applying the closure
        unsafe {
            let col = self.columns.get_unchecked_mut(idx);
            col.rename(&name);
        }
        Ok(self)
    }

    /// Apply a closure that may fail to a column at index `idx`. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values a column of a `DataFrame` given range of indexes.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// let idx = vec![0, 1, 4];
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set_at_idx_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "ham-is-modified"   | 1      |
    /// +---------------------+--------+
    /// | "spam-is-modified"  | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "quack-is-modified" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let width = self.width();
        let col = self.columns.get_mut(idx).ok_or_else(|| {
            PolarsError::ComputeError(
                format!("Column index: {idx} outside of DataFrame with {width} columns",).into(),
            )
        })?;
        let name = col.name().to_string();

        let _ = mem::replace(col, f(col).map(|s| s.into_series())?);

        // make sure the name remains the same after applying the closure
        unsafe {
            let col = self.columns.get_unchecked_mut(idx);
            col.rename(&name);
        }
        Ok(self)
    }

    /// Apply a closure that may fail to a column. This is the recommended way to do in place
    /// modification.
    ///
    /// # Example
    ///
    /// This is the idiomatic way to replace some values a column of a `DataFrame` given a boolean mask.
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
    /// let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
    /// let mut df = DataFrame::new(vec![s0, s1])?;
    ///
    /// // create a mask
    /// let values = df.column("values")?;
    /// let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?;
    ///
    /// df.try_apply("foo", |s| {
    ///     s.utf8()?
    ///     .set(&mask, Some("not_within_bounds"))
    /// });
    /// # Ok::<(), PolarsError>(())
    /// ```
    /// Results in:
    ///
    /// ```text
    /// +---------------------+--------+
    /// | foo                 | values |
    /// | ---                 | ---    |
    /// | str                 | i32    |
    /// +=====================+========+
    /// | "not_within_bounds" | 1      |
    /// +---------------------+--------+
    /// | "spam"              | 2      |
    /// +---------------------+--------+
    /// | "egg"               | 3      |
    /// +---------------------+--------+
    /// | "bacon"             | 4      |
    /// +---------------------+--------+
    /// | "not_within_bounds" | 5      |
    /// +---------------------+--------+
    /// ```
    pub fn try_apply<F, S>(&mut self, column: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let idx = self
            .find_idx_by_name(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))?;
        self.try_apply_at_idx(idx, f)
    }

source

pub fn column(&self, name: &str) -> PolarsResult<&Series>

Select a single column by name.

Example

let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;

assert_eq!(df.column("Password")?, &s1);

Examples found in repository ?

src/frame/mod.rs (line 1391)

    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        names
            .into_iter()
            .map(|name| self.column(name.as_ref()))
            .collect()
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_impl(&cols)
    }

    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_physical_impl(&cols)
    }

    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_physical_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut names = PlHashSet::with_capacity(cols.len());
        for name in cols {
            if !names.insert(name.as_str()) {
                _duplicate_err(name)?
            }
        }
        Ok(())
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        let cols = selection.into_vec();
        self.select_series_impl(&cols)
    }

    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        self.columns
            .iter()
            .enumerate()
            .map(|(i, s)| (s.name(), i))
            .collect()
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            let name_to_idx = self._names_to_idx_map();
            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self
                        .select_at_idx(idx)
                        .unwrap()
                        .to_physical_repr()
                        .into_owned())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            // we hash, because there are user that having millions of columns.
            // # https://github.com/pola-rs/polars/issues/1023
            let name_to_idx = self._names_to_idx_map();

            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self.select_at_idx(idx).unwrap().clone())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.clone()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// Select a mutable series by name.
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        let opt_idx = self.find_idx_by_name(name);

        match opt_idx {
            Some(idx) => self.select_at_idx_mut(idx),
            None => None,
        }
    }

    /// Does a filter but splits thread chunks vertically instead of horizontally
    /// This yields a DataFrame with `n_chunks == n_threads`.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let masks = split_ca(mask, n_threads).unwrap();
        let dfs = split_df(self, n_threads).unwrap();
        let dfs: PolarsResult<Vec<_>> = POOL.install(|| {
            masks
                .par_iter()
                .zip(dfs)
                .map(|(mask, df)| {
                    let cols = df
                        .columns
                        .iter()
                        .map(|s| s.filter(mask))
                        .collect::<PolarsResult<_>>()?;
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        let mut iter = dfs?.into_iter();
        let first = iter.next().unwrap();
        Ok(iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        }))
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            return self.clone().filter_vertical(mask);
        }
        let new_col = self.try_apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s.filter_threaded(mask, true),
            _ => s.filter(mask),
        })?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Same as `filter` but does not parallelize.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let new_col = self.try_apply_columns(&|s| s.filter(mask))?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        let new_col = self.try_apply_columns_par(&|s| {
            let mut i = iter.clone();
            s.take_iter(&mut i)
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` values by indexes from an iterator.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            return self.take_unchecked_vectical(&idx_ca.into_inner());
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            let idx_ca = idx_ca.into_inner();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_iter_unchecked(&mut i)
            })
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` values by indexes from an iterator that may contain None values.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked_vectical(&idx_ca);
        }

        let n_chunks = self.n_chunks();

        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_opt_iter_unchecked(&mut i)
            })
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` rows by index values.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        let indices = if indices.chunks.len() > 1 {
            Cow::Owned(indices.rechunk())
        } else {
            Cow::Borrowed(indices)
        };
        let new_col = POOL.install(|| {
            self.try_apply_columns_par(&|s| match s.dtype() {
                DataType::Utf8 => s.take_threaded(&indices, true),
                _ => s.take(&indices),
            })
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        self.take_unchecked_impl(idx, true)
    }

    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        let cols = if allow_threads {
            POOL.install(|| {
                self.apply_columns_par(&|s| match s.dtype() {
                    DataType::Utf8 => s.take_unchecked_threaded(idx, true).unwrap(),
                    _ => s.take_unchecked(idx).unwrap(),
                })
            })
        } else {
            self.columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect()
        };
        DataFrame::new_no_checks(cols)
    }

    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let idxs = split_ca(indices, n_threads).unwrap();

        let dfs: Vec<_> = POOL.install(|| {
            idxs.par_iter()
                .map(|idx| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(idx).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        let mut iter = dfs.into_iter();
        let first = iter.next().unwrap();
        iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        })
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        self.select_mut(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))
            .map(|s| s.rename(name))?;

        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(self.columns.iter().map(|s| s.name()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }
        Ok(self)
    }

    /// Sort `DataFrame` in place by a column.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // a lot of indirection in both sorting and take
        self.as_single_chunk_par();
        let by_column = self.select_series(by_column)?;
        let reverse = reverse.into_vec();
        self.columns = self.sort_impl(by_column, reverse, false, None)?.columns;
        Ok(self)
    }

    /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // note that the by_column argument also contains evaluated expression from polars-lazy
        // that may not even be present in this dataframe.

        // therefore when we try to set the first columns as sorted, we ignore the error
        // as expressions are not present (they are renamed to _POLARS_SORT_COLUMN_i.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: reverse[0],
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted
        // if the column did not exists it is ok, because we sorted by an expression
        // not present in the dataframe
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted clone of this `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.sort_in_place(by_column, reverse)?;
        Ok(df)
    }

    /// Sort the `DataFrame` by a single column with extra options.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut df = self.clone();
        // a lot of indirection in both sorting and take
        df.as_single_chunk_par();
        let by_column = vec![df.column(by_column)?.clone()];
        let reverse = vec![options.descending];
        df.columns = df
            .sort_impl(by_column, reverse, options.nulls_last, None)?
            .columns;
        Ok(df)
    }

More examples

Hide additional examples

src/functions.rs (line 280)

pub fn diag_concat_df(dfs: &[DataFrame]) -> PolarsResult<DataFrame> {
    // TODO! replace with lazy only?
    let upper_bound_width = dfs.iter().map(|df| df.width()).sum();
    let mut column_names = AHashSet::with_capacity(upper_bound_width);
    let mut schema = Vec::with_capacity(upper_bound_width);

    for df in dfs {
        df.get_columns().iter().for_each(|s| {
            let name = s.name();
            if column_names.insert(name) {
                schema.push((name, s.dtype()))
            }
        });
    }

    let dfs = dfs
        .iter()
        .map(|df| {
            let height = df.height();
            let mut columns = Vec::with_capacity(schema.len());

            for (name, dtype) in &schema {
                match df.column(name).ok() {
                    Some(s) => columns.push(s.clone()),
                    None => columns.push(Series::full_null(name, height, dtype)),
                }
            }
            DataFrame::new_no_checks(columns)
        })
        .collect::<Vec<_>>();

    concat_df(&dfs)
}

src/frame/asof_join/groups.rs (line 639)

    pub fn _join_asof_by(
        &self,
        other: &DataFrame,
        left_on: &str,
        right_on: &str,
        left_by: Vec<String>,
        right_by: Vec<String>,
        strategy: AsofStrategy,
        tolerance: Option<AnyValue<'static>>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        let left_asof = self.column(left_on)?.to_physical_repr();
        let right_asof = other.column(right_on)?.to_physical_repr();
        let right_asof_name = right_asof.name();
        let left_asof_name = left_asof.name();

        check_asof_columns(&left_asof, &right_asof)?;

        let mut left_by = self.select_physical(left_by)?;
        let mut right_by = other.select_physical(right_by)?;

        let left_by_s = left_by.get_columns()[0].to_physical_repr().into_owned();
        let right_by_s = right_by.get_columns()[0].to_physical_repr().into_owned();

        let right_join_tuples = with_match_physical_numeric_polars_type!(left_asof.dtype(), |$T| {
            let left_asof: &ChunkedArray<$T> = left_asof.as_ref().as_ref().as_ref();
            let right_asof: &ChunkedArray<$T> = right_asof.as_ref().as_ref().as_ref();

            dispatch_join(
                left_asof,
                right_asof,
                &left_by_s,
                &right_by_s,
                &mut left_by,
                &mut right_by,
                strategy,
                tolerance
            )
        })?;

        let mut drop_these = right_by.get_column_names();
        if left_asof_name == right_asof_name {
            drop_these.push(right_asof_name);
        }

        let cols = other
            .get_columns()
            .iter()
            .filter_map(|s| {
                if drop_these.contains(&s.name()) {
                    None
                } else {
                    Some(s.clone())
                }
            })
            .collect();
        let other = DataFrame::new_no_checks(cols);

        let mut left = self.clone();
        let mut right_join_tuples = &*right_join_tuples;

        if let Some((offset, len)) = slice {
            left = left.slice(offset, len);
            right_join_tuples = slice_slice(right_join_tuples, offset, len);
        }

        // Safety:
        // join tuples are in bounds
        let right_df = unsafe {
            other.take_opt_iter_unchecked(
                right_join_tuples
                    .iter()
                    .map(|opt_idx| opt_idx.map(|idx| idx as usize)),
            )
        };

        _finish_join(left, right_df, None)
    }

src/frame/asof_join/mod.rs (line 122)

    pub fn _join_asof(
        &self,
        other: &DataFrame,
        left_on: &str,
        right_on: &str,
        strategy: AsofStrategy,
        tolerance: Option<AnyValue<'static>>,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        let left_key = self.column(left_on)?;
        let right_key = other.column(right_on)?;

        check_asof_columns(left_key, right_key)?;
        let left_key = left_key.to_physical_repr();
        let right_key = right_key.to_physical_repr();

        let take_idx = match left_key.dtype() {
            DataType::Int64 => left_key
                .i64()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::Int32 => left_key
                .i32()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::UInt64 => left_key
                .u64()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::UInt32 => left_key
                .u32()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::Float32 => left_key
                .f32()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::Float64 => left_key
                .f64()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            _ => {
                let left_key = left_key.cast(&DataType::Int32).unwrap();
                let right_key = right_key.cast(&DataType::Int32).unwrap();
                left_key
                    .i32()
                    .unwrap()
                    .join_asof(&right_key, strategy, tolerance)
            }
        }?;

        // take_idx are sorted so this is a bound check for all
        if let Some(Some(idx)) = take_idx.last() {
            assert!((*idx as usize) < other.height())
        }

        // drop right join column
        let other = if left_on == right_on {
            Cow::Owned(other.drop(right_on)?)
        } else {
            Cow::Borrowed(other)
        };

        let mut left = self.clone();
        let mut take_idx = &*take_idx;

        if let Some((offset, len)) = slice {
            left = left.slice(offset, len);
            take_idx = slice_slice(take_idx, offset, len);
        }

        // Safety:
        // join tuples are in bounds
        let right_df = unsafe {
            other.take_opt_iter_unchecked(
                take_idx
                    .iter()
                    .map(|opt_idx| opt_idx.map(|idx| idx as usize)),
            )
        };

        _finish_join(left, right_df, suffix.as_deref())
    }

src/frame/explode.rs (line 284)

    pub fn melt2(&self, args: MeltArgs) -> PolarsResult<Self> {
        let id_vars = args.id_vars;
        let mut value_vars = args.value_vars;

        let value_name = args.value_name.as_deref().unwrap_or("value");
        let variable_name = args.variable_name.as_deref().unwrap_or("variable");

        let len = self.height();

        // if value vars is empty we take all columns that are not in id_vars.
        if value_vars.is_empty() {
            let id_vars_set = PlHashSet::from_iter(id_vars.iter().map(|s| s.as_str()));
            value_vars = self
                .get_columns()
                .iter()
                .filter_map(|s| {
                    if id_vars_set.contains(s.name()) {
                        None
                    } else {
                        Some(s.name().to_string())
                    }
                })
                .collect();
        }

        // values will all be placed in single column, so we must find their supertype
        let schema = self.schema();
        let mut iter = value_vars.iter().map(|v| {
            schema
                .get(v)
                .ok_or_else(|| PolarsError::NotFound(v.to_string().into()))
        });
        let mut st = iter.next().unwrap()?.clone();
        for dt in iter {
            st = try_get_supertype(&st, dt?)?;
        }

        let values_len = value_vars.iter().map(|name| name.len()).sum::<usize>();

        // The column name of the variable that is melted
        let mut variable_col = MutableUtf8Array::<i64>::with_capacities(
            len * value_vars.len() + 1,
            len * values_len + 1,
        );
        // prepare ids
        let ids_ = self.select(id_vars)?;
        let mut ids = ids_.clone();
        if ids.width() > 0 {
            for _ in 0..value_vars.len() - 1 {
                ids.vstack_mut_unchecked(&ids_)
            }
        }
        ids.as_single_chunk_par();
        drop(ids_);

        let mut values = Vec::with_capacity(value_vars.len());

        for value_column_name in &value_vars {
            variable_col.extend_trusted_len_values(std::iter::repeat(value_column_name).take(len));
            let value_col = self.column(value_column_name)?.cast(&st)?;
            values.extend_from_slice(value_col.chunks())
        }
        let values_arr = concatenate_owned_unchecked(&values)?;
        // Safety
        // The give dtype is correct
        let values =
            unsafe { Series::from_chunks_and_dtype_unchecked(value_name, vec![values_arr], &st) };

        let variable_col = variable_col.as_box();
        // Safety
        // The give dtype is correct
        let variables = unsafe {
            Series::from_chunks_and_dtype_unchecked(
                variable_name,
                vec![variable_col],
                &DataType::Utf8,
            )
        };

        ids.hstack_mut(&[variables, values])?;

        Ok(ids)
    }

source

pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>where
I: IntoIterator<Item = S>,
S: AsRef<str>,

Selected multiple columns by name.

Example

let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
                        "Max weight (kg)" => &[16.0, 35.89])?;
let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;

assert_eq!(&df[0], sv[0]);
assert_eq!(&df[1], sv[1]);

source

pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>where
I: IntoIterator<Item = S>,
S: AsRef<str>,

Select column(s) from this DataFrame and return a new DataFrame.

Examples

fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    df.select(["foo", "bar"])
}

Examples found in repository ?

src/frame/explode.rs (line 270)

    pub fn melt2(&self, args: MeltArgs) -> PolarsResult<Self> {
        let id_vars = args.id_vars;
        let mut value_vars = args.value_vars;

        let value_name = args.value_name.as_deref().unwrap_or("value");
        let variable_name = args.variable_name.as_deref().unwrap_or("variable");

        let len = self.height();

        // if value vars is empty we take all columns that are not in id_vars.
        if value_vars.is_empty() {
            let id_vars_set = PlHashSet::from_iter(id_vars.iter().map(|s| s.as_str()));
            value_vars = self
                .get_columns()
                .iter()
                .filter_map(|s| {
                    if id_vars_set.contains(s.name()) {
                        None
                    } else {
                        Some(s.name().to_string())
                    }
                })
                .collect();
        }

        // values will all be placed in single column, so we must find their supertype
        let schema = self.schema();
        let mut iter = value_vars.iter().map(|v| {
            schema
                .get(v)
                .ok_or_else(|| PolarsError::NotFound(v.to_string().into()))
        });
        let mut st = iter.next().unwrap()?.clone();
        for dt in iter {
            st = try_get_supertype(&st, dt?)?;
        }

        let values_len = value_vars.iter().map(|name| name.len()).sum::<usize>();

        // The column name of the variable that is melted
        let mut variable_col = MutableUtf8Array::<i64>::with_capacities(
            len * value_vars.len() + 1,
            len * values_len + 1,
        );
        // prepare ids
        let ids_ = self.select(id_vars)?;
        let mut ids = ids_.clone();
        if ids.width() > 0 {
            for _ in 0..value_vars.len() - 1 {
                ids.vstack_mut_unchecked(&ids_)
            }
        }
        ids.as_single_chunk_par();
        drop(ids_);

        let mut values = Vec::with_capacity(value_vars.len());

        for value_column_name in &value_vars {
            variable_col.extend_trusted_len_values(std::iter::repeat(value_column_name).take(len));
            let value_col = self.column(value_column_name)?.cast(&st)?;
            values.extend_from_slice(value_col.chunks())
        }
        let values_arr = concatenate_owned_unchecked(&values)?;
        // Safety
        // The give dtype is correct
        let values =
            unsafe { Series::from_chunks_and_dtype_unchecked(value_name, vec![values_arr], &st) };

        let variable_col = variable_col.as_box();
        // Safety
        // The give dtype is correct
        let variables = unsafe {
            Series::from_chunks_and_dtype_unchecked(
                variable_name,
                vec![variable_col],
                &DataType::Utf8,
            )
        };

        ids.hstack_mut(&[variables, values])?;

        Ok(ids)
    }

source

pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>where
I: IntoIterator<Item = S>,
S: AsRef<str>,

Examples found in repository ?

src/frame/asof_join/groups.rs (line 646)

    pub fn _join_asof_by(
        &self,
        other: &DataFrame,
        left_on: &str,
        right_on: &str,
        left_by: Vec<String>,
        right_by: Vec<String>,
        strategy: AsofStrategy,
        tolerance: Option<AnyValue<'static>>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        let left_asof = self.column(left_on)?.to_physical_repr();
        let right_asof = other.column(right_on)?.to_physical_repr();
        let right_asof_name = right_asof.name();
        let left_asof_name = left_asof.name();

        check_asof_columns(&left_asof, &right_asof)?;

        let mut left_by = self.select_physical(left_by)?;
        let mut right_by = other.select_physical(right_by)?;

        let left_by_s = left_by.get_columns()[0].to_physical_repr().into_owned();
        let right_by_s = right_by.get_columns()[0].to_physical_repr().into_owned();

        let right_join_tuples = with_match_physical_numeric_polars_type!(left_asof.dtype(), |$T| {
            let left_asof: &ChunkedArray<$T> = left_asof.as_ref().as_ref().as_ref();
            let right_asof: &ChunkedArray<$T> = right_asof.as_ref().as_ref().as_ref();

            dispatch_join(
                left_asof,
                right_asof,
                &left_by_s,
                &right_by_s,
                &mut left_by,
                &mut right_by,
                strategy,
                tolerance
            )
        })?;

        let mut drop_these = right_by.get_column_names();
        if left_asof_name == right_asof_name {
            drop_these.push(right_asof_name);
        }

        let cols = other
            .get_columns()
            .iter()
            .filter_map(|s| {
                if drop_these.contains(&s.name()) {
                    None
                } else {
                    Some(s.clone())
                }
            })
            .collect();
        let other = DataFrame::new_no_checks(cols);

        let mut left = self.clone();
        let mut right_join_tuples = &*right_join_tuples;

        if let Some((offset, len)) = slice {
            left = left.slice(offset, len);
            right_join_tuples = slice_slice(right_join_tuples, offset, len);
        }

        // Safety:
        // join tuples are in bounds
        let right_df = unsafe {
            other.take_opt_iter_unchecked(
                right_join_tuples
                    .iter()
                    .map(|opt_idx| opt_idx.map(|idx| idx as usize)),
            )
        };

        _finish_join(left, right_df, None)
    }

source

pub fn select_series(
&self,
selection: impl IntoVec<String>
) -> PolarsResult<Vec<Series>>

Select column(s) from this DataFrame and return them into a Vec.

Example

let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
                        "Carbon" => &[1, 2, 3],
                        "Hydrogen" => &[4, 6, 8])?;
let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;

assert_eq!(df["Carbon"], sv[0]);
assert_eq!(df["Hydrogen"], sv[1]);

Examples found in repository ?

src/frame/groupby/mod.rs (line 191)

    pub fn groupby<I, S>(&self, by: I) -> PolarsResult<GroupBy>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let selected_keys = self.select_series(by)?;
        self.groupby_with_series(selected_keys, true, false)
    }

    /// Group DataFrame using a Series column.
    /// The groups are ordered by their smallest row index.
    pub fn groupby_stable<I, S>(&self, by: I) -> PolarsResult<GroupBy>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let selected_keys = self.select_series(by)?;
        self.groupby_with_series(selected_keys, true, true)
    }
}

/// Returned by a groupby operation on a DataFrame. This struct supports
/// several aggregations.
///
/// Until described otherwise, the examples in this struct are performed on the following DataFrame:
///
/// ```ignore
/// use polars_core::prelude::*;
///
/// let dates = &[
/// "2020-08-21",
/// "2020-08-21",
/// "2020-08-22",
/// "2020-08-23",
/// "2020-08-22",
/// ];
/// // date format
/// let fmt = "%Y-%m-%d";
/// // create date series
/// let s0 = DateChunked::parse_from_str_slice("date", dates, fmt)
///         .into_series();
/// // create temperature series
/// let s1 = Series::new("temp", [20, 10, 7, 9, 1]);
/// // create rain series
/// let s2 = Series::new("rain", [0.2, 0.1, 0.3, 0.1, 0.01]);
/// // create a new DataFrame
/// let df = DataFrame::new(vec![s0, s1, s2]).unwrap();
/// println!("{:?}", df);
/// ```
///
/// Outputs:
///
/// ```text
/// +------------+------+------+
/// | date       | temp | rain |
/// | ---        | ---  | ---  |
/// | Date       | i32  | f64  |
/// +============+======+======+
/// | 2020-08-21 | 20   | 0.2  |
/// +------------+------+------+
/// | 2020-08-21 | 10   | 0.1  |
/// +------------+------+------+
/// | 2020-08-22 | 7    | 0.3  |
/// +------------+------+------+
/// | 2020-08-23 | 9    | 0.1  |
/// +------------+------+------+
/// | 2020-08-22 | 1    | 0.01 |
/// +------------+------+------+
/// ```
///
#[derive(Debug, Clone)]
pub struct GroupBy<'df> {
    pub df: &'df DataFrame,
    pub(crate) selected_keys: Vec<Series>,
    // [first idx, [other idx]]
    groups: GroupsProxy,
    // columns selected for aggregation
    pub(crate) selected_agg: Option<Vec<String>>,
}

impl<'df> GroupBy<'df> {
    pub fn new(
        df: &'df DataFrame,
        by: Vec<Series>,
        groups: GroupsProxy,
        selected_agg: Option<Vec<String>>,
    ) -> Self {
        GroupBy {
            df,
            selected_keys: by,
            groups,
            selected_agg,
        }
    }

    /// Select the column(s) that should be aggregated.
    /// You can select a single column or a slice of columns.
    ///
    /// Note that making a selection with this method is not required. If you
    /// skip it all columns (except for the keys) will be selected for aggregation.
    #[must_use]
    pub fn select<I: IntoIterator<Item = S>, S: AsRef<str>>(mut self, selection: I) -> Self {
        self.selected_agg = Some(
            selection
                .into_iter()
                .map(|s| s.as_ref().to_string())
                .collect(),
        );
        self
    }

    /// Get the internal representation of the GroupBy operation.
    /// The Vec returned contains:
    ///     (first_idx, Vec<indexes>)
    ///     Where second value in the tuple is a vector with all matching indexes.
    pub fn get_groups(&self) -> &GroupsProxy {
        &self.groups
    }

    /// Get the internal representation of the GroupBy operation.
    /// The Vec returned contains:
    ///     (first_idx, Vec<indexes>)
    ///     Where second value in the tuple is a vector with all matching indexes.
    ///
    /// # Safety
    /// Groups should always be in bounds of the `DataFrame` hold by this `[GroupBy]`.
    /// If you mutate it, you must hold that invariant.
    pub unsafe fn get_groups_mut(&mut self) -> &mut GroupsProxy {
        &mut self.groups
    }

    pub fn take_groups(self) -> GroupsProxy {
        self.groups
    }

    pub fn take_groups_mut(&mut self) -> GroupsProxy {
        std::mem::take(&mut self.groups)
    }

    pub fn keys_sliced(&self, slice: Option<(i64, usize)>) -> Vec<Series> {
        #[allow(unused_assignments)]
        // needed to keep the lifetimes valid for this scope
        let mut groups_owned = None;

        let groups = if let Some((offset, len)) = slice {
            groups_owned = Some(self.groups.slice(offset, len));
            groups_owned.as_deref().unwrap()
        } else {
            &self.groups
        };

        POOL.install(|| {
            self.selected_keys
                .par_iter()
                .map(|s| {
                    match groups {
                        GroupsProxy::Idx(groups) => {
                            let mut iter = groups.iter().map(|(first, _idx)| first as usize);
                            // Safety:
                            // groups are always in bounds
                            let mut out = unsafe { s.take_iter_unchecked(&mut iter) };
                            if groups.sorted {
                                out.set_sorted(s.is_sorted());
                            };
                            out
                        }
                        GroupsProxy::Slice { groups, rolling } => {
                            if *rolling && !groups.is_empty() {
                                // groups can be sliced
                                let offset = groups[0][0];
                                let [upper_offset, upper_len] = groups[groups.len() - 1];
                                return s.slice(
                                    offset as i64,
                                    ((upper_offset + upper_len) - offset) as usize,
                                );
                            }

                            let mut iter = groups.iter().map(|&[first, _len]| first as usize);
                            // Safety:
                            // groups are always in bounds
                            let mut out = unsafe { s.take_iter_unchecked(&mut iter) };
                            // sliced groups are always in order of discovery
                            out.set_sorted(s.is_sorted());
                            out
                        }
                    }
                })
                .collect()
        })
    }

    pub fn keys(&self) -> Vec<Series> {
        self.keys_sliced(None)
    }

    fn prepare_agg(&self) -> PolarsResult<(Vec<Series>, Vec<Series>)> {
        let selection = match &self.selected_agg {
            Some(selection) => selection.clone(),
            None => {
                let by: Vec<_> = self.selected_keys.iter().map(|s| s.name()).collect();
                self.df
                    .get_column_names()
                    .into_iter()
                    .filter(|a| !by.contains(a))
                    .map(|s| s.to_string())
                    .collect()
            }
        };

        let keys = self.keys();
        let agg_col = self.df.select_series(selection)?;
        Ok((keys, agg_col))
    }

    /// Aggregate grouped series and compute the mean per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(&["temp", "rain"]).mean()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+-----------+-----------+
    /// | date       | temp_mean | rain_mean |
    /// | ---        | ---       | ---       |
    /// | Date       | f64       | f64       |
    /// +============+===========+===========+
    /// | 2020-08-23 | 9         | 0.1       |
    /// +------------+-----------+-----------+
    /// | 2020-08-22 | 4         | 0.155     |
    /// +------------+-----------+-----------+
    /// | 2020-08-21 | 15        | 0.15      |
    /// +------------+-----------+-----------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn mean(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Mean);
            let mut agg = unsafe { agg_col.agg_mean(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped series and compute the sum per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).sum()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+----------+
    /// | date       | temp_sum |
    /// | ---        | ---      |
    /// | Date       | i32      |
    /// +============+==========+
    /// | 2020-08-23 | 9        |
    /// +------------+----------+
    /// | 2020-08-22 | 8        |
    /// +------------+----------+
    /// | 2020-08-21 | 30       |
    /// +------------+----------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn sum(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Sum);
            let mut agg = unsafe { agg_col.agg_sum(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped series and compute the minimal value per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).min()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+----------+
    /// | date       | temp_min |
    /// | ---        | ---      |
    /// | Date       | i32      |
    /// +============+==========+
    /// | 2020-08-23 | 9        |
    /// +------------+----------+
    /// | 2020-08-22 | 1        |
    /// +------------+----------+
    /// | 2020-08-21 | 10       |
    /// +------------+----------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn min(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Min);
            let mut agg = unsafe { agg_col.agg_min(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped series and compute the maximum value per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).max()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+----------+
    /// | date       | temp_max |
    /// | ---        | ---      |
    /// | Date       | i32      |
    /// +============+==========+
    /// | 2020-08-23 | 9        |
    /// +------------+----------+
    /// | 2020-08-22 | 7        |
    /// +------------+----------+
    /// | 2020-08-21 | 20       |
    /// +------------+----------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn max(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Max);
            let mut agg = unsafe { agg_col.agg_max(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and find the first value per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).first()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+------------+
    /// | date       | temp_first |
    /// | ---        | ---        |
    /// | Date       | i32        |
    /// +============+============+
    /// | 2020-08-23 | 9          |
    /// +------------+------------+
    /// | 2020-08-22 | 7          |
    /// +------------+------------+
    /// | 2020-08-21 | 20         |
    /// +------------+------------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn first(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::First);
            let mut agg = unsafe { agg_col.agg_first(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and return the last value per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).last()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+------------+
    /// | date       | temp_last |
    /// | ---        | ---        |
    /// | Date       | i32        |
    /// +============+============+
    /// | 2020-08-23 | 9          |
    /// +------------+------------+
    /// | 2020-08-22 | 1          |
    /// +------------+------------+
    /// | 2020-08-21 | 10         |
    /// +------------+------------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn last(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Last);
            let mut agg = unsafe { agg_col.agg_last(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` by counting the number of unique values.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).n_unique()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+---------------+
    /// | date       | temp_n_unique |
    /// | ---        | ---           |
    /// | Date       | u32           |
    /// +============+===============+
    /// | 2020-08-23 | 1             |
    /// +------------+---------------+
    /// | 2020-08-22 | 2             |
    /// +------------+---------------+
    /// | 2020-08-21 | 2             |
    /// +------------+---------------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn n_unique(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::NUnique);
            let mut agg = unsafe { agg_col.agg_n_unique(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg.into_series());
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and determine the quantile per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// # use polars_arrow::prelude::QuantileInterpolOptions;
    ///
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).quantile(0.2, QuantileInterpolOptions::default())
    /// }
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn quantile(
        &self,
        quantile: f64,
        interpol: QuantileInterpolOptions,
    ) -> PolarsResult<DataFrame> {
        if !(0.0..=1.0).contains(&quantile) {
            return Err(PolarsError::ComputeError(
                "quantile should be within 0.0 and 1.0".into(),
            ));
        }
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name =
                fmt_groupby_column(agg_col.name(), GroupByMethod::Quantile(quantile, interpol));
            let mut agg = unsafe { agg_col.agg_quantile(&self.groups, quantile, interpol) };
            agg.rename(&new_name);
            cols.push(agg.into_series());
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and determine the median per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).median()
    /// }
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn median(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Median);
            let mut agg = unsafe { agg_col.agg_median(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg.into_series());
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and determine the variance per group.
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn var(&self, ddof: u8) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Var(ddof));
            let mut agg = unsafe { agg_col.agg_var(&self.groups, ddof) };
            agg.rename(&new_name);
            cols.push(agg.into_series());
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped `Series` and determine the standard deviation per group.
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn std(&self, ddof: u8) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Std(ddof));
            let mut agg = unsafe { agg_col.agg_std(&self.groups, ddof) };
            agg.rename(&new_name);
            cols.push(agg.into_series());
        }
        DataFrame::new(cols)
    }

    /// Aggregate grouped series and compute the number of values per group.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.select(["temp"]).count()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+------------+
    /// | date       | temp_count |
    /// | ---        | ---        |
    /// | Date       | u32        |
    /// +============+============+
    /// | 2020-08-23 | 1          |
    /// +------------+------------+
    /// | 2020-08-22 | 2          |
    /// +------------+------------+
    /// | 2020-08-21 | 2          |
    /// +------------+------------+
    /// ```
    pub fn count(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;

        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::Count);
            let mut ca = self.groups.group_count();
            ca.rename(&new_name);
            cols.push(ca.into_series());
        }
        DataFrame::new(cols)
    }

    /// Get the groupby group indexes.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     df.groupby(["date"])?.groups()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +--------------+------------+
    /// | date         | groups     |
    /// | ---          | ---        |
    /// | Date(days)   | list [u32] |
    /// +==============+============+
    /// | 2020-08-23   | "[3]"      |
    /// +--------------+------------+
    /// | 2020-08-22   | "[2, 4]"   |
    /// +--------------+------------+
    /// | 2020-08-21   | "[0, 1]"   |
    /// +--------------+------------+
    /// ```
    pub fn groups(&self) -> PolarsResult<DataFrame> {
        let mut cols = self.keys();
        let mut column = self.groups.as_list_chunked();
        let new_name = fmt_groupby_column("", GroupByMethod::Groups);
        column.rename(&new_name);
        cols.push(column.into_series());
        DataFrame::new(cols)
    }

    /// Aggregate the groups of the groupby operation into lists.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
    ///     // GroupBy and aggregate to Lists
    ///     df.groupby(["date"])?.select(["temp"]).agg_list()
    /// }
    /// ```
    /// Returns:
    ///
    /// ```text
    /// +------------+------------------------+
    /// | date       | temp_agg_list          |
    /// | ---        | ---                    |
    /// | Date       | list [i32]             |
    /// +============+========================+
    /// | 2020-08-23 | "[Some(9)]"            |
    /// +------------+------------------------+
    /// | 2020-08-22 | "[Some(7), Some(1)]"   |
    /// +------------+------------------------+
    /// | 2020-08-21 | "[Some(20), Some(10)]" |
    /// +------------+------------------------+
    /// ```
    #[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
    pub fn agg_list(&self) -> PolarsResult<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = fmt_groupby_column(agg_col.name(), GroupByMethod::List);
            let mut agg = unsafe { agg_col.agg_list(&self.groups) };
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    fn prepare_apply(&self) -> PolarsResult<DataFrame> {
        if let Some(agg) = &self.selected_agg {
            if agg.is_empty() {
                Ok(self.df.clone())
            } else {
                let mut new_cols = Vec::with_capacity(self.selected_keys.len() + agg.len());
                new_cols.extend_from_slice(&self.selected_keys);
                let cols = self.df.select_series(agg)?;
                new_cols.extend(cols.into_iter());
                Ok(DataFrame::new_no_checks(new_cols))
            }
        } else {
            Ok(self.df.clone())
        }
    }

More examples

Hide additional examples

src/frame/explode.rs (line 148)

    pub fn explode<I, S>(&self, columns: I) -> PolarsResult<DataFrame>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        // We need to sort the column by order of original occurrence. Otherwise the insert by index
        // below will panic
        let columns = self.select_series(columns)?;
        self.explode_impl(columns)
    }

src/frame/mod.rs (line 1017)

    pub fn drop_nulls(&self, subset: Option<&[String]>) -> PolarsResult<Self> {
        let selected_series;

        let mut iter = match subset {
            Some(cols) => {
                selected_series = self.select_series(cols)?;
                selected_series.iter()
            }
            None => self.columns.iter(),
        };

        // fast path for no nulls in df
        if iter.clone().all(|s| !s.has_validity()) {
            return Ok(self.clone());
        }

        let mask = iter
            .next()
            .ok_or_else(|| PolarsError::NoData("No data to drop nulls from".into()))?;
        let mut mask = mask.is_not_null();

        for s in iter {
            mask = mask & s.is_not_null();
        }
        self.filter(&mask)
    }

    /// Drop a column by name.
    /// This is a pure method and will return a new `DataFrame` instead of modifying
    /// the current one in place.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df1: DataFrame = df!("Ray type" => &["α", "β", "X", "γ"])?;
    /// let df2: DataFrame = df1.drop("Ray type")?;
    ///
    /// assert!(df2.is_empty());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn drop(&self, name: &str) -> PolarsResult<Self> {
        let idx = self.check_name_to_idx(name)?;
        let mut new_cols = Vec::with_capacity(self.columns.len() - 1);

        self.columns.iter().enumerate().for_each(|(i, s)| {
            if i != idx {
                new_cols.push(s.clone())
            }
        });

        Ok(DataFrame::new_no_checks(new_cols))
    }

    pub fn drop_many<S: AsRef<str>>(&self, names: &[S]) -> Self {
        let names = names.iter().map(|s| s.as_ref()).collect();
        fn inner(df: &DataFrame, names: Vec<&str>) -> DataFrame {
            let mut new_cols = Vec::with_capacity(df.columns.len() - names.len());
            df.columns.iter().for_each(|s| {
                if !names.contains(&s.name()) {
                    new_cols.push(s.clone())
                }
            });

            DataFrame::new_no_checks(new_cols)
        }
        inner(self, names)
    }

    fn insert_at_idx_no_name_check(
        &mut self,
        index: usize,
        series: Series,
    ) -> PolarsResult<&mut Self> {
        if series.len() == self.height() {
            self.columns.insert(index, series);
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                .into(),
            ))
        }
    }

    /// Insert a new column at a given index.
    pub fn insert_at_idx<S: IntoSeries>(
        &mut self,
        index: usize,
        column: S,
    ) -> PolarsResult<&mut Self> {
        let series = column.into_series();
        self.check_already_present(series.name())?;
        self.insert_at_idx_no_name_check(index, series)
    }

    fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> {
        if let Some(idx) = self.find_idx_by_name(series.name()) {
            self.replace_at_idx(idx, series)?;
        } else {
            self.columns.push(series);
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    pub fn with_column<S: IntoSeries>(&mut self, column: S) -> PolarsResult<&mut Self> {
        fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();
            if series.len() == 1 && height > 1 {
                series = series.new_from_index(0, height);
            }

            if series.len() == height || df.is_empty() {
                df.add_column_by_search(series)?;
                Ok(df)
            }
            // special case for literals
            else if height == 0 && series.len() == 1 {
                let s = series.slice(0, 0);
                df.add_column_by_search(s)?;
                Ok(df)
            } else {
                Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Could not add column. The Series length {} differs from the DataFrame height: {}",
                        series.len(),
                        df.height()
                    )
                        .into(),
                ))
            }
        }
        let series = column.into_series();
        inner(self, series)
    }

    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let name = s.name();
        if let Some((idx, _, _)) = schema.get_full(name) {
            // schema is incorrect fallback to search
            if self.columns.get(idx).map(|s| s.name()) != Some(name) {
                self.add_column_by_search(s)?;
            } else {
                self.replace_at_idx(idx, s)?;
            }
        } else {
            self.columns.push(s);
        }
        Ok(())
    }

    pub fn _add_columns(&mut self, columns: Vec<Series>, schema: &Schema) -> PolarsResult<()> {
        for (i, s) in columns.into_iter().enumerate() {
            // we need to branch here
            // because users can add multiple columns with the same name
            if i == 0 || schema.get(s.name()).is_some() {
                self.with_column_and_schema(s, schema)?;
            } else {
                self.with_column(s.clone())?;
            }
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    /// Uses an existing schema to amortize lookups.
    /// If the schema is incorrect, we will fallback to linear search.
    pub fn with_column_and_schema<S: IntoSeries>(
        &mut self,
        column: S,
        schema: &Schema,
    ) -> PolarsResult<&mut Self> {
        let mut series = column.into_series();

        let height = self.height();
        if series.len() == 1 && height > 1 {
            series = series.new_from_index(0, height);
        }

        if series.len() == height || self.is_empty() {
            self.add_column_by_schema(series, schema)?;
            Ok(self)
        }
        // special case for literals
        else if height == 0 && series.len() == 1 {
            let s = series.slice(0, 0);
            self.add_column_by_schema(s, schema)?;
            Ok(self)
        } else {
            Err(PolarsError::ShapeMisMatch(
                format!(
                    "Could not add column. The Series length {} differs from the DataFrame height: {}",
                    series.len(),
                    self.height()
                )
                    .into(),
            ))
        }
    }

    /// Get a row in the `DataFrame`. Beware this is slow.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    ///     df.get(idx)
    /// }
    /// ```
    pub fn get(&self, idx: usize) -> Option<Vec<AnyValue>> {
        match self.columns.get(0) {
            Some(s) => {
                if s.len() <= idx {
                    return None;
                }
            }
            None => return None,
        }
        // safety: we just checked bounds
        unsafe { Some(self.columns.iter().map(|s| s.get_unchecked(idx)).collect()) }
    }

    /// Select a `Series` by index.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
    ///                         "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
    ///
    /// let s1: Option<&Series> = df.select_at_idx(0);
    /// let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
    ///
    /// assert_eq!(s1, Some(&s2));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_at_idx(&self, idx: usize) -> Option<&Series> {
        self.columns.get(idx)
    }

    /// Select a mutable series by index.
    ///
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> {
        self.columns.get_mut(idx)
    }

    /// Select column(s) from this `DataFrame` by range and return a new DataFrame
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df = df! {
    ///     "0" => &[0, 0, 0],
    ///     "1" => &[1, 1, 1],
    ///     "2" => &[2, 2, 2]
    /// }?;
    ///
    /// assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
    /// assert!(df.frame_equal(&df.select_by_range(..)?));
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_by_range<R>(&self, range: R) -> PolarsResult<Self>
    where
        R: ops::RangeBounds<usize>,
    {
        // This function is copied from std::slice::range (https://doc.rust-lang.org/std/slice/fn.range.html)
        // because it is the nightly feature. We should change here if this function were stable.
        fn get_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
        where
            R: ops::RangeBounds<usize>,
        {
            let len = bounds.end;

            let start: ops::Bound<&usize> = range.start_bound();
            let start = match start {
                ops::Bound::Included(&start) => start,
                ops::Bound::Excluded(start) => start.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice from after maximum usize");
                }),
                ops::Bound::Unbounded => 0,
            };

            let end: ops::Bound<&usize> = range.end_bound();
            let end = match end {
                ops::Bound::Included(end) => end.checked_add(1).unwrap_or_else(|| {
                    panic!("attempted to index slice up to maximum usize");
                }),
                ops::Bound::Excluded(&end) => end,
                ops::Bound::Unbounded => len,
            };

            if start > end {
                panic!("slice index starts at {start} but ends at {end}");
            }
            if end > len {
                panic!("range end index {end} out of range for slice of length {len}",);
            }

            ops::Range { start, end }
        }

        let colnames = self.get_column_names_owned();
        let range = get_range(range, ..colnames.len());

        self.select_impl(&colnames[range])
    }

    /// Get column index of a `Series` by name.
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
    ///                         "Health" => &[100, 200, 500],
    ///                         "Mana" => &[250, 100, 0],
    ///                         "Strength" => &[30, 150, 300])?;
    ///
    /// assert_eq!(df.find_idx_by_name("Name"), Some(0));
    /// assert_eq!(df.find_idx_by_name("Health"), Some(1));
    /// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
    /// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
    /// assert_eq!(df.find_idx_by_name("Haste"), None);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
        self.columns.iter().position(|s| s.name() == name)
    }

    /// Select a single column by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
    /// let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
    /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
    ///
    /// assert_eq!(df.column("Password")?, &s1);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn column(&self, name: &str) -> PolarsResult<&Series> {
        let idx = self
            .find_idx_by_name(name)
            .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
        Ok(self.select_at_idx(idx).unwrap())
    }

    /// Selected multiple columns by name.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
    ///                         "Max weight (kg)" => &[16.0, 35.89])?;
    /// let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
    ///
    /// assert_eq!(&df[0], sv[0]);
    /// assert_eq!(&df[1], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn columns<I, S>(&self, names: I) -> PolarsResult<Vec<&Series>>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        names
            .into_iter()
            .map(|name| self.column(name.as_ref()))
            .collect()
    }

    /// Select column(s) from this `DataFrame` and return a new `DataFrame`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.select(["foo", "bar"])
    /// }
    /// ```
    pub fn select<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_impl(&cols)
    }

    fn select_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    pub fn select_physical<I, S>(&self, selection: I) -> PolarsResult<Self>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let cols = selection
            .into_iter()
            .map(|s| s.as_ref().to_string())
            .collect::<Vec<_>>();
        self.select_physical_impl(&cols)
    }

    fn select_physical_impl(&self, cols: &[String]) -> PolarsResult<Self> {
        self.select_check_duplicates(cols)?;
        let selected = self.select_series_physical_impl(cols)?;
        Ok(DataFrame::new_no_checks(selected))
    }

    fn select_check_duplicates(&self, cols: &[String]) -> PolarsResult<()> {
        let mut names = PlHashSet::with_capacity(cols.len());
        for name in cols {
            if !names.insert(name.as_str()) {
                _duplicate_err(name)?
            }
        }
        Ok(())
    }

    /// Select column(s) from this `DataFrame` and return them into a `Vec`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
    ///                         "Carbon" => &[1, 2, 3],
    ///                         "Hydrogen" => &[4, 6, 8])?;
    /// let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
    ///
    /// assert_eq!(df["Carbon"], sv[0]);
    /// assert_eq!(df["Hydrogen"], sv[1]);
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn select_series(&self, selection: impl IntoVec<String>) -> PolarsResult<Vec<Series>> {
        let cols = selection.into_vec();
        self.select_series_impl(&cols)
    }

    fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> {
        self.columns
            .iter()
            .enumerate()
            .map(|(i, s)| (s.name(), i))
            .collect()
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_physical_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            let name_to_idx = self._names_to_idx_map();
            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self
                        .select_at_idx(idx)
                        .unwrap()
                        .to_physical_repr()
                        .into_owned())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.to_physical_repr().into_owned()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// A non generic implementation to reduce compiler bloat.
    fn select_series_impl(&self, cols: &[String]) -> PolarsResult<Vec<Series>> {
        let selected = if cols.len() > 1 && self.columns.len() > 10 {
            // we hash, because there are user that having millions of columns.
            // # https://github.com/pola-rs/polars/issues/1023
            let name_to_idx = self._names_to_idx_map();

            cols.iter()
                .map(|name| {
                    let idx = *name_to_idx
                        .get(name.as_str())
                        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
                    Ok(self.select_at_idx(idx).unwrap().clone())
                })
                .collect::<PolarsResult<Vec<_>>>()?
        } else {
            cols.iter()
                .map(|c| self.column(c).map(|s| s.clone()))
                .collect::<PolarsResult<Vec<_>>>()?
        };

        Ok(selected)
    }

    /// Select a mutable series by name.
    /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.*
    /// For this reason the method is not public
    fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
        let opt_idx = self.find_idx_by_name(name);

        match opt_idx {
            Some(idx) => self.select_at_idx_mut(idx),
            None => None,
        }
    }

    /// Does a filter but splits thread chunks vertically instead of horizontally
    /// This yields a DataFrame with `n_chunks == n_threads`.
    fn filter_vertical(&mut self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let n_threads = POOL.current_num_threads();

        let masks = split_ca(mask, n_threads).unwrap();
        let dfs = split_df(self, n_threads).unwrap();
        let dfs: PolarsResult<Vec<_>> = POOL.install(|| {
            masks
                .par_iter()
                .zip(dfs)
                .map(|(mask, df)| {
                    let cols = df
                        .columns
                        .iter()
                        .map(|s| s.filter(mask))
                        .collect::<PolarsResult<_>>()?;
                    Ok(DataFrame::new_no_checks(cols))
                })
                .collect()
        });

        let mut iter = dfs?.into_iter();
        let first = iter.next().unwrap();
        Ok(iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        }))
    }

    /// Take the `DataFrame` rows by a boolean mask.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let mask = df.column("sepal.width")?.is_not_null();
    ///     df.filter(&mask)
    /// }
    /// ```
    pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            return self.clone().filter_vertical(mask);
        }
        let new_col = self.try_apply_columns_par(&|s| match s.dtype() {
            DataType::Utf8 => s.filter_threaded(mask, true),
            _ => s.filter(mask),
        })?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Same as `filter` but does not parallelize.
    pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self> {
        let new_col = self.try_apply_columns(&|s| s.filter(mask))?;
        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` value by indexes from an iterator.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let iterator = (0..9).into_iter();
    ///     df.take_iter(iterator)
    /// }
    /// ```
    pub fn take_iter<I>(&self, iter: I) -> PolarsResult<Self>
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        let new_col = self.try_apply_columns_par(&|s| {
            let mut i = iter.clone();
            s.take_iter(&mut i)
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    /// Take `DataFrame` values by indexes from an iterator.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking but checks null validity.
    #[must_use]
    pub unsafe fn take_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = usize> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            return self.take_unchecked_vectical(&idx_ca.into_inner());
        }

        let n_chunks = self.n_chunks();
        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: NoNull<IdxCa> = iter.into_iter().map(|idx| idx as IdxSize).collect();
            let idx_ca = idx_ca.into_inner();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_iter_unchecked(&mut i)
            })
        };
        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` values by indexes from an iterator that may contain None values.
    ///
    /// # Safety
    ///
    /// This doesn't do any bound checking. Out of bounds may access uninitialized memory.
    /// Null validity is checked
    #[must_use]
    pub unsafe fn take_opt_iter_unchecked<I>(&self, mut iter: I) -> Self
    where
        I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,
    {
        if std::env::var("POLARS_VERT_PAR").is_ok() {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked_vectical(&idx_ca);
        }

        let n_chunks = self.n_chunks();

        let has_utf8 = self
            .columns
            .iter()
            .any(|s| matches!(s.dtype(), DataType::Utf8));

        if (n_chunks == 1 && self.width() > 1) || has_utf8 {
            let idx_ca: IdxCa = iter
                .into_iter()
                .map(|opt| opt.map(|v| v as IdxSize))
                .collect();
            return self.take_unchecked(&idx_ca);
        }

        let new_col = if self.width() == 1 {
            self.columns
                .iter()
                .map(|s| s.take_opt_iter_unchecked(&mut iter))
                .collect::<Vec<_>>()
        } else {
            self.apply_columns_par(&|s| {
                let mut i = iter.clone();
                s.take_opt_iter_unchecked(&mut i)
            })
        };

        DataFrame::new_no_checks(new_col)
    }

    /// Take `DataFrame` rows by index values.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     let idx = IdxCa::new("idx", &[0, 1, 9]);
    ///     df.take(&idx)
    /// }
    /// ```
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self> {
        let indices = if indices.chunks.len() > 1 {
            Cow::Owned(indices.rechunk())
        } else {
            Cow::Borrowed(indices)
        };
        let new_col = POOL.install(|| {
            self.try_apply_columns_par(&|s| match s.dtype() {
                DataType::Utf8 => s.take_threaded(&indices, true),
                _ => s.take(&indices),
            })
        })?;

        Ok(DataFrame::new_no_checks(new_col))
    }

    pub(crate) unsafe fn take_unchecked(&self, idx: &IdxCa) -> Self {
        self.take_unchecked_impl(idx, true)
    }

    unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self {
        let cols = if allow_threads {
            POOL.install(|| {
                self.apply_columns_par(&|s| match s.dtype() {
                    DataType::Utf8 => s.take_unchecked_threaded(idx, true).unwrap(),
                    _ => s.take_unchecked(idx).unwrap(),
                })
            })
        } else {
            self.columns
                .iter()
                .map(|s| s.take_unchecked(idx).unwrap())
                .collect()
        };
        DataFrame::new_no_checks(cols)
    }

    unsafe fn take_unchecked_vectical(&self, indices: &IdxCa) -> Self {
        let n_threads = POOL.current_num_threads();
        let idxs = split_ca(indices, n_threads).unwrap();

        let dfs: Vec<_> = POOL.install(|| {
            idxs.par_iter()
                .map(|idx| {
                    let cols = self
                        .columns
                        .iter()
                        .map(|s| s.take_unchecked(idx).unwrap())
                        .collect();
                    DataFrame::new_no_checks(cols)
                })
                .collect()
        });

        let mut iter = dfs.into_iter();
        let first = iter.next().unwrap();
        iter.fold(first, |mut acc, df| {
            acc.vstack_mut(&df).unwrap();
            acc
        })
    }

    /// Rename a column in the `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    ///     let original_name = "foo";
    ///     let new_name = "bar";
    ///     df.rename(original_name, new_name)
    /// }
    /// ```
    pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> {
        self.select_mut(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))
            .map(|s| s.rename(name))?;

        let unique_names: AHashSet<&str, ahash::RandomState> =
            AHashSet::from_iter(self.columns.iter().map(|s| s.name()));
        if unique_names.len() != self.columns.len() {
            return Err(PolarsError::SchemaMisMatch(
                "duplicate column names found".into(),
            ));
        }
        Ok(self)
    }

    /// Sort `DataFrame` in place by a column.
    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // a lot of indirection in both sorting and take
        self.as_single_chunk_par();
        let by_column = self.select_series(by_column)?;
        let reverse = reverse.into_vec();
        self.columns = self.sort_impl(by_column, reverse, false, None)?.columns;
        Ok(self)
    }

source

pub fn filter(&self, mask: &BooleanChunked) -> PolarsResult<Self>

Take the DataFrame rows by a boolean mask.

Example

fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    let mask = df.column("sepal.width")?.is_not_null();
    df.filter(&mask)
}

Examples found in repository ?

src/frame/mod.rs (line 1036)

    pub fn drop_nulls(&self, subset: Option<&[String]>) -> PolarsResult<Self> {
        let selected_series;

        let mut iter = match subset {
            Some(cols) => {
                selected_series = self.select_series(cols)?;
                selected_series.iter()
            }
            None => self.columns.iter(),
        };

        // fast path for no nulls in df
        if iter.clone().all(|s| !s.has_validity()) {
            return Ok(self.clone());
        }

        let mask = iter
            .next()
            .ok_or_else(|| PolarsError::NoData("No data to drop nulls from".into()))?;
        let mut mask = mask.is_not_null();

        for s in iter {
            mask = mask & s.is_not_null();
        }
        self.filter(&mask)
    }

source

pub fn _filter_seq(&self, mask: &BooleanChunked) -> PolarsResult<Self>

Same as filter but does not parallelize.

source

pub fn take_iter(&self, iter: I) -> PolarsResult<Self>where
I: Iterator<Item = usize> + Clone + Sync + TrustedLen,

Take DataFrame value by indexes from an iterator.

Example

fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    let iterator = (0..9).into_iter();
    df.take_iter(iterator)
}

source

pub unsafe fn take_iter_unchecked(&self, iter: I) -> Selfwhere
I: Iterator<Item = usize> + Clone + Sync + TrustedLen,

Take DataFrame values by indexes from an iterator.

Safety

This doesn’t do any bound checking but checks null validity.

Examples found in repository ?

src/frame/groupby/mod.rs (line 916)

unsafe fn take_df(df: &DataFrame, g: GroupsIndicator) -> DataFrame {
    match g {
        GroupsIndicator::Idx(idx) => df.take_iter_unchecked(idx.1.iter().map(|i| *i as usize)),
        GroupsIndicator::Slice([first, len]) => df.slice(first as i64, len as usize),
    }
}

source

pub unsafe fn take_opt_iter_unchecked(&self, iter: I) -> Selfwhere
I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen,

Take DataFrame values by indexes from an iterator that may contain None values.

Safety

This doesn’t do any bound checking. Out of bounds may access uninitialized memory. Null validity is checked

Examples found in repository ?

src/frame/hash_join/mod.rs (lines 437-439)

    pub fn _finish_left_join(
        &self,
        ids: LeftJoinIds,
        other: &DataFrame,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        let (left_idx, right_idx) = ids;
        let materialize_left = || match left_idx {
            JoinIds::Left(left_idx) => {
                let mut left_idx = &*left_idx;
                if let Some((offset, len)) = slice {
                    left_idx = slice_slice(left_idx, offset, len);
                }
                unsafe { self._create_left_df_from_slice(left_idx, true, true) }
            }
            JoinIds::Right(left_idx) => {
                let mut left_idx = &*left_idx;
                if let Some((offset, len)) = slice {
                    left_idx = slice_slice(left_idx, offset, len);
                }
                unsafe { self.create_left_df_chunked(left_idx, true) }
            }
        };

        let materialize_right = || match right_idx {
            JoinOptIds::Left(right_idx) => {
                let mut right_idx = &*right_idx;
                if let Some((offset, len)) = slice {
                    right_idx = slice_slice(right_idx, offset, len);
                }
                unsafe {
                    other.take_opt_iter_unchecked(
                        right_idx.iter().map(|opt_i| opt_i.map(|i| i as usize)),
                    )
                }
            }
            JoinOptIds::Right(right_idx) => {
                let mut right_idx = &*right_idx;
                if let Some((offset, len)) = slice {
                    right_idx = slice_slice(right_idx, offset, len);
                }
                unsafe { other.take_opt_chunked_unchecked(right_idx) }
            }
        };
        let (df_left, df_right) = POOL.join(materialize_left, materialize_right);

        _finish_join(df_left, df_right, suffix.as_deref())
    }

    pub fn _left_join_from_series(
        &self,
        other: &DataFrame,
        s_left: &Series,
        s_right: &Series,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
        verbose: bool,
    ) -> PolarsResult<DataFrame> {
        #[cfg(feature = "dtype-categorical")]
        _check_categorical_src(s_left.dtype(), s_right.dtype())?;

        // ensure that the chunks are aligned otherwise we go OOB
        let mut left = self.clone();
        let mut s_left = s_left.clone();
        let mut right = other.clone();
        let mut s_right = s_right.clone();
        if left.should_rechunk() {
            left.as_single_chunk_par();
            s_left = s_left.rechunk();
        }
        if right.should_rechunk() {
            right.as_single_chunk_par();
            s_right = s_right.rechunk();
        }
        let ids = sort_or_hash_left(&s_left, &s_right, verbose);
        left._finish_left_join(ids, &right.drop(s_right.name()).unwrap(), suffix, slice)
    }

    #[cfg(feature = "semi_anti_join")]
    /// # Safety
    /// `idx` must be in bounds
    pub unsafe fn _finish_anti_semi_join(
        &self,
        mut idx: &[IdxSize],
        slice: Option<(i64, usize)>,
    ) -> DataFrame {
        if let Some((offset, len)) = slice {
            idx = slice_slice(idx, offset, len);
        }
        // idx from anti-semi join should always be sorted
        self._take_unchecked_slice2(idx, true, IsSorted::Ascending)
    }

    #[cfg(feature = "semi_anti_join")]
    pub fn _semi_anti_join_from_series(
        &self,
        s_left: &Series,
        s_right: &Series,
        slice: Option<(i64, usize)>,
        anti: bool,
    ) -> PolarsResult<DataFrame> {
        #[cfg(feature = "dtype-categorical")]
        _check_categorical_src(s_left.dtype(), s_right.dtype())?;

        let idx = s_left.hash_join_semi_anti(s_right, anti);
        // Safety:
        // indices are in bounds
        Ok(unsafe { self._finish_anti_semi_join(&idx, slice) })
    }
    pub fn _outer_join_from_series(
        &self,
        other: &DataFrame,
        s_left: &Series,
        s_right: &Series,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        #[cfg(feature = "dtype-categorical")]
        _check_categorical_src(s_left.dtype(), s_right.dtype())?;

        // store this so that we can keep original column order.
        let join_column_index = self.iter().position(|s| s.name() == s_left.name()).unwrap();

        // Get the indexes of the joined relations
        let opt_join_tuples = s_left.hash_join_outer(s_right);
        let mut opt_join_tuples = &*opt_join_tuples;

        if let Some((offset, len)) = slice {
            opt_join_tuples = slice_slice(opt_join_tuples, offset, len);
        }

        // Take the left and right dataframes by join tuples
        let (mut df_left, df_right) = POOL.join(
            || unsafe {
                self.drop(s_left.name()).unwrap().take_opt_iter_unchecked(
                    opt_join_tuples
                        .iter()
                        .map(|(left, _right)| left.map(|i| i as usize)),
                )
            },
            || unsafe {
                other.drop(s_right.name()).unwrap().take_opt_iter_unchecked(
                    opt_join_tuples
                        .iter()
                        .map(|(_left, right)| right.map(|i| i as usize)),
                )
            },
        );

        let mut s = s_left
            .to_physical_repr()
            .zip_outer_join_column(&s_right.to_physical_repr(), opt_join_tuples);
        s.rename(s_left.name());
        let s = match s_left.dtype() {
            #[cfg(feature = "dtype-categorical")]
            DataType::Categorical(_) => {
                let ca_left = s_left.categorical().unwrap();
                let new_rev_map = ca_left.merge_categorical_map(s_right.categorical().unwrap())?;
                let logical = s.u32().unwrap().clone();
                // safety:
                // categorical maps are merged
                unsafe {
                    CategoricalChunked::from_cats_and_rev_map_unchecked(logical, new_rev_map)
                        .into_series()
                }
            }
            dt @ DataType::Datetime(_, _)
            | dt @ DataType::Time
            | dt @ DataType::Date
            | dt @ DataType::Duration(_) => s.cast(dt).unwrap(),
            _ => s,
        };

        df_left.get_columns_mut().insert(join_column_index, s);
        _finish_join(df_left, df_right, suffix.as_deref())
    }

More examples

Hide additional examples

src/frame/asof_join/groups.rs (lines 697-701)

    pub fn _join_asof_by(
        &self,
        other: &DataFrame,
        left_on: &str,
        right_on: &str,
        left_by: Vec<String>,
        right_by: Vec<String>,
        strategy: AsofStrategy,
        tolerance: Option<AnyValue<'static>>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        let left_asof = self.column(left_on)?.to_physical_repr();
        let right_asof = other.column(right_on)?.to_physical_repr();
        let right_asof_name = right_asof.name();
        let left_asof_name = left_asof.name();

        check_asof_columns(&left_asof, &right_asof)?;

        let mut left_by = self.select_physical(left_by)?;
        let mut right_by = other.select_physical(right_by)?;

        let left_by_s = left_by.get_columns()[0].to_physical_repr().into_owned();
        let right_by_s = right_by.get_columns()[0].to_physical_repr().into_owned();

        let right_join_tuples = with_match_physical_numeric_polars_type!(left_asof.dtype(), |$T| {
            let left_asof: &ChunkedArray<$T> = left_asof.as_ref().as_ref().as_ref();
            let right_asof: &ChunkedArray<$T> = right_asof.as_ref().as_ref().as_ref();

            dispatch_join(
                left_asof,
                right_asof,
                &left_by_s,
                &right_by_s,
                &mut left_by,
                &mut right_by,
                strategy,
                tolerance
            )
        })?;

        let mut drop_these = right_by.get_column_names();
        if left_asof_name == right_asof_name {
            drop_these.push(right_asof_name);
        }

        let cols = other
            .get_columns()
            .iter()
            .filter_map(|s| {
                if drop_these.contains(&s.name()) {
                    None
                } else {
                    Some(s.clone())
                }
            })
            .collect();
        let other = DataFrame::new_no_checks(cols);

        let mut left = self.clone();
        let mut right_join_tuples = &*right_join_tuples;

        if let Some((offset, len)) = slice {
            left = left.slice(offset, len);
            right_join_tuples = slice_slice(right_join_tuples, offset, len);
        }

        // Safety:
        // join tuples are in bounds
        let right_df = unsafe {
            other.take_opt_iter_unchecked(
                right_join_tuples
                    .iter()
                    .map(|opt_idx| opt_idx.map(|idx| idx as usize)),
            )
        };

        _finish_join(left, right_df, None)
    }

src/frame/asof_join/mod.rs (lines 187-191)

    pub fn _join_asof(
        &self,
        other: &DataFrame,
        left_on: &str,
        right_on: &str,
        strategy: AsofStrategy,
        tolerance: Option<AnyValue<'static>>,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        let left_key = self.column(left_on)?;
        let right_key = other.column(right_on)?;

        check_asof_columns(left_key, right_key)?;
        let left_key = left_key.to_physical_repr();
        let right_key = right_key.to_physical_repr();

        let take_idx = match left_key.dtype() {
            DataType::Int64 => left_key
                .i64()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::Int32 => left_key
                .i32()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::UInt64 => left_key
                .u64()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::UInt32 => left_key
                .u32()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::Float32 => left_key
                .f32()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::Float64 => left_key
                .f64()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            _ => {
                let left_key = left_key.cast(&DataType::Int32).unwrap();
                let right_key = right_key.cast(&DataType::Int32).unwrap();
                left_key
                    .i32()
                    .unwrap()
                    .join_asof(&right_key, strategy, tolerance)
            }
        }?;

        // take_idx are sorted so this is a bound check for all
        if let Some(Some(idx)) = take_idx.last() {
            assert!((*idx as usize) < other.height())
        }

        // drop right join column
        let other = if left_on == right_on {
            Cow::Owned(other.drop(right_on)?)
        } else {
            Cow::Borrowed(other)
        };

        let mut left = self.clone();
        let mut take_idx = &*take_idx;

        if let Some((offset, len)) = slice {
            left = left.slice(offset, len);
            take_idx = slice_slice(take_idx, offset, len);
        }

        // Safety:
        // join tuples are in bounds
        let right_df = unsafe {
            other.take_opt_iter_unchecked(
                take_idx
                    .iter()
                    .map(|opt_idx| opt_idx.map(|idx| idx as usize)),
            )
        };

        _finish_join(left, right_df, suffix.as_deref())
    }

source

pub fn take(&self, indices: &IdxCa) -> PolarsResult<Self>

Take DataFrame rows by index values.

Example

fn example(df: &DataFrame) -> PolarsResult<DataFrame> {
    let idx = IdxCa::new("idx", &[0, 1, 9]);
    df.take(&idx)
}

source

pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self>

Rename a column in the DataFrame.

Example

fn example(df: &mut DataFrame) -> PolarsResult<&mut DataFrame> {
    let original_name = "foo";
    let new_name = "bar";
    df.rename(original_name, new_name)
}

Examples found in repository ?

src/frame/hash_join/mod.rs (line 323)

pub fn _finish_join(
    mut df_left: DataFrame,
    mut df_right: DataFrame,
    suffix: Option<&str>,
) -> PolarsResult<DataFrame> {
    let mut left_names = PlHashSet::with_capacity(df_left.width());

    df_left.columns.iter().for_each(|series| {
        left_names.insert(series.name());
    });

    let mut rename_strs = Vec::with_capacity(df_right.width());

    df_right.columns.iter().for_each(|series| {
        if left_names.contains(series.name()) {
            rename_strs.push(series.name().to_owned())
        }
    });
    let suffix = suffix.unwrap_or("_right");

    for name in rename_strs {
        df_right.rename(&name, &_join_suffix_name(&name, suffix))?;
    }

    drop(left_names);
    df_left.hstack_mut(&df_right.columns)?;
    Ok(df_left)
}

source

pub fn sort_in_place(
 &mut self,
 by_column: impl IntoVec<String>,
 reverse: impl IntoVec<bool>
) -> PolarsResult<&mut Self>

Sort DataFrame in place by a column.

Examples found in repository ?

src/frame/mod.rs (line 1920)

    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.sort_in_place(by_column, reverse)?;
        Ok(df)
    }

source

pub fn sort_impl(
 &self,
 by_column: Vec<Series>,
 reverse: Vec<bool>,
 nulls_last: bool,
 slice: Option<(i64, usize)>
) -> PolarsResult<Self>

This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.

Examples found in repository ?

src/frame/mod.rs (line 1821)

    pub fn sort_in_place(
        &mut self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<&mut Self> {
        // a lot of indirection in both sorting and take
        self.as_single_chunk_par();
        let by_column = self.select_series(by_column)?;
        let reverse = reverse.into_vec();
        self.columns = self.sort_impl(by_column, reverse, false, None)?.columns;
        Ok(self)
    }

    /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
    #[cfg(feature = "private")]
    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // note that the by_column argument also contains evaluated expression from polars-lazy
        // that may not even be present in this dataframe.

        // therefore when we try to set the first columns as sorted, we ignore the error
        // as expressions are not present (they are renamed to _POLARS_SORT_COLUMN_i.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: reverse[0],
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted
        // if the column did not exists it is ok, because we sorted by an expression
        // not present in the dataframe
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted clone of this `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.sort_in_place(by_column, reverse)?;
        Ok(df)
    }

    /// Sort the `DataFrame` by a single column with extra options.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut df = self.clone();
        // a lot of indirection in both sorting and take
        df.as_single_chunk_par();
        let by_column = vec![df.column(by_column)?.clone()];
        let reverse = vec![options.descending];
        df.columns = df
            .sort_impl(by_column, reverse, options.nulls_last, None)?
            .columns;
        Ok(df)
    }

source

pub fn sort(
 &self,
 by_column: impl IntoVec<String>,
 reverse: impl IntoVec<bool>
) -> PolarsResult<Self>

Return a sorted clone of this DataFrame.

Example

fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    df.sort(["a"], reverse)
}

fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    df.sort(&["a", "b"], vec![false, true])
}

Examples found in repository ?

src/chunked_array/logical/categorical/ops/unique.rs (line 64)

    pub fn value_counts(&self) -> PolarsResult<DataFrame> {
        let groups = self.logical().group_tuples(true, false).unwrap();
        let logical_values = unsafe {
            self.logical()
                .clone()
                .into_series()
                .agg_first(&groups)
                .u32()
                .unwrap()
                .clone()
        };

        let mut values = self.clone();
        *values.logical_mut() = logical_values;

        let mut counts = groups.group_count();
        counts.rename("counts");
        let cols = vec![values.into_series(), counts.into_series()];
        let df = DataFrame::new_no_checks(cols);
        df.sort(["counts"], true)
    }

source

pub fn sort_with_options(
 &self,
 by_column: &str,
 options: SortOptions
) -> PolarsResult<Self>

Sort the DataFrame by a single column with extra options.

source

pub fn replace<S: IntoSeries>(
 &mut self,
 column: &str,
 new_col: S
) -> PolarsResult<&mut Self>

Replace a column with a Series.

Example

let mut df: DataFrame = df!("Country" => &["United States", "China"],
                        "Area (km²)" => &[9_833_520, 9_596_961])?;
let s: Series = Series::new("Country", &["USA", "PRC"]);

assert!(df.replace("Nation", s.clone()).is_err());
assert!(df.replace("Country", s).is_ok());

source

pub fn replace_or_add<S: IntoSeries>(
 &mut self,
 column: &str,
 new_col: S
) -> PolarsResult<&mut Self>

Replace or update a column. The difference between this method and DataFrame::with_column is that now the value of column: &str determines the name of the column and not the name of the Series passed to this method.

source

pub fn replace_at_idx<S: IntoSeries>(
 &mut self,
 idx: usize,
 new_col: S
) -> PolarsResult<&mut Self>

Replace column at index idx with a Series.

Example

# use polars_core::prelude::*;
let s0 = Series::new("foo", &["ham", "spam", "egg"]);
let s1 = Series::new("ascii", &[70, 79, 79]);
let mut df = DataFrame::new(vec![s0, s1])?;

// Add 32 to get lowercase ascii values
df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);
# Ok::<(), PolarsError>(())

Examples found in repository ?

src/frame/mod.rs (line 1114)

    fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> {
        if let Some(idx) = self.find_idx_by_name(series.name()) {
            self.replace_at_idx(idx, series)?;
        } else {
            self.columns.push(series);
        }
        Ok(())
    }

    /// Add a new column to this `DataFrame` or replace an existing one.
    pub fn with_column<S: IntoSeries>(&mut self, column: S) -> PolarsResult<&mut Self> {
        fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> {
            let height = df.height();
            if series.len() == 1 && height > 1 {
                series = series.new_from_index(0, height);
            }

            if series.len() == height || df.is_empty() {
                df.add_column_by_search(series)?;
                Ok(df)
            }
            // special case for literals
            else if height == 0 && series.len() == 1 {
                let s = series.slice(0, 0);
                df.add_column_by_search(s)?;
                Ok(df)
            } else {
                Err(PolarsError::ShapeMisMatch(
                    format!(
                        "Could not add column. The Series length {} differs from the DataFrame height: {}",
                        series.len(),
                        df.height()
                    )
                        .into(),
                ))
            }
        }
        let series = column.into_series();
        inner(self, series)
    }

    fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
        let name = s.name();
        if let Some((idx, _, _)) = schema.get_full(name) {
            // schema is incorrect fallback to search
            if self.columns.get(idx).map(|s| s.name()) != Some(name) {
                self.add_column_by_search(s)?;
            } else {
                self.replace_at_idx(idx, s)?;
            }
        } else {
            self.columns.push(s);
        }
        Ok(())
    }

source

pub fn apply<F, S>(&mut self, name: &str, f: F) -> PolarsResult<&mut Self>where
F: FnOnce(&Series) -> S,
S: IntoSeries,

Apply a closure to a column. This is the recommended way to do in place modification.

Example

let s0 = Series::new("foo", &["ham", "spam", "egg"]);
let s1 = Series::new("names", &["Jean", "Claude", "van"]);
let mut df = DataFrame::new(vec![s0, s1])?;

fn str_to_len(str_val: &Series) -> Series {
    str_val.utf8()
        .unwrap()
        .into_iter()
        .map(|opt_name: Option<&str>| {
            opt_name.map(|name: &str| name.len() as u32)
         })
        .collect::<UInt32Chunked>()
        .into_series()
}

// Replace the names column by the length of the names.
df.apply("names", str_to_len);

Results in:

+--------+-------+
| foo    |       |
| ---    | names |
| str    | u32   |
+========+=======+
| "ham"  | 4     |
+--------+-------+
| "spam" | 6     |
+--------+-------+
| "egg"  | 3     |
+--------+-------+

Examples found in repository ?

src/frame/mod.rs (lines 1888-1896)

    pub fn sort_impl(
        &self,
        by_column: Vec<Series>,
        reverse: Vec<bool>,
        nulls_last: bool,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<Self> {
        // note that the by_column argument also contains evaluated expression from polars-lazy
        // that may not even be present in this dataframe.

        // therefore when we try to set the first columns as sorted, we ignore the error
        // as expressions are not present (they are renamed to _POLARS_SORT_COLUMN_i.
        let first_reverse = reverse[0];
        let first_by_column = by_column[0].name().to_string();
        let mut take = match by_column.len() {
            1 => {
                let s = &by_column[0];
                let options = SortOptions {
                    descending: reverse[0],
                    nulls_last,
                };
                // fast path for a frame with a single series
                // no need to compute the sort indices and then take by these indices
                // simply sort and return as frame
                if self.width() == 1 && self.check_name_to_idx(s.name()).is_ok() {
                    let mut out = s.sort_with(options);
                    if let Some((offset, len)) = slice {
                        out = out.slice(offset, len);
                    }

                    return Ok(out.into_frame());
                }
                s.argsort(options)
            }
            _ => {
                #[cfg(feature = "sort_multiple")]
                {
                    let (first, by_column, reverse) = prepare_argsort(by_column, reverse)?;
                    first.argsort_multiple(&by_column, &reverse)?
                }
                #[cfg(not(feature = "sort_multiple"))]
                {
                    panic!("activate `sort_multiple` feature gate to enable this functionality");
                }
            }
        };

        if let Some((offset, len)) = slice {
            take = take.slice(offset, len);
        }

        // Safety:
        // the created indices are in bounds
        let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
            unsafe { self.take_unchecked_vectical(&take) }
        } else {
            unsafe { self.take_unchecked(&take) }
        };
        // Mark the first sort column as sorted
        // if the column did not exists it is ok, because we sorted by an expression
        // not present in the dataframe
        let _ = df.apply(&first_by_column, |s| {
            let mut s = s.clone();
            if first_reverse {
                s.set_sorted(IsSorted::Descending)
            } else {
                s.set_sorted(IsSorted::Ascending)
            }
            s
        });
        Ok(df)
    }

    /// Return a sorted clone of this `DataFrame`.
    ///
    /// # Example
    ///
    /// ```
    /// # use polars_core::prelude::*;
    /// fn sort_example(df: &DataFrame, reverse: bool) -> PolarsResult<DataFrame> {
    ///     df.sort(["a"], reverse)
    /// }
    ///
    /// fn sort_by_multiple_columns_example(df: &DataFrame) -> PolarsResult<DataFrame> {
    ///     df.sort(&["a", "b"], vec![false, true])
    /// }
    /// ```
    pub fn sort(
        &self,
        by_column: impl IntoVec<String>,
        reverse: impl IntoVec<bool>,
    ) -> PolarsResult<Self> {
        let mut df = self.clone();
        df.sort_in_place(by_column, reverse)?;
        Ok(df)
    }

    /// Sort the `DataFrame` by a single column with extra options.
    pub fn sort_with_options(&self, by_column: &str, options: SortOptions) -> PolarsResult<Self> {
        let mut df = self.clone();
        // a lot of indirection in both sorting and take
        df.as_single_chunk_par();
        let by_column = vec![df.column(by_column)?.clone()];
        let reverse = vec![options.descending];
        df.columns = df
            .sort_impl(by_column, reverse, options.nulls_last, None)?
            .columns;
        Ok(df)
    }

    /// Replace a column with a `Series`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use polars_core::prelude::*;
    /// let mut df: DataFrame = df!("Country" => &["United States", "China"],
    ///                         "Area (km²)" => &[9_833_520, 9_596_961])?;
    /// let s: Series = Series::new("Country", &["USA", "PRC"]);
    ///
    /// assert!(df.replace("Nation", s.clone()).is_err());
    /// assert!(df.replace("Country", s).is_ok());
    /// # Ok::<(), PolarsError>(())
    /// ```
    pub fn replace<S: IntoSeries>(&mut self, column: &str, new_col: S) -> PolarsResult<&mut Self> {
        self.apply(column, |_| new_col.into_series())
    }

source

pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self>where
F: FnOnce(&Series) -> S,
S: IntoSeries,

Apply a closure to a column at index idx. This is the recommended way to do in place modification.

Example

let s0 = Series::new("foo", &["ham", "spam", "egg"]);
let s1 = Series::new("ascii", &[70, 79, 79]);
let mut df = DataFrame::new(vec![s0, s1])?;

// Add 32 to get lowercase ascii values
df.apply_at_idx(1, |s| s + 32);

Results in:

+--------+-------+
| foo    | ascii |
| ---    | ---   |
| str    | i32   |
+========+=======+
| "ham"  | 102   |
+--------+-------+
| "spam" | 111   |
+--------+-------+
| "egg"  | 111   |
+--------+-------+

Examples found in repository ?

src/frame/mod.rs (line 2055)

    pub fn apply<F, S>(&mut self, name: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> S,
        S: IntoSeries,
    {
        let idx = self.check_name_to_idx(name)?;
        self.apply_at_idx(idx, f)
    }

source

pub fn try_apply_at_idx<F, S>(
 &mut self,
 idx: usize,
 f: F
) -> PolarsResult<&mut Self>where
 F: FnOnce(&Series) -> PolarsResult<S>,
 S: IntoSeries,

Apply a closure that may fail to a column at index idx. This is the recommended way to do in place modification.

Example

This is the idiomatic way to replace some values a column of a DataFrame given range of indexes.

let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
let mut df = DataFrame::new(vec![s0, s1])?;

let idx = vec![0, 1, 4];

df.try_apply("foo", |s| {
    s.utf8()?
    .set_at_idx_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
});

Results in:

+---------------------+--------+
| foo                 | values |
| ---                 | ---    |
| str                 | i32    |
+=====================+========+
| "ham-is-modified"   | 1      |
+---------------------+--------+
| "spam-is-modified"  | 2      |
+---------------------+--------+
| "egg"               | 3      |
+---------------------+--------+
| "bacon"             | 4      |
+---------------------+--------+
| "quack-is-modified" | 5      |
+---------------------+--------+

Examples found in repository ?

src/frame/mod.rs (line 2243)

    pub fn try_apply<F, S>(&mut self, column: &str, f: F) -> PolarsResult<&mut Self>
    where
        F: FnOnce(&Series) -> PolarsResult<S>,
        S: IntoSeries,
    {
        let idx = self
            .find_idx_by_name(column)
            .ok_or_else(|| PolarsError::NotFound(column.to_string().into()))?;
        self.try_apply_at_idx(idx, f)
    }

source

pub fn try_apply<F, S>(&mut self, column: &str, f: F) -> PolarsResult<&mut Self>where
F: FnOnce(&Series) -> PolarsResult<S>,
S: IntoSeries,

Apply a closure that may fail to a column. This is the recommended way to do in place modification.

Example

This is the idiomatic way to replace some values a column of a DataFrame given a boolean mask.

let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
let mut df = DataFrame::new(vec![s0, s1])?;

// create a mask
let values = df.column("values")?;
let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?;

df.try_apply("foo", |s| {
    s.utf8()?
    .set(&mask, Some("not_within_bounds"))
});

Results in:

+---------------------+--------+
| foo                 | values |
| ---                 | ---    |
| str                 | i32    |
+=====================+========+
| "not_within_bounds" | 1      |
+---------------------+--------+
| "spam"              | 2      |
+---------------------+--------+
| "egg"               | 3      |
+---------------------+--------+
| "bacon"             | 4      |
+---------------------+--------+
| "not_within_bounds" | 5      |
+---------------------+--------+

source

pub fn slice(&self, offset: i64, length: usize) -> Self

Slice the DataFrame along the rows.

Example

let df: DataFrame = df!("Fruit" => &["Apple", "Grape", "Grape", "Fig", "Fig"],
                        "Color" => &["Green", "Red", "White", "White", "Red"])?;
let sl: DataFrame = df.slice(2, 3);

assert_eq!(sl.shape(), (3, 2));
println!("{}", sl);

Output:

shape: (3, 2)
+-------+-------+
| Fruit | Color |
| ---   | ---   |
| str   | str   |
+=======+=======+
| Grape | White |
+-------+-------+
| Fig   | White |
+-------+-------+
| Fig   | Red   |
+-------+-------+

Examples found in repository ?

src/frame/groupby/mod.rs (line 917)

unsafe fn take_df(df: &DataFrame, g: GroupsIndicator) -> DataFrame {
    match g {
        GroupsIndicator::Idx(idx) => df.take_iter_unchecked(idx.1.iter().map(|i| *i as usize)),
        GroupsIndicator::Slice([first, len]) => df.slice(first as i64, len as usize),
    }
}

More examples

Hide additional examples

src/utils/mod.rs (line 199)

pub fn split_df_as_ref(df: &DataFrame, n: usize) -> PolarsResult<Vec<DataFrame>> {
    let total_len = df.height();
    let chunk_size = total_len / n;

    if df.n_chunks() == n
        && df.get_columns()[0]
            .chunk_lengths()
            .all(|len| len.abs_diff(chunk_size) < 100)
    {
        return Ok(flatten_df(df).collect());
    }

    let mut out = Vec::with_capacity(n);

    for i in 0..n {
        let offset = i * chunk_size;
        let len = if i == (n - 1) {
            total_len - offset
        } else {
            chunk_size
        };
        let df = df.slice((i * chunk_size) as i64, len);
        if df.n_chunks() > 1 {
            // we add every chunk as separate dataframe. This make sure that every partition
            // deals with it.
            out.extend(flatten_df(&df))
        } else {
            out.push(df)
        }
    }

    Ok(out)
}

src/frame/mod.rs (line 3322)

    pub fn _partition_by_impl(
        &self,
        cols: &[String],
        stable: bool,
    ) -> PolarsResult<Vec<DataFrame>> {
        let groups = if stable {
            self.groupby_stable(cols)?.take_groups()
        } else {
            self.groupby(cols)?.take_groups()
        };

        // don't parallelize this
        // there is a lot of parallelization in take and this may easily SO
        POOL.install(|| {
            match groups {
                GroupsProxy::Idx(idx) => {
                    Ok(idx
                        .into_par_iter()
                        .map(|(_, group)| {
                            // groups are in bounds
                            unsafe { self._take_unchecked_slice(&group, false) }
                        })
                        .collect())
                }
                GroupsProxy::Slice { groups, .. } => Ok(groups
                    .into_par_iter()
                    .map(|[first, len]| self.slice(first as i64, len as usize))
                    .collect()),
            }
        })
    }

src/frame/asof_join/groups.rs (line 690)

    pub fn _join_asof_by(
        &self,
        other: &DataFrame,
        left_on: &str,
        right_on: &str,
        left_by: Vec<String>,
        right_by: Vec<String>,
        strategy: AsofStrategy,
        tolerance: Option<AnyValue<'static>>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        let left_asof = self.column(left_on)?.to_physical_repr();
        let right_asof = other.column(right_on)?.to_physical_repr();
        let right_asof_name = right_asof.name();
        let left_asof_name = left_asof.name();

        check_asof_columns(&left_asof, &right_asof)?;

        let mut left_by = self.select_physical(left_by)?;
        let mut right_by = other.select_physical(right_by)?;

        let left_by_s = left_by.get_columns()[0].to_physical_repr().into_owned();
        let right_by_s = right_by.get_columns()[0].to_physical_repr().into_owned();

        let right_join_tuples = with_match_physical_numeric_polars_type!(left_asof.dtype(), |$T| {
            let left_asof: &ChunkedArray<$T> = left_asof.as_ref().as_ref().as_ref();
            let right_asof: &ChunkedArray<$T> = right_asof.as_ref().as_ref().as_ref();

            dispatch_join(
                left_asof,
                right_asof,
                &left_by_s,
                &right_by_s,
                &mut left_by,
                &mut right_by,
                strategy,
                tolerance
            )
        })?;

        let mut drop_these = right_by.get_column_names();
        if left_asof_name == right_asof_name {
            drop_these.push(right_asof_name);
        }

        let cols = other
            .get_columns()
            .iter()
            .filter_map(|s| {
                if drop_these.contains(&s.name()) {
                    None
                } else {
                    Some(s.clone())
                }
            })
            .collect();
        let other = DataFrame::new_no_checks(cols);

        let mut left = self.clone();
        let mut right_join_tuples = &*right_join_tuples;

        if let Some((offset, len)) = slice {
            left = left.slice(offset, len);
            right_join_tuples = slice_slice(right_join_tuples, offset, len);
        }

        // Safety:
        // join tuples are in bounds
        let right_df = unsafe {
            other.take_opt_iter_unchecked(
                right_join_tuples
                    .iter()
                    .map(|opt_idx| opt_idx.map(|idx| idx as usize)),
            )
        };

        _finish_join(left, right_df, None)
    }

src/frame/asof_join/mod.rs (line 180)

    pub fn _join_asof(
        &self,
        other: &DataFrame,
        left_on: &str,
        right_on: &str,
        strategy: AsofStrategy,
        tolerance: Option<AnyValue<'static>>,
        suffix: Option<String>,
        slice: Option<(i64, usize)>,
    ) -> PolarsResult<DataFrame> {
        let left_key = self.column(left_on)?;
        let right_key = other.column(right_on)?;

        check_asof_columns(left_key, right_key)?;
        let left_key = left_key.to_physical_repr();
        let right_key = right_key.to_physical_repr();

        let take_idx = match left_key.dtype() {
            DataType::Int64 => left_key
                .i64()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::Int32 => left_key
                .i32()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::UInt64 => left_key
                .u64()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::UInt32 => left_key
                .u32()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::Float32 => left_key
                .f32()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            DataType::Float64 => left_key
                .f64()
                .unwrap()
                .join_asof(&right_key, strategy, tolerance),
            _ => {
                let left_key = left_key.cast(&DataType::Int32).unwrap();
                let right_key = right_key.cast(&DataType::Int32).unwrap();
                left_key
                    .i32()
                    .unwrap()
                    .join_asof(&right_key, strategy, tolerance)
            }
        }?;

        // take_idx are sorted so this is a bound check for all
        if let Some(Some(idx)) = take_idx.last() {
            assert!((*idx as usize) < other.height())
        }

        // drop right join column
        let other = if left_on == right_on {
            Cow::Owned(other.drop(right_on)?)
        } else {
            Cow::Borrowed(other)
        };

        let mut left = self.clone();
        let mut take_idx = &*take_idx;

        if let Some((offset, len)) = slice {
            left = left.slice(offset, len);
            take_idx = slice_slice(take_idx, offset, len);
        }

        // Safety:
        // join tuples are in bounds
        let right_df = unsafe {
            other.take_opt_iter_unchecked(
                take_idx
                    .iter()
                    .map(|opt_idx| opt_idx.map(|idx| idx as usize)),
            )
        };

        _finish_join(left, right_df, suffix.as_deref())
    }

source

pub fn slice_par(&self, offset: i64, length: usize) -> Self

source

pub fn _slice_and_realloc(&self, offset: i64, length: usize) -> Self

source

pub fn head(&self, length: Option<usize>) -> Self

Get the head of the DataFrame.

Example

let countries: DataFrame =
    df!("Rank by GDP (2021)" => &[1, 2, 3, 4, 5],
        "Continent" => &["North America", "Asia", "Asia", "Europe", "Europe"],
        "Country" => &["United States", "China", "Japan", "Germany", "United Kingdom"],
        "Capital" => &["Washington", "Beijing", "Tokyo", "Berlin", "London"])?;
assert_eq!(countries.shape(), (5, 4));

println!("{}", countries.head(Some(3)));

Output:

shape: (3, 4)
+--------------------+---------------+---------------+------------+
| Rank by GDP (2021) | Continent     | Country       | Capital    |
| ---                | ---           | ---           | ---        |
| i32                | str           | str           | str        |
+====================+===============+===============+============+
| 1                  | North America | United States | Washington |
+--------------------+---------------+---------------+------------+
| 2                  | Asia          | China         | Beijing    |
+--------------------+---------------+---------------+------------+
| 3                  | Asia          | Japan         | Tokyo      |
+--------------------+---------------+---------------+------------+

source

pub fn tail(&self, length: Option<usize>) -> Self

Get the tail of the DataFrame.

Example

let countries: DataFrame =
    df!("Rank (2021)" => &[105, 106, 107, 108, 109],
        "Apple Price (€/kg)" => &[0.75, 0.70, 0.70, 0.65, 0.52],
        "Country" => &["Kosovo", "Moldova", "North Macedonia", "Syria", "Turkey"])?;
assert_eq!(countries.shape(), (5, 3));

println!("{}", countries.tail(Some(2)));

Output:

shape: (2, 3)
+-------------+--------------------+---------+
| Rank (2021) | Apple Price (€/kg) | Country |
| ---         | ---                | ---     |
| i32         | f64                | str     |
+=============+====================+=========+
| 108         | 0.63               | Syria   |
+-------------+--------------------+---------+
| 109         | 0.63               | Turkey  |
+-------------+--------------------+---------+

source

pub fn iter_chunks(&self) -> RecordBatchIter<'_> ⓘ

Iterator over the rows in this DataFrame as Arrow RecordBatches.

Panics

Panics if the DataFrame that is passed is not rechunked.

This responsibility is left to the caller as we don’t want to take mutable references here, but we also don’t want to rechunk here, as this operation is costly and would benefit the caller as well.

source

pub fn iter_chunks_physical(&self) -> PhysRecordBatchIter<'_> ⓘ

Iterator over the rows in this DataFrame as Arrow RecordBatches as physical values.

Panics

Panics if the DataFrame that is passed is not rechunked.

This responsibility is left to the caller as we don’t want to take mutable references here, but we also don’t want to rechunk here, as this operation is costly and would benefit the caller as well.

Examples found in repository ?

src/utils/mod.rs (line 144)

fn flatten_df(df: &DataFrame) -> impl Iterator<Item = DataFrame> + '_ {
    df.iter_chunks_physical().flat_map(|chunk| {
        let df = DataFrame::new_no_checks(
            df.iter()
                .zip(chunk.into_arrays())
                .map(|(s, arr)| {
                    // Safety:
                    // datatypes are correct
                    let mut out = unsafe {
                        Series::from_chunks_and_dtype_unchecked(s.name(), vec![arr], s.dtype())
                    };
                    out.set_sorted(s.is_sorted());
                    out
                })
                .collect(),
        );
        if df.height() == 0 {
            None
        } else {
            Some(df)
        }
    })
}

source

pub fn reverse(&self) -> Self

Get a DataFrame with all the columns in reversed order.

source

pub fn shift(&self, periods: i64) -> Self

Shift the values by a given period and fill the parts that will be empty due to this operation with Nones.

See the method on Series for more info on the shift operation.

source

pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self>

Replace None values with one of the following strategies:

Forward fill (replace None with the previous value)
Backward fill (replace None with the next value)
Mean fill (replace None with the mean of the whole array)
Min fill (replace None with the minimum of the whole array)
Max fill (replace None with the maximum of the whole array)

See the method on Series for more info on the fill_null operation.

source

pub fn describe(&self, percentiles: Option<&[f64]>) -> Self

Summary statistics for a DataFrame. Only summarizes numeric datatypes at the moment and returns nulls for non numeric datatypes. Try in keep output similar to pandas

Example

let df1: DataFrame = df!("categorical" => &["d","e","f"],
                         "numeric" => &[1, 2, 3],
                         "object" => &["a", "b", "c"])?;
assert_eq!(df1.shape(), (3, 3));

let df2: DataFrame = df1.describe(None);
assert_eq!(df2.shape(), (8, 4));
println!("{}", df2);

Output:

shape: (8, 4)
┌──────────┬─────────────┬─────────┬────────┐
│ describe ┆ categorical ┆ numeric ┆ object │
│ ---      ┆ ---         ┆ ---     ┆ ---    │
│ str      ┆ f64         ┆ f64     ┆ f64    │
╞══════════╪═════════════╪═════════╪════════╡
│ count    ┆ 3.0         ┆ 3.0     ┆ 3.0    │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ mean     ┆ null        ┆ 2.0     ┆ null   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ std      ┆ null        ┆ 1.0     ┆ null   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ min      ┆ null        ┆ 1.0     ┆ null   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 25%      ┆ null        ┆ 1.5     ┆ null   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 50%      ┆ null        ┆ 2.0     ┆ null   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 75%      ┆ null        ┆ 2.5     ┆ null   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ max      ┆ null        ┆ 3.0     ┆ null   │
└──────────┴─────────────┴─────────┴────────┘

source

pub fn max(&self) -> Self

Aggregate the columns to their maximum values.

Example

let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
                         "Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));

let df2: DataFrame = df1.max();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);

Output:

shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| ---     | ---     |
| i32     | i32     |
+=========+=========+
| 6       | 5       |
+---------+---------+

Examples found in repository ?

src/frame/mod.rs (line 2543)

    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let mut columns: Vec<Series> = vec![];

            for s in df.columns.iter() {
                columns.push(s.cast(&DataType::Float64).expect("cast to float failed"));
            }

            DataFrame::new(columns).unwrap()
        }

        fn count(df: &DataFrame) -> DataFrame {
            let columns = df.apply_columns_par(&|s| Series::new(s.name(), [s.len() as IdxSize]));
            DataFrame::new_no_checks(columns)
        }

        let percentiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        let mut headers: Vec<String> = vec![
            "count".to_string(),
            "mean".to_string(),
            "std".to_string(),
            "min".to_string(),
        ];

        let mut tmp: Vec<DataFrame> = vec![
            describe_cast(&count(self)),
            describe_cast(&self.mean()),
            describe_cast(&self.std(1)),
            describe_cast(&self.min()),
        ];

        for p in percentiles {
            tmp.push(describe_cast(
                &self
                    .quantile(*p, QuantileInterpolOptions::Linear)
                    .expect("quantile failed"),
            ));
            headers.push(format!("{}%", *p * 100.0));
        }

        // Keep order same as pandas
        tmp.push(describe_cast(&self.max()));
        headers.push("max".to_string());

        let mut summary = concat_df_unchecked(&tmp);

        summary
            .insert_at_idx(0, Series::new("describe", headers))
            .expect("insert of header failed");

        summary
    }

source

pub fn std(&self, ddof: u8) -> Self

Aggregate the columns to their standard deviation values.

Example

let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
                         "Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));

let df2: DataFrame = df1.std(1);
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);

Output:

shape: (1, 2)
+-------------------+--------------------+
| Die n°1           | Die n°2            |
| ---               | ---                |
| f64               | f64                |
+===================+====================+
| 2.280350850198276 | 1.0954451150103321 |
+-------------------+--------------------+

Examples found in repository ?

src/frame/mod.rs (line 2529)

    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let mut columns: Vec<Series> = vec![];

            for s in df.columns.iter() {
                columns.push(s.cast(&DataType::Float64).expect("cast to float failed"));
            }

            DataFrame::new(columns).unwrap()
        }

        fn count(df: &DataFrame) -> DataFrame {
            let columns = df.apply_columns_par(&|s| Series::new(s.name(), [s.len() as IdxSize]));
            DataFrame::new_no_checks(columns)
        }

        let percentiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        let mut headers: Vec<String> = vec![
            "count".to_string(),
            "mean".to_string(),
            "std".to_string(),
            "min".to_string(),
        ];

        let mut tmp: Vec<DataFrame> = vec![
            describe_cast(&count(self)),
            describe_cast(&self.mean()),
            describe_cast(&self.std(1)),
            describe_cast(&self.min()),
        ];

        for p in percentiles {
            tmp.push(describe_cast(
                &self
                    .quantile(*p, QuantileInterpolOptions::Linear)
                    .expect("quantile failed"),
            ));
            headers.push(format!("{}%", *p * 100.0));
        }

        // Keep order same as pandas
        tmp.push(describe_cast(&self.max()));
        headers.push("max".to_string());

        let mut summary = concat_df_unchecked(&tmp);

        summary
            .insert_at_idx(0, Series::new("describe", headers))
            .expect("insert of header failed");

        summary
    }

source

pub fn var(&self, ddof: u8) -> Self

Aggregate the columns to their variation values.

Example

let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
                         "Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));

let df2: DataFrame = df1.var(1);
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);

Output:

shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| ---     | ---     |
| f64     | f64     |
+=========+=========+
| 5.2     | 1.2     |
+---------+---------+

source

pub fn min(&self) -> Self

Aggregate the columns to their minimum values.

Example

let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
                         "Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));

let df2: DataFrame = df1.min();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);

Output:

shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| ---     | ---     |
| i32     | i32     |
+=========+=========+
| 1       | 2       |
+---------+---------+

Examples found in repository ?

src/frame/mod.rs (line 2530)

    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let mut columns: Vec<Series> = vec![];

            for s in df.columns.iter() {
                columns.push(s.cast(&DataType::Float64).expect("cast to float failed"));
            }

            DataFrame::new(columns).unwrap()
        }

        fn count(df: &DataFrame) -> DataFrame {
            let columns = df.apply_columns_par(&|s| Series::new(s.name(), [s.len() as IdxSize]));
            DataFrame::new_no_checks(columns)
        }

        let percentiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        let mut headers: Vec<String> = vec![
            "count".to_string(),
            "mean".to_string(),
            "std".to_string(),
            "min".to_string(),
        ];

        let mut tmp: Vec<DataFrame> = vec![
            describe_cast(&count(self)),
            describe_cast(&self.mean()),
            describe_cast(&self.std(1)),
            describe_cast(&self.min()),
        ];

        for p in percentiles {
            tmp.push(describe_cast(
                &self
                    .quantile(*p, QuantileInterpolOptions::Linear)
                    .expect("quantile failed"),
            ));
            headers.push(format!("{}%", *p * 100.0));
        }

        // Keep order same as pandas
        tmp.push(describe_cast(&self.max()));
        headers.push("max".to_string());

        let mut summary = concat_df_unchecked(&tmp);

        summary
            .insert_at_idx(0, Series::new("describe", headers))
            .expect("insert of header failed");

        summary
    }

source

pub fn sum(&self) -> Self

Aggregate the columns to their sum values.

Example

let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
                         "Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));

let df2: DataFrame = df1.sum();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);

Output:

shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| ---     | ---     |
| i32     | i32     |
+=========+=========+
| 16      | 16      |
+---------+---------+

source

pub fn mean(&self) -> Self

Aggregate the columns to their mean values.

Example

let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
                         "Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));

let df2: DataFrame = df1.mean();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);

Output:

shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| ---     | ---     |
| f64     | f64     |
+=========+=========+
| 3.2     | 3.2     |
+---------+---------+

Examples found in repository ?

src/frame/mod.rs (line 2528)

    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let mut columns: Vec<Series> = vec![];

            for s in df.columns.iter() {
                columns.push(s.cast(&DataType::Float64).expect("cast to float failed"));
            }

            DataFrame::new(columns).unwrap()
        }

        fn count(df: &DataFrame) -> DataFrame {
            let columns = df.apply_columns_par(&|s| Series::new(s.name(), [s.len() as IdxSize]));
            DataFrame::new_no_checks(columns)
        }

        let percentiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        let mut headers: Vec<String> = vec![
            "count".to_string(),
            "mean".to_string(),
            "std".to_string(),
            "min".to_string(),
        ];

        let mut tmp: Vec<DataFrame> = vec![
            describe_cast(&count(self)),
            describe_cast(&self.mean()),
            describe_cast(&self.std(1)),
            describe_cast(&self.min()),
        ];

        for p in percentiles {
            tmp.push(describe_cast(
                &self
                    .quantile(*p, QuantileInterpolOptions::Linear)
                    .expect("quantile failed"),
            ));
            headers.push(format!("{}%", *p * 100.0));
        }

        // Keep order same as pandas
        tmp.push(describe_cast(&self.max()));
        headers.push("max".to_string());

        let mut summary = concat_df_unchecked(&tmp);

        summary
            .insert_at_idx(0, Series::new("describe", headers))
            .expect("insert of header failed");

        summary
    }

source

pub fn median(&self) -> Self

Aggregate the columns to their median values.

Example

let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
                         "Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));

let df2: DataFrame = df1.median();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);

Output:

shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| ---     | ---     |
| i32     | i32     |
+=========+=========+
| 3       | 3       |
+---------+---------+

source

pub fn quantile(
 &self,
 quantile: f64,
 interpol: QuantileInterpolOptions
) -> PolarsResult<Self>

Aggregate the columns to their quantile values.

Examples found in repository ?

src/frame/mod.rs (line 2536)

    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let mut columns: Vec<Series> = vec![];

            for s in df.columns.iter() {
                columns.push(s.cast(&DataType::Float64).expect("cast to float failed"));
            }

            DataFrame::new(columns).unwrap()
        }

        fn count(df: &DataFrame) -> DataFrame {
            let columns = df.apply_columns_par(&|s| Series::new(s.name(), [s.len() as IdxSize]));
            DataFrame::new_no_checks(columns)
        }

        let percentiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        let mut headers: Vec<String> = vec![
            "count".to_string(),
            "mean".to_string(),
            "std".to_string(),
            "min".to_string(),
        ];

        let mut tmp: Vec<DataFrame> = vec![
            describe_cast(&count(self)),
            describe_cast(&self.mean()),
            describe_cast(&self.std(1)),
            describe_cast(&self.min()),
        ];

        for p in percentiles {
            tmp.push(describe_cast(
                &self
                    .quantile(*p, QuantileInterpolOptions::Linear)
                    .expect("quantile failed"),
            ));
            headers.push(format!("{}%", *p * 100.0));
        }

        // Keep order same as pandas
        tmp.push(describe_cast(&self.max()));
        headers.push("max".to_string());

        let mut summary = concat_df_unchecked(&tmp);

        summary
            .insert_at_idx(0, Series::new("describe", headers))
            .expect("insert of header failed");

        summary
    }

source

pub fn hmin(&self) -> PolarsResult<Option<Series>>

Available on crate feature zip_with only.

Aggregate the column horizontally to their min values.

source

pub fn hmax(&self) -> PolarsResult<Option<Series>>

Available on crate feature zip_with only.

Aggregate the column horizontally to their max values.

source

pub fn hsum(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>>

Aggregate the column horizontally to their sum values.

Examples found in repository ?

src/frame/mod.rs (line 2917)

    pub fn hmean(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>> {
        match self.columns.len() {
            0 => Ok(None),
            1 => Ok(Some(self.columns[0].clone())),
            _ => {
                let columns = self
                    .columns
                    .iter()
                    .cloned()
                    .filter(|s| {
                        let dtype = s.dtype();
                        dtype.is_numeric() || matches!(dtype, DataType::Boolean)
                    })
                    .collect();
                let numeric_df = DataFrame::new_no_checks(columns);

                let sum = || numeric_df.hsum(none_strategy);

                let null_count = || {
                    numeric_df
                        .columns
                        .par_iter()
                        .map(|s| s.is_null().cast(&DataType::UInt32).unwrap())
                        .reduce_with(|l, r| &l + &r)
                        // we can unwrap the option, because we are certain there is a column
                        // we started this operation on 2 columns
                        .unwrap()
                };

                let (sum, null_count) = POOL.install(|| rayon::join(sum, null_count));
                let sum = sum?;

                // value lengths: len - null_count
                let value_length: UInt32Chunked =
                    (numeric_df.width().sub(&null_count)).u32().unwrap().clone();

                // make sure that we do not divide by zero
                // by replacing with None
                let value_length = value_length
                    .set(&value_length.equal(0), None)?
                    .into_series()
                    .cast(&DataType::Float64)?;

                Ok(sum.map(|sum| &sum / &value_length))
            }
        }
    }

source

pub fn hmean(&self, none_strategy: NullStrategy) -> PolarsResult<Option<Series>>

Aggregate the column horizontally to their mean values.

source

pub fn pipe<F, B>(self, f: F) -> PolarsResultwhere
F: Fn(DataFrame) -> PolarsResult,

Pipe different functions/ closure operations that work on a DataFrame together.

source

pub fn pipe_mut<F, B>(&mut self, f: F) -> PolarsResultwhere
F: Fn(&mut DataFrame) -> PolarsResult,

Pipe different functions/ closure operations that work on a DataFrame together.

source

pub fn pipe_with_args<F, B, Args>(self, f: F, args: Args) -> PolarsResultwhere
F: Fn(DataFrame, Args) -> PolarsResult,

Pipe different functions/ closure operations that work on a DataFrame together.

source

pub fn drop_duplicates(
 &self,
 maintain_order: bool,
 subset: Option<&[String]>
) -> PolarsResult<Self>

👎Deprecated: use DataFrame::unique

Drop duplicate rows from a DataFrame. This fails when there is a column of type List in DataFrame

Example

let df = df! {
              "flt" => [1., 1., 2., 2., 3., 3.],
              "int" => [1, 1, 2, 2, 3, 3, ],
              "str" => ["a", "a", "b", "b", "c", "c"]
          }?;

println!("{}", df.drop_duplicates(true, None)?);

Returns

+-----+-----+-----+
| flt | int | str |
| --- | --- | --- |
| f64 | i32 | str |
+=====+=====+=====+
| 1   | 1   | "a" |
+-----+-----+-----+
| 2   | 2   | "b" |
+-----+-----+-----+
| 3   | 3   | "c" |
+-----+-----+-----+

source

pub fn unique_stable(
 &self,
 subset: Option<&[String]>,
 keep: UniqueKeepStrategy
) -> PolarsResult<DataFrame>

Drop duplicate rows from a DataFrame. This fails when there is a column of type List in DataFrame

Stable means that the order is maintained. This has a higher cost than an unstable distinct.

Example

let df = df! {
              "flt" => [1., 1., 2., 2., 3., 3.],
              "int" => [1, 1, 2, 2, 3, 3, ],
              "str" => ["a", "a", "b", "b", "c", "c"]
          }?;

println!("{}", df.unique_stable(None, UniqueKeepStrategy::First)?);

Returns

+-----+-----+-----+
| flt | int | str |
| --- | --- | --- |
| f64 | i32 | str |
+=====+=====+=====+
| 1   | 1   | "a" |
+-----+-----+-----+
| 2   | 2   | "b" |
+-----+-----+-----+
| 3   | 3   | "c" |
+-----+-----+-----+

Examples found in repository ?

src/frame/mod.rs (line 3011)

    pub fn drop_duplicates(
        &self,
        maintain_order: bool,
        subset: Option<&[String]>,
    ) -> PolarsResult<Self> {
        match maintain_order {
            true => self.unique_stable(subset, UniqueKeepStrategy::First),
            false => self.unique(subset, UniqueKeepStrategy::First),
        }
    }

source

pub fn unique(
 &self,
 subset: Option<&[String]>,
 keep: UniqueKeepStrategy
) -> PolarsResult<DataFrame>

Unstable distinct. See DataFrame::unique_stable.

Examples found in repository ?

src/frame/mod.rs (line 3012)

    pub fn drop_duplicates(
        &self,
        maintain_order: bool,
        subset: Option<&[String]>,
    ) -> PolarsResult<Self> {
        match maintain_order {
            true => self.unique_stable(subset, UniqueKeepStrategy::First),
            false => self.unique(subset, UniqueKeepStrategy::First),
        }
    }

source

pub fn is_unique(&self) -> PolarsResult<BooleanChunked>

Get a mask of all the unique rows in the DataFrame.

Example

let df: DataFrame = df!("Company" => &["Apple", "Microsoft"],
                        "ISIN" => &["US0378331005", "US5949181045"])?;
let ca: ChunkedArray<BooleanType> = df.is_unique()?;

assert!(ca.all());

source

pub fn is_duplicated(&self) -> PolarsResult<BooleanChunked>

Get a mask of all the duplicated rows in the DataFrame.

Example

let df: DataFrame = df!("Company" => &["Alphabet", "Alphabet"],
                        "ISIN" => &["US02079K3059", "US02079K1079"])?;
let ca: ChunkedArray<BooleanType> = df.is_duplicated()?;

assert!(!ca.all());

source

pub fn null_count(&self) -> Self

Create a new DataFrame that shows the null counts per column.

source

pub fn hash_rows(
&mut self,
hasher_builder: Option<RandomState>
) -> PolarsResult<UInt64Chunked>

Hash and combine the row values

source

pub fn get_supertype(&self) -> Option<PolarsResult<DataType>>

Get the supertype of the columns in this DataFrame

Examples found in repository ?

src/frame/row.rs (line 189)

    pub fn transpose(&self) -> PolarsResult<DataFrame> {
        let height = self.height();
        let width = self.width();
        if height == 0 || width == 0 {
            return Err(PolarsError::NoData("empty dataframe".into()));
        }

        let dtype = self.get_supertype().unwrap()?;
        self.transpose_from_dtype(&dtype)
    }

source

pub fn partition_by(
&self,
cols: impl IntoVec<String>
) -> PolarsResult<Vec<DataFrame>>

Available on crate feature partition_by only.

Split into multiple DataFrames partitioned by groups

source

pub fn partition_by_stable(
&self,
cols: impl IntoVec<String>
) -> PolarsResult<Vec<DataFrame>>

Available on crate feature partition_by only.

Split into multiple DataFrames partitioned by groups Order of the groups are maintained.

source §

impl DataFrame

source

pub fn frame_equal(&self, other: &DataFrame) -> bool

Check if DataFrames are equal. Note that None == None evaluates to false

Example

let df1: DataFrame = df!("Atomic number" => &[1, 51, 300],
                        "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
let df2: DataFrame = df!("Atomic number" => &[1, 51, 300],
                        "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;

assert!(!df1.frame_equal(&df2));

source

pub fn frame_equal_missing(&self, other: &DataFrame) -> bool

Check if all values in DataFrames are equal where None == None evaluates to true.

Example

let df1: DataFrame = df!("Atomic number" => &[1, 51, 300],
                        "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
let df2: DataFrame = df!("Atomic number" => &[1, 51, 300],
                        "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;

assert!(df1.frame_equal_missing(&df2));

source

pub fn ptr_equal(&self, other: &DataFrame) -> bool

Checks if the Arc ptrs of the Series are equal

Example

let df1: DataFrame = df!("Atomic number" => &[1, 51, 300],
                        "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
let df2: &DataFrame = &df1;

assert!(df1.ptr_equal(df2));