Struct polars_core::frame::DataFrame  
source · [−]pub struct DataFrame { /* private fields */ }Expand description
A contiguous growable collection of Series that have the same length.
Use declarations
All the common tools can be found in crate::prelude (or in polars::prelude).
use polars_core::prelude::*; // if the crate polars-core is used directly
// use polars::prelude::*;      if the crate polars is used
Initialization
Default
A DataFrame can be initialized empty:
let df = DataFrame::default();
assert!(df.is_empty());
Wrapping a Vec<Series>
A DataFrame is built upon a Vec<Series> where the Series have the same length.
let s1 = Series::new("Fruit", &["Apple", "Apple", "Pear"]);
let s2 = Series::new("Color", &["Red", "Yellow", "Green"]);
let df: Result<DataFrame> = DataFrame::new(vec![s1, s2]);
Using a macro
The df! macro is a convenient method:
let df: Result<DataFrame> = df!("Fruit" => &["Apple", "Apple", "Pear"],
                                "Color" => &["Red", "Yellow", "Green"]);Using a CSV file
See the polars_io::csv::CsvReader.
Indexing
By a number
The Index<usize> is implemented for the DataFrame.
let df = df!("Fruit" => &["Apple", "Apple", "Pear"],
             "Color" => &["Red", "Yellow", "Green"])?;
assert_eq!(df[0], Series::new("Fruit", &["Apple", "Apple", "Pear"]));
assert_eq!(df[1], Series::new("Color", &["Red", "Yellow", "Green"]));
By a Series name
let df = df!("Fruit" => &["Apple", "Apple", "Pear"],
             "Color" => &["Red", "Yellow", "Green"])?;
assert_eq!(df["Fruit"], Series::new("Fruit", &["Apple", "Apple", "Pear"]));
assert_eq!(df["Color"], Series::new("Color", &["Red", "Yellow", "Green"]));Implementations
sourceimpl DataFrame
 
impl DataFrame
sourcepub fn to_ndarray<N>(&self) -> Result<Array2<N::Native>> where
    N: PolarsNumericType, 
 Available on crate feature ndarray only.
pub fn to_ndarray<N>(&self) -> Result<Array2<N::Native>> where
    N: PolarsNumericType, 
Available on crate feature ndarray only.
Create a 2D ndarray::Array from this DataFrame. This requires all columns in the
DataFrame to be non-null and numeric. They will be cast to the same data type
(if they aren’t already).
For floating point data we implicitly convert None to NaN without failure.
use polars_core::prelude::*;
let a = UInt32Chunked::new("a", &[1, 2, 3]).into_series();
let b = Float64Chunked::new("b", &[10., 8., 6.]).into_series();
let df = DataFrame::new(vec![a, b]).unwrap();
let ndarray = df.to_ndarray::<Float64Type>().unwrap();
println!("{:?}", ndarray);Outputs:
[[1.0, 10.0],
 [2.0, 8.0],
 [3.0, 6.0]], shape=[3, 2], strides=[2, 1], layout=C (0x1), const ndim=2/sourceimpl DataFrame
 
impl DataFrame
sourceimpl DataFrame
 
impl DataFrame
sourcepub fn join_asof_by<I, S>(
    &self,
    other: &DataFrame,
    left_on: &str,
    right_on: &str,
    left_by: I,
    right_by: I,
    strategy: AsofStrategy,
    tolerance: Option<AnyValue<'static>>
) -> Result<DataFrame> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
 Available on crate feature asof_join only.
pub fn join_asof_by<I, S>(
    &self,
    other: &DataFrame,
    left_on: &str,
    right_on: &str,
    left_by: I,
    right_by: I,
    strategy: AsofStrategy,
    tolerance: Option<AnyValue<'static>>
) -> Result<DataFrame> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
Available on crate feature asof_join only.
This is similar to a left join, except that we match on the nearest key rather than on equal keys.
The keys must be sorted to perform an asof join. This is a special implementation of an asof join
that searches for the nearest keys within the subgroups defined by the `by` columns (left_by and right_by).
sourceimpl DataFrame
 
impl DataFrame
sourcepub fn join_asof(
    &self,
    other: &DataFrame,
    left_on: &str,
    right_on: &str,
    strategy: AsofStrategy,
    tolerance: Option<AnyValue<'static>>,
    suffix: Option<String>
) -> Result<DataFrame>
 Available on crate feature asof_join only.
pub fn join_asof(
    &self,
    other: &DataFrame,
    left_on: &str,
    right_on: &str,
    strategy: AsofStrategy,
    tolerance: Option<AnyValue<'static>>,
    suffix: Option<String>
) -> Result<DataFrame>
Available on crate feature asof_join only.
This is similar to a left join, except that we match on the nearest key rather than on equal keys. The keys must be sorted to perform an asof join.
sourceimpl DataFrame
 
impl DataFrame
pub fn explode_impl(&self, columns: Vec<Series>) -> Result<DataFrame>
sourcepub fn explode<I, S>(&self, columns: I) -> Result<DataFrame> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
 
pub fn explode<I, S>(&self, columns: I) -> Result<DataFrame> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
Explode DataFrame to long format by exploding a column with Lists.
Example
let s0 = Series::new("a", &[1i64, 2, 3]);
let s1 = Series::new("b", &[1i64, 1, 1]);
let s2 = Series::new("c", &[2i64, 2, 2]);
let list = Series::new("foo", &[s0, s1, s2]);
let s0 = Series::new("B", [1, 2, 3]);
let s1 = Series::new("C", [1, 1, 1]);
let df = DataFrame::new(vec![list, s0, s1])?;
let exploded = df.explode(["foo"])?;
println!("{:?}", df);
println!("{:?}", exploded);Outputs:
 +-------------+-----+-----+
 | foo         | B   | C   |
 | ---         | --- | --- |
 | list [i64]  | i32 | i32 |
 +=============+=====+=====+
 | "[1, 2, 3]" | 1   | 1   |
 +-------------+-----+-----+
 | "[1, 1, 1]" | 2   | 1   |
 +-------------+-----+-----+
 | "[2, 2, 2]" | 3   | 1   |
 +-------------+-----+-----+
 +-----+-----+-----+
 | foo | B   | C   |
 | --- | --- | --- |
 | i64 | i32 | i32 |
 +=====+=====+=====+
 | 1   | 1   | 1   |
 +-----+-----+-----+
 | 2   | 1   | 1   |
 +-----+-----+-----+
 | 3   | 1   | 1   |
 +-----+-----+-----+
 | 1   | 2   | 1   |
 +-----+-----+-----+
 | 1   | 2   | 1   |
 +-----+-----+-----+
 | 1   | 2   | 1   |
 +-----+-----+-----+
 | 2   | 3   | 1   |
 +-----+-----+-----+
 | 2   | 3   | 1   |
 +-----+-----+-----+
 | 2   | 3   | 1   |
 +-----+-----+-----+sourcepub fn melt<I, J>(&self, id_vars: I, value_vars: J) -> Result<Self> where
    I: IntoVec<String>,
    J: IntoVec<String>, 
 
pub fn melt<I, J>(&self, id_vars: I, value_vars: J) -> Result<Self> where
    I: IntoVec<String>,
    J: IntoVec<String>, 
Unpivot a DataFrame from wide to long format.
Example
Arguments
- id_vars - String slice that represents the columns to use as id variables.
- value_vars - String slice that represents the columns to use as value variables.
If value_vars is empty, all columns that are not in id_vars will be used.
let df = df!("A" => &["a", "b", "a"],
             "B" => &[1, 3, 5],
             "C" => &[10, 11, 12],
             "D" => &[2, 4, 6]
    )?;
let melted = df.melt(&["A", "B"], &["C", "D"])?;
println!("{:?}", df);
println!("{:?}", melted);Outputs:
 +-----+-----+-----+-----+
 | A   | B   | C   | D   |
 | --- | --- | --- | --- |
 | str | i32 | i32 | i32 |
 +=====+=====+=====+=====+
 | "a" | 1   | 10  | 2   |
 +-----+-----+-----+-----+
 | "b" | 3   | 11  | 4   |
 +-----+-----+-----+-----+
 | "a" | 5   | 12  | 6   |
 +-----+-----+-----+-----+
 +-----+-----+----------+-------+
 | A   | B   | variable | value |
 | --- | --- | ---      | ---   |
 | str | i32 | str      | i32   |
 +=====+=====+==========+=======+
 | "a" | 1   | "C"      | 10    |
 +-----+-----+----------+-------+
 | "b" | 3   | "C"      | 11    |
 +-----+-----+----------+-------+
 | "a" | 5   | "C"      | 12    |
 +-----+-----+----------+-------+
 | "a" | 1   | "D"      | 2     |
 +-----+-----+----------+-------+
 | "b" | 3   | "D"      | 4     |
 +-----+-----+----------+-------+
 | "a" | 5   | "D"      | 6     |
 +-----+-----+----------+-------+sourceimpl DataFrame
 
impl DataFrame
sourcepub fn pivot<I0, S0, I1, S1, I2, S2>(
    &self,
    values: I0,
    index: I1,
    columns: I2,
    agg_fn: PivotAgg,
    sort_columns: bool
) -> Result<DataFrame> where
    I0: IntoIterator<Item = S0>,
    S0: AsRef<str>,
    I1: IntoIterator<Item = S1>,
    S1: AsRef<str>,
    I2: IntoIterator<Item = S2>,
    S2: AsRef<str>, 
 
pub fn pivot<I0, S0, I1, S1, I2, S2>(
    &self,
    values: I0,
    index: I1,
    columns: I2,
    agg_fn: PivotAgg,
    sort_columns: bool
) -> Result<DataFrame> where
    I0: IntoIterator<Item = S0>,
    S0: AsRef<str>,
    I1: IntoIterator<Item = S1>,
    S1: AsRef<str>,
    I2: IntoIterator<Item = S2>,
    S2: AsRef<str>, 
Do a pivot operation based on the group key, a pivot column and an aggregation function on the values column.
Note
The Polars/Arrow memory layout is not ideal for transposing operations like pivots. If you have a relatively large table, consider using a groupby instead of a pivot.
pub fn pivot_stable<I0, S0, I1, S1, I2, S2>(
    &self,
    values: I0,
    index: I1,
    columns: I2,
    agg_fn: PivotAgg,
    sort_columns: bool
) -> Result<DataFrame> where
    I0: IntoIterator<Item = S0>,
    S0: AsRef<str>,
    I1: IntoIterator<Item = S1>,
    S1: AsRef<str>,
    I2: IntoIterator<Item = S2>,
    S2: AsRef<str>, 
sourceimpl DataFrame
 
impl DataFrame
pub fn groupby_with_series(
    &self,
    by: Vec<Series>,
    multithreaded: bool,
    sorted: bool
) -> Result<GroupBy<'_>>
sourcepub fn groupby<I, S>(&self, by: I) -> Result<GroupBy<'_>> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
 
pub fn groupby<I, S>(&self, by: I) -> Result<GroupBy<'_>> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
Group DataFrame using a Series column.
Example
use polars_core::prelude::*;
fn groupby_sum(df: &DataFrame) -> Result<DataFrame> {
    df.groupby(["column_name"])?
    .select(["agg_column_name"])
    .sum()
}sourcepub fn groupby_stable<I, S>(&self, by: I) -> Result<GroupBy<'_>> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
 
pub fn groupby_stable<I, S>(&self, by: I) -> Result<GroupBy<'_>> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
Group DataFrame using a Series column. The groups are ordered by their smallest row index.
sourceimpl DataFrame
 
impl DataFrame
sourcepub fn join<I, S>(
    &self,
    other: &DataFrame,
    left_on: I,
    right_on: I,
    how: JoinType,
    suffix: Option<String>
) -> Result<DataFrame> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
 
pub fn join<I, S>(
    &self,
    other: &DataFrame,
    left_on: I,
    right_on: I,
    how: JoinType,
    suffix: Option<String>
) -> Result<DataFrame> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
Generic join method. Can be used to join on multiple columns.
Example
let df1: DataFrame = df!("Fruit" => &["Apple", "Banana", "Pear"],
                         "Phosphorus (mg/100g)" => &[11, 22, 12])?;
let df2: DataFrame = df!("Name" => &["Apple", "Banana", "Pear"],
                         "Potassium (mg/100g)" => &[107, 358, 115])?;
let df3: DataFrame = df1.join(&df2, ["Fruit"], ["Name"], JoinType::Inner, None)?;
assert_eq!(df3.shape(), (3, 3));
println!("{}", df3);Output:
shape: (3, 3)
+--------+----------------------+---------------------+
| Fruit  | Phosphorus (mg/100g) | Potassium (mg/100g) |
| ---    | ---                  | ---                 |
| str    | i32                  | i32                 |
+========+======================+=====================+
| Apple  | 11                   | 107                 |
+--------+----------------------+---------------------+
| Banana | 22                   | 358                 |
+--------+----------------------+---------------------+
| Pear   | 12                   | 115                 |
+--------+----------------------+---------------------+sourcepub fn inner_join<I, S>(
    &self,
    other: &DataFrame,
    left_on: I,
    right_on: I
) -> Result<DataFrame> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
 
pub fn inner_join<I, S>(
    &self,
    other: &DataFrame,
    left_on: I,
    right_on: I
) -> Result<DataFrame> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
Perform an inner join on two DataFrames.
Example
fn join_dfs(left: &DataFrame, right: &DataFrame) -> Result<DataFrame> {
    left.inner_join(right, ["join_column_left"], ["join_column_right"])
}sourcepub fn left_join<I, S>(
    &self,
    other: &DataFrame,
    left_on: I,
    right_on: I
) -> Result<DataFrame> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
 
pub fn left_join<I, S>(
    &self,
    other: &DataFrame,
    left_on: I,
    right_on: I
) -> Result<DataFrame> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
Perform a left join on two DataFrames
Example
let df1: DataFrame = df!("Wavelength (nm)" => &[480.0, 650.0, 577.0, 1201.0, 100.0])?;
let df2: DataFrame = df!("Color" => &["Blue", "Yellow", "Red"],
                         "Wavelength nm" => &[480.0, 577.0, 650.0])?;
let df3: DataFrame = df1.left_join(&df2, ["Wavelength (nm)"], ["Wavelength nm"])?;
println!("{:?}", df3);Output:
shape: (5, 2)
+-----------------+--------+
| Wavelength (nm) | Color  |
| ---             | ---    |
| f64             | str    |
+=================+========+
| 480             | Blue   |
+-----------------+--------+
| 650             | Red    |
+-----------------+--------+
| 577             | Yellow |
+-----------------+--------+
| 1201            | null   |
+-----------------+--------+
| 100             | null   |
+-----------------+--------+sourcepub fn outer_join<I, S>(
    &self,
    other: &DataFrame,
    left_on: I,
    right_on: I
) -> Result<DataFrame> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
 
pub fn outer_join<I, S>(
    &self,
    other: &DataFrame,
    left_on: I,
    right_on: I
) -> Result<DataFrame> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
Perform an outer join on two DataFrames
Example
fn join_dfs(left: &DataFrame, right: &DataFrame) -> Result<DataFrame> {
    left.outer_join(right, ["join_column_left"], ["join_column_right"])
}sourceimpl DataFrame
 
impl DataFrame
sourcepub fn get_row(&self, idx: usize) -> Row<'_>
 Available on crate feature rows only.
pub fn get_row(&self, idx: usize) -> Row<'_>
Available on crate feature rows only.
Get a row from a DataFrame. Use of this is discouraged as it will likely be slow.
sourcepub fn get_row_amortized<'a>(&'a self, idx: usize, row: &mut Row<'a>)
 Available on crate feature rows only.
pub fn get_row_amortized<'a>(&'a self, idx: usize, row: &mut Row<'a>)
Available on crate feature rows only.
Amortize allocations by reusing a row. The caller is responsible for making sure that the row has at least the capacity for the number of columns in the DataFrame.
sourcepub unsafe fn get_row_amortized_unchecked<'a>(
    &'a self,
    idx: usize,
    row: &mut Row<'a>
)
 Available on crate feature rows only.
pub unsafe fn get_row_amortized_unchecked<'a>(
    &'a self,
    idx: usize,
    row: &mut Row<'a>
)
Available on crate feature rows only.
Amortize allocations by reusing a row. The caller is responsible for making sure that the row has at least the capacity for the number of columns in the DataFrame.
Safety
Does not do any bounds checking.
sourcepub fn from_rows_and_schema(rows: &[Row<'_>], schema: &Schema) -> Result<Self>
 Available on crate feature rows only.
pub fn from_rows_and_schema(rows: &[Row<'_>], schema: &Schema) -> Result<Self>
Available on crate feature rows only.
Create a new DataFrame from rows. This should only be used when you have row-wise data,
as this is a lot slower than creating the Series in a columnar fashion.
sourcepub fn from_rows_iter_and_schema<'a, I>(rows: I, schema: &Schema) -> Result<Self> where
    I: Iterator<Item = &'a Row<'a>>, 
 Available on crate feature rows only.
pub fn from_rows_iter_and_schema<'a, I>(rows: I, schema: &Schema) -> Result<Self> where
    I: Iterator<Item = &'a Row<'a>>, 
Available on crate feature rows only.
Create a new DataFrame from an iterator over rows. This should only be used when you have row-wise data,
as this is a lot slower than creating the Series in a columnar fashion.
sourceimpl DataFrame
 
impl DataFrame
sourcepub fn estimated_size(&self) -> usize
 
pub fn estimated_size(&self) -> usize
Returns an estimation of the total (heap) allocated size of the DataFrame in bytes.
Implementation
This estimation is the sum of the size of its buffers, validity, including nested arrays.
Multiple arrays may share buffers and bitmaps. Therefore, the size of 2 arrays is not the
sum of the sizes computed from this function. In particular, StructArray’s size is an upper bound.
When an array is sliced, its allocated size remains constant because the buffer is unchanged. However, this function will yield a smaller number. This is because this function returns the visible size of the buffer, not its total capacity.
FFI buffers are included in this estimation.
sourcepub fn new<S: IntoSeries>(columns: Vec<S>) -> Result<Self>
 
pub fn new<S: IntoSeries>(columns: Vec<S>) -> Result<Self>
Create a DataFrame from a Vector of Series.
Example
let s0 = Series::new("days", [0, 1, 2].as_ref());
let s1 = Series::new("temp", [22.1, 19.9, 7.].as_ref());
let df = DataFrame::new(vec![s0, s1])?;sourcepub const fn empty() -> Self
 
pub const fn empty() -> Self
Creates an empty DataFrame usable in a compile time context (such as static initializers).
Example
use polars_core::prelude::DataFrame;
static EMPTY: DataFrame = DataFrame::empty();sourcepub fn pop(&mut self) -> Option<Series>
 
pub fn pop(&mut self) -> Option<Series>
Removes the last Series from the DataFrame and returns it, or None if it is empty.
Example
let s1 = Series::new("Ocean", &["Atlantic", "Indian"]);
let s2 = Series::new("Area (km²)", &[106_460_000, 70_560_000]);
let mut df = DataFrame::new(vec![s1.clone(), s2.clone()])?;
assert_eq!(df.pop(), Some(s2));
assert_eq!(df.pop(), Some(s1));
assert_eq!(df.pop(), None);
assert!(df.is_empty());sourcepub fn with_row_count(&self, name: &str, offset: Option<IdxSize>) -> Result<Self>
 
pub fn with_row_count(&self, name: &str, offset: Option<IdxSize>) -> Result<Self>
Add a new column at index 0 that counts the rows.
Example
let df1: DataFrame = df!("Name" => &["James", "Mary", "John", "Patricia"])?;
assert_eq!(df1.shape(), (4, 1));
let df2: DataFrame = df1.with_row_count("Id", None)?;
assert_eq!(df2.shape(), (4, 2));
println!("{}", df2);
Output:
 shape: (4, 2)
 +-----+----------+
 | Id  | Name     |
 | --- | ---      |
 | u32 | str      |
 +=====+==========+
 | 0   | James    |
 +-----+----------+
 | 1   | Mary     |
 +-----+----------+
 | 2   | John     |
 +-----+----------+
 | 3   | Patricia |
 +-----+----------+sourcepub fn with_row_count_mut(
    &mut self,
    name: &str,
    offset: Option<IdxSize>
) -> &mut Self
 
pub fn with_row_count_mut(
    &mut self,
    name: &str,
    offset: Option<IdxSize>
) -> &mut Self
Add a row count in place.
sourcepub const fn new_no_checks(columns: Vec<Series>) -> DataFrame
 
pub const fn new_no_checks(columns: Vec<Series>) -> DataFrame
Create a new DataFrame without checking the length or duplicate occurrence of the Series.
It is advised to use DataFrame::new in favor of this method.
Panic
It is the caller’s responsibility to uphold the contract of all Series
having an equal length; if not, this may panic down the line.
sourcepub fn agg_chunks(&self) -> Self
 
pub fn agg_chunks(&self) -> Self
Aggregate all chunks to contiguous memory.
sourcepub fn shrink_to_fit(&mut self)
 
pub fn shrink_to_fit(&mut self)
Shrink the capacity of this DataFrame to fit its length.
sourcepub fn as_single_chunk(&mut self) -> &mut Self
 
pub fn as_single_chunk(&mut self) -> &mut Self
Aggregate all the chunks in the DataFrame to a single chunk.
sourcepub fn as_single_chunk_par(&mut self) -> &mut Self
 
pub fn as_single_chunk_par(&mut self) -> &mut Self
Aggregate all the chunks in the DataFrame to a single chunk in parallel. This may lead to more peak memory consumption.
sourcepub fn should_rechunk(&self) -> bool
 
pub fn should_rechunk(&self) -> bool
Estimates whether the DataFrame’s columns consist of the same chunk sizes.
sourcepub fn schema(&self) -> Schema
 
pub fn schema(&self) -> Schema
Get the DataFrame schema.
Example
let df: DataFrame = df!("Thing" => &["Observable universe", "Human stupidity"],
                        "Diameter (m)" => &[8.8e26, f64::INFINITY])?;
let f1: Field = Field::new("Thing", DataType::Utf8);
let f2: Field = Field::new("Diameter (m)", DataType::Float64);
let sc: Schema = Schema::from(vec![f1, f2]);
assert_eq!(df.schema(), sc);sourcepub fn get_columns(&self) -> &Vec<Series>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where
    A: Allocator, 
 
pub fn get_columns(&self) -> &Vec<Series>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where
    A: Allocator, 
A: Allocator,
Get a reference to the DataFrame columns.
Example
let df: DataFrame = df!("Name" => &["Adenine", "Cytosine", "Guanine", "Thymine"],
                        "Symbol" => &["A", "C", "G", "T"])?;
let columns: &Vec<Series> = df.get_columns();
assert_eq!(columns[0].name(), "Name");
assert_eq!(columns[1].name(), "Symbol");pub fn get_columns_mut(&mut self) -> &mut Vec<Series>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where
    A: Allocator, 
A: Allocator,
sourcepub fn iter(&self) -> Iter<'_, Series>
 
pub fn iter(&self) -> Iter<'_, Series>
Iterator over the columns as Series.
Example
let s1: Series = Series::new("Name", &["Pythagoras' theorem", "Shannon entropy"]);
let s2: Series = Series::new("Formula", &["a²+b²=c²", "H=-Σ[P(x)log|P(x)|]"]);
let df: DataFrame = DataFrame::new(vec![s1.clone(), s2.clone()])?;
let mut iterator = df.iter();
assert_eq!(iterator.next(), Some(&s1));
assert_eq!(iterator.next(), Some(&s2));
assert_eq!(iterator.next(), None);sourcepub fn get_column_names(&self) -> Vec<&str>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where
    A: Allocator, 
 
pub fn get_column_names(&self) -> Vec<&str>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where
    A: Allocator, 
A: Allocator,
Example
let df: DataFrame = df!("Language" => &["Rust", "Python"],
                        "Designer" => &["Graydon Hoare", "Guido van Rossum"])?;
assert_eq!(df.get_column_names(), &["Language", "Designer"]);sourcepub fn get_column_names_owned(&self) -> Vec<String>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where
    A: Allocator, 
 
pub fn get_column_names_owned(&self) -> Vec<String>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where
    A: Allocator, 
A: Allocator,
Get the Vec<String> representing the column names.
sourcepub fn set_column_names<S: AsRef<str>>(&mut self, names: &[S]) -> Result<()>
 
pub fn set_column_names<S: AsRef<str>>(&mut self, names: &[S]) -> Result<()>
Set the column names.
Example
let mut df: DataFrame = df!("Mathematical set" => &["ℕ", "ℤ", "𝔻", "ℚ", "ℝ", "ℂ"])?;
df.set_column_names(&["Set"])?;
assert_eq!(df.get_column_names(), &["Set"]);sourcepub fn dtypes(&self) -> Vec<DataType>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where
    A: Allocator, 
 
pub fn dtypes(&self) -> Vec<DataType>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where
    A: Allocator, 
A: Allocator,
Get the data types of the columns in the DataFrame.
Example
let venus_air: DataFrame = df!("Element" => &["Carbon dioxide", "Nitrogen"],
                               "Fraction" => &[0.965, 0.035])?;
assert_eq!(venus_air.dtypes(), &[DataType::Utf8, DataType::Float64]);sourcepub fn fields(&self) -> Vec<Field>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where
    A: Allocator, 
 
pub fn fields(&self) -> Vec<Field>ⓘNotable traits for Vec<u8, A>impl<A> Write for Vec<u8, A> where
    A: Allocator, 
A: Allocator,
Get the schema fields of the DataFrame.
Example
let earth: DataFrame = df!("Surface type" => &["Water", "Land"],
                           "Fraction" => &[0.708, 0.292])?;
let f1: Field = Field::new("Surface type", DataType::Utf8);
let f2: Field = Field::new("Fraction", DataType::Float64);
assert_eq!(earth.fields(), &[f1, f2]);sourcepub fn shape(&self) -> (usize, usize)
 
pub fn shape(&self) -> (usize, usize)
Get (height, width) of the DataFrame.
Example
let df0: DataFrame = DataFrame::default();
let df1: DataFrame = df!("1" => &[1, 2, 3, 4, 5])?;
let df2: DataFrame = df!("1" => &[1, 2, 3, 4, 5],
                         "2" => &[1, 2, 3, 4, 5])?;
assert_eq!(df0.shape(), (0 ,0));
assert_eq!(df1.shape(), (5, 1));
assert_eq!(df2.shape(), (5, 2));sourcepub fn width(&self) -> usize
 
pub fn width(&self) -> usize
Get the width of the DataFrame which is the number of columns.
Example
let df0: DataFrame = DataFrame::default();
let df1: DataFrame = df!("Series 1" => &[0; 0])?;
let df2: DataFrame = df!("Series 1" => &[0; 0],
                         "Series 2" => &[0; 0])?;
assert_eq!(df0.width(), 0);
assert_eq!(df1.width(), 1);
assert_eq!(df2.width(), 2);sourcepub fn height(&self) -> usize
 
pub fn height(&self) -> usize
Get the height of the DataFrame which is the number of rows.
Example
let df0: DataFrame = DataFrame::default();
let df1: DataFrame = df!("Currency" => &["€", "$"])?;
let df2: DataFrame = df!("Currency" => &["€", "$", "¥", "£", "₿"])?;
assert_eq!(df0.height(), 0);
assert_eq!(df1.height(), 2);
assert_eq!(df2.height(), 5);sourcepub fn is_empty(&self) -> bool
 
pub fn is_empty(&self) -> bool
Check if the DataFrame is empty.
Example
let df1: DataFrame = DataFrame::default();
assert!(df1.is_empty());
let df2: DataFrame = df!("First name" => &["Forever"],
                         "Last name" => &["Alone"])?;
assert!(!df2.is_empty());sourcepub fn hstack_mut(&mut self, columns: &[Series]) -> Result<&mut Self>
 
pub fn hstack_mut(&mut self, columns: &[Series]) -> Result<&mut Self>
Add multiple Series to a DataFrame.
The added Series are required to have the same length.
Example
fn stack(df: &mut DataFrame, columns: &[Series]) {
    df.hstack_mut(columns);
}sourcepub fn hstack(&self, columns: &[Series]) -> Result<Self>
 
pub fn hstack(&self, columns: &[Series]) -> Result<Self>
Add multiple Series to a DataFrame.
The added Series are required to have the same length.
Example
let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"])?;
let s1: Series = Series::new("Proton", &[29, 47, 79]);
let s2: Series = Series::new("Electron", &[29, 47, 79]);
let df2: DataFrame = df1.hstack(&[s1, s2])?;
assert_eq!(df2.shape(), (3, 3));
println!("{}", df2);Output:
shape: (3, 3)
+---------+--------+----------+
| Element | Proton | Electron |
| ---     | ---    | ---      |
| str     | i32    | i32      |
+=========+========+==========+
| Copper  | 29     | 29       |
+---------+--------+----------+
| Silver  | 47     | 47       |
+---------+--------+----------+
| Gold    | 79     | 79       |
+---------+--------+----------+sourcepub fn vstack(&self, other: &DataFrame) -> Result<Self>
 
pub fn vstack(&self, other: &DataFrame) -> Result<Self>
Concatenate a DataFrame to this DataFrame and return the result as a newly allocated DataFrame.
If many vstack operations are done, it is recommended to call DataFrame::rechunk.
Example
let df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
                         "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
                         "Melting Point (K)" => &[2041.4, 1828.05])?;
let df3: DataFrame = df1.vstack(&df2)?;
assert_eq!(df3.shape(), (5, 2));
println!("{}", df3);Output:
shape: (5, 2)
+-----------+-------------------+
| Element   | Melting Point (K) |
| ---       | ---               |
| str       | f64               |
+===========+===================+
| Copper    | 1357.77           |
+-----------+-------------------+
| Silver    | 1234.93           |
+-----------+-------------------+
| Gold      | 1337.33           |
+-----------+-------------------+
| Platinum  | 2041.4            |
+-----------+-------------------+
| Palladium | 1828.05           |
+-----------+-------------------+sourcepub fn vstack_mut(&mut self, other: &DataFrame) -> Result<&mut Self>
 
pub fn vstack_mut(&mut self, other: &DataFrame) -> Result<&mut Self>
Concatenate a DataFrame to this DataFrame
If many vstack operations are done, it is recommended to call DataFrame::rechunk.
Example
let mut df1: DataFrame = df!("Element" => &["Copper", "Silver", "Gold"],
                         "Melting Point (K)" => &[1357.77, 1234.93, 1337.33])?;
let df2: DataFrame = df!("Element" => &["Platinum", "Palladium"],
                         "Melting Point (K)" => &[2041.4, 1828.05])?;
df1.vstack_mut(&df2)?;
assert_eq!(df1.shape(), (5, 2));
println!("{}", df1);Output:
shape: (5, 2)
+-----------+-------------------+
| Element   | Melting Point (K) |
| ---       | ---               |
| str       | f64               |
+===========+===================+
| Copper    | 1357.77           |
+-----------+-------------------+
| Silver    | 1234.93           |
+-----------+-------------------+
| Gold      | 1337.33           |
+-----------+-------------------+
| Platinum  | 2041.4            |
+-----------+-------------------+
| Palladium | 1828.05           |
+-----------+-------------------+sourcepub fn extend(&mut self, other: &DataFrame) -> Result<()>
 
pub fn extend(&mut self, other: &DataFrame) -> Result<()>
Extend the memory backed by this DataFrame with the values from other.
Different from vstack, which adds the chunks from other to the chunks of this DataFrame,
extend appends the data from other to the underlying memory locations and thus may cause a reallocation.
If this does not cause a reallocation, the resulting data structure will not have any extra chunks and thus will yield faster queries.
Prefer extend over vstack when you want to do a query after a single append. For instance during
online operations where you add n rows and rerun a query.
Prefer vstack over extend when you want to append many times before doing a query. For instance
when you read in multiple files and want to store them in a single DataFrame. In the latter case, finish the sequence
of append operations with a rechunk.
sourcepub fn drop_in_place(&mut self, name: &str) -> Result<Series>
 
pub fn drop_in_place(&mut self, name: &str) -> Result<Series>
Remove a column by name and return the column removed.
Example
let mut df: DataFrame = df!("Animal" => &["Tiger", "Lion", "Great auk"],
                            "IUCN" => &["Endangered", "Vulnerable", "Extinct"])?;
let s1: Result<Series> = df.drop_in_place("Average weight");
assert!(s1.is_err());
let s2: Series = df.drop_in_place("Animal")?;
assert_eq!(s2, Series::new("Animal", &["Tiger", "Lion", "Great auk"]));sourcepub fn drop_nulls(&self, subset: Option<&[String]>) -> Result<Self>
 
pub fn drop_nulls(&self, subset: Option<&[String]>) -> Result<Self>
Return a new DataFrame where all null values are dropped.
Example
let df1: DataFrame = df!("Country" => ["Malta", "Liechtenstein", "North Korea"],
                        "Tax revenue (% GDP)" => [Some(32.7), None, None])?;
assert_eq!(df1.shape(), (3, 2));
let df2: DataFrame = df1.drop_nulls(None)?;
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);Output:
shape: (1, 2)
+---------+---------------------+
| Country | Tax revenue (% GDP) |
| ---     | ---                 |
| str     | f64                 |
+=========+=====================+
| Malta   | 32.7                |
+---------+---------------------+sourcepub fn drop(&self, name: &str) -> Result<Self>
 
pub fn drop(&self, name: &str) -> Result<Self>
Drop a column by name.
This is a pure method and will return a new DataFrame instead of modifying
the current one in place.
Example
let df1: DataFrame = df!("Ray type" => &["α", "β", "X", "γ"])?;
let df2: DataFrame = df1.drop("Ray type")?;
assert!(df2.is_empty());sourcepub fn insert_at_idx<S: IntoSeries>(
    &mut self,
    index: usize,
    column: S
) -> Result<&mut Self>
 
pub fn insert_at_idx<S: IntoSeries>(
    &mut self,
    index: usize,
    column: S
) -> Result<&mut Self>
Insert a new column at a given index.
sourcepub fn with_column<S: IntoSeries>(&mut self, column: S) -> Result<&mut Self>
 
pub fn with_column<S: IntoSeries>(&mut self, column: S) -> Result<&mut Self>
Add a new column to this DataFrame or replace an existing one.
sourcepub fn with_column_and_schema<S: IntoSeries>(
    &mut self,
    column: S,
    schema: &Schema
) -> Result<&mut Self>
 
pub fn with_column_and_schema<S: IntoSeries>(
    &mut self,
    column: S,
    schema: &Schema
) -> Result<&mut Self>
Add a new column to this DataFrame or replace an existing one.
Uses an existing schema to amortize lookups.
If the schema is incorrect, we will fall back to linear search.
sourcepub fn get(&self, idx: usize) -> Option<Vec<AnyValue<'_>>>
 
pub fn get(&self, idx: usize) -> Option<Vec<AnyValue<'_>>>
Get a row in the DataFrame. Beware this is slow.
Example
fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    df.get(idx)
}sourcepub fn select_at_idx(&self, idx: usize) -> Option<&Series>
 
pub fn select_at_idx(&self, idx: usize) -> Option<&Series>
Select a Series by index.
Example
let df: DataFrame = df!("Star" => &["Sun", "Betelgeuse", "Sirius A", "Sirius B"],
                        "Absolute magnitude" => &[4.83, -5.85, 1.42, 11.18])?;
let s1: Option<&Series> = df.select_at_idx(0);
let s2: Series = Series::new("Star", &["Sun", "Betelgeuse", "Sirius A", "Sirius B"]);
assert_eq!(s1, Some(&s2));sourcepub fn select_by_range<R>(&self, range: R) -> Result<Self> where
    R: RangeBounds<usize>, 
 
pub fn select_by_range<R>(&self, range: R) -> Result<Self> where
    R: RangeBounds<usize>, 
Select column(s) from this DataFrame by range and return a new DataFrame
Examples
let df = df! {
    "0" => &[0, 0, 0],
    "1" => &[1, 1, 1],
    "2" => &[2, 2, 2]
}?;
assert!(df.select(&["0", "1"])?.frame_equal(&df.select_by_range(0..=1)?));
assert!(df.frame_equal(&df.select_by_range(..)?));sourcepub fn find_idx_by_name(&self, name: &str) -> Option<usize>
 
pub fn find_idx_by_name(&self, name: &str) -> Option<usize>
Get column index of a Series by name.
Example
let df: DataFrame = df!("Name" => &["Player 1", "Player 2", "Player 3"],
                        "Health" => &[100, 200, 500],
                        "Mana" => &[250, 100, 0],
                        "Strength" => &[30, 150, 300])?;
assert_eq!(df.find_idx_by_name("Name"), Some(0));
assert_eq!(df.find_idx_by_name("Health"), Some(1));
assert_eq!(df.find_idx_by_name("Mana"), Some(2));
assert_eq!(df.find_idx_by_name("Strength"), Some(3));
assert_eq!(df.find_idx_by_name("Haste"), None);sourcepub fn column(&self, name: &str) -> Result<&Series>
 
pub fn column(&self, name: &str) -> Result<&Series>
Select a single column by name.
Example
let s1: Series = Series::new("Password", &["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]);
let s2: Series = Series::new("Robustness", &["Weak", "Strong"]);
let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?;
assert_eq!(df.column("Password")?, &s1);sourcepub fn columns<I, S>(&self, names: I) -> Result<Vec<&Series>> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
 
pub fn columns<I, S>(&self, names: I) -> Result<Vec<&Series>> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
Select multiple columns by name.
Example
let df: DataFrame = df!("Latin name" => &["Oncorhynchus kisutch", "Salmo salar"],
                        "Max weight (kg)" => &[16.0, 35.89])?;
let sv: Vec<&Series> = df.columns(&["Latin name", "Max weight (kg)"])?;
assert_eq!(&df[0], sv[0]);
assert_eq!(&df[1], sv[1]);sourcepub fn select<I, S>(&self, selection: I) -> Result<Self> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
 
pub fn select<I, S>(&self, selection: I) -> Result<Self> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
Select column(s) from this DataFrame and return a new DataFrame.
Examples
fn example(df: &DataFrame) -> Result<DataFrame> {
    df.select(["foo", "bar"])
}sourcepub fn select_series(
    &self,
    selection: impl IntoVec<String>
) -> Result<Vec<Series>>
 
pub fn select_series(
    &self,
    selection: impl IntoVec<String>
) -> Result<Vec<Series>>
Select column(s) from this DataFrame and return them in a Vec.
Example
let df: DataFrame = df!("Name" => &["Methane", "Ethane", "Propane"],
                        "Carbon" => &[1, 2, 3],
                        "Hydrogen" => &[4, 6, 8])?;
let sv: Vec<Series> = df.select_series(&["Carbon", "Hydrogen"])?;
assert_eq!(df["Carbon"], sv[0]);
assert_eq!(df["Hydrogen"], sv[1]);sourcepub fn filter(&self, mask: &BooleanChunked) -> Result<Self>
 
pub fn filter(&self, mask: &BooleanChunked) -> Result<Self>
Take the DataFrame rows by a boolean mask.
Example
fn example(df: &DataFrame) -> Result<DataFrame> {
    let mask = df.column("sepal.width")?.is_not_null();
    df.filter(&mask)
}sourcepub fn _filter_seq(&self, mask: &BooleanChunked) -> Result<Self>
 
pub fn _filter_seq(&self, mask: &BooleanChunked) -> Result<Self>
Same as filter but does not parallelize.
sourcepub fn take_iter<I>(&self, iter: I) -> Result<Self> where
    I: Iterator<Item = usize> + Clone + Sync + TrustedLen, 
 
pub fn take_iter<I>(&self, iter: I) -> Result<Self> where
    I: Iterator<Item = usize> + Clone + Sync + TrustedLen, 
Take DataFrame values by indexes from an iterator.
Example
fn example(df: &DataFrame) -> Result<DataFrame> {
    let iterator = (0..9).into_iter();
    df.take_iter(iterator)
}sourcepub unsafe fn take_iter_unchecked<I>(&self, iter: I) -> Self where
    I: Iterator<Item = usize> + Clone + Sync + TrustedLen, 
 
pub unsafe fn take_iter_unchecked<I>(&self, iter: I) -> Self where
    I: Iterator<Item = usize> + Clone + Sync + TrustedLen, 
Take DataFrame values by indexes from an iterator.
Safety
This doesn’t do any bound checking but checks null validity.
sourcepub unsafe fn take_opt_iter_unchecked<I>(&self, iter: I) -> Self where
    I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen, 
 
pub unsafe fn take_opt_iter_unchecked<I>(&self, iter: I) -> Self where
    I: Iterator<Item = Option<usize>> + Clone + Sync + TrustedLen, 
Take DataFrame values by indexes from an iterator that may contain None values.
Safety
This doesn’t do any bound checking. Out-of-bounds indexes may access uninitialized memory. Null validity is checked.
sourcepub fn take(&self, indices: &IdxCa) -> Result<Self>
 
pub fn take(&self, indices: &IdxCa) -> Result<Self>
Take DataFrame rows by index values.
Example
fn example(df: &DataFrame) -> Result<DataFrame> {
    let idx = IdxCa::new("idx", &[0, 1, 9]);
    df.take(&idx)
}sourcepub fn rename(&mut self, column: &str, name: &str) -> Result<&mut Self>
 
pub fn rename(&mut self, column: &str, name: &str) -> Result<&mut Self>
Rename a column in the DataFrame.
Example
fn example(df: &mut DataFrame) -> Result<&mut DataFrame> {
    let original_name = "foo";
    let new_name = "bar";
    df.rename(original_name, new_name)
}sourcepub fn sort_in_place(
    &mut self,
    by_column: impl IntoVec<String>,
    reverse: impl IntoVec<bool>
) -> Result<&mut Self>
 
pub fn sort_in_place(
    &mut self,
    by_column: impl IntoVec<String>,
    reverse: impl IntoVec<bool>
) -> Result<&mut Self>
Sort DataFrame in place by a column.
sourcepub fn sort_impl(
    &self,
    by_column: Vec<Series>,
    reverse: Vec<bool>,
    nulls_last: bool,
    slice: Option<(i64, usize)>
) -> Result<Self>
 
pub fn sort_impl(
    &self,
    by_column: Vec<Series>,
    reverse: Vec<bool>,
    nulls_last: bool,
    slice: Option<(i64, usize)>
) -> Result<Self>
This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization.
sourcepub fn sort(
    &self,
    by_column: impl IntoVec<String>,
    reverse: impl IntoVec<bool>
) -> Result<Self>
 
pub fn sort(
    &self,
    by_column: impl IntoVec<String>,
    reverse: impl IntoVec<bool>
) -> Result<Self>
Return a sorted clone of this DataFrame.
Example
fn sort_example(df: &DataFrame, reverse: bool) -> Result<DataFrame> {
    df.sort(["a"], reverse)
}
fn sort_by_multiple_columns_example(df: &DataFrame) -> Result<DataFrame> {
    df.sort(&["a", "b"], vec![false, true])
}sourcepub fn sort_with_options(
    &self,
    by_column: &str,
    options: SortOptions
) -> Result<Self>
 
pub fn sort_with_options(
    &self,
    by_column: &str,
    options: SortOptions
) -> Result<Self>
Sort the DataFrame by a single column with extra options.
sourcepub fn replace<S: IntoSeries>(
    &mut self,
    column: &str,
    new_col: S
) -> Result<&mut Self>
 
pub fn replace<S: IntoSeries>(
    &mut self,
    column: &str,
    new_col: S
) -> Result<&mut Self>
Replace a column with a Series.
Example
let mut df: DataFrame = df!("Country" => &["United States", "China"],
                        "Area (km²)" => &[9_833_520, 9_596_961])?;
let s: Series = Series::new("Country", &["USA", "PRC"]);
assert!(df.replace("Nation", s.clone()).is_err());
assert!(df.replace("Country", s).is_ok());sourcepub fn replace_or_add<S: IntoSeries>(
    &mut self,
    column: &str,
    new_col: S
) -> Result<&mut Self>
 
pub fn replace_or_add<S: IntoSeries>(
    &mut self,
    column: &str,
    new_col: S
) -> Result<&mut Self>
Replace or update a column. The difference between this method and DataFrame::with_column
is that now the value of column: &str determines the name of the column and not the name
of the Series passed to this method.
sourcepub fn replace_at_idx<S: IntoSeries>(
    &mut self,
    idx: usize,
    new_col: S
) -> Result<&mut Self>
 
pub fn replace_at_idx<S: IntoSeries>(
    &mut self,
    idx: usize,
    new_col: S
) -> Result<&mut Self>
Replace column at index idx with a Series.
Example
let s0 = Series::new("foo", &["ham", "spam", "egg"]);
let s1 = Series::new("ascii", &[70, 79, 79]);
let mut df = DataFrame::new(vec![s0, s1])?;
// Add 32 to get lowercase ascii values
df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);sourcepub fn apply<F, S>(&mut self, name: &str, f: F) -> Result<&mut Self> where
    F: FnOnce(&Series) -> S,
    S: IntoSeries, 
 
pub fn apply<F, S>(&mut self, name: &str, f: F) -> Result<&mut Self> where
    F: FnOnce(&Series) -> S,
    S: IntoSeries, 
Apply a closure to a column. This is the recommended way to do in place modification.
Example
let s0 = Series::new("foo", &["ham", "spam", "egg"]);
let s1 = Series::new("names", &["Jean", "Claude", "van"]);
let mut df = DataFrame::new(vec![s0, s1])?;
fn str_to_len(str_val: &Series) -> Series {
    str_val.utf8()
        .unwrap()
        .into_iter()
        .map(|opt_name: Option<&str>| {
            opt_name.map(|name: &str| name.len() as u32)
         })
        .collect::<UInt32Chunked>()
        .into_series()
}
// Replace the names column by the length of the names.
df.apply("names", str_to_len);Results in:
+--------+-------+
| foo    | names |
| ---    | ---   |
| str    | u32   |
+========+=======+
| "ham"  | 4     |
+--------+-------+
| "spam" | 6     |
+--------+-------+
| "egg"  | 3     |
+--------+-------+sourcepub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> Result<&mut Self> where
    F: FnOnce(&Series) -> S,
    S: IntoSeries, 
 
pub fn apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> Result<&mut Self> where
    F: FnOnce(&Series) -> S,
    S: IntoSeries, 
Apply a closure to a column at index idx. This is the recommended way to do in place
modification.
Example
let s0 = Series::new("foo", &["ham", "spam", "egg"]);
let s1 = Series::new("ascii", &[70, 79, 79]);
let mut df = DataFrame::new(vec![s0, s1])?;
// Add 32 to get lowercase ascii values
df.apply_at_idx(1, |s| s + 32);Results in:
+--------+-------+
| foo    | ascii |
| ---    | ---   |
| str    | i32   |
+========+=======+
| "ham"  | 102   |
+--------+-------+
| "spam" | 111   |
+--------+-------+
| "egg"  | 111   |
+--------+-------+sourcepub fn try_apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> Result<&mut Self> where
    F: FnOnce(&Series) -> Result<S>,
    S: IntoSeries, 
 
pub fn try_apply_at_idx<F, S>(&mut self, idx: usize, f: F) -> Result<&mut Self> where
    F: FnOnce(&Series) -> Result<S>,
    S: IntoSeries, 
Apply a closure that may fail to a column at index idx. This is the recommended way to do in place
modification.
Example
This is the idiomatic way to replace some values in a column of a DataFrame given a range of indexes.
let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
let mut df = DataFrame::new(vec![s0, s1])?;
let idx = vec![0, 1, 4];
df.try_apply("foo", |s| {
    s.utf8()?
    .set_at_idx_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
});Results in:
+---------------------+--------+
| foo                 | values |
| ---                 | ---    |
| str                 | i32    |
+=====================+========+
| "ham-is-modified"   | 1      |
+---------------------+--------+
| "spam-is-modified"  | 2      |
+---------------------+--------+
| "egg"               | 3      |
+---------------------+--------+
| "bacon"             | 4      |
+---------------------+--------+
| "quack-is-modified" | 5      |
+---------------------+--------+sourcepub fn try_apply<F, S>(&mut self, column: &str, f: F) -> Result<&mut Self> where
    F: FnOnce(&Series) -> Result<S>,
    S: IntoSeries, 
 
pub fn try_apply<F, S>(&mut self, column: &str, f: F) -> Result<&mut Self> where
    F: FnOnce(&Series) -> Result<S>,
    S: IntoSeries, 
Apply a closure that may fail to a column. This is the recommended way to do in place modification.
Example
This is the idiomatic way to replace some values in a column of a DataFrame given a boolean mask.
let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
let mut df = DataFrame::new(vec![s0, s1])?;
// create a mask
let values = df.column("values")?;
let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?;
df.try_apply("foo", |s| {
    s.utf8()?
    .set(&mask, Some("not_within_bounds"))
});Results in:
+---------------------+--------+
| foo                 | values |
| ---                 | ---    |
| str                 | i32    |
+=====================+========+
| "not_within_bounds" | 1      |
+---------------------+--------+
| "spam"              | 2      |
+---------------------+--------+
| "egg"               | 3      |
+---------------------+--------+
| "bacon"             | 4      |
+---------------------+--------+
| "not_within_bounds" | 5      |
+---------------------+--------+sourcepub fn slice(&self, offset: i64, length: usize) -> Self
 
pub fn slice(&self, offset: i64, length: usize) -> Self
Slice the DataFrame along the rows.
Example
let df: DataFrame = df!("Fruit" => &["Apple", "Grape", "Grape", "Fig", "Fig"],
                        "Color" => &["Green", "Red", "White", "White", "Red"])?;
let sl: DataFrame = df.slice(2, 3);
assert_eq!(sl.shape(), (3, 2));
println!("{}", sl);Output:
shape: (3, 2)
+-------+-------+
| Fruit | Color |
| ---   | ---   |
| str   | str   |
+=======+=======+
| Grape | White |
+-------+-------+
| Fig   | White |
+-------+-------+
| Fig   | Red   |
+-------+-------+pub fn slice_par(&self, offset: i64, length: usize) -> Self
pub fn _slice_and_realloc(&self, offset: i64, length: usize) -> Self
sourcepub fn head(&self, length: Option<usize>) -> Self
 
pub fn head(&self, length: Option<usize>) -> Self
Get the head of the DataFrame.
Example
let countries: DataFrame =
    df!("Rank by GDP (2021)" => &[1, 2, 3, 4, 5],
        "Continent" => &["North America", "Asia", "Asia", "Europe", "Europe"],
        "Country" => &["United States", "China", "Japan", "Germany", "United Kingdom"],
        "Capital" => &["Washington", "Beijing", "Tokyo", "Berlin", "London"])?;
assert_eq!(countries.shape(), (5, 4));
println!("{}", countries.head(Some(3)));Output:
shape: (3, 4)
+--------------------+---------------+---------------+------------+
| Rank by GDP (2021) | Continent     | Country       | Capital    |
| ---                | ---           | ---           | ---        |
| i32                | str           | str           | str        |
+====================+===============+===============+============+
| 1                  | North America | United States | Washington |
+--------------------+---------------+---------------+------------+
| 2                  | Asia          | China         | Beijing    |
+--------------------+---------------+---------------+------------+
| 3                  | Asia          | Japan         | Tokyo      |
+--------------------+---------------+---------------+------------+sourcepub fn tail(&self, length: Option<usize>) -> Self
 
pub fn tail(&self, length: Option<usize>) -> Self
Get the tail of the DataFrame.
Example
let countries: DataFrame =
    df!("Rank (2021)" => &[105, 106, 107, 108, 109],
        "Apple Price (€/kg)" => &[0.75, 0.70, 0.70, 0.65, 0.52],
        "Country" => &["Kosovo", "Moldova", "North Macedonia", "Syria", "Turkey"])?;
assert_eq!(countries.shape(), (5, 3));
println!("{}", countries.tail(Some(2)));Output:
shape: (2, 3)
+-------------+--------------------+---------+
| Rank (2021) | Apple Price (€/kg) | Country |
| ---         | ---                | ---     |
| i32         | f64                | str     |
+=============+====================+=========+
| 108         | 0.65               | Syria   |
+-------------+--------------------+---------+
| 109         | 0.52               | Turkey  |
+-------------+--------------------+---------+sourcepub fn iter_chunks(&self) -> impl Iterator<Item = ArrowChunk> + '_
 
pub fn iter_chunks(&self) -> impl Iterator<Item = ArrowChunk> + '_
Iterator over the rows in this DataFrame as Arrow RecordBatches.
Panics
Panics if the DataFrame that is passed is not rechunked.
This responsibility is left to the caller as we don’t want to take mutable references here, but we also don’t want to rechunk here, as this operation is costly and would benefit the caller as well.
sourcepub fn shift(&self, periods: i64) -> Self
 
pub fn shift(&self, periods: i64) -> Self
Shift the values by a given period and fill the parts that will be empty due to this operation
with Nones.
See the method on Series for more info on the shift operation.
sourcepub fn fill_null(&self, strategy: FillNullStrategy) -> Result<Self>
 
pub fn fill_null(&self, strategy: FillNullStrategy) -> Result<Self>
Replace None values with one of the following strategies:
- Forward fill (replace None with the previous value)
- Backward fill (replace None with the next value)
- Mean fill (replace None with the mean of the whole array)
- Min fill (replace None with the minimum of the whole array)
- Max fill (replace None with the maximum of the whole array)
See the method on Series for more info on the fill_null operation.
sourcepub fn describe(&self, percentiles: Option<&[f64]>) -> Self
 
pub fn describe(&self, percentiles: Option<&[f64]>) -> Self
Summary statistics for a DataFrame. Only summarizes numeric datatypes at the moment and returns nulls for non-numeric datatypes. Tries to keep the output similar to pandas.
Example
let df1: DataFrame = df!("categorical" => &["d","e","f"],
                         "numeric" => &[1, 2, 3],
                         "object" => &["a", "b", "c"])?;
assert_eq!(df1.shape(), (3, 3));
let df2: DataFrame = df1.describe(None);
assert_eq!(df2.shape(), (8, 4));
println!("{}", df2);Output:
shape: (8, 4)
┌──────────┬─────────────┬─────────┬────────┐
│ describe ┆ categorical ┆ numeric ┆ object │
│ ---      ┆ ---         ┆ ---     ┆ ---    │
│ str      ┆ f64         ┆ f64     ┆ f64    │
╞══════════╪═════════════╪═════════╪════════╡
│ count    ┆ 3.0         ┆ 3.0     ┆ 3.0    │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ mean     ┆ null        ┆ 2.0     ┆ null   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ std      ┆ null        ┆ 1.0     ┆ null   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ min      ┆ null        ┆ 1.0     ┆ null   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 0.25%    ┆ null        ┆ 1.5     ┆ null   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 0.5%     ┆ null        ┆ 2.0     ┆ null   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 0.75%    ┆ null        ┆ 2.5     ┆ null   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ max      ┆ null        ┆ 3.0     ┆ null   │
└──────────┴─────────────┴─────────┴────────┘sourcepub fn max(&self) -> Self
 
pub fn max(&self) -> Self
Aggregate the columns to their maximum values.
Example
let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
                         "Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));
let df2: DataFrame = df1.max();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);Output:
shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| ---     | ---     |
| i32     | i32     |
+=========+=========+
| 6       | 5       |
+---------+---------+sourcepub fn std(&self) -> Self
 
pub fn std(&self) -> Self
Aggregate the columns to their standard deviation values.
Example
let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
                         "Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));
let df2: DataFrame = df1.std();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);Output:
shape: (1, 2)
+-------------------+--------------------+
| Die n°1           | Die n°2            |
| ---               | ---                |
| f64               | f64                |
+===================+====================+
| 2.280350850198276 | 1.0954451150103321 |
+-------------------+--------------------+sourcepub fn var(&self) -> Self
 
pub fn var(&self) -> Self
Aggregate the columns to their variance values.
Example
let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
                         "Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));
let df2: DataFrame = df1.var();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);Output:
shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| ---     | ---     |
| f64     | f64     |
+=========+=========+
| 5.2     | 1.2     |
+---------+---------+sourcepub fn min(&self) -> Self
 
pub fn min(&self) -> Self
Aggregate the columns to their minimum values.
Example
let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
                         "Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));
let df2: DataFrame = df1.min();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);Output:
shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| ---     | ---     |
| i32     | i32     |
+=========+=========+
| 1       | 2       |
+---------+---------+sourcepub fn sum(&self) -> Self
 
pub fn sum(&self) -> Self
Aggregate the columns to their sum values.
Example
let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
                         "Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));
let df2: DataFrame = df1.sum();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);Output:
shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| ---     | ---     |
| i32     | i32     |
+=========+=========+
| 16      | 16      |
+---------+---------+sourcepub fn mean(&self) -> Self
 
pub fn mean(&self) -> Self
Aggregate the columns to their mean values.
Example
let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
                         "Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));
let df2: DataFrame = df1.mean();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);Output:
shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| ---     | ---     |
| f64     | f64     |
+=========+=========+
| 3.2     | 3.2     |
+---------+---------+sourcepub fn median(&self) -> Self
 
pub fn median(&self) -> Self
Aggregate the columns to their median values.
Example
let df1: DataFrame = df!("Die n°1" => &[1, 3, 1, 5, 6],
                         "Die n°2" => &[3, 2, 3, 5, 3])?;
assert_eq!(df1.shape(), (5, 2));
let df2: DataFrame = df1.median();
assert_eq!(df2.shape(), (1, 2));
println!("{}", df2);Output:
shape: (1, 2)
+---------+---------+
| Die n°1 | Die n°2 |
| ---     | ---     |
| i32     | i32     |
+=========+=========+
| 3       | 3       |
+---------+---------+sourcepub fn quantile(
    &self,
    quantile: f64,
    interpol: QuantileInterpolOptions
) -> Result<Self>
 
pub fn quantile(
    &self,
    quantile: f64,
    interpol: QuantileInterpolOptions
) -> Result<Self>
Aggregate the columns to their quantile values.
sourcepub fn hmin(&self) -> Result<Option<Series>>
 Available on crate feature zip_with only.
pub fn hmin(&self) -> Result<Option<Series>>
zip_with only. Aggregate the columns horizontally to their min values.
sourcepub fn hmax(&self) -> Result<Option<Series>>
 Available on crate feature zip_with only.
pub fn hmax(&self) -> Result<Option<Series>>
zip_with only. Aggregate the columns horizontally to their max values.
sourcepub fn hsum(&self, none_strategy: NullStrategy) -> Result<Option<Series>>
 
pub fn hsum(&self, none_strategy: NullStrategy) -> Result<Option<Series>>
Aggregate the columns horizontally to their sum values.
sourcepub fn hmean(&self, none_strategy: NullStrategy) -> Result<Option<Series>>
 
pub fn hmean(&self, none_strategy: NullStrategy) -> Result<Option<Series>>
Aggregate the columns horizontally to their mean values.
sourcepub fn pipe<F, B>(self, f: F) -> Result<B> where
    F: Fn(DataFrame) -> Result<B>, 
 
pub fn pipe<F, B>(self, f: F) -> Result<B> where
    F: Fn(DataFrame) -> Result<B>, 
Pipe different functions/ closure operations that work on a DataFrame together.
sourcepub fn pipe_mut<F, B>(&mut self, f: F) -> Result<B> where
    F: Fn(&mut DataFrame) -> Result<B>, 
 
pub fn pipe_mut<F, B>(&mut self, f: F) -> Result<B> where
    F: Fn(&mut DataFrame) -> Result<B>, 
Pipe different functions/ closure operations that work on a DataFrame together.
sourcepub fn pipe_with_args<F, B, Args>(self, f: F, args: Args) -> Result<B> where
    F: Fn(DataFrame, Args) -> Result<B>, 
 
pub fn pipe_with_args<F, B, Args>(self, f: F, args: Args) -> Result<B> where
    F: Fn(DataFrame, Args) -> Result<B>, 
Pipe different functions/ closure operations that work on a DataFrame together.
sourcepub fn drop_duplicates(
    &self,
    maintain_order: bool,
    subset: Option<&[String]>
) -> Result<Self>
 👎 Deprecated: use distinct
pub fn drop_duplicates(
    &self,
    maintain_order: bool,
    subset: Option<&[String]>
) -> Result<Self>
use distinct
Drop duplicate rows from a DataFrame.
This fails when there is a column of type List in DataFrame
Example
let df = df! {
              "flt" => [1., 1., 2., 2., 3., 3.],
              "int" => [1, 1, 2, 2, 3, 3, ],
              "str" => ["a", "a", "b", "b", "c", "c"]
          }?;
println!("{}", df.drop_duplicates(true, None)?);Returns
+-----+-----+-----+
| flt | int | str |
| --- | --- | --- |
| f64 | i32 | str |
+=====+=====+=====+
| 1   | 1   | "a" |
+-----+-----+-----+
| 2   | 2   | "b" |
+-----+-----+-----+
| 3   | 3   | "c" |
+-----+-----+-----+sourcepub fn unique_stable(
    &self,
    subset: Option<&[String]>,
    keep: UniqueKeepStrategy
) -> Result<DataFrame>
 
pub fn unique_stable(
    &self,
    subset: Option<&[String]>,
    keep: UniqueKeepStrategy
) -> Result<DataFrame>
Drop duplicate rows from a DataFrame.
This fails when there is a column of type List in DataFrame
Stable means that the order is maintained. This has a higher cost than an unstable distinct.
Example
let df = df! {
              "flt" => [1., 1., 2., 2., 3., 3.],
              "int" => [1, 1, 2, 2, 3, 3, ],
              "str" => ["a", "a", "b", "b", "c", "c"]
          }?;
println!("{}", df.unique_stable(None, UniqueKeepStrategy::First)?);Returns
+-----+-----+-----+
| flt | int | str |
| --- | --- | --- |
| f64 | i32 | str |
+=====+=====+=====+
| 1   | 1   | "a" |
+-----+-----+-----+
| 2   | 2   | "b" |
+-----+-----+-----+
| 3   | 3   | "c" |
+-----+-----+-----+sourcepub fn unique(
    &self,
    subset: Option<&[String]>,
    keep: UniqueKeepStrategy
) -> Result<DataFrame>
 
pub fn unique(
    &self,
    subset: Option<&[String]>,
    keep: UniqueKeepStrategy
) -> Result<DataFrame>
Unstable unique: the order of the rows is not maintained. See DataFrame::unique_stable for the order-preserving variant.
sourcepub fn is_unique(&self) -> Result<BooleanChunked>
 
pub fn is_unique(&self) -> Result<BooleanChunked>
Get a mask of all the unique rows in the DataFrame.
Example
let df: DataFrame = df!("Company" => &["Apple", "Microsoft"],
                        "ISIN" => &["US0378331005", "US5949181045"])?;
let ca: ChunkedArray<BooleanType> = df.is_unique()?;
assert!(ca.all());sourcepub fn is_duplicated(&self) -> Result<BooleanChunked>
 
pub fn is_duplicated(&self) -> Result<BooleanChunked>
Get a mask of all the duplicated rows in the DataFrame.
Example
let df: DataFrame = df!("Company" => &["Alphabet", "Alphabet"],
                        "ISIN" => &["US02079K3059", "US02079K1079"])?;
let ca: ChunkedArray<BooleanType> = df.is_duplicated()?;
assert!(!ca.all());sourcepub fn null_count(&self) -> Self
 
pub fn null_count(&self) -> Self
Create a new DataFrame that shows the null counts per column.
sourcepub fn hash_rows(
    &self,
    hasher_builder: Option<RandomState>
) -> Result<UInt64Chunked>
 
pub fn hash_rows(
    &self,
    hasher_builder: Option<RandomState>
) -> Result<UInt64Chunked>
Hash and combine the row values
sourcepub fn get_supertype(&self) -> Option<Result<DataType>>
 
pub fn get_supertype(&self) -> Option<Result<DataType>>
Get the supertype of the columns in this DataFrame
sourceimpl DataFrame
 
impl DataFrame
sourcepub fn frame_equal(&self, other: &DataFrame) -> bool
 
pub fn frame_equal(&self, other: &DataFrame) -> bool
Check if DataFrames are equal. Note that None == None evaluates to false.
Example
let df1: DataFrame = df!("Atomic number" => &[1, 51, 300],
                        "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
let df2: DataFrame = df!("Atomic number" => &[1, 51, 300],
                        "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
assert!(!df1.frame_equal(&df2));sourcepub fn frame_equal_missing(&self, other: &DataFrame) -> bool
 
pub fn frame_equal_missing(&self, other: &DataFrame) -> bool
Check if all values in DataFrames are equal where None == None evaluates to true.
Example
let df1: DataFrame = df!("Atomic number" => &[1, 51, 300],
                        "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
let df2: DataFrame = df!("Atomic number" => &[1, 51, 300],
                        "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?;
assert!(df1.frame_equal_missing(&df2));Trait Implementations
sourceimpl FromIterator<Series> for DataFrame
 
impl FromIterator<Series> for DataFrame
sourcefn from_iter<T: IntoIterator<Item = Series>>(iter: T) -> Self
 
fn from_iter<T: IntoIterator<Item = Series>>(iter: T) -> Self
Panics
Panics if Series have different lengths.
sourceimpl Index<RangeInclusive<usize>> for DataFrame
 
impl Index<RangeInclusive<usize>> for DataFrame
sourceimpl Index<RangeToInclusive<usize>> for DataFrame
 
impl Index<RangeToInclusive<usize>> for DataFrame
sourceimpl TryFrom<(Chunk<Box<dyn Array + 'static, Global>>, &[Field])> for DataFrame
 
impl TryFrom<(Chunk<Box<dyn Array + 'static, Global>>, &[Field])> for DataFrame
type Error = PolarsError
type Error = PolarsError
The type returned in the event of a conversion error.
sourcefn try_from(arg: (ArrowChunk, &[ArrowField])) -> Result<DataFrame>
 
fn try_from(arg: (ArrowChunk, &[ArrowField])) -> Result<DataFrame>
Performs the conversion.
sourceimpl TryFrom<StructArray> for DataFrame
 
impl TryFrom<StructArray> for DataFrame
type Error = PolarsError
type Error = PolarsError
The type returned in the event of a conversion error.
sourcefn try_from(arr: StructArray) -> Result<Self>
 
fn try_from(arr: StructArray) -> Result<Self>
Performs the conversion.
Auto Trait Implementations
impl !RefUnwindSafe for DataFrame
impl Send for DataFrame
impl Sync for DataFrame
impl Unpin for DataFrame
impl !UnwindSafe for DataFrame
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
    T: ?Sized, 
 
impl<T> BorrowMut<T> for T where
    T: ?Sized, 
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
 
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more