Struct polars::frame::DataFrame[][src]

pub struct DataFrame { /* fields omitted */ }

Implementations

impl DataFrame[src]

pub fn to_ndarray<N>(
    &self
) -> Result<ArrayBase<OwnedRepr<<N as ArrowPrimitiveType>::Native>, Dim<[usize; 2]>>, PolarsError> where
    N: PolarsNumericType,
    <N as ArrowPrimitiveType>::Native: Zero,
    <N as ArrowPrimitiveType>::Native: Copy
[src]

Create a 2D ndarray::Array from this DataFrame. This requires all columns in the DataFrame to be non-null and numeric. They will be cast to the same data type (if they aren’t already).

use polars_core::prelude::*;
let a = UInt32Chunked::new_from_slice("a", &[1, 2, 3]).into_series();
let b = Float64Chunked::new_from_slice("b", &[10., 8., 6.]).into_series();

let df = DataFrame::new(vec![a, b]).unwrap();
let ndarray = df.to_ndarray::<Float64Type>().unwrap();
println!("{:?}", ndarray);

Outputs:

[[1.0, 10.0],
 [2.0, 8.0],
 [3.0, 6.0]], shape=[3, 2], strides=[2, 1], layout=C (0x1), const ndim=2

impl DataFrame[src]

pub fn sample_n(
    &self,
    n: usize,
    with_replacement: bool
) -> Result<DataFrame, PolarsError>
[src]

Sample n datapoints from this DataFrame.

pub fn sample_frac(
    &self,
    frac: f64,
    with_replacement: bool
) -> Result<DataFrame, PolarsError>
[src]

Sample a fraction between 0.0-1.0 of this DataFrame.

impl DataFrame[src]

pub fn explode<'a, J, S>(&self, columns: S) -> Result<DataFrame, PolarsError> where
    S: Selection<'a, J>, 
[src]

Explode DataFrame to long format by exploding a column with Lists.

Example

 use polars_core::prelude::*;
 let s0 = Series::new("a", &[1i64, 2, 3]);
 let s1 = Series::new("b", &[1i64, 1, 1]);
 let s2 = Series::new("c", &[2i64, 2, 2]);
 let list = Series::new("foo", &[s0, s1, s2]);

 let s0 = Series::new("B", [1, 2, 3]);
 let s1 = Series::new("C", [1, 1, 1]);
 let df = DataFrame::new(vec![list, s0, s1]).unwrap();
 let exploded = df.explode("foo").unwrap();

 println!("{:?}", df);
 println!("{:?}", exploded);

Outputs:

 +-------------+-----+-----+
 | foo         | B   | C   |
 | ---         | --- | --- |
 | list [i64]  | i32 | i32 |
 +=============+=====+=====+
 | "[1, 2, 3]" | 1   | 1   |
 +-------------+-----+-----+
 | "[1, 1, 1]" | 2   | 1   |
 +-------------+-----+-----+
 | "[2, 2, 2]" | 3   | 1   |
 +-------------+-----+-----+

 +-----+-----+-----+
 | foo | B   | C   |
 | --- | --- | --- |
 | i64 | i32 | i32 |
 +=====+=====+=====+
 | 1   | 1   | 1   |
 +-----+-----+-----+
 | 2   | 1   | 1   |
 +-----+-----+-----+
 | 3   | 1   | 1   |
 +-----+-----+-----+
 | 1   | 2   | 1   |
 +-----+-----+-----+
 | 1   | 2   | 1   |
 +-----+-----+-----+
 | 1   | 2   | 1   |
 +-----+-----+-----+
 | 2   | 3   | 1   |
 +-----+-----+-----+
 | 2   | 3   | 1   |
 +-----+-----+-----+
 | 2   | 3   | 1   |
 +-----+-----+-----+

pub fn melt<'a, 'b, J, K, SelId, SelValue>(
    &self,
    id_vars: SelId,
    value_vars: SelValue
) -> Result<DataFrame, PolarsError> where
    SelId: Selection<'a, J>,
    SelValue: Selection<'b, K>, 
[src]

Unpivot a DataFrame from wide to long format.

Example

Arguments

  • id_vars - String slice that represents the columns to use as id variables.
  • value_vars - String slice that represents the columns to use as value variables.

use polars_core::prelude::*;
let df = df!("A" => &["a", "b", "a"],
             "B" => &[1, 3, 5],
             "C" => &[10, 11, 12],
             "D" => &[2, 4, 6]
    )
.unwrap();

let melted = df.melt(&["A", "B"], &["C", "D"]).unwrap();
println!("{:?}", df);
println!("{:?}", melted);

Outputs:

 +-----+-----+-----+-----+
 | A   | B   | C   | D   |
 | --- | --- | --- | --- |
 | str | i32 | i32 | i32 |
 +=====+=====+=====+=====+
 | "a" | 1   | 10  | 2   |
 +-----+-----+-----+-----+
 | "b" | 3   | 11  | 4   |
 +-----+-----+-----+-----+
 | "a" | 5   | 12  | 6   |
 +-----+-----+-----+-----+

 +-----+-----+----------+-------+
 | A   | B   | variable | value |
 | --- | --- | ---      | ---   |
 | str | i32 | str      | i32   |
 +=====+=====+==========+=======+
 | "a" | 1   | "C"      | 10    |
 +-----+-----+----------+-------+
 | "b" | 3   | "C"      | 11    |
 +-----+-----+----------+-------+
 | "a" | 5   | "C"      | 12    |
 +-----+-----+----------+-------+
 | "a" | 1   | "D"      | 2     |
 +-----+-----+----------+-------+
 | "b" | 3   | "D"      | 4     |
 +-----+-----+----------+-------+
 | "a" | 5   | "D"      | 6     |
 +-----+-----+----------+-------+

impl DataFrame[src]

pub fn downsample(
    &self,
    key: &str,
    rule: SampleRule
) -> Result<GroupBy<'_, '_>, PolarsError>
[src]

Downsample a temporal column by some frequency/rule.

Examples

Consider the following input DataFrame:

╭─────────────────────┬─────╮
│ ms                  ┆ i   │
│ ---                 ┆ --- │
│ date64(ms)          ┆ u8  │
╞═════════════════════╪═════╡
│ 2000-01-01 00:00:00 ┆ 0   │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:01:00 ┆ 1   │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:02:00 ┆ 2   │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:03:00 ┆ 3   │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ ...                 ┆ ... │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:15:00 ┆ 15  │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:16:00 ┆ 16  │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:17:00 ┆ 17  │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:18:00 ┆ 18  │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
│ 2000-01-01 00:19:00 ┆ 19  │
╰─────────────────────┴─────╯
use polars_core::prelude::*;
use polars_core::frame::groupby::resample::SampleRule;

fn example(df: &DataFrame) -> Result<DataFrame> {
    df.downsample("datetime", SampleRule::Minute(6))?
        .first()?
        .sort("datetime", false)
}

outputs:

 ╭─────────────────────┬─────────╮
 │ ms                  ┆ i_first │
 │ ---                 ┆ ---     │
 │ date64(ms)          ┆ u8      │
 ╞═════════════════════╪═════════╡
 │ 2000-01-01 00:00:00 ┆ 0       │
 ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
 │ 2000-01-01 00:05:00 ┆ 5       │
 ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
 │ 2000-01-01 00:10:00 ┆ 10      │
 ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
 │ 2000-01-01 00:15:00 ┆ 15      │
 ╰─────────────────────┴─────────╯

pub fn downsample_with_series(
    &self,
    key: &Series,
    rule: SampleRule
) -> Result<GroupBy<'_, '_>, PolarsError>
[src]

impl DataFrame[src]

pub fn groupby_with_series(
    &self,
    by: Vec<Series, Global>,
    multithreaded: bool
) -> Result<GroupBy<'_, '_>, PolarsError>
[src]

pub fn groupby<'g, J, S>(&self, by: S) -> Result<GroupBy<'_, '_>, PolarsError> where
    S: Selection<'g, J>, 
[src]

Group DataFrame using a Series column.

Example

use polars_core::prelude::*;
fn groupby_sum(df: &DataFrame) -> Result<DataFrame> {
    df.groupby("column_name")?
    .select("agg_column_name")
    .sum()
}

pub fn groupby_stable<'g, J, S>(
    &self,
    by: S
) -> Result<GroupBy<'_, '_>, PolarsError> where
    S: Selection<'g, J>, 
[src]

Group DataFrame using a Series column. The groups are ordered by their smallest row index.

impl DataFrame[src]

pub fn join<'a, J, S1, S2>(
    &self,
    other: &DataFrame,
    left_on: S1,
    right_on: S2,
    how: JoinType
) -> Result<DataFrame, PolarsError> where
    S1: Selection<'a, J>,
    S2: Selection<'a, J>, 
[src]

Generic join method. Can be used to join on multiple columns.

pub fn inner_join(
    &self,
    other: &DataFrame,
    left_on: &str,
    right_on: &str
) -> Result<DataFrame, PolarsError>
[src]

Perform an inner join on two DataFrames.

Example

use polars_core::prelude::*;
fn join_dfs(left: &DataFrame, right: &DataFrame) -> Result<DataFrame> {
    left.inner_join(right, "join_column_left", "join_column_right")
}

pub fn left_join(
    &self,
    other: &DataFrame,
    left_on: &str,
    right_on: &str
) -> Result<DataFrame, PolarsError>
[src]

Perform a left join on two DataFrames

Example

use polars_core::prelude::*;
fn join_dfs(left: &DataFrame, right: &DataFrame) -> Result<DataFrame> {
    left.left_join(right, "join_column_left", "join_column_right")
}

pub fn outer_join(
    &self,
    other: &DataFrame,
    left_on: &str,
    right_on: &str
) -> Result<DataFrame, PolarsError>
[src]

Perform an outer join on two DataFrames

Example

use polars_core::prelude::*;
fn join_dfs(left: &DataFrame, right: &DataFrame) -> Result<DataFrame> {
    left.outer_join(right, "join_column_left", "join_column_right")
}

impl DataFrame[src]

pub fn get_row(&self, idx: usize) -> Row<'_>[src]

Get a row from a DataFrame. Use of this is discouraged as it will likely be slow.

pub fn get_row_amortized(&'a self, idx: usize, row: &mut Row<'a>)[src]

Amortize allocations by reusing a row. The caller is responsible for making sure the row has at least enough capacity for the number of columns in the DataFrame.

pub unsafe fn get_row_amortized_unchecked(
    &'a self,
    idx: usize,
    row: &mut Row<'a>
)
[src]

Amortize allocations by reusing a row. The caller is responsible for making sure the row has at least enough capacity for the number of columns in the DataFrame.

Safety

Does not do any bounds checking.

impl DataFrame[src]

pub fn new<S>(columns: Vec<S, Global>) -> Result<DataFrame, PolarsError> where
    S: IntoSeries
[src]

Create a DataFrame from a Vector of Series.

Example

use polars_core::prelude::*;
let s0 = Series::new("days", [0, 1, 2].as_ref());
let s1 = Series::new("temp", [22.1, 19.9, 7.].as_ref());
let df = DataFrame::new(vec![s0, s1]).unwrap();

pub fn new_no_checks(columns: Vec<Series, Global>) -> DataFrame[src]

Create a new DataFrame without checking the length or duplicate occurrence of the Series.

It is advised to use DataFrame::new in favor of this method.

Panic

It is the caller's responsibility to uphold the contract of all Series having an equal length; if not, this may panic down the line.

pub fn agg_chunks(&self) -> DataFrame[src]

Aggregate all chunks to contiguous memory.

pub fn as_single_chunk(&mut self) -> &mut DataFrame[src]

Aggregate all the chunks in the DataFrame to a single chunk.

pub fn rechunk(&mut self) -> &mut DataFrame[src]

Ensure all the chunks in the DataFrame are aligned.

pub fn schema(&self) -> Schema[src]

Get the DataFrame schema.

pub fn get_columns(&self) -> &Vec<Series, Global>[src]

Get a reference to the DataFrame columns.

pub fn iter(&self) -> Iter<'_, Series>[src]

Iterator over the columns as Series.

pub fn get_column_names(&self) -> Vec<&str, Global>[src]

pub fn set_column_names<S>(&mut self, names: &[S]) -> Result<(), PolarsError> where
    S: AsRef<str>, 
[src]

Set the column names.

pub fn dtypes(&self) -> Vec<DataType, Global>[src]

Get the data types of the columns in the DataFrame.

pub fn n_chunks(&self) -> Result<usize, PolarsError>[src]

The number of chunks per column

pub fn fields(&self) -> Vec<Field, Global>[src]

Get a reference to the schema fields of the DataFrame.

pub fn shape(&self) -> (usize, usize)[src]

Get (width x height)

Example

use polars_core::prelude::*;
fn assert_shape(df: &DataFrame, shape: (usize, usize)) {
    assert_eq!(df.shape(), shape)
}

pub fn width(&self) -> usize[src]

Get width of DataFrame

Example

use polars_core::prelude::*;
fn assert_width(df: &DataFrame, width: usize) {
    assert_eq!(df.width(), width)
}

pub fn height(&self) -> usize[src]

Get height of DataFrame

Example

use polars_core::prelude::*;
fn assert_height(df: &DataFrame, height: usize) {
    assert_eq!(df.height(), height)
}

pub fn is_empty(&self) -> bool[src]

Check if DataFrame is empty

pub fn hstack_mut(
    &mut self,
    columns: &[Series]
) -> Result<&mut DataFrame, PolarsError>
[src]

Add multiple Series to a DataFrame. The added Series are required to have the same length.

Example

use polars_core::prelude::*;
fn stack(df: &mut DataFrame, columns: &[Series]) {
    df.hstack_mut(columns);
}

pub fn hstack(&self, columns: &[Series]) -> Result<DataFrame, PolarsError>[src]

Add multiple Series to a DataFrame. The added Series are required to have the same length.

pub fn vstack(&self, columns: &DataFrame) -> Result<DataFrame, PolarsError>[src]

Concatenate a DataFrame to this DataFrame and return as newly allocated DataFrame

pub fn vstack_mut(
    &mut self,
    df: &DataFrame
) -> Result<&mut DataFrame, PolarsError>
[src]

Concatenate a DataFrame to this DataFrame

pub fn drop_in_place(&mut self, name: &str) -> Result<Series, PolarsError>[src]

Remove column by name

Example

use polars_core::prelude::*;
fn drop_column(df: &mut DataFrame, name: &str) -> Result<Series> {
    df.drop_in_place(name)
}

pub fn drop_nulls(
    &self,
    subset: Option<&[String]>
) -> Result<DataFrame, PolarsError>
[src]

Return a new DataFrame where all null values are dropped

pub fn drop(&self, name: &str) -> Result<DataFrame, PolarsError>[src]

Drop a column by name. This is a pure method and will return a new DataFrame instead of modifying the current one in place.

pub fn insert_at_idx<S>(
    &mut self,
    index: usize,
    column: S
) -> Result<&mut DataFrame, PolarsError> where
    S: IntoSeries
[src]

Insert a new column at a given index

pub fn with_column<S>(
    &mut self,
    column: S
) -> Result<&mut DataFrame, PolarsError> where
    S: IntoSeries
[src]

Add a new column to this DataFrame or replace an existing one.

pub fn get(&self, idx: usize) -> Option<Vec<AnyValue<'_>, Global>>[src]

Get a row in the DataFrame. Beware: this is slow.

Example

use polars_core::prelude::*;
fn example(df: &mut DataFrame, idx: usize) -> Option<Vec<AnyValue>> {
    df.get(idx)
}

pub fn select_at_idx(&self, idx: usize) -> Option<&Series>[src]

Select a series by index.

pub fn find_idx_by_name(&self, name: &str) -> Option<usize>[src]

Get column index of a series by name.

pub fn column(&self, name: &str) -> Result<&Series, PolarsError>[src]

Select a single column by name.

pub fn columns<I, S>(
    &self,
    names: I
) -> Result<Vec<&Series, Global>, PolarsError> where
    S: AsRef<str>,
    I: IntoIterator<Item = S>, 
[src]

Select multiple columns by name.

pub fn select<'a, S, J>(&self, selection: S) -> Result<DataFrame, PolarsError> where
    S: Selection<'a, J>, 
[src]

Select column(s) from this DataFrame and return a new DataFrame.

Examples

use polars_core::prelude::*;

fn example(df: &DataFrame, possible: &str) -> Result<DataFrame> {
    match possible {
        "by_str" => df.select("my-column"),
        "by_tuple" => df.select(("col_1", "col_2")),
        "by_vec" => df.select(vec!["col_a", "col_b"]),
         _ => unimplemented!()
    }
}

pub fn select_series<'a, S, J>(
    &self,
    selection: S
) -> Result<Vec<Series, Global>, PolarsError> where
    S: Selection<'a, J>, 
[src]

Select column(s) from this DataFrame and return them into a Vector.

pub fn filter(
    &self,
    mask: &ChunkedArray<BooleanType>
) -> Result<DataFrame, PolarsError>
[src]

Take DataFrame rows by a boolean mask.

Example

use polars_core::prelude::*;
fn example(df: &DataFrame) -> Result<DataFrame> {
    let mask = df.column("sepal.width")?.is_not_null();
    df.filter(&mask)
}

pub fn take_iter<I>(&self, iter: I) -> DataFrame where
    I: Iterator<Item = usize> + Clone + Sync
[src]

Take DataFrame value by indexes from an iterator.

Example

use polars_core::prelude::*;
fn example(df: &DataFrame) -> DataFrame {
    let iterator = (0..9).into_iter();
    df.take_iter(iterator)
}

Safety

Out of bounds access doesn’t Error but will return a Null value

pub unsafe fn take_iter_unchecked<I>(&self, iter: I) -> DataFrame where
    I: Iterator<Item = usize> + Clone + Sync
[src]

Take DataFrame values by indexes from an iterator.

Safety

This doesn’t do any bounds checking but checks null validity.

pub unsafe fn take_opt_iter_unchecked<I>(&self, iter: I) -> DataFrame where
    I: Iterator<Item = Option<usize>> + Clone + Sync
[src]

Take DataFrame values by indexes from an iterator that may contain None values.

Safety

This doesn’t do any bounds checking. Out-of-bounds indexes may access uninitialized memory. Null validity is checked.

pub fn take(&self, indices: &ChunkedArray<UInt32Type>) -> DataFrame[src]

Take DataFrame rows by index values.

Example

use polars_core::prelude::*;
fn example(df: &DataFrame) -> DataFrame {
    let idx = UInt32Chunked::new_from_slice("idx", &[0, 1, 9]);
    df.take(&idx)
}

Safety

Out of bounds access doesn’t Error but will return a Null value

pub fn rename(
    &mut self,
    column: &str,
    name: &str
) -> Result<&mut DataFrame, PolarsError>
[src]

Rename a column in the DataFrame

Example

use polars_core::prelude::*;
fn example(df: &mut DataFrame) -> Result<&mut DataFrame> {
    let original_name = "foo";
    let new_name = "bar";
    df.rename(original_name, new_name)
}

pub fn sort_in_place(
    &mut self,
    by_column: &str,
    reverse: bool
) -> Result<&mut DataFrame, PolarsError>
[src]

Sort DataFrame in place by a column.

pub fn sort<'a, S, J>(
    &self,
    by_column: S,
    reverse: impl IntoVec<bool>
) -> Result<DataFrame, PolarsError> where
    S: Selection<'a, J>, 
[src]

Return a sorted clone of this DataFrame.

Example

use polars_core::prelude::*;

fn sort_example(df: &DataFrame, reverse: bool) -> Result<DataFrame> {
    df.sort("a", reverse)
}

fn sort_by_multiple_columns_example(df: &DataFrame) -> Result<DataFrame> {
    df.sort(&["a", "b"], vec![false, true])
}

pub fn replace<S>(
    &mut self,
    column: &str,
    new_col: S
) -> Result<&mut DataFrame, PolarsError> where
    S: IntoSeries
[src]

Replace a column with a series.

pub fn replace_or_add<S>(
    &mut self,
    column: &str,
    new_col: S
) -> Result<&mut DataFrame, PolarsError> where
    S: IntoSeries
[src]

Replace or update a column.

pub fn replace_at_idx<S>(
    &mut self,
    idx: usize,
    new_col: S
) -> Result<&mut DataFrame, PolarsError> where
    S: IntoSeries
[src]

Replace column at index idx with a series.

Example

use polars_core::prelude::*;
let s0 = Series::new("foo", &["ham", "spam", "egg"]);
let s1 = Series::new("ascii", &[70, 79, 79]);
let mut df = DataFrame::new(vec![s0, s1]).unwrap();

// Add 32 to get lowercase ascii values
df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);

pub fn apply<F, S>(
    &mut self,
    column: &str,
    f: F
) -> Result<&mut DataFrame, PolarsError> where
    F: FnOnce(&Series) -> S,
    S: IntoSeries
[src]

Apply a closure to a column. This is the recommended way to do in place modification.

Example

use polars_core::prelude::*;
let s0 = Series::new("foo", &["ham", "spam", "egg"]);
let s1 = Series::new("names", &["Jean", "Claude", "van"]);
let mut df = DataFrame::new(vec![s0, s1]).unwrap();

fn str_to_len(str_val: &Series) -> Series {
    str_val.utf8()
        .unwrap()
        .into_iter()
        .map(|opt_name: Option<&str>| {
            opt_name.map(|name: &str| name.len() as u32)
         })
        .collect::<UInt32Chunked>()
        .into_series()
}

// Replace the names column by the length of the names.
df.apply("names", str_to_len);

Results in:

+--------+-------+
| foo    | names |
| ---    | ---   |
| str    | u32   |
+========+=======+
| "ham"  | 4     |
+--------+-------+
| "spam" | 6     |
+--------+-------+
| "egg"  | 3     |
+--------+-------+

pub fn apply_at_idx<F, S>(
    &mut self,
    idx: usize,
    f: F
) -> Result<&mut DataFrame, PolarsError> where
    F: FnOnce(&Series) -> S,
    S: IntoSeries
[src]

Apply a closure to a column at index idx. This is the recommended way to do in place modification.

Example

use polars_core::prelude::*;
let s0 = Series::new("foo", &["ham", "spam", "egg"]);
let s1 = Series::new("ascii", &[70, 79, 79]);
let mut df = DataFrame::new(vec![s0, s1]).unwrap();

// Add 32 to get lowercase ascii values
df.apply_at_idx(1, |s| s + 32);

Results in:

+--------+-------+
| foo    | ascii |
| ---    | ---   |
| str    | i32   |
+========+=======+
| "ham"  | 102   |
+--------+-------+
| "spam" | 111   |
+--------+-------+
| "egg"  | 111   |
+--------+-------+

pub fn may_apply_at_idx<F, S>(
    &mut self,
    idx: usize,
    f: F
) -> Result<&mut DataFrame, PolarsError> where
    F: FnOnce(&Series) -> Result<S, PolarsError>,
    S: IntoSeries
[src]

Apply a closure that may fail to a column at index idx. This is the recommended way to do in place modification.

Example

This is the idiomatic way to replace some values in a column of a DataFrame given a range of indexes.

let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
let mut df = DataFrame::new(vec![s0, s1]).unwrap();

let idx = vec![0, 1, 4];

df.may_apply("foo", |s| {
    s.utf8()?
    .set_at_idx_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string)))
});

Results in:

+---------------------+--------+
| foo                 | values |
| ---                 | ---    |
| str                 | i32    |
+=====================+========+
| "ham-is-modified"   | 1      |
+---------------------+--------+
| "spam-is-modified"  | 2      |
+---------------------+--------+
| "egg"               | 3      |
+---------------------+--------+
| "bacon"             | 4      |
+---------------------+--------+
| "quack-is-modified" | 5      |
+---------------------+--------+

pub fn may_apply<F, S>(
    &mut self,
    column: &str,
    f: F
) -> Result<&mut DataFrame, PolarsError> where
    F: FnOnce(&Series) -> Result<S, PolarsError>,
    S: IntoSeries
[src]

Apply a closure that may fail to a column. This is the recommended way to do in place modification.

Example

This is the idiomatic way to replace some values in a column of a DataFrame given a boolean mask.

let s0 = Series::new("foo", &["ham", "spam", "egg", "bacon", "quack"]);
let s1 = Series::new("values", &[1, 2, 3, 4, 5]);
let mut df = DataFrame::new(vec![s0, s1]).unwrap();

// create a mask
let values = df.column("values").unwrap();
let mask = values.lt_eq(1) | values.gt_eq(5);

df.may_apply("foo", |s| {
    s.utf8()?
    .set(&mask, Some("not_within_bounds"))
});

Results in:

+---------------------+--------+
| foo                 | values |
| ---                 | ---    |
| str                 | i32    |
+=====================+========+
| "not_within_bounds" | 1      |
+---------------------+--------+
| "spam"              | 2      |
+---------------------+--------+
| "egg"               | 3      |
+---------------------+--------+
| "bacon"             | 4      |
+---------------------+--------+
| "not_within_bounds" | 5      |
+---------------------+--------+

pub fn slice(&self, offset: i64, length: usize) -> DataFrame[src]

Slice the DataFrame along the rows.

pub fn head(&self, length: Option<usize>) -> DataFrame[src]

Get the head of the DataFrame

pub fn tail(&self, length: Option<usize>) -> DataFrame[src]

Get the tail of the DataFrame

pub fn as_record_batches(&self) -> Result<Vec<RecordBatch, Global>, PolarsError>[src]

Transform the underlying chunks in the DataFrame to Arrow RecordBatches

pub fn iter_record_batches(
    &mut self,
    buffer_size: usize
) -> impl Iterator<Item = RecordBatch>
[src]

Iterator over the rows in this DataFrame as Arrow RecordBatches.

pub fn reverse(&self) -> DataFrame[src]

Get a DataFrame with all the columns in reversed order

pub fn shift(&self, periods: i64) -> DataFrame[src]

Shift the values by a given period and fill the parts that will be empty due to this operation with Nones.

See the method on Series for more info on the shift operation.

pub fn fill_none(
    &self,
    strategy: FillNoneStrategy
) -> Result<DataFrame, PolarsError>
[src]

Replace None values with one of the following strategies:

  • Forward fill (replace None with the previous value)
  • Backward fill (replace None with the next value)
  • Mean fill (replace None with the mean of the whole array)
  • Min fill (replace None with the minimum of the whole array)
  • Max fill (replace None with the maximum of the whole array)

See the method on Series for more info on the fill_none operation.

pub fn max(&self) -> DataFrame[src]

Aggregate the columns to their maximum values.

pub fn std(&self) -> DataFrame[src]

Aggregate the columns to their standard deviation values.

pub fn var(&self) -> DataFrame[src]

Aggregate the columns to their variance values.

pub fn min(&self) -> DataFrame[src]

Aggregate the columns to their minimum values.

pub fn sum(&self) -> DataFrame[src]

Aggregate the columns to their sum values.

pub fn mean(&self) -> DataFrame[src]

Aggregate the columns to their mean values.

pub fn median(&self) -> DataFrame[src]

Aggregate the columns to their median values.

pub fn quantile(&self, quantile: f64) -> Result<DataFrame, PolarsError>[src]

Aggregate the columns to their quantile values.

pub fn hmin(&self) -> Result<Option<Series>, PolarsError>[src]

Aggregate the columns horizontally to their min values.

pub fn hmax(&self) -> Result<Option<Series>, PolarsError>[src]

Aggregate the columns horizontally to their max values.

pub fn hsum(&self) -> Result<Option<Series>, PolarsError>[src]

Aggregate the columns horizontally to their sum values.

pub fn hmean(&self) -> Result<Option<Series>, PolarsError>[src]

Aggregate the columns horizontally to their mean values.

pub fn pipe<F, B>(self, f: F) -> Result<B, PolarsError> where
    F: Fn(DataFrame) -> Result<B, PolarsError>, 
[src]

Pipe different functions/ closure operations that work on a DataFrame together.

pub fn pipe_mut<F, B>(&mut self, f: F) -> Result<B, PolarsError> where
    F: Fn(&mut DataFrame) -> Result<B, PolarsError>, 
[src]

Pipe different functions/ closure operations that work on a DataFrame together.

pub fn pipe_with_args<F, B, Args>(
    self,
    f: F,
    args: Args
) -> Result<B, PolarsError> where
    F: Fn(DataFrame, Args) -> Result<B, PolarsError>, 
[src]

Pipe different functions/ closure operations that work on a DataFrame together.

pub fn to_dummies(&self) -> Result<DataFrame, PolarsError>[src]

Create dummy variables.

Example



 use polars_core::prelude::*;

 let df = df! {
      "id" => &[1, 2, 3, 1, 2, 3, 1, 1],
      "type" => &["A", "B", "B", "B", "C", "C", "C", "B"],
      "code" => &["X1", "X2", "X3", "X3", "X2", "X2", "X1", "X1"]
  }.unwrap();

  let dummies = df.to_dummies().unwrap();
  dbg!(dummies);

Outputs:

 +------+------+------+--------+--------+--------+---------+---------+---------+
 | id_1 | id_3 | id_2 | type_A | type_B | type_C | code_X1 | code_X2 | code_X3 |
 | ---  | ---  | ---  | ---    | ---    | ---    | ---     | ---     | ---     |
 | u8   | u8   | u8   | u8     | u8     | u8     | u8      | u8      | u8      |
 +======+======+======+========+========+========+=========+=========+=========+
 | 1    | 0    | 0    | 1      | 0      | 0      | 1       | 0       | 0       |
 +------+------+------+--------+--------+--------+---------+---------+---------+
 | 0    | 0    | 1    | 0      | 1      | 0      | 0       | 1       | 0       |
 +------+------+------+--------+--------+--------+---------+---------+---------+
 | 0    | 1    | 0    | 0      | 1      | 0      | 0       | 0       | 1       |
 +------+------+------+--------+--------+--------+---------+---------+---------+
 | 1    | 0    | 0    | 0      | 1      | 0      | 0       | 0       | 1       |
 +------+------+------+--------+--------+--------+---------+---------+---------+
 | 0    | 0    | 1    | 0      | 0      | 1      | 0       | 1       | 0       |
 +------+------+------+--------+--------+--------+---------+---------+---------+
 | 0    | 1    | 0    | 0      | 0      | 1      | 0       | 1       | 0       |
 +------+------+------+--------+--------+--------+---------+---------+---------+
 | 1    | 0    | 0    | 0      | 0      | 1      | 1       | 0       | 0       |
 +------+------+------+--------+--------+--------+---------+---------+---------+
 | 1    | 0    | 0    | 0      | 1      | 0      | 1       | 0       | 0       |
 +------+------+------+--------+--------+--------+---------+---------+---------+

pub fn drop_duplicates(
    &self,
    maintain_order: bool,
    subset: Option<&[String]>
) -> Result<DataFrame, PolarsError>
[src]

Drop duplicate rows from a DataFrame. This fails when there is a column of type List in the DataFrame.

Example


 use polars_core::prelude::*;

 fn example() -> Result<DataFrame> {
     let df = df! {
                   "flt" => [1., 1., 2., 2., 3., 3.],
                   "int" => [1, 1, 2, 2, 3, 3, ],
                   "str" => ["a", "a", "b", "b", "c", "c"]
               }?;
     df.drop_duplicates(true, None)
 }

Returns

+-----+-----+-----+
| flt | int | str |
| --- | --- | --- |
| f64 | i32 | str |
+=====+=====+=====+
| 1   | 1   | "a" |
+-----+-----+-----+
| 2   | 2   | "b" |
+-----+-----+-----+
| 3   | 3   | "c" |
+-----+-----+-----+

pub fn is_unique(&self) -> Result<ChunkedArray<BooleanType>, PolarsError>[src]

Get a mask of all the unique rows in the DataFrame.

pub fn is_duplicated(&self) -> Result<ChunkedArray<BooleanType>, PolarsError>[src]

Get a mask of all the duplicated rows in the DataFrame.

pub fn null_count(&self) -> DataFrame[src]

Create a new DataFrame that shows the null counts per column.

impl DataFrame[src]

pub fn frame_equal(&self, other: &DataFrame) -> bool[src]

Check if DataFrames are equal. Note that None == None evaluates to false

pub fn frame_equal_missing(&self, other: &DataFrame) -> bool[src]

Check if all values in DataFrames are equal where None == None evaluates to true.

pub fn ptr_equal(&self, other: &DataFrame) -> bool[src]

Checks if the Arc ptrs of the Series are equal

Trait Implementations

impl<'_, '_> Add<&'_ Series> for &'_ DataFrame[src]

type Output = Result<DataFrame, PolarsError>

The resulting type after applying the + operator.

pub fn add(self, rhs: &Series) -> <&'_ DataFrame as Add<&'_ Series>>::Output[src]

Performs the + operation. Read more

impl<'_> Add<&'_ Series> for DataFrame[src]

type Output = Result<DataFrame, PolarsError>

The resulting type after applying the + operator.

pub fn add(self, rhs: &Series) -> <DataFrame as Add<&'_ Series>>::Output[src]

Performs the + operation. Read more

impl Clone for DataFrame[src]

pub fn clone(&self) -> DataFrame[src]

Returns a copy of the value. Read more

fn clone_from(&mut self, source: &Self)1.0.0[src]

Performs copy-assignment from source. Read more

impl Debug for DataFrame[src]

pub fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>[src]

Formats the value using the given formatter. Read more

impl Default for DataFrame[src]

pub fn default() -> DataFrame[src]

Returns the “default value” for a type. Read more

impl Display for DataFrame[src]

pub fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>[src]

Formats the value using the given formatter. Read more

impl<'_, '_> Div<&'_ Series> for &'_ DataFrame[src]

type Output = Result<DataFrame, PolarsError>

The resulting type after applying the / operator.

pub fn div(self, rhs: &Series) -> <&'_ DataFrame as Div<&'_ Series>>::Output[src]

Performs the / operation. Read more

impl<'_> Div<&'_ Series> for DataFrame[src]

type Output = Result<DataFrame, PolarsError>

The resulting type after applying the / operator.

pub fn div(self, rhs: &Series) -> <DataFrame as Div<&'_ Series>>::Output[src]

Performs the / operation. Read more

impl FromIterator<Series> for DataFrame[src]

pub fn from_iter<T>(iter: T) -> DataFrame where
    T: IntoIterator<Item = Series>, 
[src]

Panics

Panics if Series have different lengths.

impl<'_> Index<&'_ str> for DataFrame[src]

type Output = Series

The returned type after indexing.

pub fn index(&self, index: &str) -> &<DataFrame as Index<&'_ str>>::Output[src]

Performs the indexing (container[index]) operation. Read more

impl Index<Range<usize>> for DataFrame[src]

type Output = [Series]

The returned type after indexing.

pub fn index(
    &self,
    index: Range<usize>
) -> &<DataFrame as Index<Range<usize>>>::Output
[src]

Performs the indexing (container[index]) operation. Read more

impl Index<RangeFrom<usize>> for DataFrame[src]

type Output = [Series]

The returned type after indexing.

pub fn index(
    &self,
    index: RangeFrom<usize>
) -> &<DataFrame as Index<RangeFrom<usize>>>::Output
[src]

Performs the indexing (container[index]) operation. Read more

impl Index<RangeFull> for DataFrame[src]

type Output = [Series]

The returned type after indexing.

pub fn index(
    &self,
    index: RangeFull
) -> &<DataFrame as Index<RangeFull>>::Output
[src]

Performs the indexing (container[index]) operation. Read more

impl Index<RangeInclusive<usize>> for DataFrame[src]

type Output = [Series]

The returned type after indexing.

pub fn index(
    &self,
    index: RangeInclusive<usize>
) -> &<DataFrame as Index<RangeInclusive<usize>>>::Output
[src]

Performs the indexing (container[index]) operation. Read more

impl Index<RangeTo<usize>> for DataFrame[src]

type Output = [Series]

The returned type after indexing.

pub fn index(
    &self,
    index: RangeTo<usize>
) -> &<DataFrame as Index<RangeTo<usize>>>::Output
[src]

Performs the indexing (container[index]) operation. Read more

impl Index<RangeToInclusive<usize>> for DataFrame[src]

type Output = [Series]

The returned type after indexing.

pub fn index(
    &self,
    index: RangeToInclusive<usize>
) -> &<DataFrame as Index<RangeToInclusive<usize>>>::Output
[src]

Performs the indexing (container[index]) operation. Read more

impl Index<usize> for DataFrame[src]

type Output = Series

The returned type after indexing.

pub fn index(&self, index: usize) -> &<DataFrame as Index<usize>>::Output[src]

Performs the indexing (container[index]) operation. Read more

impl IntoLazy for DataFrame[src]

pub fn lazy(self) -> LazyFrame[src]

Convert the DataFrame into a lazy DataFrame

impl<'_> Mul<&'_ Series> for DataFrame[src]

type Output = Result<DataFrame, PolarsError>

The resulting type after applying the * operator.

pub fn mul(self, rhs: &Series) -> <DataFrame as Mul<&'_ Series>>::Output[src]

Performs the * operation. Read more

impl<'_, '_> Mul<&'_ Series> for &'_ DataFrame[src]

type Output = Result<DataFrame, PolarsError>

The resulting type after applying the * operator.

pub fn mul(self, rhs: &Series) -> <&'_ DataFrame as Mul<&'_ Series>>::Output[src]

Performs the * operation. Read more

impl<'_, '_> Rem<&'_ Series> for &'_ DataFrame[src]

type Output = Result<DataFrame, PolarsError>

The resulting type after applying the % operator.

pub fn rem(self, rhs: &Series) -> <&'_ DataFrame as Rem<&'_ Series>>::Output[src]

Performs the % operation. Read more

impl<'_> Rem<&'_ Series> for DataFrame[src]

type Output = Result<DataFrame, PolarsError>

The resulting type after applying the % operator.

pub fn rem(self, rhs: &Series) -> <DataFrame as Rem<&'_ Series>>::Output[src]

Performs the % operation. Read more

impl<'_> Sub<&'_ Series> for DataFrame[src]

type Output = Result<DataFrame, PolarsError>

The resulting type after applying the - operator.

pub fn sub(self, rhs: &Series) -> <DataFrame as Sub<&'_ Series>>::Output[src]

Performs the - operation. Read more

impl<'_, '_> Sub<&'_ Series> for &'_ DataFrame[src]

type Output = Result<DataFrame, PolarsError>

The resulting type after applying the - operator.

pub fn sub(self, rhs: &Series) -> <&'_ DataFrame as Sub<&'_ Series>>::Output[src]

Performs the - operation. Read more

impl TryFrom<RecordBatch> for DataFrame[src]

Conversion from a RecordBatch into DataFrame

type Error = PolarsError

The type returned in the event of a conversion error.

pub fn try_from(batch: RecordBatch) -> Result<DataFrame, PolarsError>[src]

Performs the conversion.

impl TryFrom<Vec<RecordBatch, Global>> for DataFrame[src]

Conversion from Vec<RecordBatch> into DataFrame

If the batch size is small, it might be advisable to call rechunk to ensure predictable performance.

type Error = PolarsError

The type returned in the event of a conversion error.

pub fn try_from(
    batches: Vec<RecordBatch, Global>
) -> Result<DataFrame, PolarsError>
[src]

Performs the conversion.

Auto Trait Implementations

Blanket Implementations

impl<T> Any for T where
    T: 'static + ?Sized
[src]

pub fn type_id(&self) -> TypeId[src]

Gets the TypeId of self. Read more

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

pub fn borrow(&self) -> &T[src]

Immutably borrows from an owned value. Read more

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

pub fn borrow_mut(&mut self) -> &mut T[src]

Mutably borrows from an owned value. Read more

impl<T, U> Cast<U> for T where
    U: FromCast<T>, 

pub fn cast(self) -> U

Numeric cast from self to U.

impl<T> From<T> for T[src]

pub fn from(t: T) -> T[src]

Performs the conversion.

impl<T> FromCast<T> for T

pub fn from_cast(t: T) -> T

Numeric cast from T to Self.

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

pub fn into(self) -> U[src]

Performs the conversion.

impl<T> Pointable for T

pub const ALIGN: usize

The alignment of the pointer.

type Init = T

The type for initializers.

pub unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more

pub unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more

pub unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more

pub unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more

impl<T> ToCell for T where
    T: ToString

pub fn to_cell(self) -> Cell

impl<T> ToOwned for T where
    T: Clone
[src]

type Owned = T

The resulting type after obtaining ownership.

pub fn to_owned(&self) -> T[src]

Creates owned data from borrowed data, usually by cloning. Read more

pub fn clone_into(&self, target: &mut T)[src]

🔬 This is a nightly-only experimental API. (toowned_clone_into)

recently added

Uses borrowed data to replace owned data, usually by cloning. Read more

impl<T> ToString for T where
    T: Display + ?Sized
[src]

pub default fn to_string(&self) -> String[src]

Converts the given value to a String. Read more

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = Infallible

The type returned in the event of a conversion error.

pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>[src]

Performs the conversion.

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

pub fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>[src]

Performs the conversion.

impl<V, T> VZip<V> for T where
    V: MultiLane<T>, 

pub fn vzip(self) -> V

impl<T, Rhs, Output> NumOps<Rhs, Output> for T where
    T: Sub<Rhs, Output = Output> + Mul<Rhs, Output = Output> + Div<Rhs, Output = Output> + Add<Rhs, Output = Output> + Rem<Rhs, Output = Output>, 
[src]