use super::hash_join::prepare_hashed_relation;
use crate::chunked_array::builder::PrimitiveChunkedBuilder;
use crate::frame::select::Selection;
use crate::prelude::*;
use arrow::array::{PrimitiveBuilder, StringBuilder};
use enum_dispatch::enum_dispatch;
use fnv::FnvBuildHasher;
use itertools::Itertools;
use num::{Num, NumCast, ToPrimitive, Zero};
use rayon::prelude::*;
use std::collections::HashMap;
use std::hash::Hash;
use std::{
fmt::{Debug, Formatter},
ops::Add,
};
/// Groups the positions of equal items yielded by `a`.
///
/// The items are handed to `prepare_hashed_relation`; for every entry it
/// returns, one `(first, indexes)` tuple is produced, where `first` is the
/// first element of that entry's index list and `indexes` is the list itself.
fn groupby<T>(a: impl Iterator<Item = T>) -> Vec<(usize, Vec<usize>)>
where
    T: Hash + Eq + Copy,
{
    prepare_hashed_relation(a)
        .into_iter()
        .map(|(_key, rows)| {
            // SAFETY: assumes every index list returned by
            // `prepare_hashed_relation` is non-empty — TODO confirm against
            // that helper; an empty list would make this read out of bounds.
            let head = unsafe { *rows.get_unchecked(0) };
            (head, rows)
        })
        .collect()
}
/// Produces the group tuples used by `DataFrame::groupby` for a single key column.
///
/// NOTE(review): the `enum_dispatch(Series)` attribute presumably forwards the
/// call from `Series` to the wrapped chunked array — confirm in the
/// `enum_dispatch` documentation.
#[enum_dispatch(Series)]
trait IntoGroupTuples {
/// Returns one `(first, indexes)` tuple per group.
///
/// The default implementation panics with `unimplemented!()`; types that can
/// be used as a grouping key override it.
fn group_tuples(&self) -> Vec<(usize, Vec<usize>)> {
unimplemented!()
}
}
/// Grouping for integer-typed chunked arrays.
impl<T> IntoGroupTuples for ChunkedArray<T>
where
    T: PolarsIntegerType,
    T::Native: Eq + Hash,
{
    /// Groups on the raw slice when `cont_slice` succeeds, otherwise on the
    /// array's own (optional-valued) iterator.
    fn group_tuples(&self) -> Vec<(usize, Vec<usize>)> {
        match self.cont_slice() {
            Ok(values) => groupby(values.iter()),
            Err(_) => groupby(self.into_iter()),
        }
    }
}
/// Grouping for boolean columns.
impl IntoGroupTuples for BooleanChunked {
    /// Uses the no-null iterator when `is_optimal_aligned` reports true and
    /// the regular iterator otherwise.
    fn group_tuples(&self) -> Vec<(usize, Vec<usize>)> {
        if !self.is_optimal_aligned() {
            return groupby(self.into_iter());
        }
        groupby(self.into_no_null_iter())
    }
}
/// Grouping for string columns.
impl IntoGroupTuples for Utf8Chunked {
    /// Uses the no-null iterator when `is_optimal_aligned` reports true and
    /// the regular iterator otherwise.
    fn group_tuples(&self) -> Vec<(usize, Vec<usize>)> {
        if !self.is_optimal_aligned() {
            return groupby(self.into_iter());
        }
        groupby(self.into_no_null_iter())
    }
}
// These types keep the trait's default `group_tuples`, which panics with
// `unimplemented!()`: grouping on a single float or list key is not supported here.
impl IntoGroupTuples for Float64Chunked {}
impl IntoGroupTuples for Float32Chunked {}
impl IntoGroupTuples for LargeListChunked {}
/// A single key value that can be hashed and compared when grouping on
/// several columns at once.
///
/// Each variant wraps the value of one supported key type; string keys are
/// borrowed for `'a`.
#[derive(Copy, Clone, Hash, Eq, PartialEq)]
enum Groupable<'a> {
    Boolean(bool),
    Utf8(&'a str),
    UInt8(u8),
    UInt16(u16),
    UInt32(u32),
    UInt64(u64),
    Int8(i8),
    Int16(i16),
    Int32(i32),
    Int64(i64),
}

/// `Debug` prints only the wrapped value (no variant name), using the
/// value's `Display` formatting.
impl<'a> Debug for Groupable<'a> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // `Groupable` is `Copy`, so the payloads can be bound by value.
        match *self {
            Groupable::Boolean(value) => write!(f, "{}", value),
            Groupable::Utf8(value) => write!(f, "{}", value),
            Groupable::UInt8(value) => write!(f, "{}", value),
            Groupable::UInt16(value) => write!(f, "{}", value),
            Groupable::UInt32(value) => write!(f, "{}", value),
            Groupable::UInt64(value) => write!(f, "{}", value),
            Groupable::Int8(value) => write!(f, "{}", value),
            Groupable::Int16(value) => write!(f, "{}", value),
            Groupable::Int32(value) => write!(f, "{}", value),
            Groupable::Int64(value) => write!(f, "{}", value),
        }
    }
}
impl Series {
fn as_groupable_iter<'a>(&'a self) -> Box<dyn Iterator<Item = Option<Groupable>> + 'a> {
macro_rules! as_groupable_iter {
($ca:expr, $variant:ident ) => {{
Box::new(
$ca.into_iter()
.map(|opt_b| opt_b.map(|b| Groupable::$variant(b))),
)
}};
}
match self {
Series::Bool(ca) => as_groupable_iter!(ca, Boolean),
Series::UInt8(ca) => as_groupable_iter!(ca, UInt8),
Series::UInt16(ca) => as_groupable_iter!(ca, UInt16),
Series::UInt32(ca) => as_groupable_iter!(ca, UInt32),
Series::UInt64(ca) => as_groupable_iter!(ca, UInt64),
Series::Int8(ca) => as_groupable_iter!(ca, Int8),
Series::Int16(ca) => as_groupable_iter!(ca, Int16),
Series::Int32(ca) => as_groupable_iter!(ca, Int32),
Series::Int64(ca) => as_groupable_iter!(ca, Int64),
Series::Date32(ca) => as_groupable_iter!(ca, Int32),
Series::Date64(ca) => as_groupable_iter!(ca, Int64),
Series::TimestampSecond(ca) => as_groupable_iter!(ca, Int64),
Series::TimestampMillisecond(ca) => as_groupable_iter!(ca, Int64),
Series::TimestampNanosecond(ca) => as_groupable_iter!(ca, Int64),
Series::TimestampMicrosecond(ca) => as_groupable_iter!(ca, Int64),
Series::Time32Second(ca) => as_groupable_iter!(ca, Int32),
Series::Time32Millisecond(ca) => as_groupable_iter!(ca, Int32),
Series::Time64Nanosecond(ca) => as_groupable_iter!(ca, Int64),
Series::Time64Microsecond(ca) => as_groupable_iter!(ca, Int64),
Series::DurationNanosecond(ca) => as_groupable_iter!(ca, Int64),
Series::DurationMicrosecond(ca) => as_groupable_iter!(ca, Int64),
Series::DurationMillisecond(ca) => as_groupable_iter!(ca, Int64),
Series::DurationSecond(ca) => as_groupable_iter!(ca, Int64),
Series::IntervalDayTime(ca) => as_groupable_iter!(ca, Int64),
Series::IntervalYearMonth(ca) => as_groupable_iter!(ca, Int32),
Series::Utf8(ca) => as_groupable_iter!(ca, Utf8),
_ => unimplemented!(),
}
}
}
impl DataFrame {
    /// Groups the rows of this frame by one to five key columns.
    ///
    /// `by` selects the key columns. With a single key the column's own
    /// `group_tuples` is used; with two to five keys the columns are zipped
    /// into tuples of `Option<Groupable>` and grouped on those tuples.
    ///
    /// # Errors
    ///
    /// * Propagates the error of `select_series` when the selection fails.
    /// * Returns `PolarsError::Other` when the selection yields no key column
    ///   or more than five key columns.
    ///
    /// # Panics
    ///
    /// May panic with `unimplemented!()` when a key column has a type that the
    /// grouping helpers do not support.
    pub fn groupby<'g, J, S: Selection<'g, J>>(&self, by: S) -> Result<GroupBy> {
        let selected_keys = self.select_series(by)?;

        let groups = match selected_keys.len() {
            // An empty selection used to fall through to the "more than 5"
            // error below, which described the opposite problem.
            0 => {
                return Err(PolarsError::Other(
                    "at least one key is required to group by".to_string(),
                ));
            }
            1 => selected_keys[0].group_tuples(),
            2 => {
                let iter = selected_keys[0]
                    .as_groupable_iter()
                    .zip(selected_keys[1].as_groupable_iter());
                groupby(iter)
            }
            3 => {
                let iter = selected_keys[0]
                    .as_groupable_iter()
                    .zip(selected_keys[1].as_groupable_iter())
                    .zip(selected_keys[2].as_groupable_iter());
                groupby(iter)
            }
            4 => {
                let iter = selected_keys[0]
                    .as_groupable_iter()
                    .zip(selected_keys[1].as_groupable_iter())
                    .zip(selected_keys[2].as_groupable_iter())
                    .zip(selected_keys[3].as_groupable_iter());
                groupby(iter)
            }
            5 => {
                let iter = selected_keys[0]
                    .as_groupable_iter()
                    .zip(selected_keys[1].as_groupable_iter())
                    .zip(selected_keys[2].as_groupable_iter())
                    .zip(selected_keys[3].as_groupable_iter())
                    .zip(selected_keys[4].as_groupable_iter());
                groupby(iter)
            }
            _ => {
                return Err(PolarsError::Other(
                    "more than 5 combined keys are currently not supported".to_string(),
                ));
            }
        };

        Ok(GroupBy {
            df: self,
            selected_keys,
            groups,
            selected_agg: None,
        })
    }
}
/// The result of `DataFrame::groupby`: the computed groups plus the optional
/// selection of columns to aggregate.
#[derive(Debug, Clone)]
pub struct GroupBy<'df, 'selection_str> {
// The frame that was grouped.
df: &'df DataFrame,
// The key columns the groups were computed from.
selected_keys: Vec<Series>,
// One `(first, indexes)` tuple per group.
groups: Vec<(usize, Vec<usize>)>,
// Names of the columns to aggregate; `None` until `select` or `pivot` sets it.
selected_agg: Option<Vec<&'selection_str str>>,
}
/// Per-group numeric aggregations.
///
/// Every method receives the `(first, indexes)` group tuples and returns one
/// aggregated value per group as a new `Series`. All defaults panic with
/// `unimplemented!()`; numeric chunked arrays override them.
#[enum_dispatch(Series)]
trait NumericAggSync {
/// Mean of each group.
fn agg_mean(&self, _groups: &Vec<(usize, Vec<usize>)>) -> Series {
unimplemented!()
}
/// Minimum of each group.
fn agg_min(&self, _groups: &Vec<(usize, Vec<usize>)>) -> Series {
unimplemented!()
}
/// Maximum of each group.
fn agg_max(&self, _groups: &Vec<(usize, Vec<usize>)>) -> Series {
unimplemented!()
}
/// Sum of each group.
fn agg_sum(&self, _groups: &Vec<(usize, Vec<usize>)>) -> Series {
unimplemented!()
}
}
// Non-numeric types keep the panicking defaults of `NumericAggSync`.
impl NumericAggSync for BooleanChunked {}
impl NumericAggSync for Utf8Chunked {}
impl NumericAggSync for LargeListChunked {}
impl<T> NumericAggSync for ChunkedArray<T>
where
T: PolarsNumericType + Sync,
T::Native: std::ops::Add<Output = T::Native> + Num + NumCast,
{
fn agg_mean(&self, groups: &Vec<(usize, Vec<usize>)>) -> Series {
Series::Float64(
groups
.par_iter()
.map(|(_first, idx)| {
if let Ok(slice) = self.cont_slice() {
let mut sum = 0.;
for i in idx {
sum = sum + slice[*i].to_f64().unwrap()
}
Some(sum / idx.len() as f64)
} else {
let take = unsafe {
self.take_unchecked(idx.into_iter().copied(), Some(self.len()))
};
let opt_sum: Option<T::Native> = take.sum();
opt_sum.map(|sum| sum.to_f64().unwrap() / idx.len() as f64)
}
})
.collect(),
)
}
fn agg_min(&self, groups: &Vec<(usize, Vec<usize>)>) -> Series {
groups
.par_iter()
.map(|(_first, idx)| {
if let Ok(slice) = self.cont_slice() {
let mut min = None;
for i in idx {
let v = slice[*i];
min = match min {
Some(min) => {
if min < v {
Some(min)
} else {
Some(v)
}
}
None => Some(v),
};
}
min
} else {
let take =
unsafe { self.take_unchecked(idx.into_iter().copied(), Some(self.len())) };
take.min()
}
})
.collect::<ChunkedArray<T>>()
.into_series()
}
fn agg_max(&self, groups: &Vec<(usize, Vec<usize>)>) -> Series {
groups
.par_iter()
.map(|(_first, idx)| {
if let Ok(slice) = self.cont_slice() {
let mut max = None;
for i in idx {
let v = slice[*i];
max = match max {
Some(max) => {
if max > v {
Some(max)
} else {
Some(v)
}
}
None => Some(v),
};
}
max
} else {
let take =
unsafe { self.take_unchecked(idx.into_iter().copied(), Some(self.len())) };
take.max()
}
})
.collect::<ChunkedArray<T>>()
.into_series()
}
fn agg_sum(&self, groups: &Vec<(usize, Vec<usize>)>) -> Series {
groups
.par_iter()
.map(|(_first, idx)| {
if let Ok(slice) = self.cont_slice() {
let mut sum = Zero::zero();
for i in idx {
sum = sum + slice[*i]
}
Some(sum)
} else {
let take =
unsafe { self.take_unchecked(idx.into_iter().copied(), Some(self.len())) };
take.sum()
}
})
.collect::<ChunkedArray<T>>()
.into_series()
}
}
/// Per-group "first value" aggregation.
#[enum_dispatch(Series)]
trait AggFirst {
/// Returns a series holding, for every group, the value found at the
/// group's `first` index. The default panics with `unimplemented!()`.
fn agg_first(&self, _groups: &Vec<(usize, Vec<usize>)>) -> Series {
unimplemented!()
}
}
// Shared body for `AggFirst::agg_first`: looks up the value at every group's
// `first` index with `get`, collects the results into `$ca_type` and converts
// that into a `Series`.
macro_rules! impl_agg_first {
($self:ident, $groups:ident, $ca_type:ty) => {{
$groups
.iter()
.map(|(first, _idx)| $self.get(*first))
.collect::<$ca_type>()
.into_series()
}};
}
/// First value of every group for numeric chunked arrays.
impl<T> AggFirst for ChunkedArray<T>
where
T: PolarsNumericType + std::marker::Sync,
{
fn agg_first(&self, groups: &Vec<(usize, Vec<usize>)>) -> Series {
// Delegates to the shared `impl_agg_first!` body.
impl_agg_first!(self, groups, ChunkedArray<T>)
}
}
/// First value of every group for boolean columns.
impl AggFirst for BooleanChunked {
fn agg_first(&self, groups: &Vec<(usize, Vec<usize>)>) -> Series {
// Delegates to the shared `impl_agg_first!` body.
impl_agg_first!(self, groups, BooleanChunked)
}
}
impl AggFirst for Utf8Chunked {
fn agg_first(&self, groups: &Vec<(usize, Vec<usize>)>) -> Series {
groups
.iter()
.map(|(first, _idx)| self.get(*first))
.collect::<Utf8Chunked>()
.into_series()
}
}
// List columns keep the panicking default of `AggFirst::agg_first`.
impl AggFirst for LargeListChunked {}
impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {
    /// Chooses the column(s) that the following aggregation is applied to.
    ///
    /// Consumes and returns `self` so that calls can be chained.
    pub fn select<S, J>(mut self, selection: S) -> Self
    where
        S: Selection<'selection_str, J>,
    {
        self.selected_agg = Some(selection.to_selection_vec());
        self
    }

    /// Builds the key columns of the result: every selected key column,
    /// reduced to the value at each group's `first` index.
    ///
    /// The returned vector is allocated with room for the aggregated columns
    /// that the callers push afterwards.
    fn keys(&self) -> Vec<Series> {
        let n_agg = self.selected_agg.as_ref().map_or(0, |sel| sel.len());
        let mut keys = Vec::with_capacity(self.selected_keys.len() + n_agg);
        for s in &self.selected_keys {
            // SAFETY: assumes the `first` indices are valid row positions of
            // the key columns; `DataFrame::groupby` derives the groups from
            // these very columns.
            let key = unsafe {
                s.take_iter_unchecked(
                    self.groups.iter().map(|(first, _)| *first),
                    Some(self.groups.len()),
                )
            };
            keys.push(key);
        }
        keys
    }

    /// Returns `(key columns, columns to aggregate)`.
    ///
    /// # Errors
    ///
    /// `PolarsError::NoSelection` when no columns were selected, or the error
    /// of `select_series` when the selection cannot be resolved.
    fn prepare_agg(&self) -> Result<(Vec<Series>, Vec<Series>)> {
        let selection = self
            .selected_agg
            .as_ref()
            .ok_or(PolarsError::NoSelection)?;
        let keys = self.keys();
        let agg_col = self.df.select_series(selection)?;
        Ok((keys, agg_col))
    }

    /// Shared driver for the per-column aggregations: applies `agg_fn` to
    /// every selected column, renames the result to `<column>_<suffix>` and
    /// appends it after the key columns.
    fn agg_each(
        &self,
        suffix: &str,
        agg_fn: impl Fn(&Series, &Vec<(usize, Vec<usize>)>) -> Series,
    ) -> Result<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = format!("{}_{}", agg_col.name(), suffix);
            let mut agg = agg_fn(&agg_col, &self.groups);
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Mean of every selected column per group; columns are named `<column>_mean`.
    pub fn mean(&self) -> Result<DataFrame> {
        self.agg_each("mean", |s, groups| s.agg_mean(groups))
    }

    /// Sum of every selected column per group; columns are named `<column>_sum`.
    pub fn sum(&self) -> Result<DataFrame> {
        self.agg_each("sum", |s, groups| s.agg_sum(groups))
    }

    /// Minimum of every selected column per group; columns are named `<column>_min`.
    pub fn min(&self) -> Result<DataFrame> {
        self.agg_each("min", |s, groups| s.agg_min(groups))
    }

    /// Maximum of every selected column per group; columns are named `<column>_max`.
    pub fn max(&self) -> Result<DataFrame> {
        self.agg_each("max", |s, groups| s.agg_max(groups))
    }

    /// First value of every selected column per group; columns are named
    /// `<column>_first`.
    pub fn first(&self) -> Result<DataFrame> {
        self.agg_each("first", |s, groups| s.agg_first(groups))
    }

    /// Number of rows per group, emitted once per selected column as a
    /// `UInt32` column named `<column>_count`.
    pub fn count(&self) -> Result<DataFrame> {
        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = format!["{}_count", agg_col.name()];
            let mut builder = PrimitiveChunkedBuilder::new(&new_name, self.groups.len());
            for (_first, idx) in &self.groups {
                // NOTE: `as u32` truncates for groups larger than `u32::MAX` rows.
                builder.append_value(idx.len() as u32);
            }
            let ca = builder.finish();
            let agg = Series::UInt32(ca);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Collects the values of every selected column into one list per group;
    /// columns are named `<column>_agg_list`.
    pub fn agg_list(&self) -> Result<DataFrame> {
        // Builds the list column for a primitive-typed aggregation column.
        macro_rules! impl_gb {
            ($type:ty, $agg_col:expr) => {{
                let values_builder = PrimitiveBuilder::<$type>::new(self.groups.len());
                let mut builder =
                    LargeListPrimitiveChunkedBuilder::new("", values_builder, self.groups.len());
                for (_first, idx) in &self.groups {
                    // SAFETY: assumes the group indices are valid row
                    // positions of the aggregated column — TODO confirm.
                    let s = unsafe {
                        $agg_col.take_iter_unchecked(idx.iter().copied(), Some(idx.len()))
                    };
                    builder.append_opt_series(&Some(s))
                }
                builder.finish().into_series()
            }};
        }

        // Builds the list column for a string-typed aggregation column.
        macro_rules! impl_gb_utf8 {
            ($agg_col:expr) => {{
                let values_builder = StringBuilder::new(self.groups.len());
                let mut builder =
                    LargeListUtf8ChunkedBuilder::new("", values_builder, self.groups.len());
                for (_first, idx) in &self.groups {
                    // SAFETY: assumes the group indices are valid row
                    // positions of the aggregated column — TODO confirm.
                    let s = unsafe {
                        $agg_col.take_iter_unchecked(idx.iter().copied(), Some(idx.len()))
                    };
                    builder.append_series(&s)
                }
                builder.finish().into_series()
            }};
        }

        let (mut cols, agg_cols) = self.prepare_agg()?;
        for agg_col in agg_cols {
            let new_name = format!["{}_agg_list", agg_col.name()];
            let mut agg =
                match_arrow_data_type_apply_macro!(agg_col.dtype(), impl_gb, impl_gb_utf8, agg_col);
            agg.rename(&new_name);
            cols.push(agg);
        }
        DataFrame::new(cols)
    }

    /// Starts a pivot: the distinct values of `pivot_column` become columns
    /// that are filled with aggregated values of `values_column`.
    ///
    /// Replaces any earlier selection with `[pivot_column, values_column]`.
    pub fn pivot(
        &mut self,
        pivot_column: &'selection_str str,
        values_column: &'selection_str str,
    ) -> Pivot {
        self.selected_agg = Some(vec![pivot_column, values_column]);
        Pivot {
            gb: self,
            pivot_column,
            values_column,
        }
    }
}
/// Intermediate state of a pivot, created by `GroupBy::pivot`; call one of its
/// aggregation methods to obtain the resulting `DataFrame`.
pub struct Pivot<'df, 'selection_str> {
// The grouping the pivot is based on.
gb: &'df GroupBy<'df, 'selection_str>,
// Name of the column whose values become the new column names.
pivot_column: &'selection_str str,
// Name of the column whose values are aggregated.
values_column: &'selection_str str,
}
/// Pivot support for a values column.
#[enum_dispatch(Series)]
trait ChunkPivot {
/// Builds the pivoted frame from `_keys` plus one aggregated column per
/// pivot value. The default panics with `unimplemented!()`.
fn pivot(
&self,
_pivot_series: &Series,
_keys: Vec<Series>,
_groups: &Vec<(usize, Vec<usize>)>,
_agg_type: PivotAgg,
) -> DataFrame {
unimplemented!()
}
}
/// Pivot implementation for numeric value columns.
impl<T> ChunkPivot for ChunkedArray<T>
where
    T: PolarsNumericType,
    T::Native: Copy + Num + NumCast,
{
    /// Builds the pivoted frame.
    ///
    /// * `pivot_series` – its distinct non-null values become the new columns
    ///   (named with the value's `Debug` output).
    /// * `keys` – the key columns; they form the leading columns of the result.
    /// * `groups` – the `(first, indexes)` group tuples; every group yields one row.
    /// * `agg_type` – how the values of one group/pivot-value cell are combined.
    ///
    /// A cell for which the group has no row with that pivot value is null.
    /// Rows whose pivot value is null are skipped.
    ///
    /// # Panics
    ///
    /// Panics when `DataFrame::new` rejects the assembled columns, or (via
    /// `as_groupable_iter`) when `pivot_series` has an unsupported type.
    fn pivot(
        &self,
        pivot_series: &Series,
        keys: Vec<Series>,
        groups: &Vec<(usize, Vec<usize>)>,
        agg_type: PivotAgg,
    ) -> DataFrame {
        let pivot_vec: Vec<_> = pivot_series.as_groupable_iter().collect();
        let values_taker = self.take_rand();

        // One output builder per distinct non-null pivot value.
        let mut columns_agg_map_main =
            HashMap::with_capacity_and_hasher(pivot_vec.len(), FnvBuildHasher::default());
        for column_name in pivot_vec.iter().flatten() {
            columns_agg_map_main.entry(column_name).or_insert_with(|| {
                PrimitiveChunkedBuilder::<T>::new(&format!("{:?}", column_name), groups.len())
            });
        }

        for (_first, idx) in groups {
            // Collect this group's values per pivot value. Only the rows of
            // the group are visited; the previous version rebuilt a map from
            // every row of the pivot column for each group.
            let mut group_values =
                HashMap::with_capacity_and_hasher(idx.len(), FnvBuildHasher::default());
            for &i in idx {
                // SAFETY: assumes every group index is a valid row position of
                // `pivot_series` — TODO confirm at the caller.
                let opt_pivot_val = unsafe { pivot_vec.get_unchecked(i) };
                if let Some(pivot_val) = opt_pivot_val {
                    group_values
                        .entry(pivot_val)
                        .or_insert_with(Vec::new)
                        .push(values_taker.get(i));
                }
            }

            // Every output column receives exactly one value per group.
            for (column_name, main_builder) in &mut columns_agg_map_main {
                match group_values.get_mut(column_name) {
                    // The group has no row with this pivot value.
                    None => main_builder.append_null(),
                    Some(v) => match agg_type {
                        PivotAgg::First => pivot_agg_first(main_builder, v),
                        PivotAgg::Sum => pivot_agg_sum(main_builder, v),
                        PivotAgg::Min => pivot_agg_min(main_builder, v),
                        PivotAgg::Max => pivot_agg_max(main_builder, v),
                        PivotAgg::Mean => pivot_agg_mean(main_builder, v),
                        PivotAgg::Median => pivot_agg_median(main_builder, v),
                    },
                }
            }
        }

        let mut cols = keys;
        cols.reserve_exact(columns_agg_map_main.len());
        for (_, builder) in columns_agg_map_main {
            cols.push(builder.finish().into_series());
        }
        DataFrame::new(cols).unwrap()
    }
}
// Non-numeric value columns keep the panicking default of `ChunkPivot::pivot`.
impl ChunkPivot for BooleanChunked {}
impl ChunkPivot for Utf8Chunked {}
impl ChunkPivot for LargeListChunked {}
/// The aggregation applied to the values of one pivot cell.
enum PivotAgg {
// Dispatches to `pivot_agg_first`.
First,
// Dispatches to `pivot_agg_sum`.
Sum,
// Dispatches to `pivot_agg_min`.
Min,
// Dispatches to `pivot_agg_max`.
Max,
// Dispatches to `pivot_agg_mean`.
Mean,
// Dispatches to `pivot_agg_median`.
Median,
}
/// Appends the first collected value of the cell (which may itself be null).
///
/// # Panics
///
/// Panics when `v` is empty.
fn pivot_agg_first<T>(builder: &mut PrimitiveChunkedBuilder<T>, v: &Vec<Option<T::Native>>)
where
    T: PolarsNumericType,
{
    let head = v[0];
    builder.append_option(head);
}
/// Sorts the cell's values in place and appends the element at index
/// `len / 2` of the sorted values.
///
/// Nulls take part in the ordering (`None` sorts before `Some`), so the
/// appended element can be null.
///
/// # Panics
///
/// Panics when `v` is empty or when two values cannot be ordered
/// (`partial_cmp` returns `None`, e.g. presumably for a float NaN).
fn pivot_agg_median<T>(builder: &mut PrimitiveChunkedBuilder<T>, v: &mut Vec<Option<T::Native>>)
where
    T: PolarsNumericType,
    T::Native: PartialOrd,
{
    v.sort_unstable_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap());
    let middle = v.len() / 2;
    builder.append_option(v[middle]);
}
/// Appends the sum of the cell's values, or null as soon as one of the
/// values is null.
fn pivot_agg_sum<T>(builder: &mut PrimitiveChunkedBuilder<T>, v: &Vec<Option<T::Native>>)
where
    T: PolarsNumericType,
    T::Native: Num + Zero,
{
    // `try_fold` stops at the first `None`, which makes the whole sum `None`.
    let total = v
        .iter()
        .copied()
        .try_fold(<T::Native as Zero>::zero(), |acc, item| {
            item.map(|value| acc + value)
        });
    builder.append_option(total);
}
/// Appends the mean of the cell's values, or null when any value is null.
///
/// The division is carried out in `T::Native`, so integer columns get an
/// integer-divided mean.
///
/// # Panics
///
/// Panics when the sum is non-null and `v.len()` cannot be represented in
/// `T::Native`.
fn pivot_agg_mean<T>(builder: &mut PrimitiveChunkedBuilder<T>, v: &Vec<Option<T::Native>>)
where
    T: PolarsNumericType,
    T::Native: Num + Zero + NumCast,
{
    let total = v
        .iter()
        .copied()
        .fold_options::<T::Native, T::Native, _>(Zero::zero(), Add::add);
    // The divisor is only computed when there is a sum to divide.
    let mean = total.map(|sum_val| {
        let count: T::Native = NumCast::from(v.len()).unwrap();
        sum_val / count
    });
    builder.append_option(mean);
}
/// Appends the smallest non-null value of the cell, or null when every
/// value is null.
fn pivot_agg_min<T>(builder: &mut PrimitiveChunkedBuilder<T>, v: &Vec<Option<T::Native>>)
where
    T: PolarsNumericType,
{
    let smallest: Option<T::Native> = v.iter().flatten().fold(None, |best, val| match best {
        // Replace the running minimum only by a strictly smaller value.
        Some(minimum) if *val < minimum => Some(*val),
        Some(minimum) => Some(minimum),
        None => Some(*val),
    });
    builder.append_option(smallest);
}
/// Appends the largest non-null value of the cell, or null when every
/// value is null.
fn pivot_agg_max<T>(builder: &mut PrimitiveChunkedBuilder<T>, v: &Vec<Option<T::Native>>)
where
    T: PolarsNumericType,
{
    let largest: Option<T::Native> = v.iter().flatten().fold(None, |best, val| match best {
        // Replace the running maximum only by a strictly larger value.
        Some(maximum) if *val > maximum => Some(*val),
        Some(maximum) => Some(maximum),
        None => Some(*val),
    });
    builder.append_option(largest);
}
impl<'df, 'sel_str> Pivot<'df, 'sel_str> {
pub fn first(&self) -> Result<DataFrame> {
let pivot_series = self.gb.df.column(self.pivot_column)?;
let values_series = self.gb.df.column(self.values_column)?;
Ok(values_series.pivot(
pivot_series,
self.gb.keys(),
&self.gb.groups,
PivotAgg::First,
))
}
pub fn sum(&self) -> Result<DataFrame> {
let pivot_series = self.gb.df.column(self.pivot_column)?;
let values_series = self.gb.df.column(self.values_column)?;
Ok(values_series.pivot(pivot_series, self.gb.keys(), &self.gb.groups, PivotAgg::Sum))
}
pub fn min(&self) -> Result<DataFrame> {
let pivot_series = self.gb.df.column(self.pivot_column)?;
let values_series = self.gb.df.column(self.values_column)?;
Ok(values_series.pivot(pivot_series, self.gb.keys(), &self.gb.groups, PivotAgg::Min))
}
pub fn max(&self) -> Result<DataFrame> {
let pivot_series = self.gb.df.column(self.pivot_column)?;
let values_series = self.gb.df.column(self.values_column)?;
Ok(values_series.pivot(pivot_series, self.gb.keys(), &self.gb.groups, PivotAgg::Max))
}
pub fn mean(&self) -> Result<DataFrame> {
let pivot_series = self.gb.df.column(self.pivot_column)?;
let values_series = self.gb.df.column(self.values_column)?;
Ok(values_series.pivot(
pivot_series,
self.gb.keys(),
&self.gb.groups,
PivotAgg::Mean,
))
}
pub fn median(&self) -> Result<DataFrame> {
let pivot_series = self.gb.df.column(self.pivot_column)?;
let values_series = self.gb.df.column(self.values_column)?;
Ok(values_series.pivot(
pivot_series,
self.gb.keys(),
&self.gb.groups,
PivotAgg::Median,
))
}
}
// Tests for the group-by and pivot APIs.
#[cfg(test)]
mod test {
use crate::prelude::*;
// Smoke test: runs every group-by aggregation on a small frame and prints
// the result. It asserts nothing beyond each call returning `Ok`.
#[test]
fn test_group_by() {
let s0 = Date32Chunked::parse_from_str_slice(
"date",
&[
"2020-08-21",
"2020-08-21",
"2020-08-22",
"2020-08-23",
"2020-08-22",
],
"%Y-%m-%d",
)
.into_series();
let s1 = Series::new("temp", [20, 10, 7, 9, 1].as_ref());
let s2 = Series::new("rain", [0.2, 0.1, 0.3, 0.1, 0.01].as_ref());
let df = DataFrame::new(vec![s0, s1, s2]).unwrap();
println!("{:?}", df);
println!(
"{:?}",
df.groupby("date").unwrap().select("temp").count().unwrap()
);
println!(
"{:?}",
df.groupby("date")
.unwrap()
.select(&["temp", "rain"])
.mean()
.unwrap()
);
// Grouping on two key columns at once.
println!(
"multiple keys {:?}",
df.groupby(&["date", "temp"])
.unwrap()
.select("rain")
.mean()
.unwrap()
);
println!(
"{:?}",
df.groupby("date").unwrap().select("temp").sum().unwrap()
);
println!(
"{:?}",
df.groupby("date").unwrap().select("temp").min().unwrap()
);
println!(
"{:?}",
df.groupby("date").unwrap().select("temp").max().unwrap()
);
println!(
"{:?}",
df.groupby("date")
.unwrap()
.select("temp")
.agg_list()
.unwrap()
);
println!(
"{:?}",
df.groupby("date").unwrap().select("temp").first().unwrap()
);
}
// Pivots "N" over the values of "bar", grouped by "foo", and checks the
// resulting "m" column for the sum, min, max and mean aggregations.
#[test]
fn test_pivot() {
let s0 = Series::new("foo", ["A", "A", "B", "B", "C"].as_ref());
let s1 = Series::new("N", [1, 2, 2, 4, 2].as_ref());
let s2 = Series::new("bar", ["k", "l", "m", "m", "l"].as_ref());
let df = DataFrame::new(vec![s0, s1, s2]).unwrap();
println!("{:?}", df);
let pvt = df.groupby("foo").unwrap().pivot("bar", "N").sum().unwrap();
assert_eq!(
Vec::from(pvt.column("m").unwrap().i32().unwrap()),
&[None, Some(6), None]
);
let pvt = df.groupby("foo").unwrap().pivot("bar", "N").min().unwrap();
assert_eq!(
Vec::from(pvt.column("m").unwrap().i32().unwrap()),
&[None, Some(2), None]
);
let pvt = df.groupby("foo").unwrap().pivot("bar", "N").max().unwrap();
assert_eq!(
Vec::from(pvt.column("m").unwrap().i32().unwrap()),
&[None, Some(4), None]
);
// The mean of an integer column is computed with integer division.
let pvt = df.groupby("foo").unwrap().pivot("bar", "N").mean().unwrap();
assert_eq!(
Vec::from(pvt.column("m").unwrap().i32().unwrap()),
&[None, Some(3), None]
);
}
}