Enum polars_core::frame::groupby::GroupsProxy
source · pub enum GroupsProxy {
Idx(GroupsIdx),
Slice {
groups: GroupsSlice,
rolling: bool,
},
}
Variants§
Implementations§
source§impl GroupsProxy
impl GroupsProxy
sourcepub fn into_idx(self) -> GroupsIdx
pub fn into_idx(self) -> GroupsIdx
Examples found in repository?
src/chunked_array/ops/unique/mod.rs (line 145)
(source lines 135–164)
fn mode<T: PolarsDataType>(ca: &ChunkedArray<T>) -> ChunkedArray<T>
where
ChunkedArray<T>: IntoGroupsProxy + ChunkTake,
{
if ca.is_empty() {
return ca.clone();
}
let mut groups = ca
.group_tuples(true, false)
.unwrap()
.into_idx()
.into_iter()
.collect_trusted::<Vec<_>>();
groups.sort_unstable_by_key(|k| k.1.len());
let last = &groups.last().unwrap();
let max_occur = last.1.len();
// collect until we don't take with trusted len anymore
// TODO! take directly from iter, but first remove standard trusted-length collect.
let idx = groups
.iter()
.rev()
.take_while(|v| v.1.len() == max_occur)
.map(|v| v.0)
.collect::<Vec<_>>();
// Safety:
// group indices are in bounds
unsafe { ca.take_unchecked(idx.into_iter().map(|i| i as usize).into()) }
}
sourcepub fn iter(&self) -> GroupsProxyIter<'_> ⓘ
pub fn iter(&self) -> GroupsProxyIter<'_> ⓘ
Examples found in repository?
src/frame/groupby/mod.rs (line 899)
(source lines 892–911)
pub fn apply<F>(&self, mut f: F) -> PolarsResult<DataFrame>
where
F: FnMut(DataFrame) -> PolarsResult<DataFrame> + Send + Sync,
{
let df = self.prepare_apply()?;
let dfs = self
.get_groups()
.iter()
.map(|g| {
// safety
// groups are in bounds
let sub_df = unsafe { take_df(&df, g) };
f(sub_df)
})
.collect::<PolarsResult<Vec<_>>>()?;
let mut df = accumulate_dataframes_vertical(dfs)?;
df.as_single_chunk();
Ok(df)
}
More examples
src/frame/mod.rs (line 3090)
(source lines 3066–3112)
fn unique_impl(
&self,
maintain_order: bool,
subset: Option<&[String]>,
keep: UniqueKeepStrategy,
) -> PolarsResult<Self> {
use UniqueKeepStrategy::*;
let names = match &subset {
Some(s) => s.iter().map(|s| &**s).collect(),
None => self.get_column_names(),
};
let columns = match (keep, maintain_order) {
(First, true) => {
let gb = self.groupby_stable(names)?;
let groups = gb.get_groups();
self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
}
(Last, true) => {
// maintain order by last values, so the sorted groups are not correct as they
// are sorted by the first value
let gb = self.groupby(names)?;
let groups = gb.get_groups();
let last_idx: NoNull<IdxCa> = groups
.iter()
.map(|g| match g {
GroupsIndicator::Idx((_first, idx)) => idx[idx.len() - 1],
GroupsIndicator::Slice([first, len]) => first + len,
})
.collect();
let last_idx = last_idx.sort(false);
return Ok(unsafe { self.take_unchecked(&last_idx) });
}
(First, false) => {
let gb = self.groupby(names)?;
let groups = gb.get_groups();
self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
}
(Last, false) => {
let gb = self.groupby(names)?;
let groups = gb.get_groups();
self.apply_columns_par(&|s| unsafe { s.agg_last(groups) })
}
};
Ok(DataFrame::new_no_checks(columns))
}
src/frame/groupby/aggregations/agg_list.rs (line 344)
(source lines 335–400)
unsafe fn agg_list(&self, groups: &GroupsProxy) -> Series {
let mut can_fast_explode = true;
let mut offsets = Vec::<i64>::with_capacity(groups.len() + 1);
let mut length_so_far = 0i64;
offsets.push(length_so_far);
// we know that iterators length
let iter = {
groups
.iter()
.flat_map(|indicator| {
let (group_vals, len) = match indicator {
GroupsIndicator::Idx((_first, idx)) => {
// Safety:
// group tuples always in bounds
let group_vals = self.take_unchecked(idx.into());
(group_vals, idx.len() as IdxSize)
}
GroupsIndicator::Slice([first, len]) => {
let group_vals = _slice_from_offsets(self, first, len);
(group_vals, len)
}
};
if len == 0 {
can_fast_explode = false;
}
length_so_far += len as i64;
// Safety:
// we know that offsets has allocated enough slots
offsets.push_unchecked(length_so_far);
let arr = group_vals.downcast_iter().next().unwrap().clone();
arr.into_iter_cloned()
})
.trust_my_length(self.len())
};
let mut pe = create_extension(iter);
// Safety:
// this is safe because we just created the PolarsExtension
// meaning that the sentinel is heap allocated and the dereference of the
// pointer does not fail
pe.set_to_series_fn::<T>();
let extension_array = Box::new(pe.take_and_forget()) as ArrayRef;
let extension_dtype = extension_array.data_type();
let data_type = ListArray::<i64>::default_datatype(extension_dtype.clone());
// Safety:
// offsets are monotonically increasing
let arr = Box::new(ListArray::<i64>::new(
data_type,
Offsets::new_unchecked(offsets).into(),
extension_array,
None,
)) as ArrayRef;
let mut listarr = ListChunked::from_chunks(self.name(), vec![arr]);
if can_fast_explode {
listarr.set_fast_explode()
}
listarr.into_series()
}
pub fn sort(&mut self)
pub fn group_lengths(&self, name: &str) -> IdxCa
pub fn take_group_firsts(self) -> Vec<IdxSize> ⓘ
sourcepub fn par_iter(&self) -> GroupsProxyParIter<'_>
pub fn par_iter(&self) -> GroupsProxyParIter<'_>
Examples found in repository?
src/frame/groupby/mod.rs (line 877)
(source lines 870–889)
pub fn par_apply<F>(&self, f: F) -> PolarsResult<DataFrame>
where
F: Fn(DataFrame) -> PolarsResult<DataFrame> + Send + Sync,
{
let df = self.prepare_apply()?;
let dfs = self
.get_groups()
.par_iter()
.map(|g| {
// safety
// groups are in bounds
let sub_df = unsafe { take_df(&df, g) };
f(sub_df)
})
.collect::<PolarsResult<Vec<_>>>()?;
let mut df = accumulate_dataframes_vertical(dfs)?;
df.as_single_chunk();
Ok(df)
}
sourcepub fn unwrap_idx(&self) -> &GroupsIdx
pub fn unwrap_idx(&self) -> &GroupsIdx
sourcepub fn unwrap_slice(&self) -> &GroupsSlice
pub fn unwrap_slice(&self) -> &GroupsSlice
pub fn get(&self, index: usize) -> GroupsIndicator<'_>
sourcepub fn len(&self) -> usize
pub fn len(&self) -> usize
Examples found in repository?
src/frame/groupby/proxy.rs (line 328)
(source lines 327–487)
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn group_count(&self) -> IdxCa {
match self {
GroupsProxy::Idx(groups) => {
let ca: NoNull<IdxCa> = groups
.iter()
.map(|(_first, idx)| idx.len() as IdxSize)
.collect_trusted();
ca.into_inner()
}
GroupsProxy::Slice { groups, .. } => {
let ca: NoNull<IdxCa> = groups.iter().map(|[_first, len]| *len).collect_trusted();
ca.into_inner()
}
}
}
pub fn as_list_chunked(&self) -> ListChunked {
match self {
GroupsProxy::Idx(groups) => groups
.iter()
.map(|(_first, idx)| {
let ca: NoNull<IdxCa> = idx.iter().map(|&v| v as IdxSize).collect();
ca.into_inner().into_series()
})
.collect_trusted(),
GroupsProxy::Slice { groups, .. } => groups
.iter()
.map(|&[first, len]| {
let ca: NoNull<IdxCa> = (first..first + len).collect_trusted();
ca.into_inner().into_series()
})
.collect_trusted(),
}
}
pub fn slice(&self, offset: i64, len: usize) -> SlicedGroups {
// Safety:
// we create new `Vec`s from the sliced groups. But we wrap them in ManuallyDrop
// so that we never call drop on them.
// These groups lifetimes are bounded to the `self`. This must remain valid
// for the scope of the aggregation.
let sliced = match self {
GroupsProxy::Idx(groups) => {
let first = unsafe {
let first = slice_slice(groups.first(), offset, len);
let ptr = first.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, first.len(), first.len())
};
let all = unsafe {
let all = slice_slice(groups.all(), offset, len);
let ptr = all.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, all.len(), all.len())
};
ManuallyDrop::new(GroupsProxy::Idx(GroupsIdx::new(
first,
all,
groups.is_sorted(),
)))
}
GroupsProxy::Slice { groups, rolling } => {
let groups = unsafe {
let groups = slice_slice(groups, offset, len);
let ptr = groups.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, groups.len(), groups.len())
};
ManuallyDrop::new(GroupsProxy::Slice {
groups,
rolling: *rolling,
})
}
};
SlicedGroups {
sliced,
borrowed: self,
}
}
}
impl From<GroupsIdx> for GroupsProxy {
fn from(groups: GroupsIdx) -> Self {
GroupsProxy::Idx(groups)
}
}
pub enum GroupsIndicator<'a> {
Idx(BorrowIdxItem<'a>),
Slice([IdxSize; 2]),
}
impl<'a> GroupsIndicator<'a> {
pub fn len(&self) -> usize {
match self {
GroupsIndicator::Idx(g) => g.1.len(),
GroupsIndicator::Slice([_, len]) => *len as usize,
}
}
pub fn first(&self) -> IdxSize {
match self {
GroupsIndicator::Idx(g) => g.0,
GroupsIndicator::Slice([first, _]) => *first,
}
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
}
pub struct GroupsProxyIter<'a> {
vals: &'a GroupsProxy,
len: usize,
idx: usize,
}
impl<'a> GroupsProxyIter<'a> {
fn new(vals: &'a GroupsProxy) -> Self {
let len = vals.len();
let idx = 0;
GroupsProxyIter { vals, len, idx }
}
}
impl<'a> Iterator for GroupsProxyIter<'a> {
type Item = GroupsIndicator<'a>;
fn next(&mut self) -> Option<Self::Item> {
if self.idx == self.len {
return None;
}
let out = unsafe {
match self.vals {
GroupsProxy::Idx(groups) => {
let item = groups.get_unchecked(self.idx);
Some(GroupsIndicator::Idx(item))
}
GroupsProxy::Slice { groups, .. } => {
Some(GroupsIndicator::Slice(*groups.get_unchecked(self.idx)))
}
}
};
self.idx += 1;
out
}
}
pub struct GroupsProxyParIter<'a> {
vals: &'a GroupsProxy,
len: usize,
}
impl<'a> GroupsProxyParIter<'a> {
fn new(vals: &'a GroupsProxy) -> Self {
let len = vals.len();
GroupsProxyParIter { vals, len }
}
More examples
src/series/series_trait.rs (line 131)
(source lines 130–149)
unsafe fn agg_min(&self, groups: &GroupsProxy) -> Series {
Series::full_null(self._field().name(), groups.len(), self._dtype())
}
unsafe fn agg_max(&self, groups: &GroupsProxy) -> Series {
Series::full_null(self._field().name(), groups.len(), self._dtype())
}
/// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
/// first cast to `Int64` to prevent overflow issues.
unsafe fn agg_sum(&self, groups: &GroupsProxy) -> Series {
Series::full_null(self._field().name(), groups.len(), self._dtype())
}
unsafe fn agg_std(&self, groups: &GroupsProxy, _ddof: u8) -> Series {
Series::full_null(self._field().name(), groups.len(), self._dtype())
}
unsafe fn agg_var(&self, groups: &GroupsProxy, _ddof: u8) -> Series {
Series::full_null(self._field().name(), groups.len(), self._dtype())
}
unsafe fn agg_list(&self, groups: &GroupsProxy) -> Series {
Series::full_null(self._field().name(), groups.len(), self._dtype())
}
src/frame/groupby/aggregations/dispatch.rs (line 127)
(source lines 109–184)
pub unsafe fn agg_median(&self, groups: &GroupsProxy) -> Series {
use DataType::*;
match self.dtype() {
Float32 => SeriesWrap(self.f32().unwrap().clone()).agg_median(groups),
Float64 => SeriesWrap(self.f64().unwrap().clone()).agg_median(groups),
dt if dt.is_numeric() || dt.is_temporal() => {
let ca = self.to_physical_repr();
let physical_type = ca.dtype();
let s = apply_method_physical_integer!(ca, agg_median, groups);
if dt.is_logical() {
// back to physical and then
// back to logical type
s.cast(physical_type).unwrap().cast(dt).unwrap()
} else {
s
}
}
_ => Series::full_null("", groups.len(), self.dtype()),
}
}
#[doc(hidden)]
pub unsafe fn agg_quantile(
&self,
groups: &GroupsProxy,
quantile: f64,
interpol: QuantileInterpolOptions,
) -> Series {
use DataType::*;
match self.dtype() {
Float32 => {
SeriesWrap(self.f32().unwrap().clone()).agg_quantile(groups, quantile, interpol)
}
Float64 => {
SeriesWrap(self.f64().unwrap().clone()).agg_quantile(groups, quantile, interpol)
}
dt if dt.is_numeric() || dt.is_temporal() => {
let ca = self.to_physical_repr();
let physical_type = ca.dtype();
let s =
apply_method_physical_integer!(ca, agg_quantile, groups, quantile, interpol);
if dt.is_logical() {
// back to physical and then
// back to logical type
s.cast(physical_type).unwrap().cast(dt).unwrap()
} else {
s
}
}
_ => Series::full_null("", groups.len(), self.dtype()),
}
}
#[doc(hidden)]
pub unsafe fn agg_mean(&self, groups: &GroupsProxy) -> Series {
use DataType::*;
match self.dtype() {
Boolean => self.cast(&Float64).unwrap().agg_mean(groups),
Float32 => SeriesWrap(self.f32().unwrap().clone()).agg_mean(groups),
Float64 => SeriesWrap(self.f64().unwrap().clone()).agg_mean(groups),
dt if dt.is_numeric() => {
apply_method_physical_integer!(self, agg_mean, groups)
}
dt @ Duration(_) => {
let s = self.to_physical_repr();
// agg_mean returns Float64
let out = s.agg_mean(groups);
// cast back to Int64 and then to logical duration type
out.cast(&Int64).unwrap().cast(dt).unwrap()
}
_ => Series::full_null("", groups.len(), self.dtype()),
}
}
src/frame/groupby/aggregations/agg_list.rs (line 337)
(source lines 335–400)
unsafe fn agg_list(&self, groups: &GroupsProxy) -> Series {
let mut can_fast_explode = true;
let mut offsets = Vec::<i64>::with_capacity(groups.len() + 1);
let mut length_so_far = 0i64;
offsets.push(length_so_far);
// we know that iterators length
let iter = {
groups
.iter()
.flat_map(|indicator| {
let (group_vals, len) = match indicator {
GroupsIndicator::Idx((_first, idx)) => {
// Safety:
// group tuples always in bounds
let group_vals = self.take_unchecked(idx.into());
(group_vals, idx.len() as IdxSize)
}
GroupsIndicator::Slice([first, len]) => {
let group_vals = _slice_from_offsets(self, first, len);
(group_vals, len)
}
};
if len == 0 {
can_fast_explode = false;
}
length_so_far += len as i64;
// Safety:
// we know that offsets has allocated enough slots
offsets.push_unchecked(length_so_far);
let arr = group_vals.downcast_iter().next().unwrap().clone();
arr.into_iter_cloned()
})
.trust_my_length(self.len())
};
let mut pe = create_extension(iter);
// Safety:
// this is safe because we just created the PolarsExtension
// meaning that the sentinel is heap allocated and the dereference of the
// pointer does not fail
pe.set_to_series_fn::<T>();
let extension_array = Box::new(pe.take_and_forget()) as ArrayRef;
let extension_dtype = extension_array.data_type();
let data_type = ListArray::<i64>::default_datatype(extension_dtype.clone());
// Safety:
// offsets are monotonically increasing
let arr = Box::new(ListArray::<i64>::new(
data_type,
Offsets::new_unchecked(offsets).into(),
extension_array,
None,
)) as ArrayRef;
let mut listarr = ListChunked::from_chunks(self.name(), vec![arr]);
if can_fast_explode {
listarr.set_fast_explode()
}
listarr.into_series()
}pub fn is_empty(&self) -> bool
sourcepub fn group_count(&self) -> IdxCa
pub fn group_count(&self) -> IdxCa
Examples found in repository?
More examples
src/chunked_array/logical/categorical/ops/unique.rs (line 60)
(source lines 45–65)
pub fn value_counts(&self) -> PolarsResult<DataFrame> {
let groups = self.logical().group_tuples(true, false).unwrap();
let logical_values = unsafe {
self.logical()
.clone()
.into_series()
.agg_first(&groups)
.u32()
.unwrap()
.clone()
};
let mut values = self.clone();
*values.logical_mut() = logical_values;
let mut counts = groups.group_count();
counts.rename("counts");
let cols = vec![values.into_series(), counts.into_series()];
let df = DataFrame::new_no_checks(cols);
df.sort(["counts"], true)
}sourcepub fn as_list_chunked(&self) -> ListChunked
pub fn as_list_chunked(&self) -> ListChunked
sourcepub fn slice(&self, offset: i64, len: usize) -> SlicedGroups<'_>
pub fn slice(&self, offset: i64, len: usize) -> SlicedGroups<'_>
Examples found in repository?
src/frame/groupby/mod.rs (line 331)
(source lines 325–375)
pub fn keys_sliced(&self, slice: Option<(i64, usize)>) -> Vec<Series> {
#[allow(unused_assignments)]
// needed to keep the lifetimes valid for this scope
let mut groups_owned = None;
let groups = if let Some((offset, len)) = slice {
groups_owned = Some(self.groups.slice(offset, len));
groups_owned.as_deref().unwrap()
} else {
&self.groups
};
POOL.install(|| {
self.selected_keys
.par_iter()
.map(|s| {
match groups {
GroupsProxy::Idx(groups) => {
let mut iter = groups.iter().map(|(first, _idx)| first as usize);
// Safety:
// groups are always in bounds
let mut out = unsafe { s.take_iter_unchecked(&mut iter) };
if groups.sorted {
out.set_sorted(s.is_sorted());
};
out
}
GroupsProxy::Slice { groups, rolling } => {
if *rolling && !groups.is_empty() {
// groups can be sliced
let offset = groups[0][0];
let [upper_offset, upper_len] = groups[groups.len() - 1];
return s.slice(
offset as i64,
((upper_offset + upper_len) - offset) as usize,
);
}
let mut iter = groups.iter().map(|&[first, _len]| first as usize);
// Safety:
// groups are always in bounds
let mut out = unsafe { s.take_iter_unchecked(&mut iter) };
// sliced groups are always in order of discovery
out.set_sorted(s.is_sorted());
out
}
}
})
.collect()
})
}
Trait Implementations§
source§impl Clone for GroupsProxy
impl Clone for GroupsProxy
source§fn clone(&self) -> GroupsProxy
fn clone(&self) -> GroupsProxy
Returns a copy of the value. Read more
1.0.0 · source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
source§impl Debug for GroupsProxy
impl Debug for GroupsProxy
source§impl Default for GroupsProxy
impl Default for GroupsProxy
source§impl From<GroupsIdx> for GroupsProxy
impl From<GroupsIdx> for GroupsProxy
source§impl PartialEq<GroupsProxy> for GroupsProxy
impl PartialEq<GroupsProxy> for GroupsProxy
source§fn eq(&self, other: &GroupsProxy) -> bool
fn eq(&self, other: &GroupsProxy) -> bool
This method tests for
self and other values to be equal, and is used by ==.
impl Eq for GroupsProxy
impl StructuralEq for GroupsProxy
impl StructuralPartialEq for GroupsProxy
Auto Trait Implementations§
impl RefUnwindSafe for GroupsProxy
impl Send for GroupsProxy
impl Sync for GroupsProxy
impl Unpin for GroupsProxy
impl UnwindSafe for GroupsProxy
Blanket Implementations§
source§impl<Q, K> Equivalent<K> for Q where
Q: Eq + ?Sized,
K: Borrow<Q> + ?Sized,
impl<Q, K> Equivalent<K> for Q where
Q: Eq + ?Sized,
K: Borrow<Q> + ?Sized,
source§fn equivalent(&self, key: &K) -> bool
fn equivalent(&self, key: &K) -> bool
Compare self to key and return true if they are equal.