Struct polars_core::prelude::GroupsIdx
source · pub struct GroupsIdx { /* private fields */ }Expand description
Indexes of the groups. The first index is stored separately; this makes sorting fast.
Implementations§
source§impl GroupsIdx
impl GroupsIdx
sourcepub fn new(first: Vec<IdxSize>, all: Vec<Vec<IdxSize>>, sorted: bool) -> Self
pub fn new(first: Vec<IdxSize>, all: Vec<Vec<IdxSize>>, sorted: bool) -> Self
Examples found in repository?
src/frame/groupby/proxy.rs (lines 384-388)
365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
/// Returns a view over a sub-range of the groups without copying them.
///
/// `offset` and `len` are forwarded to `slice_slice` to pick the range. The
/// result bundles the sliced groups with a borrow of `self`.
pub fn slice(&self, offset: i64, len: usize) -> SlicedGroups {
// Safety:
// we create new `Vec`s from the sliced groups. But we wrap them in ManuallyDrop
// so that we never call drop on them.
// These groups lifetimes are bounded to the `self`. This must remain valid
// for the scope of the aggregation.
let sliced = match self {
GroupsProxy::Idx(groups) => {
// Alias the sliced `first` buffer as a `Vec` whose capacity equals its length.
let first = unsafe {
let first = slice_slice(groups.first(), offset, len);
let ptr = first.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, first.len(), first.len())
};
// Same aliasing for the per-group index vectors.
let all = unsafe {
let all = slice_slice(groups.all(), offset, len);
let ptr = all.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, all.len(), all.len())
};
ManuallyDrop::new(GroupsProxy::Idx(GroupsIdx::new(
first,
all,
groups.is_sorted(),
)))
}
GroupsProxy::Slice { groups, rolling } => {
let groups = unsafe {
let groups = slice_slice(groups, offset, len);
let ptr = groups.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, groups.len(), groups.len())
};
ManuallyDrop::new(GroupsProxy::Slice {
groups,
rolling: *rolling,
})
}
};
// `borrowed` keeps the original groups borrowed alongside the aliased view.
SlicedGroups {
sliced,
borrowed: self,
}
}sourcepub fn sort(&mut self)
pub fn sort(&mut self)
Examples found in repository?
src/frame/groupby/proxy.rs (line 235)
231 232 233 234 235 236 237 238 239 240 241 242 243 244
/// Sorts the groups in place.
///
/// Index groups are sorted only when `is_sorted()` reports `false`; slice
/// groups are ordered by their first index unless they are flagged `rolling`.
pub fn sort(&mut self) {
match self {
GroupsProxy::Idx(groups) => {
if !groups.is_sorted() {
groups.sort()
}
}
GroupsProxy::Slice { groups, rolling } => {
// Rolling groups are left untouched.
if !*rolling {
groups.sort_unstable_by_key(|[first, _]| *first);
}
}
}
}sourcepub fn is_sorted(&self) -> bool
pub fn is_sorted(&self) -> bool
Examples found in repository?
src/frame/groupby/proxy.rs (line 234)
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
/// Sorts the groups in place.
///
/// Index groups are sorted only when `is_sorted()` reports `false`; slice
/// groups are ordered by their first index unless they are flagged `rolling`.
pub fn sort(&mut self) {
    match self {
        GroupsProxy::Idx(idx) if !idx.is_sorted() => idx.sort(),
        GroupsProxy::Slice {
            groups: slices,
            rolling,
        } if !*rolling => {
            slices.sort_unstable_by_key(|[first, _]| *first);
        }
        // Already-sorted index groups and rolling slices are left untouched.
        GroupsProxy::Idx(_) | GroupsProxy::Slice { .. } => {}
    }
}
/// Returns the length of every group as an `IdxCa` named `name`.
pub fn group_lengths(&self, name: &str) -> IdxCa {
    let lengths: NoNull<IdxCa> = match self {
        GroupsProxy::Idx(idx) => idx
            .iter()
            .map(|(_first, members)| members.len() as IdxSize)
            .collect_trusted(),
        // Slice groups are `[first, len]` pairs; the length is the second entry.
        GroupsProxy::Slice { groups: slices, .. } => {
            slices.iter().map(|pair| pair[1]).collect_trusted()
        }
    };
    let mut out = lengths.into_inner();
    out.rename(name);
    out
}
/// Consumes the groups and returns the first index of every group.
pub fn take_group_firsts(self) -> Vec<IdxSize> {
    match self {
        // `mem::take` moves the vector out and leaves an empty one behind.
        GroupsProxy::Idx(mut idx) => std::mem::take(&mut idx.first),
        GroupsProxy::Slice { groups: slices, .. } => {
            slices.into_iter().map(|[start, _len]| start).collect()
        }
    }
}
/// Creates a `GroupsProxyParIter` over these groups.
///
/// Only compiled when the `private` feature is enabled.
#[cfg(feature = "private")]
pub fn par_iter(&self) -> GroupsProxyParIter {
GroupsProxyParIter::new(self)
}
/// Borrows the inner `GroupsIdx`.
///
/// # Panics
///
/// Panics if the groups are stored as slices.
pub fn unwrap_idx(&self) -> &GroupsIdx {
    if let GroupsProxy::Idx(idx) = self {
        idx
    } else {
        panic!("groups are slices not index")
    }
}
/// Borrows the inner `GroupsSlice`.
///
/// # Panics
///
/// Panics if the groups are stored as indexes.
pub fn unwrap_slice(&self) -> &GroupsSlice {
    if let GroupsProxy::Slice { groups: slices, .. } = self {
        slices
    } else {
        panic!("groups are index not slices")
    }
}
/// Returns the group at position `index` as a `GroupsIndicator`.
///
/// # Panics
///
/// Panics if `index` is out of bounds (plain indexing is used).
pub fn get(&self, index: usize) -> GroupsIndicator {
    match self {
        GroupsProxy::Idx(idx) => GroupsIndicator::Idx((idx.first[index], &idx.all[index])),
        GroupsProxy::Slice { groups: slices, .. } => GroupsIndicator::Slice(slices[index]),
    }
}
/// Mutably borrows the inner `GroupsIdx`.
///
/// # Panics
///
/// Panics if the groups are stored as slices.
pub fn idx_mut(&mut self) -> &mut GroupsIdx {
    if let GroupsProxy::Idx(idx) = self {
        idx
    } else {
        panic!("groups are slices not index")
    }
}
/// Returns the number of groups.
pub fn len(&self) -> usize {
    match self {
        GroupsProxy::Idx(idx) => idx.len(),
        GroupsProxy::Slice { groups: slices, .. } => slices.len(),
    }
}
/// Returns `true` when there are no groups.
pub fn is_empty(&self) -> bool {
    let n_groups = self.len();
    n_groups == 0
}
/// Returns the number of members of every group as an `IdxCa`.
pub fn group_count(&self) -> IdxCa {
    let counts: NoNull<IdxCa> = match self {
        GroupsProxy::Idx(idx) => idx
            .iter()
            .map(|(_first, members)| members.len() as IdxSize)
            .collect_trusted(),
        // Slice groups already carry their length as the second entry.
        GroupsProxy::Slice { groups: slices, .. } => {
            slices.iter().map(|[_first, len]| *len).collect_trusted()
        }
    };
    counts.into_inner()
}
/// Builds a `ListChunked` with one entry per group holding that group's row indexes.
pub fn as_list_chunked(&self) -> ListChunked {
    match self {
        GroupsProxy::Idx(idx) => idx
            .iter()
            .map(|(_first, members)| {
                let rows: NoNull<IdxCa> = members.iter().copied().collect();
                rows.into_inner().into_series()
            })
            .collect_trusted(),
        GroupsProxy::Slice { groups: slices, .. } => slices
            .iter()
            .map(|&[start, len]| {
                // A slice group covers the contiguous range `start..start + len`.
                let rows: NoNull<IdxCa> = (start..start + len).collect_trusted();
                rows.into_inner().into_series()
            })
            .collect_trusted(),
    }
}
/// Returns a view over a sub-range of the groups without copying them.
///
/// `offset` and `len` are forwarded to `slice_slice` to pick the range. The
/// result bundles the sliced groups with a borrow of `self`.
pub fn slice(&self, offset: i64, len: usize) -> SlicedGroups {
// Safety:
// we create new `Vec`s from the sliced groups. But we wrap them in ManuallyDrop
// so that we never call drop on them.
// These groups lifetimes are bounded to the `self`. This must remain valid
// for the scope of the aggregation.
let sliced = match self {
GroupsProxy::Idx(groups) => {
// Alias the sliced `first` buffer as a `Vec` whose capacity equals its length.
let first = unsafe {
let first = slice_slice(groups.first(), offset, len);
let ptr = first.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, first.len(), first.len())
};
// Same aliasing for the per-group index vectors.
let all = unsafe {
let all = slice_slice(groups.all(), offset, len);
let ptr = all.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, all.len(), all.len())
};
ManuallyDrop::new(GroupsProxy::Idx(GroupsIdx::new(
first,
all,
groups.is_sorted(),
)))
}
GroupsProxy::Slice { groups, rolling } => {
let groups = unsafe {
let groups = slice_slice(groups, offset, len);
let ptr = groups.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, groups.len(), groups.len())
};
ManuallyDrop::new(GroupsProxy::Slice {
groups,
rolling: *rolling,
})
}
};
// `borrowed` keeps the original groups borrowed alongside the aliased view.
SlicedGroups {
sliced,
borrowed: self,
}
}sourcepub fn iter(&self) -> Zip<Copied<Iter<'_, IdxSize>>, Iter<'_, Vec<IdxSize>>>
pub fn iter(&self) -> Zip<Copied<Iter<'_, IdxSize>>, Iter<'_, Vec<IdxSize>>>
Examples found in repository?
src/frame/groupby/proxy.rs (line 249)
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
/// Returns the length of every group as an `IdxCa` named `name`.
pub fn group_lengths(&self, name: &str) -> IdxCa {
    let lengths: NoNull<IdxCa> = match self {
        GroupsProxy::Idx(idx) => idx
            .iter()
            .map(|(_first, members)| members.len() as IdxSize)
            .collect_trusted(),
        // Slice groups are `[first, len]` pairs; the length is the second entry.
        GroupsProxy::Slice { groups: slices, .. } => {
            slices.iter().map(|pair| pair[1]).collect_trusted()
        }
    };
    let mut out = lengths.into_inner();
    out.rename(name);
    out
}
/// Consumes the groups and returns the first index of every group.
pub fn take_group_firsts(self) -> Vec<IdxSize> {
    match self {
        // `mem::take` moves the vector out and leaves an empty one behind.
        GroupsProxy::Idx(mut idx) => std::mem::take(&mut idx.first),
        GroupsProxy::Slice { groups: slices, .. } => {
            slices.into_iter().map(|[start, _len]| start).collect()
        }
    }
}
/// Creates a `GroupsProxyParIter` over these groups.
///
/// Only compiled when the `private` feature is enabled.
#[cfg(feature = "private")]
pub fn par_iter(&self) -> GroupsProxyParIter {
GroupsProxyParIter::new(self)
}
/// Borrows the inner `GroupsIdx`.
///
/// # Panics
///
/// Panics if the groups are stored as slices.
pub fn unwrap_idx(&self) -> &GroupsIdx {
    if let GroupsProxy::Idx(idx) = self {
        idx
    } else {
        panic!("groups are slices not index")
    }
}
/// Borrows the inner `GroupsSlice`.
///
/// # Panics
///
/// Panics if the groups are stored as indexes.
pub fn unwrap_slice(&self) -> &GroupsSlice {
    if let GroupsProxy::Slice { groups: slices, .. } = self {
        slices
    } else {
        panic!("groups are index not slices")
    }
}
/// Returns the group at position `index` as a `GroupsIndicator`.
///
/// # Panics
///
/// Panics if `index` is out of bounds (plain indexing is used).
pub fn get(&self, index: usize) -> GroupsIndicator {
    match self {
        GroupsProxy::Idx(idx) => GroupsIndicator::Idx((idx.first[index], &idx.all[index])),
        GroupsProxy::Slice { groups: slices, .. } => GroupsIndicator::Slice(slices[index]),
    }
}
/// Mutably borrows the inner `GroupsIdx`.
///
/// # Panics
///
/// Panics if the groups are stored as slices.
pub fn idx_mut(&mut self) -> &mut GroupsIdx {
    if let GroupsProxy::Idx(idx) = self {
        idx
    } else {
        panic!("groups are slices not index")
    }
}
/// Returns the number of groups.
pub fn len(&self) -> usize {
    match self {
        GroupsProxy::Idx(idx) => idx.len(),
        GroupsProxy::Slice { groups: slices, .. } => slices.len(),
    }
}
/// Returns `true` when there are no groups.
pub fn is_empty(&self) -> bool {
    let n_groups = self.len();
    n_groups == 0
}
/// Returns the number of members of every group as an `IdxCa`.
pub fn group_count(&self) -> IdxCa {
    let counts: NoNull<IdxCa> = match self {
        GroupsProxy::Idx(idx) => idx
            .iter()
            .map(|(_first, members)| members.len() as IdxSize)
            .collect_trusted(),
        // Slice groups already carry their length as the second entry.
        GroupsProxy::Slice { groups: slices, .. } => {
            slices.iter().map(|[_first, len]| *len).collect_trusted()
        }
    };
    counts.into_inner()
}
/// Builds a `ListChunked` with one entry per group holding that group's row indexes.
pub fn as_list_chunked(&self) -> ListChunked {
match self {
GroupsProxy::Idx(groups) => groups
.iter()
.map(|(_first, idx)| {
let ca: NoNull<IdxCa> = idx.iter().map(|&v| v as IdxSize).collect();
ca.into_inner().into_series()
})
.collect_trusted(),
GroupsProxy::Slice { groups, .. } => groups
.iter()
.map(|&[first, len]| {
// A slice group covers the contiguous range `first..first + len`.
let ca: NoNull<IdxCa> = (first..first + len).collect_trusted();
ca.into_inner().into_series()
})
.collect_trusted(),
}
}More examples
src/frame/groupby/aggregations/dispatch.rs (line 52)
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
/// Aggregates every group to the value at its first index.
///
/// Empty groups contribute `None` to the take iterator.
///
/// # Safety
///
/// Values are gathered with `take_opt_iter_unchecked`; the group indexes
/// must be in bounds for `self`.
pub unsafe fn agg_first(&self, groups: &GroupsProxy) -> Series {
let out = match groups {
GroupsProxy::Idx(groups) => {
let mut iter = groups.iter().map(|(first, idx)| {
if idx.is_empty() {
None
} else {
Some(first as usize)
}
});
// Safety:
// groups are always in bounds
self.take_opt_iter_unchecked(&mut iter)
}
GroupsProxy::Slice { groups, .. } => {
let mut iter =
groups.iter().map(
|&[first, len]| {
if len == 0 {
None
} else {
Some(first as usize)
}
},
);
// Safety:
// groups are always in bounds
self.take_opt_iter_unchecked(&mut iter)
}
};
self.restore_logical(out)
}src/frame/groupby/mod.rs (line 343)
325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375
/// Returns one `Series` per selected key column, built from the groups.
///
/// When `slice` is `Some((offset, len))`, the groups are first restricted with
/// `self.groups.slice(offset, len)`. The key columns are processed in parallel
/// inside `POOL`.
pub fn keys_sliced(&self, slice: Option<(i64, usize)>) -> Vec<Series> {
#[allow(unused_assignments)]
// needed to keep the lifetimes valid for this scope
let mut groups_owned = None;
let groups = if let Some((offset, len)) = slice {
groups_owned = Some(self.groups.slice(offset, len));
groups_owned.as_deref().unwrap()
} else {
&self.groups
};
POOL.install(|| {
self.selected_keys
.par_iter()
.map(|s| {
match groups {
GroupsProxy::Idx(groups) => {
// Gather the key value at the first index of every group.
let mut iter = groups.iter().map(|(first, _idx)| first as usize);
// Safety:
// groups are always in bounds
let mut out = unsafe { s.take_iter_unchecked(&mut iter) };
if groups.sorted {
out.set_sorted(s.is_sorted());
};
out
}
GroupsProxy::Slice { groups, rolling } => {
if *rolling && !groups.is_empty() {
// groups can be sliced
// Take the span from the first group's start to the last group's end.
let offset = groups[0][0];
let [upper_offset, upper_len] = groups[groups.len() - 1];
return s.slice(
offset as i64,
((upper_offset + upper_len) - offset) as usize,
);
}
let mut iter = groups.iter().map(|&[first, _len]| first as usize);
// Safety:
// groups are always in bounds
let mut out = unsafe { s.take_iter_unchecked(&mut iter) };
// sliced groups are always in order of discovery
out.set_sorted(s.is_sorted());
out
}
}
})
.collect()
})
}src/frame/groupby/aggregations/agg_list.rs (line 34)
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330
/// Collects the values of every group into one list entry of a list `Series`.
///
/// The array is rechunked, the values of each group are appended to one flat
/// buffer, and `offsets` records where each group ends. An empty group
/// disables the fast-explode flag on the result.
///
/// # Safety
///
/// Values and validity bits are read with unchecked accessors; the group
/// indexes must be in bounds for `self`.
unsafe fn agg_list(&self, groups: &GroupsProxy) -> Series {
let ca = self.rechunk();
match groups {
GroupsProxy::Idx(groups) => {
let mut can_fast_explode = true;
let arr = ca.downcast_iter().next().unwrap();
let values = arr.values();
// One offset per group plus the leading zero.
let mut offsets = Vec::<i64>::with_capacity(groups.len() + 1);
let mut length_so_far = 0i64;
offsets.push(length_so_far);
let mut list_values = Vec::<T::Native>::with_capacity(self.len());
groups.iter().for_each(|(_, idx)| {
let idx_len = idx.len();
if idx_len == 0 {
can_fast_explode = false;
}
length_so_far += idx_len as i64;
// Safety:
// group tuples are in bounds
{
list_values.extend(idx.iter().map(|idx| {
debug_assert!((*idx as usize) < values.len());
*values.get_unchecked(*idx as usize)
}));
// Safety:
// we know that offsets has allocated enough slots
offsets.push_unchecked(length_so_far);
}
});
// Rebuild the validity bitmap in gathered order when the input has nulls.
let validity = if arr.null_count() > 0 {
let old_validity = arr.validity().unwrap();
let mut validity = MutableBitmap::from_len_set(list_values.len());
let mut count = 0;
groups.iter().for_each(|(_, idx)| {
for i in idx {
if !old_validity.get_bit_unchecked(*i as usize) {
validity.set_bit_unchecked(count, false)
}
count += 1;
}
});
Some(validity.into())
} else {
None
};
let array =
PrimitiveArray::new(T::get_dtype().to_arrow(), list_values.into(), validity);
let data_type = ListArray::<i64>::default_datatype(T::get_dtype().to_arrow());
// Safety:
// offsets are monotonically increasing
let arr = ListArray::<i64>::new(
data_type,
Offsets::new_unchecked(offsets).into(),
Box::new(array),
None,
);
let mut ca = ListChunked::from_chunks(self.name(), vec![Box::new(arr)]);
if can_fast_explode {
ca.set_fast_explode()
}
ca.into()
}
GroupsProxy::Slice { groups, .. } => {
let mut can_fast_explode = true;
let arr = ca.downcast_iter().next().unwrap();
let values = arr.values();
// One offset per group plus the leading zero.
let mut offsets = Vec::<i64>::with_capacity(groups.len() + 1);
let mut length_so_far = 0i64;
offsets.push(length_so_far);
let mut list_values = Vec::<T::Native>::with_capacity(self.len());
groups.iter().for_each(|&[first, len]| {
if len == 0 {
can_fast_explode = false;
}
length_so_far += len as i64;
// Slice groups are contiguous, so the values are copied as one range.
list_values.extend_from_slice(&values[first as usize..(first + len) as usize]);
{
// Safety:
// we know that offsets has allocated enough slots
offsets.push_unchecked(length_so_far);
}
});
// Rebuild the validity bitmap in gathered order when the input has nulls.
let validity = if arr.null_count() > 0 {
let old_validity = arr.validity().unwrap();
let mut validity = MutableBitmap::from_len_set(list_values.len());
let mut count = 0;
groups.iter().for_each(|[first, len]| {
for i in *first..(*first + *len) {
if !old_validity.get_bit_unchecked(i as usize) {
validity.set_bit_unchecked(count, false)
}
count += 1;
}
});
Some(validity.into())
} else {
None
};
let array =
PrimitiveArray::new(T::get_dtype().to_arrow(), list_values.into(), validity);
let data_type = ListArray::<i64>::default_datatype(T::get_dtype().to_arrow());
let arr = ListArray::<i64>::new(
data_type,
Offsets::new_unchecked(offsets).into(),
Box::new(array),
None,
);
let mut ca = ListChunked::from_chunks(self.name(), vec![Box::new(arr)]);
if can_fast_explode {
ca.set_fast_explode()
}
ca.into()
}
}
}
}
impl AggList for BooleanChunked {
    /// Collects the values of every group into one list entry.
    ///
    /// # Safety
    ///
    /// Index groups are gathered with `take_unchecked`; the indexes must be in bounds.
    unsafe fn agg_list(&self, groups: &GroupsProxy) -> Series {
        // `GroupsProxy::len` reports the same group count as either variant's inner `len`.
        let mut builder = ListBooleanChunkedBuilder::new(self.name(), groups.len(), self.len());
        match groups {
            GroupsProxy::Idx(idx) => {
                for members in idx.all() {
                    let taken = self.take_unchecked(members.into());
                    builder.append(&taken);
                }
            }
            GroupsProxy::Slice { groups: slices, .. } => {
                for &[start, len] in slices {
                    let window = self.slice(start as i64, len as usize);
                    builder.append(&window);
                }
            }
        }
        builder.finish().into_series()
    }
}
impl AggList for Utf8Chunked {
    /// Collects the values of every group into one list entry.
    ///
    /// # Safety
    ///
    /// Index groups are gathered with `take_unchecked`; the indexes must be in bounds.
    unsafe fn agg_list(&self, groups: &GroupsProxy) -> Series {
        // `GroupsProxy::len` reports the same group count as either variant's inner `len`.
        let mut builder = ListUtf8ChunkedBuilder::new(self.name(), groups.len(), self.len());
        match groups {
            GroupsProxy::Idx(idx) => {
                for members in idx.all() {
                    let taken = self.take_unchecked(members.into());
                    builder.append(&taken);
                }
            }
            GroupsProxy::Slice { groups: slices, .. } => {
                for &[start, len] in slices {
                    let window = self.slice(start as i64, len as usize);
                    builder.append(&window);
                }
            }
        }
        builder.finish().into_series()
    }
}
#[cfg(feature = "dtype-binary")]
impl AggList for BinaryChunked {
    /// Collects the values of every group into one list entry.
    ///
    /// # Safety
    ///
    /// Index groups are gathered with `take_unchecked`; the indexes must be in bounds.
    unsafe fn agg_list(&self, groups: &GroupsProxy) -> Series {
        // `GroupsProxy::len` reports the same group count as either variant's inner `len`.
        let mut builder = ListBinaryChunkedBuilder::new(self.name(), groups.len(), self.len());
        match groups {
            GroupsProxy::Idx(idx) => {
                for members in idx.all() {
                    let taken = self.take_unchecked(members.into());
                    builder.append(&taken);
                }
            }
            GroupsProxy::Slice { groups: slices, .. } => {
                for &[start, len] in slices {
                    let window = self.slice(start as i64, len as usize);
                    builder.append(&window);
                }
            }
        }
        builder.finish().into_series()
    }
}
/// Shared driver for aggregating a `ListChunked` into a list of lists.
///
/// `func` receives the array, the initial fast-explode flag (`true`), the
/// offsets buffer, the running length and the output array list. It fills
/// the buffers and returns the final fast-explode flag.
fn agg_list_list<F>(ca: &ListChunked, groups_len: usize, func: F) -> Series
where
    F: Fn(&ListChunked, bool, &mut Vec<i64>, &mut i64, &mut Vec<ArrayRef>) -> bool,
{
    let mut length_so_far = 0i64;
    let mut offsets = Vec::<i64>::with_capacity(groups_len + 1);
    offsets.push(length_so_far);
    let mut list_values = Vec::with_capacity(groups_len);

    let can_fast_explode = func(
        ca,
        true,
        &mut offsets,
        &mut length_so_far,
        &mut list_values,
    );

    // With no groups, add an empty slice of the first chunk
    // (presumably so `concatenate` has at least one input — TODO confirm).
    if groups_len == 0 {
        list_values.push(ca.chunks[0].slice(0, 0))
    }

    let borrowed: Vec<_> = list_values.iter().map(|arr| &**arr).collect();
    let list_values: ArrayRef = arrow::compute::concatenate::concatenate(&borrowed).unwrap();
    let data_type = ListArray::<i64>::default_datatype(list_values.data_type().clone());

    // Safety:
    // offsets are monotonically increasing
    let arr = unsafe {
        Box::new(ListArray::<i64>::new(
            data_type,
            Offsets::new_unchecked(offsets).into(),
            list_values,
            None,
        )) as ArrayRef
    };

    let mut listarr = ListChunked::from_chunks(ca.name(), vec![arr]);
    if can_fast_explode {
        listarr.set_fast_explode()
    }
    listarr.into_series()
}
impl AggList for ListChunked {
/// Collects the list values of every group into one nested list entry.
///
/// Each variant builds a closure that fills the offsets and per-group arrays,
/// then hands it to `agg_list_list` to assemble the result.
///
/// # Safety
///
/// Index groups are gathered with `take_unchecked`, and offsets are pushed
/// with `push_unchecked`; the group indexes must be in bounds.
unsafe fn agg_list(&self, groups: &GroupsProxy) -> Series {
match groups {
GroupsProxy::Idx(groups) => {
let func = |ca: &ListChunked,
mut can_fast_explode: bool,
offsets: &mut Vec<i64>,
length_so_far: &mut i64,
list_values: &mut Vec<ArrayRef>| {
groups.iter().for_each(|(_, idx)| {
let idx_len = idx.len();
if idx_len == 0 {
can_fast_explode = false;
}
*length_so_far += idx_len as i64;
// Safety:
// group tuples are in bounds
{
let mut s = ca.take_unchecked(idx.into());
let arr = s.chunks.pop().unwrap();
list_values.push(arr);
// Safety:
// we know that offsets has allocated enough slots
offsets.push_unchecked(*length_so_far);
}
});
can_fast_explode
};
agg_list_list(self, groups.len(), func)
}
GroupsProxy::Slice { groups, .. } => {
let func = |ca: &ListChunked,
mut can_fast_explode: bool,
offsets: &mut Vec<i64>,
length_so_far: &mut i64,
list_values: &mut Vec<ArrayRef>| {
groups.iter().for_each(|&[first, len]| {
if len == 0 {
can_fast_explode = false;
}
*length_so_far += len as i64;
let mut s = ca.slice(first as i64, len as usize);
let arr = s.chunks.pop().unwrap();
list_values.push(arr);
{
// Safety:
// we know that offsets has allocated enough slots
offsets.push_unchecked(*length_so_far);
}
});
can_fast_explode
};
agg_list_list(self, groups.len(), func)
}
}
}sourcepub fn all(&self) -> &[Vec<IdxSize>]
pub fn all(&self) -> &[Vec<IdxSize>]
Examples found in repository?
More examples
src/frame/groupby/aggregations/dispatch.rs (line 190)
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
/// Aggregates every group to the value at its last index.
///
/// Empty groups contribute `None` to the take iterator.
///
/// # Safety
///
/// Values are gathered with `take_opt_iter_unchecked`; the group indexes
/// must be in bounds for `self`.
pub unsafe fn agg_last(&self, groups: &GroupsProxy) -> Series {
let out = match groups {
GroupsProxy::Idx(groups) => {
let mut iter = groups.all().iter().map(|idx| {
if idx.is_empty() {
None
} else {
Some(idx[idx.len() - 1] as usize)
}
});
self.take_opt_iter_unchecked(&mut iter)
}
GroupsProxy::Slice { groups, .. } => {
let mut iter = groups.iter().map(|&[first, len]| {
if len == 0 {
None
} else {
// Last row of the contiguous range `first..first + len`.
Some((first + len - 1) as usize)
}
});
self.take_opt_iter_unchecked(&mut iter)
}
};
self.restore_logical(out)
}src/frame/groupby/aggregations/agg_list.rs (line 157)
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
/// Collects the values of every group into one list entry.
///
/// # Safety
///
/// Index groups are gathered with `take_unchecked`; the indexes must be in bounds.
unsafe fn agg_list(&self, groups: &GroupsProxy) -> Series {
    // `GroupsProxy::len` reports the same group count as either variant's inner `len`.
    let mut builder = ListBooleanChunkedBuilder::new(self.name(), groups.len(), self.len());
    match groups {
        GroupsProxy::Idx(idx) => {
            for members in idx.all() {
                let taken = self.take_unchecked(members.into());
                builder.append(&taken);
            }
        }
        GroupsProxy::Slice { groups: slices, .. } => {
            for &[start, len] in slices {
                let window = self.slice(start as i64, len as usize);
                builder.append(&window);
            }
        }
    }
    builder.finish().into_series()
}
}
impl AggList for Utf8Chunked {
/// Collects the values of every group into one list entry.
///
/// # Safety
///
/// Index groups are gathered with `take_unchecked`; the indexes must be in bounds.
unsafe fn agg_list(&self, groups: &GroupsProxy) -> Series {
match groups {
GroupsProxy::Idx(groups) => {
let mut builder =
ListUtf8ChunkedBuilder::new(self.name(), groups.len(), self.len());
for idx in groups.all().iter() {
let ca = { self.take_unchecked(idx.into()) };
builder.append(&ca)
}
builder.finish().into_series()
}
GroupsProxy::Slice { groups, .. } => {
let mut builder =
ListUtf8ChunkedBuilder::new(self.name(), groups.len(), self.len());
// Each slice group is a `[first, len]` pair describing a contiguous window.
for [first, len] in groups {
let ca = self.slice(*first as i64, *len as usize);
builder.append(&ca)
}
builder.finish().into_series()
}
}
}src/frame/groupby/proxy.rs (line 380)
365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
/// Returns a view over a sub-range of the groups without copying them.
///
/// `offset` and `len` are forwarded to `slice_slice` to pick the range. The
/// result bundles the sliced groups with a borrow of `self`.
pub fn slice(&self, offset: i64, len: usize) -> SlicedGroups {
// Safety:
// we create new `Vec`s from the sliced groups. But we wrap them in ManuallyDrop
// so that we never call drop on them.
// These groups lifetimes are bounded to the `self`. This must remain valid
// for the scope of the aggregation.
let sliced = match self {
GroupsProxy::Idx(groups) => {
// Alias the sliced `first` buffer as a `Vec` whose capacity equals its length.
let first = unsafe {
let first = slice_slice(groups.first(), offset, len);
let ptr = first.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, first.len(), first.len())
};
// Same aliasing for the per-group index vectors.
let all = unsafe {
let all = slice_slice(groups.all(), offset, len);
let ptr = all.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, all.len(), all.len())
};
ManuallyDrop::new(GroupsProxy::Idx(GroupsIdx::new(
first,
all,
groups.is_sorted(),
)))
}
GroupsProxy::Slice { groups, rolling } => {
let groups = unsafe {
let groups = slice_slice(groups, offset, len);
let ptr = groups.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, groups.len(), groups.len())
};
ManuallyDrop::new(GroupsProxy::Slice {
groups,
rolling: *rolling,
})
}
};
// `borrowed` keeps the original groups borrowed alongside the aliased view.
SlicedGroups {
sliced,
borrowed: self,
}
}sourcepub fn first(&self) -> &[IdxSize]
pub fn first(&self) -> &[IdxSize]
Examples found in repository?
src/frame/groupby/proxy.rs (line 374)
365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
/// Returns a view over a sub-range of the groups without copying them.
///
/// `offset` and `len` are forwarded to `slice_slice` to pick the range. The
/// result bundles the sliced groups with a borrow of `self`.
pub fn slice(&self, offset: i64, len: usize) -> SlicedGroups {
// Safety:
// we create new `Vec`s from the sliced groups. But we wrap them in ManuallyDrop
// so that we never call drop on them.
// These groups lifetimes are bounded to the `self`. This must remain valid
// for the scope of the aggregation.
let sliced = match self {
GroupsProxy::Idx(groups) => {
// Alias the sliced `first` buffer as a `Vec` whose capacity equals its length.
let first = unsafe {
let first = slice_slice(groups.first(), offset, len);
let ptr = first.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, first.len(), first.len())
};
// Same aliasing for the per-group index vectors.
let all = unsafe {
let all = slice_slice(groups.all(), offset, len);
let ptr = all.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, all.len(), all.len())
};
ManuallyDrop::new(GroupsProxy::Idx(GroupsIdx::new(
first,
all,
groups.is_sorted(),
)))
}
GroupsProxy::Slice { groups, rolling } => {
let groups = unsafe {
let groups = slice_slice(groups, offset, len);
let ptr = groups.as_ptr() as *mut _;
Vec::from_raw_parts(ptr, groups.len(), groups.len())
};
ManuallyDrop::new(GroupsProxy::Slice {
groups,
rolling: *rolling,
})
}
};
// `borrowed` keeps the original groups borrowed alongside the aliased view.
SlicedGroups {
sliced,
borrowed: self,
}
}pub fn first_mut(&mut self) -> &mut Vec<IdxSize> ⓘ
Trait Implementations§
source§impl From<GroupsIdx> for GroupsProxy
impl From<GroupsIdx> for GroupsProxy
source§impl FromParallelIterator<(u32, Vec<u32, Global>)> for GroupsIdx
impl FromParallelIterator<(u32, Vec<u32, Global>)> for GroupsIdx
source§fn from_par_iter<I>(par_iter: I) -> Selfwhere
I: IntoParallelIterator<Item = IdxItem>,
fn from_par_iter<I>(par_iter: I) -> Selfwhere
I: IntoParallelIterator<Item = IdxItem>,
Creates an instance of the collection from the parallel iterator
par_iter. Read moresource§impl<'a> IntoIterator for &'a GroupsIdx
impl<'a> IntoIterator for &'a GroupsIdx
source§impl IntoIterator for GroupsIdx
impl IntoIterator for GroupsIdx
source§impl<'a> IntoParallelIterator for &'a GroupsIdx
impl<'a> IntoParallelIterator for &'a GroupsIdx
source§impl IntoParallelIterator for GroupsIdx
impl IntoParallelIterator for GroupsIdx
source§impl PartialEq<GroupsIdx> for GroupsIdx
impl PartialEq<GroupsIdx> for GroupsIdx
impl Eq for GroupsIdx
impl StructuralEq for GroupsIdx
impl StructuralPartialEq for GroupsIdx
Auto Trait Implementations§
impl RefUnwindSafe for GroupsIdx
impl Send for GroupsIdx
impl Sync for GroupsIdx
impl Unpin for GroupsIdx
impl UnwindSafe for GroupsIdx
Blanket Implementations§
source§impl<Q, K> Equivalent<K> for Qwhere
Q: Eq + ?Sized,
K: Borrow<Q> + ?Sized,
impl<Q, K> Equivalent<K> for Qwhere
Q: Eq + ?Sized,
K: Borrow<Q> + ?Sized,
source§fn equivalent(&self, key: &K) -> bool
fn equivalent(&self, key: &K) -> bool
Compare self to
key and return true if they are equal.source§impl<'data, I> IntoParallelRefIterator<'data> for Iwhere
I: 'data + ?Sized,
&'data I: IntoParallelIterator,
impl<'data, I> IntoParallelRefIterator<'data> for Iwhere
I: 'data + ?Sized,
&'data I: IntoParallelIterator,
§type Iter = <&'data I as IntoParallelIterator>::Iter
type Iter = <&'data I as IntoParallelIterator>::Iter
The type of the parallel iterator that will be returned.
§type Item = <&'data I as IntoParallelIterator>::Item
type Item = <&'data I as IntoParallelIterator>::Item
The type of item that the parallel iterator will produce.
This will typically be an
&'data T reference type.