Trait polars_arrow::utils::CustomIterTools
source · pub trait CustomIterTools: Iterator {
fn fold_first_<F>(self, f: F) -> Option<Self::Item>
where
Self: Sized,
F: FnMut(Self::Item, Self::Item) -> Self::Item,
{ ... }
unsafe fn trust_my_length(
self,
length: usize
) -> TrustMyLength<Self, Self::Item> ⓘ
where
Self: Sized,
{ ... }
fn collect_trusted<T: FromTrustedLenIterator<Self::Item>>(self) -> T
where
Self: Sized + TrustedLen,
{ ... }
fn collect_reversed<T: FromIteratorReversed<Self::Item>>(self) -> T
where
Self: Sized + TrustedLen,
{ ... }
fn all_equal(&mut self) -> bool
where
Self: Sized,
Self::Item: PartialEq,
{ ... }
fn fold_options<A, B, F>(&mut self, start: B, f: F) -> Option<B>
where
Self: Iterator<Item = Option<A>>,
F: FnMut(B, A) -> B,
{ ... }
}
Provided Methods§
fn fold_first_<F>(self, f: F) -> Option<Self::Item> where
Self: Sized,
F: FnMut(Self::Item, Self::Item) -> Self::Item,
source · unsafe fn trust_my_length(self, length: usize) -> TrustMyLength<Self, Self::Item> ⓘ where
Self: Sized,
unsafe fn trust_my_length(self, length: usize) -> TrustMyLength<Self, Self::Item> ⓘ where
Self: Sized,
Examples found in repository?
src/array/mod.rs (line 177)
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
/// Create a list-array of UTF-8 strings from an iterator of optional sub-iterators.
///
/// Every outer `Some(inner)` becomes one valid list slot whose elements are taken from
/// `inner`; every outer `None` becomes a null list slot. Each slot, valid or null,
/// contributes exactly one bit to the validity bitmap and one entry to the offsets buffer,
/// so the offsets always hold one more entry than there are slots.
///
/// # Safety
/// - `n_elements` is passed unchecked to `trust_my_length` as the length of the flattened
///   value iterator (presumably it must equal the total number of inner elements).
/// - Offsets are accumulated from the lower bound of each inner iterator's `size_hint`;
///   the resulting array is incorrect if that hint is not exact.
unsafe fn from_iter_utf8_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
where
    I: IntoIterator<Item = Option<P>>,
    P: IntoIterator<Item = Option<Ref>>,
    Ref: AsRef<str>,
{
    let iterator = iter.into_iter();
    let (lower, _) = iterator.size_hint();

    let mut validity = MutableBitmap::with_capacity(lower);
    let mut offsets = Vec::<i64>::with_capacity(lower + 1);
    let mut length_so_far = 0i64;
    offsets.push(length_so_far);

    let values: Utf8Array<i64> = iterator
        .filter_map(|opt_iter| match opt_iter {
            Some(x) => {
                let it = x.into_iter();
                length_so_far += it.size_hint().0 as i64;
                validity.push(true);
                offsets.push(length_so_far);
                Some(it)
            }
            None => {
                validity.push(false);
                // A null slot still occupies a position in the list: repeat the running
                // offset (zero-length entry) so offsets and validity stay aligned.
                offsets.push(length_so_far);
                None
            }
        })
        .flatten()
        .trust_my_length(n_elements)
        .collect();

    // SAFETY: `length_so_far` starts at 0 and only ever has non-negative lengths added to
    // it, so the pushed offsets are monotonically non-decreasing.
    ListArray::new(
        ListArray::<i64>::default_datatype(DataType::LargeUtf8),
        Offsets::new_unchecked(offsets).into(),
        Box::new(values),
        Some(validity.into()),
    )
}
/// Create a list-array of binary values from an iterator of optional sub-iterators.
/// Used in groupby agg-list
///
/// Every outer `Some(inner)` becomes one valid list slot whose elements are taken from
/// `inner`; every outer `None` becomes a null list slot. Each slot, valid or null,
/// contributes exactly one bit to the validity bitmap and one entry to the offsets buffer,
/// so the offsets always hold one more entry than there are slots.
///
/// # Safety
/// Will produce incorrect arrays if size hint is incorrect: offsets are accumulated from
/// the lower bound of each inner iterator's `size_hint`, and `n_elements` is passed
/// unchecked to `trust_my_length` as the length of the flattened value iterator.
unsafe fn from_iter_binary_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
where
    I: IntoIterator<Item = Option<P>>,
    P: IntoIterator<Item = Option<Ref>>,
    Ref: AsRef<[u8]>,
{
    let iterator = iter.into_iter();
    let (lower, _) = iterator.size_hint();

    let mut validity = MutableBitmap::with_capacity(lower);
    let mut offsets = Vec::<i64>::with_capacity(lower + 1);
    let mut length_so_far = 0i64;
    offsets.push(length_so_far);

    let values: BinaryArray<i64> = iterator
        .filter_map(|opt_iter| match opt_iter {
            Some(x) => {
                let it = x.into_iter();
                length_so_far += it.size_hint().0 as i64;
                validity.push(true);
                offsets.push(length_so_far);
                Some(it)
            }
            None => {
                validity.push(false);
                // A null slot still occupies a position in the list: repeat the running
                // offset (zero-length entry) so offsets and validity stay aligned.
                offsets.push(length_so_far);
                None
            }
        })
        .flatten()
        .trust_my_length(n_elements)
        .collect();

    // SAFETY: `length_so_far` starts at 0 and only ever has non-negative lengths added to
    // it, so the pushed offsets are monotonically non-decreasing.
    ListArray::new(
        ListArray::<i64>::default_datatype(DataType::LargeBinary),
        Offsets::new_unchecked(offsets).into(),
        Box::new(values),
        Some(validity.into()),
    )
}sourcefn collect_trusted<T: FromTrustedLenIterator<Self::Item>>(self) -> Twhere
Self: Sized + TrustedLen,
fn collect_trusted<T: FromTrustedLenIterator<Self::Item>>(self) -> T where
Self: Sized + TrustedLen,
Examples found in repository?
src/kernels/rolling/no_nulls/mod.rs (line 56)
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
/// Run a stateful rolling aggregation over `values` (no input nulls).
///
/// The aggregation window `Agg` is created on the bounds computed for index 0 and is then
/// advanced once per index using the bounds returned by `det_offsets_fn`. The output
/// validity comes from `create_validity(min_periods, len, window_size, det_offsets_fn)`.
pub(super) fn rolling_apply_agg_window<'a, Agg, T, Fo>(
    values: &'a [T],
    window_size: usize,
    min_periods: usize,
    det_offsets_fn: Fo,
) -> ArrayRef
where
    Fo: Fn(Idx, WindowSize, Len) -> (Start, End),
    Agg: RollingAggWindowNoNulls<'a, T>,
    T: Debug + IsFloat + NativeType,
{
    let n = values.len();

    // Seed the window state with the bounds of the very first position.
    let (first_start, first_end) = det_offsets_fn(0, window_size, n);
    let mut window = Agg::new(values, first_start, first_end);

    let aggregated: Vec<_> = (0..n)
        .map(|i| {
            let (lo, hi) = det_offsets_fn(i, window_size, n);
            // SAFETY: `update` is unchecked; this relies on `det_offsets_fn` returning
            // bounds that lie inside `values`.
            unsafe { window.update(lo, hi) }
        })
        .collect_trusted();

    let null_mask = create_validity(min_periods, n, window_size, det_offsets_fn);
    let array = PrimitiveArray::new(
        T::PRIMITIVE.into(),
        aggregated.into(),
        null_mask.map(|bits| bits.into()),
    );
    Box::new(array)
}
/// Selects the interpolation method used when a quantile is computed.
///
/// The exact meaning of each variant is determined by the aggregator that receives it
/// (not visible here). The `Default` implementation yields `Nearest`.
/// Serialization support is only derived when the `serde` feature is enabled.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum QuantileInterpolOptions {
// This is the variant returned by `Default::default()`.
Nearest,
Lower,
Higher,
Midpoint,
Linear,
}
impl Default for QuantileInterpolOptions {
fn default() -> Self {
QuantileInterpolOptions::Nearest
}
}
/// Apply a weighted rolling aggregation over `values`.
///
/// For every index the window slice selected by `det_offsets_fn` is handed to `aggregator`
/// together with `weights`. The output validity comes from
/// `create_validity(min_periods, len, window_size, det_offsets_fn)`.
///
/// # Panics
/// Panics if `weights.len() != window_size`.
pub(super) fn rolling_apply_weights<T, Fo, Fa>(
    values: &[T],
    window_size: usize,
    min_periods: usize,
    det_offsets_fn: Fo,
    aggregator: Fa,
    weights: &[T],
) -> ArrayRef
where
    T: NativeType,
    Fo: Fn(Idx, WindowSize, Len) -> (Start, End),
    Fa: Fn(&[T], &[T]) -> T,
{
    assert_eq!(weights.len(), window_size);

    let n = values.len();
    let weighted: Vec<T> = (0..n)
        .map(|i| {
            let (lo, hi) = det_offsets_fn(i, window_size, n);
            // SAFETY: the slice access is unchecked; this relies on `det_offsets_fn`
            // returning bounds that lie inside `values`.
            let window = unsafe { values.get_unchecked(lo..hi) };
            aggregator(window, weights)
        })
        .collect_trusted();

    let null_mask = create_validity(min_periods, n, window_size, det_offsets_fn);
    let array = PrimitiveArray::new(
        DataType::from(T::PRIMITIVE),
        weighted.into(),
        null_mask.map(|bits| bits.into()),
    );
    Box::new(array)
}More examples
src/kernels/ewm/average.rs (line 46)
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
/// Exponentially weighted moving mean over an iterator of optional values.
///
/// Null inputs do not update the running mean; they repeat the last computed mean (or stay
/// null if there is none yet). An output slot is null while fewer than `min_periods`
/// non-null inputs have been seen. With `adjust` the weight sum starts at zero, otherwise
/// at one. `alpha == 1` is delegated to `ewm_mean_alpha_equals_one`.
pub fn ewm_mean<I, T>(xs: I, alpha: T, adjust: bool, min_periods: usize) -> PrimitiveArray<T>
where
    I: IntoIterator<Item = Option<T>>,
    I::IntoIter: TrustedLen,
    T: Float + NativeType + AddAssign,
{
    if alpha.is_one() {
        return ewm_mean_alpha_equals_one(xs, min_periods);
    }

    let decay = T::one() - alpha;
    let mut weight_total = if adjust { T::zero() } else { T::one() };
    let mut running_mean: Option<T> = None;
    let mut seen = 0usize;

    // One step of the recurrence; state is carried in the captured variables above.
    let step = |item: Option<T>| {
        if let Some(x) = item {
            seen += 1;
            let previous = running_mean.unwrap_or(x);
            weight_total = decay * weight_total + alpha;
            running_mean = Some(previous + (x - previous) * alpha / weight_total);
        }
        if seen < min_periods {
            None
        } else {
            running_mean
        }
    };

    xs.into_iter().map(step).collect_trusted()
}
/// Special case of the exponentially weighted mean for ``alpha=1``.
///
/// Handled separately to avoid numerical instability (and as a small optimization): each
/// input is passed through unchanged, except that slots are null until at least
/// `min_periods` non-null inputs have been seen.
fn ewm_mean_alpha_equals_one<I, T>(xs: I, min_periods: usize) -> PrimitiveArray<T>
where
    I: IntoIterator<Item = Option<T>>,
    I::IntoIter: TrustedLen,
    T: Float + NativeType + AddAssign,
{
    let mut seen = 0usize;
    xs.into_iter()
        .map(|item| {
            seen += usize::from(item.is_some());
            if seen >= min_periods {
                item
            } else {
                None
            }
        })
        .collect_trusted()
}src/kernels/rolling/no_nulls/quantile.rs (line 241)
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289
/// Rolling quantile over `values` using an incrementally maintained sorted window.
///
/// A `SortedBuf` is created on the bounds computed for index 0 and advanced once per index
/// with the bounds from `det_offsets_fn`; the window it returns is handed to `aggregator`
/// with `quantile` and `interpolation`. The output validity comes from
/// `create_validity(min_periods, len, window_size, det_offsets_fn)`.
fn rolling_apply_quantile<T, Fo, Fa>(
    values: &[T],
    quantile: f64,
    interpolation: QuantileInterpolOptions,
    window_size: usize,
    min_periods: usize,
    det_offsets_fn: Fo,
    aggregator: Fa,
) -> ArrayRef
where
    Fo: Fn(Idx, WindowSize, Len) -> (Start, End),
    Fa: Fn(&[T], f64, QuantileInterpolOptions) -> T,
    T: Debug + NativeType + IsFloat + PartialOrd,
{
    let n = values.len();

    // Seed the sorted buffer with the bounds of the very first position.
    let (first_start, first_end) = det_offsets_fn(0, window_size, n);
    let mut sorted = SortedBuf::new(values, first_start, first_end);

    let quantiles: Vec<T> = (0..n)
        .map(|i| {
            let (lo, hi) = det_offsets_fn(i, window_size, n);
            // SAFETY: `update` is unchecked; this relies on `det_offsets_fn` returning
            // bounds that lie inside `values`.
            let current = unsafe { sorted.update(lo, hi) };
            aggregator(current, quantile, interpolation)
        })
        .collect_trusted();

    let null_mask = create_validity(min_periods, n, window_size, det_offsets_fn);
    let array = PrimitiveArray::new(
        T::PRIMITIVE.into(),
        quantiles.into(),
        null_mask.map(|bits| bits.into()),
    );
    Box::new(array)
}
/// Rolling quantile over `values` where each window element is first multiplied by the
/// weight at the same position in `weights`.
///
/// For every index, the window slice selected by `det_offsets_fn` is multiplied
/// element-wise by `weights` into a reusable scratch buffer, the buffer is sorted with
/// `sort_buf`, and `aggregator` is called on it with `quantile` and `interpolation`.
/// The output validity comes from
/// `create_validity(min_periods, len, window_size, det_offsets_fn)`.
///
/// # Panics
/// Panics if `weights.len() != window_size`, or if a weight cannot be converted to `T`
/// via `NumCast::from`.
// The argument count mirrors the other rolling helpers, hence the lint exception.
#[allow(clippy::too_many_arguments)]
fn rolling_apply_convolve_quantile<T, Fo, Fa>(
values: &[T],
quantile: f64,
interpolation: QuantileInterpolOptions,
window_size: usize,
min_periods: usize,
det_offsets_fn: Fo,
aggregator: Fa,
weights: &[f64],
) -> ArrayRef
where
Fo: Fn(Idx, WindowSize, Len) -> (Start, End),
Fa: Fn(&[T], f64, QuantileInterpolOptions) -> T,
T: Debug + NativeType + Mul<Output = T> + NumCast + ToPrimitive + Zero + IsFloat + PartialOrd,
{
assert_eq!(weights.len(), window_size);
// Scratch buffer of `window_size` slots, allocated once and reused for every window.
let mut buf = vec![T::zero(); window_size];
let len = values.len();
let out = (0..len)
.map(|idx| {
let (start, end) = det_offsets_fn(idx, window_size, len);
// Unchecked slice access: relies on `det_offsets_fn` returning bounds inside `values`.
let vals = unsafe { values.get_unchecked(start..end) };
// `zip` stops at the shortest input, so only the first `vals.len()` slots are rewritten.
// NOTE(review): when the window is shorter than `window_size`, the remaining slots keep
// whatever the previous (already sorted) iteration left there — confirm this is intended.
buf.iter_mut()
.zip(vals.iter().zip(weights))
.for_each(|(b, (v, w))| *b = *v * NumCast::from(*w).unwrap());
// The whole buffer is sorted (not just the rewritten prefix) before aggregation.
sort_buf(&mut buf);
aggregator(&buf, quantile, interpolation)
})
.collect_trusted::<Vec<T>>();
let validity = create_validity(min_periods, len, window_size, det_offsets_fn);
Box::new(PrimitiveArray::new(
T::PRIMITIVE.into(),
out.into(),
validity.map(|b| b.into()),
))
}src/kernels/list.rs (line 61)
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
/// For every sublist of `arr`, compute the optional position of its `index`-th element.
///
/// Each produced position is the element's position inside its sublist plus a running
/// offset that starts at the array's first offset and grows by the length of every
/// sublist visited. Empty sublists and sublists with `index >= len` yield `None`.
/// Returns an empty array when `arr` has no offsets at all.
///
/// NOTE(review): `negative_to_usize` presumably resolves a negative `index` relative to
/// the end of the sublist and returns `None` when it falls outside — confirm against its
/// definition.
fn sublist_get_indexes(arr: &ListArray<i64>, index: i64) -> IdxArr {
let offsets = arr.offsets().as_slice();
let mut iter = offsets.iter();
// the indices can be sliced, so we should not start at 0.
let mut cum_offset = (*offsets.first().unwrap_or(&0)) as IdxSize;
// Consume the first offset; every remaining offset closes one sublist.
if let Some(mut previous) = iter.next().copied() {
let a: IdxArr = iter
.map(|&offset| {
// Length of the current sublist is the distance between consecutive offsets.
let len = offset - previous;
previous = offset;
// make sure that empty lists don't get accessed
// and out of bounds return null
if len == 0 {
return None;
}
if index >= len {
// Still advance the running offset past this sublist before skipping it.
cum_offset += len as IdxSize;
return None;
}
// Translate the in-sublist position into a position relative to `cum_offset`.
let out = index
.negative_to_usize(len as usize)
.map(|idx| idx as IdxSize + cum_offset);
cum_offset += len as IdxSize;
out
})
.collect_trusted();
a
} else {
IdxArr::from_slice([])
}
}src/kernels/rolling/nulls/mod.rs (line 77)
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
/// Run a stateful rolling aggregation over `values`, taking the input `validity` bitmap
/// into account.
///
/// The aggregation window `Agg` is created on the bounds computed for index 0 and is then
/// advanced once per index using the bounds returned by `det_offsets_fn`. An output slot
/// is marked null when `update` yields `None` or when `agg_window.is_valid(min_periods)`
/// is false; such slots hold `T::default()` as a placeholder value. The returned array
/// always carries a validity bitmap.
pub(super) fn rolling_apply_agg_window<'a, Agg, T, Fo>(
values: &'a [T],
validity: &'a Bitmap,
window_size: usize,
min_periods: usize,
det_offsets_fn: Fo,
) -> ArrayRef
where
Fo: Fn(Idx, WindowSize, Len) -> (Start, End) + Copy,
Agg: RollingAggWindowNulls<'a, T>,
T: IsFloat + NativeType,
{
let len = values.len();
let (start, end) = det_offsets_fn(0, window_size, len);
// Safety; we are in bounds
let mut agg_window = unsafe { Agg::new(values, validity, start, end) };
// Output validity (shadows the input `validity`): start from `create_validity`'s mask,
// or from an all-valid bitmap of `len` bits when it returns `None`.
let mut validity = match create_validity(min_periods, len, window_size, det_offsets_fn) {
Some(v) => v,
None => {
let mut validity = MutableBitmap::with_capacity(len);
validity.extend_constant(len, true);
validity
}
};
let out = (0..len)
.map(|idx| {
let (start, end) = det_offsets_fn(idx, window_size, len);
// safety:
// we are in bounds
let agg = unsafe { agg_window.update(start, end) };
match agg {
Some(val) => {
// A value was produced; keep it only if the window passes the `min_periods` check.
if agg_window.is_valid(min_periods) {
val
} else {
// safety: we are in bounds
unsafe { validity.set_unchecked(idx, false) };
T::default()
}
}
None => {
// No aggregate for this window: mark the slot null and store a placeholder.
// safety: we are in bounds
unsafe { validity.set_unchecked(idx, false) };
T::default()
}
}
})
.collect_trusted::<Vec<_>>();
Box::new(PrimitiveArray::new(
T::PRIMITIVE.into(),
out.into(),
Some(validity.into()),
))
}src/compute/take/mod.rs (line 67)
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
/// Take values from `arr` at the positions given by `indices`, without bounds checks.
///
/// Output slot `i` is null when `indices` is null at `i` or when `arr` is null at
/// `indices[i]`; otherwise it holds `arr[indices[i]]`.
///
/// # Panics
/// Panics if `arr` has no validity bitmap.
///
/// # Safety
/// Every value stored in `indices` (including the values under null index slots, which are
/// still used to read from `arr`'s value buffer) must be in bounds for `arr`. This is only
/// checked by a `debug_assert!`.
pub unsafe fn take_primitive_unchecked<T: NativeType>(
    arr: &PrimitiveArray<T>,
    indices: &IdxArr,
) -> Box<PrimitiveArray<T>> {
    let array_values = arr.values().as_slice();
    let index_values = indices.values().as_slice();
    let validity_values = arr.validity().expect("should have nulls");

    // first take the values, these are always needed
    let values: Vec<T> = index_values
        .iter()
        .map(|idx| {
            debug_assert!((*idx as usize) < array_values.len());
            *array_values.get_unchecked(*idx as usize)
        })
        .collect_trusted();

    // the validity buffer we will fill with all valid. And we unset the ones that are null
    // in later checks
    // this is in the assumption that most values will be valid.
    // Maybe we could add another branch based on the null count
    let mut validity = MutableBitmap::with_capacity(indices.len());
    validity.extend_constant(indices.len(), true);

    // Bits are cleared through `MutableBitmap::set_unchecked` (a `&mut` access) rather than
    // by writing through a `*mut u8` cast from the shared `as_slice()` borrow, which is
    // undefined behaviour. `i < indices.len()` always holds, so the writes are in bounds.
    if let Some(validity_indices) = indices.validity().as_ref() {
        index_values.iter().enumerate().for_each(|(i, idx)| {
            // i is iteration count
            // idx is the index that we take from the values array.
            let idx = *idx as usize;
            if !validity_indices.get_bit_unchecked(i) || !validity_values.get_bit_unchecked(idx) {
                validity.set_unchecked(i, false);
            }
        });
    } else {
        index_values.iter().enumerate().for_each(|(i, idx)| {
            let idx = *idx as usize;
            if !validity_values.get_bit_unchecked(idx) {
                validity.set_unchecked(i, false);
            }
        });
    };

    let arr = PrimitiveArray::new(T::PRIMITIVE.into(), values.into(), Some(validity.into()));
    Box::new(arr)
}Additional examples can be found in: