Trait polars_arrow::utils::CustomIterTools

source ·

pub trait CustomIterTools: Iterator {
    fn fold_first_<F>(self, f: F) -> Option<Self::Item>
    where
        Self: Sized,
        F: FnMut(Self::Item, Self::Item) -> Self::Item,
    { ... }
    unsafe fn trust_my_length(
        self,
        length: usize
    ) -> TrustMyLength<Self, Self::Item> ⓘ
    where
        Self: Sized,
    { ... }
    fn collect_trusted<T: FromTrustedLenIterator<Self::Item>>(self) -> T
    where
        Self: Sized + TrustedLen,
    { ... }
    fn collect_reversed<T: FromIteratorReversed<Self::Item>>(self) -> T
    where
        Self: Sized + TrustedLen,
    { ... }
    fn all_equal(&mut self) -> bool
    where
        Self: Sized,
        Self::Item: PartialEq,
    { ... }
    fn fold_options<A, B, F>(&mut self, start: B, f: F) -> Option<B>
    where
        Self: Iterator<Item = Option<A>>,
        F: FnMut(B, A) -> B,
    { ... }
}

Provided Methods§

source

fn fold_first_<F>(self, f: F) -> Option<Self::Item>where
Self: Sized,
F: FnMut(Self::Item, Self::Item) -> Self::Item,

source

unsafe fn trust_my_length(self, length: usize) -> TrustMyLength<Self, Self::Item> ⓘwhere
Self: Sized,

Turn any iterator in a trusted length iterator

Safety

The given length must be correct.

Examples found in repository ?

src/array/mod.rs (line 177)

    unsafe fn from_iter_utf8_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
    where
        I: IntoIterator<Item = Option<P>>,
        P: IntoIterator<Item = Option<Ref>>,
        Ref: AsRef<str>,
    {
        let iterator = iter.into_iter();
        let (lower, _) = iterator.size_hint();

        let mut validity = MutableBitmap::with_capacity(lower);
        let mut offsets = Vec::<i64>::with_capacity(lower + 1);
        let mut length_so_far = 0i64;
        offsets.push(length_so_far);
        let values: Utf8Array<i64> = iterator
            .filter_map(|opt_iter| match opt_iter {
                Some(x) => {
                    let it = x.into_iter();
                    length_so_far += it.size_hint().0 as i64;
                    validity.push(true);
                    offsets.push(length_so_far);
                    Some(it)
                }
                None => {
                    validity.push(false);
                    None
                }
            })
            .flatten()
            .trust_my_length(n_elements)
            .collect();

        // Safety:
        // offsets are monotonically increasing
        ListArray::new(
            ListArray::<i64>::default_datatype(DataType::LargeUtf8),
            Offsets::new_unchecked(offsets).into(),
            Box::new(values),
            Some(validity.into()),
        )
    }

    /// Create a list-array from an iterator.
    /// Used in groupby agg-list
    ///
    /// # Safety
    /// Will produce incorrect arrays if size hint is incorrect.
    unsafe fn from_iter_binary_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
    where
        I: IntoIterator<Item = Option<P>>,
        P: IntoIterator<Item = Option<Ref>>,
        Ref: AsRef<[u8]>,
    {
        let iterator = iter.into_iter();
        let (lower, _) = iterator.size_hint();

        let mut validity = MutableBitmap::with_capacity(lower);
        let mut offsets = Vec::<i64>::with_capacity(lower + 1);
        let mut length_so_far = 0i64;
        offsets.push(length_so_far);
        let values: BinaryArray<i64> = iterator
            .filter_map(|opt_iter| match opt_iter {
                Some(x) => {
                    let it = x.into_iter();
                    length_so_far += it.size_hint().0 as i64;
                    validity.push(true);
                    offsets.push(length_so_far);
                    Some(it)
                }
                None => {
                    validity.push(false);
                    None
                }
            })
            .flatten()
            .trust_my_length(n_elements)
            .collect();

        // Safety:
        // offsets are monotonically increasing
        ListArray::new(
            ListArray::<i64>::default_datatype(DataType::LargeBinary),
            Offsets::new_unchecked(offsets).into(),
            Box::new(values),
            Some(validity.into()),
        )
    }

source

fn collect_trusted<T: FromTrustedLenIterator<Self::Item>>(self) -> Twhere
Self: Sized + TrustedLen,

Examples found in repository ?

src/kernels/rolling/no_nulls/mod.rs (line 56)

pub(super) fn rolling_apply_agg_window<'a, Agg, T, Fo>(
    values: &'a [T],
    window_size: usize,
    min_periods: usize,
    det_offsets_fn: Fo,
) -> ArrayRef
where
    Fo: Fn(Idx, WindowSize, Len) -> (Start, End),
    Agg: RollingAggWindowNoNulls<'a, T>,
    T: Debug + IsFloat + NativeType,
{
    let len = values.len();
    let (start, end) = det_offsets_fn(0, window_size, len);
    let mut agg_window = Agg::new(values, start, end);

    let out = (0..len)
        .map(|idx| {
            let (start, end) = det_offsets_fn(idx, window_size, len);
            // safety:
            // we are in bounds
            unsafe { agg_window.update(start, end) }
        })
        .collect_trusted::<Vec<_>>();

    let validity = create_validity(min_periods, len, window_size, det_offsets_fn);
    Box::new(PrimitiveArray::new(
        T::PRIMITIVE.into(),
        out.into(),
        validity.map(|b| b.into()),
    ))
}

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum QuantileInterpolOptions {
    Nearest,
    Lower,
    Higher,
    Midpoint,
    Linear,
}

impl Default for QuantileInterpolOptions {
    fn default() -> Self {
        QuantileInterpolOptions::Nearest
    }
}

pub(super) fn rolling_apply_weights<T, Fo, Fa>(
    values: &[T],
    window_size: usize,
    min_periods: usize,
    det_offsets_fn: Fo,
    aggregator: Fa,
    weights: &[T],
) -> ArrayRef
where
    T: NativeType,
    Fo: Fn(Idx, WindowSize, Len) -> (Start, End),
    Fa: Fn(&[T], &[T]) -> T,
{
    assert_eq!(weights.len(), window_size);
    let len = values.len();
    let out = (0..len)
        .map(|idx| {
            let (start, end) = det_offsets_fn(idx, window_size, len);
            let vals = unsafe { values.get_unchecked(start..end) };

            aggregator(vals, weights)
        })
        .collect_trusted::<Vec<T>>();

    let validity = create_validity(min_periods, len, window_size, det_offsets_fn);
    Box::new(PrimitiveArray::new(
        DataType::from(T::PRIMITIVE),
        out.into(),
        validity.map(|b| b.into()),
    ))
}

More examples

Hide additional examples

src/kernels/ewm/average.rs (line 46)

pub fn ewm_mean<I, T>(xs: I, alpha: T, adjust: bool, min_periods: usize) -> PrimitiveArray<T>
where
    I: IntoIterator<Item = Option<T>>,
    I::IntoIter: TrustedLen,
    T: Float + NativeType + AddAssign,
{
    if alpha.is_one() {
        return ewm_mean_alpha_equals_one(xs, min_periods);
    }

    let one_sub_alpha = T::one() - alpha;

    let mut opt_mean = None;
    let mut non_null_cnt = 0usize;

    let wgt = alpha;
    let mut wgt_sum = if adjust { T::zero() } else { T::one() };

    xs.into_iter()
        .map(|opt_x| {
            if let Some(x) = opt_x {
                non_null_cnt += 1;

                let prev_mean = opt_mean.unwrap_or(x);

                wgt_sum = one_sub_alpha * wgt_sum + wgt;

                let curr_mean = prev_mean + (x - prev_mean) * wgt / wgt_sum;

                opt_mean = Some(curr_mean);
            }
            match non_null_cnt < min_periods {
                true => None,
                false => opt_mean,
            }
        })
        .collect_trusted()
}

/// To prevent numerical instability (and as a slight optimization), we
/// special-case ``alpha=1``.
fn ewm_mean_alpha_equals_one<I, T>(xs: I, min_periods: usize) -> PrimitiveArray<T>
where
    I: IntoIterator<Item = Option<T>>,
    I::IntoIter: TrustedLen,
    T: Float + NativeType + AddAssign,
{
    let mut non_null_count = 0usize;
    xs.into_iter()
        .map(|opt_x| {
            if opt_x.is_some() {
                non_null_count += 1;
            }
            match non_null_count < min_periods {
                true => None,
                false => opt_x,
            }
        })
        .collect_trusted()
}

src/kernels/rolling/no_nulls/quantile.rs (line 241)

fn rolling_apply_quantile<T, Fo, Fa>(
    values: &[T],
    quantile: f64,
    interpolation: QuantileInterpolOptions,
    window_size: usize,
    min_periods: usize,
    det_offsets_fn: Fo,
    aggregator: Fa,
) -> ArrayRef
where
    Fo: Fn(Idx, WindowSize, Len) -> (Start, End),
    Fa: Fn(&[T], f64, QuantileInterpolOptions) -> T,
    T: Debug + NativeType + IsFloat + PartialOrd,
{
    let len = values.len();
    let (start, end) = det_offsets_fn(0, window_size, len);
    let mut sorted_window = SortedBuf::new(values, start, end);

    let out = (0..len)
        .map(|idx| {
            let (start, end) = det_offsets_fn(idx, window_size, len);

            // Safety:
            // we are in bounds
            let window = unsafe { sorted_window.update(start, end) };
            aggregator(window, quantile, interpolation)
        })
        .collect_trusted::<Vec<T>>();

    let validity = create_validity(min_periods, len, window_size, det_offsets_fn);
    Box::new(PrimitiveArray::new(
        T::PRIMITIVE.into(),
        out.into(),
        validity.map(|b| b.into()),
    ))
}

#[allow(clippy::too_many_arguments)]
fn rolling_apply_convolve_quantile<T, Fo, Fa>(
    values: &[T],
    quantile: f64,
    interpolation: QuantileInterpolOptions,
    window_size: usize,
    min_periods: usize,
    det_offsets_fn: Fo,
    aggregator: Fa,
    weights: &[f64],
) -> ArrayRef
where
    Fo: Fn(Idx, WindowSize, Len) -> (Start, End),
    Fa: Fn(&[T], f64, QuantileInterpolOptions) -> T,
    T: Debug + NativeType + Mul<Output = T> + NumCast + ToPrimitive + Zero + IsFloat + PartialOrd,
{
    assert_eq!(weights.len(), window_size);
    let mut buf = vec![T::zero(); window_size];
    let len = values.len();
    let out = (0..len)
        .map(|idx| {
            let (start, end) = det_offsets_fn(idx, window_size, len);
            let vals = unsafe { values.get_unchecked(start..end) };
            buf.iter_mut()
                .zip(vals.iter().zip(weights))
                .for_each(|(b, (v, w))| *b = *v * NumCast::from(*w).unwrap());

            sort_buf(&mut buf);
            aggregator(&buf, quantile, interpolation)
        })
        .collect_trusted::<Vec<T>>();

    let validity = create_validity(min_periods, len, window_size, det_offsets_fn);
    Box::new(PrimitiveArray::new(
        T::PRIMITIVE.into(),
        out.into(),
        validity.map(|b| b.into()),
    ))
}

src/kernels/list.rs (line 61)

fn sublist_get_indexes(arr: &ListArray<i64>, index: i64) -> IdxArr {
    let offsets = arr.offsets().as_slice();
    let mut iter = offsets.iter();

    // the indices can be sliced, so we should not start at 0.
    let mut cum_offset = (*offsets.first().unwrap_or(&0)) as IdxSize;

    if let Some(mut previous) = iter.next().copied() {
        let a: IdxArr = iter
            .map(|&offset| {
                let len = offset - previous;
                previous = offset;
                // make sure that empty lists don't get accessed
                // and out of bounds return null
                if len == 0 {
                    return None;
                }
                if index >= len {
                    cum_offset += len as IdxSize;
                    return None;
                }

                let out = index
                    .negative_to_usize(len as usize)
                    .map(|idx| idx as IdxSize + cum_offset);
                cum_offset += len as IdxSize;
                out
            })
            .collect_trusted();

        a
    } else {
        IdxArr::from_slice([])
    }
}

src/kernels/rolling/nulls/mod.rs (line 77)

pub(super) fn rolling_apply_agg_window<'a, Agg, T, Fo>(
    values: &'a [T],
    validity: &'a Bitmap,
    window_size: usize,
    min_periods: usize,
    det_offsets_fn: Fo,
) -> ArrayRef
where
    Fo: Fn(Idx, WindowSize, Len) -> (Start, End) + Copy,
    Agg: RollingAggWindowNulls<'a, T>,
    T: IsFloat + NativeType,
{
    let len = values.len();
    let (start, end) = det_offsets_fn(0, window_size, len);
    // Safety; we are in bounds
    let mut agg_window = unsafe { Agg::new(values, validity, start, end) };

    let mut validity = match create_validity(min_periods, len, window_size, det_offsets_fn) {
        Some(v) => v,
        None => {
            let mut validity = MutableBitmap::with_capacity(len);
            validity.extend_constant(len, true);
            validity
        }
    };

    let out = (0..len)
        .map(|idx| {
            let (start, end) = det_offsets_fn(idx, window_size, len);
            // safety:
            // we are in bounds
            let agg = unsafe { agg_window.update(start, end) };
            match agg {
                Some(val) => {
                    if agg_window.is_valid(min_periods) {
                        val
                    } else {
                        // safety: we are in bounds
                        unsafe { validity.set_unchecked(idx, false) };
                        T::default()
                    }
                }
                None => {
                    // safety: we are in bounds
                    unsafe { validity.set_unchecked(idx, false) };
                    T::default()
                }
            }
        })
        .collect_trusted::<Vec<_>>();

    Box::new(PrimitiveArray::new(
        T::PRIMITIVE.into(),
        out.into(),
        Some(validity.into()),
    ))
}

src/compute/take/mod.rs (line 67)

pub unsafe fn take_primitive_unchecked<T: NativeType>(
    arr: &PrimitiveArray<T>,
    indices: &IdxArr,
) -> Box<PrimitiveArray<T>> {
    let array_values = arr.values().as_slice();
    let index_values = indices.values().as_slice();
    let validity_values = arr.validity().expect("should have nulls");

    // first take the values, these are always needed
    let values: Vec<T> = index_values
        .iter()
        .map(|idx| {
            debug_assert!((*idx as usize) < array_values.len());
            *array_values.get_unchecked(*idx as usize)
        })
        .collect_trusted();

    // the validity buffer we will fill with all valid. And we unset the ones that are null
    // in later checks
    // this is in the assumption that most values will be valid.
    // Maybe we could add another branch based on the null count
    let mut validity = MutableBitmap::with_capacity(indices.len());
    validity.extend_constant(indices.len(), true);
    let validity_ptr = validity.as_slice().as_ptr() as *mut u8;

    if let Some(validity_indices) = indices.validity().as_ref() {
        index_values.iter().enumerate().for_each(|(i, idx)| {
            // i is iteration count
            // idx is the index that we take from the values array.
            let idx = *idx as usize;
            if !validity_indices.get_bit_unchecked(i) || !validity_values.get_bit_unchecked(idx) {
                unset_bit_raw(validity_ptr, i);
            }
        });
    } else {
        index_values.iter().enumerate().for_each(|(i, idx)| {
            let idx = *idx as usize;
            if !validity_values.get_bit_unchecked(idx) {
                unset_bit_raw(validity_ptr, i);
            }
        });
    };
    let arr = PrimitiveArray::new(T::PRIMITIVE.into(), values.into(), Some(validity.into()));

    Box::new(arr)
}

Additional examples can be found in:

source

fn collect_reversed<T: FromIteratorReversed<Self::Item>>(self) -> Twhere
Self: Sized + TrustedLen,

source

fn all_equal(&mut self) -> boolwhere
Self: Sized,
Self::Item: PartialEq,

source

fn fold_options<A, B, F>(&mut self, start: B, f: F) -> Option<B>where
Self: Iterator<Item = Option<A>>,
F: FnMut(B, A) -> B,

Implementors§

source §

Trait polars_arrow::utils::CustomIterTools

Provided Methods§

fn fold_first_<F>(self, f: F) -> Option<Self::Item>where Self: Sized, F: FnMut(Self::Item, Self::Item) -> Self::Item,

unsafe fn trust_my_length(self, length: usize) -> TrustMyLength<Self, Self::Item> ⓘwhere Self: Sized,

Safety

Examples found in repository?

fn collect_trusted<T: FromTrustedLenIterator<Self::Item>>(self) -> Twhere Self: Sized + TrustedLen,

Examples found in repository?

fn collect_reversed<T: FromIteratorReversed<Self::Item>>(self) -> Twhere Self: Sized + TrustedLen,

fn all_equal(&mut self) -> boolwhere Self: Sized, Self::Item: PartialEq,

fn fold_options<A, B, F>(&mut self, start: B, f: F) -> Option<B>where Self: Iterator<Item = Option<A>>, F: FnMut(B, A) -> B,

Implementors§

impl<T> CustomIterTools for Twhere T: Iterator + ?Sized,

fn fold_first_<F>(self, f: F) -> Option<Self::Item>where
Self: Sized,
F: FnMut(Self::Item, Self::Item) -> Self::Item,

unsafe fn trust_my_length(self, length: usize) -> TrustMyLength<Self, Self::Item> ⓘwhere
Self: Sized,

Examples found in repository ?

fn collect_trusted<T: FromTrustedLenIterator<Self::Item>>(self) -> Twhere
Self: Sized + TrustedLen,

Examples found in repository ?

fn collect_reversed<T: FromIteratorReversed<Self::Item>>(self) -> Twhere
Self: Sized + TrustedLen,

fn all_equal(&mut self) -> boolwhere
Self: Sized,
Self::Item: PartialEq,

fn fold_options<A, B, F>(&mut self, start: B, f: F) -> Option<B>where
Self: Iterator<Item = Option<A>>,
F: FnMut(B, A) -> B,

impl<T> CustomIterTools for Twhere
T: Iterator + ?Sized,