use std::convert::TryFrom;

use arrow::bitmap::MutableBitmap;
use polars_arrow::array::PolarsArray;
use polars_arrow::bit_util::unset_bit_raw;
use polars_arrow::compute::take::take_value_indices_from_list;

use crate::prelude::*;

/// Take kernel for multiple chunks. We return a `ChunkedArray` directly, because that
/// path chooses the fastest collection path.
pub(crate) fn take_primitive_iter_n_chunks<T: PolarsNumericType, I: IntoIterator<Item = usize>>(
    ca: &ChunkedArray<T>,
    indices: I,
) -> ChunkedArray<T> {
    let taker = ca.take_rand();
    indices.into_iter().map(|idx| taker.get(idx)).collect()
}
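
// A minimal usage sketch (hypothetical caller; assumes `ChunkedArray::from_slice`
// and `append` to build a multi-chunk array):
//
//     let mut ca = UInt32Chunked::from_slice("a", &[10, 20, 30]);
//     ca.append(&UInt32Chunked::from_slice("a", &[40, 50]));
//     let taken = take_primitive_iter_n_chunks(&ca, [4usize, 0, 2]);
//     assert_eq!(taken.len(), 3);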

/// Take kernel for multiple chunks where the iterator can produce `None` values.
/// Used in join operations. We return a `ChunkedArray` directly, because that path
/// chooses the fastest collection path.
pub(crate) fn take_primitive_opt_iter_n_chunks<
    T: PolarsNumericType,
    I: IntoIterator<Item = Option<usize>>,
>(
    ca: &ChunkedArray<T>,
    indices: I,
) -> ChunkedArray<T> {
    let taker = ca.take_rand();
    indices
        .into_iter()
        .map(|opt_idx| opt_idx.and_then(|idx| taker.get(idx)))
        .collect()
}
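
// A minimal usage sketch (hypothetical join-style caller; a `None` index
// becomes a null slot in the output):
//
//     let ca = UInt32Chunked::from_slice("a", &[10, 20, 30]);
//     let taken = take_primitive_opt_iter_n_chunks(&ca, [Some(2), None, Some(0)]);
//     assert_eq!(taken.null_count(), 1);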

/// Forked and adapted from arrow-rs.
/// This is faster because it does no bounds checks and allocates directly into aligned memory.
///
/// # Safety
/// Does no bounds checks: the caller must ensure that every index in `indices`
/// is within the bounds of `values`.
pub(crate) unsafe fn take_list_unchecked(
    values: &ListArray<i64>,
    indices: &IdxArr,
) -> ListArray<i64> {
    // resolve the list-level indices into indices on the flat child values
    // (each taken list is a contiguous slice of the child array), together
    // with the new offsets
    let (list_indices, offsets) = take_value_indices_from_list(values, indices);

    // temporary Series so that we can reuse the existing take kernels on the child values
    let s = Series::try_from(("", values.values().clone() as ArrayRef)).unwrap();
    let taken = s
        .take_unchecked(&IdxCa::from_chunks(
            "",
            vec![Box::new(list_indices) as ArrayRef],
        ))
        .unwrap();

    let taken = taken.array_ref(0).clone();

    let validity =
        // only build a validity bitmap if there can be nulls at all
        if values.has_validity() || indices.has_validity() {
            // determine the null buffer, which is a function of `values` and `indices`
            let mut validity = MutableBitmap::with_capacity(indices.len());
            // taking the raw pointer first is sound: the capacity is reserved up
            // front, so the `extend_constant` call below will not reallocate
            let validity_ptr = validity.as_slice().as_ptr() as *mut u8;
            validity.extend_constant(indices.len(), true);

            offsets.as_slice().windows(2).enumerate().for_each(
                |(i, window): (usize, &[i64])| {
                    if window[0] == window[1] {
                        // offsets are equal, slot is null
                        unset_bit_raw(validity_ptr, i);
                    }
                },
            );
            Some(validity.into())
        } else {
            None
        };
    let dtype = ListArray::<i64>::default_datatype(taken.data_type().clone());
    // the offsets returned by `take_value_indices_from_list` are monotonically
    // increasing, so this constructs a valid ListArray
    ListArray::new(dtype, offsets.into(), taken, validity)
}
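
// A minimal sketch of a call site (hypothetical; `list_arr` is any `ListArray<i64>`
// and `IdxArr::from_vec` is assumed). Per the safety contract, every index must be
// checked against `list_arr.len()` before calling:
//
//     let indices = IdxArr::from_vec(vec![0 as IdxSize, 2]);
//     assert!(indices.values().iter().all(|i| (*i as usize) < list_arr.len()));
//     // SAFETY: all indices were checked to be in bounds above.
//     let taken = unsafe { take_list_unchecked(&list_arr, &indices) };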