Skip to main content

polars_ops/series/ops/
index.rs

1use arrow::array::Array;
2use arrow::bitmap::BitmapBuilder;
3use arrow::compute::utils::combine_validities_and;
4use arrow::datatypes::IdxArr;
5use num_traits::{Bounded, ToPrimitive, Zero};
6use polars_core::error::{PolarsResult, polars_bail, polars_ensure};
7use polars_core::prelude::{ChunkedArray, IdxCa, IdxSize, PolarsIntegerType, Series};
8use polars_core::with_match_physical_integer_polars_type;
9use polars_utils::select::select_unpredictable;
10use polars_utils::vec::PushUnchecked;
11
12/// UNSIGNED conversion:
13/// - `0 <= v < target_len`  → `Some(v)`
14/// - `v >= target_len`      → `None`
15///
16/// SIGNED conversion with Python-style negative semantics:
17/// - `v < -target_len`              → `None`
18/// - `-target_len <= v < 0`         → `Some(target_len + v)`
19/// - `0 <= v < target_len`          → `Some(v)`
20/// - `v >= target_len`              → `None`
21pub fn convert_and_bound_idx_ca<T>(
22    ca: &ChunkedArray<T>,
23    target_len: usize,
24    null_on_oob: bool,
25) -> PolarsResult<IdxCa>
26where
27    T: PolarsIntegerType,
28    T::Native: ToPrimitive,
29{
30    let mut out = Vec::with_capacity(ca.len());
31    let mut in_bounds = BitmapBuilder::with_capacity(ca.len());
32    assert!(target_len < IdxSize::MAX as usize);
33
34    let unsigned = T::Native::min_value() == T::Native::zero(); // Optimized to constant by compiler.
35    if unsigned {
36        let len_u64 = target_len as u64;
37        for arr in ca.downcast_iter() {
38            for v in arr.values().iter() {
39                // SAFETY: we reserved.
40                unsafe {
41                    if let Some(v_u64) = v.to_u64() {
42                        // Usually infallible.
43                        out.push_unchecked(v_u64 as IdxSize);
44                        in_bounds.push_unchecked(v_u64 < len_u64);
45                    } else {
46                        in_bounds.push_unchecked(false);
47                    }
48                }
49            }
50        }
51    } else {
52        let len_i64 = target_len as i64;
53        for arr in ca.downcast_iter() {
54            for v in arr.values().iter() {
55                // SAFETY: we reserved.
56                unsafe {
57                    if let Some(v_i64) = v.to_i64() {
58                        // Usually infallible.
59                        let mut shifted = v_i64;
60                        shifted += select_unpredictable(v_i64 < 0, len_i64, 0);
61                        out.push_unchecked(shifted as IdxSize);
62                        in_bounds.push_unchecked((v_i64 >= -len_i64) & (v_i64 < len_i64));
63                    } else {
64                        in_bounds.push_unchecked(false);
65                    }
66                }
67            }
68        }
69    }
70
71    let idx_arr = IdxArr::from_vec(out);
72    let in_bounds_valid = in_bounds.into_opt_validity();
73    let ca_valid = ca.rechunk_validity();
74    let valid = combine_validities_and(in_bounds_valid.as_ref(), ca_valid.as_ref());
75    let out = idx_arr.with_validity(valid);
76
77    if !null_on_oob && out.null_count() != ca.null_count() {
78        polars_bail!(
79            OutOfBounds: "gather indices are out of bounds"
80        );
81    }
82
83    Ok(out.into())
84}
85
86/// Convert arbitrary integer Series into IdxCa, using `target_len` as logical length.
87///
88/// - All OOB indices are mapped to null in `convert_*`.
89/// - We track null counts before and after:
90///   - if `null_on_oob == true`, extra nulls are expected and we just return.
91///   - if `null_on_oob == false` and new nulls appear, we raise OutOfBounds.
92pub fn convert_and_bound_index(
93    s: &Series,
94    target_len: usize,
95    null_on_oob: bool,
96) -> PolarsResult<IdxCa> {
97    let dtype = s.dtype();
98    polars_ensure!(
99        dtype.is_integer(),
100        InvalidOperation: "expected integers as index"
101    );
102
103    with_match_physical_integer_polars_type!(dtype, |$T| {
104        let ca: &ChunkedArray<$T> = s.as_ref().as_ref();
105        convert_and_bound_idx_ca(ca, target_len, null_on_oob)
106    })
107}