use crate::core::data::ColumnVector;
use crate::core::utils::Parallelizable;
#[cfg(feature = "parallel")]
use rayon::prelude::*;
/// Maps every entry of `values` to the label of the bin it falls into.
///
/// The output has one element per input value, in the same order. An element
/// is `None` when `ColumnVector::is_valid_in_mask` reports the row as not
/// valid for `validity`, or when the value is NaN; otherwise it is a clone of
/// `labels[find_bin(value, bins)]`.
///
/// # Panics
///
/// Panics if `labels` has no entry at the bin index chosen by `find_bin`,
/// because the label is fetched by direct indexing.
pub(crate) fn cut(
    values: &[f64],
    validity: &Option<Vec<u8>>,
    bins: &[f64],
    labels: &[String],
) -> Vec<Option<String>> {
    // Per-row labelling; the mask is consulted before the NaN check and the
    // bin lookup only happens for rows that pass both.
    let label_for_row = |(row, &value): (usize, &f64)| {
        let usable = ColumnVector::is_valid_in_mask(validity, row) && !value.is_nan();
        usable.then(|| labels[find_bin(value, bins)].clone())
    };

    values
        .maybe_par_iter()
        .enumerate()
        .map(label_for_row)
        .collect()
}
/// Returns the index of the bin that `value` belongs to.
///
/// `bins` holds the bin edges and is expected to be sorted in ascending order
/// and free of NaN (neither is checked here; the result is unspecified
/// otherwise). Bin `i` covers the half-open interval `[bins[i], bins[i + 1])`,
/// so `n` edges describe `n - 1` bins.
///
/// Out-of-range values are clamped: anything below the first edge goes to
/// bin `0`, anything at or above the last edge goes to the last bin
/// (`bins.len() - 2`). When an edge is repeated, a value equal to it is
/// assigned to the bin that starts at the last occurrence, so the choice is
/// deterministic and never an empty `[x, x)` bin followed by a real one.
///
/// # Panics
///
/// Panics if `bins` has fewer than two edges, or if `value` is NaN (NaN cannot
/// be ordered against the edges; callers are expected to filter it out first).
fn find_bin(value: f64, bins: &[f64]) -> usize {
    assert!(
        bins.len() >= 2,
        "find_bin requires at least two bin edges, got {}",
        bins.len()
    );
    assert!(!value.is_nan(), "Failed to compare floating point values");

    let last_bin_idx = bins.len() - 2;

    // Number of edges that are <= value. The bin starts at the last such edge,
    // i.e. one position before the partition point.
    let edges_at_or_below = bins.partition_point(|&edge| edge <= value);

    // saturating_sub clamps values below the first edge into bin 0;
    // min clamps values at/after the last edge into the last bin.
    edges_at_or_below.saturating_sub(1).min(last_bin_idx)
}