use crate::array::Array;
use crate::error::{NumRs2Error, Result};
use num_traits::{Float, NumCast};
use scirs2_core::parallel_ops::*;
use super::basic::PARALLEL_THRESHOLD;
pub fn histogram<T: Float + Clone + NumCast + std::fmt::Display + Send + Sync + 'static>(
a: &Array<T>,
bins: usize,
range: Option<(T, T)>,
weights: Option<&Array<T>>,
) -> Result<(Array<T>, Array<T>)> {
use super::basic::Statistics;
let data = a.to_vec();
if data.is_empty() || bins == 0 {
return Err(NumRs2Error::InvalidOperation(
"Cannot compute histogram of an empty array or with zero bins".to_string(),
));
}
let (min_val, max_val) = match range {
Some((min, max)) => {
if min >= max {
return Err(NumRs2Error::InvalidOperation(format!(
"Range ({}, {}) is invalid: min must be less than max",
min, max
)));
}
(min, max)
}
None => (a.min(), a.max()),
};
let step = (max_val - min_val) / T::from(bins).expect("bins should be representable");
let mut bin_edges = Vec::with_capacity(bins + 1);
for i in 0..=bins {
bin_edges.push(min_val + step * T::from(i).expect("index should be representable"));
}
let mut counts = vec![T::zero(); bins];
let inv_step = T::one() / step;
if data.len() >= PARALLEL_THRESHOLD {
let chunk_size = (64 * 1024 / std::mem::size_of::<T>())
.max(1024)
.min(data.len());
if let Some(w) = weights {
let weights_data = w.to_vec();
if weights_data.len() != data.len() {
return Err(NumRs2Error::ShapeMismatch {
expected: vec![data.len()],
actual: vec![weights_data.len()],
});
}
let partial_histograms: Vec<Vec<T>> = data
.par_chunks(chunk_size)
.zip(weights_data.par_chunks(chunk_size))
.map(|(chunk, weight_chunk)| {
let mut local_counts = vec![T::zero(); bins];
for (&val, &weight) in chunk.iter().zip(weight_chunk.iter()) {
if val >= min_val && val <= max_val {
let bin_idx = if val == max_val {
bins - 1
} else {
((val - min_val) * inv_step)
.to_usize()
.expect("bin index should be convertible")
.min(bins - 1)
};
local_counts[bin_idx] = local_counts[bin_idx] + weight;
}
}
local_counts
})
.collect();
for partial in partial_histograms {
for (i, &count) in partial.iter().enumerate() {
counts[i] = counts[i] + count;
}
}
} else {
let partial_histograms: Vec<Vec<T>> = data
.par_chunks(chunk_size)
.map(|chunk| {
let mut local_counts = vec![T::zero(); bins];
for &val in chunk {
if val >= min_val && val <= max_val {
let bin_idx = if val == max_val {
bins - 1
} else {
((val - min_val) * inv_step)
.to_usize()
.expect("bin index should be convertible")
.min(bins - 1)
};
local_counts[bin_idx] = local_counts[bin_idx] + T::one();
}
}
local_counts
})
.collect();
for partial in partial_histograms {
for (i, &count) in partial.iter().enumerate() {
counts[i] = counts[i] + count;
}
}
}
} else {
if let Some(w) = weights {
let weights_data = w.to_vec();
if weights_data.len() != data.len() {
return Err(NumRs2Error::ShapeMismatch {
expected: vec![data.len()],
actual: vec![weights_data.len()],
});
}
for (i, &val) in data.iter().enumerate() {
if val >= min_val && val <= max_val {
let bin_idx = if val == max_val {
bins - 1
} else {
((val - min_val) * inv_step)
.to_usize()
.expect("bin index should be convertible")
.min(bins - 1)
};
counts[bin_idx] = counts[bin_idx] + weights_data[i];
}
}
} else {
for &val in &data {
if val >= min_val && val <= max_val {
let bin_idx = if val == max_val {
bins - 1
} else {
((val - min_val) * inv_step)
.to_usize()
.expect("bin index should be convertible")
.min(bins - 1)
};
counts[bin_idx] = counts[bin_idx] + T::one();
}
}
}
}
Ok((Array::from_vec(counts), Array::from_vec(bin_edges)))
}
#[derive(Clone, Debug)]
pub enum BinSpec {
Count(usize),
Auto,
Sqrt,
Sturges,
Fd,
Rice,
Scott,
Doane,
}
impl From<usize> for BinSpec {
fn from(n: usize) -> Self {
BinSpec::Count(n)
}
}
impl From<&str> for BinSpec {
fn from(s: &str) -> Self {
match s.to_lowercase().as_str() {
"auto" => BinSpec::Auto,
"sqrt" => BinSpec::Sqrt,
"sturges" => BinSpec::Sturges,
"fd" | "freedman-diaconis" => BinSpec::Fd,
"rice" => BinSpec::Rice,
"scott" => BinSpec::Scott,
"doane" => BinSpec::Doane,
_ => BinSpec::Auto,
}
}
}
pub fn histogram_bin_edges<T: Float + Clone + NumCast + std::fmt::Display + Send + Sync>(
a: &Array<T>,
bins: impl Into<BinSpec>,
range: Option<(T, T)>,
) -> Result<Array<T>> {
let data = a.to_vec();
if data.is_empty() {
return Err(NumRs2Error::InvalidOperation(
"Cannot compute bin edges for an empty array".to_string(),
));
}
let bins_spec = bins.into();
let (min_val, max_val) = match range {
Some((min, max)) => {
if min >= max {
return Err(NumRs2Error::InvalidOperation(format!(
"Range ({}, {}) is invalid: min must be less than max",
min, max
)));
}
(min, max)
}
None => {
let min = data.iter().cloned().fold(T::infinity(), |a, b| a.min(b));
let max = data
.iter()
.cloned()
.fold(T::neg_infinity(), |a, b| a.max(b));
(min, max)
}
};
let n_bins = match bins_spec {
BinSpec::Count(n) => {
if n == 0 {
return Err(NumRs2Error::InvalidOperation(
"Number of bins must be positive".to_string(),
));
}
n
}
BinSpec::Auto => {
let sturges = compute_sturges_bins(data.len());
let fd = compute_fd_bins(&data, min_val, max_val);
sturges.max(fd)
}
BinSpec::Sqrt => {
let n = data.len() as f64;
n.sqrt().ceil() as usize
}
BinSpec::Sturges => compute_sturges_bins(data.len()),
BinSpec::Fd => compute_fd_bins(&data, min_val, max_val),
BinSpec::Rice => {
let n = data.len() as f64;
(2.0 * n.powf(1.0 / 3.0)).ceil() as usize
}
BinSpec::Scott => compute_scott_bins(&data, min_val, max_val),
BinSpec::Doane => compute_doane_bins(&data),
};
let n_bins = n_bins.max(1);
let step = (max_val - min_val) / T::from(n_bins).expect("n_bins should be representable");
let mut bin_edges = Vec::with_capacity(n_bins + 1);
for i in 0..=n_bins {
bin_edges.push(min_val + step * T::from(i).expect("index should be representable"));
}
Ok(Array::from_vec(bin_edges))
}
fn compute_sturges_bins(n: usize) -> usize {
let n_f = n as f64;
(n_f.log2() + 1.0).ceil() as usize
}
fn compute_fd_bins<T: Float + Clone + NumCast>(data: &[T], min_val: T, max_val: T) -> usize {
if data.len() < 4 {
return 1;
}
let mut sorted: Vec<T> = data.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = sorted.len();
let q1_idx = n / 4;
let q3_idx = (3 * n) / 4;
let iqr = sorted[q3_idx] - sorted[q1_idx];
if iqr == T::zero() {
return 1;
}
let n_f = T::from(n).expect("n should be representable");
let h = T::from(2.0).expect("2.0 should be representable") * iqr
/ n_f.powf(T::from(1.0 / 3.0).expect("1/3 should be representable"));
if h == T::zero() {
return 1;
}
let range = max_val - min_val;
((range / h)
.to_f64()
.expect("range/h should be convertible")
.ceil() as usize)
.max(1)
}
fn compute_scott_bins<T: Float + Clone + NumCast>(data: &[T], min_val: T, max_val: T) -> usize {
if data.len() < 2 {
return 1;
}
let n = data.len();
let n_f = T::from(n).expect("n should be representable");
let mean: T = data.iter().cloned().fold(T::zero(), |a, b| a + b) / n_f;
let variance: T = data
.iter()
.cloned()
.map(|x| (x - mean) * (x - mean))
.fold(T::zero(), |a, b| a + b)
/ n_f;
let std_dev = variance.sqrt();
if std_dev == T::zero() {
return 1;
}
let h = T::from(3.49).expect("3.49 should be representable") * std_dev
/ n_f.powf(T::from(1.0 / 3.0).expect("1/3 should be representable"));
if h == T::zero() {
return 1;
}
let range = max_val - min_val;
((range / h)
.to_f64()
.expect("range/h should be convertible")
.ceil() as usize)
.max(1)
}
fn compute_doane_bins<T: Float + Clone + NumCast>(data: &[T]) -> usize {
if data.len() < 3 {
return 1;
}
let n = data.len();
let n_f = n as f64;
let mean: f64 = data
.iter()
.map(|x| x.to_f64().expect("value should be convertible to f64"))
.sum::<f64>()
/ n_f;
let variance: f64 = data
.iter()
.map(|x| {
let d = x.to_f64().expect("value should be convertible to f64") - mean;
d * d
})
.sum::<f64>()
/ n_f;
let std_dev = variance.sqrt();
if std_dev == 0.0 {
return 1;
}
let skewness: f64 = data
.iter()
.map(|x| {
let d = (x.to_f64().expect("value should be convertible to f64") - mean) / std_dev;
d * d * d
})
.sum::<f64>()
/ n_f;
let sigma_g1 = (6.0 * (n_f - 2.0) / ((n_f + 1.0) * (n_f + 3.0))).sqrt();
let k = 1.0 + n_f.log2() + (1.0 + skewness.abs() / sigma_g1).log2();
k.ceil() as usize
}
pub enum HistBins {
Single(usize),
Tuple(usize, usize),
}
impl From<usize> for HistBins {
fn from(val: usize) -> Self {
HistBins::Single(val)
}
}
impl From<(usize, usize)> for HistBins {
fn from(val: (usize, usize)) -> Self {
HistBins::Tuple(val.0, val.1)
}
}
pub fn histogram2d<T: Float + Clone + NumCast + std::fmt::Display + Send + Sync>(
x: &Array<T>,
y: &Array<T>,
bins: impl Into<HistBins>,
range: Option<((T, T), (T, T))>,
weights: Option<&Array<T>>,
) -> Result<(Array<T>, Array<T>, Array<T>)> {
let bins_val = bins.into();
let (x_bins, y_bins) = match bins_val {
HistBins::Single(n) => (n, n),
HistBins::Tuple(nx, ny) => (nx, ny),
};
let x_data = x.to_vec();
let y_data = y.to_vec();
if x_data.len() != y_data.len() {
return Err(NumRs2Error::ShapeMismatch {
expected: vec![x_data.len()],
actual: vec![y_data.len()],
});
}
if x_data.is_empty() || x_bins == 0 || y_bins == 0 {
return Err(NumRs2Error::InvalidOperation(
"Cannot compute histogram2d with empty arrays or zero bins".to_string(),
));
}
let (x_min, x_max) = match range {
Some(((min, max), _)) => {
if min >= max {
return Err(NumRs2Error::InvalidOperation(format!(
"X range ({}, {}) is invalid: min must be less than max",
min, max
)));
}
(min, max)
}
None => {
let x_data = x.to_vec();
let x_min = x_data
.iter()
.fold(x_data[0], |acc, &val| if val < acc { val } else { acc });
let x_max = x_data
.iter()
.fold(x_data[0], |acc, &val| if val > acc { val } else { acc });
(x_min, x_max)
}
};
let (y_min, y_max) = match range {
Some((_, (min, max))) => {
if min >= max {
return Err(NumRs2Error::InvalidOperation(format!(
"Y range ({}, {}) is invalid: min must be less than max",
min, max
)));
}
(min, max)
}
None => {
let y_data = y.to_vec();
let y_min = y_data
.iter()
.fold(y_data[0], |acc, &val| if val < acc { val } else { acc });
let y_max = y_data
.iter()
.fold(y_data[0], |acc, &val| if val > acc { val } else { acc });
(y_min, y_max)
}
};
let x_step = (x_max - x_min) / T::from(x_bins).expect("x_bins should be representable");
let mut x_edges = Vec::with_capacity(x_bins + 1);
for i in 0..=x_bins {
x_edges.push(x_min + x_step * T::from(i).expect("index should be representable"));
}
let y_step = (y_max - y_min) / T::from(y_bins).expect("y_bins should be representable");
let mut y_edges = Vec::with_capacity(y_bins + 1);
for i in 0..=y_bins {
y_edges.push(y_min + y_step * T::from(i).expect("index should be representable"));
}
let x_inv_step = T::one() / x_step;
let y_inv_step = T::one() / y_step;
let total_bins = x_bins * y_bins;
let flat_hist = if x_data.len() >= PARALLEL_THRESHOLD {
let chunk_size = (64 * 1024 / (2 * std::mem::size_of::<T>()))
.max(1024)
.min(x_data.len());
if let Some(w) = weights {
let weights_data = w.to_vec();
if weights_data.len() != x_data.len() {
return Err(NumRs2Error::ShapeMismatch {
expected: vec![x_data.len()],
actual: vec![weights_data.len()],
});
}
let indices: Vec<usize> = (0..x_data.len()).collect();
let partial_histograms: Vec<Vec<T>> = indices
.par_chunks(chunk_size)
.map(|idx_chunk| {
let mut local_hist = vec![T::zero(); total_bins];
for &i in idx_chunk {
let x_val = x_data[i];
let y_val = y_data[i];
let weight = weights_data[i];
if x_val >= x_min && x_val <= x_max && y_val >= y_min && y_val <= y_max {
let x_idx = if x_val == x_max {
x_bins - 1
} else {
((x_val - x_min) * x_inv_step)
.to_usize()
.expect("x bin index should be convertible")
.min(x_bins - 1)
};
let y_idx = if y_val == y_max {
y_bins - 1
} else {
((y_val - y_min) * y_inv_step)
.to_usize()
.expect("y bin index should be convertible")
.min(y_bins - 1)
};
local_hist[x_idx * y_bins + y_idx] =
local_hist[x_idx * y_bins + y_idx] + weight;
}
}
local_hist
})
.collect();
let mut result = vec![T::zero(); total_bins];
for partial in partial_histograms {
for (i, &count) in partial.iter().enumerate() {
result[i] = result[i] + count;
}
}
result
} else {
let indices: Vec<usize> = (0..x_data.len()).collect();
let partial_histograms: Vec<Vec<T>> = indices
.par_chunks(chunk_size)
.map(|idx_chunk| {
let mut local_hist = vec![T::zero(); total_bins];
for &i in idx_chunk {
let x_val = x_data[i];
let y_val = y_data[i];
if x_val >= x_min && x_val <= x_max && y_val >= y_min && y_val <= y_max {
let x_idx = if x_val == x_max {
x_bins - 1
} else {
((x_val - x_min) * x_inv_step)
.to_usize()
.expect("x bin index should be convertible")
.min(x_bins - 1)
};
let y_idx = if y_val == y_max {
y_bins - 1
} else {
((y_val - y_min) * y_inv_step)
.to_usize()
.expect("y bin index should be convertible")
.min(y_bins - 1)
};
local_hist[x_idx * y_bins + y_idx] =
local_hist[x_idx * y_bins + y_idx] + T::one();
}
}
local_hist
})
.collect();
let mut result = vec![T::zero(); total_bins];
for partial in partial_histograms {
for (i, &count) in partial.iter().enumerate() {
result[i] = result[i] + count;
}
}
result
}
} else {
let mut hist = vec![T::zero(); total_bins];
if let Some(w) = weights {
let weights_data = w.to_vec();
if weights_data.len() != x_data.len() {
return Err(NumRs2Error::ShapeMismatch {
expected: vec![x_data.len()],
actual: vec![weights_data.len()],
});
}
for i in 0..x_data.len() {
let x_val = x_data[i];
let y_val = y_data[i];
let weight = weights_data[i];
if x_val >= x_min && x_val <= x_max && y_val >= y_min && y_val <= y_max {
let x_idx = if x_val == x_max {
x_bins - 1
} else {
((x_val - x_min) * x_inv_step)
.to_usize()
.expect("x bin index should be convertible")
.min(x_bins - 1)
};
let y_idx = if y_val == y_max {
y_bins - 1
} else {
((y_val - y_min) * y_inv_step)
.to_usize()
.expect("y bin index should be convertible")
.min(y_bins - 1)
};
hist[x_idx * y_bins + y_idx] = hist[x_idx * y_bins + y_idx] + weight;
}
}
} else {
for i in 0..x_data.len() {
let x_val = x_data[i];
let y_val = y_data[i];
if x_val >= x_min && x_val <= x_max && y_val >= y_min && y_val <= y_max {
let x_idx = if x_val == x_max {
x_bins - 1
} else {
((x_val - x_min) * x_inv_step)
.to_usize()
.expect("x bin index should be convertible")
.min(x_bins - 1)
};
let y_idx = if y_val == y_max {
y_bins - 1
} else {
((y_val - y_min) * y_inv_step)
.to_usize()
.expect("y bin index should be convertible")
.min(y_bins - 1)
};
hist[x_idx * y_bins + y_idx] = hist[x_idx * y_bins + y_idx] + T::one();
}
}
}
hist
};
Ok((
Array::from_vec(flat_hist).reshape(&[x_bins, y_bins]),
Array::from_vec(x_edges),
Array::from_vec(y_edges),
))
}
pub fn histogram_dd<T: Float + Clone + NumCast + std::fmt::Display>(
sample: &Array<T>,
bins: &[usize],
range: Option<Vec<(T, T)>>,
weights: Option<&Array<T>>,
) -> Result<(Array<T>, Vec<Array<T>>)> {
let shape = sample.shape();
if shape.len() != 2 {
return Err(NumRs2Error::InvalidOperation(
"histogram_dd requires 2D input array of shape (N, D)".to_string(),
));
}
let n_samples = shape[0];
let n_dims = shape[1];
if n_samples == 0 || n_dims == 0 {
return Err(NumRs2Error::InvalidOperation(
"Cannot compute histogram of empty data".to_string(),
));
}
if bins.is_empty() {
return Err(NumRs2Error::InvalidOperation(
"bins array cannot be empty".to_string(),
));
}
let bin_counts = if bins.len() == 1 {
vec![bins[0]; n_dims]
} else if bins.len() == n_dims {
bins.to_vec()
} else {
return Err(NumRs2Error::InvalidOperation(format!(
"bins length {} does not match number of dimensions {}",
bins.len(),
n_dims
)));
};
for &b in &bin_counts {
if b == 0 {
return Err(NumRs2Error::InvalidOperation(
"Number of bins must be greater than 0".to_string(),
));
}
}
if let Some(w) = weights {
if w.shape()[0] != n_samples {
return Err(NumRs2Error::ShapeMismatch {
expected: vec![n_samples],
actual: w.shape().to_vec(),
});
}
}
let mut ranges = Vec::with_capacity(n_dims);
let sample_data = sample.to_vec();
if let Some(r) = range {
if r.len() != n_dims {
return Err(NumRs2Error::InvalidOperation(format!(
"range length {} does not match number of dimensions {}",
r.len(),
n_dims
)));
}
ranges = r;
} else {
for d in 0..n_dims {
let mut min_val = sample_data[d];
let mut max_val = sample_data[d];
for i in 0..n_samples {
let val = sample_data[i * n_dims + d];
if val < min_val {
min_val = val;
}
if val > max_val {
max_val = val;
}
}
let epsilon = T::from(1e-10).expect("epsilon should be representable");
max_val = max_val + epsilon;
ranges.push((min_val, max_val));
}
}
let mut edges = Vec::with_capacity(n_dims);
let mut bin_steps = Vec::with_capacity(n_dims);
for (d, &n_bins) in bin_counts.iter().enumerate() {
let (min_val, max_val) = ranges[d];
let step = (max_val - min_val) / T::from(n_bins).expect("n_bins should be representable");
bin_steps.push(step);
let mut dim_edges = Vec::with_capacity(n_bins + 1);
for i in 0..=n_bins {
dim_edges.push(min_val + step * T::from(i).expect("index should be representable"));
}
edges.push(Array::from_vec(dim_edges));
}
let hist_shape: Vec<usize> = bin_counts.clone();
let total_bins: usize = hist_shape.iter().product();
let mut hist_data = vec![T::zero(); total_bins];
let indices_to_linear = |indices: &[usize]| -> usize {
let mut linear = 0;
let mut stride = 1;
for i in (0..n_dims).rev() {
linear += indices[i] * stride;
stride *= hist_shape[i];
}
linear
};
if let Some(w) = weights {
let weights_data = w.to_vec();
for i in 0..n_samples {
let mut indices = Vec::with_capacity(n_dims);
let mut in_bounds = true;
for d in 0..n_dims {
let val = sample_data[i * n_dims + d];
let (min_val, max_val) = ranges[d];
if val < min_val || val > max_val {
in_bounds = false;
break;
}
let mut idx = ((val - min_val) / bin_steps[d])
.to_usize()
.expect("bin index should be convertible");
if idx >= bin_counts[d] {
idx = bin_counts[d] - 1;
}
indices.push(idx);
}
if in_bounds {
let linear_idx = indices_to_linear(&indices);
hist_data[linear_idx] = hist_data[linear_idx] + weights_data[i];
}
}
} else {
for i in 0..n_samples {
let mut indices = Vec::with_capacity(n_dims);
let mut in_bounds = true;
for d in 0..n_dims {
let val = sample_data[i * n_dims + d];
let (min_val, max_val) = ranges[d];
if val < min_val || val > max_val {
in_bounds = false;
break;
}
let mut idx = ((val - min_val) / bin_steps[d])
.to_usize()
.expect("bin index should be convertible");
if idx >= bin_counts[d] {
idx = bin_counts[d] - 1;
}
indices.push(idx);
}
if in_bounds {
let linear_idx = indices_to_linear(&indices);
hist_data[linear_idx] = hist_data[linear_idx] + T::one();
}
}
}
let hist = Array::from_vec(hist_data).reshape(&hist_shape);
Ok((hist, edges))
}
pub fn bincount<T: Float + Clone + NumCast + Send + Sync>(
a: &Array<T>,
weights: Option<&Array<T>>,
minlength: Option<usize>,
) -> Result<Array<T>> {
let data = a.to_vec();
if data.is_empty() {
let min_len = minlength.unwrap_or(0);
let counts = vec![T::zero(); min_len];
return Ok(Array::from_vec(counts));
}
let max_val = data
.iter()
.fold(data[0], |max, &val| if val > max { val } else { max });
if max_val < T::zero() {
return Err(NumRs2Error::InvalidOperation(
"All values in bincount input array must be non-negative".to_string(),
));
}
let max_idx = max_val
.to_usize()
.expect("max value should be convertible to usize");
let min_length = minlength.unwrap_or(0);
let bin_count = (max_idx + 1).max(min_length);
let counts = if data.len() >= PARALLEL_THRESHOLD {
let chunk_size = (64 * 1024 / std::mem::size_of::<T>())
.max(1024)
.min(data.len());
if let Some(w) = weights {
let weights_data = w.to_vec();
if weights_data.len() != data.len() {
return Err(NumRs2Error::ShapeMismatch {
expected: vec![data.len()],
actual: vec![weights_data.len()],
});
}
if data.par_iter().any(|&val| val < T::zero()) {
return Err(NumRs2Error::InvalidOperation(
"All values in bincount input array must be non-negative".to_string(),
));
}
let indices: Vec<usize> = (0..data.len()).collect();
let partial_counts: Vec<Vec<T>> = indices
.par_chunks(chunk_size)
.map(|idx_chunk| {
let mut local_counts = vec![T::zero(); bin_count];
for &i in idx_chunk {
let idx = data[i]
.to_usize()
.expect("index should be convertible to usize");
if idx < bin_count {
local_counts[idx] = local_counts[idx] + weights_data[i];
}
}
local_counts
})
.collect();
let mut result = vec![T::zero(); bin_count];
for partial in partial_counts {
for (i, &count) in partial.iter().enumerate() {
result[i] = result[i] + count;
}
}
result
} else {
if data.par_iter().any(|&val| val < T::zero()) {
return Err(NumRs2Error::InvalidOperation(
"All values in bincount input array must be non-negative".to_string(),
));
}
let partial_counts: Vec<Vec<T>> = data
.par_chunks(chunk_size)
.map(|chunk| {
let mut local_counts = vec![T::zero(); bin_count];
for &val in chunk {
let idx = val
.to_usize()
.expect("index should be convertible to usize");
if idx < bin_count {
local_counts[idx] = local_counts[idx] + T::one();
}
}
local_counts
})
.collect();
let mut result = vec![T::zero(); bin_count];
for partial in partial_counts {
for (i, &count) in partial.iter().enumerate() {
result[i] = result[i] + count;
}
}
result
}
} else {
let mut counts = vec![T::zero(); bin_count];
if let Some(w) = weights {
let weights_data = w.to_vec();
if weights_data.len() != data.len() {
return Err(NumRs2Error::ShapeMismatch {
expected: vec![data.len()],
actual: vec![weights_data.len()],
});
}
for (i, &val) in data.iter().enumerate() {
if val < T::zero() {
return Err(NumRs2Error::InvalidOperation(
"All values in bincount input array must be non-negative".to_string(),
));
}
let idx = val
.to_usize()
.expect("index should be convertible to usize");
if idx < bin_count {
counts[idx] = counts[idx] + weights_data[i];
}
}
} else {
for &val in &data {
if val < T::zero() {
return Err(NumRs2Error::InvalidOperation(
"All values in bincount input array must be non-negative".to_string(),
));
}
let idx = val
.to_usize()
.expect("index should be convertible to usize");
if idx < bin_count {
counts[idx] = counts[idx] + T::one();
}
}
}
counts
};
Ok(Array::from_vec(counts))
}
pub fn digitize<T: Float + Clone + NumCast + Send + Sync>(
x: &Array<T>,
bins: &Array<T>,
right: Option<bool>,
) -> Result<Array<usize>> {
let x_data = x.to_vec();
let bins_data = bins.to_vec();
if bins_data.is_empty() {
return Err(NumRs2Error::InvalidOperation(
"Bins array cannot be empty".to_string(),
));
}
let mut increasing = true;
let mut decreasing = true;
for i in 1..bins_data.len() {
if bins_data[i] > bins_data[i - 1] {
decreasing = false;
}
if bins_data[i] < bins_data[i - 1] {
increasing = false;
}
}
if !increasing && !decreasing {
return Err(NumRs2Error::InvalidOperation(
"Bins must be monotonically increasing or decreasing".to_string(),
));
}
let use_right = right.unwrap_or(false);
let mut result = Vec::with_capacity(x_data.len());
if increasing {
for &val in &x_data {
let mut idx = 0;
for (i, &edge) in bins_data.iter().enumerate() {
if (use_right && val <= edge) || (!use_right && val < edge) {
idx = i;
break;
}
idx = bins_data.len();
}
result.push(idx);
}
} else {
for &val in &x_data {
let mut idx = 0;
for (i, &edge) in bins_data.iter().enumerate() {
if (use_right && val >= edge) || (!use_right && val > edge) {
idx = i;
break;
}
idx = bins_data.len();
}
result.push(idx);
}
}
Ok(Array::from_vec(result))
}