use ::ndarray::{Array, ArrayView, Ix1, Ix2};
use num_traits::{Float, FromPrimitive};
pub type HistogramResult<T> = Result<(Array<usize, Ix1>, Array<T, Ix1>), &'static str>;
pub type Histogram2dResult<T> =
Result<(Array<usize, Ix2>, Array<T, Ix1>, Array<T, Ix1>), &'static str>;
#[allow(dead_code)]
pub fn histogram<T>(
array: ArrayView<T, Ix1>,
bins: usize,
range: Option<(T, T)>,
weights: Option<ArrayView<T, Ix1>>,
) -> HistogramResult<T>
where
T: Clone + Float + FromPrimitive,
{
if array.is_empty() {
return Err("Cannot compute histogram of an empty array");
}
if bins == 0 {
return Err("Number of bins must be positive");
}
let (min_val, max_val) = match range {
Some(r) => r,
None => {
let mut min_val = T::infinity();
let mut max_val = T::neg_infinity();
for &val in array.iter() {
if val < min_val {
min_val = val;
}
if val > max_val {
max_val = val;
}
}
(min_val, max_val)
}
};
if min_val >= max_val {
return Err("Range must be (min, max) with min < max");
}
let mut bin_edges = Array::<T, Ix1>::zeros(bins + 1);
let bin_width = (max_val - min_val) / T::from_usize(bins).expect("Operation failed");
for i in 0..=bins {
bin_edges[i] = min_val + bin_width * T::from_usize(i).expect("Operation failed");
}
bin_edges[bins] = max_val;
let mut hist = Array::<usize, Ix1>::zeros(bins);
match weights {
Some(w) => {
if w.len() != array.len() {
return Err("Weights array must have the same length as the data array");
}
for (&val, &weight) in array.iter().zip(w.iter()) {
if val < min_val || val > max_val {
continue;
}
if val == max_val {
hist[bins - 1] += 1;
continue;
}
let scaled_val = (val - min_val) / bin_width;
let bin_idx = scaled_val.to_usize().unwrap_or(0);
let bin_idx = bin_idx.min(bins - 1);
let weight_int = weight.to_usize().unwrap_or(1);
hist[bin_idx] += weight_int;
}
}
None => {
for &val in array.iter() {
if val < min_val || val > max_val {
continue;
}
if val == max_val {
hist[bins - 1] += 1;
continue;
}
let scaled_val = (val - min_val) / bin_width;
let bin_idx = scaled_val.to_usize().unwrap_or(0);
let bin_idx = bin_idx.min(bins - 1);
hist[bin_idx] += 1;
}
}
}
Ok((hist, bin_edges))
}
#[allow(dead_code)]
pub fn histogram2d<T>(
x: ArrayView<T, Ix1>,
y: ArrayView<T, Ix1>,
bins: Option<(usize, usize)>,
range: Option<((T, T), (T, T))>,
weights: Option<ArrayView<T, Ix1>>,
) -> Histogram2dResult<T>
where
T: Clone + Float + FromPrimitive,
{
if x.is_empty() || y.is_empty() {
return Err("Cannot compute histogram of empty arrays");
}
if x.len() != y.len() {
return Err("x and y arrays must have the same length");
}
let (x_bins, y_bins) = bins.unwrap_or((10, 10));
if x_bins == 0 || y_bins == 0 {
return Err("Number of bins must be positive");
}
let ((x_min, x_max), (y_min, y_max)) = match range {
Some(r) => r,
None => {
let mut x_min = T::infinity();
let mut x_max = T::neg_infinity();
let mut y_min = T::infinity();
let mut y_max = T::neg_infinity();
for (&x_val, &y_val) in x.iter().zip(y.iter()) {
if x_val < x_min {
x_min = x_val;
}
if x_val > x_max {
x_max = x_val;
}
if y_val < y_min {
y_min = y_val;
}
if y_val > y_max {
y_max = y_val;
}
}
((x_min, x_max), (y_min, y_max))
}
};
if x_min >= x_max || y_min >= y_max {
return Err("Range must be (min, max) with min < max");
}
let mut x_edges = Array::<T, Ix1>::zeros(x_bins + 1);
let mut y_edges = Array::<T, Ix1>::zeros(y_bins + 1);
let x_bin_width = (x_max - x_min) / T::from_usize(x_bins).expect("Operation failed");
let y_bin_width = (y_max - y_min) / T::from_usize(y_bins).expect("Operation failed");
for i in 0..=x_bins {
x_edges[i] = x_min + x_bin_width * T::from_usize(i).expect("Operation failed");
}
for i in 0..=y_bins {
y_edges[i] = y_min + y_bin_width * T::from_usize(i).expect("Operation failed");
}
x_edges[x_bins] = x_max;
y_edges[y_bins] = y_max;
let mut hist = Array::<usize, Ix2>::zeros((y_bins, x_bins));
match weights {
Some(w) => {
if w.len() != x.len() {
return Err("Weights array must have the same length as the data arrays");
}
for ((&x_val, &y_val), &weight) in x.iter().zip(y.iter()).zip(w.iter()) {
if x_val < x_min || x_val > x_max || y_val < y_min || y_val > y_max {
continue;
}
let x_scaled = (x_val - x_min) / x_bin_width;
let y_scaled = (y_val - y_min) / y_bin_width;
let mut x_idx = x_scaled.to_usize().unwrap_or(0);
let mut y_idx = y_scaled.to_usize().unwrap_or(0);
if x_val == x_max {
x_idx = x_bins - 1;
} else {
x_idx = x_idx.min(x_bins - 1);
}
if y_val == y_max {
y_idx = y_bins - 1;
} else {
y_idx = y_idx.min(y_bins - 1);
}
let weight_int = weight.to_usize().unwrap_or(1);
hist[[y_idx, x_idx]] += weight_int;
}
}
None => {
for (&x_val, &y_val) in x.iter().zip(y.iter()) {
if x_val < x_min || x_val > x_max || y_val < y_min || y_val > y_max {
continue;
}
let x_scaled = (x_val - x_min) / x_bin_width;
let y_scaled = (y_val - y_min) / y_bin_width;
let mut x_idx = x_scaled.to_usize().unwrap_or(0);
let mut y_idx = y_scaled.to_usize().unwrap_or(0);
if x_val == x_max {
x_idx = x_bins - 1;
} else {
x_idx = x_idx.min(x_bins - 1);
}
if y_val == y_max {
y_idx = y_bins - 1;
} else {
y_idx = y_idx.min(y_bins - 1);
}
hist[[y_idx, x_idx]] += 1;
}
}
}
Ok((hist, x_edges, y_edges))
}
#[allow(dead_code)]
pub fn quantile<T>(
array: ArrayView<T, Ix1>,
q: ArrayView<T, Ix1>,
method: Option<&str>,
) -> Result<Array<T, Ix1>, &'static str>
where
T: Clone + Float + FromPrimitive,
{
if array.is_empty() {
return Err("Cannot compute quantile of an empty array");
}
for &val in q.iter() {
if val < T::from_f64(0.0).expect("Operation failed")
|| val > T::from_f64(1.0).expect("Operation failed")
{
return Err("Quantile values must be between 0 and 1");
}
}
let mut sorted: Vec<T> = array.iter().copied().collect();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = sorted.len();
let mut result = Array::<T, Ix1>::zeros(q.len());
let method = method.unwrap_or("linear");
for (i, &q_val) in q.iter().enumerate() {
if q_val == T::from_f64(0.0).expect("Operation failed") {
result[i] = sorted[0];
continue;
}
if q_val == T::from_f64(1.0).expect("Operation failed") {
result[i] = sorted[n - 1];
continue;
}
let h = T::from_usize(n - 1).expect("Operation failed") * q_val;
let h_floor = h.floor();
let idx_low = h_floor.to_usize().unwrap_or(0).min(n - 1);
let idx_high = (idx_low + 1).min(n - 1);
match method {
"linear" => {
let weight = h - h_floor;
result[i] = sorted[idx_low] * (T::from_f64(1.0).expect("Operation failed") - weight) + sorted[idx_high] * weight;
}
"lower" => {
result[i] = sorted[idx_low];
}
"higher" => {
result[i] = sorted[idx_high];
}
"midpoint" => {
result[i] = (sorted[idx_low] + sorted[idx_high]) / T::from_f64(2.0).expect("Operation failed");
}
"nearest" => {
let weight = h - h_floor;
if weight < T::from_f64(0.5).expect("Operation failed") {
result[i] = sorted[idx_low];
} else {
result[i] = sorted[idx_high];
}
}
_ => return Err("Invalid interpolation method. Use 'linear', 'lower', 'higher', 'midpoint', or 'nearest'"),
}
}
Ok(result)
}
#[allow(dead_code)]
pub fn bincount(
array: ArrayView<usize, Ix1>,
minlength: Option<usize>,
weights: Option<ArrayView<f64, Ix1>>,
) -> Result<Array<f64, Ix1>, &'static str> {
if array.is_empty() {
return Err("Cannot compute bincount of an empty array");
}
let mut max_val = 0;
for &val in array.iter() {
if val > max_val {
max_val = val;
}
}
let length = if let Some(min_len) = minlength {
max_val.max(min_len - 1) + 1
} else {
max_val + 1
};
let mut result = Array::<f64, Ix1>::zeros(length);
match weights {
Some(w) => {
if w.len() != array.len() {
return Err("Weights array must have same length as input array");
}
for (&idx, &weight) in array.iter().zip(w.iter()) {
result[idx] += weight;
}
}
None => {
for &idx in array.iter() {
result[idx] += 1.0;
}
}
}
Ok(result)
}
#[allow(dead_code)]
pub fn digitize<T>(
array: ArrayView<T, Ix1>,
bins: ArrayView<T, Ix1>,
right: bool,
result_type: &str,
) -> Result<Array<usize, Ix1>, &'static str>
where
T: Clone + Float + FromPrimitive,
{
if array.is_empty() {
return Err("Cannot digitize an empty array");
}
if bins.is_empty() {
return Err("Bins array cannot be empty");
}
for i in 1..bins.len() {
if bins[i] <= bins[i.saturating_sub(1)] {
return Err("Bins must be monotonically increasing");
}
}
let mut result = Array::<usize, Ix1>::zeros(array.len());
for (i, &val) in array.iter().enumerate() {
let mut bin_idx = 0;
if right {
for j in 0..bins.len() {
if val <= bins[j] {
bin_idx = j;
break;
}
bin_idx = bins.len();
}
} else {
for j in 0..bins.len() {
if val < bins[j] {
bin_idx = j;
break;
}
bin_idx = bins.len();
}
}
result[i] = bin_idx;
}
if result_type == "indices" {
Ok(result)
} else if result_type == "values" {
Err("use digitize_values() to return bin-edge values instead of indices")
} else {
Err("result_type must be 'indices' or 'values'")
}
}
#[allow(dead_code)]
pub fn digitize_values<T>(
array: ArrayView<T, Ix1>,
bins: ArrayView<T, Ix1>,
right: bool,
) -> Result<Array<T, Ix1>, &'static str>
where
T: Clone + Float + FromPrimitive,
{
if array.is_empty() {
return Err("Cannot digitize an empty array");
}
if bins.is_empty() {
return Err("Bins array cannot be empty");
}
for i in 1..bins.len() {
if bins[i] <= bins[i.saturating_sub(1)] {
return Err("Bins must be monotonically increasing");
}
}
let last_idx = bins.len() - 1;
let mut result: Vec<T> = Vec::with_capacity(array.len());
for &val in array.iter() {
let mut bin_idx = 0usize;
if right {
for j in 0..bins.len() {
if val <= bins[j] {
bin_idx = j;
break;
}
bin_idx = bins.len();
}
} else {
for j in 0..bins.len() {
if val < bins[j] {
bin_idx = j;
break;
}
bin_idx = bins.len();
}
}
let edge_idx = bin_idx.saturating_sub(1).min(last_idx);
result.push(bins[edge_idx]);
}
Ok(Array::from_vec(result))
}
#[cfg(test)]
mod tests {
use super::*;
use ::ndarray::array;
#[test]
fn test_digitize_values_basic() {
let data = array![1.2f64, 3.5, 5.1, 0.8, 2.9];
let bins = array![1.0f64, 3.0, 5.0];
let values =
digitize_values(data.view(), bins.view(), false).expect("digitize_values failed");
assert_eq!(values[0], 1.0); assert_eq!(values[1], 3.0); assert_eq!(values[2], 5.0); assert_eq!(values[3], 1.0); assert_eq!(values[4], 1.0); }
#[test]
fn test_digitize_values_right_inclusive() {
let data = array![0.5f64, 1.5, 3.5];
let bins = array![1.0f64, 3.0, 5.0];
let values =
digitize_values(data.view(), bins.view(), true).expect("digitize_values right failed");
assert_eq!(values[0], 1.0);
assert_eq!(values[1], 1.0);
assert_eq!(values[2], 3.0);
}
#[test]
fn test_digitize_values_past_last_bin() {
let data = array![100.0f64];
let bins = array![1.0f64, 2.0, 3.0];
let values = digitize_values(data.view(), bins.view(), false)
.expect("digitize_values overflow failed");
assert_eq!(values[0], 3.0);
}
#[test]
fn test_digitize_values_errors() {
let data = array![1.0f64];
let empty_bins: Array<f64, Ix1> = Array::zeros(0);
assert!(digitize_values(data.view(), empty_bins.view(), false).is_err());
let empty_data: Array<f64, Ix1> = Array::zeros(0);
let bins = array![1.0f64, 2.0];
assert!(digitize_values(empty_data.view(), bins.view(), false).is_err());
}
}