use crate::array::Array;
use crate::axis_ops::AxisOps;
use crate::python::array::PyArray;
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use std::cmp::Ordering;
#[pyfunction]
fn mean(py: Python<'_>, a: &PyArray, axis: Option<usize>) -> PyResult<Py<PyAny>> {
match axis {
None => {
let v = a.mean();
Ok(v.into_pyobject(py)?.into_any().unbind())
}
Some(ax) => {
let result = a
.inner
.mean_axis(Some(ax))
.map_err(|e| PyValueError::new_err(e.to_string()))?;
let py_arr = PyArray { inner: result };
py_arr.into_pyobject(py).map(|b| b.into_any().unbind())
}
}
}
#[pyfunction]
fn median(py: Python<'_>, a: &PyArray, axis: Option<usize>) -> PyResult<Py<PyAny>> {
match axis {
None => {
let mut data = a.tolist();
if data.is_empty() {
return Err(PyValueError::new_err(
"Cannot compute median of empty array",
));
}
data.sort_by(|x: &f64, y: &f64| x.partial_cmp(y).unwrap_or(Ordering::Equal));
let len = data.len();
let v = if len.is_multiple_of(2) {
(data[len / 2 - 1] + data[len / 2]) / 2.0
} else {
data[len / 2]
};
Ok(v.into_pyobject(py)?.into_any().unbind())
}
Some(ax) => {
let shape = a.inner.shape();
let ndim = a.inner.ndim();
if ax >= ndim {
return Err(PyValueError::new_err(format!(
"Axis {} out of bounds for array of dimension {}",
ax, ndim
)));
}
let axis_len = shape[ax];
if axis_len == 0 {
return Err(PyValueError::new_err("Cannot compute median of empty axis"));
}
let mut out_shape = shape.clone();
out_shape.remove(ax);
let out_size: usize = out_shape.iter().product::<usize>().max(1);
let data = a.inner.to_vec();
let mut result = vec![0.0_f64; out_size];
for out_i in 0..out_size {
let mut out_idx = vec![0usize; out_shape.len()];
let mut tmp = out_i;
for d in (0..out_shape.len()).rev() {
out_idx[d] = tmp % out_shape[d];
tmp /= out_shape[d];
}
let mut slice = Vec::with_capacity(axis_len);
for j in 0..axis_len {
let mut full_idx = out_idx.clone();
full_idx.insert(ax, j);
let mut linear = 0usize;
let mut stride = 1usize;
for d in (0..shape.len()).rev() {
linear += full_idx[d] * stride;
stride *= shape[d];
}
slice.push(data[linear]);
}
slice.sort_by(|x, y| x.partial_cmp(y).unwrap_or(Ordering::Equal));
let n = slice.len();
result[out_i] = if n.is_multiple_of(2) {
(slice[n / 2 - 1] + slice[n / 2]) / 2.0
} else {
slice[n / 2]
};
}
let arr = Array::from_vec(result).reshape(&out_shape);
let py_arr = PyArray { inner: arr };
py_arr.into_pyobject(py).map(|b| b.into_any().unbind())
}
}
}
#[pyfunction]
#[pyo3(name = "std")]
fn stddev(
py: Python<'_>,
a: &PyArray,
axis: Option<usize>,
ddof: Option<usize>,
) -> PyResult<Py<PyAny>> {
let ddof_val = ddof.unwrap_or(0);
match axis {
None => {
let data = a.tolist();
let n = data.len();
if n <= ddof_val {
return Err(PyValueError::new_err("Sample size is too small"));
}
let mean = data.iter().sum::<f64>() / n as f64;
let variance =
data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - ddof_val) as f64;
Ok(variance.sqrt().into_pyobject(py)?.into_any().unbind())
}
Some(ax) => {
let var_arr = a
.inner
.var_axis(Some(ax))
.map_err(|e| PyValueError::new_err(e.to_string()))?;
let axis_n = a.inner.shape()[ax];
if axis_n <= ddof_val {
return Err(PyValueError::new_err("Sample size is too small"));
}
let scale = axis_n as f64 / (axis_n - ddof_val) as f64;
let scaled = var_arr.map(|v| (v * scale).sqrt());
let py_arr = PyArray { inner: scaled };
py_arr.into_pyobject(py).map(|b| b.into_any().unbind())
}
}
}
#[pyfunction]
fn var(
py: Python<'_>,
a: &PyArray,
axis: Option<usize>,
ddof: Option<usize>,
) -> PyResult<Py<PyAny>> {
let ddof_val = ddof.unwrap_or(0);
match axis {
None => {
let data = a.tolist();
let n = data.len();
if n <= ddof_val {
return Err(PyValueError::new_err("Sample size is too small"));
}
let mean = data.iter().sum::<f64>() / n as f64;
let variance =
data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - ddof_val) as f64;
Ok(variance.into_pyobject(py)?.into_any().unbind())
}
Some(ax) => {
let var_arr = a
.inner
.var_axis(Some(ax))
.map_err(|e| PyValueError::new_err(e.to_string()))?;
let axis_n = a.inner.shape()[ax];
if axis_n <= ddof_val {
return Err(PyValueError::new_err("Sample size is too small"));
}
let scale = axis_n as f64 / (axis_n - ddof_val) as f64;
let scaled = var_arr.map(|v| v * scale);
let py_arr = PyArray { inner: scaled };
py_arr.into_pyobject(py).map(|b| b.into_any().unbind())
}
}
}
#[pyfunction]
fn corrcoef(x: &PyArray, y: Option<&PyArray>) -> PyResult<PyArray> {
if let Some(y_arr) = y {
let x_data = x.inner.to_vec();
let y_data = y_arr.inner.to_vec();
if x_data.len() != y_data.len() {
return Err(PyValueError::new_err("Arrays must have the same length"));
}
if x_data.is_empty() {
return Err(PyValueError::new_err("Arrays must not be empty"));
}
let mean_x = x_data.iter().sum::<f64>() / x_data.len() as f64;
let mean_y = y_data.iter().sum::<f64>() / y_data.len() as f64;
let mut cov = 0.0;
let mut var_x = 0.0;
let mut var_y = 0.0;
for i in 0..x_data.len() {
let dx = x_data[i] - mean_x;
let dy = y_data[i] - mean_y;
cov += dx * dy;
var_x += dx * dx;
var_y += dy * dy;
}
let std_x = var_x.sqrt();
let std_y = var_y.sqrt();
if std_x == 0.0 || std_y == 0.0 {
return Err(PyValueError::new_err("Standard deviation is zero"));
}
let corr = cov / (std_x * std_y);
let data = vec![1.0, corr, corr, 1.0];
Ok(PyArray {
inner: crate::array::Array::from_vec(data).reshape(&[2, 2]),
})
} else {
Err(PyValueError::new_err(
"Single array correlation not yet supported",
))
}
}
#[pyfunction]
fn cov(m: &PyArray, rowvar: Option<bool>) -> PyResult<PyArray> {
let cov_matrix = crate::stats::correlation::cov::<f64>(&m.inner, None, rowvar, None, None)
.map_err(|e| PyValueError::new_err(e.to_string()))?;
Ok(PyArray { inner: cov_matrix })
}
#[pyfunction]
fn histogram(
a: &PyArray,
bins: Option<usize>,
range: Option<(f64, f64)>,
) -> PyResult<(PyArray, PyArray)> {
let bins = bins.unwrap_or(10);
let data = a.tolist();
if data.is_empty() {
return Err(PyValueError::new_err(
"Cannot compute histogram of empty array",
));
}
let (min_val, max_val) = if let Some((min, max)) = range {
(min, max)
} else {
let min = data
.iter()
.copied()
.min_by(|a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
.ok_or_else(|| PyValueError::new_err("Cannot find minimum"))?;
let max = data
.iter()
.copied()
.max_by(|a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
.ok_or_else(|| PyValueError::new_err("Cannot find maximum"))?;
(min, max)
};
let bin_width = (max_val - min_val) / bins as f64;
let mut counts = vec![0usize; bins];
for &value in &data {
if value >= min_val && value <= max_val {
let bin_idx = ((value - min_val) / bin_width).floor() as usize;
let bin_idx = bin_idx.min(bins - 1);
counts[bin_idx] += 1;
}
}
let edges: Vec<f64> = (0..=bins).map(|i| min_val + i as f64 * bin_width).collect();
let counts_f64: Vec<f64> = counts.iter().map(|&c| c as f64).collect();
Ok((
PyArray {
inner: crate::array::Array::from_vec(counts_f64),
},
PyArray {
inner: crate::array::Array::from_vec(edges),
},
))
}
#[pyfunction]
fn percentile(a: &PyArray, q: f64) -> PyResult<f64> {
if !(0.0..=100.0).contains(&q) {
return Err(PyValueError::new_err(
"Percentile must be between 0 and 100",
));
}
let mut data = a.tolist();
if data.is_empty() {
return Err(PyValueError::new_err(
"Cannot compute percentile of empty array",
));
}
data.sort_by(|a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(Ordering::Equal));
let index = (q / 100.0) * (data.len() - 1) as f64;
let lower = index.floor() as usize;
let upper = index.ceil() as usize;
let fraction = index - lower as f64;
Ok(data[lower] + fraction * (data[upper] - data[lower]))
}
#[pyfunction]
fn randn(size: Vec<usize>) -> PyResult<PyArray> {
use scirs2_core::random::*;
let total_size: usize = size.iter().product();
let mut rng = thread_rng();
let dist = Normal::new(0.0, 1.0).map_err(|e| {
PyValueError::new_err(format!("Failed to create normal distribution: {}", e))
})?;
let data: Vec<f64> = (0..total_size).map(|_| dist.sample(&mut rng)).collect();
Ok(PyArray {
inner: crate::array::Array::from_vec(data).reshape(&size),
})
}
#[pyfunction]
fn rand(size: Vec<usize>) -> PyResult<PyArray> {
use scirs2_core::random::*;
let total_size: usize = size.iter().product();
let mut rng = thread_rng();
let data: Vec<f64> = (0..total_size).map(|_| rng.random::<f64>()).collect();
Ok(PyArray {
inner: crate::array::Array::from_vec(data).reshape(&size),
})
}
pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> {
let stats_module = PyModule::new(m.py(), "stats")?;
stats_module.add_function(wrap_pyfunction!(mean, m)?)?;
stats_module.add_function(wrap_pyfunction!(median, m)?)?;
stats_module.add_function(wrap_pyfunction!(stddev, m)?)?;
stats_module.add_function(wrap_pyfunction!(var, m)?)?;
stats_module.add_function(wrap_pyfunction!(corrcoef, m)?)?;
stats_module.add_function(wrap_pyfunction!(cov, m)?)?;
stats_module.add_function(wrap_pyfunction!(histogram, m)?)?;
stats_module.add_function(wrap_pyfunction!(percentile, m)?)?;
m.add_submodule(&stats_module)?;
let random_module = PyModule::new(m.py(), "random")?;
random_module.add_function(wrap_pyfunction!(randn, m)?)?;
random_module.add_function(wrap_pyfunction!(rand, m)?)?;
m.add_submodule(&random_module)?;
Ok(())
}