Skip to main content

scirs2/stats/
functions_3.rs

1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5use pyo3::exceptions::PyRuntimeError;
6use pyo3::prelude::*;
7use pyo3::types::{PyAny, PyDict};
8use scirs2_core::{ndarray::ArrayView1, Array1};
9use scirs2_numpy::{PyArray1, PyArrayMethods};
10use scirs2_stats::{
11    anderson_darling, bartlett, brown_forsythe, chi2_gof, covariance_simd, dagostino_k2, deciles,
12    kruskal_wallis, kurtosis_simd, levene, mann_whitney, mean_simd, moment_simd, one_way_anova,
13    pearson_r_simd, percentile_range_simd, sem_simd, shapiro_wilk, skewness_simd, std_simd,
14    variance_simd, wilcoxon, QuantileInterpolation,
15};
16
17/// Compute deciles (10th, 20th, ..., 90th percentiles) of a dataset
18#[pyfunction]
19pub fn deciles_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<Py<PyArray1<f64>>> {
20    let binding = data.readonly();
21    let arr = binding.as_array();
22    let result = deciles::<f64>(&arr.view(), QuantileInterpolation::Linear)
23        .map_err(|e| PyRuntimeError::new_err(format!("Deciles calculation failed: {}", e)))?;
24    Ok(PyArray1::from_vec(data.py(), result.to_vec()).into())
25}
26/// Compute standard error of the mean (SEM)
27#[pyfunction]
28#[pyo3(signature = (data, ddof = 1))]
29pub fn sem_py(data: &Bound<'_, PyArray1<f64>>, ddof: usize) -> PyResult<f64> {
30    let binding = data.readonly();
31    let arr = binding.as_array();
32    sem_simd::<f64, _>(&arr, ddof)
33        .map_err(|e| PyRuntimeError::new_err(format!("SEM calculation failed: {}", e)))
34}
35/// Compute the range between two percentiles
36#[pyfunction]
37#[pyo3(signature = (data, lower_pct, upper_pct, interpolation = "linear"))]
38pub fn percentile_range_py(
39    data: &Bound<'_, PyArray1<f64>>,
40    lower_pct: f64,
41    upper_pct: f64,
42    interpolation: &str,
43) -> PyResult<f64> {
44    let binding = data.readonly();
45    let mut arr = binding.as_array().to_owned();
46    percentile_range_simd::<f64, _>(&mut arr, lower_pct, upper_pct, interpolation)
47        .map_err(|e| PyRuntimeError::new_err(format!("Percentile range calculation failed: {}", e)))
48}
49/// Compute SIMD-optimized skewness (third standardized moment)
50#[pyfunction]
51#[pyo3(signature = (data, bias = false))]
52pub fn skewness_simd_py(data: &Bound<'_, PyArray1<f64>>, bias: bool) -> PyResult<f64> {
53    let binding = data.readonly();
54    let arr = binding.as_array();
55    skewness_simd::<f64, _>(&arr.view(), bias)
56        .map_err(|e| PyRuntimeError::new_err(format!("SIMD skewness calculation failed: {}", e)))
57}
58/// Compute SIMD-optimized kurtosis (fourth standardized moment)
59#[pyfunction]
60#[pyo3(signature = (data, fisher = true, bias = false))]
61pub fn kurtosis_simd_py(
62    data: &Bound<'_, PyArray1<f64>>,
63    fisher: bool,
64    bias: bool,
65) -> PyResult<f64> {
66    let binding = data.readonly();
67    let arr = binding.as_array();
68    kurtosis_simd::<f64, _>(&arr.view(), fisher, bias)
69        .map_err(|e| PyRuntimeError::new_err(format!("SIMD kurtosis calculation failed: {}", e)))
70}
71/// Compute SIMD-optimized Pearson correlation coefficient
72#[pyfunction]
73pub fn pearson_r_simd_py(
74    x: &Bound<'_, PyArray1<f64>>,
75    y: &Bound<'_, PyArray1<f64>>,
76) -> PyResult<f64> {
77    let x_binding = x.readonly();
78    let y_binding = y.readonly();
79    let x_arr = x_binding.as_array();
80    let y_arr = y_binding.as_array();
81    pearson_r_simd::<f64, _>(&x_arr.view(), &y_arr.view()).map_err(|e| {
82        PyRuntimeError::new_err(format!(
83            "SIMD Pearson correlation calculation failed: {}",
84            e
85        ))
86    })
87}
88/// Compute SIMD-optimized covariance
89#[pyfunction]
90#[pyo3(signature = (x, y, ddof = 1))]
91pub fn covariance_simd_py(
92    x: &Bound<'_, PyArray1<f64>>,
93    y: &Bound<'_, PyArray1<f64>>,
94    ddof: usize,
95) -> PyResult<f64> {
96    let x_binding = x.readonly();
97    let y_binding = y.readonly();
98    let x_arr = x_binding.as_array();
99    let y_arr = y_binding.as_array();
100    covariance_simd::<f64, _>(&x_arr.view(), &y_arr.view(), ddof)
101        .map_err(|e| PyRuntimeError::new_err(format!("SIMD covariance calculation failed: {}", e)))
102}
103/// Compute SIMD-optimized nth statistical moment
104#[pyfunction]
105#[pyo3(signature = (data, moment_order, center = true))]
106pub fn moment_simd_py(
107    data: &Bound<'_, PyArray1<f64>>,
108    moment_order: usize,
109    center: bool,
110) -> PyResult<f64> {
111    let binding = data.readonly();
112    let arr = binding.as_array();
113    moment_simd::<f64, _>(&arr.view(), moment_order, center)
114        .map_err(|e| PyRuntimeError::new_err(format!("SIMD moment calculation failed: {}", e)))
115}
116/// Compute SIMD-optimized mean
117#[pyfunction]
118pub fn mean_simd_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
119    let binding = data.readonly();
120    let arr = binding.as_array();
121    mean_simd::<f64, _>(&arr.view())
122        .map_err(|e| PyRuntimeError::new_err(format!("SIMD mean calculation failed: {}", e)))
123}
124/// Compute SIMD-optimized standard deviation
125#[pyfunction]
126#[pyo3(signature = (data, ddof = 1))]
127pub fn std_simd_py(data: &Bound<'_, PyArray1<f64>>, ddof: usize) -> PyResult<f64> {
128    let binding = data.readonly();
129    let arr = binding.as_array();
130    std_simd::<f64, _>(&arr.view(), ddof).map_err(|e| {
131        PyRuntimeError::new_err(format!("SIMD standard deviation calculation failed: {}", e))
132    })
133}
134/// Compute SIMD-optimized variance
135#[pyfunction]
136#[pyo3(signature = (data, ddof = 1))]
137pub fn variance_simd_py(data: &Bound<'_, PyArray1<f64>>, ddof: usize) -> PyResult<f64> {
138    let binding = data.readonly();
139    let arr = binding.as_array();
140    variance_simd::<f64, _>(&arr.view(), ddof)
141        .map_err(|e| PyRuntimeError::new_err(format!("SIMD variance calculation failed: {}", e)))
142}
143/// Shapiro-Wilk test for normality
144///
145/// Tests the null hypothesis that the data was drawn from a normal distribution.
146///
147/// Parameters:
148/// - data: Input data array
149///
150/// Returns:
151/// - Dict with 'statistic' (W statistic) and 'pvalue'
152#[pyfunction]
153pub fn shapiro_py(py: Python, data: &Bound<'_, PyArray1<f64>>) -> PyResult<Py<PyAny>> {
154    let binding = data.readonly();
155    let arr = binding.as_array();
156    let (statistic, pvalue) = shapiro_wilk(&arr.view())
157        .map_err(|e| PyRuntimeError::new_err(format!("Shapiro-Wilk test failed: {}", e)))?;
158    let dict = PyDict::new(py);
159    dict.set_item("statistic", statistic)?;
160    dict.set_item("pvalue", pvalue)?;
161    Ok(dict.into())
162}
163/// Chi-square goodness-of-fit test
164///
165/// Tests whether observed frequencies differ from expected frequencies.
166///
167/// Parameters:
168/// - observed: Observed frequencies (integers or floats)
169/// - expected: Expected frequencies (optional, defaults to uniform)
170///
171/// Returns:
172/// - Dict with 'statistic', 'pvalue', 'dof' (degrees of freedom)
173#[pyfunction]
174#[pyo3(signature = (observed, expected = None))]
175pub fn chisquare_py(
176    py: Python,
177    observed: &Bound<'_, PyArray1<f64>>,
178    expected: Option<&Bound<'_, PyArray1<f64>>>,
179) -> PyResult<Py<PyAny>> {
180    let obs_binding = observed.readonly();
181    let obs_arr = obs_binding.as_array();
182    let obs_int: Vec<i64> = obs_arr.iter().map(|&x| x.round() as i64).collect();
183    let obs_int_arr = Array1::from_vec(obs_int);
184    let exp_opt = expected.map(|e| {
185        let e_binding = e.readonly();
186        let e_arr = e_binding.as_array();
187        e_arr.to_owned()
188    });
189    let result = chi2_gof(&obs_int_arr.view(), exp_opt.as_ref().map(|e| e.view()))
190        .map_err(|e| PyRuntimeError::new_err(format!("Chi-square test failed: {}", e)))?;
191    let dict = PyDict::new(py);
192    dict.set_item("statistic", result.statistic)?;
193    dict.set_item("pvalue", result.p_value)?;
194    dict.set_item("dof", result.df)?;
195    Ok(dict.into())
196}
197/// One-way ANOVA (Analysis of Variance)
198///
199/// Tests whether the means of multiple groups are equal.
200///
201/// Parameters:
202/// - *args: Variable number of arrays, each representing a group
203///
204/// Returns:
205/// - Dict with 'f_statistic', 'pvalue', 'df_between', 'df_within',
206///   'ss_between', 'ss_within', 'ms_between', 'ms_within'
207#[pyfunction(signature = (*args))]
208pub fn f_oneway_py(py: Python, args: &Bound<'_, pyo3::types::PyTuple>) -> PyResult<Py<PyAny>> {
209    if args.len() < 2 {
210        return Err(PyRuntimeError::new_err("Need at least 2 groups for ANOVA"));
211    }
212    let mut group_arrays = Vec::new();
213    for item in args.iter() {
214        let arr: &Bound<'_, PyArray1<f64>> = item.cast()?;
215        let binding = arr.readonly();
216        group_arrays.push(binding.as_array().to_owned());
217    }
218    let group_views: Vec<ArrayView1<f64>> = group_arrays.iter().map(|a| a.view()).collect();
219    let group_refs: Vec<&ArrayView1<f64>> = group_views.iter().collect();
220    let result = one_way_anova(&group_refs)
221        .map_err(|e| PyRuntimeError::new_err(format!("ANOVA failed: {}", e)))?;
222    let dict = PyDict::new(py);
223    dict.set_item("f_statistic", result.f_statistic)?;
224    dict.set_item("pvalue", result.p_value)?;
225    dict.set_item("df_between", result.df_treatment)?;
226    dict.set_item("df_within", result.df_error)?;
227    dict.set_item("ss_between", result.ss_treatment)?;
228    dict.set_item("ss_within", result.ss_error)?;
229    dict.set_item("ms_between", result.ms_treatment)?;
230    dict.set_item("ms_within", result.ms_error)?;
231    Ok(dict.into())
232}
233/// Wilcoxon signed-rank test for paired samples.
234///
235/// Parameters:
236/// - x: First array of observations
237/// - y: Second array of observations (paired with x)
238/// - zero_method: How to handle zero differences: "wilcox" (default), "pratt"
239/// - correction: Whether to apply continuity correction (default: True)
240///
241/// Returns:
242/// - Dict with 'statistic', 'pvalue'
243#[pyfunction]
244#[pyo3(signature = (x, y, zero_method = "wilcox", correction = true))]
245pub fn wilcoxon_py(
246    py: Python,
247    x: &Bound<'_, PyArray1<f64>>,
248    y: &Bound<'_, PyArray1<f64>>,
249    zero_method: &str,
250    correction: bool,
251) -> PyResult<Py<PyAny>> {
252    let x_data = x.readonly();
253    let x_arr = x_data.as_array();
254    let y_data = y.readonly();
255    let y_arr = y_data.as_array();
256    let (statistic, pvalue) = wilcoxon(&x_arr.view(), &y_arr.view(), zero_method, correction)
257        .map_err(|e| PyRuntimeError::new_err(format!("Wilcoxon test failed: {}", e)))?;
258    let dict = PyDict::new(py);
259    dict.set_item("statistic", statistic)?;
260    dict.set_item("pvalue", pvalue)?;
261    Ok(dict.into())
262}
263/// Mann-Whitney U test for independent samples.
264///
265/// Parameters:
266/// - x: First array of observations
267/// - y: Second array of observations
268/// - alternative: Alternative hypothesis: "two-sided" (default), "less", or "greater"
269/// - use_continuity: Whether to apply continuity correction (default: True)
270///
271/// Returns:
272/// - Dict with 'statistic', 'pvalue'
273#[pyfunction]
274#[pyo3(signature = (x, y, alternative = "two-sided", use_continuity = true))]
275pub fn mannwhitneyu_py(
276    py: Python,
277    x: &Bound<'_, PyArray1<f64>>,
278    y: &Bound<'_, PyArray1<f64>>,
279    alternative: &str,
280    use_continuity: bool,
281) -> PyResult<Py<PyAny>> {
282    let x_data = x.readonly();
283    let x_arr = x_data.as_array();
284    let y_data = y.readonly();
285    let y_arr = y_data.as_array();
286    let (statistic, pvalue) =
287        mann_whitney(&x_arr.view(), &y_arr.view(), alternative, use_continuity)
288            .map_err(|e| PyRuntimeError::new_err(format!("Mann-Whitney U test failed: {}", e)))?;
289    let dict = PyDict::new(py);
290    dict.set_item("statistic", statistic)?;
291    dict.set_item("pvalue", pvalue)?;
292    Ok(dict.into())
293}
294/// Kruskal-Wallis H-test for independent samples.
295///
296/// Parameters:
297/// - *args: Variable number of arrays, one for each group
298///
299/// Returns:
300/// - Dict with 'statistic', 'pvalue'
301#[pyfunction(signature = (*args))]
302pub fn kruskal_py(py: Python, args: &Bound<'_, pyo3::types::PyTuple>) -> PyResult<Py<PyAny>> {
303    if args.len() < 2 {
304        return Err(PyRuntimeError::new_err(
305            "Need at least 2 groups for Kruskal-Wallis test",
306        ));
307    }
308    let mut arrays = Vec::new();
309    for item in args.iter() {
310        let arr: &Bound<'_, PyArray1<f64>> = item.cast()?;
311        let readonly = arr.readonly();
312        let owned = readonly.as_array().to_owned();
313        arrays.push(owned);
314    }
315    let views: Vec<_> = arrays.iter().map(|a| a.view()).collect();
316    let (statistic, pvalue) = kruskal_wallis(&views)
317        .map_err(|e| PyRuntimeError::new_err(format!("Kruskal-Wallis test failed: {}", e)))?;
318    let dict = PyDict::new(py);
319    dict.set_item("statistic", statistic)?;
320    dict.set_item("pvalue", pvalue)?;
321    Ok(dict.into())
322}
323/// Levene's test for homogeneity of variance.
324///
325/// Parameters:
326/// - *args: Two or more arrays, each representing a group
327/// - center: Which function to use: "mean", "median" (default), or "trimmed"
328/// - proportion_to_cut: When using "trimmed", the proportion to cut from each end (default: 0.05)
329///
330/// Returns:
331/// - Dict with 'statistic', 'pvalue'
332#[pyfunction(signature = (*args, center = "median", proportion_to_cut = 0.05))]
333pub fn levene_py(
334    py: Python,
335    args: &Bound<'_, pyo3::types::PyTuple>,
336    center: &str,
337    proportion_to_cut: f64,
338) -> PyResult<Py<PyAny>> {
339    if args.len() < 2 {
340        return Err(PyRuntimeError::new_err(
341            "Need at least 2 groups for Levene's test",
342        ));
343    }
344    let mut arrays = Vec::new();
345    for item in args.iter() {
346        let arr: &Bound<'_, PyArray1<f64>> = item.cast()?;
347        let readonly = arr.readonly();
348        let owned = readonly.as_array().to_owned();
349        arrays.push(owned);
350    }
351    let views: Vec<_> = arrays.iter().map(|a| a.view()).collect();
352    let (statistic, pvalue) = levene(&views, center, proportion_to_cut)
353        .map_err(|e| PyRuntimeError::new_err(format!("Levene's test failed: {}", e)))?;
354    let dict = PyDict::new(py);
355    dict.set_item("statistic", statistic)?;
356    dict.set_item("pvalue", pvalue)?;
357    Ok(dict.into())
358}
359/// Bartlett's test for homogeneity of variance.
360///
361/// Parameters:
362/// - *args: Two or more arrays, each representing a group
363///
364/// Returns:
365/// - Dict with 'statistic', 'pvalue'
366#[pyfunction(signature = (*args))]
367pub fn bartlett_test_py(py: Python, args: &Bound<'_, pyo3::types::PyTuple>) -> PyResult<Py<PyAny>> {
368    if args.len() < 2 {
369        return Err(PyRuntimeError::new_err(
370            "Need at least 2 groups for Bartlett's test",
371        ));
372    }
373    let mut arrays = Vec::new();
374    for item in args.iter() {
375        let arr: &Bound<'_, PyArray1<f64>> = item.cast()?;
376        let readonly = arr.readonly();
377        let owned = readonly.as_array().to_owned();
378        arrays.push(owned);
379    }
380    let views: Vec<_> = arrays.iter().map(|a| a.view()).collect();
381    let (statistic, pvalue) = bartlett(&views)
382        .map_err(|e| PyRuntimeError::new_err(format!("Bartlett's test failed: {}", e)))?;
383    let dict = PyDict::new(py);
384    dict.set_item("statistic", statistic)?;
385    dict.set_item("pvalue", pvalue)?;
386    Ok(dict.into())
387}
388/// Brown-Forsythe test for homogeneity of variance.
389///
390/// Parameters:
391/// - *args: Two or more arrays, each representing a group
392///
393/// Returns:
394/// - Dict with 'statistic', 'pvalue'
395#[pyfunction(signature = (*args))]
396pub fn brown_forsythe_py(
397    py: Python,
398    args: &Bound<'_, pyo3::types::PyTuple>,
399) -> PyResult<Py<PyAny>> {
400    if args.len() < 2 {
401        return Err(PyRuntimeError::new_err(
402            "Need at least 2 groups for Brown-Forsythe test",
403        ));
404    }
405    let mut arrays = Vec::new();
406    for item in args.iter() {
407        let arr: &Bound<'_, PyArray1<f64>> = item.cast()?;
408        let readonly = arr.readonly();
409        let owned = readonly.as_array().to_owned();
410        arrays.push(owned);
411    }
412    let views: Vec<_> = arrays.iter().map(|a| a.view()).collect();
413    let (statistic, pvalue) = brown_forsythe(&views)
414        .map_err(|e| PyRuntimeError::new_err(format!("Brown-Forsythe test failed: {}", e)))?;
415    let dict = PyDict::new(py);
416    dict.set_item("statistic", statistic)?;
417    dict.set_item("pvalue", pvalue)?;
418    Ok(dict.into())
419}
420/// Anderson-Darling test for normality.
421///
422/// Tests whether a sample comes from a normal distribution using the
423/// Anderson-Darling statistic. More sensitive to deviations in the tails
424/// than the Shapiro-Wilk test.
425///
426/// Parameters:
427///     x: Array of sample data
428///
429/// Returns:
430///     Dictionary with 'statistic' and 'pvalue' keys
431#[pyfunction]
432pub fn anderson_darling_py(py: Python, x: &Bound<'_, PyArray1<f64>>) -> PyResult<Py<PyAny>> {
433    let x_data = x.readonly();
434    let x_arr = x_data.as_array();
435    let (statistic, pvalue) = anderson_darling(&x_arr.view())
436        .map_err(|e| PyRuntimeError::new_err(format!("Anderson-Darling test failed: {}", e)))?;
437    let dict = PyDict::new(py);
438    dict.set_item("statistic", statistic)?;
439    dict.set_item("pvalue", pvalue)?;
440    Ok(dict.into())
441}
442/// D'Agostino's K-squared test for normality.
443///
444/// Tests whether a sample comes from a normal distribution using the
445/// D'Agostino-Pearson K² test, which combines tests for skewness and kurtosis.
446///
447/// Parameters:
448///     x: Array of sample data (minimum 20 observations)
449///
450/// Returns:
451///     Dictionary with 'statistic' and 'pvalue' keys
452#[pyfunction]
453pub fn dagostino_k2_py(py: Python, x: &Bound<'_, PyArray1<f64>>) -> PyResult<Py<PyAny>> {
454    let x_data = x.readonly();
455    let x_arr = x_data.as_array();
456    let (statistic, pvalue) = dagostino_k2(&x_arr.view())
457        .map_err(|e| PyRuntimeError::new_err(format!("D'Agostino K² test failed: {}", e)))?;
458    let dict = PyDict::new(py);
459    dict.set_item("statistic", statistic)?;
460    dict.set_item("pvalue", pvalue)?;
461    Ok(dict.into())
462}