Skip to main content

scirs2/stats/
functions_4.rs

1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5use pyo3::exceptions::PyRuntimeError;
6use pyo3::prelude::*;
7use pyo3::types::{PyAny, PyDict};
8use scirs2_core::Array2;
9use scirs2_numpy::{PyArray1, PyArray2, PyArrayMethods};
10use scirs2_stats::contingency::{fisher_exact, odds_ratio, relative_risk};
11use scirs2_stats::{
12    chi2_independence, chi2_yates, friedman, ks_2samp, linregress, pearsonr, polyfit, spearmanr,
13    tukey_hsd,
14};
15
16/// Two-sample Kolmogorov-Smirnov test.
17///
18/// Tests whether two samples come from the same distribution using the
19/// Kolmogorov-Smirnov statistic.
20///
21/// Parameters:
22///     x: First sample array
23///     y: Second sample array
24///     alternative: Type of hypothesis test ("two-sided", "less", or "greater")
25///
26/// Returns:
27///     Dictionary with 'statistic' and 'pvalue' keys
28#[pyfunction]
29#[pyo3(signature = (x, y, alternative = "two-sided"))]
30pub fn ks_2samp_py(
31    py: Python,
32    x: &Bound<'_, PyArray1<f64>>,
33    y: &Bound<'_, PyArray1<f64>>,
34    alternative: &str,
35) -> PyResult<Py<PyAny>> {
36    let x_data = x.readonly();
37    let x_arr = x_data.as_array();
38    let y_data = y.readonly();
39    let y_arr = y_data.as_array();
40    let (statistic, pvalue) = ks_2samp(&x_arr.view(), &y_arr.view(), alternative)
41        .map_err(|e| PyRuntimeError::new_err(format!("Two-sample KS test failed: {}", e)))?;
42    let dict = PyDict::new(py);
43    dict.set_item("statistic", statistic)?;
44    dict.set_item("pvalue", pvalue)?;
45    Ok(dict.into())
46}
47/// Friedman test for repeated measures.
48///
49/// Tests whether k treatments have different effects across n subjects.
50/// This is a nonparametric alternative to repeated measures ANOVA.
51///
52/// Parameters:
53///     data: 2D array with shape (n_subjects, n_treatments)
54///
55/// Returns:
56///     Dictionary with 'statistic' and 'pvalue' keys
57#[pyfunction]
58pub fn friedman_py(py: Python, data: &Bound<'_, PyArray2<f64>>) -> PyResult<Py<PyAny>> {
59    let data_readonly = data.readonly();
60    let data_view = data_readonly.as_array();
61    let data_arr = scirs2_core::ndarray::Array2::from_shape_vec(
62        data_view.dim(),
63        data_view.iter().copied().collect(),
64    )
65    .map_err(|e| PyRuntimeError::new_err(format!("Array conversion failed: {}", e)))?;
66    let (statistic, pvalue) = friedman(&data_arr.view())
67        .map_err(|e| PyRuntimeError::new_err(format!("Friedman test failed: {}", e)))?;
68    let dict = PyDict::new(py);
69    dict.set_item("statistic", statistic)?;
70    dict.set_item("pvalue", pvalue)?;
71    Ok(dict.into())
72}
73/// Chi-square test for independence (contingency table).
74///
75/// Tests whether two categorical variables are independent using a
76/// contingency table of observed frequencies.
77///
78/// Parameters:
79///     observed: 2D array of observed frequencies (contingency table)
80///
81/// Returns:
82///     Dictionary with 'statistic', 'pvalue', 'df', and 'expected'
83#[pyfunction]
84pub fn chi2_independence_py(
85    py: Python,
86    observed: &Bound<'_, PyArray2<i64>>,
87) -> PyResult<Py<PyAny>> {
88    let data_readonly = observed.readonly();
89    let data_view = data_readonly.as_array();
90    let data_arr = scirs2_core::ndarray::Array2::from_shape_vec(
91        data_view.dim(),
92        data_view.iter().copied().collect(),
93    )
94    .map_err(|e| PyRuntimeError::new_err(format!("Array conversion failed: {}", e)))?;
95    let result = chi2_independence::<f64, i64>(&data_arr.view()).map_err(|e| {
96        PyRuntimeError::new_err(format!("Chi-square independence test failed: {}", e))
97    })?;
98    let dict = PyDict::new(py);
99    dict.set_item("statistic", result.statistic)?;
100    dict.set_item("pvalue", result.p_value)?;
101    dict.set_item("df", result.df)?;
102    let shape = result.expected.dim();
103    let expected_vec: Vec<Vec<f64>> = (0..shape.0)
104        .map(|i| (0..shape.1).map(|j| result.expected[(i, j)]).collect())
105        .collect();
106    let expected_py = PyArray2::from_vec2(py, &expected_vec)
107        .map_err(|e| PyRuntimeError::new_err(format!("Failed to create expected array: {}", e)))?;
108    dict.set_item("expected", expected_py)?;
109    Ok(dict.into())
110}
111/// Chi-square test with Yates' continuity correction for 2x2 tables.
112///
113/// Applies Yates' correction to improve the chi-square approximation
114/// for small sample sizes in 2x2 contingency tables.
115///
116/// Parameters:
117///     observed: 2x2 array of observed frequencies
118///
119/// Returns:
120///     Dictionary with 'statistic', 'pvalue', 'df', and 'expected'
121#[pyfunction]
122pub fn chi2_yates_py(py: Python, observed: &Bound<'_, PyArray2<i64>>) -> PyResult<Py<PyAny>> {
123    let data_readonly = observed.readonly();
124    let data_view = data_readonly.as_array();
125    let shape = data_view.dim();
126    if shape.0 != 2 || shape.1 != 2 {
127        return Err(PyRuntimeError::new_err(
128            "Yates' correction requires a 2x2 contingency table",
129        ));
130    }
131    let data_arr = scirs2_core::ndarray::Array2::from_shape_vec(
132        data_view.dim(),
133        data_view.iter().copied().collect(),
134    )
135    .map_err(|e| PyRuntimeError::new_err(format!("Array conversion failed: {}", e)))?;
136    let result = chi2_yates::<f64, i64>(&data_arr.view())
137        .map_err(|e| PyRuntimeError::new_err(format!("Chi-square Yates' test failed: {}", e)))?;
138    let dict = PyDict::new(py);
139    dict.set_item("statistic", result.statistic)?;
140    dict.set_item("pvalue", result.p_value)?;
141    dict.set_item("df", result.df)?;
142    let expected_vec: Vec<f64> = result.expected.iter().copied().collect();
143    let expected_py = PyArray2::from_vec2(
144        py,
145        &[expected_vec[0..2].to_vec(), expected_vec[2..4].to_vec()],
146    )
147    .map_err(|e| PyRuntimeError::new_err(format!("Failed to create expected array: {}", e)))?;
148    dict.set_item("expected", expected_py)?;
149    Ok(dict.into())
150}
151/// Fisher's exact test for 2x2 contingency tables.
152///
153/// Performs Fisher's exact test on a 2x2 contingency table. This test is
154/// particularly useful for small sample sizes where the chi-square approximation
155/// may not be valid.
156///
157/// Parameters:
158///     table: 2x2 array of observed frequencies (must be 2x2)
159///     alternative: Alternative hypothesis (default: "two-sided")
160///                 - "two-sided": Test if association exists
161///                 - "less": Test if odds ratio < 1
162///                 - "greater": Test if odds ratio > 1
163///
164/// Returns:
165///     Dictionary with:
166///     - odds_ratio: Odds ratio (a*d)/(b*c)
167///     - pvalue: P-value for the test
168#[pyfunction]
169#[pyo3(signature = (table, alternative = "two-sided"))]
170pub fn fisher_exact_py(
171    py: Python,
172    table: &Bound<'_, PyArray2<f64>>,
173    alternative: &str,
174) -> PyResult<Py<PyAny>> {
175    let table_readonly = table.readonly();
176    let table_arr = Array2::from_shape_vec(
177        table_readonly.as_array().dim(),
178        table_readonly.as_array().iter().copied().collect(),
179    )
180    .map_err(|e| PyRuntimeError::new_err(format!("Array conversion failed: {}", e)))?;
181    let (odds_ratio, pvalue) = fisher_exact(&table_arr.view(), alternative)
182        .map_err(|e| PyRuntimeError::new_err(format!("Fisher's exact test failed: {}", e)))?;
183    let dict = PyDict::new(py);
184    dict.set_item("odds_ratio", odds_ratio)?;
185    dict.set_item("pvalue", pvalue)?;
186    Ok(dict.into())
187}
188/// Calculate odds ratio for a 2x2 contingency table.
189///
190/// The odds ratio is a measure of association between exposure and outcome.
191/// It represents the odds of outcome occurring in the exposed group relative
192/// to the unexposed group.
193///
194/// For a 2x2 table:
195///              Outcome+  Outcome-
196///   Exposed+      a         b
197///   Exposed-      c         d
198///
199/// Odds ratio = (a*d) / (b*c)
200///
201/// Parameters:
202///     table: 2x2 array of observed frequencies
203///
204/// Returns:
205///     Odds ratio value
206#[pyfunction]
207pub fn odds_ratio_py(table: &Bound<'_, PyArray2<f64>>) -> PyResult<f64> {
208    let table_readonly = table.readonly();
209    let table_arr = Array2::from_shape_vec(
210        table_readonly.as_array().dim(),
211        table_readonly.as_array().iter().copied().collect(),
212    )
213    .map_err(|e| PyRuntimeError::new_err(format!("Array conversion failed: {}", e)))?;
214    let or = odds_ratio(&table_arr.view())
215        .map_err(|e| PyRuntimeError::new_err(format!("Odds ratio calculation failed: {}", e)))?;
216    Ok(or)
217}
218/// Calculate relative risk (risk ratio) for a 2x2 contingency table.
219///
220/// The relative risk is a measure of association between exposure and outcome
221/// in cohort studies. It represents the risk of outcome in the exposed group
222/// relative to the unexposed group.
223///
224/// For a 2x2 table:
225///              Outcome+  Outcome-
226///   Exposed+      a         b
227///   Exposed-      c         d
228///
229/// Relative Risk = [a/(a+b)] / [c/(c+d)]
230///
231/// Parameters:
232///     table: 2x2 array of observed frequencies
233///
234/// Returns:
235///     Relative risk value
236#[pyfunction]
237pub fn relative_risk_py(table: &Bound<'_, PyArray2<f64>>) -> PyResult<f64> {
238    let table_readonly = table.readonly();
239    let table_arr = Array2::from_shape_vec(
240        table_readonly.as_array().dim(),
241        table_readonly.as_array().iter().copied().collect(),
242    )
243    .map_err(|e| PyRuntimeError::new_err(format!("Array conversion failed: {}", e)))?;
244    let rr = relative_risk(&table_arr.view())
245        .map_err(|e| PyRuntimeError::new_err(format!("Relative risk calculation failed: {}", e)))?;
246    Ok(rr)
247}
248/// Calculate a simple linear regression on two 1D arrays.
249///
250/// Performs ordinary least squares (OLS) linear regression to fit a line
251/// y = slope * x + intercept to the data, and computes the correlation
252/// coefficient, p-value, and standard error of the slope.
253///
254/// Parameters:
255///     x: Independent variable (predictor)
256///     y: Dependent variable (response)
257///        Must be same length as x
258///
259/// Returns:
260///     Dictionary with:
261///     - slope: Slope of the regression line
262///     - intercept: Y-intercept of the regression line
263///     - rvalue: Correlation coefficient (Pearson's r)
264///     - pvalue: Two-sided p-value for testing Hâ‚€: slope = 0
265///     - stderr: Standard error of the slope estimate
266#[pyfunction]
267pub fn linregress_py(
268    py: Python,
269    x: &Bound<'_, PyArray1<f64>>,
270    y: &Bound<'_, PyArray1<f64>>,
271) -> PyResult<Py<PyAny>> {
272    let x_readonly = x.readonly();
273    let x_arr = x_readonly.as_array();
274    let y_readonly = y.readonly();
275    let y_arr = y_readonly.as_array();
276    let (slope, intercept, rvalue, pvalue, stderr) = linregress(&x_arr.view(), &y_arr.view())
277        .map_err(|e| PyRuntimeError::new_err(format!("Linear regression failed: {}", e)))?;
278    let dict = PyDict::new(py);
279    dict.set_item("slope", slope)?;
280    dict.set_item("intercept", intercept)?;
281    dict.set_item("rvalue", rvalue)?;
282    dict.set_item("pvalue", pvalue)?;
283    dict.set_item("stderr", stderr)?;
284    Ok(dict.into())
285}
286/// Fit a polynomial of specified degree to data.
287///
288/// Fits a polynomial p(x) = c[0] + c[1]*x + c[2]*x^2 + ... + c[deg]*x^deg
289/// using least squares regression.
290///
291/// Parameters:
292///     x: Independent variable data (1D array)
293///     y: Dependent variable data (1D array, same length as x)
294///     deg: Degree of the fitting polynomial
295///
296/// Returns:
297///     Dictionary containing:
298///     - coefficients: Polynomial coefficients (c[0], c[1], ..., c[deg])
299///     - r_squared: Coefficient of determination
300///     - adj_r_squared: Adjusted R-squared
301///     - residuals: Residual values (y - fitted)
302///     - fitted_values: Fitted (predicted) y values
303///
304/// Example:
305///     >>> import scirs2
306///     >>> x = [0, 1, 2, 3, 4]
307///     >>> y = [1, 3, 9, 19, 33]  # y ≈ 1 + 2x + x^2
308///     >>> result = scirs2.polyfit(x, y, deg=2)
309///     >>> print(result["coefficients"])  # Should be close to [1, 2, 1]
310///
311/// TODO: Registration issue - function compiles but doesn't register with PyO3
312/// See /tmp/scirs2_session10_polyfit_issue.md for details
313#[allow(dead_code)]
314#[pyfunction]
315pub fn polyfit_py(
316    py: Python,
317    x: &Bound<'_, PyArray1<f64>>,
318    y: &Bound<'_, PyArray1<f64>>,
319    deg: usize,
320) -> PyResult<Py<PyAny>> {
321    let x_readonly = x.readonly();
322    let x_arr = x_readonly.as_array();
323    let y_readonly = y.readonly();
324    let y_arr = y_readonly.as_array();
325    let result = polyfit::<f64>(&x_arr.view(), &y_arr.view(), deg)
326        .map_err(|e| PyRuntimeError::new_err(format!("Polynomial fit failed: {}", e)))?;
327    let dict = PyDict::new(py);
328    let coef_vec: Vec<f64> = result.coefficients.to_vec();
329    dict.set_item("coefficients", coef_vec)?;
330    dict.set_item("r_squared", result.r_squared)?;
331    dict.set_item("adj_r_squared", result.adj_r_squared)?;
332    let residuals_vec: Vec<f64> = result.residuals.to_vec();
333    dict.set_item("residuals", residuals_vec)?;
334    let fitted_vec: Vec<f64> = result.fitted_values.to_vec();
335    dict.set_item("fitted_values", fitted_vec)?;
336    Ok(dict.into())
337}
338/// Tukey's Honestly Significant Difference (HSD) post-hoc test.
339///
340/// Performs pairwise comparisons between group means after a significant
341/// ANOVA result. Controls the family-wise error rate.
342///
343/// Parameters:
344///     *args: Variable number of group arrays (minimum 2 groups)
345///     alpha: Significance level (default: 0.05)
346///
347/// Returns:
348///     List of dictionaries, each containing:
349///     - group1: Index of first group
350///     - group2: Index of second group
351///     - mean_diff: Mean difference between groups
352///     - pvalue: P-value for the comparison
353///     - significant: Whether the difference is significant at alpha level
354#[pyfunction]
355#[pyo3(signature = (*args, alpha = 0.05))]
356pub fn tukey_hsd_py(
357    py: Python,
358    args: &Bound<'_, pyo3::types::PyTuple>,
359    alpha: f64,
360) -> PyResult<Py<PyAny>> {
361    if args.len() < 2 {
362        return Err(PyRuntimeError::new_err(
363            "Need at least 2 groups for Tukey's HSD",
364        ));
365    }
366    let mut arrays = Vec::new();
367    for item in args.iter() {
368        let arr: &Bound<'_, PyArray1<f64>> = item.cast()?;
369        let readonly = arr.readonly();
370        let owned = readonly.as_array().to_owned();
371        arrays.push(owned);
372    }
373    let views: Vec<_> = arrays.iter().map(|a| a.view()).collect();
374    let view_refs: Vec<&_> = views.iter().collect();
375    let results = tukey_hsd(&view_refs, alpha)
376        .map_err(|e| PyRuntimeError::new_err(format!("Tukey's HSD failed: {}", e)))?;
377    let result_list = pyo3::types::PyList::empty(py);
378    for (group1, group2, mean_diff, pvalue, significant) in results {
379        let dict = PyDict::new(py);
380        dict.set_item("group1", group1)?;
381        dict.set_item("group2", group2)?;
382        dict.set_item("mean_diff", mean_diff)?;
383        dict.set_item("pvalue", pvalue)?;
384        dict.set_item("significant", significant)?;
385        result_list.append(dict)?;
386    }
387    Ok(result_list.into())
388}
389/// Pearson correlation coefficient with significance test.
390///
391/// Calculates the Pearson correlation coefficient and tests for non-correlation.
392///
393/// Parameters:
394///     x: First array of observations
395///     y: Second array of observations (same length as x)
396///     alternative: Type of test: "two-sided" (default), "less", or "greater"
397///
398/// Returns:
399///     Dictionary containing:
400///     - correlation: Pearson correlation coefficient (r)
401///     - pvalue: P-value for testing non-correlation
402#[pyfunction]
403#[pyo3(signature = (x, y, alternative = "two-sided"))]
404pub fn pearsonr_py(
405    py: Python,
406    x: &Bound<'_, PyArray1<f64>>,
407    y: &Bound<'_, PyArray1<f64>>,
408    alternative: &str,
409) -> PyResult<Py<PyAny>> {
410    let x_readonly = x.readonly();
411    let x_arr = x_readonly.as_array();
412    let y_readonly = y.readonly();
413    let y_arr = y_readonly.as_array();
414    let (r, pvalue) = pearsonr(&x_arr.view(), &y_arr.view(), alternative)
415        .map_err(|e| PyRuntimeError::new_err(format!("Pearson correlation test failed: {}", e)))?;
416    let dict = PyDict::new(py);
417    dict.set_item("correlation", r)?;
418    dict.set_item("pvalue", pvalue)?;
419    Ok(dict.into())
420}
421/// Spearman rank correlation coefficient with significance test.
422///
423/// Calculates the Spearman rank correlation coefficient and tests for non-correlation.
424///
425/// Parameters:
426///     x: First array of observations
427///     y: Second array of observations (same length as x)
428///     alternative: Type of test: "two-sided" (default), "less", or "greater"
429///
430/// Returns:
431///     Dictionary containing:
432///     - correlation: Spearman rank correlation coefficient (rho)
433///     - pvalue: P-value for testing non-correlation
434#[pyfunction]
435#[pyo3(signature = (x, y, alternative = "two-sided"))]
436pub fn spearmanr_py(
437    py: Python,
438    x: &Bound<'_, PyArray1<f64>>,
439    y: &Bound<'_, PyArray1<f64>>,
440    alternative: &str,
441) -> PyResult<Py<PyAny>> {
442    let x_readonly = x.readonly();
443    let x_arr = x_readonly.as_array();
444    let y_readonly = y.readonly();
445    let y_arr = y_readonly.as_array();
446    let (rho, pvalue) = spearmanr(&x_arr.view(), &y_arr.view(), alternative)
447        .map_err(|e| PyRuntimeError::new_err(format!("Spearman correlation test failed: {}", e)))?;
448    let dict = PyDict::new(py);
449    dict.set_item("correlation", rho)?;
450    dict.set_item("pvalue", pvalue)?;
451    Ok(dict.into())
452}