Skip to main content

scirs2/stats/
functions_2.rs

1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5use pyo3::exceptions::PyRuntimeError;
6use pyo3::prelude::*;
7use pyo3::types::{PyAny, PyDict};
8use scirs2_numpy::{PyArray1, PyArrayMethods};
9use scirs2_stats::tests::ttest::{ttest_rel, Alternative};
10use scirs2_stats::{
11    boxplot_stats, coef_variation, cross_entropy, data_range, entropy, gini_coefficient,
12    kl_divergence, kurtosis_ci, mean_abs_deviation, median_abs_deviation, moment, quartiles,
13    quintiles, skewness_ci, weighted_mean, winsorized_mean, winsorized_variance,
14    QuantileInterpolation,
15};
16
17/// Paired (related samples) t-test.
18///
19/// Tests whether the means of two related/paired samples differ.
20/// This is a parametric test for paired observations (e.g., before/after measurements).
21///
22/// Parameters:
23///     a: First array of observations (e.g., before treatment)
24///     b: Second array of observations (e.g., after treatment)
25///         Must be same length as a (paired observations)
26///     alternative: Alternative hypothesis: "two-sided" (default), "less", or "greater"
27///
28/// Returns:
29///     Dictionary with 'statistic' (t), 'pvalue', and 'df' (degrees of freedom)
30#[pyfunction]
31#[pyo3(signature = (a, b, alternative = "two-sided"))]
32pub fn ttest_rel_py(
33    py: Python,
34    a: &Bound<'_, PyArray1<f64>>,
35    b: &Bound<'_, PyArray1<f64>>,
36    alternative: &str,
37) -> PyResult<Py<PyAny>> {
38    let a_binding = a.readonly();
39    let a_arr = a_binding.as_array();
40    let b_binding = b.readonly();
41    let b_arr = b_binding.as_array();
42    let alt = match alternative.to_lowercase().as_str() {
43        "two-sided" | "two_sided" => Alternative::TwoSided,
44        "less" => Alternative::Less,
45        "greater" => Alternative::Greater,
46        _ => {
47            return Err(PyRuntimeError::new_err(format!(
48                "Invalid alternative: {}. Use 'two-sided', 'less', or 'greater'",
49                alternative
50            )));
51        }
52    };
53    let result = ttest_rel(&a_arr.view(), &b_arr.view(), alt, "omit")
54        .map_err(|e| PyRuntimeError::new_err(format!("Paired t-test failed: {}", e)))?;
55    let dict = PyDict::new(py);
56    dict.set_item("statistic", result.statistic)?;
57    dict.set_item("pvalue", result.pvalue)?;
58    dict.set_item("df", result.df)?;
59    Ok(dict.into())
60}
61/// Skewness of the data
62#[pyfunction]
63pub fn skew_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
64    let binding = data.readonly();
65    let arr = binding.as_array();
66    let n = arr.len() as f64;
67    if n < 3.0 {
68        return Err(PyRuntimeError::new_err(
69            "Need at least 3 data points for skewness",
70        ));
71    }
72    let mean: f64 = arr.iter().sum::<f64>() / n;
73    let m2: f64 = arr.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n;
74    let m3: f64 = arr.iter().map(|x| (x - mean).powi(3)).sum::<f64>() / n;
75    if m2 == 0.0 {
76        return Ok(0.0);
77    }
78    Ok(m3 / m2.powf(1.5))
79}
80/// Kurtosis of the data
81#[pyfunction]
82pub fn kurtosis_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
83    let binding = data.readonly();
84    let arr = binding.as_array();
85    let n = arr.len() as f64;
86    if n < 4.0 {
87        return Err(PyRuntimeError::new_err(
88            "Need at least 4 data points for kurtosis",
89        ));
90    }
91    let mean: f64 = arr.iter().sum::<f64>() / n;
92    let m2: f64 = arr.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n;
93    let m4: f64 = arr.iter().map(|x| (x - mean).powi(4)).sum::<f64>() / n;
94    if m2 == 0.0 {
95        return Ok(0.0);
96    }
97    Ok(m4 / m2.powi(2) - 3.0)
98}
99/// Mode of the data (most frequent value)
100#[pyfunction]
101pub fn mode_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
102    let binding = data.readonly();
103    let arr = binding.as_array();
104    if arr.is_empty() {
105        return Err(PyRuntimeError::new_err(
106            "Cannot compute mode of empty array",
107        ));
108    }
109    let mut values: Vec<f64> = arr.iter().copied().collect();
110    values.sort_by(|a, b| a.partial_cmp(b).expect("Operation failed"));
111    let mut max_count = 0;
112    let mut mode = values[0];
113    let mut current_count = 1;
114    let mut current_val = values[0];
115    for &v in values.iter().skip(1) {
116        if (v - current_val).abs() < 1e-10 {
117            current_count += 1;
118        } else {
119            if current_count > max_count {
120                max_count = current_count;
121                mode = current_val;
122            }
123            current_count = 1;
124            current_val = v;
125        }
126    }
127    if current_count > max_count {
128        mode = current_val;
129    }
130    Ok(mode)
131}
132/// Geometric mean
133#[pyfunction]
134pub fn gmean_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
135    let binding = data.readonly();
136    let arr = binding.as_array();
137    let n = arr.len();
138    if n == 0 {
139        return Err(PyRuntimeError::new_err(
140            "Cannot compute geometric mean of empty array",
141        ));
142    }
143    if arr.iter().any(|&x| x <= 0.0) {
144        return Err(PyRuntimeError::new_err(
145            "Geometric mean requires all positive values",
146        ));
147    }
148    let log_sum: f64 = arr.iter().map(|x| x.ln()).sum();
149    Ok((log_sum / n as f64).exp())
150}
151/// Harmonic mean
152#[pyfunction]
153pub fn hmean_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
154    let binding = data.readonly();
155    let arr = binding.as_array();
156    let n = arr.len();
157    if n == 0 {
158        return Err(PyRuntimeError::new_err(
159            "Cannot compute harmonic mean of empty array",
160        ));
161    }
162    if arr.iter().any(|&x| x <= 0.0) {
163        return Err(PyRuntimeError::new_err(
164            "Harmonic mean requires all positive values",
165        ));
166    }
167    let reciprocal_sum: f64 = arr.iter().map(|x| 1.0 / x).sum();
168    Ok(n as f64 / reciprocal_sum)
169}
170/// Z-score normalization
171#[pyfunction]
172pub fn zscore_py(py: Python, data: &Bound<'_, PyArray1<f64>>) -> PyResult<Py<PyArray1<f64>>> {
173    let binding = data.readonly();
174    let arr = binding.as_array();
175    let n = arr.len() as f64;
176    let mean: f64 = arr.iter().sum::<f64>() / n;
177    let std: f64 = (arr.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n).sqrt();
178    if std == 0.0 {
179        return Err(PyRuntimeError::new_err(
180            "Standard deviation is zero, cannot compute z-scores",
181        ));
182    }
183    let zscores: Vec<f64> = arr.iter().map(|x| (x - mean) / std).collect();
184    Ok(PyArray1::from_vec(py, zscores).into())
185}
186/// Mean absolute deviation
187///
188/// Calculates the average absolute deviation from the mean (or specified center).
189///
190/// Parameters:
191/// - data: Input data array
192/// - center: Optional center value (default: None, uses mean)
193///
194/// Returns:
195/// - Mean absolute deviation value
196#[pyfunction]
197#[pyo3(signature = (data, center = None))]
198pub fn mean_abs_deviation_py(
199    data: &Bound<'_, PyArray1<f64>>,
200    center: Option<f64>,
201) -> PyResult<f64> {
202    let binding = data.readonly();
203    let arr = binding.as_array();
204    mean_abs_deviation(&arr.view(), center)
205        .map_err(|e| PyRuntimeError::new_err(format!("Mean absolute deviation failed: {}", e)))
206}
207/// Median absolute deviation
208///
209/// Calculates the median absolute deviation from the median.
210/// This is a robust measure of variability.
211///
212/// Parameters:
213/// - data: Input data array
214/// - center: Optional center value (default: None, uses median)
215/// - scale: Optional scale factor (default: None, uses 1.0)
216///
217/// Returns:
218/// - Median absolute deviation value
219#[pyfunction]
220#[pyo3(signature = (data, center = None, scale = None))]
221pub fn median_abs_deviation_py(
222    data: &Bound<'_, PyArray1<f64>>,
223    center: Option<f64>,
224    scale: Option<f64>,
225) -> PyResult<f64> {
226    let binding = data.readonly();
227    let arr = binding.as_array();
228    median_abs_deviation(&arr.view(), center, scale)
229        .map_err(|e| PyRuntimeError::new_err(format!("Median absolute deviation failed: {}", e)))
230}
231/// Data range
232///
233/// Calculates the range of the data (maximum - minimum).
234///
235/// Parameters:
236/// - data: Input data array
237///
238/// Returns:
239/// - Range value (max - min)
240#[pyfunction]
241pub fn data_range_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
242    let binding = data.readonly();
243    let arr = binding.as_array();
244    data_range(&arr.view())
245        .map_err(|e| PyRuntimeError::new_err(format!("Data range calculation failed: {}", e)))
246}
247/// Coefficient of variation
248///
249/// Calculates the coefficient of variation (standard deviation / mean).
250/// This is a unitless measure of relative variability.
251///
252/// Parameters:
253/// - data: Input data array
254/// - ddof: Degrees of freedom for standard deviation (default: 1)
255///
256/// Returns:
257/// - Coefficient of variation
258#[pyfunction]
259#[pyo3(signature = (data, ddof = 1))]
260pub fn coef_variation_py(data: &Bound<'_, PyArray1<f64>>, ddof: usize) -> PyResult<f64> {
261    let binding = data.readonly();
262    let arr = binding.as_array();
263    coef_variation(&arr.view(), ddof).map_err(|e| {
264        PyRuntimeError::new_err(format!(
265            "Coefficient of variation calculation failed: {}",
266            e
267        ))
268    })
269}
270/// Gini coefficient
271///
272/// Calculates the Gini coefficient, a measure of inequality.
273/// Values range from 0 (perfect equality) to 1 (maximum inequality).
274///
275/// Parameters:
276/// - data: Input data array (must be non-negative)
277///
278/// Returns:
279/// - Gini coefficient
280#[pyfunction]
281pub fn gini_coefficient_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
282    let binding = data.readonly();
283    let arr = binding.as_array();
284    gini_coefficient(&arr.view())
285        .map_err(|e| PyRuntimeError::new_err(format!("Gini coefficient calculation failed: {}", e)))
286}
287/// Compute boxplot statistics (five-number summary + outliers)
288#[pyfunction]
289#[pyo3(signature = (data, whis = 1.5))]
290pub fn boxplot_stats_py(
291    py: Python,
292    data: &Bound<'_, PyArray1<f64>>,
293    whis: f64,
294) -> PyResult<Py<PyAny>> {
295    let binding = data.readonly();
296    let arr = binding.as_array();
297    let (q1, median, q3, whislo, whishi, outliers) =
298        boxplot_stats::<f64>(&arr.view(), Some(whis), QuantileInterpolation::Linear).map_err(
299            |e| PyRuntimeError::new_err(format!("Boxplot stats calculation failed: {}", e)),
300        )?;
301    let dict = PyDict::new(py);
302    dict.set_item("q1", q1)?;
303    dict.set_item("median", median)?;
304    dict.set_item("q3", q3)?;
305    dict.set_item("whislo", whislo)?;
306    dict.set_item("whishi", whishi)?;
307    dict.set_item("outliers", outliers)?;
308    Ok(dict.into())
309}
310/// Compute quartiles (Q1, Q2, Q3)
311#[pyfunction]
312pub fn quartiles_py(
313    py: Python<'_>,
314    data: &Bound<'_, PyArray1<f64>>,
315) -> PyResult<Py<PyArray1<f64>>> {
316    let binding = data.readonly();
317    let arr = binding.as_array();
318    let result = quartiles::<f64>(&arr.view(), QuantileInterpolation::Linear)
319        .map_err(|e| PyRuntimeError::new_err(format!("Quartiles calculation failed: {}", e)))?;
320    Ok(PyArray1::from_vec(py, result.to_vec()).into())
321}
322/// Compute winsorized mean (robust mean)
323#[pyfunction]
324#[pyo3(signature = (data, limits = 0.1))]
325pub fn winsorized_mean_py(data: &Bound<'_, PyArray1<f64>>, limits: f64) -> PyResult<f64> {
326    let binding = data.readonly();
327    let arr = binding.as_array();
328    winsorized_mean::<f64>(&arr.view(), limits)
329        .map_err(|e| PyRuntimeError::new_err(format!("Winsorized mean calculation failed: {}", e)))
330}
331/// Compute winsorized variance (robust variance)
332#[pyfunction]
333#[pyo3(signature = (data, limits = 0.1, ddof = 1))]
334pub fn winsorized_variance_py(
335    data: &Bound<'_, PyArray1<f64>>,
336    limits: f64,
337    ddof: usize,
338) -> PyResult<f64> {
339    let binding = data.readonly();
340    let arr = binding.as_array();
341    winsorized_variance::<f64>(&arr.view(), limits, ddof).map_err(|e| {
342        PyRuntimeError::new_err(format!("Winsorized variance calculation failed: {}", e))
343    })
344}
345/// Compute Shannon entropy of discrete data
346#[pyfunction]
347#[pyo3(signature = (data, base = None))]
348pub fn entropy_py(data: &Bound<'_, PyArray1<i64>>, base: Option<f64>) -> PyResult<f64> {
349    let binding = data.readonly();
350    let arr = binding.as_array();
351    entropy::<i64>(&arr.view(), base)
352        .map_err(|e| PyRuntimeError::new_err(format!("Entropy calculation failed: {}", e)))
353}
354/// Compute Kullback-Leibler divergence between two probability distributions
355#[pyfunction]
356pub fn kl_divergence_py(
357    p: &Bound<'_, PyArray1<f64>>,
358    q: &Bound<'_, PyArray1<f64>>,
359) -> PyResult<f64> {
360    let p_binding = p.readonly();
361    let p_arr = p_binding.as_array();
362    let q_binding = q.readonly();
363    let q_arr = q_binding.as_array();
364    kl_divergence::<f64>(&p_arr.view(), &q_arr.view())
365        .map_err(|e| PyRuntimeError::new_err(format!("KL divergence calculation failed: {}", e)))
366}
367/// Compute cross-entropy between two probability distributions
368#[pyfunction]
369pub fn cross_entropy_py(
370    p: &Bound<'_, PyArray1<f64>>,
371    q: &Bound<'_, PyArray1<f64>>,
372) -> PyResult<f64> {
373    let p_binding = p.readonly();
374    let p_arr = p_binding.as_array();
375    let q_binding = q.readonly();
376    let q_arr = q_binding.as_array();
377    cross_entropy::<f64>(&p_arr.view(), &q_arr.view())
378        .map_err(|e| PyRuntimeError::new_err(format!("Cross-entropy calculation failed: {}", e)))
379}
380/// Compute weighted mean
381#[pyfunction]
382pub fn weighted_mean_py(
383    data: &Bound<'_, PyArray1<f64>>,
384    weights: &Bound<'_, PyArray1<f64>>,
385) -> PyResult<f64> {
386    let data_binding = data.readonly();
387    let data_arr = data_binding.as_array();
388    let weights_binding = weights.readonly();
389    let weights_arr = weights_binding.as_array();
390    weighted_mean::<f64>(&data_arr.view(), &weights_arr.view())
391        .map_err(|e| PyRuntimeError::new_err(format!("Weighted mean calculation failed: {}", e)))
392}
393/// Compute statistical moment
394#[pyfunction]
395#[pyo3(signature = (data, order, center = true))]
396pub fn moment_py(data: &Bound<'_, PyArray1<f64>>, order: usize, center: bool) -> PyResult<f64> {
397    let binding = data.readonly();
398    let arr = binding.as_array();
399    moment::<f64>(&arr.view(), order, center, None)
400        .map_err(|e| PyRuntimeError::new_err(format!("Moment calculation failed: {}", e)))
401}
402/// Compute quintiles (20th, 40th, 60th, 80th percentiles) of a dataset
403#[pyfunction]
404pub fn quintiles_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<Py<PyArray1<f64>>> {
405    let binding = data.readonly();
406    let arr = binding.as_array();
407    let result = quintiles::<f64>(&arr.view(), QuantileInterpolation::Linear)
408        .map_err(|e| PyRuntimeError::new_err(format!("Quintiles calculation failed: {}", e)))?;
409    Ok(PyArray1::from_vec(data.py(), result.to_vec()).into())
410}
411/// Compute skewness with bootstrap confidence interval
412#[pyfunction]
413#[pyo3(
414    signature = (data, bias = false, confidence = 0.95, n_bootstrap = 1000, seed = None)
415)]
416pub fn skewness_ci_py(
417    py: Python,
418    data: &Bound<'_, PyArray1<f64>>,
419    bias: bool,
420    confidence: f64,
421    n_bootstrap: usize,
422    seed: Option<u64>,
423) -> PyResult<Py<PyAny>> {
424    let binding = data.readonly();
425    let arr = binding.as_array();
426    let result = skewness_ci::<f64>(&arr.view(), bias, Some(confidence), Some(n_bootstrap), seed)
427        .map_err(|e| {
428        PyRuntimeError::new_err(format!("Skewness CI calculation failed: {}", e))
429    })?;
430    let dict = PyDict::new(py);
431    dict.set_item("estimate", result.estimate)?;
432    dict.set_item("lower", result.lower)?;
433    dict.set_item("upper", result.upper)?;
434    dict.set_item("confidence", result.confidence)?;
435    Ok(dict.into())
436}
437/// Compute kurtosis with bootstrap confidence interval
438#[pyfunction]
439#[pyo3(
440    signature = (
441        data,
442        fisher = true,
443        bias = false,
444        confidence = 0.95,
445        n_bootstrap = 1000,
446        seed = None
447    )
448)]
449pub fn kurtosis_ci_py(
450    py: Python,
451    data: &Bound<'_, PyArray1<f64>>,
452    fisher: bool,
453    bias: bool,
454    confidence: f64,
455    n_bootstrap: usize,
456    seed: Option<u64>,
457) -> PyResult<Py<PyAny>> {
458    let binding = data.readonly();
459    let arr = binding.as_array();
460    let result = kurtosis_ci::<f64>(
461        &arr.view(),
462        fisher,
463        bias,
464        Some(confidence),
465        Some(n_bootstrap),
466        seed,
467    )
468    .map_err(|e| PyRuntimeError::new_err(format!("Kurtosis CI calculation failed: {}", e)))?;
469    let dict = PyDict::new(py);
470    dict.set_item("estimate", result.estimate)?;
471    dict.set_item("lower", result.lower)?;
472    dict.set_item("upper", result.upper)?;
473    dict.set_item("confidence", result.confidence)?;
474    Ok(dict.into())
475}