1use pyo3::exceptions::PyRuntimeError;
6use pyo3::prelude::*;
7use pyo3::types::{PyAny, PyDict};
8use scirs2_numpy::{PyArray1, PyArrayMethods};
9use scirs2_stats::tests::ttest::{ttest_rel, Alternative};
10use scirs2_stats::{
11 boxplot_stats, coef_variation, cross_entropy, data_range, entropy, gini_coefficient,
12 kl_divergence, kurtosis_ci, mean_abs_deviation, median_abs_deviation, moment, quartiles,
13 quintiles, skewness_ci, weighted_mean, winsorized_mean, winsorized_variance,
14 QuantileInterpolation,
15};
16
17#[pyfunction]
31#[pyo3(signature = (a, b, alternative = "two-sided"))]
32pub fn ttest_rel_py(
33 py: Python,
34 a: &Bound<'_, PyArray1<f64>>,
35 b: &Bound<'_, PyArray1<f64>>,
36 alternative: &str,
37) -> PyResult<Py<PyAny>> {
38 let a_binding = a.readonly();
39 let a_arr = a_binding.as_array();
40 let b_binding = b.readonly();
41 let b_arr = b_binding.as_array();
42 let alt = match alternative.to_lowercase().as_str() {
43 "two-sided" | "two_sided" => Alternative::TwoSided,
44 "less" => Alternative::Less,
45 "greater" => Alternative::Greater,
46 _ => {
47 return Err(PyRuntimeError::new_err(format!(
48 "Invalid alternative: {}. Use 'two-sided', 'less', or 'greater'",
49 alternative
50 )));
51 }
52 };
53 let result = ttest_rel(&a_arr.view(), &b_arr.view(), alt, "omit")
54 .map_err(|e| PyRuntimeError::new_err(format!("Paired t-test failed: {}", e)))?;
55 let dict = PyDict::new(py);
56 dict.set_item("statistic", result.statistic)?;
57 dict.set_item("pvalue", result.pvalue)?;
58 dict.set_item("df", result.df)?;
59 Ok(dict.into())
60}
61#[pyfunction]
63pub fn skew_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
64 let binding = data.readonly();
65 let arr = binding.as_array();
66 let n = arr.len() as f64;
67 if n < 3.0 {
68 return Err(PyRuntimeError::new_err(
69 "Need at least 3 data points for skewness",
70 ));
71 }
72 let mean: f64 = arr.iter().sum::<f64>() / n;
73 let m2: f64 = arr.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n;
74 let m3: f64 = arr.iter().map(|x| (x - mean).powi(3)).sum::<f64>() / n;
75 if m2 == 0.0 {
76 return Ok(0.0);
77 }
78 Ok(m3 / m2.powf(1.5))
79}
80#[pyfunction]
82pub fn kurtosis_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
83 let binding = data.readonly();
84 let arr = binding.as_array();
85 let n = arr.len() as f64;
86 if n < 4.0 {
87 return Err(PyRuntimeError::new_err(
88 "Need at least 4 data points for kurtosis",
89 ));
90 }
91 let mean: f64 = arr.iter().sum::<f64>() / n;
92 let m2: f64 = arr.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n;
93 let m4: f64 = arr.iter().map(|x| (x - mean).powi(4)).sum::<f64>() / n;
94 if m2 == 0.0 {
95 return Ok(0.0);
96 }
97 Ok(m4 / m2.powi(2) - 3.0)
98}
99#[pyfunction]
101pub fn mode_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
102 let binding = data.readonly();
103 let arr = binding.as_array();
104 if arr.is_empty() {
105 return Err(PyRuntimeError::new_err(
106 "Cannot compute mode of empty array",
107 ));
108 }
109 let mut values: Vec<f64> = arr.iter().copied().collect();
110 values.sort_by(|a, b| a.partial_cmp(b).expect("Operation failed"));
111 let mut max_count = 0;
112 let mut mode = values[0];
113 let mut current_count = 1;
114 let mut current_val = values[0];
115 for &v in values.iter().skip(1) {
116 if (v - current_val).abs() < 1e-10 {
117 current_count += 1;
118 } else {
119 if current_count > max_count {
120 max_count = current_count;
121 mode = current_val;
122 }
123 current_count = 1;
124 current_val = v;
125 }
126 }
127 if current_count > max_count {
128 mode = current_val;
129 }
130 Ok(mode)
131}
132#[pyfunction]
134pub fn gmean_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
135 let binding = data.readonly();
136 let arr = binding.as_array();
137 let n = arr.len();
138 if n == 0 {
139 return Err(PyRuntimeError::new_err(
140 "Cannot compute geometric mean of empty array",
141 ));
142 }
143 if arr.iter().any(|&x| x <= 0.0) {
144 return Err(PyRuntimeError::new_err(
145 "Geometric mean requires all positive values",
146 ));
147 }
148 let log_sum: f64 = arr.iter().map(|x| x.ln()).sum();
149 Ok((log_sum / n as f64).exp())
150}
151#[pyfunction]
153pub fn hmean_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
154 let binding = data.readonly();
155 let arr = binding.as_array();
156 let n = arr.len();
157 if n == 0 {
158 return Err(PyRuntimeError::new_err(
159 "Cannot compute harmonic mean of empty array",
160 ));
161 }
162 if arr.iter().any(|&x| x <= 0.0) {
163 return Err(PyRuntimeError::new_err(
164 "Harmonic mean requires all positive values",
165 ));
166 }
167 let reciprocal_sum: f64 = arr.iter().map(|x| 1.0 / x).sum();
168 Ok(n as f64 / reciprocal_sum)
169}
170#[pyfunction]
172pub fn zscore_py(py: Python, data: &Bound<'_, PyArray1<f64>>) -> PyResult<Py<PyArray1<f64>>> {
173 let binding = data.readonly();
174 let arr = binding.as_array();
175 let n = arr.len() as f64;
176 let mean: f64 = arr.iter().sum::<f64>() / n;
177 let std: f64 = (arr.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n).sqrt();
178 if std == 0.0 {
179 return Err(PyRuntimeError::new_err(
180 "Standard deviation is zero, cannot compute z-scores",
181 ));
182 }
183 let zscores: Vec<f64> = arr.iter().map(|x| (x - mean) / std).collect();
184 Ok(PyArray1::from_vec(py, zscores).into())
185}
186#[pyfunction]
197#[pyo3(signature = (data, center = None))]
198pub fn mean_abs_deviation_py(
199 data: &Bound<'_, PyArray1<f64>>,
200 center: Option<f64>,
201) -> PyResult<f64> {
202 let binding = data.readonly();
203 let arr = binding.as_array();
204 mean_abs_deviation(&arr.view(), center)
205 .map_err(|e| PyRuntimeError::new_err(format!("Mean absolute deviation failed: {}", e)))
206}
207#[pyfunction]
220#[pyo3(signature = (data, center = None, scale = None))]
221pub fn median_abs_deviation_py(
222 data: &Bound<'_, PyArray1<f64>>,
223 center: Option<f64>,
224 scale: Option<f64>,
225) -> PyResult<f64> {
226 let binding = data.readonly();
227 let arr = binding.as_array();
228 median_abs_deviation(&arr.view(), center, scale)
229 .map_err(|e| PyRuntimeError::new_err(format!("Median absolute deviation failed: {}", e)))
230}
231#[pyfunction]
241pub fn data_range_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
242 let binding = data.readonly();
243 let arr = binding.as_array();
244 data_range(&arr.view())
245 .map_err(|e| PyRuntimeError::new_err(format!("Data range calculation failed: {}", e)))
246}
247#[pyfunction]
259#[pyo3(signature = (data, ddof = 1))]
260pub fn coef_variation_py(data: &Bound<'_, PyArray1<f64>>, ddof: usize) -> PyResult<f64> {
261 let binding = data.readonly();
262 let arr = binding.as_array();
263 coef_variation(&arr.view(), ddof).map_err(|e| {
264 PyRuntimeError::new_err(format!(
265 "Coefficient of variation calculation failed: {}",
266 e
267 ))
268 })
269}
270#[pyfunction]
281pub fn gini_coefficient_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<f64> {
282 let binding = data.readonly();
283 let arr = binding.as_array();
284 gini_coefficient(&arr.view())
285 .map_err(|e| PyRuntimeError::new_err(format!("Gini coefficient calculation failed: {}", e)))
286}
287#[pyfunction]
289#[pyo3(signature = (data, whis = 1.5))]
290pub fn boxplot_stats_py(
291 py: Python,
292 data: &Bound<'_, PyArray1<f64>>,
293 whis: f64,
294) -> PyResult<Py<PyAny>> {
295 let binding = data.readonly();
296 let arr = binding.as_array();
297 let (q1, median, q3, whislo, whishi, outliers) =
298 boxplot_stats::<f64>(&arr.view(), Some(whis), QuantileInterpolation::Linear).map_err(
299 |e| PyRuntimeError::new_err(format!("Boxplot stats calculation failed: {}", e)),
300 )?;
301 let dict = PyDict::new(py);
302 dict.set_item("q1", q1)?;
303 dict.set_item("median", median)?;
304 dict.set_item("q3", q3)?;
305 dict.set_item("whislo", whislo)?;
306 dict.set_item("whishi", whishi)?;
307 dict.set_item("outliers", outliers)?;
308 Ok(dict.into())
309}
310#[pyfunction]
312pub fn quartiles_py(
313 py: Python<'_>,
314 data: &Bound<'_, PyArray1<f64>>,
315) -> PyResult<Py<PyArray1<f64>>> {
316 let binding = data.readonly();
317 let arr = binding.as_array();
318 let result = quartiles::<f64>(&arr.view(), QuantileInterpolation::Linear)
319 .map_err(|e| PyRuntimeError::new_err(format!("Quartiles calculation failed: {}", e)))?;
320 Ok(PyArray1::from_vec(py, result.to_vec()).into())
321}
322#[pyfunction]
324#[pyo3(signature = (data, limits = 0.1))]
325pub fn winsorized_mean_py(data: &Bound<'_, PyArray1<f64>>, limits: f64) -> PyResult<f64> {
326 let binding = data.readonly();
327 let arr = binding.as_array();
328 winsorized_mean::<f64>(&arr.view(), limits)
329 .map_err(|e| PyRuntimeError::new_err(format!("Winsorized mean calculation failed: {}", e)))
330}
331#[pyfunction]
333#[pyo3(signature = (data, limits = 0.1, ddof = 1))]
334pub fn winsorized_variance_py(
335 data: &Bound<'_, PyArray1<f64>>,
336 limits: f64,
337 ddof: usize,
338) -> PyResult<f64> {
339 let binding = data.readonly();
340 let arr = binding.as_array();
341 winsorized_variance::<f64>(&arr.view(), limits, ddof).map_err(|e| {
342 PyRuntimeError::new_err(format!("Winsorized variance calculation failed: {}", e))
343 })
344}
345#[pyfunction]
347#[pyo3(signature = (data, base = None))]
348pub fn entropy_py(data: &Bound<'_, PyArray1<i64>>, base: Option<f64>) -> PyResult<f64> {
349 let binding = data.readonly();
350 let arr = binding.as_array();
351 entropy::<i64>(&arr.view(), base)
352 .map_err(|e| PyRuntimeError::new_err(format!("Entropy calculation failed: {}", e)))
353}
354#[pyfunction]
356pub fn kl_divergence_py(
357 p: &Bound<'_, PyArray1<f64>>,
358 q: &Bound<'_, PyArray1<f64>>,
359) -> PyResult<f64> {
360 let p_binding = p.readonly();
361 let p_arr = p_binding.as_array();
362 let q_binding = q.readonly();
363 let q_arr = q_binding.as_array();
364 kl_divergence::<f64>(&p_arr.view(), &q_arr.view())
365 .map_err(|e| PyRuntimeError::new_err(format!("KL divergence calculation failed: {}", e)))
366}
367#[pyfunction]
369pub fn cross_entropy_py(
370 p: &Bound<'_, PyArray1<f64>>,
371 q: &Bound<'_, PyArray1<f64>>,
372) -> PyResult<f64> {
373 let p_binding = p.readonly();
374 let p_arr = p_binding.as_array();
375 let q_binding = q.readonly();
376 let q_arr = q_binding.as_array();
377 cross_entropy::<f64>(&p_arr.view(), &q_arr.view())
378 .map_err(|e| PyRuntimeError::new_err(format!("Cross-entropy calculation failed: {}", e)))
379}
380#[pyfunction]
382pub fn weighted_mean_py(
383 data: &Bound<'_, PyArray1<f64>>,
384 weights: &Bound<'_, PyArray1<f64>>,
385) -> PyResult<f64> {
386 let data_binding = data.readonly();
387 let data_arr = data_binding.as_array();
388 let weights_binding = weights.readonly();
389 let weights_arr = weights_binding.as_array();
390 weighted_mean::<f64>(&data_arr.view(), &weights_arr.view())
391 .map_err(|e| PyRuntimeError::new_err(format!("Weighted mean calculation failed: {}", e)))
392}
393#[pyfunction]
395#[pyo3(signature = (data, order, center = true))]
396pub fn moment_py(data: &Bound<'_, PyArray1<f64>>, order: usize, center: bool) -> PyResult<f64> {
397 let binding = data.readonly();
398 let arr = binding.as_array();
399 moment::<f64>(&arr.view(), order, center, None)
400 .map_err(|e| PyRuntimeError::new_err(format!("Moment calculation failed: {}", e)))
401}
402#[pyfunction]
404pub fn quintiles_py(data: &Bound<'_, PyArray1<f64>>) -> PyResult<Py<PyArray1<f64>>> {
405 let binding = data.readonly();
406 let arr = binding.as_array();
407 let result = quintiles::<f64>(&arr.view(), QuantileInterpolation::Linear)
408 .map_err(|e| PyRuntimeError::new_err(format!("Quintiles calculation failed: {}", e)))?;
409 Ok(PyArray1::from_vec(data.py(), result.to_vec()).into())
410}
411#[pyfunction]
413#[pyo3(
414 signature = (data, bias = false, confidence = 0.95, n_bootstrap = 1000, seed = None)
415)]
416pub fn skewness_ci_py(
417 py: Python,
418 data: &Bound<'_, PyArray1<f64>>,
419 bias: bool,
420 confidence: f64,
421 n_bootstrap: usize,
422 seed: Option<u64>,
423) -> PyResult<Py<PyAny>> {
424 let binding = data.readonly();
425 let arr = binding.as_array();
426 let result = skewness_ci::<f64>(&arr.view(), bias, Some(confidence), Some(n_bootstrap), seed)
427 .map_err(|e| {
428 PyRuntimeError::new_err(format!("Skewness CI calculation failed: {}", e))
429 })?;
430 let dict = PyDict::new(py);
431 dict.set_item("estimate", result.estimate)?;
432 dict.set_item("lower", result.lower)?;
433 dict.set_item("upper", result.upper)?;
434 dict.set_item("confidence", result.confidence)?;
435 Ok(dict.into())
436}
437#[pyfunction]
439#[pyo3(
440 signature = (
441 data,
442 fisher = true,
443 bias = false,
444 confidence = 0.95,
445 n_bootstrap = 1000,
446 seed = None
447 )
448)]
449pub fn kurtosis_ci_py(
450 py: Python,
451 data: &Bound<'_, PyArray1<f64>>,
452 fisher: bool,
453 bias: bool,
454 confidence: f64,
455 n_bootstrap: usize,
456 seed: Option<u64>,
457) -> PyResult<Py<PyAny>> {
458 let binding = data.readonly();
459 let arr = binding.as_array();
460 let result = kurtosis_ci::<f64>(
461 &arr.view(),
462 fisher,
463 bias,
464 Some(confidence),
465 Some(n_bootstrap),
466 seed,
467 )
468 .map_err(|e| PyRuntimeError::new_err(format!("Kurtosis CI calculation failed: {}", e)))?;
469 let dict = PyDict::new(py);
470 dict.set_item("estimate", result.estimate)?;
471 dict.set_item("lower", result.lower)?;
472 dict.set_item("upper", result.upper)?;
473 dict.set_item("confidence", result.confidence)?;
474 Ok(dict.into())
475}