sklears_utils/array_utils/
stats.rs1use crate::{UtilsError, UtilsResult};
4use scirs2_core::ndarray::{Array1, ArrayView1};
5use scirs2_core::numeric::{Float, ToPrimitive};
6
7#[derive(Debug, Clone)]
9pub struct ArrayStatistics<T> {
10 pub mean: T,
11 pub std: T,
12 pub min: T,
13 pub max: T,
14 pub median: T,
15 pub count: usize,
16}
17
18pub fn array_sum<T>(array: &Array1<T>) -> UtilsResult<T>
20where
21 T: Float + Clone,
22{
23 if array.is_empty() {
24 return Err(UtilsError::EmptyInput);
25 }
26
27 let sum = array.iter().fold(T::zero(), |acc, &x| acc + x);
28 Ok(sum)
29}
30
31pub fn array_mean<T>(array: &Array1<T>) -> UtilsResult<T>
33where
34 T: Float + Clone + ToPrimitive,
35{
36 if array.is_empty() {
37 return Err(UtilsError::EmptyInput);
38 }
39
40 let sum = array.iter().fold(T::zero(), |acc, &x| acc + x);
41 let count = T::from(array.len()).unwrap();
42 Ok(sum / count)
43}
44
45pub fn array_mean_f64(array: &ArrayView1<f64>) -> UtilsResult<f64> {
47 if array.is_empty() {
48 return Err(UtilsError::EmptyInput);
49 }
50
51 let mut sum = 0.0;
53 let mut c = 0.0; for &value in array {
56 let y = value - c;
57 let t = sum + y;
58 c = (t - sum) - y;
59 sum = t;
60 }
61
62 Ok(sum / array.len() as f64)
63}
64
65pub fn array_var<T>(array: &Array1<T>) -> UtilsResult<T>
67where
68 T: Float + Clone + ToPrimitive,
69{
70 if array.len() <= 1 {
71 return Err(UtilsError::InsufficientData {
72 min: 2,
73 actual: array.len(),
74 });
75 }
76
77 let mean = array_mean(array)?;
78 let variance = array
79 .iter()
80 .map(|&x| {
81 let diff = x - mean;
82 diff * diff
83 })
84 .fold(T::zero(), |acc, x| acc + x);
85
86 let n_minus_1 = T::from(array.len() - 1).unwrap();
87 Ok(variance / n_minus_1)
88}
89
90pub fn array_variance_f64(array: &ArrayView1<f64>) -> UtilsResult<f64> {
92 if array.len() <= 1 {
93 return Err(UtilsError::InsufficientData {
94 min: 2,
95 actual: array.len(),
96 });
97 }
98
99 let mean = array_mean_f64(array)?;
100
101 let mut sum_sq_diff = 0.0;
103 let mut correction = 0.0; for &value in array {
106 let diff = value - mean;
107 let sq_diff = diff * diff;
108 let y = sq_diff - correction;
109 let t = sum_sq_diff + y;
110 correction = (t - sum_sq_diff) - y;
111 sum_sq_diff = t;
112 }
113
114 Ok(sum_sq_diff / (array.len() - 1) as f64)
115}
116
117pub fn array_std<T>(array: &Array1<T>) -> UtilsResult<T>
119where
120 T: Float + Clone + ToPrimitive,
121{
122 let variance = array_var(array)?;
123 Ok(variance.sqrt())
124}
125
126pub fn array_min<T>(array: &Array1<T>) -> UtilsResult<T>
128where
129 T: PartialOrd + Clone,
130{
131 if array.is_empty() {
132 return Err(UtilsError::EmptyInput);
133 }
134
135 let mut min_val = &array[0];
136 for val in array.iter().skip(1) {
137 if val < min_val {
138 min_val = val;
139 }
140 }
141 Ok(min_val.clone())
142}
143
144pub fn array_max<T>(array: &Array1<T>) -> UtilsResult<T>
146where
147 T: PartialOrd + Clone,
148{
149 if array.is_empty() {
150 return Err(UtilsError::EmptyInput);
151 }
152
153 let mut max_val = &array[0];
154 for val in array.iter().skip(1) {
155 if val > max_val {
156 max_val = val;
157 }
158 }
159 Ok(max_val.clone())
160}
161
162pub fn array_min_max<T>(array: &Array1<T>) -> UtilsResult<(T, T)>
164where
165 T: PartialOrd + Clone,
166{
167 if array.is_empty() {
168 return Err(UtilsError::EmptyInput);
169 }
170
171 let mut min_val = &array[0];
172 let mut max_val = &array[0];
173
174 for val in array.iter().skip(1) {
175 if val < min_val {
176 min_val = val;
177 }
178 if val > max_val {
179 max_val = val;
180 }
181 }
182
183 Ok((min_val.clone(), max_val.clone()))
184}
185
186pub fn array_median<T>(array: &Array1<T>) -> UtilsResult<T>
188where
189 T: PartialOrd + Clone + Float,
190{
191 if array.is_empty() {
192 return Err(UtilsError::EmptyInput);
193 }
194
195 let mut sorted: Vec<T> = array.iter().cloned().collect();
196 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
197
198 let len = sorted.len();
199 if len % 2 == 1 {
200 Ok(sorted[len / 2])
201 } else {
202 let mid = len / 2;
203 let sum = sorted[mid - 1] + sorted[mid];
204 Ok(sum / T::from(2.0).unwrap())
205 }
206}
207
208pub fn array_percentile<T>(array: &Array1<T>, percentile: f64) -> UtilsResult<T>
210where
211 T: PartialOrd + Clone + Float,
212{
213 if array.is_empty() {
214 return Err(UtilsError::EmptyInput);
215 }
216
217 if !(0.0..=100.0).contains(&percentile) {
218 return Err(UtilsError::InvalidParameter(
219 "Percentile must be between 0 and 100".to_string(),
220 ));
221 }
222
223 let mut sorted: Vec<T> = array.iter().cloned().collect();
224 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
225
226 let len = sorted.len() as f64;
227 let index = (percentile / 100.0) * (len - 1.0);
228 let lower_index = index.floor() as usize;
229 let upper_index = index.ceil() as usize;
230
231 if lower_index == upper_index {
232 Ok(sorted[lower_index])
233 } else {
234 let weight = T::from(index - lower_index as f64).unwrap();
235 let lower_val = sorted[lower_index];
236 let upper_val = sorted[upper_index];
237 Ok(lower_val + weight * (upper_val - lower_val))
238 }
239}
240
241pub fn array_quantiles<T>(array: &Array1<T>, quantiles: &[f64]) -> UtilsResult<Vec<T>>
243where
244 T: PartialOrd + Clone + Float,
245{
246 if array.is_empty() {
247 return Err(UtilsError::EmptyInput);
248 }
249
250 let mut sorted: Vec<T> = array.iter().cloned().collect();
251 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
252
253 let mut results = Vec::with_capacity(quantiles.len());
254 let len = sorted.len() as f64;
255
256 for &q in quantiles {
257 if !(0.0..=1.0).contains(&q) {
258 return Err(UtilsError::InvalidParameter(
259 "Quantiles must be between 0 and 1".to_string(),
260 ));
261 }
262
263 let index = q * (len - 1.0);
264 let lower_index = index.floor() as usize;
265 let upper_index = index.ceil() as usize;
266
267 let result = if lower_index == upper_index {
268 sorted[lower_index]
269 } else {
270 let weight = T::from(index - lower_index as f64).unwrap();
271 let lower_val = sorted[lower_index];
272 let upper_val = sorted[upper_index];
273 lower_val + weight * (upper_val - lower_val)
274 };
275
276 results.push(result);
277 }
278
279 Ok(results)
280}
281
282pub fn array_cumsum<T>(array: &Array1<T>) -> UtilsResult<Array1<T>>
284where
285 T: Float + Clone,
286{
287 if array.is_empty() {
288 return Err(UtilsError::EmptyInput);
289 }
290
291 let mut result = Vec::with_capacity(array.len());
292 let mut cumulative = T::zero();
293
294 for &value in array {
295 cumulative = cumulative + value;
296 result.push(cumulative);
297 }
298
299 Ok(Array1::from_vec(result))
300}
301
302pub fn array_standardize<T>(array: &Array1<T>) -> UtilsResult<Array1<T>>
304where
305 T: Float + Clone + ToPrimitive,
306{
307 if array.len() <= 1 {
308 return Err(UtilsError::InsufficientData {
309 min: 2,
310 actual: array.len(),
311 });
312 }
313
314 let mean = array_mean(array)?;
315 let std = array_std(array)?;
316
317 if std.abs() < T::from(1e-10).unwrap() {
318 return Err(UtilsError::InvalidParameter(
319 "Standard deviation too small for standardization".to_string(),
320 ));
321 }
322
323 let standardized = array.mapv(|x| (x - mean) / std);
324 Ok(standardized)
325}
326
327pub fn array_min_max_normalize<T>(array: &Array1<T>) -> UtilsResult<Array1<T>>
329where
330 T: PartialOrd + Float + Clone,
331{
332 if array.is_empty() {
333 return Err(UtilsError::EmptyInput);
334 }
335
336 let (min_val, max_val) = array_min_max(array)?;
337 let range = max_val - min_val;
338
339 if range.abs() < T::from(1e-10).unwrap() {
340 return Ok(Array1::zeros(array.len()));
342 }
343
344 let normalized = array.mapv(|x| (x - min_val) / range);
345 Ok(normalized)
346}
347
348pub fn array_describe<T>(array: &Array1<T>) -> UtilsResult<ArrayStatistics<T>>
350where
351 T: Float + Clone + ToPrimitive + PartialOrd,
352{
353 if array.is_empty() {
354 return Err(UtilsError::EmptyInput);
355 }
356
357 Ok(ArrayStatistics {
358 mean: array_mean(array)?,
359 std: array_std(array)?,
360 min: array_min(array)?,
361 max: array_max(array)?,
362 median: array_median(array)?,
363 count: array.len(),
364 })
365}