1use crate::error::{StatsError, StatsResult};
7use scirs2_core::ndarray::{ArrayBase, ArrayView1, Data, Ix1, Ix2};
8use scirs2_core::numeric::{Float, NumCast};
9use scirs2_core::simd_ops::SimdUnifiedOps;
10
11#[allow(dead_code)]
16pub fn distance_matrix_simd<F, D>(
17 data: &ArrayBase<D, Ix2>,
18 metric: &str,
19) -> StatsResult<scirs2_core::ndarray::Array2<F>>
20where
21 F: Float + NumCast + SimdUnifiedOps + std::fmt::Display + std::iter::Sum + Send + Sync,
22 D: Data<Elem = F>,
23{
24 let (n_samples_, n_features) = data.dim();
25
26 if n_samples_ == 0 || n_features == 0 {
27 return Err(StatsError::InvalidArgument(
28 "Data matrix cannot be empty".to_string(),
29 ));
30 }
31
32 let mut distances = scirs2_core::ndarray::Array2::zeros((n_samples_, n_samples_));
33
34 for i in 0..n_samples_ {
36 for j in (i + 1)..n_samples_ {
37 let row_i = data.row(i);
38 let row_j = data.row(j);
39
40 let distance = match metric {
41 "euclidean" => euclidean_distance_simd(&row_i, &row_j)?,
42 "manhattan" => manhattan_distance_simd(&row_i, &row_j)?,
43 "cosine" => cosine_distance_simd(&row_i, &row_j)?,
44 _ => {
45 return Err(StatsError::InvalidArgument(format!(
46 "Unknown metric: {}",
47 metric
48 )))
49 }
50 };
51
52 distances[(i, j)] = distance;
53 distances[(j, i)] = distance; }
55 }
56
57 Ok(distances)
58}
59
60#[allow(dead_code)]
62pub fn euclidean_distance_simd<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> StatsResult<F>
63where
64 F: Float + NumCast + SimdUnifiedOps + std::fmt::Display,
65{
66 if x.len() != y.len() {
67 return Err(StatsError::DimensionMismatch(format!(
68 "Vectors must have the same length: {} vs {}",
69 x.len(),
70 y.len()
71 )));
72 }
73
74 let n = x.len();
75 let sum_sq_diff = if n > 16 {
76 let diff = F::simd_sub(&x.view(), &y.view());
78 let sq_diff = F::simd_mul(&diff.view(), &diff.view());
79 F::simd_sum(&sq_diff.view())
80 } else {
81 x.iter()
83 .zip(y.iter())
84 .map(|(&xi, &yi)| {
85 let diff = xi - yi;
86 diff * diff
87 })
88 .fold(F::zero(), |acc, x| acc + x)
89 };
90
91 Ok(sum_sq_diff.sqrt())
92}
93
94#[allow(dead_code)]
96pub fn manhattan_distance_simd<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> StatsResult<F>
97where
98 F: Float + NumCast + SimdUnifiedOps + std::fmt::Display,
99{
100 if x.len() != y.len() {
101 return Err(StatsError::DimensionMismatch(format!(
102 "Vectors must have the same length: {} vs {}",
103 x.len(),
104 y.len()
105 )));
106 }
107
108 let n = x.len();
109 let sum_abs_diff = if n > 16 {
110 let diff = F::simd_sub(&x.view(), &y.view());
112 let abs_diff = F::simd_abs(&diff.view());
113 F::simd_sum(&abs_diff.view())
114 } else {
115 x.iter()
117 .zip(y.iter())
118 .map(|(&xi, &yi)| (xi - yi).abs())
119 .fold(F::zero(), |acc, x| acc + x)
120 };
121
122 Ok(sum_abs_diff)
123}
124
125#[allow(dead_code)]
127pub fn cosine_distance_simd<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> StatsResult<F>
128where
129 F: Float + NumCast + SimdUnifiedOps + std::fmt::Display,
130{
131 if x.len() != y.len() {
132 return Err(StatsError::DimensionMismatch(format!(
133 "Vectors must have the same length: {} vs {}",
134 x.len(),
135 y.len()
136 )));
137 }
138
139 let n = x.len();
140 let (dot_product, norm_x_sq, norm_y_sq) = if n > 16 {
141 let dot = F::simd_mul(&x.view(), &y.view());
143 let dot_product = F::simd_sum(&dot.view());
144
145 let x_sq = F::simd_mul(&x.view(), &x.view());
146 let norm_x_sq = F::simd_sum(&x_sq.view());
147
148 let y_sq = F::simd_mul(&y.view(), &y.view());
149 let norm_y_sq = F::simd_sum(&y_sq.view());
150
151 (dot_product, norm_x_sq, norm_y_sq)
152 } else {
153 let mut dot_product = F::zero();
155 let mut norm_x_sq = F::zero();
156 let mut norm_y_sq = F::zero();
157
158 for (&xi, &yi) in x.iter().zip(y.iter()) {
159 dot_product = dot_product + xi * yi;
160 norm_x_sq = norm_x_sq + xi * xi;
161 norm_y_sq = norm_y_sq + yi * yi;
162 }
163
164 (dot_product, norm_x_sq, norm_y_sq)
165 };
166
167 let norm_product = (norm_x_sq * norm_y_sq).sqrt();
168 if norm_product <= F::epsilon() {
169 return Err(StatsError::InvalidArgument(
170 "Cannot compute cosine distance for zero vectors".to_string(),
171 ));
172 }
173
174 let cosine_similarity = dot_product / norm_product;
175 Ok(F::one() - cosine_similarity)
176}
177
178pub struct MovingWindowSIMD<F> {
182 windowsize: usize,
183 _phantom: std::marker::PhantomData<F>,
184}
185
186impl<F> MovingWindowSIMD<F>
187where
188 F: Float + NumCast + SimdUnifiedOps + std::fmt::Display,
189{
190 pub fn new(_windowsize: usize) -> Self {
191 Self {
192 windowsize: _windowsize,
193 _phantom: std::marker::PhantomData,
194 }
195 }
196
197 pub fn moving_mean<D>(
199 &self,
200 data: &ArrayBase<D, Ix1>,
201 ) -> StatsResult<scirs2_core::ndarray::Array1<F>>
202 where
203 D: Data<Elem = F>,
204 {
205 if data.len() < self.windowsize {
206 return Err(StatsError::InvalidArgument(
207 "Data length must be >= window size".to_string(),
208 ));
209 }
210
211 let n_windows = data.len() - self.windowsize + 1;
212 let mut result = scirs2_core::ndarray::Array1::zeros(n_windows);
213
214 if self.windowsize > 16 {
215 for i in 0..n_windows {
217 let window = data.slice(scirs2_core::ndarray::s![i..i + self.windowsize]);
218 let sum = F::simd_sum(&window);
219 result[i] = sum / F::from(self.windowsize).expect("Failed to convert to float");
220 }
221 } else {
222 let mut window_sum = data
224 .slice(scirs2_core::ndarray::s![0..self.windowsize])
225 .iter()
226 .fold(F::zero(), |acc, &x| acc + x);
227
228 result[0] = window_sum / F::from(self.windowsize).expect("Failed to convert to float");
229
230 for i in 1..n_windows {
231 window_sum = window_sum - data[i - 1] + data[i + self.windowsize - 1];
232 result[i] =
233 window_sum / F::from(self.windowsize).expect("Failed to convert to float");
234 }
235 }
236
237 Ok(result)
238 }
239
240 pub fn moving_variance<D>(
242 &self,
243 data: &ArrayBase<D, Ix1>,
244 ddof: usize,
245 ) -> StatsResult<scirs2_core::ndarray::Array1<F>>
246 where
247 D: Data<Elem = F>,
248 {
249 if data.len() < self.windowsize {
250 return Err(StatsError::InvalidArgument(
251 "Data length must be >= window size".to_string(),
252 ));
253 }
254
255 if self.windowsize <= ddof {
256 return Err(StatsError::InvalidArgument(
257 "Window size must be > ddof".to_string(),
258 ));
259 }
260
261 let n_windows = data.len() - self.windowsize + 1;
262 let mut result = scirs2_core::ndarray::Array1::zeros(n_windows);
263
264 for i in 0..n_windows {
265 let window = data.slice(scirs2_core::ndarray::s![i..i + self.windowsize]);
266
267 if self.windowsize > 16 {
268 let mean = F::simd_sum(&window.view())
270 / F::from(self.windowsize).expect("Failed to convert to float");
271 let mean_array = scirs2_core::ndarray::Array1::from_elem(self.windowsize, mean);
272 let diff = F::simd_sub(&window, &mean_array.view());
273 let sq_diff = F::simd_mul(&diff.view(), &diff.view());
274 let sum_sq_diff = F::simd_sum(&sq_diff.view());
275
276 result[i] = sum_sq_diff
277 / F::from(self.windowsize - ddof).expect("Failed to convert to float");
278 } else {
279 let mut mean = F::zero();
281 let mut m2 = F::zero();
282 let mut count = 0;
283
284 for &val in window.iter() {
285 count += 1;
286 let delta = val - mean;
287 mean = mean + delta / F::from(count).expect("Failed to convert to float");
288 let delta2 = val - mean;
289 m2 = m2 + delta * delta2;
290 }
291
292 result[i] = m2 / F::from(count - ddof).expect("Failed to convert to float");
293 }
294 }
295
296 Ok(result)
297 }
298
299 pub fn moving_min<D>(
301 &self,
302 data: &ArrayBase<D, Ix1>,
303 ) -> StatsResult<scirs2_core::ndarray::Array1<F>>
304 where
305 D: Data<Elem = F>,
306 {
307 if data.len() < self.windowsize {
308 return Err(StatsError::InvalidArgument(
309 "Data length must be >= window size".to_string(),
310 ));
311 }
312
313 let n_windows = data.len() - self.windowsize + 1;
314 let mut result = scirs2_core::ndarray::Array1::zeros(n_windows);
315
316 for i in 0..n_windows {
317 let window = data.slice(scirs2_core::ndarray::s![i..i + self.windowsize]);
318
319 result[i] = window.iter().fold(
321 window[0],
322 |min_val, &x| if x < min_val { x } else { min_val },
323 );
324 }
325
326 Ok(result)
327 }
328
329 pub fn moving_max<D>(
331 &self,
332 data: &ArrayBase<D, Ix1>,
333 ) -> StatsResult<scirs2_core::ndarray::Array1<F>>
334 where
335 D: Data<Elem = F>,
336 {
337 if data.len() < self.windowsize {
338 return Err(StatsError::InvalidArgument(
339 "Data length must be >= window size".to_string(),
340 ));
341 }
342
343 let n_windows = data.len() - self.windowsize + 1;
344 let mut result = scirs2_core::ndarray::Array1::zeros(n_windows);
345
346 for i in 0..n_windows {
347 let window = data.slice(scirs2_core::ndarray::s![i..i + self.windowsize]);
348
349 result[i] = window.iter().fold(
351 window[0],
352 |max_val, &x| if x > max_val { x } else { max_val },
353 );
354 }
355
356 Ok(result)
357 }
358}
359
360#[allow(dead_code)]
364pub fn histogram_simd<F, D>(
365 data: &ArrayBase<D, Ix1>,
366 bins: usize,
367 range: Option<(F, F)>,
368) -> StatsResult<(
369 scirs2_core::ndarray::Array1<usize>,
370 scirs2_core::ndarray::Array1<F>,
371)>
372where
373 F: Float + NumCast + SimdUnifiedOps + std::fmt::Display + std::iter::Sum + Send + Sync,
374 D: Data<Elem = F>,
375{
376 if data.is_empty() {
377 return Err(StatsError::InvalidArgument(
378 "Data cannot be empty".to_string(),
379 ));
380 }
381
382 if bins == 0 {
383 return Err(StatsError::InvalidArgument(
384 "Number of bins must be positive".to_string(),
385 ));
386 }
387
388 let (min_val, max_val) = if let Some((min, max)) = range {
392 (min, max)
393 } else {
394 let min_val = data
396 .iter()
397 .fold(data[0], |min, &x| if x < min { x } else { min });
398 let max_val = data
399 .iter()
400 .fold(data[0], |max, &x| if x > max { x } else { max });
401 (min_val, max_val)
402 };
403
404 if max_val <= min_val {
405 return Err(StatsError::InvalidArgument(
406 "Invalid range: max must be > min".to_string(),
407 ));
408 }
409
410 let mut bin_edges = scirs2_core::ndarray::Array1::zeros(bins + 1);
412 let range_width = max_val - min_val;
413 let bin_width = range_width / F::from(bins).expect("Failed to convert to float");
414
415 for i in 0..=bins {
416 bin_edges[i] = min_val + F::from(i).expect("Failed to convert to float") * bin_width;
417 }
418
419 let mut counts = scirs2_core::ndarray::Array1::zeros(bins);
421
422 for &value in data.iter() {
424 if value >= min_val && value <= max_val {
425 let bin_idx = if value == max_val {
426 bins - 1 } else {
428 let normalized = (value - min_val) / bin_width;
429 normalized
430 .floor()
431 .to_usize()
432 .expect("Operation failed")
433 .min(bins - 1)
434 };
435 counts[bin_idx] += 1;
436 }
437 }
438
439 Ok((counts, bin_edges))
440}
441
442#[allow(dead_code)]
446pub fn detect_outliers_zscore_simd<F, D>(
447 data: &ArrayBase<D, Ix1>,
448 threshold: F,
449) -> StatsResult<scirs2_core::ndarray::Array1<bool>>
450where
451 F: Float + NumCast + SimdUnifiedOps + std::fmt::Display + std::iter::Sum + Send + Sync,
452 D: Data<Elem = F>,
453{
454 if data.is_empty() {
455 return Err(StatsError::InvalidArgument(
456 "Data cannot be empty".to_string(),
457 ));
458 }
459
460 let n = data.len();
462
463 let mean = data.iter().fold(F::zero(), |acc, &x| acc + x)
465 / F::from(n).expect("Failed to convert to float");
466
467 let variance = {
469 let sum_sq_diff = data
470 .iter()
471 .map(|&x| {
472 let diff = x - mean;
473 diff * diff
474 })
475 .fold(F::zero(), |acc, x| acc + x);
476 sum_sq_diff / F::from(n - 1).expect("Failed to convert to float")
477 };
478
479 let std_dev = variance.sqrt();
480
481 if std_dev <= F::epsilon() {
482 return Ok(scirs2_core::ndarray::Array1::from_elem(n, false));
484 }
485
486 let mut outliers = scirs2_core::ndarray::Array1::from_elem(n, false);
488
489 for (i, &value) in data.iter().enumerate() {
491 let z_score = (value - mean) / std_dev;
492 outliers[i] = z_score.abs() > threshold;
493 }
494
495 Ok(outliers)
496}
497
498#[cfg(test)]
499mod tests {
500 use super::*;
501 use approx::assert_relative_eq;
502 use scirs2_core::ndarray::array;
503
504 #[test]
505 fn test_euclidean_distance_simd() {
506 let x = array![1.0, 2.0, 3.0];
507 let y = array![4.0, 5.0, 6.0];
508 let distance = euclidean_distance_simd(&x.view(), &y.view()).expect("Operation failed");
511 let expected = ((4.0 - 1.0).powi(2) + (5.0 - 2.0).powi(2) + (6.0 - 3.0).powi(2)).sqrt();
512 assert_relative_eq!(distance, expected, epsilon = 1e-10);
513 }
514
515 #[test]
516 fn test_moving_window_simd() {
517 let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
518 let window = MovingWindowSIMD::new(3);
519
520 let moving_mean = window.moving_mean(&data.view()).expect("Operation failed");
521 assert_relative_eq!(moving_mean[0], 2.0, epsilon = 1e-10); assert_relative_eq!(moving_mean[1], 3.0, epsilon = 1e-10); assert_relative_eq!(moving_mean[7], 9.0, epsilon = 1e-10); }
525
526 #[test]
527 fn test_histogram_simd() {
528 let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
529 let (counts, edges) = histogram_simd(&data.view(), 5, None).expect("Operation failed");
530
531 assert_eq!(counts.len(), 5);
532 assert_eq!(edges.len(), 6);
533 assert_eq!(counts.sum(), 5); }
535
536 #[test]
537 fn test_outlier_detection_simd() {
538 let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 100.0]; let outliers = detect_outliers_zscore_simd(&data.view(), 2.0).expect("Operation failed");
540
541 assert!(!outliers[0]); assert!(!outliers[4]); assert!(outliers[5]); }
545}