1use ndarray::{Array1, Array2, ArrayView1, ArrayView2};
25use std::collections::HashMap;
26use thiserror::Error;
27
/// Errors produced by the statistics and vector/matrix helpers in this module.
#[derive(Error, Debug)]
pub enum MathError {
    /// A caller-supplied argument was out of range or otherwise unusable
    /// (e.g. a quantile outside [0, 1], a non-positive norm order).
    #[error("Invalid input: {0}")]
    InvalidInput(String),

    /// A computation could not be carried out numerically
    /// (e.g. non-contiguous array storage, normalizing a zero vector).
    #[error("Numerical error: {0}")]
    NumericalError(String),

    /// Two inputs that must have matching lengths did not.
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch { expected: String, actual: String },

    /// An operation that needs at least one element received none.
    #[error("Empty input data")]
    EmptyInput,

    /// A denominator (e.g. the sum of weights) was exactly zero.
    #[error("Division by zero")]
    DivisionByZero,
}
54
55pub type MathResult<T> = std::result::Result<T, MathError>;
59
/// Descriptive statistics over a one-dimensional collection of `f64` samples.
pub trait Statistics {
    /// Arithmetic mean; `EmptyInput` when there is no data.
    fn mean(&self) -> MathResult<f64>;

    /// Sample variance (n-1 denominator); needs at least 2 values.
    fn variance(&self) -> MathResult<f64>;

    /// Square root of the sample variance.
    fn std_dev(&self) -> MathResult<f64>;

    /// Smallest element; `EmptyInput` when there is no data.
    fn min(&self) -> MathResult<f64>;

    /// Largest element; `EmptyInput` when there is no data.
    fn max(&self) -> MathResult<f64>;

    /// Quantile `q` in [0, 1], linearly interpolated between order statistics.
    fn quantile(&self, q: f64) -> MathResult<f64>;

    /// Skewness: mean of cubed standardized deviations (divides by n).
    fn skewness(&self) -> MathResult<f64>;

    /// Excess kurtosis: mean of fourth-power standardized deviations minus 3.
    fn kurtosis(&self) -> MathResult<f64>;
}
97
/// Element-wise arithmetic and norm operations on `f64` vectors.
pub trait VectorMath {
    /// Dot product; `DimensionMismatch` on unequal lengths.
    fn dot(&self, other: &[f64]) -> MathResult<f64>;

    /// p-norm; `p` must be strictly positive (p = 1 and p = 2 take fast paths).
    fn norm(&self, p: f64) -> MathResult<f64>;

    /// Scales the vector to unit L2 norm; errors for a zero vector.
    fn normalize(&self) -> MathResult<Vec<f64>>;

    /// Shifts and scales to zero mean / unit standard deviation;
    /// errors for a constant vector.
    fn standardize(&self) -> MathResult<Vec<f64>>;

    /// Clamps every element into [min, max].
    fn clip(&self, min: f64, max: f64) -> Vec<f64>;

    /// Element-wise sum; `DimensionMismatch` on unequal lengths.
    fn add(&self, other: &[f64]) -> MathResult<Vec<f64>>;

    /// Element-wise difference; `DimensionMismatch` on unequal lengths.
    fn subtract(&self, other: &[f64]) -> MathResult<Vec<f64>>;

    /// Multiplies every element by `scalar`.
    fn multiply(&self, scalar: f64) -> Vec<f64>;
}
129
/// Column/row statistics for 2-D `f64` matrices
/// (rows are treated as samples, columns as features).
pub trait MatrixMath {
    /// Means along an axis: 0 gives per-column means, 1 gives per-row means.
    fn mean_along_axis(&self, axis: usize) -> MathResult<Array1<f64>>;

    /// Sample variances (n-1 denominator) along an axis, same axis convention.
    fn variance_along_axis(&self, axis: usize) -> MathResult<Array1<f64>>;

    /// Covariance matrix of the columns; needs at least 2 rows.
    fn covariance_matrix(&self) -> MathResult<Array2<f64>>;

    /// Pearson correlation matrix; entries involving a zero-variance
    /// column are set to 0.
    fn correlation_matrix(&self) -> MathResult<Array2<f64>>;
}
149
150impl Statistics for Vec<f64> {
152 fn mean(&self) -> MathResult<f64> {
153 compute_mean(self)
154 }
155
156 fn variance(&self) -> MathResult<f64> {
157 compute_variance(self)
158 }
159
160 fn std_dev(&self) -> MathResult<f64> {
161 compute_std_dev(self)
162 }
163
164 fn min(&self) -> MathResult<f64> {
165 if self.is_empty() {
166 return Err(MathError::EmptyInput);
167 }
168 Ok(self.iter().fold(f64::INFINITY, |a, &b| a.min(b)))
169 }
170
171 fn max(&self) -> MathResult<f64> {
172 if self.is_empty() {
173 return Err(MathError::EmptyInput);
174 }
175 Ok(self.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)))
176 }
177
178 fn quantile(&self, q: f64) -> MathResult<f64> {
179 if !(0.0..=1.0).contains(&q) {
180 return Err(MathError::InvalidInput(
181 format!("Quantile must be between 0 and 1, got {}", q)
182 ));
183 }
184
185 if self.is_empty() {
186 return Err(MathError::EmptyInput);
187 }
188
189 let mut sorted = self.clone();
190 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
191
192 let index = q * (sorted.len() - 1) as f64;
193 let lower_index = index.floor() as usize;
194 let upper_index = index.ceil() as usize;
195
196 if lower_index == upper_index {
197 Ok(sorted[lower_index])
198 } else {
199 let weight = index - lower_index as f64;
200 Ok(sorted[lower_index] * (1.0 - weight) + sorted[upper_index] * weight)
201 }
202 }
203
204 fn skewness(&self) -> MathResult<f64> {
205 let mean = self.mean()?;
206 let std_dev = self.std_dev()?;
207
208 if std_dev == 0.0 {
209 return Ok(0.0);
210 }
211
212 let n = self.len() as f64;
213 let sum_cubed_deviations: f64 = self.iter()
214 .map(|&x| ((x - mean) / std_dev).powi(3))
215 .sum();
216
217 Ok(sum_cubed_deviations / n)
218 }
219
220 fn kurtosis(&self) -> MathResult<f64> {
221 let mean = self.mean()?;
222 let std_dev = self.std_dev()?;
223
224 if std_dev == 0.0 {
225 return Ok(0.0);
226 }
227
228 let n = self.len() as f64;
229 let sum_fourth_deviations: f64 = self.iter()
230 .map(|&x| ((x - mean) / std_dev).powi(4))
231 .sum();
232
233 Ok(sum_fourth_deviations / n - 3.0) }
235}
236
237impl Statistics for Array1<f64> {
239 fn mean(&self) -> MathResult<f64> {
240 compute_mean(self.as_slice()
241 .ok_or_else(|| MathError::NumericalError("Non-contiguous array data".to_string()))?)
242 }
243
244 fn variance(&self) -> MathResult<f64> {
245 compute_variance(self.as_slice()
246 .ok_or_else(|| MathError::NumericalError("Non-contiguous array data".to_string()))?)
247 }
248
249 fn std_dev(&self) -> MathResult<f64> {
250 compute_std_dev(self.as_slice()
251 .ok_or_else(|| MathError::NumericalError("Non-contiguous array data".to_string()))?)
252 }
253
254 fn min(&self) -> MathResult<f64> {
255 if self.is_empty() {
256 return Err(MathError::EmptyInput);
257 }
258 Ok(self.iter().fold(f64::INFINITY, |a, &b| a.min(b)))
259 }
260
261 fn max(&self) -> MathResult<f64> {
262 if self.is_empty() {
263 return Err(MathError::EmptyInput);
264 }
265 Ok(self.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)))
266 }
267
268 fn quantile(&self, q: f64) -> MathResult<f64> {
269 compute_quantile(&self.to_vec(), q)
270 }
271
272 fn skewness(&self) -> MathResult<f64> {
273 compute_skewness(self.as_slice()
274 .ok_or_else(|| MathError::NumericalError("Non-contiguous array data".to_string()))?)
275 }
276
277 fn kurtosis(&self) -> MathResult<f64> {
278 compute_kurtosis(self.as_slice()
279 .ok_or_else(|| MathError::NumericalError("Non-contiguous array data".to_string()))?)
280 }
281}
282
283impl Statistics for ArrayView1<'_, f64> {
285 fn mean(&self) -> MathResult<f64> {
286 compute_mean(self.as_slice()
287 .ok_or_else(|| MathError::NumericalError("Non-contiguous array view".to_string()))?)
288 }
289
290 fn variance(&self) -> MathResult<f64> {
291 compute_variance(self.as_slice()
292 .ok_or_else(|| MathError::NumericalError("Non-contiguous array view".to_string()))?)
293 }
294
295 fn std_dev(&self) -> MathResult<f64> {
296 compute_std_dev(self.as_slice()
297 .ok_or_else(|| MathError::NumericalError("Non-contiguous array view".to_string()))?)
298 }
299
300 fn min(&self) -> MathResult<f64> {
301 if self.is_empty() {
302 return Err(MathError::EmptyInput);
303 }
304 Ok(self.iter().fold(f64::INFINITY, |a, &b| a.min(b)))
305 }
306
307 fn max(&self) -> MathResult<f64> {
308 if self.is_empty() {
309 return Err(MathError::EmptyInput);
310 }
311 Ok(self.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)))
312 }
313
314 fn quantile(&self, q: f64) -> MathResult<f64> {
315 compute_quantile(&self.to_vec(), q)
316 }
317
318 fn skewness(&self) -> MathResult<f64> {
319 compute_skewness(self.as_slice()
320 .ok_or_else(|| MathError::NumericalError("Non-contiguous array view".to_string()))?)
321 }
322
323 fn kurtosis(&self) -> MathResult<f64> {
324 compute_kurtosis(self.as_slice()
325 .ok_or_else(|| MathError::NumericalError("Non-contiguous array view".to_string()))?)
326 }
327}
328
329fn compute_skewness(values: &[f64]) -> MathResult<f64> {
331 let mean = compute_mean(values)?;
332 let std_dev = compute_std_dev(values)?;
333
334 if std_dev == 0.0 {
335 return Ok(0.0);
336 }
337
338 let n = values.len() as f64;
339 let sum_cubed_deviations: f64 = values.iter()
340 .map(|&x| ((x - mean) / std_dev).powi(3))
341 .sum();
342
343 Ok(sum_cubed_deviations / n)
344}
345
346fn compute_kurtosis(values: &[f64]) -> MathResult<f64> {
347 let mean = compute_mean(values)?;
348 let std_dev = compute_std_dev(values)?;
349
350 if std_dev == 0.0 {
351 return Ok(0.0);
352 }
353
354 let n = values.len() as f64;
355 let sum_fourth_deviations: f64 = values.iter()
356 .map(|&x| ((x - mean) / std_dev).powi(4))
357 .sum();
358
359 Ok(sum_fourth_deviations / n - 3.0)
360}
361
362fn compute_quantile(values: &[f64], q: f64) -> MathResult<f64> {
364 if !(0.0..=1.0).contains(&q) {
365 return Err(MathError::InvalidInput(
366 format!("Quantile must be between 0 and 1, got {}", q)
367 ));
368 }
369
370 if values.is_empty() {
371 return Err(MathError::EmptyInput);
372 }
373
374 let mut sorted = values.to_vec();
375 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
376
377 let index = q * (sorted.len() - 1) as f64;
378 let lower_index = index.floor() as usize;
379 let upper_index = index.ceil() as usize;
380
381 if lower_index == upper_index {
382 Ok(sorted[lower_index])
383 } else {
384 let weight = index - lower_index as f64;
385 Ok(sorted[lower_index] * (1.0 - weight) + sorted[upper_index] * weight)
386 }
387}
388
389impl VectorMath for Vec<f64> {
391 fn dot(&self, other: &[f64]) -> MathResult<f64> {
392 if self.len() != other.len() {
393 return Err(MathError::DimensionMismatch {
394 expected: self.len().to_string(),
395 actual: other.len().to_string(),
396 });
397 }
398
399 Ok(self.iter()
400 .zip(other.iter())
401 .map(|(&a, &b)| a * b)
402 .sum())
403 }
404
405 fn norm(&self, p: f64) -> MathResult<f64> {
406 if p <= 0.0 {
407 return Err(MathError::InvalidInput(
408 format!("p must be positive, got {}", p)
409 ));
410 }
411
412 if p == 1.0 {
413 Ok(l1_norm(self))
414 } else if p == 2.0 {
415 Ok(l2_norm(self))
416 } else {
417 let sum: f64 = self.iter()
418 .map(|&x| x.abs().powf(p))
419 .sum();
420 Ok(sum.powf(1.0 / p))
421 }
422 }
423
424 fn normalize(&self) -> MathResult<Vec<f64>> {
425 normalize_vector(self)
426 }
427
428 fn standardize(&self) -> MathResult<Vec<f64>> {
429 standardize_vector(self)
430 }
431
432 fn clip(&self, min: f64, max: f64) -> Vec<f64> {
433 clip_values(self, min, max)
434 }
435
436 fn add(&self, other: &[f64]) -> MathResult<Vec<f64>> {
437 if self.len() != other.len() {
438 return Err(MathError::DimensionMismatch {
439 expected: self.len().to_string(),
440 actual: other.len().to_string(),
441 });
442 }
443
444 Ok(self.iter()
445 .zip(other.iter())
446 .map(|(&a, &b)| a + b)
447 .collect())
448 }
449
450 fn subtract(&self, other: &[f64]) -> MathResult<Vec<f64>> {
451 if self.len() != other.len() {
452 return Err(MathError::DimensionMismatch {
453 expected: self.len().to_string(),
454 actual: other.len().to_string(),
455 });
456 }
457
458 Ok(self.iter()
459 .zip(other.iter())
460 .map(|(&a, &b)| a - b)
461 .collect())
462 }
463
464 fn multiply(&self, scalar: f64) -> Vec<f64> {
465 self.iter().map(|&x| x * scalar).collect()
466 }
467}
468
469impl MatrixMath for Array2<f64> {
471 fn mean_along_axis(&self, axis: usize) -> MathResult<Array1<f64>> {
472 if axis > 1 {
473 return Err(MathError::InvalidInput(
474 format!("Axis must be 0 or 1, got {}", axis)
475 ));
476 }
477
478 match axis {
479 0 => {
480 let n_rows = self.nrows() as f64;
482 Ok(self.mean_axis(ndarray::Axis(0)).unwrap() / n_rows)
483 }
484 1 => {
485 let n_cols = self.ncols() as f64;
487 Ok(self.mean_axis(ndarray::Axis(1)).unwrap() / n_cols)
488 }
489 _ => unreachable!(),
490 }
491 }
492
493 fn variance_along_axis(&self, axis: usize) -> MathResult<Array1<f64>> {
494 if axis > 1 {
495 return Err(MathError::InvalidInput(
496 format!("Axis must be 0 or 1, got {}", axis)
497 ));
498 }
499
500 let means = self.mean_along_axis(axis)?;
501 let n = if axis == 0 { self.nrows() } else { self.ncols() } as f64;
502
503 match axis {
504 0 => {
505 let variances: Vec<f64> = (0..self.ncols())
507 .map(|col| {
508 let col_data = self.column(col);
509 let mean = means[col];
510 col_data.iter()
511 .map(|&x| (x - mean).powi(2))
512 .sum::<f64>() / (n - 1.0)
513 })
514 .collect();
515 Ok(Array1::from(variances))
516 }
517 1 => {
518 let variances: Vec<f64> = (0..self.nrows())
520 .map(|row| {
521 let row_data = self.row(row);
522 let mean = means[row];
523 row_data.iter()
524 .map(|&x| (x - mean).powi(2))
525 .sum::<f64>() / (n - 1.0)
526 })
527 .collect();
528 Ok(Array1::from(variances))
529 }
530 _ => unreachable!(),
531 }
532 }
533
534 fn covariance_matrix(&self) -> MathResult<Array2<f64>> {
535 let n_samples = self.nrows() as f64;
536 if n_samples <= 1.0 {
537 return Err(MathError::InvalidInput(
538 "Need at least 2 samples for covariance".to_string()
539 ));
540 }
541
542 let means = self.mean_along_axis(0)?;
543 let mut covariance = Array2::zeros((self.ncols(), self.ncols()));
544
545 for i in 0..self.ncols() {
546 for j in 0..self.ncols() {
547 let cov: f64 = (0..self.nrows())
548 .map(|k| (self[[k, i]] - means[i]) * (self[[k, j]] - means[j]))
549 .sum::<f64>() / (n_samples - 1.0);
550 covariance[[i, j]] = cov;
551 }
552 }
553
554 Ok(covariance)
555 }
556
557 fn correlation_matrix(&self) -> MathResult<Array2<f64>> {
558 let covariance = self.covariance_matrix()?;
559 let std_devs = self.variance_along_axis(0)?.mapv(|v| v.sqrt());
560
561 let mut correlation = Array2::zeros(covariance.raw_dim());
562
563 for i in 0..covariance.nrows() {
564 for j in 0..covariance.ncols() {
565 if std_devs[i] == 0.0 || std_devs[j] == 0.0 {
566 correlation[[i, j]] = 0.0;
567 } else {
568 correlation[[i, j]] = covariance[[i, j]] / (std_devs[i] * std_devs[j]);
569 }
570 }
571 }
572
573 Ok(correlation)
574 }
575}
576
577pub fn compute_mean(values: &[f64]) -> MathResult<f64> {
593 if values.is_empty() {
594 return Err(MathError::EmptyInput);
595 }
596
597 let sum: f64 = values.iter().sum();
598 Ok(sum / values.len() as f64)
599}
600
601pub fn compute_variance(values: &[f64]) -> MathResult<f64> {
617 if values.len() < 2 {
618 return Err(MathError::InvalidInput(
619 "Need at least 2 values for variance".to_string()
620 ));
621 }
622
623 let mean = compute_mean(values)?;
624 let sum_sq_diff: f64 = values.iter()
625 .map(|&x| (x - mean).powi(2))
626 .sum();
627
628 Ok(sum_sq_diff / (values.len() - 1) as f64)
629}
630
631pub fn compute_std_dev(values: &[f64]) -> MathResult<f64> {
643 let variance = compute_variance(values)?;
644 Ok(variance.sqrt())
645}
646
647pub fn softmax(values: &[f64]) -> MathResult<Vec<f64>> {
663 if values.is_empty() {
664 return Err(MathError::EmptyInput);
665 }
666
667 let max_val = values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
669 let exps: Vec<f64> = values.iter()
670 .map(|&x| (x - max_val).exp())
671 .collect();
672
673 let sum_exps: f64 = exps.iter().sum();
674
675 Ok(exps.iter().map(|&x| x / sum_exps).collect())
676}
677
/// Logistic sigmoid: 1 / (1 + e^(-x)), mapping all reals into (0, 1).
pub fn sigmoid(x: f64) -> f64 {
    let denom = 1.0 + (-x).exp();
    denom.recip()
}
692
693pub fn logit(p: f64) -> MathResult<f64> {
707 if p <= 0.0 || p >= 1.0 {
708 return Err(MathError::InvalidInput(
709 format!("Probability must be in (0, 1), got {}", p)
710 ));
711 }
712
713 Ok((p / (1.0 - p)).ln())
714}
715
/// L1 (Manhattan) norm: the sum of absolute values. Returns 0 for an
/// empty slice.
pub fn l1_norm(values: &[f64]) -> f64 {
    values.iter().fold(0.0, |acc, &x| acc + x.abs())
}
730
/// L2 (Euclidean) norm: the square root of the sum of squares. Returns 0
/// for an empty slice.
pub fn l2_norm(values: &[f64]) -> f64 {
    let sum_of_squares = values.iter().fold(0.0, |acc, &x| acc + x * x);
    sum_of_squares.sqrt()
}
745
746pub fn euclidean_distance(a: &[f64], b: &[f64]) -> MathResult<f64> {
761 if a.len() != b.len() {
762 return Err(MathError::DimensionMismatch {
763 expected: a.len().to_string(),
764 actual: b.len().to_string(),
765 });
766 }
767
768 let sum_sq: f64 = a.iter()
769 .zip(b.iter())
770 .map(|(&x, &y)| (x - y).powi(2))
771 .sum();
772
773 Ok(sum_sq.sqrt())
774}
775
/// Clamps every element of `values` into [min, max].
///
/// Kept as the `max(min).min(max)` chain (not `f64::clamp`) to preserve the
/// original behavior when `min > max` and for NaN elements.
pub fn clip_values(values: &[f64], min: f64, max: f64) -> Vec<f64> {
    let mut clipped = Vec::with_capacity(values.len());
    for &x in values {
        clipped.push(x.max(min).min(max));
    }
    clipped
}
794
795pub fn normalize_vector(values: &[f64]) -> MathResult<Vec<f64>> {
809 let norm = l2_norm(values);
810 if norm == 0.0 {
811 return Err(MathError::NumericalError("Cannot normalize zero vector".to_string()));
812 }
813
814 Ok(values.iter().map(|&x| x / norm).collect())
815}
816
817pub fn standardize_vector(values: &[f64]) -> MathResult<Vec<f64>> {
831 let mean = compute_mean(values)?;
832 let std_dev = compute_std_dev(values)?;
833
834 if std_dev == 0.0 {
835 return Err(MathError::NumericalError("Cannot standardize constant vector".to_string()));
836 }
837
838 Ok(values.iter().map(|&x| (x - mean) / std_dev).collect())
839}
840
841pub fn weighted_mean(values: &[f64], weights: &[f64]) -> MathResult<f64> {
857 if values.len() != weights.len() {
858 return Err(MathError::DimensionMismatch {
859 expected: values.len().to_string(),
860 actual: weights.len().to_string(),
861 });
862 }
863
864 if values.is_empty() {
865 return Err(MathError::EmptyInput);
866 }
867
868 let weighted_sum: f64 = values.iter()
869 .zip(weights.iter())
870 .map(|(&x, &w)| x * w)
871 .sum();
872
873 let weight_sum: f64 = weights.iter().sum();
874
875 if weight_sum == 0.0 {
876 return Err(MathError::DivisionByZero);
877 }
878
879 Ok(weighted_sum / weight_sum)
880}
881
882pub fn rmse(predictions: &[f64], targets: &[f64]) -> MathResult<f64> {
898 if predictions.len() != targets.len() {
899 return Err(MathError::DimensionMismatch {
900 expected: predictions.len().to_string(),
901 actual: targets.len().to_string(),
902 });
903 }
904
905 if predictions.is_empty() {
906 return Err(MathError::EmptyInput);
907 }
908
909 let mse: f64 = predictions.iter()
910 .zip(targets.iter())
911 .map(|(&pred, &target)| (pred - target).powi(2))
912 .sum::<f64>() / predictions.len() as f64;
913
914 Ok(mse.sqrt())
915}
916