// so_utils/numerical.rs
1//! Numerical utilities for statistical computing
2
3use ndarray::{Array1, Array2};
4
/// Returns `true` when `x` is an ordinary number — i.e. neither NaN
/// nor positive/negative infinity.
pub fn is_finite(x: f64) -> bool {
    !(x.is_nan() || x.is_infinite())
}
9
10/// Check if all values in an array are finite
11pub fn all_finite(arr: &Array1<f64>) -> bool {
12    arr.iter().all(|&x| is_finite(x))
13}
14
15/// Check if all values in a 2D array are finite
16pub fn all_finite_2d(arr: &Array2<f64>) -> bool {
17    arr.iter().all(|&x| is_finite(x))
18}
19
20/// Replace NaN values with a specified value
21pub fn replace_nan(arr: &mut Array1<f64>, value: f64) {
22    for x in arr.iter_mut() {
23        if x.is_nan() {
24            *x = value;
25        }
26    }
27}
28
29/// Replace infinite values with a specified value
30pub fn replace_inf(arr: &mut Array1<f64>, value: f64) {
31    for x in arr.iter_mut() {
32        if x.is_infinite() {
33            *x = value;
34        }
35    }
36}
37
38/// Clip values to a specified range
39pub fn clip(arr: &mut Array1<f64>, min: f64, max: f64) {
40    for x in arr.iter_mut() {
41        *x = x.clamp(min, max);
42    }
43}
44
45/// Standardize array (z-score normalization)
46pub fn standardize(arr: &Array1<f64>) -> Option<Array1<f64>> {
47    let mean = arr.mean()?;
48    let std = arr.std(1.0);
49
50    if std == 0.0 {
51        return None;
52    }
53
54    Some((arr - mean) / std)
55}
56
57/// Min-max normalization to [0, 1] range
58pub fn min_max_normalize(arr: &Array1<f64>) -> Option<Array1<f64>> {
59    let min = arr.iter().fold(f64::INFINITY, |a, &b| a.min(b));
60    let max = arr.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
61
62    if min == max {
63        return None;
64    }
65
66    Some((arr - min) / (max - min))
67}
68
69/// Softmax function for probability distribution
70pub fn softmax(arr: &Array1<f64>) -> Array1<f64> {
71    let max_val = arr.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
72    let exp_arr: Array1<f64> = arr.map(|&x| (x - max_val).exp());
73    let sum: f64 = exp_arr.sum();
74    exp_arr / sum
75}
76
/// Sigmoid (standard logistic) function: `1 / (1 + e^(-x))`.
/// Maps any real `x` into the open interval (0, 1).
pub fn sigmoid(x: f64) -> f64 {
    let e = (-x).exp();
    (1.0 + e).recip()
}
81
/// Logit (log-odds) function, the inverse of the sigmoid:
/// `ln(p / (1 - p))`.
///
/// Returns `None` for probabilities outside the open interval (0, 1),
/// where the log-odds are undefined.
pub fn logit(p: f64) -> Option<f64> {
    if p <= 0.0 || p >= 1.0 {
        return None;
    }
    let odds = p / (1.0 - p);
    Some(odds.ln())
}
89
/// Logistic function — an alias for [`sigmoid`], kept as a separate
/// name for callers that prefer the statistical terminology.
/// Computes `1 / (1 + e^(-x))`.
pub fn logistic(x: f64) -> f64 {
    1.0 / (1.0 + (-x).exp())
}
94
95/// Compute log-sum-exp for numerical stability
96pub fn log_sum_exp(arr: &Array1<f64>) -> f64 {
97    let max_val = arr.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
98    let sum_exp: f64 = arr.iter().map(|&x| (x - max_val).exp()).sum();
99    max_val + sum_exp.ln()
100}
101
102/// Compute the pairwise correlation matrix (from original tools/utils.rs)
103pub fn correlation_matrix(data: &Array2<f64>) -> crate::Result<Array2<f64>> {
104    use crate::error::UtilsError;
105
106    let (n_samples, n_features) = data.dim();
107
108    if n_samples < 2 {
109        return Err(UtilsError::DataError(
110            "Need at least 2 samples to compute correlation".to_string(),
111        ));
112    }
113
114    let mut corr = Array2::zeros((n_features, n_features));
115
116    for i in 0..n_features {
117        for j in 0..n_features {
118            let x = data.column(i);
119            let y = data.column(j);
120
121            let x_mean = x.mean().unwrap_or(0.0);
122            let y_mean = y.mean().unwrap_or(0.0);
123
124            let mut numerator = 0.0;
125            let mut denom_x = 0.0;
126            let mut denom_y = 0.0;
127
128            for k in 0..n_samples {
129                let x_diff = x[k] - x_mean;
130                let y_diff = y[k] - y_mean;
131
132                numerator += x_diff * y_diff;
133                denom_x += x_diff * x_diff;
134                denom_y += y_diff * y_diff;
135            }
136
137            if denom_x > 0.0 && denom_y > 0.0 {
138                corr[(i, j)] = numerator / (denom_x.sqrt() * denom_y.sqrt());
139            } else {
140                corr[(i, j)] = 0.0;
141            }
142        }
143    }
144
145    Ok(corr)
146}
147
148/// Compute the covariance matrix (from original tools/utils.rs)
149pub fn covariance_matrix(data: &Array2<f64>, ddof: f64) -> crate::Result<Array2<f64>> {
150    use crate::error::UtilsError;
151
152    let (n_samples, n_features) = data.dim();
153
154    if n_samples as f64 <= ddof {
155        return Err(UtilsError::DataError(format!(
156            "Not enough samples for covariance with ddof={}",
157            ddof
158        )));
159    }
160
161    let mut cov = Array2::zeros((n_features, n_features));
162    let means: Vec<f64> = (0..n_features)
163        .map(|i| data.column(i).mean().unwrap_or(0.0))
164        .collect();
165
166    for i in 0..n_features {
167        for j in 0..=i {
168            let mut sum = 0.0;
169            for k in 0..n_samples {
170                sum += (data[(k, i)] - means[i]) * (data[(k, j)] - means[j]);
171            }
172            let value = sum / (n_samples as f64 - ddof);
173            cov[(i, j)] = value;
174            if i != j {
175                cov[(j, i)] = value;
176            }
177        }
178    }
179
180    Ok(cov)
181}