ghostflow_ml/
metrics.rs

1//! Model evaluation metrics
2
3use ghostflow_core::Tensor;
4
5// ============ Classification Metrics ============
6
7/// Compute accuracy score
8pub fn accuracy_score(y_true: &Tensor, y_pred: &Tensor) -> f32 {
9    let y_true_data = y_true.data_f32();
10    let y_pred_data = y_pred.data_f32();
11    
12    let correct: usize = y_true_data.iter()
13        .zip(y_pred_data.iter())
14        .filter(|(&t, &p)| (t - p).abs() < 0.5)
15        .count();
16    
17    correct as f32 / y_true_data.len() as f32
18}
19
20/// Compute precision score for binary classification
21pub fn precision_score(y_true: &Tensor, y_pred: &Tensor) -> f32 {
22    let y_true_data = y_true.data_f32();
23    let y_pred_data = y_pred.data_f32();
24    
25    let mut tp = 0usize;
26    let mut fp = 0usize;
27    
28    for (&t, &p) in y_true_data.iter().zip(y_pred_data.iter()) {
29        let t_pos = t > 0.5;
30        let p_pos = p > 0.5;
31        
32        if p_pos && t_pos { tp += 1; }
33        if p_pos && !t_pos { fp += 1; }
34    }
35    
36    if tp + fp == 0 { 0.0 } else { tp as f32 / (tp + fp) as f32 }
37}
38
39/// Compute recall score for binary classification
40pub fn recall_score(y_true: &Tensor, y_pred: &Tensor) -> f32 {
41    let y_true_data = y_true.data_f32();
42    let y_pred_data = y_pred.data_f32();
43    
44    let mut tp = 0usize;
45    let mut fn_ = 0usize;
46    
47    for (&t, &p) in y_true_data.iter().zip(y_pred_data.iter()) {
48        let t_pos = t > 0.5;
49        let p_pos = p > 0.5;
50        
51        if p_pos && t_pos { tp += 1; }
52        if !p_pos && t_pos { fn_ += 1; }
53    }
54    
55    if tp + fn_ == 0 { 0.0 } else { tp as f32 / (tp + fn_) as f32 }
56}
57
58/// Compute F1 score for binary classification
59pub fn f1_score(y_true: &Tensor, y_pred: &Tensor) -> f32 {
60    let precision = precision_score(y_true, y_pred);
61    let recall = recall_score(y_true, y_pred);
62    
63    if precision + recall == 0.0 {
64        0.0
65    } else {
66        2.0 * precision * recall / (precision + recall)
67    }
68}
69
70/// Compute confusion matrix
71pub fn confusion_matrix(y_true: &Tensor, y_pred: &Tensor, n_classes: usize) -> Vec<Vec<usize>> {
72    let y_true_data = y_true.data_f32();
73    let y_pred_data = y_pred.data_f32();
74    
75    let mut matrix = vec![vec![0usize; n_classes]; n_classes];
76    
77    for (&t, &p) in y_true_data.iter().zip(y_pred_data.iter()) {
78        let t_class = t.round() as usize;
79        let p_class = p.round() as usize;
80        
81        if t_class < n_classes && p_class < n_classes {
82            matrix[t_class][p_class] += 1;
83        }
84    }
85    
86    matrix
87}
88
89/// Compute ROC AUC score for binary classification
90pub fn roc_auc_score(y_true: &Tensor, y_scores: &Tensor) -> f32 {
91    let y_true_data = y_true.data_f32();
92    let y_scores_data = y_scores.data_f32();
93    
94    // Sort by scores descending
95    let mut pairs: Vec<(f32, f32)> = y_true_data.iter()
96        .zip(y_scores_data.iter())
97        .map(|(&t, &s)| (t, s))
98        .collect();
99    pairs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
100    
101    let n_pos: f32 = y_true_data.iter().filter(|&&t| t > 0.5).count() as f32;
102    let n_neg: f32 = y_true_data.len() as f32 - n_pos;
103    
104    if n_pos == 0.0 || n_neg == 0.0 {
105        return 0.5;
106    }
107    
108    let mut auc = 0.0f32;
109    let mut tp = 0.0f32;
110    let mut _fp = 0.0f32;
111    let mut prev_tp = 0.0f32;
112    
113    for (label, _) in pairs {
114        if label > 0.5 {
115            tp += 1.0;
116        } else {
117            _fp += 1.0;
118            auc += (tp + prev_tp) / 2.0;
119        }
120        prev_tp = tp;
121    }
122    
123    auc / (n_pos * n_neg)
124}
125
/// Per-class classification metrics, as produced by `classification_report`.
/// All `Vec` fields are indexed by class id (`0..n_classes`).
pub struct ClassificationReport {
    /// Precision of each class: TP / (TP + FP).
    pub precision: Vec<f32>,
    /// Recall of each class: TP / (TP + FN).
    pub recall: Vec<f32>,
    /// F1 score of each class (harmonic mean of precision and recall).
    pub f1: Vec<f32>,
    /// Number of true samples of each class (confusion-matrix row sums).
    pub support: Vec<usize>,
    /// Overall accuracy across all samples.
    pub accuracy: f32,
}
134
135pub fn classification_report(y_true: &Tensor, y_pred: &Tensor, n_classes: usize) -> ClassificationReport {
136    let cm = confusion_matrix(y_true, y_pred, n_classes);
137    
138    let mut precision = vec![0.0f32; n_classes];
139    let mut recall = vec![0.0f32; n_classes];
140    let mut f1 = vec![0.0f32; n_classes];
141    let mut support = vec![0usize; n_classes];
142    
143    for c in 0..n_classes {
144        let tp = cm[c][c];
145        let fp: usize = (0..n_classes).map(|i| cm[i][c]).sum::<usize>() - tp;
146        let fn_: usize = cm[c].iter().sum::<usize>() - tp;
147        
148        support[c] = cm[c].iter().sum();
149        
150        precision[c] = if tp + fp == 0 { 0.0 } else { tp as f32 / (tp + fp) as f32 };
151        recall[c] = if tp + fn_ == 0 { 0.0 } else { tp as f32 / (tp + fn_) as f32 };
152        f1[c] = if precision[c] + recall[c] == 0.0 {
153            0.0
154        } else {
155            2.0 * precision[c] * recall[c] / (precision[c] + recall[c])
156        };
157    }
158    
159    ClassificationReport {
160        precision,
161        recall,
162        f1,
163        support,
164        accuracy: accuracy_score(y_true, y_pred),
165    }
166}
167
168
169// ============ Regression Metrics ============
170
171/// Mean Squared Error
172pub fn mean_squared_error(y_true: &Tensor, y_pred: &Tensor) -> f32 {
173    let y_true_data = y_true.data_f32();
174    let y_pred_data = y_pred.data_f32();
175    
176    y_true_data.iter()
177        .zip(y_pred_data.iter())
178        .map(|(&t, &p)| (t - p).powi(2))
179        .sum::<f32>() / y_true_data.len() as f32
180}
181
182/// Root Mean Squared Error
183pub fn root_mean_squared_error(y_true: &Tensor, y_pred: &Tensor) -> f32 {
184    mean_squared_error(y_true, y_pred).sqrt()
185}
186
187/// Mean Absolute Error
188pub fn mean_absolute_error(y_true: &Tensor, y_pred: &Tensor) -> f32 {
189    let y_true_data = y_true.data_f32();
190    let y_pred_data = y_pred.data_f32();
191    
192    y_true_data.iter()
193        .zip(y_pred_data.iter())
194        .map(|(&t, &p)| (t - p).abs())
195        .sum::<f32>() / y_true_data.len() as f32
196}
197
198/// R² Score (Coefficient of Determination)
199pub fn r2_score(y_true: &Tensor, y_pred: &Tensor) -> f32 {
200    let y_true_data = y_true.data_f32();
201    let y_pred_data = y_pred.data_f32();
202    
203    let y_mean: f32 = y_true_data.iter().sum::<f32>() / y_true_data.len() as f32;
204    
205    let ss_res: f32 = y_true_data.iter()
206        .zip(y_pred_data.iter())
207        .map(|(&t, &p)| (t - p).powi(2))
208        .sum();
209    
210    let ss_tot: f32 = y_true_data.iter()
211        .map(|&t| (t - y_mean).powi(2))
212        .sum();
213    
214    1.0 - ss_res / ss_tot.max(1e-10)
215}
216
217/// Mean Absolute Percentage Error
218pub fn mean_absolute_percentage_error(y_true: &Tensor, y_pred: &Tensor) -> f32 {
219    let y_true_data = y_true.data_f32();
220    let y_pred_data = y_pred.data_f32();
221    
222    y_true_data.iter()
223        .zip(y_pred_data.iter())
224        .map(|(&t, &p)| ((t - p) / t.abs().max(1e-10)).abs())
225        .sum::<f32>() / y_true_data.len() as f32 * 100.0
226}
227
228/// Explained Variance Score
229pub fn explained_variance_score(y_true: &Tensor, y_pred: &Tensor) -> f32 {
230    let y_true_data = y_true.data_f32();
231    let y_pred_data = y_pred.data_f32();
232    let n = y_true_data.len() as f32;
233    
234    // Compute residuals
235    let residuals: Vec<f32> = y_true_data.iter()
236        .zip(y_pred_data.iter())
237        .map(|(&t, &p)| t - p)
238        .collect();
239    
240    let res_mean: f32 = residuals.iter().sum::<f32>() / n;
241    let res_var: f32 = residuals.iter().map(|&r| (r - res_mean).powi(2)).sum::<f32>() / n;
242    
243    let y_mean: f32 = y_true_data.iter().sum::<f32>() / n;
244    let y_var: f32 = y_true_data.iter().map(|&y| (y - y_mean).powi(2)).sum::<f32>() / n;
245    
246    1.0 - res_var / y_var.max(1e-10)
247}
248
249// ============ Clustering Metrics ============
250
251/// Silhouette Score
252pub fn silhouette_score(x: &Tensor, labels: &Tensor) -> f32 {
253    let x_data = x.data_f32();
254    let labels_data = labels.data_f32();
255    let n_samples = x.dims()[0];
256    let n_features = x.dims()[1];
257    
258    if n_samples < 2 {
259        return 0.0;
260    }
261    
262    let n_clusters = labels_data.iter().map(|&l| l as usize).max().unwrap_or(0) + 1;
263    
264    let mut total_silhouette = 0.0f32;
265    
266    for i in 0..n_samples {
267        let xi = &x_data[i * n_features..(i + 1) * n_features];
268        let cluster_i = labels_data[i] as usize;
269        
270        // Compute a(i): mean distance to points in same cluster
271        let mut a_sum = 0.0f32;
272        let mut a_count = 0usize;
273        
274        for j in 0..n_samples {
275            if i != j && labels_data[j] as usize == cluster_i {
276                let xj = &x_data[j * n_features..(j + 1) * n_features];
277                let dist: f32 = xi.iter().zip(xj.iter())
278                    .map(|(&a, &b)| (a - b).powi(2))
279                    .sum::<f32>()
280                    .sqrt();
281                a_sum += dist;
282                a_count += 1;
283            }
284        }
285        
286        let a = if a_count > 0 { a_sum / a_count as f32 } else { 0.0 };
287        
288        // Compute b(i): min mean distance to points in other clusters
289        let mut b = f32::INFINITY;
290        
291        for c in 0..n_clusters {
292            if c == cluster_i { continue; }
293            
294            let mut b_sum = 0.0f32;
295            let mut b_count = 0usize;
296            
297            for j in 0..n_samples {
298                if labels_data[j] as usize == c {
299                    let xj = &x_data[j * n_features..(j + 1) * n_features];
300                    let dist: f32 = xi.iter().zip(xj.iter())
301                        .map(|(&a, &b)| (a - b).powi(2))
302                        .sum::<f32>()
303                        .sqrt();
304                    b_sum += dist;
305                    b_count += 1;
306                }
307            }
308            
309            if b_count > 0 {
310                b = b.min(b_sum / b_count as f32);
311            }
312        }
313        
314        if b.is_infinite() { b = 0.0; }
315        
316        let s = if a.max(b) > 0.0 { (b - a) / a.max(b) } else { 0.0 };
317        total_silhouette += s;
318    }
319    
320    total_silhouette / n_samples as f32
321}
322
/// Davies-Bouldin Index (lower is better).
///
/// For each cluster, takes the worst-case ratio of combined intra-cluster
/// scatter to centroid separation against every other cluster, then averages
/// those worst cases over all clusters.
///
/// `x` is read as a row-major (n_samples, n_features) matrix and `labels`
/// as integer-valued cluster ids.
pub fn davies_bouldin_score(x: &Tensor, labels: &Tensor) -> f32 {
    let x_data = x.data_f32();
    let labels_data = labels.data_f32();
    let n_samples = x.dims()[0];
    let n_features = x.dims()[1];

    // Cluster ids are taken as 0..=max(label) — assumes dense non-negative
    // integer labels; gaps produce empty clusters with zero scatter.
    // TODO(review): confirm callers guarantee this.
    let n_clusters = labels_data.iter().map(|&l| l as usize).max().unwrap_or(0) + 1;

    // Compute cluster centroids: accumulate feature sums per cluster, then
    // divide by the member count.
    let mut centroids = vec![vec![0.0f32; n_features]; n_clusters];
    let mut counts = vec![0usize; n_clusters];

    for i in 0..n_samples {
        let cluster = labels_data[i] as usize;
        counts[cluster] += 1;
        for j in 0..n_features {
            centroids[cluster][j] += x_data[i * n_features + j];
        }
    }

    for c in 0..n_clusters {
        if counts[c] > 0 {
            for j in 0..n_features {
                centroids[c][j] /= counts[c] as f32;
            }
        }
    }

    // Compute scatter for each cluster: mean Euclidean distance of its
    // members to the cluster centroid.
    let mut scatter = vec![0.0f32; n_clusters];
    for i in 0..n_samples {
        let cluster = labels_data[i] as usize;
        let xi = &x_data[i * n_features..(i + 1) * n_features];
        let dist: f32 = xi.iter().zip(centroids[cluster].iter())
            .map(|(&a, &b)| (a - b).powi(2))
            .sum::<f32>()
            .sqrt();
        scatter[cluster] += dist;
    }

    for c in 0..n_clusters {
        if counts[c] > 0 {
            scatter[c] /= counts[c] as f32;
        }
    }

    // Compute DB index: for each cluster i, the maximum similarity ratio
    // R_ij = (S_i + S_j) / d(centroid_i, centroid_j) over all j != i,
    // averaged over clusters.
    let mut db_sum = 0.0f32;
    for i in 0..n_clusters {
        let mut max_ratio = 0.0f32;
        for j in 0..n_clusters {
            if i != j {
                let centroid_dist: f32 = centroids[i].iter().zip(centroids[j].iter())
                    .map(|(&a, &b)| (a - b).powi(2))
                    .sum::<f32>()
                    .sqrt();

                // Coincident centroids are skipped to avoid dividing by zero.
                if centroid_dist > 0.0 {
                    let ratio = (scatter[i] + scatter[j]) / centroid_dist;
                    max_ratio = max_ratio.max(ratio);
                }
            }
        }
        db_sum += max_ratio;
    }

    db_sum / n_clusters as f32
}
392
#[cfg(test)]
mod tests {
    use super::*;

    // 3 of the 4 predictions match the labels, so accuracy should be 0.75.
    #[test]
    fn test_accuracy() {
        let y_true = Tensor::from_slice(&[0.0f32, 1.0, 1.0, 0.0], &[4]).unwrap();
        let y_pred = Tensor::from_slice(&[0.0f32, 1.0, 0.0, 0.0], &[4]).unwrap();
        
        let acc = accuracy_score(&y_true, &y_pred);
        assert!((acc - 0.75).abs() < 0.01);
    }

    // Identical predictions and targets give an MSE of (approximately) zero.
    #[test]
    fn test_mse() {
        let y_true = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0], &[4]).unwrap();
        let y_pred = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0], &[4]).unwrap();
        
        let mse = mean_squared_error(&y_true, &y_pred);
        assert!(mse < 0.01);
    }

    // Near-perfect predictions (residuals of ~0.1) should yield R² close to 1.
    #[test]
    fn test_r2() {
        let y_true = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0], &[4]).unwrap();
        let y_pred = Tensor::from_slice(&[1.1f32, 2.1, 2.9, 3.9], &[4]).unwrap();
        
        let r2 = r2_score(&y_true, &y_pred);
        assert!(r2 > 0.9);
    }
}
424
425