//! ghostflow_ml/calibration.rs — probability calibration utilities
//! (isotonic regression, Platt scaling).

use ghostflow_core::Tensor;
/// Non-parametric monotonic regression, fitted with the
/// pool-adjacent-violators algorithm (PAVA).
#[derive(Clone)]
pub struct IsotonicRegression {
    /// If true, the fitted function is non-decreasing in x; otherwise non-increasing.
    pub increasing: bool,
    /// Policy applied by `transform` to inputs outside the fitted x range.
    pub out_of_bounds: OutOfBounds,
    // Distinct, ascending x breakpoints learned by `fit` (None until fitted).
    x_thresholds_: Option<Vec<f32>>,
    // Fitted y value at each x breakpoint (parallel to `x_thresholds_`).
    y_thresholds_: Option<Vec<f32>>,
    // Smallest x seen during `fit`; initialized to 0.0 before fitting.
    x_min_: f32,
    // Largest x seen during `fit`; initialized to 1.0 before fitting.
    x_max_: f32,
}
15
/// What `IsotonicRegression::transform` does with an input that falls
/// outside the x range observed during fitting.
#[derive(Clone, Copy, Debug)]
pub enum OutOfBounds {
    /// Produce NaN for out-of-range inputs.
    Nan,
    /// Clamp to the fitted boundary value (first/last threshold).
    Clip,
    /// Treat an out-of-range input as an error.
    Raise,
}
22
23impl IsotonicRegression {
24 pub fn new() -> Self {
25 IsotonicRegression {
26 increasing: true,
27 out_of_bounds: OutOfBounds::Clip,
28 x_thresholds_: None,
29 y_thresholds_: None,
30 x_min_: 0.0,
31 x_max_: 1.0,
32 }
33 }
34
35 pub fn increasing(mut self, inc: bool) -> Self {
36 self.increasing = inc;
37 self
38 }
39
40 fn pava(&self, y: &[f32], weights: &[f32]) -> Vec<f32> {
42 let n = y.len();
43 if n == 0 {
44 return vec![];
45 }
46
47 let mut result = y.to_vec();
48 let mut w = weights.to_vec();
49
50 loop {
52 let mut changed = false;
53 let mut i = 0;
54
55 while i < result.len() - 1 {
56 let violates = if self.increasing {
57 result[i] > result[i + 1]
58 } else {
59 result[i] < result[i + 1]
60 };
61
62 if violates {
63 let new_val = (w[i] * result[i] + w[i + 1] * result[i + 1]) / (w[i] + w[i + 1]);
65 let new_weight = w[i] + w[i + 1];
66
67 result[i] = new_val;
68 w[i] = new_weight;
69 result.remove(i + 1);
70 w.remove(i + 1);
71
72 changed = true;
73 } else {
74 i += 1;
75 }
76 }
77
78 if !changed {
79 break;
80 }
81 }
82
83 result
84 }
85
86 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
87 let x_data = x.data_f32();
88 let y_data = y.data_f32();
89 let n_samples = x_data.len();
90
91 let mut indices: Vec<usize> = (0..n_samples).collect();
93 indices.sort_by(|&a, &b| x_data[a].partial_cmp(&x_data[b]).unwrap());
94
95 let x_sorted: Vec<f32> = indices.iter().map(|&i| x_data[i]).collect();
96 let y_sorted: Vec<f32> = indices.iter().map(|&i| y_data[i]).collect();
97 let weights = vec![1.0f32; n_samples];
98
99 self.x_min_ = x_sorted[0];
100 self.x_max_ = x_sorted[n_samples - 1];
101
102 let y_isotonic = self.pava(&y_sorted, &weights);
104
105 let mut x_thresholds = Vec::new();
108 let mut y_thresholds = Vec::new();
109
110 let mut i = 0;
111 let mut iso_idx = 0;
112
113 while i < n_samples && iso_idx < y_isotonic.len() {
114 x_thresholds.push(x_sorted[i]);
115 y_thresholds.push(y_isotonic[iso_idx]);
116
117 let mut count = 1;
119 while i + count < n_samples && iso_idx < y_isotonic.len() {
120 if count >= y_isotonic.len() - iso_idx {
121 break;
122 }
123 count += 1;
124 }
125
126 i += 1;
127 if i < n_samples {
128 iso_idx = (iso_idx + 1).min(y_isotonic.len() - 1);
129 }
130 }
131
132 let mut final_x = vec![x_thresholds[0]];
134 let mut final_y = vec![y_thresholds[0]];
135
136 for i in 1..x_thresholds.len() {
137 if (x_thresholds[i] - final_x.last().unwrap()).abs() > 1e-10 {
138 final_x.push(x_thresholds[i]);
139 final_y.push(y_thresholds[i]);
140 }
141 }
142
143 self.x_thresholds_ = Some(final_x);
144 self.y_thresholds_ = Some(final_y);
145 }
146
147 pub fn transform(&self, x: &Tensor) -> Tensor {
148 let x_data = x.data_f32();
149 let n_samples = x_data.len();
150
151 let x_thresh = self.x_thresholds_.as_ref().expect("Model not fitted");
152 let y_thresh = self.y_thresholds_.as_ref().expect("Model not fitted");
153
154 let predictions: Vec<f32> = x_data.iter()
155 .map(|&xi| {
156 if xi < self.x_min_ {
158 match self.out_of_bounds {
159 OutOfBounds::Clip => return y_thresh[0],
160 OutOfBounds::Nan => return f32::NAN,
161 OutOfBounds::Raise => return y_thresh[0],
162 }
163 }
164 if xi > self.x_max_ {
165 match self.out_of_bounds {
166 OutOfBounds::Clip => return *y_thresh.last().unwrap(),
167 OutOfBounds::Nan => return f32::NAN,
168 OutOfBounds::Raise => return *y_thresh.last().unwrap(),
169 }
170 }
171
172 let mut lo = 0;
174 let mut hi = x_thresh.len() - 1;
175
176 while lo < hi {
177 let mid = (lo + hi + 1) / 2;
178 if x_thresh[mid] <= xi {
179 lo = mid;
180 } else {
181 hi = mid - 1;
182 }
183 }
184
185 if lo < x_thresh.len() - 1 {
187 let x0 = x_thresh[lo];
188 let x1 = x_thresh[lo + 1];
189 let y0 = y_thresh[lo];
190 let y1 = y_thresh[lo + 1];
191
192 if (x1 - x0).abs() > 1e-10 {
193 y0 + (y1 - y0) * (xi - x0) / (x1 - x0)
194 } else {
195 y0
196 }
197 } else {
198 y_thresh[lo]
199 }
200 })
201 .collect();
202
203 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
204 }
205
206 pub fn fit_transform(&mut self, x: &Tensor, y: &Tensor) -> Tensor {
207 self.fit(x, y);
208 self.transform(x)
209 }
210
211 pub fn predict(&self, x: &Tensor) -> Tensor {
212 self.transform(x)
213 }
214}
215
216impl Default for IsotonicRegression {
217 fn default() -> Self {
218 Self::new()
219 }
220}
221
/// Platt scaling: fits `sigmoid(a * score + b)` to map raw classifier
/// scores to probabilities, using Newton's method with a line search.
#[derive(Clone)]
pub struct PlattScaling {
    /// Maximum number of Newton iterations in `fit`.
    pub max_iter: usize,
    /// Convergence tolerance on the Newton step components.
    pub tol: f32,
    // Fitted slope a (None until `fit` is called).
    a_: Option<f32>,
    // Fitted intercept b (None until `fit` is called).
    b_: Option<f32>,
}
230
impl PlattScaling {
    /// Creates a scaler with defaults: 100 iterations, tolerance 1e-5.
    pub fn new() -> Self {
        PlattScaling {
            max_iter: 100,
            tol: 1e-5,
            a_: None,
            b_: None,
        }
    }

    /// Numerically stable logistic function 1 / (1 + e^-x).
    /// Branching on the sign keeps the `exp` argument non-positive,
    /// avoiding overflow for large |x|.
    fn sigmoid(x: f32) -> f32 {
        if x >= 0.0 {
            1.0 / (1.0 + (-x).exp())
        } else {
            let exp_x = x.exp();
            exp_x / (1.0 + exp_x)
        }
    }

    /// Fits (a, b) by minimizing the cross-entropy between
    /// `sigmoid(a * score + b)` and Platt's smoothed targets, using a 2x2
    /// Newton solve with backtracking line search.
    ///
    /// `y` is interpreted as binary labels: values > 0.5 count as positive.
    pub fn fit(&mut self, scores: &Tensor, y: &Tensor) {
        let scores_data = scores.data_f32();
        let y_data = y.data_f32();
        let n_samples = scores_data.len();

        let n_pos = y_data.iter().filter(|&&yi| yi > 0.5).count();
        let n_neg = n_samples - n_pos;

        // Platt's Bayesian-smoothed targets instead of hard 0/1 labels:
        // t+ = (N+ + 1)/(N+ + 2), t- = 1/(N- + 2). This regularizes the fit.
        let t_pos = (n_pos as f32 + 1.0) / (n_pos as f32 + 2.0);
        let t_neg = 1.0 / (n_neg as f32 + 2.0);

        let targets: Vec<f32> = y_data.iter()
            .map(|&yi| if yi > 0.5 { t_pos } else { t_neg })
            .collect();

        // Initialization: a = 0 and b = ln((N- + 1)/(N+ + 1)) matches the
        // class prior under the smoothed targets.
        let mut a = 0.0f32;
        let mut b = (((n_neg + 1) as f32) / ((n_pos + 1) as f32)).ln();

        let min_step = 1e-10; // smallest accepted line-search step
        let sigma = 1e-12;    // ridge added to the Hessian diagonal

        for _ in 0..self.max_iter {
            let probs: Vec<f32> = scores_data.iter()
                .map(|&s| Self::sigmoid(a * s + b))
                .collect();

            // Cross-entropy loss at the current (a, b); probabilities are
            // clamped away from 0 before ln to avoid -inf.
            let mut loss = 0.0f32;
            for i in 0..n_samples {
                let p = probs[i];
                loss -= targets[i] * p.max(1e-10).ln() + (1.0 - targets[i]) * (1.0 - p).max(1e-10).ln();
            }

            // Gradient (d1a, d1b) and Hessian entries (d2a, d2b, d2ab) of the
            // loss with respect to a and b.
            let mut d1a = 0.0f32;
            let mut d1b = 0.0f32;
            let mut d2a = 0.0f32;
            let mut d2b = 0.0f32;
            let mut d2ab = 0.0f32;

            for i in 0..n_samples {
                let p = probs[i];
                let t = targets[i];
                let d1 = p - t;         // per-sample gradient factor
                let d2 = p * (1.0 - p); // per-sample curvature factor

                d1a += scores_data[i] * d1;
                d1b += d1;
                d2a += scores_data[i] * scores_data[i] * d2;
                d2b += d2;
                d2ab += scores_data[i] * d2;
            }

            // Ridge regularization keeps the Hessian invertible.
            d2a += sigma;
            d2b += sigma;

            // Solve the 2x2 Newton system H * [da, db]^T = -grad by Cramer's
            // rule; bail out if the Hessian is (near) singular.
            let det = d2a * d2b - d2ab * d2ab;
            if det.abs() < 1e-10 {
                break;
            }

            let da = -(d2b * d1a - d2ab * d1b) / det;
            let db = -(-d2ab * d1a + d2a * d1b) / det;

            // Backtracking line search: halve the step until the loss drops.
            let mut step = 1.0f32;
            let mut accepted = false;
            while step > min_step {
                let new_a = a + step * da;
                let new_b = b + step * db;

                let mut new_loss = 0.0f32;
                for i in 0..n_samples {
                    let p = Self::sigmoid(new_a * scores_data[i] + new_b);
                    new_loss -= targets[i] * p.max(1e-10).ln() + (1.0 - targets[i]) * (1.0 - p).max(1e-10).ln();
                }

                if new_loss < loss {
                    a = new_a;
                    b = new_b;
                    accepted = true;
                    break;
                }

                step *= 0.5;
            }

            // If no step improved the loss, nudge along the Newton direction
            // anyway so the iteration cannot stall in place.
            if !accepted {
                a += 0.01 * da;
                b += 0.01 * db;
            }

            // Converged when the full Newton step is below tolerance.
            if da.abs() < self.tol && db.abs() < self.tol {
                break;
            }
        }

        self.a_ = Some(a);
        self.b_ = Some(b);
    }

    /// Maps raw scores to calibrated probabilities via the fitted sigmoid.
    ///
    /// # Panics
    /// Panics if the model has not been fitted.
    pub fn transform(&self, scores: &Tensor) -> Tensor {
        let scores_data = scores.data_f32();
        let n_samples = scores_data.len();

        let a = self.a_.expect("Model not fitted");
        let b = self.b_.expect("Model not fitted");

        let probs: Vec<f32> = scores_data.iter()
            .map(|&s| Self::sigmoid(a * s + b))
            .collect();

        Tensor::from_slice(&probs, &[n_samples]).unwrap()
    }

    /// Convenience: `fit` followed by `transform` on the same scores.
    pub fn fit_transform(&mut self, scores: &Tensor, y: &Tensor) -> Tensor {
        self.fit(scores, y);
        self.transform(scores)
    }
}
381
382impl Default for PlattScaling {
383 fn default() -> Self {
384 Self::new()
385 }
386}
387
/// Pairs a calibration method with a cross-validation fold count.
pub struct CalibratedClassifier {
    /// Calibration algorithm to apply (sigmoid/Platt or isotonic).
    pub method: CalibrationMethod,
    /// Number of cross-validation folds (default 5 via `new`).
    pub cv: usize,
    // Per-fold fitted calibrators; not populated by any code visible here.
    #[allow(dead_code)]
    calibrators_: Vec<CalibrationMethod>,
}
395
/// Calibration algorithm selector used by [`CalibratedClassifier`].
#[derive(Clone)]
pub enum CalibrationMethod {
    /// Parametric sigmoid calibration (Platt scaling).
    Sigmoid(PlattScaling),
    /// Non-parametric isotonic-regression calibration.
    Isotonic(IsotonicRegression),
}
401
402impl CalibratedClassifier {
403 pub fn new(method: CalibrationMethod) -> Self {
404 CalibratedClassifier {
405 method,
406 cv: 5,
407 calibrators_: Vec::new(),
408 }
409 }
410
411 pub fn cv(mut self, cv: usize) -> Self {
412 self.cv = cv;
413 self
414 }
415}
416
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: fit_transform on a nearly-monotone sequence returns a
    // tensor of the expected shape.
    #[test]
    fn test_isotonic_regression() {
        let x = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &[5]).unwrap();
        let y = Tensor::from_slice(&[1.0f32, 3.0, 2.0, 4.0, 5.0], &[5]).unwrap();

        let mut ir = IsotonicRegression::new();
        let transformed = ir.fit_transform(&x, &y);

        assert_eq!(transformed.dims(), &[5]);
    }

    // Smoke test: Platt scaling on separable scores produces values that
    // are valid probabilities (in [0, 1]).
    #[test]
    fn test_platt_scaling() {
        let scores = Tensor::from_slice(&[-2.0f32, -1.0, 0.0, 1.0, 2.0], &[5]).unwrap();
        let y = Tensor::from_slice(&[0.0f32, 0.0, 0.0, 1.0, 1.0], &[5]).unwrap();

        let mut ps = PlattScaling::new();
        let probs = ps.fit_transform(&scores, &y);

        let probs_data = probs.storage().as_slice::<f32>().to_vec();
        assert!(probs_data.iter().all(|&p| p >= 0.0 && p <= 1.0));
    }
}
445
446