ghostflow_ml/
calibration.rs1use ghostflow_core::Tensor;
4
5#[derive(Clone)]
7pub struct IsotonicRegression {
8 pub increasing: bool,
9 pub out_of_bounds: OutOfBounds,
10 x_thresholds_: Option<Vec<f32>>,
11 y_thresholds_: Option<Vec<f32>>,
12 x_min_: f32,
13 x_max_: f32,
14}
15
16#[derive(Clone, Copy, Debug)]
17pub enum OutOfBounds {
18 Nan,
19 Clip,
20 Raise,
21}
22
23impl IsotonicRegression {
24 pub fn new() -> Self {
25 IsotonicRegression {
26 increasing: true,
27 out_of_bounds: OutOfBounds::Clip,
28 x_thresholds_: None,
29 y_thresholds_: None,
30 x_min_: 0.0,
31 x_max_: 1.0,
32 }
33 }
34
35 pub fn increasing(mut self, inc: bool) -> Self {
36 self.increasing = inc;
37 self
38 }
39
40 fn pava(&self, y: &[f32], weights: &[f32]) -> Vec<f32> {
42 let n = y.len();
43 if n == 0 {
44 return vec![];
45 }
46
47 let mut result = y.to_vec();
48 let mut w = weights.to_vec();
49
50 loop {
52 let mut changed = false;
53 let mut i = 0;
54
55 while i < result.len() - 1 {
56 let violates = if self.increasing {
57 result[i] > result[i + 1]
58 } else {
59 result[i] < result[i + 1]
60 };
61
62 if violates {
63 let new_val = (w[i] * result[i] + w[i + 1] * result[i + 1]) / (w[i] + w[i + 1]);
65 let new_weight = w[i] + w[i + 1];
66
67 result[i] = new_val;
68 w[i] = new_weight;
69 result.remove(i + 1);
70 w.remove(i + 1);
71
72 changed = true;
73 } else {
74 i += 1;
75 }
76 }
77
78 if !changed {
79 break;
80 }
81 }
82
83 result
84 }
85
86 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
87 let x_data = x.data_f32();
88 let y_data = y.data_f32();
89 let n_samples = x_data.len();
90
91 let mut indices: Vec<usize> = (0..n_samples).collect();
93 indices.sort_by(|&a, &b| x_data[a].partial_cmp(&x_data[b]).unwrap());
94
95 let x_sorted: Vec<f32> = indices.iter().map(|&i| x_data[i]).collect();
96 let y_sorted: Vec<f32> = indices.iter().map(|&i| y_data[i]).collect();
97 let weights = vec![1.0f32; n_samples];
98
99 self.x_min_ = x_sorted[0];
100 self.x_max_ = x_sorted[n_samples - 1];
101
102 let y_isotonic = self.pava(&y_sorted, &weights);
104
105 let mut x_thresholds = Vec::new();
108 let mut y_thresholds = Vec::new();
109
110 let mut i = 0;
111 let mut iso_idx = 0;
112
113 while i < n_samples && iso_idx < y_isotonic.len() {
114 x_thresholds.push(x_sorted[i]);
115 y_thresholds.push(y_isotonic[iso_idx]);
116
117 let mut count = 1;
119 while i + count < n_samples && iso_idx < y_isotonic.len() {
120 if count >= y_isotonic.len() - iso_idx {
121 break;
122 }
123 count += 1;
124 }
125
126 i += 1;
127 if i < n_samples {
128 iso_idx = (iso_idx + 1).min(y_isotonic.len() - 1);
129 }
130 }
131
132 let mut final_x = vec![x_thresholds[0]];
134 let mut final_y = vec![y_thresholds[0]];
135
136 for i in 1..x_thresholds.len() {
137 if (x_thresholds[i] - final_x.last().unwrap()).abs() > 1e-10 {
138 final_x.push(x_thresholds[i]);
139 final_y.push(y_thresholds[i]);
140 }
141 }
142
143 self.x_thresholds_ = Some(final_x);
144 self.y_thresholds_ = Some(final_y);
145 }
146
147 pub fn transform(&self, x: &Tensor) -> Tensor {
148 let x_data = x.data_f32();
149 let n_samples = x_data.len();
150
151 let x_thresh = self.x_thresholds_.as_ref().expect("Model not fitted");
152 let y_thresh = self.y_thresholds_.as_ref().expect("Model not fitted");
153
154 let predictions: Vec<f32> = x_data.iter()
155 .map(|&xi| {
156 if xi < self.x_min_ {
158 match self.out_of_bounds {
159 OutOfBounds::Clip => return y_thresh[0],
160 OutOfBounds::Nan => return f32::NAN,
161 OutOfBounds::Raise => return y_thresh[0],
162 }
163 }
164 if xi > self.x_max_ {
165 match self.out_of_bounds {
166 OutOfBounds::Clip => return *y_thresh.last().unwrap(),
167 OutOfBounds::Nan => return f32::NAN,
168 OutOfBounds::Raise => return *y_thresh.last().unwrap(),
169 }
170 }
171
172 let mut lo = 0;
174 let mut hi = x_thresh.len() - 1;
175
176 while lo < hi {
177 let mid = (lo + hi + 1) / 2;
178 if x_thresh[mid] <= xi {
179 lo = mid;
180 } else {
181 hi = mid - 1;
182 }
183 }
184
185 if lo < x_thresh.len() - 1 {
187 let x0 = x_thresh[lo];
188 let x1 = x_thresh[lo + 1];
189 let y0 = y_thresh[lo];
190 let y1 = y_thresh[lo + 1];
191
192 if (x1 - x0).abs() > 1e-10 {
193 y0 + (y1 - y0) * (xi - x0) / (x1 - x0)
194 } else {
195 y0
196 }
197 } else {
198 y_thresh[lo]
199 }
200 })
201 .collect();
202
203 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
204 }
205
206 pub fn fit_transform(&mut self, x: &Tensor, y: &Tensor) -> Tensor {
207 self.fit(x, y);
208 self.transform(x)
209 }
210
211 pub fn predict(&self, x: &Tensor) -> Tensor {
212 self.transform(x)
213 }
214}
215
216impl Default for IsotonicRegression {
217 fn default() -> Self {
218 Self::new()
219 }
220}
221
222#[derive(Clone)]
224pub struct PlattScaling {
225 pub max_iter: usize,
226 pub tol: f32,
227 a_: Option<f32>,
228 b_: Option<f32>,
229}
230
231impl PlattScaling {
232 pub fn new() -> Self {
233 PlattScaling {
234 max_iter: 100,
235 tol: 1e-5,
236 a_: None,
237 b_: None,
238 }
239 }
240
241 fn sigmoid(x: f32) -> f32 {
242 if x >= 0.0 {
243 1.0 / (1.0 + (-x).exp())
244 } else {
245 let exp_x = x.exp();
246 exp_x / (1.0 + exp_x)
247 }
248 }
249
250 pub fn fit(&mut self, scores: &Tensor, y: &Tensor) {
251 let scores_data = scores.data_f32();
252 let y_data = y.data_f32();
253 let n_samples = scores_data.len();
254
255 let n_pos = y_data.iter().filter(|&&yi| yi > 0.5).count();
257 let n_neg = n_samples - n_pos;
258
259 let t_pos = (n_pos as f32 + 1.0) / (n_pos as f32 + 2.0);
260 let t_neg = 1.0 / (n_neg as f32 + 2.0);
261
262 let targets: Vec<f32> = y_data.iter()
263 .map(|&yi| if yi > 0.5 { t_pos } else { t_neg })
264 .collect();
265
266 let mut a = 0.0f32;
268 let mut b = (((n_neg + 1) as f32) / ((n_pos + 1) as f32)).ln();
269
270 let min_step = 1e-10;
272 let sigma = 1e-12;
273
274 for _ in 0..self.max_iter {
275 let probs: Vec<f32> = scores_data.iter()
277 .map(|&s| Self::sigmoid(a * s + b))
278 .collect();
279
280 let mut d1a = 0.0f32;
282 let mut d1b = 0.0f32;
283 let mut d2a = 0.0f32;
284 let mut d2b = 0.0f32;
285 let mut d2ab = 0.0f32;
286
287 for i in 0..n_samples {
288 let p = probs[i];
289 let t = targets[i];
290 let d1 = p - t;
291 let d2 = p * (1.0 - p);
292
293 d1a += scores_data[i] * d1;
294 d1b += d1;
295 d2a += scores_data[i] * scores_data[i] * d2;
296 d2b += d2;
297 d2ab += scores_data[i] * d2;
298 }
299
300 d2a += sigma;
302 d2b += sigma;
303
304 let det = d2a * d2b - d2ab * d2ab;
306 if det.abs() < 1e-10 {
307 break;
308 }
309
310 let da = -(d2b * d1a - d2ab * d1b) / det;
311 let db = -(-d2ab * d1a + d2a * d1b) / det;
312
313 let step = 1.0f32;
315 while step > min_step {
316 let new_a = a + step * da;
317 let new_b = b + step * db;
318
319 let mut _loss = 0.0f32;
321 for i in 0..n_samples {
322 let p = Self::sigmoid(new_a * scores_data[i] + new_b);
323 _loss -= targets[i] * p.max(1e-10).ln() + (1.0 - targets[i]) * (1.0 - p).max(1e-10).ln();
324 }
325
326 a = new_a;
328 b = new_b;
329 break;
330 }
331
332 if da.abs() < self.tol && db.abs() < self.tol {
334 break;
335 }
336 }
337
338 self.a_ = Some(a);
339 self.b_ = Some(b);
340 }
341
342 pub fn transform(&self, scores: &Tensor) -> Tensor {
343 let scores_data = scores.data_f32();
344 let n_samples = scores_data.len();
345
346 let a = self.a_.expect("Model not fitted");
347 let b = self.b_.expect("Model not fitted");
348
349 let probs: Vec<f32> = scores_data.iter()
350 .map(|&s| Self::sigmoid(a * s + b))
351 .collect();
352
353 Tensor::from_slice(&probs, &[n_samples]).unwrap()
354 }
355
356 pub fn fit_transform(&mut self, scores: &Tensor, y: &Tensor) -> Tensor {
357 self.fit(scores, y);
358 self.transform(scores)
359 }
360}
361
362impl Default for PlattScaling {
363 fn default() -> Self {
364 Self::new()
365 }
366}
367
368pub struct CalibratedClassifier {
370 pub method: CalibrationMethod,
371 pub cv: usize,
372 #[allow(dead_code)]
373 calibrators_: Vec<CalibrationMethod>,
374}
375
376#[derive(Clone)]
377pub enum CalibrationMethod {
378 Sigmoid(PlattScaling),
379 Isotonic(IsotonicRegression),
380}
381
382impl CalibratedClassifier {
383 pub fn new(method: CalibrationMethod) -> Self {
384 CalibratedClassifier {
385 method,
386 cv: 5,
387 calibrators_: Vec::new(),
388 }
389 }
390
391 pub fn cv(mut self, cv: usize) -> Self {
392 self.cv = cv;
393 self
394 }
395}
396
397#[cfg(test)]
398mod tests {
399 use super::*;
400
401 #[test]
402 fn test_isotonic_regression() {
403 let x = Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0], &[5]).unwrap();
404 let y = Tensor::from_slice(&[1.0, 3.0, 2.0, 4.0, 5.0], &[5]).unwrap();
405
406 let mut ir = IsotonicRegression::new();
407 let transformed = ir.fit_transform(&x, &y);
408
409 assert_eq!(transformed.dims(), &[5]);
410 }
411
412 #[test]
413 fn test_platt_scaling() {
414 let scores = Tensor::from_slice(&[-2.0, -1.0, 0.0, 1.0, 2.0], &[5]).unwrap();
415 let y = Tensor::from_slice(&[0.0, 0.0, 0.0, 1.0, 1.0], &[5]).unwrap();
416
417 let mut ps = PlattScaling::new();
418 let probs = ps.fit_transform(&scores, &y);
419
420 let probs_data = probs.data_f32();
421 assert!(probs_data.iter().all(|&p| p >= 0.0 && p <= 1.0));
423 }
424}