1use std::io::Write;
11use std::fs::{OpenOptions, File};
12
13use statrs::distribution::{Beta, Continuous, ContinuousCDF};
14use array2d::Array2D;
15use quadrature::double_exponential;
16
17use crate::datagen;
18use datagen::{BetaParams, BinaryClassScores};
19
20#[derive(Debug)]
24pub struct CostRatioDensity{
25 pub c_density_obj: Beta
26}
27
28impl CostRatioDensity {
29 pub fn new(bparms: BetaParams) -> CostRatioDensity {
30 CostRatioDensity {
31 c_density_obj: Beta::new(bparms.alpha, bparms.beta).unwrap()
32 }
33 }
34 pub fn uc(&self, cost: f64) -> f64 {
38 cost*self.c_density_obj.pdf(cost)
39 }
40 pub fn u1mc(&self, cost: f64) -> f64 {
44 (1.0-cost)*self.c_density_obj.pdf(cost)
45 }
46 pub fn cdf(&self, cost: f64) -> f64 {self.c_density_obj.cdf(cost) }
50 pub fn pdf(&self, cost: f64) -> f64 {
54 self.c_density_obj.pdf(cost)
55 }
56}
57
58#[derive(Debug)]
65pub struct HMeasure{
66 cost_distribution: CostRatioDensity,
67 class0_prior: Option<f64>,
68 class1_prior: Option<f64>,
69 c0_num: Option<usize>,
70 c1_num: Option<usize>,
71 pub h: Option<f64>
72}
73
74#[derive(Debug)]
78pub struct HMeasureResults{
79 pub h: f64,
80 pub convex_hull: Vec<Vec<f64>>,
81 pub roc_curve: Vec<Vec<f64>>,
82 pub int_components: Vec<Vec<f64>>,
83 pub class0_score: Vec<f64>,
84 pub class1_score: Vec<f64>
85}
86
87pub trait SaveData<File> {
91 fn save(&self, f: &mut File);
92}
93
94impl<File: Write> SaveData<File> for Vec<f64>{
95 fn save(&self, f: &mut File){
96 let sep = ",";
97 for (idx, element) in self.iter().enumerate() {
98 f.write_all(idx.to_string().as_bytes()).expect("the unexpected");
99 f.write_all(sep.as_bytes()).expect("the unexpected");
100 f.write_all(element.to_string().as_bytes()).expect("the unexpected");
101 f.write_all(b"\n").expect("the unexpected");
102 }
103 }
104}
105
106impl<File: Write> SaveData<File> for Array2D<f64>{
107 fn save(&self, f: &mut File){
108 let num_rows = self.column_len();
109 let num_cols = self.row_len();
110 let mut i = 0;
111 let sep = ",";
112 while i < num_rows {
113 let mut j = 0;
114 f.write_all(i.to_string().as_bytes()).expect("the unexpected");
115 f.write_all(sep.as_bytes()).expect("the unexpected");
116 while j < num_cols {
117 let element = self[(i,j)];
118 f.write_all(element.to_string().as_bytes()).expect("the unexpected");
119 if j < num_cols-1 {
120 f.write_all(sep.as_bytes()).expect("the unexpected");
121 }
122 j += 1;
123 }
124 f.write_all(b"\n").expect("the unexpected");
125 i += 1;
126 }
127 }
128}
129
130impl<File: Write> SaveData<File> for Vec<Vec<f64>>{
131 fn save(&self, f: &mut File){
132 let num_rows = self.len();
133 let num_cols = self[0].len();
134 let mut i = 0;
135 let sep = ",";
136 while i < num_rows {
137 let mut j = 0;
138 f.write_all(i.to_string().as_bytes()).expect("the unexpected");
139 f.write_all(sep.as_bytes()).expect("the unexpected");
140 while j < num_cols {
141 let element = self[i][j];
142 f.write_all(element.to_string().as_bytes()).expect("the unexpected");
143 if j < num_cols-1 {
144 f.write_all(sep.as_bytes()).expect("the unexpected");
145 }
146 j += 1;
147 }
148 f.write_all(b"\n").expect("the unexpected");
149 i += 1;
150 }
151 }
152}
153
154pub fn write_vec<T: SaveData<File>>(vec: &T, filepath: &str, header: &str){
158 let f = &mut OpenOptions::new()
159 .append(true)
160 .create(true)
161 .open(filepath)
162 .expect("the unexpected");
163
164 f.write_all(header.as_bytes()).expect("the unexpected");
165 f.write_all(b"\n").expect("the unexpected");
166 vec.save(f)
167}
168
169impl HMeasureResults{
173 pub fn save(&self, result_set: &str, filepath: &str){
174 let chull_name = self.result_set_path("ch", filepath, result_set);
175 write_vec(&self.convex_hull, chull_name.as_str(), "idx,chull0,chull1");
176
177 let roc_name = self.result_set_path("rc", filepath, result_set);
178 write_vec(&self.roc_curve, roc_name.as_str(), "idx,roc0,roc1");
179
180 let intcomp_name = self.result_set_path("ic", filepath, result_set);
181 write_vec(&self.int_components, intcomp_name.as_str(), "idx,ic0,ic1,ic2,ic3");
182
183 let score0_name = self.result_set_path("s0", filepath, result_set);
184 write_vec(&self.class0_score, score0_name.as_str(), "idx,score");
185
186 let score1_name = self.result_set_path("s1", filepath, result_set);
187 write_vec(&self.class1_score, score1_name.as_str(), "idx,score");
188 }
189 pub fn result_set_path(&self, set_name: &str, filepath: &str, resultset: &str) -> String{
190 let name = format!("{}/{}_{}.csv", filepath, set_name, resultset);
191 name
192 }
193}
194
195impl HMeasure{
199 pub fn new(cost_distribution: CostRatioDensity, class0_prior: Option<f64>, class1_prior: Option<f64>) -> HMeasure {
200 HMeasure {
201 cost_distribution: cost_distribution,
202 class0_prior: class0_prior,
203 class1_prior: class1_prior,
204 c0_num: None,
205 c1_num: None,
206 h: None
207 }
208 }
209
210 pub fn h_measure(&mut self, scores: &mut BinaryClassScores) -> HMeasureResults {
211 self.c0_num = Some(scores.class0.len());
212 self.c1_num = Some(scores.class1.len());
213 let num_scores = self.c0_num.unwrap_or(0)+self.c1_num.unwrap_or(0);
214 if self.class0_prior.is_none() || self.class1_prior.is_none() {
215 self.class0_prior = Some(self.c0_num.unwrap_or(0) as f64/num_scores as f64);
216 self.class1_prior = Some(self.c1_num.unwrap_or(0) as f64/num_scores as f64);
217 }
218 scores.class0.sort_by(|a, b| a.partial_cmp(b).unwrap());
219 scores.class1.sort_by(|a, b| a.partial_cmp(b).unwrap());
220 let (c_scores, c_classes) = self.merge_scores(&scores.class0, &scores.class1);
221 let roc_curve = self.build_roc(&c_scores, &c_classes);
222 let (convex_hull, int_components) = self.build_chull(&roc_curve);
223 self.h = self._build_h(&int_components);
224
225 let h = self.h.unwrap();
226 let class0_score = scores.class0.clone();
227 let class1_score = scores.class1.clone();
228 HMeasureResults{ h,
229 convex_hull,
230 roc_curve,
231 int_components,
232 class0_score,
233 class1_score
234 }
235 }
236
237 fn merge_scores(&self, c0: &Vec<f64>, c1: &Vec<f64>) -> (Vec<f64>, Vec<u8>) {
238 let mut c0_i = 0;
239 let mut c1_i = 0;
240 let mut cm_i = 0;
241 let c0_num= self.c0_num.unwrap_or(0);
242 let c1_num= self.c1_num.unwrap_or(0);
243 let vec_size = c0_num + c1_num;
244 let mut c_scores = vec![0.0;vec_size];
245 let mut c_classes = vec![0;vec_size];
246
247 while c0_i < c0_num || c1_i < c1_num {
248 if c0_i < c0_num && c1_i < c1_num {
249 if c0[c0_i] <= c1[c1_i] {
250 c_scores[cm_i] = c0[c0_i];
251 c_classes[cm_i] = 0;
252 c0_i += 1;
253 cm_i += 1;
254 } else if c1[c1_i] <= c0[c0_i] {
255 c_scores[cm_i] = c1[c1_i];
256 c_classes[cm_i] = 1;
257 c1_i += 1;
258 cm_i += 1;
259 }
260 } else if c0_i < c0_num && c1_i >= c1_num {
261 c_scores[cm_i] = c0[c0_i];
262 c_classes[cm_i] = 0;
263 c0_i += 1;
264 cm_i += 1;
265 } else if c1_i < c1_num && c0_i >= c0_num {
266 c_scores[cm_i] = c1[c1_i];
267 c_classes[cm_i] = 1;
268 c1_i += 1;
269 cm_i += 1;
270 }
271 }
272
273 (c_scores, c_classes)
274 }
275
276 fn build_roc(&self, c_scores: &Vec<f64>, c_classes: &Vec<u8>) -> Vec<Vec<f64>> {
277 let num_rows = c_scores.len();
278 let mut roc_points: Vec<Vec<f64>> = vec![];
279 let mut roc_point = [0.0, 0.0];
280 roc_points.push(vec![roc_point[0], roc_point[1]]);
281 let mut i = 0;
282 while i < num_rows {
283 let (duplicate_0, duplicate_1) = HMeasure::duplicate_classes(c_scores, c_classes, i);
284 let duplicate_i = duplicate_0 + duplicate_1;
285 roc_point[0] += duplicate_1 as f64 / self.c1_num.unwrap_or(1) as f64;
286 roc_point[1] += duplicate_0 as f64 / self.c0_num.unwrap_or(1) as f64;
287 roc_points.push(vec![roc_point[0], roc_point[1]]);
288 i += duplicate_i;
289 }
290 roc_points
291 }
292
293 fn duplicate_classes(scores: &Vec<f64>, classes: &Vec<u8>, idx: usize) -> (usize,usize) {
294 let mut dup_class_0 = vec![];
295 let mut dup_class_1 = vec![];
296 let score_len = scores.len();
297 for j in idx..score_len{
300 if scores[j] == scores[idx] {
301 if classes[j] == 0 {
302 dup_class_0.push(classes[j]);
303 }
304 else {
305 dup_class_1.push(classes[j]);
306 }
307 } else if scores[j] > scores[idx] {
308 break
309 }
310 }
311
312 (dup_class_0.len(), dup_class_1.len())
313 }
314
315 fn build_chull(&self, roc_c: &Vec<Vec<f64>>) -> (Vec<Vec<f64>>, Vec<Vec<f64>>) {
316 let num_rows = roc_c.len();
317 let mut chull: Vec<Vec<f64>> = vec![];
318 let mut int_components: Vec<Vec<f64>> = vec![];
319 let mut cval_loc_prior = [0.0, 0.0];
320 let mut cval_prior = 0.0;
321 let mut i = 0;
322 while i < num_rows-1 {
323 let cvals = self._cvals(&roc_c, i);
324 let c_argmin = HMeasure::argmin(&cvals);
325 let cval_loc = [roc_c[i+c_argmin+1][0],roc_c[i+c_argmin+1][1]];
326 chull.push(vec![cval_loc[0],cval_loc[1]]);
327 int_components.push(vec![cvals[c_argmin], cval_prior, cval_loc_prior[0], cval_loc_prior[1]]);
328 cval_loc_prior = cval_loc;
329 cval_prior = cvals[c_argmin];
330 i += c_argmin+1;
331 }
332
333 int_components.push(vec![1.0, cval_prior, cval_loc_prior[0], cval_loc_prior[1]]);
334
335 (chull, int_components)
336 }
337
338 fn _cvals(&self, roc_c: &Vec<Vec<f64>>, current_idx: usize) -> Vec<f64> {
339 let mut cvals: Vec<f64> = vec![];
340 let current_point: [f64;2] = [roc_c[current_idx][0],roc_c[current_idx][1]];
341 let mut i_point_idx = current_idx + 1;
342 while i_point_idx < roc_c.len() {
343 let i_point: [f64;2] = [roc_c[i_point_idx][0],roc_c[i_point_idx][1]];
344 cvals.push(self._cval(¤t_point, &i_point));
345 i_point_idx += 1;
346 }
347 cvals
348 }
349
350 fn _cval(&self, current_point: &[f64; 2], i_point: &[f64;2]) -> f64 {
351 let numerator = self.class1_prior.unwrap_or(0.5)*(i_point[0]-current_point[0]);
352 let denominator = self.class0_prior.unwrap_or(0.5)*(i_point[1]-current_point[1]) + self.class1_prior.unwrap_or(0.5)*(i_point[0]-current_point[0]);
353 numerator/denominator
354 }
355
356 fn argmin(v: &Vec<f64>) -> usize {
357 let mut min_idx = 0;
358 let mut min_val = v[0];
359 for (i, &val) in v.iter().enumerate() {
360 if val < min_val {
361 min_idx = i;
362 min_val = val;
363 }
364 }
365 min_idx
366 }
367
368 fn _build_h(&self, int_components: &Vec<Vec<f64>>) -> Option<f64> {
369 let l = self._build_l(&int_components).unwrap();
370 let l_max = self._l_max().unwrap();
371 Some(1.0-l/l_max)
373 }
374
375 fn _build_l(&self, int_components: &Vec<Vec<f64>>) -> Option<f64> {
376 let mut l = 0.0;
377 let mut i = 0;
378 while i < int_components.len() {
379 let int_vals = &int_components[i];
380 l += self._int_l_step(int_vals);
381 i += 1
382 }
383 Some(l)
384 }
385
386 fn _int_l_step(&self, int_vals: &Vec<f64>) -> f64 {
387 let r0i = int_vals[3];
388 let r1i = int_vals[2];
389 let ci = int_vals[1];
390 let cip1 = int_vals[0];
391 let c0_prior = self.class0_prior.unwrap();
392 let c1_prior = self.class1_prior.unwrap();
393 let int_uc = double_exponential::integrate(|x| self.cost_distribution.uc(x), ci, cip1, 1e-9).integral;
394 let int_u1mc = double_exponential::integrate(|x| self.cost_distribution.u1mc(x), ci, cip1, 1e-9).integral;
395
396 let l_step = c0_prior*(1.0-r0i)*int_uc + c1_prior*r1i*int_u1mc;
397
398 return l_step
399 }
400
401 fn _l_max(&self) -> Option<f64> {
402 let c0_prior = self.class0_prior.unwrap();
403 let c1_prior = self.class1_prior.unwrap();
404 let mut l_max = double_exponential::integrate(|x| self.cost_distribution.uc(x), 0.0, c1_prior, 1e-9).integral*c0_prior;
405 l_max += double_exponential::integrate(|x| self.cost_distribution.u1mc(x), c1_prior, 1.0,1e-9).integral*c1_prior;
406 Some(l_max)
407 }
408}
409
410#[cfg(test)]
411mod tests {
412 use super::{CostRatioDensity,HMeasure};
413 use super::datagen::{BetaParams, BinaryClassifierScores, BinaryClassParams};
414 #[test]
415 fn assert_low_hmeasure() {
416 let c0_a: f64 = 4.0;
420 let c1_a: f64 = 4.0;
421 let c0_b: f64 = 4.0;
422 let c1_b: f64 = 4.0;
423 let c0_n: usize = 2000;
424 let c1_n: usize = 2000;
425 let mut rng = BinaryClassifierScores::generate_rng(13);
427 let class0_params = BetaParams { alpha: c0_a, beta: c0_b };
429 let class1_params = BetaParams { alpha: c1_a, beta: c1_b };
430 let bcp = &BinaryClassParams { class0: class0_params, class1: class1_params };
431 let mut bcs = BinaryClassifierScores::new(&bcp, c0_n, c1_n, &mut rng);
432
433 let cost_density_params = BetaParams { alpha: 2.0, beta: 2.0 };
435 let crd = CostRatioDensity::new(cost_density_params);
436
437 let mut hm = HMeasure::new(crd, None, None);
439 let scores = &mut bcs.scores;
440 let hmr = hm.h_measure(scores);
441 assert!(hmr.h < 0.001);
446 }
447}