1use ghostflow_core::Tensor;
4
5pub struct OneVsRestClassifier {
7 classifiers_: Vec<BinaryClassifier>,
8 classes_: Vec<i32>,
9 n_features_: usize,
10}
11
12#[derive(Clone)]
13struct BinaryClassifier {
14 coef: Vec<f32>,
15 intercept: f32,
16}
17
18impl BinaryClassifier {
19 fn new(n_features: usize) -> Self {
20 BinaryClassifier {
21 coef: vec![0.0; n_features],
22 intercept: 0.0,
23 }
24 }
25
26 fn fit(&mut self, x: &[f32], y: &[f32], n_samples: usize, n_features: usize) {
27 let lr = 0.1f32;
28 let max_iter = 100;
29 let alpha = 0.01f32;
30
31 for _ in 0..max_iter {
32 let mut grad_coef = vec![0.0f32; n_features];
33 let mut grad_intercept = 0.0f32;
34
35 for i in 0..n_samples {
36 let mut z = self.intercept;
37 for j in 0..n_features {
38 z += self.coef[j] * x[i * n_features + j];
39 }
40 let pred = 1.0 / (1.0 + (-z).exp());
41 let error = pred - y[i];
42
43 grad_intercept += error;
44 for j in 0..n_features {
45 grad_coef[j] += error * x[i * n_features + j];
46 }
47 }
48
49 self.intercept -= lr * grad_intercept / n_samples as f32;
50 for j in 0..n_features {
51 self.coef[j] -= lr * (grad_coef[j] / n_samples as f32 + alpha * self.coef[j]);
52 }
53 }
54 }
55
56 fn predict_proba(&self, x: &[f32], n_features: usize) -> f32 {
57 let mut z = self.intercept;
58 for j in 0..n_features {
59 z += self.coef[j] * x[j];
60 }
61 1.0 / (1.0 + (-z).exp())
62 }
63}
64
65impl OneVsRestClassifier {
66 pub fn new() -> Self {
67 OneVsRestClassifier {
68 classifiers_: Vec::new(),
69 classes_: Vec::new(),
70 n_features_: 0,
71 }
72 }
73
74 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
75 let x_data = x.data_f32();
76 let y_data = y.data_f32();
77 let n_samples = x.dims()[0];
78 let n_features = x.dims()[1];
79
80 self.n_features_ = n_features;
81
82 let mut classes: Vec<i32> = y_data.iter().map(|&v| v as i32).collect();
84 classes.sort();
85 classes.dedup();
86 self.classes_ = classes.clone();
87
88 self.classifiers_ = classes.iter()
90 .map(|&class| {
91 let y_binary: Vec<f32> = y_data.iter()
92 .map(|&v| if v as i32 == class { 1.0 } else { 0.0 })
93 .collect();
94
95 let mut clf = BinaryClassifier::new(n_features);
96 clf.fit(&x_data, &y_binary, n_samples, n_features);
97 clf
98 })
99 .collect();
100 }
101
102 pub fn predict(&self, x: &Tensor) -> Tensor {
103 let proba = self.predict_proba(x);
104 let proba_data = proba.data_f32();
105 let n_samples = x.dims()[0];
106 let n_classes = self.classes_.len();
107
108 let labels: Vec<f32> = (0..n_samples)
109 .map(|i| {
110 let mut max_prob = f32::NEG_INFINITY;
111 let mut max_class = self.classes_[0];
112 for (k, &class) in self.classes_.iter().enumerate() {
113 if proba_data[i * n_classes + k] > max_prob {
114 max_prob = proba_data[i * n_classes + k];
115 max_class = class;
116 }
117 }
118 max_class as f32
119 })
120 .collect();
121
122 Tensor::from_slice(&labels, &[n_samples]).unwrap()
123 }
124
125 pub fn predict_proba(&self, x: &Tensor) -> Tensor {
126 let x_data = x.data_f32();
127 let n_samples = x.dims()[0];
128 let n_features = x.dims()[1];
129 let n_classes = self.classes_.len();
130
131 let mut proba = vec![0.0f32; n_samples * n_classes];
132
133 for i in 0..n_samples {
134 let xi = &x_data[i * n_features..(i + 1) * n_features];
135 let mut sum = 0.0f32;
136
137 for (k, clf) in self.classifiers_.iter().enumerate() {
138 let p = clf.predict_proba(xi, n_features);
139 proba[i * n_classes + k] = p;
140 sum += p;
141 }
142
143 if sum > 0.0 {
145 for k in 0..n_classes {
146 proba[i * n_classes + k] /= sum;
147 }
148 }
149 }
150
151 Tensor::from_slice(&proba, &[n_samples, n_classes]).unwrap()
152 }
153}
154
155impl Default for OneVsRestClassifier {
156 fn default() -> Self { Self::new() }
157}
158
159pub struct OneVsOneClassifier {
161 classifiers_: Vec<(i32, i32, BinaryClassifier)>,
162 classes_: Vec<i32>,
163 n_features_: usize,
164}
165
166impl OneVsOneClassifier {
167 pub fn new() -> Self {
168 OneVsOneClassifier {
169 classifiers_: Vec::new(),
170 classes_: Vec::new(),
171 n_features_: 0,
172 }
173 }
174
175 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
176 let x_data = x.data_f32();
177 let y_data = y.data_f32();
178 let n_samples = x.dims()[0];
179 let n_features = x.dims()[1];
180
181 self.n_features_ = n_features;
182
183 let mut classes: Vec<i32> = y_data.iter().map(|&v| v as i32).collect();
185 classes.sort();
186 classes.dedup();
187 self.classes_ = classes.clone();
188
189 self.classifiers_.clear();
191 for i in 0..classes.len() {
192 for j in (i + 1)..classes.len() {
193 let class_i = classes[i];
194 let class_j = classes[j];
195
196 let indices: Vec<usize> = (0..n_samples)
198 .filter(|&k| y_data[k] as i32 == class_i || y_data[k] as i32 == class_j)
199 .collect();
200
201 let x_subset: Vec<f32> = indices.iter()
202 .flat_map(|&k| x_data[k * n_features..(k + 1) * n_features].to_vec())
203 .collect();
204 let y_subset: Vec<f32> = indices.iter()
205 .map(|&k| if y_data[k] as i32 == class_j { 1.0 } else { 0.0 })
206 .collect();
207
208 let mut clf = BinaryClassifier::new(n_features);
209 clf.fit(&x_subset, &y_subset, indices.len(), n_features);
210 self.classifiers_.push((class_i, class_j, clf));
211 }
212 }
213 }
214
215 pub fn predict(&self, x: &Tensor) -> Tensor {
216 let x_data = x.data_f32();
217 let n_samples = x.dims()[0];
218 let n_features = x.dims()[1];
219
220 let labels: Vec<f32> = (0..n_samples)
221 .map(|i| {
222 let xi = &x_data[i * n_features..(i + 1) * n_features];
223
224 let mut votes: std::collections::HashMap<i32, usize> = std::collections::HashMap::new();
226
227 for (class_i, class_j, clf) in &self.classifiers_ {
228 let prob = clf.predict_proba(xi, n_features);
229 let winner = if prob >= 0.5 { *class_j } else { *class_i };
230 *votes.entry(winner).or_insert(0) += 1;
231 }
232
233 votes.into_iter()
235 .max_by_key(|(_, count)| *count)
236 .map(|(class, _)| class)
237 .unwrap_or(self.classes_[0]) as f32
238 })
239 .collect();
240
241 Tensor::from_slice(&labels, &[n_samples]).unwrap()
242 }
243}
244
245impl Default for OneVsOneClassifier {
246 fn default() -> Self { Self::new() }
247}
248
249pub struct OutputCodeClassifier {
251 pub code_size: f32,
252 classifiers_: Vec<BinaryClassifier>,
253 code_book_: Vec<Vec<i8>>,
254 classes_: Vec<i32>,
255 n_features_: usize,
256}
257
258impl OutputCodeClassifier {
259 pub fn new() -> Self {
260 OutputCodeClassifier {
261 code_size: 1.5,
262 classifiers_: Vec::new(),
263 code_book_: Vec::new(),
264 classes_: Vec::new(),
265 n_features_: 0,
266 }
267 }
268
269 pub fn code_size(mut self, size: f32) -> Self {
270 self.code_size = size;
271 self
272 }
273
274 fn generate_code_book(&self, n_classes: usize) -> Vec<Vec<i8>> {
275 use rand::prelude::*;
276 let mut rng = thread_rng();
277
278 let n_classifiers = (self.code_size * n_classes as f32).ceil() as usize;
279
280 (0..n_classes)
282 .map(|_| {
283 (0..n_classifiers)
284 .map(|_| if rng.gen::<bool>() { 1i8 } else { -1i8 })
285 .collect()
286 })
287 .collect()
288 }
289
290 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
291 let x_data = x.data_f32();
292 let y_data = y.data_f32();
293 let n_samples = x.dims()[0];
294 let n_features = x.dims()[1];
295
296 self.n_features_ = n_features;
297
298 let mut classes: Vec<i32> = y_data.iter().map(|&v| v as i32).collect();
300 classes.sort();
301 classes.dedup();
302 self.classes_ = classes.clone();
303
304 self.code_book_ = self.generate_code_book(classes.len());
306 let n_classifiers = self.code_book_[0].len();
307
308 self.classifiers_ = (0..n_classifiers)
310 .map(|c| {
311 let y_binary: Vec<f32> = y_data.iter()
313 .map(|&v| {
314 let class_idx = classes.iter().position(|&c| c == v as i32).unwrap();
315 if self.code_book_[class_idx][c] == 1 { 1.0 } else { 0.0 }
316 })
317 .collect();
318
319 let mut clf = BinaryClassifier::new(n_features);
320 clf.fit(&x_data, &y_binary, n_samples, n_features);
321 clf
322 })
323 .collect();
324 }
325
326 pub fn predict(&self, x: &Tensor) -> Tensor {
327 let x_data = x.data_f32();
328 let n_samples = x.dims()[0];
329 let n_features = x.dims()[1];
330
331 let labels: Vec<f32> = (0..n_samples)
332 .map(|i| {
333 let xi = &x_data[i * n_features..(i + 1) * n_features];
334
335 let predictions: Vec<i8> = self.classifiers_.iter()
337 .map(|clf| {
338 let prob = clf.predict_proba(xi, n_features);
339 if prob >= 0.5 { 1i8 } else { -1i8 }
340 })
341 .collect();
342
343 let mut min_dist = usize::MAX;
345 let mut best_class = self.classes_[0];
346
347 for (class_idx, code) in self.code_book_.iter().enumerate() {
348 let dist: usize = predictions.iter().zip(code.iter())
349 .filter(|(&p, &c)| p != c)
350 .count();
351 if dist < min_dist {
352 min_dist = dist;
353 best_class = self.classes_[class_idx];
354 }
355 }
356
357 best_class as f32
358 })
359 .collect();
360
361 Tensor::from_slice(&labels, &[n_samples]).unwrap()
362 }
363}
364
365impl Default for OutputCodeClassifier {
366 fn default() -> Self { Self::new() }
367}
368
369pub struct ClassifierChain {
371 pub order: Option<Vec<usize>>,
372 classifiers_: Vec<BinaryClassifier>,
373 n_labels_: usize,
374 n_features_: usize,
375}
376
377impl ClassifierChain {
378 pub fn new() -> Self {
379 ClassifierChain {
380 order: None,
381 classifiers_: Vec::new(),
382 n_labels_: 0,
383 n_features_: 0,
384 }
385 }
386
387 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
388 let x_data = x.data_f32();
389 let y_data = y.data_f32();
390 let n_samples = x.dims()[0];
391 let n_features = x.dims()[1];
392 let n_labels = y.dims()[1];
393
394 self.n_features_ = n_features;
395 self.n_labels_ = n_labels;
396
397 let order: Vec<usize> = self.order.clone()
399 .unwrap_or_else(|| (0..n_labels).collect());
400
401 self.classifiers_ = Vec::with_capacity(n_labels);
403
404 for (chain_idx, &label_idx) in order.iter().enumerate() {
405 let augmented_features = n_features + chain_idx;
407
408 let x_augmented: Vec<f32> = (0..n_samples)
409 .flat_map(|i| {
410 let mut row = x_data[i * n_features..(i + 1) * n_features].to_vec();
411 for &prev_label in &order[..chain_idx] {
413 row.push(y_data[i * n_labels + prev_label]);
414 }
415 row
416 })
417 .collect();
418
419 let y_label: Vec<f32> = (0..n_samples)
420 .map(|i| y_data[i * n_labels + label_idx])
421 .collect();
422
423 let mut clf = BinaryClassifier::new(augmented_features);
424 clf.fit(&x_augmented, &y_label, n_samples, augmented_features);
425 self.classifiers_.push(clf);
426 }
427 }
428
429 pub fn predict(&self, x: &Tensor) -> Tensor {
430 let x_data = x.data_f32();
431 let n_samples = x.dims()[0];
432 let n_features = x.dims()[1];
433
434 let order: Vec<usize> = self.order.clone()
435 .unwrap_or_else(|| (0..self.n_labels_).collect());
436
437 let mut predictions = vec![0.0f32; n_samples * self.n_labels_];
438
439 for i in 0..n_samples {
440 let mut augmented = x_data[i * n_features..(i + 1) * n_features].to_vec();
441
442 for (chain_idx, &label_idx) in order.iter().enumerate() {
443 let prob = self.classifiers_[chain_idx].predict_proba(&augmented, n_features + chain_idx);
444 let pred = if prob >= 0.5 { 1.0 } else { 0.0 };
445 predictions[i * self.n_labels_ + label_idx] = pred;
446 augmented.push(pred);
447 }
448 }
449
450 Tensor::from_slice(&predictions, &[n_samples, self.n_labels_]).unwrap()
451 }
452}
453
454impl Default for ClassifierChain {
455 fn default() -> Self { Self::new() }
456}
457
458#[cfg(test)]
459mod tests {
460 use super::*;
461
462 #[test]
463 fn test_one_vs_rest() {
464 let x = Tensor::from_slice(&[0.0f32, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0,
465 ], &[4, 2]).unwrap();
466 let y = Tensor::from_slice(&[0.0f32, 1.0, 2.0, 1.0], &[4]).unwrap();
467
468 let mut clf = OneVsRestClassifier::new();
469 clf.fit(&x, &y);
470 let pred = clf.predict(&x);
471 assert_eq!(pred.dims(), &[4]);
472 }
473
474 #[test]
475 fn test_one_vs_one() {
476 let x = Tensor::from_slice(&[0.0f32, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0,
477 ], &[4, 2]).unwrap();
478 let y = Tensor::from_slice(&[0.0f32, 1.0, 2.0, 1.0], &[4]).unwrap();
479
480 let mut clf = OneVsOneClassifier::new();
481 clf.fit(&x, &y);
482 let pred = clf.predict(&x);
483 assert_eq!(pred.dims(), &[4]);
484 }
485}
486
487