1use ghostflow_core::Tensor;
4
/// Semi-supervised label-propagation classifier.
///
/// Labeled samples keep their labels fixed ("hard clamping") while label
/// distributions are iteratively propagated to unlabeled samples through a
/// graph built from an RBF or KNN affinity kernel. Unlabeled samples are
/// marked with a negative target value in `fit`.
pub struct LabelPropagation {
    /// Affinity kernel used to build the propagation graph.
    pub kernel: LPKernel,
    /// RBF coefficient: affinity = exp(-gamma * ||xi - xj||^2).
    pub gamma: f32,
    /// Number of neighbors used by the KNN kernel.
    pub n_neighbors: usize,
    /// Maximum number of propagation iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the max absolute change of any
    /// distribution entry between iterations.
    pub tol: f32,
    // Per-sample class-probability rows learned by `fit` (training set only).
    label_distributions_: Option<Vec<Vec<f32>>>,
    // Sorted distinct class labels observed among labeled samples.
    classes_: Option<Vec<usize>>,
    // Number of distinct classes seen in `fit`.
    n_classes_: usize,
}
16
/// Affinity kernel used to build the sample-similarity graph for
/// label propagation / spreading.
#[derive(Clone, Copy, Debug)]
pub enum LPKernel {
    /// Dense Gaussian kernel: exp(-gamma * squared Euclidean distance).
    RBF,
    /// Sparse symmetrized k-nearest-neighbors connectivity.
    KNN,
}
22
23impl LabelPropagation {
24 pub fn new() -> Self {
25 LabelPropagation {
26 kernel: LPKernel::RBF,
27 gamma: 20.0,
28 n_neighbors: 7,
29 max_iter: 1000,
30 tol: 1e-3,
31 label_distributions_: None,
32 classes_: None,
33 n_classes_: 0,
34 }
35 }
36
37 pub fn kernel(mut self, k: LPKernel) -> Self {
38 self.kernel = k;
39 self
40 }
41
42 pub fn gamma(mut self, g: f32) -> Self {
43 self.gamma = g;
44 self
45 }
46
47 fn compute_affinity(&self, x: &[f32], n_samples: usize, n_features: usize) -> Vec<Vec<f32>> {
48 let mut affinity = vec![vec![0.0f32; n_samples]; n_samples];
49
50 match self.kernel {
51 LPKernel::RBF => {
52 for i in 0..n_samples {
53 for j in i..n_samples {
54 let mut dist_sq = 0.0f32;
55 for k in 0..n_features {
56 let diff = x[i * n_features + k] - x[j * n_features + k];
57 dist_sq += diff * diff;
58 }
59 let a = (-self.gamma * dist_sq).exp();
60 affinity[i][j] = a;
61 affinity[j][i] = a;
62 }
63 }
64 }
65 LPKernel::KNN => {
66 for i in 0..n_samples {
67 let mut distances: Vec<(usize, f32)> = (0..n_samples)
68 .filter(|&j| j != i)
69 .map(|j| {
70 let mut dist_sq = 0.0f32;
71 for k in 0..n_features {
72 let diff = x[i * n_features + k] - x[j * n_features + k];
73 dist_sq += diff * diff;
74 }
75 (j, dist_sq.sqrt())
76 })
77 .collect();
78
79 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
80
81 for (j, _) in distances.into_iter().take(self.n_neighbors) {
82 affinity[i][j] = 1.0;
83 }
84 }
85
86 for i in 0..n_samples {
88 for j in (i + 1)..n_samples {
89 let sym = (affinity[i][j] + affinity[j][i]) / 2.0;
90 affinity[i][j] = sym;
91 affinity[j][i] = sym;
92 }
93 }
94 }
95 }
96
97 affinity
98 }
99
100 fn normalize_affinity(&self, affinity: &mut [Vec<f32>], n_samples: usize) {
101 for i in 0..n_samples {
103 let row_sum: f32 = affinity[i].iter().sum();
104 if row_sum > 1e-10 {
105 for j in 0..n_samples {
106 affinity[i][j] /= row_sum;
107 }
108 }
109 }
110 }
111
112 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
113 let x_data = x.data_f32();
114 let y_data = y.data_f32();
115 let n_samples = x.dims()[0];
116 let n_features = x.dims()[1];
117
118 let mut classes: Vec<usize> = y_data.iter()
120 .filter(|&&yi| yi >= 0.0)
121 .map(|&yi| yi as usize)
122 .collect();
123 classes.sort();
124 classes.dedup();
125 self.n_classes_ = classes.len();
126 self.classes_ = Some(classes.clone());
127
128 let mut y_dist = vec![vec![0.0f32; self.n_classes_]; n_samples];
130 let mut labeled_mask = vec![false; n_samples];
131
132 for i in 0..n_samples {
133 if y_data[i] >= 0.0 {
134 let class_idx = classes.iter().position(|&c| c == y_data[i] as usize).unwrap();
135 y_dist[i][class_idx] = 1.0;
136 labeled_mask[i] = true;
137 } else {
138 for c in 0..self.n_classes_ {
140 y_dist[i][c] = 1.0 / self.n_classes_ as f32;
141 }
142 }
143 }
144
145 let mut affinity = self.compute_affinity(&x_data, n_samples, n_features);
147 self.normalize_affinity(&mut affinity, n_samples);
148
149 for _ in 0..self.max_iter {
151 let y_dist_old = y_dist.clone();
152
153 for i in 0..n_samples {
155 if !labeled_mask[i] {
156 for c in 0..self.n_classes_ {
157 y_dist[i][c] = 0.0;
158 for j in 0..n_samples {
159 y_dist[i][c] += affinity[i][j] * y_dist_old[j][c];
160 }
161 }
162
163 let sum: f32 = y_dist[i].iter().sum();
165 if sum > 1e-10 {
166 for c in 0..self.n_classes_ {
167 y_dist[i][c] /= sum;
168 }
169 }
170 }
171 }
172
173 for i in 0..n_samples {
175 if labeled_mask[i] {
176 let class_idx = classes.iter().position(|&c| c == y_data[i] as usize).unwrap();
177 for c in 0..self.n_classes_ {
178 y_dist[i][c] = if c == class_idx { 1.0 } else { 0.0 };
179 }
180 }
181 }
182
183 let mut max_diff = 0.0f32;
185 for i in 0..n_samples {
186 for c in 0..self.n_classes_ {
187 max_diff = max_diff.max((y_dist[i][c] - y_dist_old[i][c]).abs());
188 }
189 }
190
191 if max_diff < self.tol {
192 break;
193 }
194 }
195
196 self.label_distributions_ = Some(y_dist);
197 }
198
199 pub fn predict(&self, x: &Tensor) -> Tensor {
200 let proba = self.predict_proba(x);
201 let proba_data = proba.data_f32();
202 let n_samples = x.dims()[0];
203
204 let classes = self.classes_.as_ref().expect("Model not fitted");
205
206 let predictions: Vec<f32> = (0..n_samples)
207 .map(|i| {
208 let start = i * self.n_classes_;
209 let probs = &proba_data[start..start + self.n_classes_];
210 let max_idx = probs.iter()
211 .enumerate()
212 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
213 .map(|(idx, _)| idx)
214 .unwrap_or(0);
215 classes[max_idx] as f32
216 })
217 .collect();
218
219 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
220 }
221
222 pub fn predict_proba(&self, _x: &Tensor) -> Tensor {
223 let y_dist = self.label_distributions_.as_ref().expect("Model not fitted");
225 let n_samples = y_dist.len();
226
227 let mut probs: Vec<f32> = Vec::with_capacity(n_samples * self.n_classes_);
228 for dist in y_dist {
229 probs.extend(dist);
230 }
231
232 Tensor::from_slice(&probs, &[n_samples, self.n_classes_]).unwrap()
233 }
234}
235
236impl Default for LabelPropagation {
237 fn default() -> Self {
238 Self::new()
239 }
240}
241
242pub struct LabelSpreading {
244 pub kernel: LPKernel,
245 pub gamma: f32,
246 pub n_neighbors: usize,
247 pub alpha: f32, pub max_iter: usize,
249 pub tol: f32,
250 label_distributions_: Option<Vec<Vec<f32>>>,
251 classes_: Option<Vec<usize>>,
252 n_classes_: usize,
253}
254
impl LabelSpreading {
    /// Creates a spreader with an RBF kernel, gamma = 20, 7 neighbors,
    /// alpha = 0.2, at most 30 iterations and tolerance 1e-3.
    pub fn new() -> Self {
        LabelSpreading {
            kernel: LPKernel::RBF,
            gamma: 20.0,
            n_neighbors: 7,
            alpha: 0.2,
            max_iter: 30,
            tol: 1e-3,
            label_distributions_: None,
            classes_: None,
            n_classes_: 0,
        }
    }

    /// Builder-style setter for the clamping factor, clamped to [0, 1].
    pub fn alpha(mut self, a: f32) -> Self {
        self.alpha = a.clamp(0.0, 1.0);
        self
    }

    /// Builds the dense RBF affinity matrix for `x` (row-major,
    /// `n_features` values per sample).
    ///
    /// NOTE(review): `self.kernel` and `self.n_neighbors` are ignored here —
    /// unlike `LabelPropagation`, only the RBF kernel is implemented.
    fn compute_affinity(&self, x: &[f32], n_samples: usize, n_features: usize) -> Vec<Vec<f32>> {
        let mut affinity = vec![vec![0.0f32; n_samples]; n_samples];

        for i in 0..n_samples {
            // Symmetric matrix: fill the upper triangle and mirror it.
            for j in i..n_samples {
                let mut dist_sq = 0.0f32;
                for k in 0..n_features {
                    let diff = x[i * n_features + k] - x[j * n_features + k];
                    dist_sq += diff * diff;
                }
                let a = (-self.gamma * dist_sq).exp();
                affinity[i][j] = a;
                affinity[j][i] = a;
            }
        }

        affinity
    }

    /// Computes S = D^{-1/2} W D^{-1/2}, the symmetrically normalized
    /// affinity used by label spreading, where D is the diagonal degree
    /// matrix of W. Degrees are floored at 1e-10 so isolated samples do
    /// not cause division by zero.
    fn normalize_laplacian(&self, affinity: &[Vec<f32>], n_samples: usize) -> Vec<Vec<f32>> {
        // Degree of each node = its affinity row sum.
        let degrees: Vec<f32> = (0..n_samples)
            .map(|i| affinity[i].iter().sum::<f32>())
            .collect();

        let mut s = vec![vec![0.0f32; n_samples]; n_samples];

        for i in 0..n_samples {
            for j in 0..n_samples {
                let d_i = degrees[i].max(1e-10).sqrt();
                let d_j = degrees[j].max(1e-10).sqrt();
                s[i][j] = affinity[i][j] / (d_i * d_j);
            }
        }

        s
    }

    /// Fits the model on `x` (shape [n_samples, n_features]) and targets
    /// `y` (length n_samples); negative entries of `y` mark unlabeled
    /// samples.
    ///
    /// Iterates Y <- alpha * S * Y + (1 - alpha) * Y0 (with per-row
    /// renormalization) until the largest per-entry change drops below
    /// `tol` or `max_iter` is reached.
    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        // Distinct sorted class labels among the labeled samples.
        let mut classes: Vec<usize> = y_data.iter()
            .filter(|&&yi| yi >= 0.0)
            .map(|&yi| yi as usize)
            .collect();
        classes.sort();
        classes.dedup();
        self.n_classes_ = classes.len();
        self.classes_ = Some(classes.clone());

        // y_dist: evolving distributions; y_static: the fixed initial
        // labels Y0 that soft clamping pulls the solution back toward.
        let mut y_dist = vec![vec![0.0f32; self.n_classes_]; n_samples];
        let mut y_static = vec![vec![0.0f32; self.n_classes_]; n_samples];

        for i in 0..n_samples {
            if y_data[i] >= 0.0 {
                let class_idx = classes.iter().position(|&c| c == y_data[i] as usize).unwrap();
                y_dist[i][class_idx] = 1.0;
                y_static[i][class_idx] = 1.0;
            }
            // Unlabeled rows stay all-zero: they receive mass purely
            // through propagation.
        }

        let affinity = self.compute_affinity(&x_data, n_samples, n_features);
        let s = self.normalize_laplacian(&affinity, n_samples);

        for _ in 0..self.max_iter {
            let y_dist_old = y_dist.clone();

            for i in 0..n_samples {
                for c in 0..self.n_classes_ {
                    // Propagated mass from all neighbors via S.
                    let mut propagated = 0.0f32;
                    for j in 0..n_samples {
                        propagated += s[i][j] * y_dist_old[j][c];
                    }
                    // Soft clamp: blend propagation with the initial labels.
                    y_dist[i][c] = self.alpha * propagated + (1.0 - self.alpha) * y_static[i][c];
                }

                // Renormalize the row to a probability distribution
                // (skipped for all-zero rows to avoid division by zero).
                let sum: f32 = y_dist[i].iter().sum();
                if sum > 1e-10 {
                    for c in 0..self.n_classes_ {
                        y_dist[i][c] /= sum;
                    }
                }
            }

            // Converged once no entry moved by more than `tol`.
            let mut max_diff = 0.0f32;
            for i in 0..n_samples {
                for c in 0..self.n_classes_ {
                    max_diff = max_diff.max((y_dist[i][c] - y_dist_old[i][c]).abs());
                }
            }

            if max_diff < self.tol {
                break;
            }
        }

        self.label_distributions_ = Some(y_dist);
    }

    /// Predicts hard labels by arg-max over the learned distributions.
    ///
    /// The model is transductive: `_x` is ignored and predictions are
    /// returned for the samples seen in `fit`.
    ///
    /// Panics if called before `fit`.
    pub fn predict(&self, _x: &Tensor) -> Tensor {
        let y_dist = self.label_distributions_.as_ref().expect("Model not fitted");
        let classes = self.classes_.as_ref().expect("Model not fitted");
        let n_samples = y_dist.len();

        let predictions: Vec<f32> = y_dist.iter()
            .map(|dist| {
                // Arg-max over the class probabilities of this sample.
                let max_idx = dist.iter()
                    .enumerate()
                    .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
                    .map(|(idx, _)| idx)
                    .unwrap_or(0);
                classes[max_idx] as f32
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }
}
402
403impl Default for LabelSpreading {
404 fn default() -> Self {
405 Self::new()
406 }
407}
408
/// Configuration for a self-training (pseudo-labeling) classifier.
///
/// NOTE(review): only configuration and builder methods are visible in
/// this file — no `fit`/`predict` implementation appears here.
pub struct SelfTrainingClassifier {
    /// Confidence threshold above which an unlabeled sample is adopted as
    /// pseudo-labeled (used with `SelfTrainingCriterion::Threshold`).
    pub threshold: f32,
    /// Maximum number of self-training rounds.
    pub max_iter: usize,
    /// Rule for selecting which pseudo-labels to accept each round.
    pub criterion: SelfTrainingCriterion,
    // Not populated by any method visible in this file.
    #[allow(dead_code)]
    n_classes_: usize,
    // Not populated by any method visible in this file.
    #[allow(dead_code)]
    n_iter_: usize,
}
419
/// Selection rule for accepting pseudo-labels during self-training.
#[derive(Clone, Copy, Debug)]
pub enum SelfTrainingCriterion {
    /// Accept every prediction whose confidence exceeds the threshold.
    Threshold,
    /// Accept only the k most confident predictions per round.
    KBest(usize),
}
425
426impl SelfTrainingClassifier {
427 pub fn new(threshold: f32) -> Self {
428 SelfTrainingClassifier {
429 threshold,
430 max_iter: 10,
431 criterion: SelfTrainingCriterion::Threshold,
432 n_classes_: 0,
433 n_iter_: 0,
434 }
435 }
436
437 pub fn max_iter(mut self, n: usize) -> Self {
438 self.max_iter = n;
439 self
440 }
441
442 pub fn criterion(mut self, c: SelfTrainingCriterion) -> Self {
443 self.criterion = c;
444 self
445 }
446}
447
448impl Default for SelfTrainingClassifier {
449 fn default() -> Self {
450 Self::new(0.75)
451 }
452}
453
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_label_propagation() {
        // Two labeled clusters plus one unlabeled middle point (y = -1).
        let x = Tensor::from_slice(
            &[0.0f32, 0.0, 0.1, 0.1, 1.0, 1.0, 1.1, 1.1, 0.5, 0.5],
            &[5, 2],
        )
        .unwrap();
        let y = Tensor::from_slice(&[0.0f32, 0.0, 1.0, 1.0, -1.0], &[5]).unwrap();

        let mut model = LabelPropagation::new().gamma(10.0);
        model.fit(&x, &y);

        let predictions = model.predict(&x);
        assert_eq!(predictions.dims(), &[5]);
    }

    #[test]
    fn test_label_spreading() {
        // Same toy layout: two labeled clusters and one unlabeled point.
        let x = Tensor::from_slice(
            &[0.0f32, 0.0, 0.1, 0.1, 1.0, 1.0, 1.1, 1.1, 0.5, 0.5],
            &[5, 2],
        )
        .unwrap();
        let y = Tensor::from_slice(&[0.0f32, 0.0, 1.0, 1.0, -1.0], &[5]).unwrap();

        let mut model = LabelSpreading::new().alpha(0.2);
        model.fit(&x, &y);

        let predictions = model.predict(&x);
        assert_eq!(predictions.dims(), &[5]);
    }
}
495
496