1use ghostflow_core::Tensor;
4
/// Local Outlier Factor (LOF) anomaly detector.
///
/// Compares each sample's local reachability density against that of its
/// `n_neighbors` nearest neighbors; scores noticeably above 1 mark outliers.
pub struct LocalOutlierFactor {
    /// Number of neighbors used for density estimation.
    pub n_neighbors: usize,
    /// Expected proportion of outliers; drives the decision threshold.
    pub contamination: f32,
    /// Distance metric used for neighbor searches.
    pub metric: LOFMetric,
    // Fitted state (None / 0 until `fit` is called):
    // flattened training data, row-major [n_samples_ * n_features_].
    x_train_: Option<Vec<f32>>,
    // Local reachability density of each training sample.
    lrd_: Option<Vec<f32>>,
    // LOF score of each training sample.
    lof_scores_: Option<Vec<f32>>,
    // Score cutoff derived from `contamination` during `fit`.
    threshold_: f32,
    n_samples_: usize,
    n_features_: usize,
}
17
/// Distance metric used by [`LocalOutlierFactor`] for neighbor searches.
#[derive(Clone, Copy, Debug)]
pub enum LOFMetric {
    /// L2 (straight-line) distance.
    Euclidean,
    /// L1 (city-block) distance.
    Manhattan,
    /// Lp distance with exponent `p` (p = 2 reduces to Euclidean).
    Minkowski(f32),
}
24
25impl LocalOutlierFactor {
26 pub fn new(n_neighbors: usize) -> Self {
27 LocalOutlierFactor {
28 n_neighbors,
29 contamination: 0.1,
30 metric: LOFMetric::Euclidean,
31 x_train_: None,
32 lrd_: None,
33 lof_scores_: None,
34 threshold_: 0.0,
35 n_samples_: 0,
36 n_features_: 0,
37 }
38 }
39
40 pub fn contamination(mut self, c: f32) -> Self {
41 self.contamination = c.clamp(0.0, 0.5);
42 self
43 }
44
45 fn distance(&self, a: &[f32], b: &[f32]) -> f32 {
46 match self.metric {
47 LOFMetric::Euclidean => {
48 a.iter().zip(b.iter()).map(|(&ai, &bi)| (ai - bi).powi(2)).sum::<f32>().sqrt()
49 }
50 LOFMetric::Manhattan => {
51 a.iter().zip(b.iter()).map(|(&ai, &bi)| (ai - bi).abs()).sum()
52 }
53 LOFMetric::Minkowski(p) => {
54 a.iter().zip(b.iter())
55 .map(|(&ai, &bi)| (ai - bi).abs().powf(p))
56 .sum::<f32>()
57 .powf(1.0 / p)
58 }
59 }
60 }
61
62 fn k_neighbors(&self, x: &[f32], point_idx: usize, n_samples: usize, n_features: usize) -> Vec<(usize, f32)> {
63 let point = &x[point_idx * n_features..(point_idx + 1) * n_features];
64
65 let mut distances: Vec<(usize, f32)> = (0..n_samples)
66 .filter(|&i| i != point_idx)
67 .map(|i| {
68 let other = &x[i * n_features..(i + 1) * n_features];
69 (i, self.distance(point, other))
70 })
71 .collect();
72
73 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
74 distances.truncate(self.n_neighbors);
75 distances
76 }
77
78 fn k_distance(&self, neighbors: &[(usize, f32)]) -> f32 {
79 neighbors.last().map(|(_, d)| *d).unwrap_or(0.0)
80 }
81
82 fn reachability_distance(&self, x: &[f32], point_a: usize, point_b: usize, k_dist_b: f32, n_features: usize) -> f32 {
83 let a = &x[point_a * n_features..(point_a + 1) * n_features];
84 let b = &x[point_b * n_features..(point_b + 1) * n_features];
85 let dist = self.distance(a, b);
86 dist.max(k_dist_b)
87 }
88
89 pub fn fit(&mut self, x: &Tensor) {
90 let x_data = x.data_f32();
91 let n_samples = x.dims()[0];
92 let n_features = x.dims()[1];
93
94 self.n_samples_ = n_samples;
95 self.n_features_ = n_features;
96
97 let all_neighbors: Vec<Vec<(usize, f32)>> = (0..n_samples)
99 .map(|i| self.k_neighbors(&x_data, i, n_samples, n_features))
100 .collect();
101
102 let k_distances: Vec<f32> = all_neighbors.iter()
103 .map(|neighbors| self.k_distance(neighbors))
104 .collect();
105
106 let lrd: Vec<f32> = (0..n_samples)
108 .map(|i| {
109 let neighbors = &all_neighbors[i];
110 let sum_reach_dist: f32 = neighbors.iter()
111 .map(|&(j, _)| self.reachability_distance(&x_data, i, j, k_distances[j], n_features))
112 .sum();
113
114 if sum_reach_dist > 0.0 {
115 neighbors.len() as f32 / sum_reach_dist
116 } else {
117 f32::INFINITY
118 }
119 })
120 .collect();
121
122 let lof_scores: Vec<f32> = (0..n_samples)
124 .map(|i| {
125 let neighbors = &all_neighbors[i];
126 let sum_lrd_ratio: f32 = neighbors.iter()
127 .map(|&(j, _)| lrd[j] / lrd[i].max(1e-10))
128 .sum();
129
130 sum_lrd_ratio / neighbors.len() as f32
131 })
132 .collect();
133
134 let mut sorted_scores = lof_scores.clone();
136 sorted_scores.sort_by(|a, b| b.partial_cmp(a).unwrap()); let threshold_idx = (self.contamination * n_samples as f32) as usize;
138 self.threshold_ = sorted_scores[threshold_idx.min(n_samples - 1)];
139
140 self.x_train_ = Some(x_data);
141 self.lrd_ = Some(lrd);
142 self.lof_scores_ = Some(lof_scores);
143 }
144
145 pub fn fit_predict(&mut self, x: &Tensor) -> Tensor {
146 self.fit(x);
147
148 let lof_scores = self.lof_scores_.as_ref().unwrap();
149 let predictions: Vec<f32> = lof_scores.iter()
150 .map(|&score| if score > self.threshold_ { -1.0 } else { 1.0 }) .collect();
152
153 Tensor::from_slice(&predictions, &[predictions.len()]).unwrap()
154 }
155
156 pub fn decision_function(&self, x: &Tensor) -> Tensor {
157 let x_data = x.data_f32();
159 let n_samples = x.dims()[0];
160 let n_features = x.dims()[1];
161
162 let x_train = self.x_train_.as_ref().expect("Model not fitted");
163 let lrd_train = self.lrd_.as_ref().expect("Model not fitted");
164 let n_train = self.n_samples_;
165
166 let scores: Vec<f32> = (0..n_samples)
167 .map(|i| {
168 let point = &x_data[i * n_features..(i + 1) * n_features];
169
170 let mut distances: Vec<(usize, f32)> = (0..n_train)
172 .map(|j| {
173 let train_point = &x_train[j * n_features..(j + 1) * n_features];
174 (j, self.distance(point, train_point))
175 })
176 .collect();
177
178 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
179 let neighbors: Vec<(usize, f32)> = distances.into_iter().take(self.n_neighbors).collect();
180
181 let k_dist = neighbors.last().map(|(_, d)| *d).unwrap_or(0.0);
183 let sum_reach_dist: f32 = neighbors.iter()
184 .map(|&(_j, d)| d.max(k_dist))
185 .sum();
186
187 let lrd_point = if sum_reach_dist > 0.0 {
188 neighbors.len() as f32 / sum_reach_dist
189 } else {
190 f32::INFINITY
191 };
192
193 let sum_lrd_ratio: f32 = neighbors.iter()
195 .map(|&(j, _)| lrd_train[j] / lrd_point.max(1e-10))
196 .sum();
197
198 -(sum_lrd_ratio / neighbors.len() as f32) })
200 .collect();
201
202 Tensor::from_slice(&scores, &[n_samples]).unwrap()
203 }
204
205 pub fn predict(&self, x: &Tensor) -> Tensor {
206 let scores = self.decision_function(x);
207 let score_data = scores.data_f32();
208 let n_samples = x.dims()[0];
209
210 let predictions: Vec<f32> = score_data.iter()
211 .map(|&s| if -s > self.threshold_ { -1.0 } else { 1.0 })
212 .collect();
213
214 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
215 }
216}
217
/// One-class SVM for novelty detection, trained with a simple projected
/// gradient scheme on the dual variables (see `fit`).
pub struct OneClassSVM {
    /// Kernel used to measure similarity between samples.
    pub kernel: OCSVMKernel,
    /// ν parameter: bounds the fraction of outliers / support vectors.
    pub nu: f32,
    /// Kernel coefficient (RBF width, poly/sigmoid scale).
    pub gamma: f32,
    /// Maximum optimizer iterations in `fit`.
    pub max_iter: usize,
    // Fitted state (None until `fit` is called):
    // training rows with non-negligible dual weight.
    support_vectors_: Option<Vec<Vec<f32>>>,
    // Dual coefficient (alpha) for each stored support vector.
    dual_coef_: Option<Vec<f32>>,
    // Offset rho subtracted in the decision function.
    intercept_: Option<f32>,
    n_features_: usize,
}
229
/// Kernel functions available to [`OneClassSVM`].
#[derive(Clone, Copy, Debug)]
pub enum OCSVMKernel {
    /// exp(-gamma * ||x - y||²)
    RBF,
    /// x · y
    Linear,
    /// (gamma * x · y + coef0)^degree
    Poly { degree: i32, coef0: f32 },
    /// tanh(gamma * x · y + coef0)
    Sigmoid { coef0: f32 },
}
237
238impl OneClassSVM {
239 pub fn new() -> Self {
240 OneClassSVM {
241 kernel: OCSVMKernel::RBF,
242 nu: 0.5,
243 gamma: 1.0,
244 max_iter: 1000,
245 support_vectors_: None,
246 dual_coef_: None,
247 intercept_: None,
248 n_features_: 0,
249 }
250 }
251
252 pub fn nu(mut self, nu: f32) -> Self {
253 self.nu = nu.clamp(0.0, 1.0);
254 self
255 }
256
257 pub fn gamma(mut self, gamma: f32) -> Self {
258 self.gamma = gamma;
259 self
260 }
261
262 pub fn kernel(mut self, kernel: OCSVMKernel) -> Self {
263 self.kernel = kernel;
264 self
265 }
266
267 fn kernel_function(&self, xi: &[f32], xj: &[f32]) -> f32 {
268 match self.kernel {
269 OCSVMKernel::RBF => {
270 let dist_sq: f32 = xi.iter().zip(xj.iter()).map(|(&a, &b)| (a - b).powi(2)).sum();
271 (-self.gamma * dist_sq).exp()
272 }
273 OCSVMKernel::Linear => {
274 xi.iter().zip(xj.iter()).map(|(&a, &b)| a * b).sum()
275 }
276 OCSVMKernel::Poly { degree, coef0 } => {
277 let dot: f32 = xi.iter().zip(xj.iter()).map(|(&a, &b)| a * b).sum();
278 (self.gamma * dot + coef0).powi(degree)
279 }
280 OCSVMKernel::Sigmoid { coef0 } => {
281 let dot: f32 = xi.iter().zip(xj.iter()).map(|(&a, &b)| a * b).sum();
282 (self.gamma * dot + coef0).tanh()
283 }
284 }
285 }
286
287 pub fn fit(&mut self, x: &Tensor) {
288 let x_data = x.data_f32();
289 let n_samples = x.dims()[0];
290 let n_features = x.dims()[1];
291
292 self.n_features_ = n_features;
293
294 let mut alpha = vec![0.0f32; n_samples];
296 let upper_bound = 1.0 / (self.nu * n_samples as f32);
297
298 let mut kernel_matrix = vec![vec![0.0f32; n_samples]; n_samples];
300 for i in 0..n_samples {
301 for j in i..n_samples {
302 let xi = &x_data[i * n_features..(i + 1) * n_features];
303 let xj = &x_data[j * n_features..(j + 1) * n_features];
304 let k = self.kernel_function(xi, xj);
305 kernel_matrix[i][j] = k;
306 kernel_matrix[j][i] = k;
307 }
308 }
309
310 let init_alpha = 1.0 / n_samples as f32;
312 for a in &mut alpha {
313 *a = init_alpha.min(upper_bound);
314 }
315
316 for _ in 0..self.max_iter {
318 let mut changed = false;
319
320 for i in 0..n_samples {
321 let mut grad = 0.0f32;
323 for j in 0..n_samples {
324 grad += alpha[j] * kernel_matrix[i][j];
325 }
326
327 let old_alpha = alpha[i];
329 alpha[i] = (alpha[i] - 0.01 * grad).clamp(0.0, upper_bound);
330
331 if (alpha[i] - old_alpha).abs() > 1e-5 {
332 changed = true;
333 }
334 }
335
336 let sum: f32 = alpha.iter().sum();
338 if sum > 0.0 {
339 for a in &mut alpha {
340 *a /= sum;
341 }
342 }
343
344 if !changed {
345 break;
346 }
347 }
348
349 let eps = 1e-5;
351 let mut support_vectors = Vec::new();
352 let mut dual_coef = Vec::new();
353
354 for i in 0..n_samples {
355 if alpha[i] > eps {
356 support_vectors.push(x_data[i * n_features..(i + 1) * n_features].to_vec());
357 dual_coef.push(alpha[i]);
358 }
359 }
360
361 let mut rho = 0.0f32;
363 let mut count = 0;
364 for i in 0..n_samples {
365 if alpha[i] > eps && alpha[i] < upper_bound - eps {
366 let mut sum = 0.0f32;
367 for j in 0..n_samples {
368 sum += alpha[j] * kernel_matrix[i][j];
369 }
370 rho += sum;
371 count += 1;
372 }
373 }
374 if count > 0 {
375 rho /= count as f32;
376 }
377
378 self.support_vectors_ = Some(support_vectors);
379 self.dual_coef_ = Some(dual_coef);
380 self.intercept_ = Some(rho);
381 }
382
383 pub fn decision_function(&self, x: &Tensor) -> Tensor {
384 let x_data = x.data_f32();
385 let n_samples = x.dims()[0];
386 let n_features = x.dims()[1];
387
388 let support_vectors = self.support_vectors_.as_ref().expect("Model not fitted");
389 let dual_coef = self.dual_coef_.as_ref().expect("Model not fitted");
390 let rho = self.intercept_.unwrap_or(0.0);
391
392 let scores: Vec<f32> = (0..n_samples)
393 .map(|i| {
394 let xi = &x_data[i * n_features..(i + 1) * n_features];
395 let mut score = -rho;
396
397 for (sv, &coef) in support_vectors.iter().zip(dual_coef.iter()) {
398 score += coef * self.kernel_function(xi, sv);
399 }
400
401 score
402 })
403 .collect();
404
405 Tensor::from_slice(&scores, &[n_samples]).unwrap()
406 }
407
408 pub fn predict(&self, x: &Tensor) -> Tensor {
409 let scores = self.decision_function(x);
410 let score_data = scores.data_f32();
411 let n_samples = x.dims()[0];
412
413 let predictions: Vec<f32> = score_data.iter()
414 .map(|&s| if s >= 0.0 { 1.0 } else { -1.0 })
415 .collect();
416
417 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
418 }
419
420 pub fn fit_predict(&mut self, x: &Tensor) -> Tensor {
421 self.fit(x);
422 self.predict(x)
423 }
424}
425
impl Default for OneClassSVM {
    /// Equivalent to [`OneClassSVM::new`].
    fn default() -> Self {
        Self::new()
    }
}
431
432
/// Gaussian-assumption outlier detector: fits a mean and covariance, then
/// flags points whose Mahalanobis distance exceeds a contamination-derived
/// threshold.
pub struct EllipticEnvelope {
    /// Expected proportion of outliers; drives the distance threshold.
    pub contamination: f32,
    /// Not read by the current `fit` — presumably reserved for a robust
    /// (MCD-style) covariance estimate. TODO confirm intent.
    pub support_fraction: Option<f32>,
    // Fitted state (None until `fit` is called):
    // feature-wise mean of the training data.
    location_: Option<Vec<f32>>,
    // Row-major covariance matrix [n_features_ * n_features_].
    covariance_: Option<Vec<f32>>,
    // Inverse covariance (precision) matrix, same layout.
    precision_: Option<Vec<f32>>,
    // Mahalanobis-distance cutoff derived from `contamination`.
    threshold_: f32,
    n_features_: usize,
}
443
444impl EllipticEnvelope {
445 pub fn new() -> Self {
446 EllipticEnvelope {
447 contamination: 0.1,
448 support_fraction: None,
449 location_: None,
450 covariance_: None,
451 precision_: None,
452 threshold_: 0.0,
453 n_features_: 0,
454 }
455 }
456
457 pub fn contamination(mut self, c: f32) -> Self {
458 self.contamination = c.clamp(0.0, 0.5);
459 self
460 }
461
462 fn compute_mean(&self, x: &[f32], n_samples: usize, n_features: usize) -> Vec<f32> {
463 let mut mean = vec![0.0f32; n_features];
464 for i in 0..n_samples {
465 for j in 0..n_features {
466 mean[j] += x[i * n_features + j];
467 }
468 }
469 for m in &mut mean {
470 *m /= n_samples as f32;
471 }
472 mean
473 }
474
475 fn compute_covariance(&self, x: &[f32], mean: &[f32], n_samples: usize, n_features: usize) -> Vec<f32> {
476 let mut cov = vec![0.0f32; n_features * n_features];
477
478 for i in 0..n_samples {
479 for j in 0..n_features {
480 for k in 0..n_features {
481 let diff_j = x[i * n_features + j] - mean[j];
482 let diff_k = x[i * n_features + k] - mean[k];
483 cov[j * n_features + k] += diff_j * diff_k;
484 }
485 }
486 }
487
488 let denom = (n_samples - 1).max(1) as f32;
489 for c in &mut cov {
490 *c /= denom;
491 }
492
493 cov
494 }
495
496 fn invert_matrix(&self, matrix: &[f32], n: usize) -> Vec<f32> {
497 let mut a = matrix.to_vec();
499
500 for i in 0..n {
502 a[i * n + i] += 1e-6;
503 }
504
505 let mut l = vec![0.0f32; n * n];
507 for i in 0..n {
508 for j in 0..=i {
509 let mut sum = 0.0f32;
510 if i == j {
511 for k in 0..j {
512 sum += l[j * n + k] * l[j * n + k];
513 }
514 l[j * n + j] = (a[j * n + j] - sum).max(1e-10).sqrt();
515 } else {
516 for k in 0..j {
517 sum += l[i * n + k] * l[j * n + k];
518 }
519 l[i * n + j] = (a[i * n + j] - sum) / l[j * n + j].max(1e-10);
520 }
521 }
522 }
523
524 let mut l_inv = vec![0.0f32; n * n];
526 for i in 0..n {
527 l_inv[i * n + i] = 1.0 / l[i * n + i].max(1e-10);
528 for j in (i + 1)..n {
529 let mut sum = 0.0f32;
530 for k in i..j {
531 sum += l[j * n + k] * l_inv[k * n + i];
532 }
533 l_inv[j * n + i] = -sum / l[j * n + j].max(1e-10);
534 }
535 }
536
537 let mut precision = vec![0.0f32; n * n];
539 for i in 0..n {
540 for j in 0..n {
541 for k in 0..n {
542 precision[i * n + j] += l_inv[k * n + i] * l_inv[k * n + j];
543 }
544 }
545 }
546
547 precision
548 }
549
550 fn mahalanobis_distance(&self, x: &[f32], mean: &[f32], precision: &[f32], n_features: usize) -> f32 {
551 let diff: Vec<f32> = x.iter().zip(mean.iter()).map(|(&xi, &mi)| xi - mi).collect();
552
553 let mut dist = 0.0f32;
554 for i in 0..n_features {
555 for j in 0..n_features {
556 dist += diff[i] * precision[i * n_features + j] * diff[j];
557 }
558 }
559
560 dist.sqrt()
561 }
562
563 pub fn fit(&mut self, x: &Tensor) {
564 let x_data = x.data_f32();
565 let n_samples = x.dims()[0];
566 let n_features = x.dims()[1];
567
568 self.n_features_ = n_features;
569
570 let mean = self.compute_mean(&x_data, n_samples, n_features);
572 let cov = self.compute_covariance(&x_data, &mean, n_samples, n_features);
573 let precision = self.invert_matrix(&cov, n_features);
574
575 let distances: Vec<f32> = (0..n_samples)
577 .map(|i| {
578 let xi = &x_data[i * n_features..(i + 1) * n_features];
579 self.mahalanobis_distance(xi, &mean, &precision, n_features)
580 })
581 .collect();
582
583 let mut sorted_distances = distances.clone();
585 sorted_distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
586 let threshold_idx = ((1.0 - self.contamination) * n_samples as f32) as usize;
587 self.threshold_ = sorted_distances[threshold_idx.min(n_samples - 1)];
588
589 self.location_ = Some(mean);
590 self.covariance_ = Some(cov);
591 self.precision_ = Some(precision);
592 }
593
594 pub fn decision_function(&self, x: &Tensor) -> Tensor {
595 let x_data = x.data_f32();
596 let n_samples = x.dims()[0];
597 let n_features = x.dims()[1];
598
599 let mean = self.location_.as_ref().expect("Model not fitted");
600 let precision = self.precision_.as_ref().expect("Model not fitted");
601
602 let scores: Vec<f32> = (0..n_samples)
603 .map(|i| {
604 let xi = &x_data[i * n_features..(i + 1) * n_features];
605 -self.mahalanobis_distance(xi, mean, precision, n_features)
606 })
607 .collect();
608
609 Tensor::from_slice(&scores, &[n_samples]).unwrap()
610 }
611
612 pub fn predict(&self, x: &Tensor) -> Tensor {
613 let scores = self.decision_function(x);
614 let score_data = scores.data_f32();
615 let n_samples = x.dims()[0];
616
617 let predictions: Vec<f32> = score_data.iter()
618 .map(|&s| if -s <= self.threshold_ { 1.0 } else { -1.0 })
619 .collect();
620
621 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
622 }
623
624 pub fn fit_predict(&mut self, x: &Tensor) -> Tensor {
625 self.fit(x);
626 self.predict(x)
627 }
628
629 pub fn mahalanobis(&self, x: &Tensor) -> Tensor {
630 let x_data = x.data_f32();
631 let n_samples = x.dims()[0];
632 let n_features = x.dims()[1];
633
634 let mean = self.location_.as_ref().expect("Model not fitted");
635 let precision = self.precision_.as_ref().expect("Model not fitted");
636
637 let distances: Vec<f32> = (0..n_samples)
638 .map(|i| {
639 let xi = &x_data[i * n_features..(i + 1) * n_features];
640 self.mahalanobis_distance(xi, mean, precision, n_features)
641 })
642 .collect();
643
644 Tensor::from_slice(&distances, &[n_samples]).unwrap()
645 }
646}
647
impl Default for EllipticEnvelope {
    /// Equivalent to [`EllipticEnvelope::new`].
    fn default() -> Self {
        Self::new()
    }
}
653
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_lof() {
        // Three tightly clustered points plus one far-away point.
        let data = [0.0f32, 0.0, 0.1, 0.1, 0.2, 0.0, 10.0, 10.0];
        let x = Tensor::from_slice(&data, &[4, 2]).unwrap();

        let mut detector = LocalOutlierFactor::new(2).contamination(0.25);
        let labels = detector.fit_predict(&x);

        assert_eq!(labels.dims(), &[4]);
    }

    #[test]
    fn test_one_class_svm() {
        // Four points forming a single tight cluster.
        let data = [0.0f32, 0.0, 0.1, 0.1, 0.2, 0.0, 0.1, 0.2];
        let x = Tensor::from_slice(&data, &[4, 2]).unwrap();

        let mut model = OneClassSVM::new().nu(0.1);
        model.fit(&x);

        let labels = model.predict(&x);
        assert_eq!(labels.dims(), &[4]);
    }

    #[test]
    fn test_elliptic_envelope() {
        // Same cluster-plus-outlier layout as the LOF test.
        let data = [0.0f32, 0.0, 0.1, 0.1, 0.2, 0.0, 10.0, 10.0];
        let x = Tensor::from_slice(&data, &[4, 2]).unwrap();

        let mut envelope = EllipticEnvelope::new().contamination(0.25);
        let labels = envelope.fit_predict(&x);

        assert_eq!(labels.dims(), &[4]);
    }
}
701
702