use ghostflow_core::Tensor;
pub struct LocalOutlierFactor {
pub n_neighbors: usize,
pub contamination: f32,
pub metric: LOFMetric,
x_train_: Option<Vec<f32>>,
lrd_: Option<Vec<f32>>,
lof_scores_: Option<Vec<f32>>,
threshold_: f32,
n_samples_: usize,
n_features_: usize,
}
#[derive(Clone, Copy, Debug)]
pub enum LOFMetric {
Euclidean,
Manhattan,
Minkowski(f32),
}
impl LocalOutlierFactor {
pub fn new(n_neighbors: usize) -> Self {
LocalOutlierFactor {
n_neighbors,
contamination: 0.1,
metric: LOFMetric::Euclidean,
x_train_: None,
lrd_: None,
lof_scores_: None,
threshold_: 0.0,
n_samples_: 0,
n_features_: 0,
}
}
pub fn contamination(mut self, c: f32) -> Self {
self.contamination = c.clamp(0.0, 0.5);
self
}
fn distance(&self, a: &[f32], b: &[f32]) -> f32 {
match self.metric {
LOFMetric::Euclidean => {
a.iter().zip(b.iter()).map(|(&ai, &bi)| (ai - bi).powi(2)).sum::<f32>().sqrt()
}
LOFMetric::Manhattan => {
a.iter().zip(b.iter()).map(|(&ai, &bi)| (ai - bi).abs()).sum()
}
LOFMetric::Minkowski(p) => {
a.iter().zip(b.iter())
.map(|(&ai, &bi)| (ai - bi).abs().powf(p))
.sum::<f32>()
.powf(1.0 / p)
}
}
}
fn k_neighbors(&self, x: &[f32], point_idx: usize, n_samples: usize, n_features: usize) -> Vec<(usize, f32)> {
let point = &x[point_idx * n_features..(point_idx + 1) * n_features];
let mut distances: Vec<(usize, f32)> = (0..n_samples)
.filter(|&i| i != point_idx)
.map(|i| {
let other = &x[i * n_features..(i + 1) * n_features];
(i, self.distance(point, other))
})
.collect();
distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
distances.truncate(self.n_neighbors);
distances
}
fn k_distance(&self, neighbors: &[(usize, f32)]) -> f32 {
neighbors.last().map(|(_, d)| *d).unwrap_or(0.0)
}
fn reachability_distance(&self, x: &[f32], point_a: usize, point_b: usize, k_dist_b: f32, n_features: usize) -> f32 {
let a = &x[point_a * n_features..(point_a + 1) * n_features];
let b = &x[point_b * n_features..(point_b + 1) * n_features];
let dist = self.distance(a, b);
dist.max(k_dist_b)
}
pub fn fit(&mut self, x: &Tensor) {
let x_data = x.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
self.n_samples_ = n_samples;
self.n_features_ = n_features;
let all_neighbors: Vec<Vec<(usize, f32)>> = (0..n_samples)
.map(|i| self.k_neighbors(&x_data, i, n_samples, n_features))
.collect();
let k_distances: Vec<f32> = all_neighbors.iter()
.map(|neighbors| self.k_distance(neighbors))
.collect();
let lrd: Vec<f32> = (0..n_samples)
.map(|i| {
let neighbors = &all_neighbors[i];
let sum_reach_dist: f32 = neighbors.iter()
.map(|&(j, _)| self.reachability_distance(&x_data, i, j, k_distances[j], n_features))
.sum();
if sum_reach_dist > 0.0 {
neighbors.len() as f32 / sum_reach_dist
} else {
f32::INFINITY
}
})
.collect();
let lof_scores: Vec<f32> = (0..n_samples)
.map(|i| {
let neighbors = &all_neighbors[i];
let sum_lrd_ratio: f32 = neighbors.iter()
.map(|&(j, _)| lrd[j] / lrd[i].max(1e-10))
.sum();
sum_lrd_ratio / neighbors.len() as f32
})
.collect();
let mut sorted_scores = lof_scores.clone();
sorted_scores.sort_by(|a, b| b.partial_cmp(a).unwrap()); let threshold_idx = (self.contamination * n_samples as f32) as usize;
self.threshold_ = sorted_scores[threshold_idx.min(n_samples - 1)];
self.x_train_ = Some(x_data);
self.lrd_ = Some(lrd);
self.lof_scores_ = Some(lof_scores);
}
pub fn fit_predict(&mut self, x: &Tensor) -> Tensor {
self.fit(x);
let lof_scores = self.lof_scores_.as_ref().unwrap();
let predictions: Vec<f32> = lof_scores.iter()
.map(|&score| if score > self.threshold_ { -1.0 } else { 1.0 }) .collect();
Tensor::from_slice(&predictions, &[predictions.len()]).unwrap()
}
pub fn decision_function(&self, x: &Tensor) -> Tensor {
let x_data = x.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let x_train = self.x_train_.as_ref().expect("Model not fitted");
let lrd_train = self.lrd_.as_ref().expect("Model not fitted");
let n_train = self.n_samples_;
let scores: Vec<f32> = (0..n_samples)
.map(|i| {
let point = &x_data[i * n_features..(i + 1) * n_features];
let mut distances: Vec<(usize, f32)> = (0..n_train)
.map(|j| {
let train_point = &x_train[j * n_features..(j + 1) * n_features];
(j, self.distance(point, train_point))
})
.collect();
distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
let neighbors: Vec<(usize, f32)> = distances.into_iter().take(self.n_neighbors).collect();
let k_dist = neighbors.last().map(|(_, d)| *d).unwrap_or(0.0);
let sum_reach_dist: f32 = neighbors.iter()
.map(|&(_j, d)| d.max(k_dist))
.sum();
let lrd_point = if sum_reach_dist > 0.0 {
neighbors.len() as f32 / sum_reach_dist
} else {
f32::INFINITY
};
let sum_lrd_ratio: f32 = neighbors.iter()
.map(|&(j, _)| lrd_train[j] / lrd_point.max(1e-10))
.sum();
-(sum_lrd_ratio / neighbors.len() as f32) })
.collect();
Tensor::from_slice(&scores, &[n_samples]).unwrap()
}
pub fn predict(&self, x: &Tensor) -> Tensor {
let scores = self.decision_function(x);
let score_data = scores.data_f32();
let n_samples = x.dims()[0];
let predictions: Vec<f32> = score_data.iter()
.map(|&s| if -s > self.threshold_ { -1.0 } else { 1.0 })
.collect();
Tensor::from_slice(&predictions, &[n_samples]).unwrap()
}
}
pub struct OneClassSVM {
pub kernel: OCSVMKernel,
pub nu: f32,
pub gamma: f32,
pub max_iter: usize,
support_vectors_: Option<Vec<Vec<f32>>>,
dual_coef_: Option<Vec<f32>>,
intercept_: Option<f32>,
n_features_: usize,
}
#[derive(Clone, Copy, Debug)]
pub enum OCSVMKernel {
RBF,
Linear,
Poly { degree: i32, coef0: f32 },
Sigmoid { coef0: f32 },
}
impl OneClassSVM {
pub fn new() -> Self {
OneClassSVM {
kernel: OCSVMKernel::RBF,
nu: 0.5,
gamma: 1.0,
max_iter: 1000,
support_vectors_: None,
dual_coef_: None,
intercept_: None,
n_features_: 0,
}
}
pub fn nu(mut self, nu: f32) -> Self {
self.nu = nu.clamp(0.0, 1.0);
self
}
pub fn gamma(mut self, gamma: f32) -> Self {
self.gamma = gamma;
self
}
pub fn kernel(mut self, kernel: OCSVMKernel) -> Self {
self.kernel = kernel;
self
}
fn kernel_function(&self, xi: &[f32], xj: &[f32]) -> f32 {
match self.kernel {
OCSVMKernel::RBF => {
let dist_sq: f32 = xi.iter().zip(xj.iter()).map(|(&a, &b)| (a - b).powi(2)).sum();
(-self.gamma * dist_sq).exp()
}
OCSVMKernel::Linear => {
xi.iter().zip(xj.iter()).map(|(&a, &b)| a * b).sum()
}
OCSVMKernel::Poly { degree, coef0 } => {
let dot: f32 = xi.iter().zip(xj.iter()).map(|(&a, &b)| a * b).sum();
(self.gamma * dot + coef0).powi(degree)
}
OCSVMKernel::Sigmoid { coef0 } => {
let dot: f32 = xi.iter().zip(xj.iter()).map(|(&a, &b)| a * b).sum();
(self.gamma * dot + coef0).tanh()
}
}
}
pub fn fit(&mut self, x: &Tensor) {
let x_data = x.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
self.n_features_ = n_features;
let mut alpha = vec![0.0f32; n_samples];
let upper_bound = 1.0 / (self.nu * n_samples as f32);
let mut kernel_matrix = vec![vec![0.0f32; n_samples]; n_samples];
for i in 0..n_samples {
for j in i..n_samples {
let xi = &x_data[i * n_features..(i + 1) * n_features];
let xj = &x_data[j * n_features..(j + 1) * n_features];
let k = self.kernel_function(xi, xj);
kernel_matrix[i][j] = k;
kernel_matrix[j][i] = k;
}
}
let init_alpha = 1.0 / n_samples as f32;
for a in &mut alpha {
*a = init_alpha.min(upper_bound);
}
for _ in 0..self.max_iter {
let mut changed = false;
for i in 0..n_samples {
let mut grad = 0.0f32;
for j in 0..n_samples {
grad += alpha[j] * kernel_matrix[i][j];
}
let old_alpha = alpha[i];
alpha[i] = (alpha[i] - 0.01 * grad).clamp(0.0, upper_bound);
if (alpha[i] - old_alpha).abs() > 1e-5 {
changed = true;
}
}
let sum: f32 = alpha.iter().sum();
if sum > 0.0 {
for a in &mut alpha {
*a /= sum;
}
}
if !changed {
break;
}
}
let eps = 1e-5;
let mut support_vectors = Vec::new();
let mut dual_coef = Vec::new();
for i in 0..n_samples {
if alpha[i] > eps {
support_vectors.push(x_data[i * n_features..(i + 1) * n_features].to_vec());
dual_coef.push(alpha[i]);
}
}
let mut rho = 0.0f32;
let mut count = 0;
for i in 0..n_samples {
if alpha[i] > eps && alpha[i] < upper_bound - eps {
let mut sum = 0.0f32;
for j in 0..n_samples {
sum += alpha[j] * kernel_matrix[i][j];
}
rho += sum;
count += 1;
}
}
if count > 0 {
rho /= count as f32;
}
self.support_vectors_ = Some(support_vectors);
self.dual_coef_ = Some(dual_coef);
self.intercept_ = Some(rho);
}
pub fn decision_function(&self, x: &Tensor) -> Tensor {
let x_data = x.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let support_vectors = self.support_vectors_.as_ref().expect("Model not fitted");
let dual_coef = self.dual_coef_.as_ref().expect("Model not fitted");
let rho = self.intercept_.unwrap_or(0.0);
let scores: Vec<f32> = (0..n_samples)
.map(|i| {
let xi = &x_data[i * n_features..(i + 1) * n_features];
let mut score = -rho;
for (sv, &coef) in support_vectors.iter().zip(dual_coef.iter()) {
score += coef * self.kernel_function(xi, sv);
}
score
})
.collect();
Tensor::from_slice(&scores, &[n_samples]).unwrap()
}
pub fn predict(&self, x: &Tensor) -> Tensor {
let scores = self.decision_function(x);
let score_data = scores.data_f32();
let n_samples = x.dims()[0];
let predictions: Vec<f32> = score_data.iter()
.map(|&s| if s >= 0.0 { 1.0 } else { -1.0 })
.collect();
Tensor::from_slice(&predictions, &[n_samples]).unwrap()
}
pub fn fit_predict(&mut self, x: &Tensor) -> Tensor {
self.fit(x);
self.predict(x)
}
}
impl Default for OneClassSVM {
fn default() -> Self {
Self::new()
}
}
pub struct EllipticEnvelope {
pub contamination: f32,
pub support_fraction: Option<f32>,
location_: Option<Vec<f32>>,
covariance_: Option<Vec<f32>>,
precision_: Option<Vec<f32>>,
threshold_: f32,
n_features_: usize,
}
impl EllipticEnvelope {
pub fn new() -> Self {
EllipticEnvelope {
contamination: 0.1,
support_fraction: None,
location_: None,
covariance_: None,
precision_: None,
threshold_: 0.0,
n_features_: 0,
}
}
pub fn contamination(mut self, c: f32) -> Self {
self.contamination = c.clamp(0.0, 0.5);
self
}
fn compute_mean(&self, x: &[f32], n_samples: usize, n_features: usize) -> Vec<f32> {
let mut mean = vec![0.0f32; n_features];
for i in 0..n_samples {
for j in 0..n_features {
mean[j] += x[i * n_features + j];
}
}
for m in &mut mean {
*m /= n_samples as f32;
}
mean
}
fn compute_covariance(&self, x: &[f32], mean: &[f32], n_samples: usize, n_features: usize) -> Vec<f32> {
let mut cov = vec![0.0f32; n_features * n_features];
for i in 0..n_samples {
for j in 0..n_features {
for k in 0..n_features {
let diff_j = x[i * n_features + j] - mean[j];
let diff_k = x[i * n_features + k] - mean[k];
cov[j * n_features + k] += diff_j * diff_k;
}
}
}
let denom = (n_samples - 1).max(1) as f32;
for c in &mut cov {
*c /= denom;
}
cov
}
fn invert_matrix(&self, matrix: &[f32], n: usize) -> Vec<f32> {
let mut a = matrix.to_vec();
for i in 0..n {
a[i * n + i] += 1e-6;
}
let mut l = vec![0.0f32; n * n];
for i in 0..n {
for j in 0..=i {
let mut sum = 0.0f32;
if i == j {
for k in 0..j {
sum += l[j * n + k] * l[j * n + k];
}
l[j * n + j] = (a[j * n + j] - sum).max(1e-10).sqrt();
} else {
for k in 0..j {
sum += l[i * n + k] * l[j * n + k];
}
l[i * n + j] = (a[i * n + j] - sum) / l[j * n + j].max(1e-10);
}
}
}
let mut l_inv = vec![0.0f32; n * n];
for i in 0..n {
l_inv[i * n + i] = 1.0 / l[i * n + i].max(1e-10);
for j in (i + 1)..n {
let mut sum = 0.0f32;
for k in i..j {
sum += l[j * n + k] * l_inv[k * n + i];
}
l_inv[j * n + i] = -sum / l[j * n + j].max(1e-10);
}
}
let mut precision = vec![0.0f32; n * n];
for i in 0..n {
for j in 0..n {
for k in 0..n {
precision[i * n + j] += l_inv[k * n + i] * l_inv[k * n + j];
}
}
}
precision
}
fn mahalanobis_distance(&self, x: &[f32], mean: &[f32], precision: &[f32], n_features: usize) -> f32 {
let diff: Vec<f32> = x.iter().zip(mean.iter()).map(|(&xi, &mi)| xi - mi).collect();
let mut dist = 0.0f32;
for i in 0..n_features {
for j in 0..n_features {
dist += diff[i] * precision[i * n_features + j] * diff[j];
}
}
dist.sqrt()
}
pub fn fit(&mut self, x: &Tensor) {
let x_data = x.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
self.n_features_ = n_features;
let mean = self.compute_mean(&x_data, n_samples, n_features);
let cov = self.compute_covariance(&x_data, &mean, n_samples, n_features);
let precision = self.invert_matrix(&cov, n_features);
let distances: Vec<f32> = (0..n_samples)
.map(|i| {
let xi = &x_data[i * n_features..(i + 1) * n_features];
self.mahalanobis_distance(xi, &mean, &precision, n_features)
})
.collect();
let mut sorted_distances = distances.clone();
sorted_distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
let threshold_idx = ((1.0 - self.contamination) * n_samples as f32) as usize;
self.threshold_ = sorted_distances[threshold_idx.min(n_samples - 1)];
self.location_ = Some(mean);
self.covariance_ = Some(cov);
self.precision_ = Some(precision);
}
pub fn decision_function(&self, x: &Tensor) -> Tensor {
let x_data = x.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let mean = self.location_.as_ref().expect("Model not fitted");
let precision = self.precision_.as_ref().expect("Model not fitted");
let scores: Vec<f32> = (0..n_samples)
.map(|i| {
let xi = &x_data[i * n_features..(i + 1) * n_features];
-self.mahalanobis_distance(xi, mean, precision, n_features)
})
.collect();
Tensor::from_slice(&scores, &[n_samples]).unwrap()
}
pub fn predict(&self, x: &Tensor) -> Tensor {
let scores = self.decision_function(x);
let score_data = scores.data_f32();
let n_samples = x.dims()[0];
let predictions: Vec<f32> = score_data.iter()
.map(|&s| if -s <= self.threshold_ { 1.0 } else { -1.0 })
.collect();
Tensor::from_slice(&predictions, &[n_samples]).unwrap()
}
pub fn fit_predict(&mut self, x: &Tensor) -> Tensor {
self.fit(x);
self.predict(x)
}
pub fn mahalanobis(&self, x: &Tensor) -> Tensor {
let x_data = x.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let mean = self.location_.as_ref().expect("Model not fitted");
let precision = self.precision_.as_ref().expect("Model not fitted");
let distances: Vec<f32> = (0..n_samples)
.map(|i| {
let xi = &x_data[i * n_features..(i + 1) * n_features];
self.mahalanobis_distance(xi, mean, precision, n_features)
})
.collect();
Tensor::from_slice(&distances, &[n_samples]).unwrap()
}
}
impl Default for EllipticEnvelope {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_lof() {
let x = Tensor::from_slice(&[0.0f32, 0.0,
0.1, 0.1,
0.2, 0.0,
10.0, 10.0, ], &[4, 2]).unwrap();
let mut lof = LocalOutlierFactor::new(2).contamination(0.25);
let predictions = lof.fit_predict(&x);
assert_eq!(predictions.dims(), &[4]);
}
#[test]
fn test_one_class_svm() {
let x = Tensor::from_slice(&[0.0f32, 0.0,
0.1, 0.1,
0.2, 0.0,
0.1, 0.2,
], &[4, 2]).unwrap();
let mut ocsvm = OneClassSVM::new().nu(0.1);
ocsvm.fit(&x);
let predictions = ocsvm.predict(&x);
assert_eq!(predictions.dims(), &[4]);
}
#[test]
fn test_elliptic_envelope() {
let x = Tensor::from_slice(&[0.0f32, 0.0,
0.1, 0.1,
0.2, 0.0,
10.0, 10.0, ], &[4, 2]).unwrap();
let mut ee = EllipticEnvelope::new().contamination(0.25);
let predictions = ee.fit_predict(&x);
assert_eq!(predictions.dims(), &[4]);
}
}