// ferrolearn_linear/one_class_svm.rs
//! One-Class SVM for novelty detection.
//!
//! This module provides [`OneClassSVM`], which learns a decision boundary
//! around the training data and classifies new points as inliers (`+1`) or
//! outliers (`-1`).
//!
//! # Algorithm
//!
//! Fitting solves the one-class SVM dual problem with SMO-style pairwise
//! updates: the dual variables are constrained by `0 <= alpha_i <= 1/(n * nu)`
//! and `sum_i alpha_i = 1`, separating the data from the origin in kernel
//! feature space. Points with positive decision values are inliers; negative
//! are outliers.
//!
//! # Examples
//!
//! ```
//! use ferrolearn_linear::one_class_svm::OneClassSVM;
//! use ferrolearn_linear::svm::RbfKernel;
//! use ferrolearn_core::{Fit, Predict};
//! use ndarray::{array, Array2, Array1};
//!
//! let x_train = Array2::from_shape_vec((6, 2), vec![
//!     1.0, 1.0,  1.5, 1.0,  1.0, 1.5,
//!     1.2, 1.3,  1.3, 1.2,  1.1, 1.1,
//! ]).unwrap();
//!
//! let model = OneClassSVM::<f64, RbfKernel<f64>>::new(RbfKernel::with_gamma(1.0));
//! let fitted = model.fit(&x_train, &()).unwrap();
//!
//! // Most training data should be classified as inliers.
//! let preds = fitted.predict(&x_train).unwrap();
//! let inliers: usize = preds.iter().filter(|&&p| p == 1).count();
//! assert!(inliers >= 4);
//! ```

use ferrolearn_core::error::FerroError;
use ferrolearn_core::traits::{Fit, Predict};
use ndarray::{Array1, Array2, ScalarOperand};
use num_traits::Float;

use crate::svm::Kernel;

// ---------------------------------------------------------------------------
// OneClassSVM
// ---------------------------------------------------------------------------

/// One-Class SVM for novelty detection.
///
/// Learns a decision boundary around the training data. New points are
/// classified as inliers (`+1`) or outliers (`-1`).
///
/// # Type Parameters
///
/// - `F`: The floating-point type (`f32` or `f64`).
/// - `K`: The kernel type (e.g., [`RbfKernel`](super::svm::RbfKernel)).
#[derive(Debug, Clone)]
pub struct OneClassSVM<F, K> {
    /// The kernel function.
    pub kernel: K,
    /// The nu parameter: upper bound on the fraction of outliers.
    /// Must be in `(0, 1]`. Default: `0.5`.
    pub nu: F,
    /// Convergence tolerance.
    pub tol: F,
    /// Maximum number of SMO iterations.
    pub max_iter: usize,
    /// Size of the kernel evaluation LRU cache.
    pub cache_size: usize,
}

72impl<F: Float, K: Kernel<F>> OneClassSVM<F, K> {
73    /// Create a new `OneClassSVM` with the given kernel and default hyperparameters.
74    ///
75    /// Defaults: `nu = 0.5`, `tol = 1e-3`, `max_iter = 10000`, `cache_size = 1024`.
76    #[must_use]
77    pub fn new(kernel: K) -> Self {
78        Self {
79            nu: F::from(0.5).unwrap(),
80            kernel,
81            tol: F::from(1e-3).unwrap(),
82            max_iter: 10000,
83            cache_size: 1024,
84        }
85    }
86
87    /// Set the nu parameter.
88    #[must_use]
89    pub fn with_nu(mut self, nu: F) -> Self {
90        self.nu = nu;
91        self
92    }
93
94    /// Set the convergence tolerance.
95    #[must_use]
96    pub fn with_tol(mut self, tol: F) -> Self {
97        self.tol = tol;
98        self
99    }
100
101    /// Set the maximum number of SMO iterations.
102    #[must_use]
103    pub fn with_max_iter(mut self, max_iter: usize) -> Self {
104        self.max_iter = max_iter;
105        self
106    }
107
108    /// Set the kernel cache size.
109    #[must_use]
110    pub fn with_cache_size(mut self, cache_size: usize) -> Self {
111        self.cache_size = cache_size;
112        self
113    }
114}
115
/// Fitted One-Class SVM.
///
/// Holds the learned support vectors, their dual coefficients, and the
/// offset `rho`. A point `x` is labelled by the sign of
/// `sum_i alpha_i * K(sv_i, x) - rho`: `+1` (inlier) or `-1` (outlier).
#[derive(Debug, Clone)]
pub struct FittedOneClassSVM<F, K> {
    /// Kernel used at prediction time (the same kernel used during fitting).
    kernel: K,
    /// Support vectors, one sample per inner `Vec`.
    support_vectors: Vec<Vec<F>>,
    /// Dual coefficient `alpha_i` paired with each support vector.
    dual_coefs: Vec<F>,
    /// Offset (rho) of the decision function: sign(f(x) - rho).
    rho: F,
}

133impl<F: Float + Send + Sync + ScalarOperand + 'static, K: Kernel<F> + 'static> Fit<Array2<F>, ()>
134    for OneClassSVM<F, K>
135{
136    type Fitted = FittedOneClassSVM<F, K>;
137    type Error = FerroError;
138
139    /// Fit the One-Class SVM.
140    ///
141    /// # Errors
142    ///
143    /// - [`FerroError::InvalidParameter`] if `nu` is not in `(0, 1]`.
144    /// - [`FerroError::InsufficientSamples`] if no training data is provided.
145    fn fit(&self, x: &Array2<F>, _y: &()) -> Result<FittedOneClassSVM<F, K>, FerroError> {
146        if self.nu <= F::zero() || self.nu > F::one() {
147            return Err(FerroError::InvalidParameter {
148                name: "nu".into(),
149                reason: "must be in (0, 1]".into(),
150            });
151        }
152
153        let n_samples = x.nrows();
154        let n_features = x.ncols();
155
156        if n_samples == 0 {
157            return Err(FerroError::InsufficientSamples {
158                required: 1,
159                actual: 0,
160                context: "OneClassSVM requires at least one sample".into(),
161            });
162        }
163
164        // Solve the one-class SVM dual:
165        // max sum_i alpha_i - 0.5 * sum_{i,j} alpha_i * alpha_j * K(x_i, x_j)
166        // s.t. 0 <= alpha_i <= 1/(n * nu), sum alpha_i = 1
167        //
168        // We use a simplified approach: initialize alphas uniformly, then
169        // iterate with SMO-style updates.
170
171        let c = F::one() / (F::from(n_samples).unwrap() * self.nu);
172        let data: Vec<Vec<F>> = (0..n_samples).map(|i| x.row(i).to_vec()).collect();
173
174        // Initialize alphas uniformly: alpha_i = 1/n
175        let init_alpha = F::one() / F::from(n_samples).unwrap();
176        let mut alphas = vec![init_alpha.min(c); n_samples];
177
178        // Ensure sum(alphas) = 1 after capping at c.
179        let alpha_sum: F = alphas.iter().copied().fold(F::zero(), |a, b| a + b);
180        if alpha_sum < F::one() {
181            // Distribute remaining mass.
182            let remaining = F::one() - alpha_sum;
183            let per_sample = remaining / F::from(n_samples).unwrap();
184            for alpha in &mut alphas {
185                *alpha = (*alpha + per_sample).min(c);
186            }
187        }
188
189        // Compute initial gradient: grad_i = sum_j alpha_j * K(x_i, x_j)
190        let eps = F::from(1e-12).unwrap_or_else(F::epsilon);
191        let two = F::one() + F::one();
192
193        let mut grad = vec![F::zero(); n_samples];
194        for i in 0..n_samples {
195            for j in 0..n_samples {
196                grad[i] = grad[i] + alphas[j] * self.kernel.compute(&data[i], &data[j]);
197            }
198        }
199
200        // SMO iterations
201        for _iter in 0..self.max_iter {
202            // Select working set: i with largest gradient (and alpha_i > 0),
203            // j with smallest gradient (and alpha_j < c).
204            let mut i_best = None;
205            let mut i_max_grad = F::neg_infinity();
206            let mut j_best = None;
207            let mut j_min_grad = F::infinity();
208
209            for k in 0..n_samples {
210                if alphas[k] > eps && grad[k] > i_max_grad {
211                    i_max_grad = grad[k];
212                    i_best = Some(k);
213                }
214                if alphas[k] < c - eps && grad[k] < j_min_grad {
215                    j_min_grad = grad[k];
216                    j_best = Some(k);
217                }
218            }
219
220            if i_best.is_none() || j_best.is_none() || i_max_grad - j_min_grad < self.tol {
221                break;
222            }
223
224            let i = i_best.unwrap();
225            let j = j_best.unwrap();
226
227            if i == j {
228                break;
229            }
230
231            let kii = self.kernel.compute(&data[i], &data[i]);
232            let kjj = self.kernel.compute(&data[j], &data[j]);
233            let kij = self.kernel.compute(&data[i], &data[j]);
234            let eta = kii + kjj - two * kij;
235
236            if eta <= eps {
237                continue;
238            }
239
240            // Update: move mass from i to j.
241            let delta = (grad[i] - grad[j]) / eta;
242            let delta = delta.min(alphas[i]).min(c - alphas[j]);
243
244            if delta.abs() < eps {
245                continue;
246            }
247
248            alphas[i] = alphas[i] - delta;
249            alphas[j] = alphas[j] + delta;
250
251            // Update gradients.
252            for k in 0..n_samples {
253                let kki = self.kernel.compute(&data[k], &data[i]);
254                let kkj = self.kernel.compute(&data[k], &data[j]);
255                grad[k] = grad[k] - delta * kki + delta * kkj;
256            }
257        }
258
259        // Compute rho from the KKT conditions.
260        // For free SVs (0 < alpha_i < c): rho = grad_i = sum_j alpha_j * K(i, j).
261        let mut rho_sum = F::zero();
262        let mut rho_count = 0usize;
263
264        for i in 0..n_samples {
265            if alphas[i] > eps && alphas[i] < c - eps {
266                rho_sum = rho_sum + grad[i];
267                rho_count += 1;
268            }
269        }
270
271        let rho = if rho_count > 0 {
272            rho_sum / F::from(rho_count).unwrap()
273        } else {
274            // Fallback: use the midpoint of the gradient range among all SVs.
275            let sv_grads: Vec<F> = (0..n_samples)
276                .filter(|&i| alphas[i] > eps)
277                .map(|i| grad[i])
278                .collect();
279
280            if sv_grads.is_empty() {
281                F::zero()
282            } else {
283                let min_g = sv_grads.iter().fold(F::infinity(), |a, &b| a.min(b));
284                let max_g = sv_grads.iter().fold(F::neg_infinity(), |a, &b| a.max(b));
285                (min_g + max_g) / two
286            }
287        };
288
289        // Extract support vectors.
290        let mut support_vectors = Vec::new();
291        let mut dual_coefs = Vec::new();
292
293        for (i, &alpha) in alphas.iter().enumerate() {
294            if alpha > eps {
295                support_vectors.push(data[i].clone());
296                dual_coefs.push(alpha);
297            }
298        }
299
300        // If no support vectors found, use all data as fallback.
301        if support_vectors.is_empty() {
302            let weight = F::one() / F::from(n_samples).unwrap();
303            for row in &data {
304                support_vectors.push(row.clone());
305                dual_coefs.push(weight);
306            }
307        }
308
309        let _ = n_features; // used for validation context
310
311        Ok(FittedOneClassSVM {
312            kernel: self.kernel.clone(),
313            support_vectors,
314            dual_coefs,
315            rho,
316        })
317    }
318}
319
320impl<F: Float + Send + Sync + ScalarOperand + 'static, K: Kernel<F> + 'static>
321    FittedOneClassSVM<F, K>
322{
323    /// Compute the decision function value for a single sample.
324    ///
325    /// Returns `f(x) - rho`, where `f(x) = sum_i alpha_i * K(sv_i, x)`.
326    fn decision_value(&self, x: &[F]) -> F {
327        let mut val = F::zero();
328        for (sv, &coef) in self.support_vectors.iter().zip(self.dual_coefs.iter()) {
329            val = val + coef * self.kernel.compute(sv, x);
330        }
331        val - self.rho
332    }
333
334    /// Compute the raw decision function values for each sample.
335    ///
336    /// Returns an array of shape `(n_samples,)`. Positive values indicate
337    /// inliers, negative values indicate outliers.
338    ///
339    /// # Errors
340    ///
341    /// Returns `Ok` always for valid input.
342    pub fn decision_function(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
343        let n_samples = x.nrows();
344        let mut result = Array1::<F>::zeros(n_samples);
345        for s in 0..n_samples {
346            let xi: Vec<F> = x.row(s).to_vec();
347            result[s] = self.decision_value(&xi);
348        }
349        Ok(result)
350    }
351}
352
353impl<F: Float + Send + Sync + ScalarOperand + 'static, K: Kernel<F> + 'static> Predict<Array2<F>>
354    for FittedOneClassSVM<F, K>
355{
356    type Output = Array1<isize>;
357    type Error = FerroError;
358
359    /// Predict inlier (+1) or outlier (-1) for each sample.
360    ///
361    /// # Errors
362    ///
363    /// Returns `Ok` always for valid input.
364    fn predict(&self, x: &Array2<F>) -> Result<Array1<isize>, FerroError> {
365        let n_samples = x.nrows();
366        let mut predictions = Array1::<isize>::zeros(n_samples);
367
368        for s in 0..n_samples {
369            let xi: Vec<F> = x.row(s).to_vec();
370            let val = self.decision_value(&xi);
371            predictions[s] = if val >= F::zero() { 1 } else { -1 };
372        }
373
374        Ok(predictions)
375    }
376}
377
#[cfg(test)]
mod tests {
    use super::*;
    use crate::svm::{LinearKernel, RbfKernel};
    use ndarray::Array2;

    /// Eight 2-D points tightly clustered around (1.0, 1.0).
    fn make_cluster_data() -> Array2<f64> {
        let flat = vec![
            1.0, 1.0, 1.1, 1.0, 1.0, 1.1, 1.1, 1.1, 0.9, 0.9, 1.0, 0.9, 0.9, 1.0, 1.05, 1.05,
        ];
        Array2::from_shape_vec((8, 2), flat).unwrap()
    }

    #[test]
    fn test_one_class_svm_fit() {
        let data = make_cluster_data();
        let svm = OneClassSVM::<f64, RbfKernel<f64>>::new(RbfKernel::with_gamma(10.0));
        assert!(svm.fit(&data, &()).is_ok());
    }

    #[test]
    fn test_one_class_svm_inliers() {
        let data = make_cluster_data();
        let fitted = OneClassSVM::new(RbfKernel::with_gamma(10.0))
            .with_nu(0.1)
            .fit(&data, &())
            .unwrap();
        let preds = fitted.predict(&data).unwrap();

        // Most training points should be classified as inliers.
        let inliers: usize = preds.iter().filter(|&&label| label == 1).count();
        assert!(inliers >= 6, "Expected at least 6 inliers, got {inliers}");
    }

    #[test]
    fn test_one_class_svm_outlier_detection() {
        let flat = vec![
            0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.1, 0.1, -0.1, 0.0, 0.0, -0.1, 0.05, 0.05, -0.05,
            -0.05,
        ];
        let x_train = Array2::from_shape_vec((8, 2), flat).unwrap();

        let fitted = OneClassSVM::new(RbfKernel::with_gamma(10.0))
            .with_nu(0.1)
            .fit(&x_train, &())
            .unwrap();

        // A far-away point should be an outlier.
        let far_point = Array2::from_shape_vec((1, 2), vec![100.0, 100.0]).unwrap();
        let preds = fitted.predict(&far_point).unwrap();
        assert_eq!(preds[0], -1, "Far-away point should be an outlier");
    }

    #[test]
    fn test_one_class_svm_decision_function() {
        let data = make_cluster_data();
        let fitted = OneClassSVM::new(RbfKernel::with_gamma(10.0))
            .with_nu(0.1)
            .fit(&data, &())
            .unwrap();

        let df = fitted.decision_function(&data).unwrap();
        assert_eq!(df.len(), 8);

        // Most decision values should be non-negative for training data.
        let positive: usize = df.iter().filter(|&&v| v >= 0.0).count();
        assert!(
            positive >= 6,
            "Expected at least 6 positive df, got {positive}"
        );
    }

    #[test]
    fn test_one_class_svm_invalid_nu() {
        let data = Array2::from_shape_vec((4, 2), vec![1.0; 8]).unwrap();

        // nu must lie in (0, 1]; both values below are out of range.
        let zero_nu = OneClassSVM::new(RbfKernel::<f64>::new()).with_nu(0.0);
        assert!(zero_nu.fit(&data, &()).is_err());

        let big_nu = OneClassSVM::new(RbfKernel::<f64>::new()).with_nu(1.5);
        assert!(big_nu.fit(&data, &()).is_err());
    }

    #[test]
    fn test_one_class_svm_empty_data() {
        let empty = Array2::<f64>::zeros((0, 2));
        let svm = OneClassSVM::new(RbfKernel::<f64>::new());
        assert!(svm.fit(&empty, &()).is_err());
    }

    #[test]
    fn test_one_class_svm_builder_pattern() {
        let svm = OneClassSVM::<f64, LinearKernel>::new(LinearKernel)
            .with_nu(0.3)
            .with_tol(1e-4)
            .with_max_iter(5000)
            .with_cache_size(2048);

        assert!((svm.nu - 0.3).abs() < 1e-10);
        assert!((svm.tol - 1e-4).abs() < 1e-10);
        assert_eq!(svm.max_iter, 5000);
        assert_eq!(svm.cache_size, 2048);
    }

    #[test]
    fn test_one_class_svm_linear_kernel() {
        let data = make_cluster_data();
        let fitted = OneClassSVM::new(LinearKernel)
            .with_nu(0.5)
            .fit(&data, &())
            .unwrap();
        let preds = fitted.predict(&data).unwrap();
        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_one_class_svm_single_sample() {
        let lone = Array2::from_shape_vec((1, 2), vec![1.0, 1.0]).unwrap();
        let svm = OneClassSVM::new(RbfKernel::with_gamma(1.0)).with_nu(0.5);
        assert!(svm.fit(&lone, &()).is_ok());
    }
}
499}