// ferrolearn_linear/svm.rs

//! Support Vector Machine with kernel trick.
//!
//! This module provides [`SVC`] (classification) and [`SVR`] (regression)
//! support vector machines trained using the **Sequential Minimal Optimization
//! (SMO)** algorithm (Platt, 1998).
//!
//! # Kernels
//!
//! Four built-in kernels are provided:
//!
//! - [`LinearKernel`]: `K(x, y) = x . y`
//! - [`RbfKernel`]: `K(x, y) = exp(-gamma * ||x - y||^2)`
//! - [`PolynomialKernel`]: `K(x, y) = (gamma * x . y + coef0)^degree`
//! - [`SigmoidKernel`]: `K(x, y) = tanh(gamma * x . y + coef0)`
//!
//! Users can implement the [`Kernel`] trait for custom kernels.
//!
//! # Multiclass
//!
//! `SVC` uses a one-vs-one strategy for multiclass classification.
//!
//! # Examples
//!
//! ```
//! use ferrolearn_linear::svm::{SVC, LinearKernel};
//! use ferrolearn_core::{Fit, Predict};
//! use ndarray::{array, Array2};
//!
//! let x = Array2::from_shape_vec((6, 2), vec![
//!     1.0, 1.0,  2.0, 1.0,  1.0, 2.0,
//!     5.0, 5.0,  6.0, 5.0,  5.0, 6.0,
//! ]).unwrap();
//! let y = array![0usize, 0, 0, 1, 1, 1];
//!
//! let model = SVC::<f64, LinearKernel>::new(LinearKernel);
//! let fitted = model.fit(&x, &y).unwrap();
//! let preds = fitted.predict(&x).unwrap();
//! assert_eq!(preds.len(), 6);
//! ```

use std::collections::{HashMap, VecDeque};

use ferrolearn_core::error::FerroError;
use ferrolearn_core::traits::{Fit, Predict};
use ndarray::{Array1, Array2, ScalarOperand};
use num_traits::Float;
47
48// ---------------------------------------------------------------------------
49// Kernel trait and built-in kernels
50// ---------------------------------------------------------------------------
51
/// A kernel function for SVM.
///
/// Computes the inner product of two vectors in a (possibly implicit)
/// higher-dimensional feature space.
///
/// Implementations must be cheap to clone: fitted models store a copy of
/// the kernel for use at prediction time.
pub trait Kernel<F: Float>: Clone + Send + Sync {
    /// Compute the kernel value between two vectors.
    ///
    /// `x` and `y` are feature vectors of the same length. The built-in
    /// kernels zip the two slices, so any extra trailing elements of the
    /// longer slice are silently ignored.
    fn compute(&self, x: &[F], y: &[F]) -> F;
}
60
61/// Linear kernel: `K(x, y) = x . y`.
62#[derive(Debug, Clone, Copy)]
63pub struct LinearKernel;
64
65impl<F: Float> Kernel<F> for LinearKernel {
66    fn compute(&self, x: &[F], y: &[F]) -> F {
67        x.iter()
68            .zip(y.iter())
69            .fold(F::zero(), |acc, (&a, &b)| acc + a * b)
70    }
71}
72
73/// Radial Basis Function (Gaussian) kernel.
74///
75/// `K(x, y) = exp(-gamma * ||x - y||^2)`
76#[derive(Debug, Clone, Copy)]
77pub struct RbfKernel<F> {
78    /// The gamma parameter. If `None`, it is set to `1 / (n_features * var(X))`.
79    pub gamma: Option<F>,
80}
81
82impl<F: Float> RbfKernel<F> {
83    /// Create a new RBF kernel with auto gamma.
84    #[must_use]
85    pub fn new() -> Self {
86        Self { gamma: None }
87    }
88
89    /// Create a new RBF kernel with a specified gamma.
90    #[must_use]
91    pub fn with_gamma(gamma: F) -> Self {
92        Self { gamma: Some(gamma) }
93    }
94}
95
96impl<F: Float> Default for RbfKernel<F> {
97    fn default() -> Self {
98        Self::new()
99    }
100}
101
102impl<F: Float + Send + Sync> Kernel<F> for RbfKernel<F> {
103    fn compute(&self, x: &[F], y: &[F]) -> F {
104        let gamma = self.gamma.unwrap_or(F::one());
105        let sq_dist = x.iter().zip(y.iter()).fold(F::zero(), |acc, (&a, &b)| {
106            let d = a - b;
107            acc + d * d
108        });
109        (-gamma * sq_dist).exp()
110    }
111}
112
113/// Polynomial kernel: `K(x, y) = (gamma * x . y + coef0)^degree`.
114#[derive(Debug, Clone, Copy)]
115pub struct PolynomialKernel<F> {
116    /// The gamma parameter. If `None`, uses `1 / n_features`.
117    pub gamma: Option<F>,
118    /// Polynomial degree.
119    pub degree: usize,
120    /// Independent term.
121    pub coef0: F,
122}
123
124impl<F: Float> PolynomialKernel<F> {
125    /// Create a new polynomial kernel with defaults.
126    #[must_use]
127    pub fn new() -> Self {
128        Self {
129            gamma: None,
130            degree: 3,
131            coef0: F::zero(),
132        }
133    }
134}
135
136impl<F: Float> Default for PolynomialKernel<F> {
137    fn default() -> Self {
138        Self::new()
139    }
140}
141
142impl<F: Float + Send + Sync> Kernel<F> for PolynomialKernel<F> {
143    fn compute(&self, x: &[F], y: &[F]) -> F {
144        let gamma = self.gamma.unwrap_or(F::one());
145        let dot: F = x
146            .iter()
147            .zip(y.iter())
148            .fold(F::zero(), |acc, (&a, &b)| acc + a * b);
149        let val = gamma * dot + self.coef0;
150        let mut result = F::one();
151        for _ in 0..self.degree {
152            result = result * val;
153        }
154        result
155    }
156}
157
158/// Sigmoid kernel: `K(x, y) = tanh(gamma * x . y + coef0)`.
159#[derive(Debug, Clone, Copy)]
160pub struct SigmoidKernel<F> {
161    /// The gamma parameter. If `None`, uses `1 / n_features`.
162    pub gamma: Option<F>,
163    /// Independent term.
164    pub coef0: F,
165}
166
167impl<F: Float> SigmoidKernel<F> {
168    /// Create a new sigmoid kernel with defaults.
169    #[must_use]
170    pub fn new() -> Self {
171        Self {
172            gamma: None,
173            coef0: F::zero(),
174        }
175    }
176}
177
178impl<F: Float> Default for SigmoidKernel<F> {
179    fn default() -> Self {
180        Self::new()
181    }
182}
183
184impl<F: Float + Send + Sync> Kernel<F> for SigmoidKernel<F> {
185    fn compute(&self, x: &[F], y: &[F]) -> F {
186        let gamma = self.gamma.unwrap_or(F::one());
187        let dot: F = x
188            .iter()
189            .zip(y.iter())
190            .fold(F::zero(), |acc, (&a, &b)| acc + a * b);
191        (gamma * dot + self.coef0).tanh()
192    }
193}
194
195// ---------------------------------------------------------------------------
196// Kernel cache (LRU)
197// ---------------------------------------------------------------------------
198
199/// Simple LRU cache for kernel evaluations.
200struct KernelCache<F> {
201    cache: HashMap<(usize, usize), F>,
202    order: Vec<(usize, usize)>,
203    capacity: usize,
204}
205
206impl<F: Float> KernelCache<F> {
207    fn new(capacity: usize) -> Self {
208        Self {
209            cache: HashMap::with_capacity(capacity),
210            order: Vec::with_capacity(capacity),
211            capacity,
212        }
213    }
214
215    fn get_or_compute<K: Kernel<F>>(
216        &mut self,
217        i: usize,
218        j: usize,
219        kernel: &K,
220        data: &[Vec<F>],
221    ) -> F {
222        let key = if i <= j { (i, j) } else { (j, i) };
223        if let Some(&val) = self.cache.get(&key) {
224            return val;
225        }
226        let val = kernel.compute(&data[i], &data[j]);
227        if self.order.len() >= self.capacity {
228            if let Some(old_key) = self.order.first().copied() {
229                self.cache.remove(&old_key);
230                self.order.remove(0);
231            }
232        }
233        self.cache.insert(key, val);
234        self.order.push(key);
235        val
236    }
237}
238
239// ---------------------------------------------------------------------------
240// SMO solver for binary SVM
241// ---------------------------------------------------------------------------
242
/// Result of a binary SMO solve.
struct SmoResult<F> {
    /// One dual coefficient per training sample, clipped to `[0, C]`.
    alphas: Vec<F>,
    /// Intercept recovered from the KKT conditions after convergence.
    bias: F,
}
248
249/// SMO implementation (Platt 1998, Fan-Chen-Lin 2005 WSS).
250///
251/// Uses the dual gradient `grad_i = (Q * alpha)_i - 1` where
252/// `Q_{ij} = y_i * y_j * K(x_i, x_j)`. Bias is computed after
253/// convergence from the KKT conditions.
254fn smo_binary<F: Float, K: Kernel<F>>(
255    data: &[Vec<F>],
256    labels: &[F],
257    kernel: &K,
258    c: F,
259    tol: F,
260    max_iter: usize,
261    cache_size: usize,
262) -> Result<SmoResult<F>, FerroError> {
263    let n = data.len();
264    let mut alphas = vec![F::zero(); n];
265    let mut cache = KernelCache::new(cache_size);
266
267    // Gradient of the dual objective: grad_i = (Q*alpha)_i - 1
268    // where Q_{ij} = y_i * y_j * K(x_i, x_j).
269    // Initially alpha = 0, so grad_i = -1 for all i.
270    let mut grad: Vec<F> = vec![-F::one(); n];
271
272    let two = F::one() + F::one();
273    let eps = F::from(1e-12).unwrap_or(F::epsilon());
274
275    for _iter in 0..max_iter {
276        // Working set selection (Fan-Chen-Lin 2005):
277        // I_up  = {i : (y_i=+1 and alpha_i < C) or (y_i=-1 and alpha_i > 0)}
278        // I_low = {j : (y_j=+1 and alpha_j > 0) or (y_j=-1 and alpha_j < C)}
279        // Select i = argmax_{t in I_up}  -y_t * grad_t
280        // Select j = argmin_{t in I_low} -y_t * grad_t
281
282        let mut i_up = None;
283        let mut max_val = F::neg_infinity();
284        let mut j_low = None;
285        let mut min_val = F::infinity();
286
287        for t in 0..n {
288            let val = -labels[t] * grad[t];
289
290            let in_up = (labels[t] > F::zero() && alphas[t] < c - eps)
291                || (labels[t] < F::zero() && alphas[t] > eps);
292
293            let in_low = (labels[t] > F::zero() && alphas[t] > eps)
294                || (labels[t] < F::zero() && alphas[t] < c - eps);
295
296            if in_up && val > max_val {
297                max_val = val;
298                i_up = Some(t);
299            }
300            if in_low && val < min_val {
301                min_val = val;
302                j_low = Some(t);
303            }
304        }
305
306        // Stopping criterion: KKT gap < tol
307        if i_up.is_none() || j_low.is_none() || max_val - min_val < tol {
308            break;
309        }
310
311        let i = i_up.unwrap();
312        let j = j_low.unwrap();
313
314        if i == j {
315            break;
316        }
317
318        // Compute second-order info
319        let kii = cache.get_or_compute(i, i, kernel, data);
320        let kjj = cache.get_or_compute(j, j, kernel, data);
321        let kij = cache.get_or_compute(i, j, kernel, data);
322        let eta = kii + kjj - two * kij;
323
324        if eta <= eps {
325            continue;
326        }
327
328        // Bounds for alpha_j
329        let old_ai = alphas[i];
330        let old_aj = alphas[j];
331
332        let (lo, hi) = if labels[i] != labels[j] {
333            let diff = old_aj - old_ai;
334            (diff.max(F::zero()), (c + diff).min(c))
335        } else {
336            let sum = old_ai + old_aj;
337            ((sum - c).max(F::zero()), sum.min(c))
338        };
339
340        if (hi - lo).abs() < eps {
341            continue;
342        }
343
344        // Analytic update for alpha_j (Platt 1998).
345        // E_k = y_k * grad_k (dual error, where grad = Q*alpha - e).
346        // alpha_j_new = alpha_j + y_j * (E_i - E_j) / eta
347        //             = alpha_j + y_j * (y_i * grad_i - y_j * grad_j) / eta
348        let mut new_aj = old_aj + labels[j] * (labels[i] * grad[i] - labels[j] * grad[j]) / eta;
349
350        // Clip to [lo, hi]
351        if new_aj > hi {
352            new_aj = hi;
353        }
354        if new_aj < lo {
355            new_aj = lo;
356        }
357
358        if (new_aj - old_aj).abs() < eps {
359            continue;
360        }
361
362        let new_ai = old_ai + labels[i] * labels[j] * (old_aj - new_aj);
363
364        alphas[i] = new_ai;
365        alphas[j] = new_aj;
366
367        // Update dual gradient: grad_k += delta_alpha_i * Q_{k,i} + delta_alpha_j * Q_{k,j}
368        // where Q_{k,t} = y_k * y_t * K(k,t)
369        let delta_ai = new_ai - old_ai;
370        let delta_aj = new_aj - old_aj;
371
372        for (k, grad_k) in grad.iter_mut().enumerate() {
373            let kki = cache.get_or_compute(k, i, kernel, data);
374            let kkj = cache.get_or_compute(k, j, kernel, data);
375            *grad_k = *grad_k
376                + delta_ai * labels[k] * labels[i] * kki
377                + delta_aj * labels[k] * labels[j] * kkj;
378        }
379    }
380
381    // Compute bias from KKT conditions.
382    // For support vectors with 0 < alpha_i < C:
383    //   y_i * (sum_j alpha_j * y_j * K(i,j) + b) = 1
384    //   b = y_i - sum_j alpha_j * y_j * K(i,j)
385    // (since y_i^2 = 1, y_i * (y_i * f) = f, so b = 1/y_i - sum = y_i - sum)
386    let mut b_sum = F::zero();
387    let mut b_count = 0usize;
388
389    for i in 0..n {
390        if alphas[i] > eps && alphas[i] < c - eps {
391            // This is a free support vector.
392            let mut f_no_b = F::zero();
393            for j in 0..n {
394                if alphas[j] > eps {
395                    f_no_b =
396                        f_no_b + alphas[j] * labels[j] * cache.get_or_compute(i, j, kernel, data);
397                }
398            }
399            b_sum = b_sum + labels[i] - f_no_b;
400            b_count += 1;
401        }
402    }
403
404    let bias = if b_count > 0 {
405        b_sum / F::from(b_count).unwrap()
406    } else {
407        // Fallback: use all support vectors (bounded ones too)
408        let mut b_sum_all = F::zero();
409        let mut b_count_all = 0usize;
410        for i in 0..n {
411            if alphas[i] > eps {
412                let mut f_no_b = F::zero();
413                for j in 0..n {
414                    if alphas[j] > eps {
415                        f_no_b = f_no_b
416                            + alphas[j] * labels[j] * cache.get_or_compute(i, j, kernel, data);
417                    }
418                }
419                b_sum_all = b_sum_all + labels[i] - f_no_b;
420                b_count_all += 1;
421            }
422        }
423        if b_count_all > 0 {
424            b_sum_all / F::from(b_count_all).unwrap()
425        } else {
426            F::zero()
427        }
428    };
429
430    Ok(SmoResult { alphas, bias })
431}
432
433// ---------------------------------------------------------------------------
434// SVC (Support Vector Classifier)
435// ---------------------------------------------------------------------------
436
/// Support Vector Classifier.
///
/// Uses Sequential Minimal Optimization (SMO) to solve the dual QP.
/// Supports multiclass via one-vs-one strategy.
///
/// # Type Parameters
///
/// - `F`: The floating-point type (`f32` or `f64`).
/// - `K`: The kernel type (e.g., [`LinearKernel`], [`RbfKernel`]).
#[derive(Debug, Clone)]
pub struct SVC<F, K> {
    /// The kernel function.
    pub kernel: K,
    /// Regularization parameter C (upper bound on each dual alpha);
    /// must be positive — `fit` rejects `C <= 0`.
    pub c: F,
    /// Convergence tolerance on the KKT gap used as the SMO stopping
    /// criterion.
    pub tol: F,
    /// Maximum number of SMO iterations per binary sub-problem.
    pub max_iter: usize,
    /// Size (entry count) of the kernel evaluation cache.
    pub cache_size: usize,
}
459
460impl<F: Float, K: Kernel<F>> SVC<F, K> {
461    /// Create a new `SVC` with the given kernel and default hyperparameters.
462    ///
463    /// Defaults: `C = 1.0`, `tol = 1e-3`, `max_iter = 10000`, `cache_size = 1024`.
464    #[must_use]
465    pub fn new(kernel: K) -> Self {
466        Self {
467            kernel,
468            c: F::one(),
469            tol: F::from(1e-3).unwrap(),
470            max_iter: 10000,
471            cache_size: 1024,
472        }
473    }
474
475    /// Set the regularization parameter C.
476    #[must_use]
477    pub fn with_c(mut self, c: F) -> Self {
478        self.c = c;
479        self
480    }
481
482    /// Set the convergence tolerance.
483    #[must_use]
484    pub fn with_tol(mut self, tol: F) -> Self {
485        self.tol = tol;
486        self
487    }
488
489    /// Set the maximum number of SMO iterations.
490    #[must_use]
491    pub fn with_max_iter(mut self, max_iter: usize) -> Self {
492        self.max_iter = max_iter;
493        self
494    }
495
496    /// Set the kernel cache size.
497    #[must_use]
498    pub fn with_cache_size(mut self, cache_size: usize) -> Self {
499        self.cache_size = cache_size;
500        self
501    }
502}
503
/// A single binary SVM model (one pair of classes in one-vs-one).
#[derive(Debug, Clone)]
struct BinarySvm<F> {
    /// Support vectors (stored as rows).
    support_vectors: Vec<Vec<F>>,
    /// Dual coefficients: alpha_i * y_i for each support vector.
    dual_coefs: Vec<F>,
    /// Bias term.
    bias: F,
    /// The two class labels: at prediction time a decision value `< 0`
    /// votes for `class_neg` and `>= 0` votes for `class_pos`.
    class_neg: usize,
    class_pos: usize,
}
517
/// Fitted Support Vector Classifier.
///
/// Stores one binary SVM per pair of classes (one-vs-one). Implements
/// [`Predict`] to produce class labels by majority voting.
#[derive(Debug, Clone)]
pub struct FittedSVC<F, K> {
    /// The kernel used for predictions (cloned from the estimator).
    kernel: K,
    /// One binary SVM per unordered class pair.
    binary_models: Vec<BinarySvm<F>>,
    /// Sorted, deduplicated class labels observed during `fit`.
    classes: Vec<usize>,
}
531
532impl<F: Float, K: Kernel<F>> FittedSVC<F, K> {
533    /// Compute the decision function value for a single sample against a
534    /// binary model.
535    fn decision_value_binary(&self, x: &[F], model: &BinarySvm<F>) -> F {
536        let mut val = model.bias;
537        for (sv, &coef) in model.support_vectors.iter().zip(model.dual_coefs.iter()) {
538            val = val + coef * self.kernel.compute(sv, x);
539        }
540        val
541    }
542
543    /// Compute the raw decision function values for each sample.
544    ///
545    /// For binary classification, returns a 1-column array.
546    /// For multiclass, returns one column per class pair.
547    ///
548    /// # Errors
549    ///
550    /// Returns [`FerroError::ShapeMismatch`] if the input has no columns.
551    pub fn decision_function(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
552        let n_samples = x.nrows();
553        let n_models = self.binary_models.len();
554        let mut result = Array2::<F>::zeros((n_samples, n_models));
555
556        for s in 0..n_samples {
557            let xi: Vec<F> = x.row(s).to_vec();
558            for (m, model) in self.binary_models.iter().enumerate() {
559                result[[s, m]] = self.decision_value_binary(&xi, model);
560            }
561        }
562
563        Ok(result)
564    }
565}
566
567impl<F: Float + Send + Sync + ScalarOperand + 'static, K: Kernel<F> + 'static>
568    Fit<Array2<F>, Array1<usize>> for SVC<F, K>
569{
570    type Fitted = FittedSVC<F, K>;
571    type Error = FerroError;
572
573    /// Fit the SVC model using SMO.
574    ///
575    /// # Errors
576    ///
577    /// Returns [`FerroError::ShapeMismatch`] if `x` and `y` have different
578    /// sample counts.
579    /// Returns [`FerroError::InvalidParameter`] if `C` is not positive.
580    /// Returns [`FerroError::InsufficientSamples`] if fewer than 2 classes.
581    fn fit(&self, x: &Array2<F>, y: &Array1<usize>) -> Result<FittedSVC<F, K>, FerroError> {
582        let (n_samples, _n_features) = x.dim();
583
584        if n_samples != y.len() {
585            return Err(FerroError::ShapeMismatch {
586                expected: vec![n_samples],
587                actual: vec![y.len()],
588                context: "y length must match number of samples in X".into(),
589            });
590        }
591
592        if self.c <= F::zero() {
593            return Err(FerroError::InvalidParameter {
594                name: "C".into(),
595                reason: "must be positive".into(),
596            });
597        }
598
599        // Determine unique classes.
600        let mut classes: Vec<usize> = y.to_vec();
601        classes.sort_unstable();
602        classes.dedup();
603
604        if classes.len() < 2 {
605            return Err(FerroError::InsufficientSamples {
606                required: 2,
607                actual: classes.len(),
608                context: "SVC requires at least 2 distinct classes".into(),
609            });
610        }
611
612        // Convert data to Vec<Vec<F>> for kernel cache.
613        let data: Vec<Vec<F>> = (0..n_samples).map(|i| x.row(i).to_vec()).collect();
614
615        // One-vs-one: train one binary SVM per pair.
616        let n_classes = classes.len();
617        let mut binary_models = Vec::new();
618
619        for ci in 0..n_classes {
620            for cj in (ci + 1)..n_classes {
621                let class_neg = classes[ci];
622                let class_pos = classes[cj];
623
624                // Extract samples for these two classes.
625                let mut sub_data = Vec::new();
626                let mut sub_labels = Vec::new();
627                let mut sub_indices = Vec::new();
628
629                for s in 0..n_samples {
630                    let label = y[s];
631                    if label == class_neg {
632                        sub_data.push(data[s].clone());
633                        sub_labels.push(-F::one());
634                        sub_indices.push(s);
635                    } else if label == class_pos {
636                        sub_data.push(data[s].clone());
637                        sub_labels.push(F::one());
638                        sub_indices.push(s);
639                    }
640                }
641
642                let result = smo_binary(
643                    &sub_data,
644                    &sub_labels,
645                    &self.kernel,
646                    self.c,
647                    self.tol,
648                    self.max_iter,
649                    self.cache_size,
650                )?;
651
652                // Extract support vectors (non-zero alphas).
653                let eps = F::from(1e-8).unwrap_or(F::epsilon());
654                let mut sv_data = Vec::new();
655                let mut sv_coefs = Vec::new();
656
657                for (k, &alpha) in result.alphas.iter().enumerate() {
658                    if alpha > eps {
659                        sv_data.push(sub_data[k].clone());
660                        sv_coefs.push(alpha * sub_labels[k]);
661                    }
662                }
663
664                binary_models.push(BinarySvm {
665                    support_vectors: sv_data,
666                    dual_coefs: sv_coefs,
667                    bias: result.bias,
668                    class_neg,
669                    class_pos,
670                });
671            }
672        }
673
674        Ok(FittedSVC {
675            kernel: self.kernel.clone(),
676            binary_models,
677            classes,
678        })
679    }
680}
681
682impl<F: Float + Send + Sync + ScalarOperand + 'static, K: Kernel<F> + 'static> Predict<Array2<F>>
683    for FittedSVC<F, K>
684{
685    type Output = Array1<usize>;
686    type Error = FerroError;
687
688    /// Predict class labels for the given feature matrix.
689    ///
690    /// Uses one-vs-one voting: each binary model casts a vote for the
691    /// winning class, and the class with the most votes wins.
692    ///
693    /// # Errors
694    ///
695    /// Returns [`FerroError::ShapeMismatch`] if the number of features
696    /// does not match the training data.
697    fn predict(&self, x: &Array2<F>) -> Result<Array1<usize>, FerroError> {
698        let n_samples = x.nrows();
699        let n_classes = self.classes.len();
700        let mut predictions = Array1::<usize>::zeros(n_samples);
701
702        for s in 0..n_samples {
703            let xi: Vec<F> = x.row(s).to_vec();
704            let mut votes = vec![0usize; n_classes];
705
706            for model in &self.binary_models {
707                let val = self.decision_value_binary(&xi, model);
708                let winner = if val >= F::zero() {
709                    model.class_pos
710                } else {
711                    model.class_neg
712                };
713                if let Some(idx) = self.classes.iter().position(|&c| c == winner) {
714                    votes[idx] += 1;
715                }
716            }
717
718            let best_class_idx = votes
719                .iter()
720                .enumerate()
721                .max_by_key(|&(_, &v)| v)
722                .map(|(i, _)| i)
723                .unwrap_or(0);
724
725            predictions[s] = self.classes[best_class_idx];
726        }
727
728        Ok(predictions)
729    }
730}
731
732// ---------------------------------------------------------------------------
733// SVR (Support Vector Regressor)
734// ---------------------------------------------------------------------------
735
/// Support Vector Regressor.
///
/// Uses SMO to solve the epsilon-SVR dual problem.
///
/// # Type Parameters
///
/// - `F`: The floating-point type (`f32` or `f64`).
/// - `K`: The kernel type.
#[derive(Debug, Clone)]
pub struct SVR<F, K> {
    /// The kernel function.
    pub kernel: K,
    /// Regularization parameter C (upper bound on each dual variable).
    pub c: F,
    /// Epsilon tube width (insensitive loss zone).
    pub epsilon: F,
    /// Convergence tolerance on the KKT gap used as the SMO stopping
    /// criterion.
    pub tol: F,
    /// Maximum number of SMO iterations.
    pub max_iter: usize,
    /// Size (entry count) of the kernel evaluation cache.
    pub cache_size: usize,
}
759
760impl<F: Float, K: Kernel<F>> SVR<F, K> {
761    /// Create a new `SVR` with the given kernel and default hyperparameters.
762    ///
763    /// Defaults: `C = 1.0`, `epsilon = 0.1`, `tol = 1e-3`,
764    /// `max_iter = 10000`, `cache_size = 1024`.
765    #[must_use]
766    pub fn new(kernel: K) -> Self {
767        Self {
768            kernel,
769            c: F::one(),
770            epsilon: F::from(0.1).unwrap(),
771            tol: F::from(1e-3).unwrap(),
772            max_iter: 10000,
773            cache_size: 1024,
774        }
775    }
776
777    /// Set the regularization parameter C.
778    #[must_use]
779    pub fn with_c(mut self, c: F) -> Self {
780        self.c = c;
781        self
782    }
783
784    /// Set the epsilon tube width.
785    #[must_use]
786    pub fn with_epsilon(mut self, epsilon: F) -> Self {
787        self.epsilon = epsilon;
788        self
789    }
790
791    /// Set the convergence tolerance.
792    #[must_use]
793    pub fn with_tol(mut self, tol: F) -> Self {
794        self.tol = tol;
795        self
796    }
797
798    /// Set the maximum number of SMO iterations.
799    #[must_use]
800    pub fn with_max_iter(mut self, max_iter: usize) -> Self {
801        self.max_iter = max_iter;
802        self
803    }
804
805    /// Set the kernel cache size.
806    #[must_use]
807    pub fn with_cache_size(mut self, cache_size: usize) -> Self {
808        self.cache_size = cache_size;
809        self
810    }
811}
812
/// Fitted Support Vector Regressor.
///
/// Stores the support vectors, dual coefficients, and bias; the prediction
/// for a sample `x` is `bias + sum_k coef_k * K(sv_k, x)`.
#[derive(Debug, Clone)]
pub struct FittedSVR<F, K> {
    /// The kernel used for predictions.
    kernel: K,
    /// Support vectors (stored as rows).
    support_vectors: Vec<Vec<F>>,
    /// Dual coefficients (alpha_i* - alpha_i) for each support vector.
    dual_coefs: Vec<F>,
    /// Bias term (intercept).
    bias: F,
}
827
828impl<F: Float, K: Kernel<F>> FittedSVR<F, K> {
829    /// Compute the decision function value for a single sample.
830    fn decision_value(&self, x: &[F]) -> F {
831        let mut val = self.bias;
832        for (sv, &coef) in self.support_vectors.iter().zip(self.dual_coefs.iter()) {
833            val = val + coef * self.kernel.compute(sv, x);
834        }
835        val
836    }
837
838    /// Compute the raw decision function values for each sample.
839    ///
840    /// # Errors
841    ///
842    /// Returns `Ok` always (provided for API symmetry).
843    pub fn decision_function(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
844        let n_samples = x.nrows();
845        let mut result = Array1::<F>::zeros(n_samples);
846        for s in 0..n_samples {
847            let xi: Vec<F> = x.row(s).to_vec();
848            result[s] = self.decision_value(&xi);
849        }
850        Ok(result)
851    }
852}
853
854/// Solve epsilon-SVR using SMO.
855///
856/// Reformulates the epsilon-SVR dual as a standard 2n-variable QP and
857/// solves it with the Fan-Chen-Lin WSS approach, analogous to `smo_binary`.
858///
859/// The 2n variables are indexed 0..2n:
860/// - Index `k` (k < n)  corresponds to alpha\*\_k  with label +1
861/// - Index `k` (k >= n) corresponds to alpha\_{k-n} with label -1
862///
863/// The Q matrix is: `Q_{ij} = s_i * s_j * K(p_i, p_j)` where `s` is the
864/// sign (+1 or -1) and `p` maps to the original sample index.
865///
866/// The linear term is: `q_k = epsilon - y_{p_k} * s_k`.
867#[allow(clippy::too_many_arguments)]
868fn smo_svr<F: Float, K: Kernel<F>>(
869    data: &[Vec<F>],
870    targets: &[F],
871    kernel: &K,
872    c: F,
873    epsilon: F,
874    tol: F,
875    max_iter: usize,
876    cache_size: usize,
877) -> Result<(Vec<F>, F), FerroError> {
878    let n = data.len();
879    let m = 2 * n; // Total number of dual variables.
880
881    // Encoding: variable k in [0, m)
882    //   k < n  => alpha*_k   (sign = +1, sample index = k)
883    //   k >= n => alpha_{k-n} (sign = -1, sample index = k - n)
884    //
885    // The dual is: min 1/2 * beta^T Q beta + q^T beta
886    //   s.t. 0 <= beta_k <= C, sum_k s_k * beta_k = 0
887    // where beta_k = alpha*_k or alpha_{k-n},
888    //       Q_{ij} = s_i * s_j * K(p_i, p_j),
889    //       q_k    = epsilon - y_{p_k} * s_k.
890    //
891    // This has the same structure as the SVC dual.
892
893    let sign = |k: usize| -> F { if k < n { F::one() } else { -F::one() } };
894    let sample = |k: usize| -> usize { if k < n { k } else { k - n } };
895
896    let mut beta = vec![F::zero(); m];
897    let mut cache = KernelCache::new(cache_size);
898
899    // Gradient: grad_k = (Q * beta)_k + q_k.  Initially beta=0 so grad_k = q_k.
900    // q_k = epsilon - y_{p_k} * s_k
901    let mut grad: Vec<F> = (0..m)
902        .map(|k| epsilon - targets[sample(k)] * sign(k))
903        .collect();
904
905    let two = F::one() + F::one();
906    let eps_num = F::from(1e-12).unwrap_or(F::epsilon());
907
908    for _iter in 0..max_iter {
909        // WSS: same as SVC but with the extended variables.
910        // I_up  = {k : (s_k=+1 and beta_k < C) or (s_k=-1 and beta_k > 0)}
911        // I_low = {k : (s_k=+1 and beta_k > 0) or (s_k=-1 and beta_k < C)}
912        // Select i = argmax_{k in I_up}  -s_k * grad_k
913        // Select j = argmin_{k in I_low} -s_k * grad_k
914
915        let mut i_up = None;
916        let mut max_val = F::neg_infinity();
917        let mut j_low = None;
918        let mut min_val = F::infinity();
919
920        for k in 0..m {
921            let sk = sign(k);
922            let val = -sk * grad[k];
923
924            let in_up =
925                (sk > F::zero() && beta[k] < c - eps_num) || (sk < F::zero() && beta[k] > eps_num);
926            let in_low =
927                (sk > F::zero() && beta[k] > eps_num) || (sk < F::zero() && beta[k] < c - eps_num);
928
929            if in_up && val > max_val {
930                max_val = val;
931                i_up = Some(k);
932            }
933            if in_low && val < min_val {
934                min_val = val;
935                j_low = Some(k);
936            }
937        }
938
939        if i_up.is_none() || j_low.is_none() || max_val - min_val < tol {
940            break;
941        }
942
943        let i = i_up.unwrap();
944        let j = j_low.unwrap();
945
946        if i == j {
947            break;
948        }
949
950        let si = sign(i);
951        let sj = sign(j);
952        let pi = sample(i);
953        let pj = sample(j);
954
955        // Q_{ii} = si*si*K(pi,pi) = K(pi,pi),  similarly for jj and ij
956        let kii = cache.get_or_compute(pi, pi, kernel, data);
957        let kjj = cache.get_or_compute(pj, pj, kernel, data);
958        let kij = cache.get_or_compute(pi, pj, kernel, data);
959
960        // eta = Q_{ii} + Q_{jj} - 2*Q_{ij} = K(pi,pi) + K(pj,pj) - 2*si*sj*K(pi,pj)
961        let eta = kii + kjj - two * si * sj * kij;
962
963        if eta <= eps_num {
964            continue;
965        }
966
967        // Bounds for beta_j
968        let old_bi = beta[i];
969        let old_bj = beta[j];
970
971        let (lo, hi) = if si != sj {
972            let diff = old_bj - old_bi;
973            (diff.max(F::zero()), (c + diff).min(c))
974        } else {
975            let sum = old_bi + old_bj;
976            ((sum - c).max(F::zero()), sum.min(c))
977        };
978
979        if (hi - lo).abs() < eps_num {
980            continue;
981        }
982
983        // Analytic update: beta_j += s_j * (E_i - E_j) / eta
984        // where E_k = s_k * grad_k
985        let mut new_bj = old_bj + sj * (si * grad[i] - sj * grad[j]) / eta;
986
987        if new_bj > hi {
988            new_bj = hi;
989        }
990        if new_bj < lo {
991            new_bj = lo;
992        }
993
994        if (new_bj - old_bj).abs() < eps_num {
995            continue;
996        }
997
998        let new_bi = old_bi + si * sj * (old_bj - new_bj);
999
1000        beta[i] = new_bi;
1001        beta[j] = new_bj;
1002
1003        // Update gradient: grad_k += delta_bi * Q_{k,i} + delta_bj * Q_{k,j}
1004        // Q_{k,t} = s_k * s_t * K(p_k, p_t)
1005        let delta_bi = new_bi - old_bi;
1006        let delta_bj = new_bj - old_bj;
1007
1008        for (k, grad_k) in grad.iter_mut().enumerate() {
1009            let sk = sign(k);
1010            let pk = sample(k);
1011            let kki = cache.get_or_compute(pk, pi, kernel, data);
1012            let kkj = cache.get_or_compute(pk, pj, kernel, data);
1013            *grad_k = *grad_k + delta_bi * sk * si * kki + delta_bj * sk * sj * kkj;
1014        }
1015    }
1016
1017    // Recover alpha*_i = beta_i (i < n) and alpha_i = beta_{i+n} (i >= n).
1018    // Coefficient for prediction: coef_i = alpha*_i - alpha_i.
1019    let coefs: Vec<F> = (0..n).map(|i| beta[i] - beta[i + n]).collect();
1020
1021    // Compute bias from KKT conditions on free support vectors.
1022    // For k where 0 < beta_k < C:
1023    //   grad_k = 0 at optimality => (Q*beta)_k + q_k = 0
1024    //   sum_t beta_t * s_k * s_t * K(p_k, p_t) + epsilon - y_{p_k} * s_k = 0
1025    //   s_k * sum_t (beta_t * s_t) * K(p_k, p_t) = y_{p_k} * s_k - epsilon
    //   sum_t coef_t * K(p_k, p_t) = y_{p_k} - s_k * epsilon; rather than solve this form, it is simpler to compute b from f(x) directly (below).
1027    //
1028    // Prediction: f(x) = sum_i coef_i * K(x_i, x) + b
1029    // For free alpha*_i (0 < alpha*_i < C): y_i - f(x_i) = epsilon  => b = y_i - epsilon - sum coef_j * K(i,j)
1030    // For free alpha_i  (0 < alpha_i  < C): f(x_i) - y_i = epsilon  => b = y_i + epsilon - sum coef_j * K(i,j)
1031
1032    let mut b_sum = F::zero();
1033    let mut b_count = 0usize;
1034
1035    for i in 0..n {
1036        let mut kernel_sum = F::zero();
1037        let has_free = (beta[i] > eps_num && beta[i] < c - eps_num)
1038            || (beta[i + n] > eps_num && beta[i + n] < c - eps_num);
1039
1040        if !has_free {
1041            continue;
1042        }
1043
1044        for (j, &cj) in coefs.iter().enumerate() {
1045            if cj.abs() > eps_num {
1046                kernel_sum = kernel_sum + cj * cache.get_or_compute(i, j, kernel, data);
1047            }
1048        }
1049
1050        if beta[i] > eps_num && beta[i] < c - eps_num {
1051            // alpha*_i is free: y_i - f(x_i) = epsilon => b = y_i - epsilon - kernel_sum
1052            b_sum = b_sum + targets[i] - epsilon - kernel_sum;
1053            b_count += 1;
1054        }
1055        if beta[i + n] > eps_num && beta[i + n] < c - eps_num {
1056            // alpha_i is free: f(x_i) - y_i = epsilon => b = y_i + epsilon - kernel_sum
1057            b_sum = b_sum + targets[i] + epsilon - kernel_sum;
1058            b_count += 1;
1059        }
1060    }
1061
1062    let bias = if b_count > 0 {
1063        b_sum / F::from(b_count).unwrap()
1064    } else {
1065        F::zero()
1066    };
1067
1068    Ok((coefs, bias))
1069}
1070
1071impl<F: Float + Send + Sync + ScalarOperand + 'static, K: Kernel<F> + 'static>
1072    Fit<Array2<F>, Array1<F>> for SVR<F, K>
1073{
1074    type Fitted = FittedSVR<F, K>;
1075    type Error = FerroError;
1076
1077    /// Fit the SVR model using SMO.
1078    ///
1079    /// # Errors
1080    ///
1081    /// Returns [`FerroError::ShapeMismatch`] if `x` and `y` have different
1082    /// sample counts.
1083    /// Returns [`FerroError::InvalidParameter`] if `C` is not positive.
1084    fn fit(&self, x: &Array2<F>, y: &Array1<F>) -> Result<FittedSVR<F, K>, FerroError> {
1085        let (n_samples, _n_features) = x.dim();
1086
1087        if n_samples != y.len() {
1088            return Err(FerroError::ShapeMismatch {
1089                expected: vec![n_samples],
1090                actual: vec![y.len()],
1091                context: "y length must match number of samples in X".into(),
1092            });
1093        }
1094
1095        if self.c <= F::zero() {
1096            return Err(FerroError::InvalidParameter {
1097                name: "C".into(),
1098                reason: "must be positive".into(),
1099            });
1100        }
1101
1102        if n_samples == 0 {
1103            return Err(FerroError::InsufficientSamples {
1104                required: 1,
1105                actual: 0,
1106                context: "SVR requires at least one sample".into(),
1107            });
1108        }
1109
1110        let data: Vec<Vec<F>> = (0..n_samples).map(|i| x.row(i).to_vec()).collect();
1111        let targets: Vec<F> = y.to_vec();
1112
1113        let (coefs, bias) = smo_svr(
1114            &data,
1115            &targets,
1116            &self.kernel,
1117            self.c,
1118            self.epsilon,
1119            self.tol,
1120            self.max_iter,
1121            self.cache_size,
1122        )?;
1123
1124        // Extract support vectors (non-zero coefficients).
1125        let eps = F::from(1e-8).unwrap_or(F::epsilon());
1126        let mut sv_data = Vec::new();
1127        let mut sv_coefs = Vec::new();
1128
1129        for (i, &coef) in coefs.iter().enumerate() {
1130            if coef.abs() > eps {
1131                sv_data.push(data[i].clone());
1132                sv_coefs.push(coef);
1133            }
1134        }
1135
1136        Ok(FittedSVR {
1137            kernel: self.kernel.clone(),
1138            support_vectors: sv_data,
1139            dual_coefs: sv_coefs,
1140            bias,
1141        })
1142    }
1143}
1144
1145impl<F: Float + Send + Sync + ScalarOperand + 'static, K: Kernel<F> + 'static> Predict<Array2<F>>
1146    for FittedSVR<F, K>
1147{
1148    type Output = Array1<F>;
1149    type Error = FerroError;
1150
1151    /// Predict target values for the given feature matrix.
1152    ///
1153    /// # Errors
1154    ///
1155    /// Returns `Ok` always for valid input.
1156    fn predict(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
1157        self.decision_function(x)
1158    }
1159}
1160
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use ndarray::array;

    #[test]
    fn test_linear_kernel() {
        // x . y = 1*4 + 2*5 + 3*6 = 32
        let k = LinearKernel;
        let x = vec![1.0, 2.0, 3.0];
        let y = vec![4.0, 5.0, 6.0];
        assert_relative_eq!(k.compute(&x, &y), 32.0, epsilon = 1e-10);
    }

    #[test]
    fn test_rbf_kernel() {
        let k = RbfKernel::with_gamma(1.0);
        let x = vec![0.0, 0.0];
        let y = vec![0.0, 0.0];
        // Identical points: exp(0) = 1.
        assert_relative_eq!(k.compute(&x, &y), 1.0, epsilon = 1e-10);

        // Different points should give value in (0, 1).
        let y2 = vec![1.0, 0.0];
        let val: f64 = k.compute(&x, &y2);
        assert!(val < 1.0);
        assert!(val > 0.0);
    }

    #[test]
    fn test_polynomial_kernel() {
        let k = PolynomialKernel {
            gamma: Some(1.0),
            degree: 2,
            coef0: 1.0,
        };
        let x = vec![1.0f64, 0.0];
        let y = vec![1.0, 0.0];
        // (gamma * x.y + coef0)^degree = (1.0 * 1.0 + 1.0)^2 = 4.0
        assert_relative_eq!(k.compute(&x, &y), 4.0, epsilon = 1e-10);
    }

    #[test]
    fn test_sigmoid_kernel() {
        let k = SigmoidKernel {
            gamma: Some(1.0),
            coef0: 0.0,
        };
        let x = vec![0.0f64];
        let y = vec![0.0];
        // tanh(0) = 0
        assert_relative_eq!(k.compute(&x, &y), 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_svc_linear_separable() {
        // Two well-separated clusters.
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 1.0, 1.5, 1.0, 1.0, 1.5, 1.5, 1.5, // class 0
                5.0, 5.0, 5.5, 5.0, 5.0, 5.5, 5.5, 5.5, // class 1
            ],
        )
        .unwrap();
        let y = array![0usize, 0, 0, 0, 1, 1, 1, 1];

        let model = SVC::<f64, LinearKernel>::new(LinearKernel).with_c(10.0);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        let correct: usize = preds.iter().zip(y.iter()).filter(|(p, a)| p == a).count();
        assert!(correct >= 6, "Expected at least 6 correct, got {correct}");
    }

    #[test]
    fn test_svc_rbf_xor() {
        // XOR problem: not linearly separable, needs RBF kernel.
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                0.0, 0.0, 0.1, 0.1, // class 0
                1.0, 1.0, 1.1, 1.1, // class 0
                1.0, 0.0, 1.1, 0.1, // class 1
                0.0, 1.0, 0.1, 1.1, // class 1
            ],
        )
        .unwrap();
        let y = array![0usize, 0, 0, 0, 1, 1, 1, 1];

        let kernel = RbfKernel::with_gamma(5.0);
        let model = SVC::new(kernel).with_c(100.0).with_max_iter(50000);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        let correct: usize = preds.iter().zip(y.iter()).filter(|(p, a)| p == a).count();
        assert!(
            correct >= 6,
            "Expected at least 6 correct for XOR, got {correct}"
        );
    }

    #[test]
    fn test_svc_multiclass() {
        // Three linearly separable clusters; exercises one-vs-one voting.
        let x = Array2::from_shape_vec(
            (9, 2),
            vec![
                0.0, 0.0, 0.5, 0.0, 0.0, 0.5, // class 0
                5.0, 0.0, 5.5, 0.0, 5.0, 0.5, // class 1
                0.0, 5.0, 0.5, 5.0, 0.0, 5.5, // class 2
            ],
        )
        .unwrap();
        let y = array![0usize, 0, 0, 1, 1, 1, 2, 2, 2];

        let model = SVC::new(LinearKernel).with_c(10.0);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        let correct: usize = preds.iter().zip(y.iter()).filter(|(p, a)| p == a).count();
        assert!(
            correct >= 7,
            "Expected at least 7 correct for multiclass, got {correct}"
        );
    }

    #[test]
    fn test_svc_decision_function() {
        let x = Array2::from_shape_vec(
            (6, 2),
            vec![
                1.0, 1.0, 1.5, 1.0, 1.0, 1.5, // class 0
                5.0, 5.0, 5.5, 5.0, 5.0, 5.5, // class 1
            ],
        )
        .unwrap();
        let y = array![0usize, 0, 0, 1, 1, 1];

        let model = SVC::new(LinearKernel).with_c(10.0);
        let fitted = model.fit(&x, &y).unwrap();

        let df = fitted.decision_function(&x).unwrap();
        assert_eq!(df.nrows(), 6);
        assert_eq!(df.ncols(), 1); // One binary model for 2 classes.

        // Class 0 samples should have negative decision values and class 1
        // positive; allow 0.5 slack for the SMO convergence tolerance.
        for i in 0..3 {
            assert!(
                df[[i, 0]] < 0.5,
                "Class 0 sample {i} has decision value {}",
                df[[i, 0]]
            );
        }
        for i in 3..6 {
            assert!(
                df[[i, 0]] > -0.5,
                "Class 1 sample {i} has decision value {}",
                df[[i, 0]]
            );
        }
    }

    #[test]
    fn test_svc_invalid_c() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![0usize, 0, 1, 1];

        // C must be strictly positive.
        let model = SVC::new(LinearKernel).with_c(0.0);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_svc_single_class_error() {
        let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
        let y = array![0usize, 0, 0];

        // Classification needs at least two classes.
        let model = SVC::new(LinearKernel);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_svc_shape_mismatch() {
        let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
        let y = array![0usize, 1];

        let model = SVC::new(LinearKernel);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_svr_simple() {
        // Simple linear regression: y = 2x
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = array![2.0, 4.0, 6.0, 8.0, 10.0, 12.0];

        let model = SVR::new(LinearKernel)
            .with_c(100.0)
            .with_epsilon(0.1)
            .with_max_iter(50000);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        // Check predictions are reasonably close.
        for (p, &actual) in preds.iter().zip(y.iter()) {
            assert!(
                (*p - actual).abs() < 2.0,
                "SVR prediction {p} too far from actual {actual}"
            );
        }
    }

    #[test]
    fn test_svr_decision_function() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![2.0, 4.0, 6.0, 8.0];

        let model = SVR::new(LinearKernel).with_c(100.0).with_epsilon(0.1);
        let fitted = model.fit(&x, &y).unwrap();

        let df = fitted.decision_function(&x).unwrap();
        let preds = fitted.predict(&x).unwrap();

        // Decision function and predict should return the same values.
        for i in 0..4 {
            assert_relative_eq!(df[i], preds[i], epsilon = 1e-10);
        }
    }

    #[test]
    fn test_svr_invalid_c() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![1.0, 2.0, 3.0, 4.0];

        // Negative C is rejected.
        let model = SVR::new(LinearKernel).with_c(-1.0);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_svr_shape_mismatch() {
        let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
        let y = array![1.0, 2.0];

        let model = SVR::new(LinearKernel);
        assert!(model.fit(&x, &y).is_err());
    }
}