// ferrolearn_linear/svm.rs
1//! Support Vector Machine with kernel trick.
2//!
3//! This module provides [`SVC`] (classification) and [`SVR`] (regression)
4//! support vector machines trained using the **Sequential Minimal Optimization
5//! (SMO)** algorithm (Platt, 1998).
6//!
7//! # Kernels
8//!
9//! Four built-in kernels are provided:
10//!
11//! - [`LinearKernel`]: `K(x, y) = x . y`
12//! - [`RbfKernel`]: `K(x, y) = exp(-gamma * ||x - y||^2)`
13//! - [`PolynomialKernel`]: `K(x, y) = (gamma * x . y + coef0)^degree`
14//! - [`SigmoidKernel`]: `K(x, y) = tanh(gamma * x . y + coef0)`
15//!
16//! Users can implement the [`Kernel`] trait for custom kernels.
17//!
18//! # Multiclass
19//!
20//! `SVC` uses a one-vs-one strategy for multiclass classification.
21//!
22//! # Examples
23//!
24//! ```
25//! use ferrolearn_linear::svm::{SVC, LinearKernel};
26//! use ferrolearn_core::{Fit, Predict};
27//! use ndarray::{array, Array2};
28//!
29//! let x = Array2::from_shape_vec((6, 2), vec![
30//!     1.0, 1.0,  2.0, 1.0,  1.0, 2.0,
31//!     5.0, 5.0,  6.0, 5.0,  5.0, 6.0,
32//! ]).unwrap();
33//! let y = array![0usize, 0, 0, 1, 1, 1];
34//!
35//! let model = SVC::<f64, LinearKernel>::new(LinearKernel);
36//! let fitted = model.fit(&x, &y).unwrap();
37//! let preds = fitted.predict(&x).unwrap();
38//! assert_eq!(preds.len(), 6);
39//! ```
40
use std::collections::{HashMap, VecDeque};

use ferrolearn_core::error::FerroError;
use ferrolearn_core::traits::{Fit, Predict};
use ndarray::{Array1, Array2, ScalarOperand};
use num_traits::Float;
47
48// ---------------------------------------------------------------------------
49// Kernel trait and built-in kernels
50// ---------------------------------------------------------------------------
51
/// A kernel function for SVM.
///
/// Computes the inner product of two vectors in a (possibly implicit)
/// higher-dimensional feature space.
///
/// Implementations must be symmetric (`compute(x, y) == compute(y, x)`):
/// the kernel cache stores one entry per unordered index pair and relies
/// on this property.
pub trait Kernel<F: Float>: Clone + Send + Sync {
    /// Compute the kernel value between two vectors.
    ///
    /// Both slices are expected to have the same length; the built-in
    /// kernels `zip` the inputs and silently truncate to the shorter one.
    fn compute(&self, x: &[F], y: &[F]) -> F;
}
60
61/// Linear kernel: `K(x, y) = x . y`.
62#[derive(Debug, Clone, Copy)]
63pub struct LinearKernel;
64
65impl<F: Float> Kernel<F> for LinearKernel {
66    fn compute(&self, x: &[F], y: &[F]) -> F {
67        x.iter()
68            .zip(y.iter())
69            .fold(F::zero(), |acc, (&a, &b)| acc + a * b)
70    }
71}
72
73/// Radial Basis Function (Gaussian) kernel.
74///
75/// `K(x, y) = exp(-gamma * ||x - y||^2)`
76#[derive(Debug, Clone, Copy)]
77pub struct RbfKernel<F> {
78    /// The gamma parameter. If `None`, it is set to `1 / (n_features * var(X))`.
79    pub gamma: Option<F>,
80}
81
82impl<F: Float> RbfKernel<F> {
83    /// Create a new RBF kernel with auto gamma.
84    #[must_use]
85    pub fn new() -> Self {
86        Self { gamma: None }
87    }
88
89    /// Create a new RBF kernel with a specified gamma.
90    #[must_use]
91    pub fn with_gamma(gamma: F) -> Self {
92        Self { gamma: Some(gamma) }
93    }
94}
95
96impl<F: Float> Default for RbfKernel<F> {
97    fn default() -> Self {
98        Self::new()
99    }
100}
101
102impl<F: Float + Send + Sync> Kernel<F> for RbfKernel<F> {
103    fn compute(&self, x: &[F], y: &[F]) -> F {
104        let gamma = self.gamma.unwrap_or_else(F::one);
105        let sq_dist = x.iter().zip(y.iter()).fold(F::zero(), |acc, (&a, &b)| {
106            let d = a - b;
107            acc + d * d
108        });
109        (-gamma * sq_dist).exp()
110    }
111}
112
113/// Polynomial kernel: `K(x, y) = (gamma * x . y + coef0)^degree`.
114#[derive(Debug, Clone, Copy)]
115pub struct PolynomialKernel<F> {
116    /// The gamma parameter. If `None`, uses `1 / n_features`.
117    pub gamma: Option<F>,
118    /// Polynomial degree.
119    pub degree: usize,
120    /// Independent term.
121    pub coef0: F,
122}
123
124impl<F: Float> PolynomialKernel<F> {
125    /// Create a new polynomial kernel with defaults.
126    #[must_use]
127    pub fn new() -> Self {
128        Self {
129            gamma: None,
130            degree: 3,
131            coef0: F::zero(),
132        }
133    }
134}
135
136impl<F: Float> Default for PolynomialKernel<F> {
137    fn default() -> Self {
138        Self::new()
139    }
140}
141
142impl<F: Float + Send + Sync> Kernel<F> for PolynomialKernel<F> {
143    fn compute(&self, x: &[F], y: &[F]) -> F {
144        let gamma = self.gamma.unwrap_or_else(F::one);
145        let dot: F = x
146            .iter()
147            .zip(y.iter())
148            .fold(F::zero(), |acc, (&a, &b)| acc + a * b);
149        let val = gamma * dot + self.coef0;
150        let mut result = F::one();
151        for _ in 0..self.degree {
152            result = result * val;
153        }
154        result
155    }
156}
157
158/// Sigmoid kernel: `K(x, y) = tanh(gamma * x . y + coef0)`.
159#[derive(Debug, Clone, Copy)]
160pub struct SigmoidKernel<F> {
161    /// The gamma parameter. If `None`, uses `1 / n_features`.
162    pub gamma: Option<F>,
163    /// Independent term.
164    pub coef0: F,
165}
166
167impl<F: Float> SigmoidKernel<F> {
168    /// Create a new sigmoid kernel with defaults.
169    #[must_use]
170    pub fn new() -> Self {
171        Self {
172            gamma: None,
173            coef0: F::zero(),
174        }
175    }
176}
177
178impl<F: Float> Default for SigmoidKernel<F> {
179    fn default() -> Self {
180        Self::new()
181    }
182}
183
184impl<F: Float + Send + Sync> Kernel<F> for SigmoidKernel<F> {
185    fn compute(&self, x: &[F], y: &[F]) -> F {
186        let gamma = self.gamma.unwrap_or_else(F::one);
187        let dot: F = x
188            .iter()
189            .zip(y.iter())
190            .fold(F::zero(), |acc, (&a, &b)| acc + a * b);
191        (gamma * dot + self.coef0).tanh()
192    }
193}
194
195// ---------------------------------------------------------------------------
196// Kernel cache (LRU)
197// ---------------------------------------------------------------------------
198
199/// Simple LRU cache for kernel evaluations.
200struct KernelCache<F> {
201    cache: HashMap<(usize, usize), F>,
202    order: Vec<(usize, usize)>,
203    capacity: usize,
204}
205
206impl<F: Float> KernelCache<F> {
207    fn new(capacity: usize) -> Self {
208        Self {
209            cache: HashMap::with_capacity(capacity),
210            order: Vec::with_capacity(capacity),
211            capacity,
212        }
213    }
214
215    fn get_or_compute<K: Kernel<F>>(
216        &mut self,
217        i: usize,
218        j: usize,
219        kernel: &K,
220        data: &[Vec<F>],
221    ) -> F {
222        let key = if i <= j { (i, j) } else { (j, i) };
223        if let Some(&val) = self.cache.get(&key) {
224            return val;
225        }
226        let val = kernel.compute(&data[i], &data[j]);
227        if self.order.len() >= self.capacity {
228            if let Some(old_key) = self.order.first().copied() {
229                self.cache.remove(&old_key);
230                self.order.remove(0);
231            }
232        }
233        self.cache.insert(key, val);
234        self.order.push(key);
235        val
236    }
237}
238
239// ---------------------------------------------------------------------------
240// SMO solver for binary SVM
241// ---------------------------------------------------------------------------
242
/// Result of a binary SMO solve.
struct SmoResult<F> {
    /// Lagrange multipliers, one per training sample (0 for non-support vectors).
    alphas: Vec<F>,
    /// Intercept of the decision function, derived from the KKT conditions.
    bias: F,
}
248
/// SMO implementation (Platt 1998, Fan-Chen-Lin 2005 WSS).
///
/// Uses the dual gradient `grad_i = (Q * alpha)_i - 1` where
/// `Q_{ij} = y_i * y_j * K(x_i, x_j)`. Bias is computed after
/// convergence from the KKT conditions.
///
/// `labels` are expected to be `+1`/`-1` (as produced by `SVC::fit`);
/// `c` is the box constraint `0 <= alpha_i <= C`; `tol` bounds the KKT
/// gap used as the stopping criterion.
fn smo_binary<F: Float, K: Kernel<F>>(
    data: &[Vec<F>],
    labels: &[F],
    kernel: &K,
    c: F,
    tol: F,
    max_iter: usize,
    cache_size: usize,
) -> Result<SmoResult<F>, FerroError> {
    let n = data.len();
    let mut alphas = vec![F::zero(); n];
    let mut cache = KernelCache::new(cache_size);

    // Gradient of the dual objective: grad_i = (Q*alpha)_i - 1
    // where Q_{ij} = y_i * y_j * K(x_i, x_j).
    // Initially alpha = 0, so grad_i = -1 for all i.
    let mut grad: Vec<F> = vec![-F::one(); n];

    let two = F::one() + F::one();
    // Numerical slack for box-boundary and progress tests.
    let eps = F::from(1e-12).unwrap_or_else(F::epsilon);

    for _iter in 0..max_iter {
        // Working set selection (Fan-Chen-Lin 2005):
        // I_up  = {i : (y_i=+1 and alpha_i < C) or (y_i=-1 and alpha_i > 0)}
        // I_low = {j : (y_j=+1 and alpha_j > 0) or (y_j=-1 and alpha_j < C)}
        // Select i = argmax_{t in I_up}  -y_t * grad_t
        // Select j = argmin_{t in I_low} -y_t * grad_t

        let mut i_up = None;
        let mut max_val = F::neg_infinity();
        let mut j_low = None;
        let mut min_val = F::infinity();

        for t in 0..n {
            let val = -labels[t] * grad[t];

            let in_up = (labels[t] > F::zero() && alphas[t] < c - eps)
                || (labels[t] < F::zero() && alphas[t] > eps);

            let in_low = (labels[t] > F::zero() && alphas[t] > eps)
                || (labels[t] < F::zero() && alphas[t] < c - eps);

            if in_up && val > max_val {
                max_val = val;
                i_up = Some(t);
            }
            if in_low && val < min_val {
                min_val = val;
                j_low = Some(t);
            }
        }

        // Stopping criterion: KKT gap < tol
        if i_up.is_none() || j_low.is_none() || max_val - min_val < tol {
            break;
        }

        let i = i_up.unwrap();
        let j = j_low.unwrap();

        // Same index selected on both sides: no feasible two-variable step.
        if i == j {
            break;
        }

        // Compute second-order info: eta is the curvature of the objective
        // along the (alpha_i, alpha_j) update direction.
        let kii = cache.get_or_compute(i, i, kernel, data);
        let kjj = cache.get_or_compute(j, j, kernel, data);
        let kij = cache.get_or_compute(i, j, kernel, data);
        let eta = kii + kjj - two * kij;

        // Non-positive curvature (possible with non-PSD kernels or
        // numerically degenerate pairs): skip this pair.
        if eta <= eps {
            continue;
        }

        // Bounds for alpha_j imposed by the box [0, C] and the equality
        // constraint sum_i y_i * alpha_i = 0.
        let old_ai = alphas[i];
        let old_aj = alphas[j];

        let (lo, hi) = if labels[i] == labels[j] {
            let sum = old_ai + old_aj;
            ((sum - c).max(F::zero()), sum.min(c))
        } else {
            let diff = old_aj - old_ai;
            (diff.max(F::zero()), (c + diff).min(c))
        };

        if (hi - lo).abs() < eps {
            continue;
        }

        // Analytic update for alpha_j (Platt 1998).
        // E_k = y_k * grad_k (dual error, where grad = Q*alpha - e).
        // alpha_j_new = alpha_j + y_j * (E_i - E_j) / eta
        //             = alpha_j + y_j * (y_i * grad_i - y_j * grad_j) / eta
        let mut new_aj = old_aj + labels[j] * (labels[i] * grad[i] - labels[j] * grad[j]) / eta;

        // Clip to [lo, hi]
        if new_aj > hi {
            new_aj = hi;
        }
        if new_aj < lo {
            new_aj = lo;
        }

        // No measurable progress on this pair; reselect next iteration.
        if (new_aj - old_aj).abs() < eps {
            continue;
        }

        // Preserve the equality constraint: y_i*alpha_i + y_j*alpha_j invariant.
        let new_ai = old_ai + labels[i] * labels[j] * (old_aj - new_aj);

        alphas[i] = new_ai;
        alphas[j] = new_aj;

        // Update dual gradient: grad_k += delta_alpha_i * Q_{k,i} + delta_alpha_j * Q_{k,j}
        // where Q_{k,t} = y_k * y_t * K(k,t)
        let delta_ai = new_ai - old_ai;
        let delta_aj = new_aj - old_aj;

        for (k, grad_k) in grad.iter_mut().enumerate() {
            let kki = cache.get_or_compute(k, i, kernel, data);
            let kkj = cache.get_or_compute(k, j, kernel, data);
            *grad_k = *grad_k
                + delta_ai * labels[k] * labels[i] * kki
                + delta_aj * labels[k] * labels[j] * kkj;
        }
    }

    // Compute bias from KKT conditions.
    // For support vectors with 0 < alpha_i < C:
    //   y_i * (sum_j alpha_j * y_j * K(i,j) + b) = 1
    //   b = y_i - sum_j alpha_j * y_j * K(i,j)
    // (since y_i^2 = 1, y_i * (y_i * f) = f, so b = 1/y_i - sum = y_i - sum)
    let mut b_sum = F::zero();
    let mut b_count = 0usize;

    for i in 0..n {
        if alphas[i] > eps && alphas[i] < c - eps {
            // This is a free support vector.
            let mut f_no_b = F::zero();
            for j in 0..n {
                if alphas[j] > eps {
                    f_no_b =
                        f_no_b + alphas[j] * labels[j] * cache.get_or_compute(i, j, kernel, data);
                }
            }
            b_sum = b_sum + labels[i] - f_no_b;
            b_count += 1;
        }
    }

    // Average over all free support vectors for numerical stability.
    let bias = if b_count > 0 {
        b_sum / F::from(b_count).unwrap()
    } else {
        // Fallback: use all support vectors (bounded ones too)
        let mut b_sum_all = F::zero();
        let mut b_count_all = 0usize;
        for i in 0..n {
            if alphas[i] > eps {
                let mut f_no_b = F::zero();
                for j in 0..n {
                    if alphas[j] > eps {
                        f_no_b = f_no_b
                            + alphas[j] * labels[j] * cache.get_or_compute(i, j, kernel, data);
                    }
                }
                b_sum_all = b_sum_all + labels[i] - f_no_b;
                b_count_all += 1;
            }
        }
        if b_count_all > 0 {
            b_sum_all / F::from(b_count_all).unwrap()
        } else {
            // No support vectors at all (degenerate input): zero bias.
            F::zero()
        }
    };

    Ok(SmoResult { alphas, bias })
}
432
433// ---------------------------------------------------------------------------
434// SVC (Support Vector Classifier)
435// ---------------------------------------------------------------------------
436
/// Support Vector Classifier.
///
/// Uses Sequential Minimal Optimization (SMO) to solve the dual QP.
/// Supports multiclass via one-vs-one strategy.
///
/// # Type Parameters
///
/// - `F`: The floating-point type (`f32` or `f64`).
/// - `K`: The kernel type (e.g., [`LinearKernel`], [`RbfKernel`]).
#[derive(Debug, Clone)]
pub struct SVC<F, K> {
    /// The kernel function.
    pub kernel: K,
    /// Regularization parameter: upper bound on the dual coefficients
    /// (the box constraint `0 <= alpha_i <= C` in SMO). Must be positive.
    pub c: F,
    /// Convergence tolerance on the KKT gap (SMO stopping criterion).
    pub tol: F,
    /// Maximum number of SMO iterations.
    pub max_iter: usize,
    /// Size (in entries) of the kernel evaluation cache.
    pub cache_size: usize,
}
459
460impl<F: Float, K: Kernel<F>> SVC<F, K> {
461    /// Create a new `SVC` with the given kernel and default hyperparameters.
462    ///
463    /// Defaults: `C = 1.0`, `tol = 1e-3`, `max_iter = 10000`, `cache_size = 1024`.
464    #[must_use]
465    pub fn new(kernel: K) -> Self {
466        Self {
467            kernel,
468            c: F::one(),
469            tol: F::from(1e-3).unwrap(),
470            max_iter: 10000,
471            cache_size: 1024,
472        }
473    }
474
475    /// Set the regularization parameter C.
476    #[must_use]
477    pub fn with_c(mut self, c: F) -> Self {
478        self.c = c;
479        self
480    }
481
482    /// Set the convergence tolerance.
483    #[must_use]
484    pub fn with_tol(mut self, tol: F) -> Self {
485        self.tol = tol;
486        self
487    }
488
489    /// Set the maximum number of SMO iterations.
490    #[must_use]
491    pub fn with_max_iter(mut self, max_iter: usize) -> Self {
492        self.max_iter = max_iter;
493        self
494    }
495
496    /// Set the kernel cache size.
497    #[must_use]
498    pub fn with_cache_size(mut self, cache_size: usize) -> Self {
499        self.cache_size = cache_size;
500        self
501    }
502}
503
/// A single binary SVM model (one pair of classes in one-vs-one).
#[derive(Debug, Clone)]
struct BinarySvm<F> {
    /// Support vectors (stored as rows).
    support_vectors: Vec<Vec<F>>,
    /// Dual coefficients: alpha_i * y_i for each support vector.
    dual_coefs: Vec<F>,
    /// Bias term.
    bias: F,
    /// Class mapped to the negative (-1) side of the decision function.
    class_neg: usize,
    /// Class mapped to the positive (+1) side of the decision function.
    class_pos: usize,
}
517
/// Fitted Support Vector Classifier.
///
/// Stores one binary SVM per pair of classes (one-vs-one). Implements
/// [`Predict`] to produce class labels.
#[derive(Debug, Clone)]
pub struct FittedSVC<F, K> {
    /// The kernel used for predictions (cloned from the unfitted model).
    kernel: K,
    /// One binary SVM per unordered class pair, in (ci, cj) order, ci < cj.
    binary_models: Vec<BinarySvm<F>>,
    /// Sorted, deduplicated class labels seen during fit.
    classes: Vec<usize>,
}
531
impl<F: Float, K: Kernel<F>> FittedSVC<F, K> {
    /// Compute the decision function value for a single sample against a
    /// binary model.
    ///
    /// Returns `bias + sum_k dual_coefs[k] * K(sv_k, x)`.
    fn decision_value_binary(&self, x: &[F], model: &BinarySvm<F>) -> F {
        let mut val = model.bias;
        for (sv, &coef) in model.support_vectors.iter().zip(model.dual_coefs.iter()) {
            val = val + coef * self.kernel.compute(sv, x);
        }
        val
    }

    /// Compute the raw decision function values for each sample.
    ///
    /// For binary classification, returns a 1-column array.
    /// For multiclass, returns one column per class pair.
    ///
    /// # Errors
    ///
    /// Currently always returns `Ok`; the `Result` is kept for API symmetry.
    /// NOTE(review): the input's feature count is not validated against the
    /// training data here.
    pub fn decision_function(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
        let n_samples = x.nrows();
        let n_models = self.binary_models.len();
        let mut result = Array2::<F>::zeros((n_samples, n_models));

        for s in 0..n_samples {
            let xi: Vec<F> = x.row(s).to_vec();
            for (m, model) in self.binary_models.iter().enumerate() {
                result[[s, m]] = self.decision_value_binary(&xi, model);
            }
        }

        Ok(result)
    }
}
566
567impl<F: Float + Send + Sync + ScalarOperand + 'static, K: Kernel<F> + 'static>
568    Fit<Array2<F>, Array1<usize>> for SVC<F, K>
569{
570    type Fitted = FittedSVC<F, K>;
571    type Error = FerroError;
572
573    /// Fit the SVC model using SMO.
574    ///
575    /// # Errors
576    ///
577    /// Returns [`FerroError::ShapeMismatch`] if `x` and `y` have different
578    /// sample counts.
579    /// Returns [`FerroError::InvalidParameter`] if `C` is not positive.
580    /// Returns [`FerroError::InsufficientSamples`] if fewer than 2 classes.
581    fn fit(&self, x: &Array2<F>, y: &Array1<usize>) -> Result<FittedSVC<F, K>, FerroError> {
582        let (n_samples, _n_features) = x.dim();
583
584        if n_samples != y.len() {
585            return Err(FerroError::ShapeMismatch {
586                expected: vec![n_samples],
587                actual: vec![y.len()],
588                context: "y length must match number of samples in X".into(),
589            });
590        }
591
592        if self.c <= F::zero() {
593            return Err(FerroError::InvalidParameter {
594                name: "C".into(),
595                reason: "must be positive".into(),
596            });
597        }
598
599        // Determine unique classes.
600        let mut classes: Vec<usize> = y.to_vec();
601        classes.sort_unstable();
602        classes.dedup();
603
604        if classes.len() < 2 {
605            return Err(FerroError::InsufficientSamples {
606                required: 2,
607                actual: classes.len(),
608                context: "SVC requires at least 2 distinct classes".into(),
609            });
610        }
611
612        // Convert data to Vec<Vec<F>> for kernel cache.
613        let data: Vec<Vec<F>> = (0..n_samples).map(|i| x.row(i).to_vec()).collect();
614
615        // One-vs-one: train one binary SVM per pair.
616        let n_classes = classes.len();
617        let mut binary_models = Vec::new();
618
619        for ci in 0..n_classes {
620            for cj in (ci + 1)..n_classes {
621                let class_neg = classes[ci];
622                let class_pos = classes[cj];
623
624                // Extract samples for these two classes.
625                let mut sub_data = Vec::new();
626                let mut sub_labels = Vec::new();
627                let mut sub_indices = Vec::new();
628
629                for s in 0..n_samples {
630                    let label = y[s];
631                    if label == class_neg {
632                        sub_data.push(data[s].clone());
633                        sub_labels.push(-F::one());
634                        sub_indices.push(s);
635                    } else if label == class_pos {
636                        sub_data.push(data[s].clone());
637                        sub_labels.push(F::one());
638                        sub_indices.push(s);
639                    }
640                }
641
642                let result = smo_binary(
643                    &sub_data,
644                    &sub_labels,
645                    &self.kernel,
646                    self.c,
647                    self.tol,
648                    self.max_iter,
649                    self.cache_size,
650                )?;
651
652                // Extract support vectors (non-zero alphas).
653                let eps = F::from(1e-8).unwrap_or_else(F::epsilon);
654                let mut sv_data = Vec::new();
655                let mut sv_coefs = Vec::new();
656
657                for (k, &alpha) in result.alphas.iter().enumerate() {
658                    if alpha > eps {
659                        sv_data.push(sub_data[k].clone());
660                        sv_coefs.push(alpha * sub_labels[k]);
661                    }
662                }
663
664                binary_models.push(BinarySvm {
665                    support_vectors: sv_data,
666                    dual_coefs: sv_coefs,
667                    bias: result.bias,
668                    class_neg,
669                    class_pos,
670                });
671            }
672        }
673
674        Ok(FittedSVC {
675            kernel: self.kernel.clone(),
676            binary_models,
677            classes,
678        })
679    }
680}
681
impl<F: Float + Send + Sync + ScalarOperand + 'static, K: Kernel<F> + 'static> Predict<Array2<F>>
    for FittedSVC<F, K>
{
    type Output = Array1<usize>;
    type Error = FerroError;

    /// Predict class labels for the given feature matrix.
    ///
    /// Uses one-vs-one voting: each binary model casts a vote for the
    /// winning class, and the class with the most votes wins. On a vote
    /// tie, the class appearing later in the sorted class list wins
    /// (`max_by_key` keeps the last maximum).
    ///
    /// # Errors
    ///
    /// Currently always returns `Ok`. NOTE(review): a feature-count
    /// mismatch is not detected here — the kernels `zip` vectors and
    /// silently truncate to the shorter length.
    fn predict(&self, x: &Array2<F>) -> Result<Array1<usize>, FerroError> {
        let n_samples = x.nrows();
        let n_classes = self.classes.len();
        let mut predictions = Array1::<usize>::zeros(n_samples);

        for s in 0..n_samples {
            let xi: Vec<F> = x.row(s).to_vec();
            // One vote counter per class, indexed like `self.classes`.
            let mut votes = vec![0usize; n_classes];

            for model in &self.binary_models {
                let val = self.decision_value_binary(&xi, model);
                // The sign of the decision value picks the pair's winner.
                let winner = if val >= F::zero() {
                    model.class_pos
                } else {
                    model.class_neg
                };
                if let Some(idx) = self.classes.iter().position(|&c| c == winner) {
                    votes[idx] += 1;
                }
            }

            let best_class_idx = votes
                .iter()
                .enumerate()
                .max_by_key(|&(_, &v)| v)
                .map_or(0, |(i, _)| i);

            predictions[s] = self.classes[best_class_idx];
        }

        Ok(predictions)
    }
}
730
731// ---------------------------------------------------------------------------
732// SVR (Support Vector Regressor)
733// ---------------------------------------------------------------------------
734
/// Support Vector Regressor.
///
/// Uses SMO to solve the epsilon-SVR dual problem.
///
/// # Type Parameters
///
/// - `F`: The floating-point type (`f32` or `f64`).
/// - `K`: The kernel type.
#[derive(Debug, Clone)]
pub struct SVR<F, K> {
    /// The kernel function.
    pub kernel: K,
    /// Regularization parameter (box constraint on the dual variables).
    pub c: F,
    /// Epsilon tube width (residuals within the tube incur no loss).
    pub epsilon: F,
    /// Convergence tolerance on the KKT gap (SMO stopping criterion).
    pub tol: F,
    /// Maximum number of SMO iterations.
    pub max_iter: usize,
    /// Size (in entries) of the kernel evaluation cache.
    pub cache_size: usize,
}
758
759impl<F: Float, K: Kernel<F>> SVR<F, K> {
760    /// Create a new `SVR` with the given kernel and default hyperparameters.
761    ///
762    /// Defaults: `C = 1.0`, `epsilon = 0.1`, `tol = 1e-3`,
763    /// `max_iter = 10000`, `cache_size = 1024`.
764    #[must_use]
765    pub fn new(kernel: K) -> Self {
766        Self {
767            kernel,
768            c: F::one(),
769            epsilon: F::from(0.1).unwrap(),
770            tol: F::from(1e-3).unwrap(),
771            max_iter: 10000,
772            cache_size: 1024,
773        }
774    }
775
776    /// Set the regularization parameter C.
777    #[must_use]
778    pub fn with_c(mut self, c: F) -> Self {
779        self.c = c;
780        self
781    }
782
783    /// Set the epsilon tube width.
784    #[must_use]
785    pub fn with_epsilon(mut self, epsilon: F) -> Self {
786        self.epsilon = epsilon;
787        self
788    }
789
790    /// Set the convergence tolerance.
791    #[must_use]
792    pub fn with_tol(mut self, tol: F) -> Self {
793        self.tol = tol;
794        self
795    }
796
797    /// Set the maximum number of SMO iterations.
798    #[must_use]
799    pub fn with_max_iter(mut self, max_iter: usize) -> Self {
800        self.max_iter = max_iter;
801        self
802    }
803
804    /// Set the kernel cache size.
805    #[must_use]
806    pub fn with_cache_size(mut self, cache_size: usize) -> Self {
807        self.cache_size = cache_size;
808        self
809    }
810}
811
/// Fitted Support Vector Regressor.
///
/// Stores the support vectors, dual coefficients, and bias.
#[derive(Debug, Clone)]
pub struct FittedSVR<F, K> {
    /// The kernel used for predictions.
    kernel: K,
    /// Support vectors (stored as rows).
    support_vectors: Vec<Vec<F>>,
    /// Dual coefficients (alpha_i* - alpha_i) for each support vector.
    dual_coefs: Vec<F>,
    /// Bias term.
    bias: F,
}
826
827impl<F: Float, K: Kernel<F>> FittedSVR<F, K> {
828    /// Compute the decision function value for a single sample.
829    fn decision_value(&self, x: &[F]) -> F {
830        let mut val = self.bias;
831        for (sv, &coef) in self.support_vectors.iter().zip(self.dual_coefs.iter()) {
832            val = val + coef * self.kernel.compute(sv, x);
833        }
834        val
835    }
836
837    /// Compute the raw decision function values for each sample.
838    ///
839    /// # Errors
840    ///
841    /// Returns `Ok` always (provided for API symmetry).
842    pub fn decision_function(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
843        let n_samples = x.nrows();
844        let mut result = Array1::<F>::zeros(n_samples);
845        for s in 0..n_samples {
846            let xi: Vec<F> = x.row(s).to_vec();
847            result[s] = self.decision_value(&xi);
848        }
849        Ok(result)
850    }
851}
852
853/// Solve epsilon-SVR using SMO.
854///
855/// Reformulates the epsilon-SVR dual as a standard 2n-variable QP and
856/// solves it with the Fan-Chen-Lin WSS approach, analogous to `smo_binary`.
857///
858/// The 2n variables are indexed 0..2n:
859/// - Index `k` (k < n)  corresponds to alpha\*\_k  with label +1
860/// - Index `k` (k >= n) corresponds to alpha\_{k-n} with label -1
861///
862/// The Q matrix is: `Q_{ij} = s_i * s_j * K(p_i, p_j)` where `s` is the
863/// sign (+1 or -1) and `p` maps to the original sample index.
864///
865/// The linear term is: `q_k = epsilon - y_{p_k} * s_k`.
866#[allow(clippy::too_many_arguments)]
867fn smo_svr<F: Float, K: Kernel<F>>(
868    data: &[Vec<F>],
869    targets: &[F],
870    kernel: &K,
871    c: F,
872    epsilon: F,
873    tol: F,
874    max_iter: usize,
875    cache_size: usize,
876) -> Result<(Vec<F>, F), FerroError> {
877    let n = data.len();
878    let m = 2 * n; // Total number of dual variables.
879
880    // Encoding: variable k in [0, m)
881    //   k < n  => alpha*_k   (sign = +1, sample index = k)
882    //   k >= n => alpha_{k-n} (sign = -1, sample index = k - n)
883    //
884    // The dual is: min 1/2 * beta^T Q beta + q^T beta
885    //   s.t. 0 <= beta_k <= C, sum_k s_k * beta_k = 0
886    // where beta_k = alpha*_k or alpha_{k-n},
887    //       Q_{ij} = s_i * s_j * K(p_i, p_j),
888    //       q_k    = epsilon - y_{p_k} * s_k.
889    //
890    // This has the same structure as the SVC dual.
891
892    let sign = |k: usize| -> F { if k < n { F::one() } else { -F::one() } };
893    let sample = |k: usize| -> usize { if k < n { k } else { k - n } };
894
895    let mut beta = vec![F::zero(); m];
896    let mut cache = KernelCache::new(cache_size);
897
898    // Gradient: grad_k = (Q * beta)_k + q_k.  Initially beta=0 so grad_k = q_k.
899    // q_k = epsilon - y_{p_k} * s_k
900    let mut grad: Vec<F> = (0..m)
901        .map(|k| epsilon - targets[sample(k)] * sign(k))
902        .collect();
903
904    let two = F::one() + F::one();
905    let eps_num = F::from(1e-12).unwrap_or_else(F::epsilon);
906
907    for _iter in 0..max_iter {
908        // WSS: same as SVC but with the extended variables.
909        // I_up  = {k : (s_k=+1 and beta_k < C) or (s_k=-1 and beta_k > 0)}
910        // I_low = {k : (s_k=+1 and beta_k > 0) or (s_k=-1 and beta_k < C)}
911        // Select i = argmax_{k in I_up}  -s_k * grad_k
912        // Select j = argmin_{k in I_low} -s_k * grad_k
913
914        let mut i_up = None;
915        let mut max_val = F::neg_infinity();
916        let mut j_low = None;
917        let mut min_val = F::infinity();
918
919        for k in 0..m {
920            let sk = sign(k);
921            let val = -sk * grad[k];
922
923            let in_up =
924                (sk > F::zero() && beta[k] < c - eps_num) || (sk < F::zero() && beta[k] > eps_num);
925            let in_low =
926                (sk > F::zero() && beta[k] > eps_num) || (sk < F::zero() && beta[k] < c - eps_num);
927
928            if in_up && val > max_val {
929                max_val = val;
930                i_up = Some(k);
931            }
932            if in_low && val < min_val {
933                min_val = val;
934                j_low = Some(k);
935            }
936        }
937
938        if i_up.is_none() || j_low.is_none() || max_val - min_val < tol {
939            break;
940        }
941
942        let i = i_up.unwrap();
943        let j = j_low.unwrap();
944
945        if i == j {
946            break;
947        }
948
949        let si = sign(i);
950        let sj = sign(j);
951        let pi = sample(i);
952        let pj = sample(j);
953
954        // Q_{ii} = si*si*K(pi,pi) = K(pi,pi),  similarly for jj and ij
955        let kii = cache.get_or_compute(pi, pi, kernel, data);
956        let kjj = cache.get_or_compute(pj, pj, kernel, data);
957        let kij = cache.get_or_compute(pi, pj, kernel, data);
958
959        // eta = Q_{ii} + Q_{jj} - 2*Q_{ij} = K(pi,pi) + K(pj,pj) - 2*si*sj*K(pi,pj)
960        let eta = kii + kjj - two * si * sj * kij;
961
962        if eta <= eps_num {
963            continue;
964        }
965
966        // Bounds for beta_j
967        let old_bi = beta[i];
968        let old_bj = beta[j];
969
970        let (lo, hi) = if si == sj {
971            let sum = old_bi + old_bj;
972            ((sum - c).max(F::zero()), sum.min(c))
973        } else {
974            let diff = old_bj - old_bi;
975            (diff.max(F::zero()), (c + diff).min(c))
976        };
977
978        if (hi - lo).abs() < eps_num {
979            continue;
980        }
981
982        // Analytic update: beta_j += s_j * (E_i - E_j) / eta
983        // where E_k = s_k * grad_k
984        let mut new_bj = old_bj + sj * (si * grad[i] - sj * grad[j]) / eta;
985
986        if new_bj > hi {
987            new_bj = hi;
988        }
989        if new_bj < lo {
990            new_bj = lo;
991        }
992
993        if (new_bj - old_bj).abs() < eps_num {
994            continue;
995        }
996
997        let new_bi = old_bi + si * sj * (old_bj - new_bj);
998
999        beta[i] = new_bi;
1000        beta[j] = new_bj;
1001
1002        // Update gradient: grad_k += delta_bi * Q_{k,i} + delta_bj * Q_{k,j}
1003        // Q_{k,t} = s_k * s_t * K(p_k, p_t)
1004        let delta_bi = new_bi - old_bi;
1005        let delta_bj = new_bj - old_bj;
1006
1007        for (k, grad_k) in grad.iter_mut().enumerate() {
1008            let sk = sign(k);
1009            let pk = sample(k);
1010            let kki = cache.get_or_compute(pk, pi, kernel, data);
1011            let kkj = cache.get_or_compute(pk, pj, kernel, data);
1012            *grad_k = *grad_k + delta_bi * sk * si * kki + delta_bj * sk * sj * kkj;
1013        }
1014    }
1015
1016    // Recover alpha*_i = beta_i (i < n) and alpha_i = beta_{i+n} (i >= n).
1017    // Coefficient for prediction: coef_i = alpha*_i - alpha_i.
1018    let coefs: Vec<F> = (0..n).map(|i| beta[i] - beta[i + n]).collect();
1019
1020    // Compute bias from KKT conditions on free support vectors.
1021    // For k where 0 < beta_k < C:
1022    //   grad_k = 0 at optimality => (Q*beta)_k + q_k = 0
1023    //   sum_t beta_t * s_k * s_t * K(p_k, p_t) + epsilon - y_{p_k} * s_k = 0
1024    //   s_k * sum_t (beta_t * s_t) * K(p_k, p_t) = y_{p_k} * s_k - epsilon
1025    //   sum_t coef_t_effective * K(p_k, p_t) = y_{p_k} - epsilon / s_k  (nah, let's use f directly)
1026    //
1027    // Prediction: f(x) = sum_i coef_i * K(x_i, x) + b
1028    // For free alpha*_i (0 < alpha*_i < C): y_i - f(x_i) = epsilon  => b = y_i - epsilon - sum coef_j * K(i,j)
1029    // For free alpha_i  (0 < alpha_i  < C): f(x_i) - y_i = epsilon  => b = y_i + epsilon - sum coef_j * K(i,j)
1030
1031    let mut b_sum = F::zero();
1032    let mut b_count = 0usize;
1033
1034    for i in 0..n {
1035        let mut kernel_sum = F::zero();
1036        let has_free = (beta[i] > eps_num && beta[i] < c - eps_num)
1037            || (beta[i + n] > eps_num && beta[i + n] < c - eps_num);
1038
1039        if !has_free {
1040            continue;
1041        }
1042
1043        for (j, &cj) in coefs.iter().enumerate() {
1044            if cj.abs() > eps_num {
1045                kernel_sum = kernel_sum + cj * cache.get_or_compute(i, j, kernel, data);
1046            }
1047        }
1048
1049        if beta[i] > eps_num && beta[i] < c - eps_num {
1050            // alpha*_i is free: y_i - f(x_i) = epsilon => b = y_i - epsilon - kernel_sum
1051            b_sum = b_sum + targets[i] - epsilon - kernel_sum;
1052            b_count += 1;
1053        }
1054        if beta[i + n] > eps_num && beta[i + n] < c - eps_num {
1055            // alpha_i is free: f(x_i) - y_i = epsilon => b = y_i + epsilon - kernel_sum
1056            b_sum = b_sum + targets[i] + epsilon - kernel_sum;
1057            b_count += 1;
1058        }
1059    }
1060
1061    let bias = if b_count > 0 {
1062        b_sum / F::from(b_count).unwrap()
1063    } else {
1064        F::zero()
1065    };
1066
1067    Ok((coefs, bias))
1068}
1069
1070impl<F: Float + Send + Sync + ScalarOperand + 'static, K: Kernel<F> + 'static>
1071    Fit<Array2<F>, Array1<F>> for SVR<F, K>
1072{
1073    type Fitted = FittedSVR<F, K>;
1074    type Error = FerroError;
1075
1076    /// Fit the SVR model using SMO.
1077    ///
1078    /// # Errors
1079    ///
1080    /// Returns [`FerroError::ShapeMismatch`] if `x` and `y` have different
1081    /// sample counts.
1082    /// Returns [`FerroError::InvalidParameter`] if `C` is not positive.
1083    fn fit(&self, x: &Array2<F>, y: &Array1<F>) -> Result<FittedSVR<F, K>, FerroError> {
1084        let (n_samples, _n_features) = x.dim();
1085
1086        if n_samples != y.len() {
1087            return Err(FerroError::ShapeMismatch {
1088                expected: vec![n_samples],
1089                actual: vec![y.len()],
1090                context: "y length must match number of samples in X".into(),
1091            });
1092        }
1093
1094        if self.c <= F::zero() {
1095            return Err(FerroError::InvalidParameter {
1096                name: "C".into(),
1097                reason: "must be positive".into(),
1098            });
1099        }
1100
1101        if n_samples == 0 {
1102            return Err(FerroError::InsufficientSamples {
1103                required: 1,
1104                actual: 0,
1105                context: "SVR requires at least one sample".into(),
1106            });
1107        }
1108
1109        let data: Vec<Vec<F>> = (0..n_samples).map(|i| x.row(i).to_vec()).collect();
1110        let targets: Vec<F> = y.to_vec();
1111
1112        let (coefs, bias) = smo_svr(
1113            &data,
1114            &targets,
1115            &self.kernel,
1116            self.c,
1117            self.epsilon,
1118            self.tol,
1119            self.max_iter,
1120            self.cache_size,
1121        )?;
1122
1123        // Extract support vectors (non-zero coefficients).
1124        let eps = F::from(1e-8).unwrap_or_else(F::epsilon);
1125        let mut sv_data = Vec::new();
1126        let mut sv_coefs = Vec::new();
1127
1128        for (i, &coef) in coefs.iter().enumerate() {
1129            if coef.abs() > eps {
1130                sv_data.push(data[i].clone());
1131                sv_coefs.push(coef);
1132            }
1133        }
1134
1135        Ok(FittedSVR {
1136            kernel: self.kernel.clone(),
1137            support_vectors: sv_data,
1138            dual_coefs: sv_coefs,
1139            bias,
1140        })
1141    }
1142}
1143
impl<F: Float + Send + Sync + ScalarOperand + 'static, K: Kernel<F> + 'static> Predict<Array2<F>>
    for FittedSVR<F, K>
{
    type Output = Array1<F>;
    type Error = FerroError;

    /// Predict target values for the given feature matrix.
    ///
    /// For SVR the prediction *is* the raw decision function
    /// `f(x) = sum_i coef_i * K(sv_i, x) + b`, so this simply delegates to
    /// `decision_function`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `decision_function`.
    /// NOTE(review): the original doc claimed this "Returns `Ok` always for
    /// valid input" — confirm `decision_function` is infallible for
    /// well-formed `x` before relying on that.
    fn predict(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
        self.decision_function(x)
    }
}
1159
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use ndarray::array;

    #[test]
    fn test_linear_kernel() {
        let k = LinearKernel;
        let x = vec![1.0, 2.0, 3.0];
        let y = vec![4.0, 5.0, 6.0];
        // 1*4 + 2*5 + 3*6 = 32
        assert_relative_eq!(k.compute(&x, &y), 32.0, epsilon = 1e-10);
    }

    #[test]
    fn test_rbf_kernel() {
        let k = RbfKernel::with_gamma(1.0);
        let x = vec![0.0, 0.0];
        let y = vec![0.0, 0.0];
        // Identical points: exp(0) = 1.
        assert_relative_eq!(k.compute(&x, &y), 1.0, epsilon = 1e-10);

        // Different points should give value < 1
        let y2 = vec![1.0, 0.0];
        let val: f64 = k.compute(&x, &y2);
        assert!(val < 1.0);
        assert!(val > 0.0);
    }

    #[test]
    fn test_polynomial_kernel() {
        let k = PolynomialKernel {
            gamma: Some(1.0),
            degree: 2,
            coef0: 1.0,
        };
        let x = vec![1.0f64, 0.0];
        let y = vec![1.0, 0.0];
        // (1.0 * 1.0 + 1.0)^2 = 4.0
        assert_relative_eq!(k.compute(&x, &y), 4.0, epsilon = 1e-10);
    }

    #[test]
    fn test_sigmoid_kernel() {
        let k = SigmoidKernel {
            gamma: Some(1.0),
            coef0: 0.0,
        };
        let x = vec![0.0f64];
        let y = vec![0.0];
        // tanh(0) = 0
        assert_relative_eq!(k.compute(&x, &y), 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_svc_linear_separable() {
        // Two well-separated clusters.
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 1.0, 1.5, 1.0, 1.0, 1.5, 1.5, 1.5, // class 0
                5.0, 5.0, 5.5, 5.0, 5.0, 5.5, 5.5, 5.5, // class 1
            ],
        )
        .unwrap();
        let y = array![0usize, 0, 0, 0, 1, 1, 1, 1];

        let model = SVC::<f64, LinearKernel>::new(LinearKernel).with_c(10.0);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        let correct: usize = preds.iter().zip(y.iter()).filter(|(p, a)| p == a).count();
        assert!(correct >= 6, "Expected at least 6 correct, got {correct}");
    }

    #[test]
    fn test_svc_rbf_xor() {
        // XOR problem: not linearly separable, needs RBF kernel.
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                0.0, 0.0, 0.1, 0.1, // class 0
                1.0, 1.0, 1.1, 1.1, // class 0
                1.0, 0.0, 1.1, 0.1, // class 1
                0.0, 1.0, 0.1, 1.1, // class 1
            ],
        )
        .unwrap();
        let y = array![0usize, 0, 0, 0, 1, 1, 1, 1];

        let kernel = RbfKernel::with_gamma(5.0);
        let model = SVC::new(kernel).with_c(100.0).with_max_iter(50000);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        let correct: usize = preds.iter().zip(y.iter()).filter(|(p, a)| p == a).count();
        assert!(
            correct >= 6,
            "Expected at least 6 correct for XOR, got {correct}"
        );
    }

    #[test]
    fn test_svc_multiclass() {
        // Three linearly separable clusters.
        let x = Array2::from_shape_vec(
            (9, 2),
            vec![
                0.0, 0.0, 0.5, 0.0, 0.0, 0.5, // class 0
                5.0, 0.0, 5.5, 0.0, 5.0, 0.5, // class 1
                0.0, 5.0, 0.5, 5.0, 0.0, 5.5, // class 2
            ],
        )
        .unwrap();
        let y = array![0usize, 0, 0, 1, 1, 1, 2, 2, 2];

        let model = SVC::new(LinearKernel).with_c(10.0);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        let correct: usize = preds.iter().zip(y.iter()).filter(|(p, a)| p == a).count();
        assert!(
            correct >= 7,
            "Expected at least 7 correct for multiclass, got {correct}"
        );
    }

    #[test]
    fn test_svc_decision_function() {
        let x = Array2::from_shape_vec(
            (6, 2),
            vec![
                1.0, 1.0, 1.5, 1.0, 1.0, 1.5, // class 0
                5.0, 5.0, 5.5, 5.0, 5.0, 5.5, // class 1
            ],
        )
        .unwrap();
        let y = array![0usize, 0, 0, 1, 1, 1];

        let model = SVC::new(LinearKernel).with_c(10.0);
        let fitted = model.fit(&x, &y).unwrap();

        let df = fitted.decision_function(&x).unwrap();
        assert_eq!(df.nrows(), 6);
        assert_eq!(df.ncols(), 1); // One binary model for 2 classes.

        // Class 0 samples should have negative decision values,
        // class 1 should have positive. Allow 0.5 of tolerance on each side.
        for i in 0..3 {
            assert!(
                df[[i, 0]] < 0.5,
                "Class 0 sample {i} has decision value {}",
                df[[i, 0]]
            );
        }
        for i in 3..6 {
            assert!(
                df[[i, 0]] > -0.5,
                "Class 1 sample {i} has decision value {}",
                df[[i, 0]]
            );
        }
    }

    #[test]
    fn test_svc_invalid_c() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![0usize, 0, 1, 1];

        let model = SVC::new(LinearKernel).with_c(0.0);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_svc_single_class_error() {
        let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
        let y = array![0usize, 0, 0];

        let model = SVC::new(LinearKernel);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_svc_shape_mismatch() {
        let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
        let y = array![0usize, 1];

        let model = SVC::new(LinearKernel);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_svr_simple() {
        // Simple linear regression: y = 2x
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = array![2.0, 4.0, 6.0, 8.0, 10.0, 12.0];

        let model = SVR::new(LinearKernel)
            .with_c(100.0)
            .with_epsilon(0.1)
            .with_max_iter(50000);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        // Check predictions are reasonably close.
        for (p, &actual) in preds.iter().zip(y.iter()) {
            assert!(
                (*p - actual).abs() < 2.0,
                "SVR prediction {p} too far from actual {actual}"
            );
        }
    }

    #[test]
    fn test_svr_decision_function() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![2.0, 4.0, 6.0, 8.0];

        let model = SVR::new(LinearKernel).with_c(100.0).with_epsilon(0.1);
        let fitted = model.fit(&x, &y).unwrap();

        let df = fitted.decision_function(&x).unwrap();
        let preds = fitted.predict(&x).unwrap();

        // Decision function and predict should return the same values.
        for i in 0..4 {
            assert_relative_eq!(df[i], preds[i], epsilon = 1e-10);
        }
    }

    #[test]
    fn test_svr_invalid_c() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![1.0, 2.0, 3.0, 4.0];

        let model = SVR::new(LinearKernel).with_c(-1.0);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_svr_shape_mismatch() {
        let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
        let y = array![1.0, 2.0];

        let model = SVR::new(LinearKernel);
        assert!(model.fit(&x, &y).is_err());
    }
}